{"id":44345,"date":"2022-05-25T05:41:13","date_gmt":"2022-05-25T10:41:13","guid":{"rendered":"https:\/\/simfoni.com\/?p=44345"},"modified":"2022-08-22T09:39:13","modified_gmt":"2022-08-22T14:39:13","slug":"transformers-more-than-meets-the-eye-in-spend-classification","status":"publish","type":"post","link":"https:\/\/simfoni.com\/engineering\/transformers-more-than-meets-the-eye-in-spend-classification\/","title":{"rendered":"Transformers: More Than Meets the Eye (in Spend Classification)"},"content":{"rendered":"<div data-elementor-type=\"wp-post\" data-elementor-id=\"44345\" class=\"elementor elementor-44345\" data-elementor-post-type=\"post\">\n\t\t\t\t\t\t<section class=\"has_eae_slider elementor-section elementor-top-section elementor-element elementor-element-a5b6c7c elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-eae-slider=\"84099\" data-id=\"a5b6c7c\" data-element_type=\"section\" data-e-type=\"section\" data-settings='{\"pix_scale_in\":\"none\"}'>\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"has_eae_slider elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-41bdbb1\" data-eae-slider=\"10022\" data-id=\"41bdbb1\" data-element_type=\"column\" data-e-type=\"column\" data-settings='{\"pix_scale_in\":\"none\"}'>\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-f59ed89 elementor-blockquote--skin-border elementor-widget elementor-widget-blockquote\" data-id=\"f59ed89\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"blockquote.default\">\n\t\t\t\t\t\t\t<blockquote class=\"elementor-blockquote\">\n\t\t\t<p class=\"elementor-blockquote__content\">\n\t\t\t\tWhen you&rsquo;re working out your <a href=\"https:\/\/simfoni.com\/procurement\/\" data-internallinksmanager029f6b8e52c=\"1\" title=\"Procurement\" 
target=\"_blank\" rel=\"noopener\">procurement<\/a> strategy you need the right level of data granularity. But often the data in your corporate systems is not structured in a way that is most useful for <span class=\"\"  data-mobile-support=\"0\"  data-gt-translate-attributes='[{\"attribute\":\"data-cmtooltip\", \"format\":\"html\"}]' tabindex='0' role='link'>buyer<\/span>s. <strong> Enter <span class=\"\" data-mobile-support=\"0\" data-gt-translate-attributes='[{\"attribute\":\"data-cmtooltip\", \"format\":\"html\"}]' tabindex=\"0\" role=\"link\">Spend Classification<\/span>: <\/strong>the process of taking raw or partially structured spend data and mapping it to a category hierarchy. \t\t\t<\/p>\n\t\t\t\t\t<\/blockquote>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"has_eae_slider elementor-section elementor-top-section elementor-element elementor-element-4b4293d elementor-reverse-mobile elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-eae-slider=\"73729\" data-id=\"4b4293d\" data-element_type=\"section\" data-e-type=\"section\" data-settings='{\"pix_scale_in\":\"none\"}'>\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"has_eae_slider elementor-column elementor-col-50 elementor-top-column elementor-element elementor-element-0f1b1fe\" data-eae-slider=\"59871\" data-id=\"0f1b1fe\" data-element_type=\"column\" data-e-type=\"column\" data-settings='{\"pix_scale_in\":\"none\"}'>\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-f45d932 elementor-widget elementor-widget-text-editor\" data-id=\"f45d932\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t\t\t\t\t\t<p><span data-contrast=\"auto\">Historically, this has been done by a combination of rules and oversight from 
a human analyst. Developing these kinds of rules is very time-consuming and prone to error.<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/p><p><span data-contrast=\"auto\">For example, think of the <\/span><b><span data-contrast=\"auto\">mobile problem<\/span><\/b><span data-contrast=\"auto\">. The following four descriptions all use the word<strong> &lsquo;mobile<\/strong>&rsquo; in them but relate to vastly distinct types of purchase.<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/p><ul><li data-leveltext=\"&#61623;\" data-font=\"Symbol\" data-listid=\"6\" data-list-defn-props=\"{\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><span data-contrast=\"auto\">Mobile application<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/li><li data-leveltext=\"&#61623;\" data-font=\"Symbol\" data-listid=\"6\" data-list-defn-props=\"{\" aria-setsize=\"-1\" data-aria-posinset=\"2\" data-aria-level=\"1\"><span data-contrast=\"auto\">Mobile phone<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/li><li data-leveltext=\"&#61623;\" data-font=\"Symbol\" data-listid=\"6\" data-list-defn-props=\"{\" aria-setsize=\"-1\" data-aria-posinset=\"3\" data-aria-level=\"1\"><span data-contrast=\"auto\">Mobile billboard<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/li><li data-leveltext=\"&#61623;\" data-font=\"Symbol\" data-listid=\"6\" data-list-defn-props=\"{\" aria-setsize=\"-1\" data-aria-posinset=\"4\" data-aria-level=\"1\"><span data-contrast=\"auto\">Mobile office<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/li><\/ul>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t<div class=\"has_eae_slider elementor-column elementor-col-50 elementor-top-column elementor-element elementor-element-a371a28\" data-eae-slider=\"55865\" data-id=\"a371a28\" data-element_type=\"column\" data-e-type=\"column\" data-settings='{\"background_background\":\"classic\",\"pix_scale_in\":\"none\"}'>\n\t\t\t<div class=\"elementor-widget-wrap 
elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-a379ce8 elementor-widget elementor-widget-spacer\" data-id=\"a379ce8\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"spacer.default\">\n\t\t\t\t\t\t\t<div class=\"elementor-spacer\">\n\t\t\t<div class=\"elementor-spacer-inner\"><\/div>\n\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"has_eae_slider elementor-section elementor-top-section elementor-element elementor-element-82d8bc3 elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-eae-slider=\"9440\" data-id=\"82d8bc3\" data-element_type=\"section\" data-e-type=\"section\" data-settings='{\"pix_scale_in\":\"none\"}'>\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"has_eae_slider elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-9fefa8d\" data-eae-slider=\"49209\" data-id=\"9fefa8d\" data-element_type=\"column\" data-e-type=\"column\" data-settings='{\"pix_scale_in\":\"none\"}'>\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-81d0686 elementor-widget elementor-widget-text-editor\" data-id=\"81d0686\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t\t\t\t\t\t<p><span data-contrast=\"auto\">To handle this case, you would have to write at least 4 rules. I say &lsquo;at least&rsquo; because you also must take account of <\/span><b><span data-contrast=\"auto\">data drift<\/span><\/b><span data-contrast=\"auto\">. Different people in various places write descriptions slightly differently. 
For example, &lsquo;billboard&rsquo; could be abbreviated in all kinds of ways which would need more rules to handle.<\/span><span data-ccp-props='{\"201341983\":0,\"335559739\":160,\"335559740\":259}'>&nbsp;<\/span><\/p><p><span data-contrast=\"auto\">You could easily end up with millions of rules. No human could keep on top of this.<\/span><span data-ccp-props='{\"201341983\":0,\"335559739\":160,\"335559740\":259}'>&nbsp;<\/span><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-a7f1c65 elementor-widget elementor-widget-heading\" data-id=\"a7f1c65\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Benefits of Using Transformers <\/h2>\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-8a76c1d elementor-widget elementor-widget-text-editor\" data-id=\"8a76c1d\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t\t\t\t\t\t<p><span class=\"TextRun SCXW29615133 BCX0\" lang=\"EN-GB\" xml:lang=\"EN-GB\" data-contrast=\"auto\"><span class=\"NormalTextRun SCXW29615133 BCX0\">Fortunately, <\/span><strong><a href=\"http:\/\/www.prweb.com\/releases\/2022\/6\/prweb18702610.htm\" target=\"_blank\" rel=\"noopener\"><span class=\"NormalTextRun CommentStart CommentHighlightPipeRest CommentHighlightRest SCXW29615133 BCX0\" style=\"color: #ff6600;\">recent advances in machine learning<\/span><\/a><\/strong><span class=\"NormalTextRun CommentHighlightPipeRest SCXW29615133 BCX0\"> mean that we can have the machine figure out for itself what these rules should be. 
The machine learning techniques available nowadays &ndash; <\/span><\/span><span class=\"TextRun SCXW29615133 BCX0\" lang=\"EN-GB\" xml:lang=\"EN-GB\" data-contrast=\"auto\"><span class=\"NormalTextRun SCXW29615133 BCX0\">Transformers <\/span><\/span><span class=\"TextRun SCXW29615133 BCX0\" lang=\"EN-GB\" xml:lang=\"EN-GB\" data-contrast=\"auto\"><span class=\"NormalTextRun SCXW29615133 BCX0\">&ndash;<\/span><\/span> <span class=\"TextRun SCXW29615133 BCX0\" lang=\"EN-GB\" xml:lang=\"EN-GB\" data-contrast=\"auto\"><span class=\"NormalTextRun SCXW29615133 BCX0\">can handle tens or hundreds of millions of different parameters, meaning they can correctly categori<\/span><span class=\"NormalTextRun SCXW29615133 BCX0\">z<\/span><span class=\"NormalTextRun SCXW29615133 BCX0\">e data to a much higher level of accuracy than ever before.<\/span><\/span><span class=\"EOP SCXW29615133 BCX0\" data-ccp-props=\"{\">&nbsp;<\/span><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-41ed340 elementor-widget elementor-widget-text-editor\" data-id=\"41ed340\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t\t\t\t\t\t<p><strong><span class=\"TextRun SCXW134425279 BCX0\" lang=\"EN-GB\" xml:lang=\"EN-GB\" data-contrast=\"auto\"><span class=\"NormalTextRun SCXW134425279 BCX0\">Here is a summary of some recent results we are seeing <\/span><span class=\"NormalTextRun SCXW134425279 BCX0\">at<\/span> <span class=\"NormalTextRun SCXW134425279 BCX0\">Simfoni<\/span><span class=\"NormalTextRun SCXW134425279 BCX0\">:<\/span><\/span><span class=\"EOP SCXW134425279 BCX0\" data-ccp-props='{\"201341983\":0,\"335559739\":160,\"335559740\":259}'>&nbsp;<\/span><\/strong><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-fa831f5 elementor-widget elementor-widget-image\" data-id=\"fa831f5\" data-element_type=\"widget\" data-e-type=\"widget\" 
data-widget_type=\"image.default\">\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<img fetchpriority=\"high\" decoding=\"async\" width=\"1337\" height=\"418\" src=\"https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Spend-Classification.png\" class=\"attachment-full size-full wp-image-44353\" alt=\"Spend Classification\" srcset=\"https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Spend-Classification.png 1337w, https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Spend-Classification-300x94.png 300w, https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Spend-Classification-1024x320.png 1024w, https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Spend-Classification-768x240.png 768w, https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Spend-Classification-150x47.png 150w\" sizes=\"(max-width: 1337px) 100vw, 1337px\"\/>\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-d65f3f2 elementor-widget elementor-widget-text-editor\" data-id=\"d65f3f2\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t\t\t\t\t\t<p><span data-contrast=\"auto\">The results are outstanding, and clearly show that as the amount of data increases you can expect to increase accuracy.<\/span><span data-ccp-props='{\"201341983\":0,\"335559739\":160,\"335559740\":259}'>&nbsp;<\/span><\/p><p><strong>In addition to the increase in speed\/accuracy of classification, especially for refreshes, we are seeing some other benefits:&nbsp;<\/strong><\/p><ul><li data-leveltext=\"&#61623;\" data-font=\"Symbol\" data-listid=\"3\" data-list-defn-props='{\"335552541\":1,\"335559684\":-2,\"335559685\":720,\"335559991\":360,\"469769226\":\"Symbol\",\"469769242\":[8226],\"469777803\":\"left\",\"469777804\":\"&#61623;\",\"469777815\":\"hybridMultilevel\"}' aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><span data-contrast=\"auto\">The structure needed for this kind of true machine learning means that we are 
now able to identify inconsistencies in the source data &ndash; where an item has been mapped to more than one category &ndash; more easily<\/span><span data-ccp-props='{\"201341983\":0,\"335559739\":160,\"335559740\":259}'>&nbsp;<\/span><\/li><li data-leveltext=\"&#61623;\" data-font=\"Symbol\" data-listid=\"3\" data-list-defn-props='{\"335552541\":1,\"335559684\":-2,\"335559685\":720,\"335559991\":360,\"469769226\":\"Symbol\",\"469769242\":[8226],\"469777803\":\"left\",\"469777804\":\"&#61623;\",\"469777815\":\"hybridMultilevel\"}' aria-setsize=\"-1\" data-aria-posinset=\"2\" data-aria-level=\"1\"><span data-contrast=\"auto\">Where we can use category hierarchies that are consistent with each other (i.e., they all use different subtrees from one overall hierarchy) we are able to use data learnt from one client to improve the quality of classification for other clients, without impacting any confidentiality concerns.<\/span><span data-ccp-props='{\"201341983\":0,\"335559739\":160,\"335559740\":259}'>&nbsp;<\/span><\/li><\/ul>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-a4df527 elementor-widget elementor-widget-heading\" data-id=\"a4df527\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">How Transformers Improve Upon Previous Machine Learning Techniques <\/h2>\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-5b50803 elementor-widget elementor-widget-text-editor\" data-id=\"5b50803\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t\t\t\t\t\t<p><span data-contrast=\"auto\">Let&rsquo;s take a trip through the recent history of machine learning, specifically machine learning as applied to <\/span><b><span data-contrast=\"auto\">natural language processing<\/span><\/b><span data-contrast=\"auto\">, to see how recent techniques succeed where 
previous versions struggled.<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/p><p><strong>It&rsquo;s worth thinking about natural language processing in <span class=\"\" data-mobile-support=\"0\" data-gt-translate-attributes='[{\"attribute\":\"data-cmtooltip\", \"format\":\"html\"}]' tabindex=\"0\" role=\"link\">spend classification<\/span> as a two-step process.&nbsp;<\/strong><\/p><ol><li data-leveltext=\"%1.\" data-font=\"Calibri\" data-listid=\"5\" data-list-defn-props=\"{\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><span data-contrast=\"auto\">Converting text from your purchasing data into numbers that a computer can understand (also known as <\/span><b><span data-contrast=\"auto\">tokenization<\/span><\/b><span data-contrast=\"auto\">)<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/li><\/ol><ol><li data-leveltext=\"%1.\" data-font=\"Calibri\" data-listid=\"5\" data-list-defn-props=\"{\" aria-setsize=\"-1\" data-aria-posinset=\"2\" data-aria-level=\"1\"><span data-contrast=\"auto\">Processing these numbers through a <\/span><b><span data-contrast=\"auto\">machine learning model<\/span><\/b><span data-contrast=\"auto\"> to train it on how to select (or &lsquo;infer&rsquo;) the most likely spend category for each line of spend&nbsp;<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/li><\/ol><p><span data-contrast=\"auto\">Then you can use this model to run inference on new data from every spend refresh, and even use this new data to further train the model to make its categorisation better and better over time.<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-ec820ec elementor-widget elementor-widget-heading\" data-id=\"ec820ec\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">(Simplified) History of Tokenization 
<\/h2>\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-f3e4b42 elementor-widget elementor-widget-text-editor\" data-id=\"f3e4b42\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t\t\t\t\t\t<p><span data-contrast=\"auto\">The oldest and simplest version of tokenizing text is to break up a string of text into words and then to assign each word a unique number. Think of this as giving every word in a dictionary its own number, starting at one. This technique is usually called <\/span><b><span data-contrast=\"auto\">one-hot <\/span><\/b><b><span data-contrast=\"auto\">vectors<\/span><\/b><span data-contrast=\"auto\">. It is a simplistic approach because each word is treated independently. There is no way you can tell whether word 12345 and 34567 are synonyms, for example.<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/p><p><span data-contrast=\"auto\">The next evolution was to create <\/span><b><span data-contrast=\"auto\">word embeddings<\/span><\/b><span data-contrast=\"auto\">. In this approach, each word is given a range of values (usually about 300) that provide some level of meaning about the word. This allows you to do some math on words to find similar-meaning words. <a href=\"https:\/\/www.ed.ac.uk\/informatics\/news-events\/stories\/2019\/king-man-woman-queen-the-hidden-algebraic-struct\" target=\"_blank\" rel=\"noopener\">The example often given in the literature is: king &ndash; man + woman = queen.&nbsp;<\/a><\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/p><p><span data-contrast=\"auto\">Word embeddings are a significant improvement over One-Hot vectors, but they are still word-based. 
This presents two types of problems:<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/p><ol><li data-leveltext=\"%1.\" data-font=\"Calibri\" data-listid=\"2\" data-list-defn-props=\"{\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><span data-contrast=\"auto\">They can&rsquo;t handle spelling changes, e.g., we have seen the word STRAWBERRY abbreviated to STRWBRY.<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/li><\/ol><ol><li data-leveltext=\"%1.\" data-font=\"Calibri\" data-listid=\"2\" data-list-defn-props=\"{\" aria-setsize=\"-1\" data-aria-posinset=\"2\" data-aria-level=\"1\"><span data-contrast=\"auto\">They don&rsquo;t know how to deal with &lsquo;out of vocabulary&rsquo; words that they have never seen before. The usual solution to this is to treat all unseen words the same.<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/li><\/ol><p><span data-contrast=\"auto\">One way of minimizing these problems is to pre-process the data before you feed it to the tokenizer. For example, you can try to standardize word spellings between US and UK English, strip out special accents, and try to handle expected spelling errors. But this takes you back down the route of having to manually write code that can easily become overly complex.<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/p><p><span data-contrast=\"auto\">Enter <\/span><b><span data-contrast=\"auto\">Sub-Word Tokenizers<\/span><\/b><span data-contrast=\"auto\">.<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/p><p><span data-contrast=\"auto\">Sub-Word Tokenizers break up words into their constituent parts without needing any specific coding. This approach requires less pre-processing and ensures that you don&rsquo;t have the same problems with unseen words as with previous techniques. 
See <a href=\"https:\/\/huggingface.co\/docs\/transformers\/tokenizer_summary\" target=\"_blank\" rel=\"noopener\">tokenizer summary<\/a> <\/span><span data-contrast=\"auto\">for more details on sub-word tokenization.<\/span><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-6ebb15f elementor-widget elementor-widget-heading\" data-id=\"6ebb15f\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">(Simplified) History of NLP Machine Learning Models<\/h2>\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"has_eae_slider elementor-section elementor-top-section elementor-element elementor-element-12b6e7f elementor-reverse-mobile elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-eae-slider=\"66742\" data-id=\"12b6e7f\" data-element_type=\"section\" data-e-type=\"section\" data-settings='{\"pix_scale_in\":\"none\"}'>\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"has_eae_slider elementor-column elementor-col-50 elementor-top-column elementor-element elementor-element-fbfa589\" data-eae-slider=\"50347\" data-id=\"fbfa589\" data-element_type=\"column\" data-e-type=\"column\" data-settings='{\"pix_scale_in\":\"none\"}'>\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-d386808 elementor-widget elementor-widget-text-editor\" data-id=\"d386808\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t\t\t\t\t\t<p><span data-contrast=\"auto\">Early machine learning techniques in NLP used a <\/span><b><span data-contrast=\"auto\">bag of words<\/span><\/b><span data-contrast=\"auto\"> model. These would look at all tokens independently. 
This approach does not work well in real life because often the context of a token depends on the other tokens in the sentence.<\/span><span data-ccp-props='{\"201341983\":0,\"335559739\":160,\"335559740\":259}'>&nbsp;<\/span><\/p><p><span data-contrast=\"auto\">Neural Networks like <\/span><b><span data-contrast=\"auto\">Long Short-Term Memory <\/span><\/b><span data-contrast=\"auto\">(LSTM) networks addressed this problem by looking at the tokens in the order they appear in the sentence. They start at the beginning of a sentence and move forwards token by token through the sentence, learning some context from previous tokens as they go through the sentence.<\/span><span data-ccp-props='{\"201341983\":0,\"335559739\":160,\"335559740\":259}'>&nbsp;<\/span><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t<div class=\"has_eae_slider elementor-column elementor-col-50 elementor-top-column elementor-element elementor-element-5163ebf\" data-eae-slider=\"61605\" data-id=\"5163ebf\" data-element_type=\"column\" data-e-type=\"column\" data-settings='{\"pix_scale_in\":\"none\"}'>\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-655306d elementor-widget elementor-widget-image\" data-id=\"655306d\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"image.default\">\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<img decoding=\"async\" width=\"1055\" height=\"699\" src=\"https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Machine-Learning-Spend-Classification.jpg\" class=\"attachment-full size-full wp-image-44367\" alt=\"Machine Learning Spend Classification\" srcset=\"https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Machine-Learning-Spend-Classification.jpg 1055w, https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Machine-Learning-Spend-Classification-300x199.jpg 300w, 
https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Machine-Learning-Spend-Classification-1024x678.jpg 1024w, https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Machine-Learning-Spend-Classification-768x509.jpg 768w, https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Machine-Learning-Spend-Classification-391x260.jpg 391w, https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Machine-Learning-Spend-Classification-150x99.jpg 150w\" sizes=\"(max-width: 1055px) 100vw, 1055px\"\/>\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"has_eae_slider elementor-section elementor-top-section elementor-element elementor-element-1f70dbe elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-eae-slider=\"4583\" data-id=\"1f70dbe\" data-element_type=\"section\" data-e-type=\"section\" data-settings='{\"pix_scale_in\":\"none\"}'>\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"has_eae_slider elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-b593d56\" data-eae-slider=\"59537\" data-id=\"b593d56\" data-element_type=\"column\" data-e-type=\"column\" data-settings='{\"pix_scale_in\":\"none\"}'>\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-4cf89bf elementor-widget elementor-widget-text-editor\" data-id=\"4cf89bf\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t\t\t\t\t\t<p><span data-contrast=\"auto\">Early machine learning techniques in NLP used a <\/span><b><span data-contrast=\"auto\">bag of words<\/span><\/b><span data-contrast=\"auto\"> model. These would look at all tokens independently. 
This approach does not work well in real life because often the context of a token depends on the other tokens in the sentence.<\/span><span data-ccp-props='{\"201341983\":0,\"335559739\":160,\"335559740\":259}'>&nbsp;<\/span><\/p><p><span data-contrast=\"auto\">Neural Networks like <\/span><b><span data-contrast=\"auto\">Long Short-Term Memory <\/span><\/b><span data-contrast=\"auto\">(LSTM) networks addressed this problem by looking at the tokens in the order they appear in the sentence. They start at the beginning of a sentence and move forwards token by token through the sentence, learning some context from previous tokens as they go through the sentence.<\/span><span data-ccp-props='{\"201341983\":0,\"335559739\":160,\"335559740\":259}'>&nbsp;<\/span><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-89c6470 elementor-widget elementor-widget-text-editor\" data-id=\"89c6470\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t\t\t\t\t\t<p><span data-contrast=\"auto\">Enter <\/span><b><span data-contrast=\"auto\">Transformers<\/span><\/b><span data-contrast=\"auto\">.<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/p><p><span data-contrast=\"auto\">Transformers improve on LSTMs by looking at all the tokens in a sentence at the same time using a technique called <\/span><b><span data-contrast=\"auto\">Self-Attention<\/span><\/b><span data-contrast=\"auto\">. 
The literature usually uses this example:<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/p><ol><li data-leveltext=\"%1.\" data-font=\"Calibri\" data-listid=\"1\" data-list-defn-props=\"{\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><span data-contrast=\"auto\">The animal did not cross the road because it was too tired.<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/li><\/ol><ol><li data-leveltext=\"%1.\" data-font=\"Calibri\" data-listid=\"1\" data-list-defn-props=\"{\" aria-setsize=\"-1\" data-aria-posinset=\"2\" data-aria-level=\"1\"><span data-contrast=\"auto\">The animal did not cross the road because it was too wide.<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/li><\/ol><p><span data-contrast=\"auto\">In the first example, &lsquo;it&rsquo; refers to the animal. In the second, &lsquo;it&rsquo; refers to the road. Transformers can handle this kind of challenge better than previous techniques. <a href=\"https:\/\/jalammar.github.io\/illustrated-transformer\/\" target=\"_blank\" rel=\"noopener\">Illustrated-transformer<\/a><\/span><span data-contrast=\"none\">&nbsp;has a good write-up about how this works.<\/span><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"has_eae_slider elementor-section elementor-top-section elementor-element elementor-element-3e36a9e elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-eae-slider=\"26180\" data-id=\"3e36a9e\" data-element_type=\"section\" data-e-type=\"section\" data-settings='{\"pix_scale_in\":\"none\"}'>\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"has_eae_slider elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-4fa6058\" data-eae-slider=\"38278\" data-id=\"4fa6058\" data-element_type=\"column\" data-e-type=\"column\" 
data-settings='{\"pix_scale_in\":\"none\"}'>\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-d94ad20 elementor-widget elementor-widget-heading\" data-id=\"d94ad20\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">What this means for <span class=\"\" data-mobile-support=\"0\" data-gt-translate-attributes='[{\"attribute\":\"data-cmtooltip\", \"format\":\"html\"}]' tabindex=\"0\" role=\"link\">Spend Classification<\/span> <\/h2>\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"has_eae_slider elementor-section elementor-top-section elementor-element elementor-element-5196916 elementor-reverse-mobile elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-eae-slider=\"90644\" data-id=\"5196916\" data-element_type=\"section\" data-e-type=\"section\" data-settings='{\"pix_scale_in\":\"none\"}'>\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"has_eae_slider elementor-column elementor-col-50 elementor-top-column elementor-element elementor-element-5dc1dc4\" data-eae-slider=\"7107\" data-id=\"5dc1dc4\" data-element_type=\"column\" data-e-type=\"column\" data-settings='{\"pix_scale_in\":\"none\"}'>\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-757db2f elementor-widget elementor-widget-text-editor\" data-id=\"757db2f\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t\t\t\t\t\t<p><span data-contrast=\"none\">Taken together, these two <span class=\"\" data-mobile-support=\"0\" data-gt-translate-attributes='[{\"attribute\":\"data-cmtooltip\", \"format\":\"html\"}]' tabindex=\"0\" 
role=\"link\">innovation<\/span>s &ndash; <\/span><b><span data-contrast=\"none\">self-attention<\/span><\/b><span data-contrast=\"none\"> and <\/span><b><span data-contrast=\"none\">sub-word tokenization<\/span><\/b><span data-contrast=\"none\"> &ndash; mean that you can now achieve far higher-quality classification with less human intervention than ever before.<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/p><p><span data-contrast=\"none\">Overall, it is now possible to get access to fully <a href=\"https:\/\/simfoni.com\/spend-analysis-guide\/\" target=\"_blank\" rel=\"noopener\">categorized spend data<\/a> at whatever level of granularity your organisation requires, much more quickly and accurately than in the past. With detailed, authentic, and up-to-date spend data you can make smarter purchasing decisions and better data-driven decisions in support of your organisation&rsquo;s strategic objectives, whether those be <a href=\"https:\/\/simfoni.com\/cost-reduction\/\" target=\"_blank\" rel=\"noopener\"><span class=\"\" data-mobile-support=\"0\" data-gt-translate-attributes='[{\"attribute\":\"data-cmtooltip\", \"format\":\"html\"}]' tabindex=\"0\" role=\"link\">cost savings<\/span><\/a>, <a href=\"https:\/\/simfoni.com\/supplier-management\/what-is-vendor-risk-management-why-and-how-it-is-implemented\/\" target=\"_blank\" rel=\"noopener\">risk management<\/a>, ESG initiatives or anything else.&nbsp;<\/span><span data-ccp-props=\"{\">&nbsp;<\/span><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t<div class=\"has_eae_slider elementor-column elementor-col-50 elementor-top-column elementor-element elementor-element-09fab44\" data-eae-slider=\"20552\" data-id=\"09fab44\" data-element_type=\"column\" data-e-type=\"column\" data-settings='{\"pix_scale_in\":\"none\"}'>\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-c8dc656 elementor-widget 
elementor-widget-image\" data-id=\"c8dc656\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"image.default\">\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<img decoding=\"async\" width=\"800\" height=\"533\" src=\"https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Spend-Taxonomy.gif\" class=\"attachment-full size-full wp-image-44366\" alt=\"Spend Taxonomy\"\/>\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"has_eae_slider elementor-section elementor-top-section elementor-element elementor-element-f824144 elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-eae-slider=\"68461\" data-id=\"f824144\" data-element_type=\"section\" data-e-type=\"section\" data-settings='{\"background_background\":\"classic\",\"pix_scale_in\":\"none\"}'>\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"has_eae_slider elementor-column elementor-col-50 elementor-top-column elementor-element elementor-element-fac2a9c\" data-eae-slider=\"2469\" data-id=\"fac2a9c\" data-element_type=\"column\" data-e-type=\"column\" data-settings='{\"pix_scale_in\":\"none\"}'>\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-7aac2ef elementor-author-box--align-left elementor-widget elementor-widget-author-box\" data-id=\"7aac2ef\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"author-box.default\">\n\t\t\t\t\t\t\t<div class=\"elementor-author-box\">\n\t\t\t\t\t\t\t<a href=\"https:\/\/www.linkedin.com\/in\/alanbuxton\/\" class=\"elementor-author-box__avatar\">\n\t\t\t\t\t<img decoding=\"async\" src=\"https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Alan-Buxton-300x300.jpg\" alt=\"Picture of Alan Buxton\" loading=\"lazy\"\/>\n\t\t\t\t<\/a>\n\t\t\t\n\t\t\t<div 
class=\"elementor-author-box__text\">\n\t\t\t\t\t\t\t\t\t<a href=\"https:\/\/www.linkedin.com\/in\/alanbuxton\/\">\n\t\t\t\t\t\t<h4 class=\"elementor-author-box__name\">\n\t\t\t\t\t\t\tAlan Buxton\t\t\t\t\t\t<\/h4>\n\t\t\t\t\t<\/a>\n\t\t\t\t\n\t\t\t\t\t\t\t\t\t<div class=\"elementor-author-box__bio\">\n\t\t\t\t\t\t<p>CTO at Simfoni<\/p>\n\t\t\t\t\t<\/div>\n\t\t\t\t\n\t\t\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t<div class=\"has_eae_slider elementor-column elementor-col-50 elementor-top-column elementor-element elementor-element-9c2fc32\" data-eae-slider=\"54142\" data-id=\"9c2fc32\" data-element_type=\"column\" data-e-type=\"column\" data-settings='{\"pix_scale_in\":\"none\"}'>\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-a2b4564 elementor-widget elementor-widget-author-box\" data-id=\"a2b4564\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"author-box.default\">\n\t\t\t\t\t\t\t<div class=\"elementor-author-box\">\n\t\t\t\t\t\t\t<a href=\"https:\/\/www.linkedin.com\/company\/simfoni\/\" class=\"elementor-author-box__avatar\">\n\t\t\t\t\t<img decoding=\"async\" src=\"https:\/\/simfoni.com\/wp-content\/uploads\/2020\/03\/Simfoni-Favicon-300x300.gif\" alt=\"Picture of Simfoni\" loading=\"lazy\"\/>\n\t\t\t\t<\/a>\n\t\t\t\n\t\t\t<div class=\"elementor-author-box__text\">\n\t\t\t\t\t\t\t\t\t<a href=\"https:\/\/www.linkedin.com\/company\/simfoni\/\">\n\t\t\t\t\t\t<h4 class=\"elementor-author-box__name\">\n\t\t\t\t\t\t\tSimfoni\t\t\t\t\t\t<\/h4>\n\t\t\t\t\t<\/a>\n\t\t\t\t\n\t\t\t\t\t\t\t\t\t<div class=\"elementor-author-box__bio\">\n\t\t\t\t\t\t<p>Follow Simfoni on LinkedIn<\/p>\n\t\t\t\t\t<\/div>\n\t\t\t\t\n\t\t\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<\/div>\n\t\t","protected":false},"excerpt":{"rendered":"<p>When 
you&rsquo;re working out your <span class=\"\" data-mobile-support=\"0\" data-gt-translate-attributes='[{\"attribute\":\"data-cmtooltip\", \"format\":\"html\"}]' tabindex=\"0\" role=\"link\"><span class=\"\" data-mobile-support=\"0\" data-gt-translate-attributes='[{\"attribute\":\"data-cmtooltip\", \"format\":\"html\"}]' tabindex=\"0\" role=\"link\">procurement strategy<\/span><\/span> you need the right level of data granularity. But often the data in your corporate systems is not structured in a way that is most useful for <span class=\"\" data-mobile-support=\"0\" data-gt-translate-attributes='[{\"attribute\":\"data-cmtooltip\", \"format\":\"html\"}]' tabindex=\"0\" role=\"link\"><span class=\"\"  data-mobile-support=\"0\"  data-gt-translate-attributes='[{\"attribute\":\"data-cmtooltip\", \"format\":\"html\"}]' tabindex='0' role='link'>buyer<\/span><\/span>s. Enter <span class=\"\" data-mobile-support=\"0\" data-gt-translate-attributes='[{\"attribute\":\"data-cmtooltip\", \"format\":\"html\"}]' tabindex=\"0\" role=\"link\"><span class=\"\" data-mobile-support=\"0\" data-gt-translate-attributes='[{\"attribute\":\"data-cmtooltip\", \"format\":\"html\"}]' tabindex=\"0\" role=\"link\">Spend Classification<\/span><\/span>: the process&hellip;<\/p>\n","protected":false},"author":1,"featured_media":44348,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"give_campaign_id":0,"footnotes":""},"categories":[2768,528],"tags":[],"class_list":["post-44345","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-engineering","category-spend-management"],"yoast_head":"<!-- This site is optimized with the Yoast SEO Premium plugin v27.2 (Yoast SEO v27.2) - https:\/\/yoast.com\/product\/yoast-seo-premium-wordpress\/ -->\n<title>Transformers: More Than Meets the Eye (in Spend Classification) - Simfoni.com<\/title>\n<meta name=\"description\" content=\"When you\u2019re working out your 
procurement strategy you need the right level of data granularity. Enter Spend Classification: the process of taking raw or partially structured spend data and mapping it to a category hierarchy.\" \/>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"Transformers: More Than Meets the Eye (in Spend Classification) - Simfoni Engineering Blog\" \/>\n<meta property=\"og:description\" content=\"When you\u2019re working out your procurement strategy you need the right level of data granularity. Enter Spend Classification: the process of taking raw or partially structured spend data and mapping it to a category hierarchy.\" \/>\n<meta property=\"og:url\" content=\"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/\" \/>\n<meta property=\"og:site_name\" content=\"Simfoni\" \/>\n<meta property=\"article:publisher\" content=\"https:\/\/www.facebook.com\/SimfoniApps\/\" \/>\n<meta property=\"article:author\" content=\"https:\/\/www.facebook.com\/SimfoniApps\/\" \/>\n<meta property=\"article:published_time\" content=\"2022-05-25T10:41:13+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2022-08-22T14:39:13+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Transformers-in-Spend-Classification.jpg\" \/>\n\t<meta property=\"og:image:width\" content=\"1200\" \/>\n\t<meta property=\"og:image:height\" content=\"628\" \/>\n\t<meta property=\"og:image:type\" content=\"image\/jpeg\" \/>\n<meta name=\"author\" content=\"Simfoni\" \/>\n<script type=\"application\/ld+json\" 
class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"Article\",\"@id\":\"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#article\",\"isPartOf\":{\"@id\":\"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/\"},\"author\":{\"name\":\"Simfoni\",\"@id\":\"https:\/\/simfoni.com\/#\/schema\/person\/afc4d1749ccc888582602619fc5b02b8\"},\"headline\":\"Transformers: More Than Meets the Eye (in Spend Classification)\",\"datePublished\":\"2022-05-25T10:41:13+00:00\",\"dateModified\":\"2022-08-22T14:39:13+00:00\",\"mainEntityOfPage\":{\"@id\":\"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/\"},\"wordCount\":1229,\"publisher\":{\"@id\":\"https:\/\/simfoni.com\/#organization\"},\"image\":{\"@id\":\"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#primaryimage\"},\"thumbnailUrl\":\"https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Transformers-in-Spend-Classification.jpg\",\"articleSection\":[\"Engineering\",\"Spend Management\"],\"inLanguage\":\"en\",\"copyrightYear\":\"2022\",\"copyrightHolder\":{\"@id\":\"https:\/\/simfoni.com\/#organization\"}},{\"@type\":\"WebPage\",\"@id\":\"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/\",\"url\":\"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/\",\"name\":\"Transformers: More Than Meets the Eye (in Spend Classification) - 
Simfoni.com\",\"isPartOf\":{\"@id\":\"https:\/\/simfoni.com\/#website\"},\"primaryImageOfPage\":{\"@id\":\"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#primaryimage\"},\"image\":{\"@id\":\"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#primaryimage\"},\"thumbnailUrl\":\"https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Transformers-in-Spend-Classification.jpg\",\"datePublished\":\"2022-05-25T10:41:13+00:00\",\"dateModified\":\"2022-08-22T14:39:13+00:00\",\"description\":\"When you\u2019re working out your procurement strategy you need the right level of data granularity. Enter Spend Classification: the process of taking raw or partially structured spend data and mapping it to a category hierarchy.\",\"breadcrumb\":{\"@id\":\"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#breadcrumb\"},\"inLanguage\":\"en\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/\"]}]},{\"@type\":\"ImageObject\",\"inLanguage\":\"en\",\"@id\":\"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#primaryimage\",\"url\":\"https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Transformers-in-Spend-Classification.jpg\",\"contentUrl\":\"https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Transformers-in-Spend-Classification.jpg\",\"width\":1200,\"height\":628,\"caption\":\"Transformers in Spend Classification\"},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"Home\",\"item\":\"https:\/\/simfoni.com\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"Transformers: More Than 
Meets the Eye (in Spend Classification)\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/simfoni.com\/#website\",\"url\":\"https:\/\/simfoni.com\/\",\"name\":\"Simfoni\",\"description\":\"Spend Intelligence and Spend Automation\",\"publisher\":{\"@id\":\"https:\/\/simfoni.com\/#organization\"},\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/simfoni.com\/?s={search_term_string}\"},\"query-input\":{\"@type\":\"PropertyValueSpecification\",\"valueRequired\":true,\"valueName\":\"search_term_string\"}}],\"inLanguage\":\"en\"},{\"@type\":[\"Organization\",\"Place\"],\"@id\":\"https:\/\/simfoni.com\/#organization\",\"name\":\"Simfoni\",\"alternateName\":\"Simfoni\",\"url\":\"https:\/\/simfoni.com\/\",\"logo\":{\"@id\":\"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#local-main-organization-logo\"},\"image\":{\"@id\":\"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#local-main-organization-logo\"},\"sameAs\":[\"https:\/\/www.facebook.com\/SimfoniApps\/\",\"https:\/\/x.com\/simfoniapps\",\"https:\/\/www.instagram.com\/simfoniapps\/\",\"https:\/\/www.linkedin.com\/company\/simfoni\/\",\"https:\/\/www.youtube.com\/@simfoni\",\"https:\/\/g.page\/r\/CTMP26g2qypHEBM\/\",\"https:\/\/www.capterra.com\/p\/206211\/Spend-Analytics\/\",\"https:\/\/www.g2.com\/products\/simfoni-spend-analytics\/\",\"https:\/\/www.glassdoor.com\/Overview\/Working-at-Simfoni-EI_IE3290778.11,18.htm\",\"https:\/\/sourceforge.net\/software\/product\/Simfoni\/\",\"https:\/\/news.google.com\/publications\/CAAqBwgKMMaWxAsw6bHbAw\"],\"description\":\"Simfoni is an AI-powered procurement and spend management platform designed to help enterprises gain complete visibility into organizational spend and turn procurement insight into measurable financial impact. 
The platform combines advanced spend analytics, intelligent sourcing automation, and tail spend management to enable procurement teams to identify savings opportunities, execute sourcing strategies efficiently, and improve supplier performance across global operations. Built for modern procurement organizations, Simfoni supports Chief Procurement Officers, strategic sourcing leaders, and finance teams who are responsible for driving cost optimization, supplier governance, and operational efficiency. By consolidating procurement data across multiple systems and suppliers, Simfoni provides a unified view of enterprise spend and enables organizations to prioritize sourcing initiatives that deliver measurable savings. Simfoni\u2019s platform integrates spend intelligence with automated sourcing execution, allowing procurement teams to scale sourcing activities without increasing headcount. The system helps organizations manage indirect spend, improve supplier engagement, and strengthen procurement governance through data-driven decision making. 
Trusted by global enterprises, Simfoni enables organizations to transform procurement from a reactive cost center into a strategic value driver by delivering visibility, automation, and measurable financial outcomes across the procurement lifecycle.\",\"legalName\":\"Simfoni\",\"foundingDate\":\"2015-08-25\",\"numberOfEmployees\":{\"@type\":\"QuantitativeValue\",\"minValue\":\"201\",\"maxValue\":\"500\"},\"address\":{\"@id\":\"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#local-main-place-address\"},\"telephone\":[\"+1-973-718-7071\",\"+44-208-098-2115\"],\"openingHoursSpecification\":[{\"@type\":\"OpeningHoursSpecification\",\"dayOfWeek\":[\"Monday\",\"Tuesday\",\"Wednesday\",\"Thursday\",\"Friday\",\"Saturday\",\"Sunday\"],\"opens\":\"00:00\",\"closes\":\"23:59\"}],\"email\":\"info@simfoni.com\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/simfoni.com\/#\/schema\/person\/afc4d1749ccc888582602619fc5b02b8\",\"name\":\"Simfoni\",\"description\":\"Simfoni Delivers Next-generation Digital Procurement Transformation Through Spend Intelligence, Spend Automation &amp; Spend Analytics Software.\",\"sameAs\":[\"https:\/\/simfoni.com\/\",\"https:\/\/www.facebook.com\/SimfoniApps\/\",\"https:\/\/www.instagram.com\/simfoniapps\/\",\"https:\/\/www.linkedin.com\/company\/simfoni\/\",\"https:\/\/x.com\/simfoniapps\",\"https:\/\/www.youtube.com\/@simfoni\"]},{\"@type\":\"PostalAddress\",\"@id\":\"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#local-main-place-address\",\"streetAddress\":\"90 Washington Valley Road\",\"addressLocality\":\"Bedminster\",\"postalCode\":\"07921\",\"addressRegion\":\"New 
Jersey\",\"addressCountry\":\"US\"},{\"@type\":\"ImageObject\",\"inLanguage\":\"en\",\"@id\":\"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#local-main-organization-logo\",\"url\":\"https:\/\/simfoni.com\/wp-content\/uploads\/2021\/10\/Simfoni.com-Logo.jpg\",\"contentUrl\":\"https:\/\/simfoni.com\/wp-content\/uploads\/2021\/10\/Simfoni.com-Logo.jpg\",\"width\":1000,\"height\":1000,\"caption\":\"Simfoni\"}]}<\/script>\n<meta name=\"geo.placename\" content=\"Bedminster\" \/>\n<meta name=\"geo.region\" content=\"United States (US)\" \/>\n<!-- \/ Yoast SEO Premium plugin. -->","yoast_head_json":{"title":"Transformers: More Than Meets the Eye (in Spend Classification) - Simfoni.com","description":"When you\u2019re working out your procurement strategy you need the right level of data granularity. Enter Spend Classification: the process of taking raw or partially structured spend data and mapping it to a category hierarchy.","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/","og_locale":"en_US","og_type":"article","og_title":"Transformers: More Than Meets the Eye (in Spend Classification) - Simfoni Engineering Blog","og_description":"When you\u2019re working out your procurement strategy you need the right level of data granularity. 
Enter Spend Classification: the process of taking raw or partially structured spend data and mapping it to a category hierarchy.","og_url":"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/","og_site_name":"Simfoni","article_publisher":"https:\/\/www.facebook.com\/SimfoniApps\/","article_author":"https:\/\/www.facebook.com\/SimfoniApps\/","article_published_time":"2022-05-25T10:41:13+00:00","article_modified_time":"2022-08-22T14:39:13+00:00","og_image":[{"width":1200,"height":628,"url":"https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Transformers-in-Spend-Classification.jpg","type":"image\/jpeg"}],"author":"Simfoni","schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"Article","@id":"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#article","isPartOf":{"@id":"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/"},"author":{"name":"Simfoni","@id":"https:\/\/simfoni.com\/#\/schema\/person\/afc4d1749ccc888582602619fc5b02b8"},"headline":"Transformers: More Than Meets the Eye (in Spend Classification)","datePublished":"2022-05-25T10:41:13+00:00","dateModified":"2022-08-22T14:39:13+00:00","mainEntityOfPage":{"@id":"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/"},"wordCount":1229,"publisher":{"@id":"https:\/\/simfoni.com\/#organization"},"image":{"@id":"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#primaryimage"},"thumbnailUrl":"https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Transformers-in-Spend-Classification.jpg","articleSection":["Engineering","Spend 
Management"],"inLanguage":"en","copyrightYear":"2022","copyrightHolder":{"@id":"https:\/\/simfoni.com\/#organization"}},{"@type":"WebPage","@id":"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/","url":"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/","name":"Transformers: More Than Meets the Eye (in Spend Classification) - Simfoni.com","isPartOf":{"@id":"https:\/\/simfoni.com\/#website"},"primaryImageOfPage":{"@id":"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#primaryimage"},"image":{"@id":"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#primaryimage"},"thumbnailUrl":"https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Transformers-in-Spend-Classification.jpg","datePublished":"2022-05-25T10:41:13+00:00","dateModified":"2022-08-22T14:39:13+00:00","description":"When you\u2019re working out your procurement strategy you need the right level of data granularity. 
Enter Spend Classification: the process of taking raw or partially structured spend data and mapping it to a category hierarchy.","breadcrumb":{"@id":"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#breadcrumb"},"inLanguage":"en","potentialAction":[{"@type":"ReadAction","target":["https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/"]}]},{"@type":"ImageObject","inLanguage":"en","@id":"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#primaryimage","url":"https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Transformers-in-Spend-Classification.jpg","contentUrl":"https:\/\/simfoni.com\/wp-content\/uploads\/2022\/05\/Transformers-in-Spend-Classification.jpg","width":1200,"height":628,"caption":"Transformers in Spend Classification"},{"@type":"BreadcrumbList","@id":"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Home","item":"https:\/\/simfoni.com\/"},{"@type":"ListItem","position":2,"name":"Transformers: More Than Meets the Eye (in Spend Classification)"}]},{"@type":"WebSite","@id":"https:\/\/simfoni.com\/#website","url":"https:\/\/simfoni.com\/","name":"Simfoni","description":"Spend Intelligence and Spend 
Automation","publisher":{"@id":"https:\/\/simfoni.com\/#organization"},"potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/simfoni.com\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en"},{"@type":["Organization","Place"],"@id":"https:\/\/simfoni.com\/#organization","name":"Simfoni","alternateName":"Simfoni","url":"https:\/\/simfoni.com\/","logo":{"@id":"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#local-main-organization-logo"},"image":{"@id":"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#local-main-organization-logo"},"sameAs":["https:\/\/www.facebook.com\/SimfoniApps\/","https:\/\/x.com\/simfoniapps","https:\/\/www.instagram.com\/simfoniapps\/","https:\/\/www.linkedin.com\/company\/simfoni\/","https:\/\/www.youtube.com\/@simfoni","https:\/\/g.page\/r\/CTMP26g2qypHEBM\/","https:\/\/www.capterra.com\/p\/206211\/Spend-Analytics\/","https:\/\/www.g2.com\/products\/simfoni-spend-analytics\/","https:\/\/www.glassdoor.com\/Overview\/Working-at-Simfoni-EI_IE3290778.11,18.htm","https:\/\/sourceforge.net\/software\/product\/Simfoni\/","https:\/\/news.google.com\/publications\/CAAqBwgKMMaWxAsw6bHbAw"],"description":"Simfoni is an AI-powered procurement and spend management platform designed to help enterprises gain complete visibility into organizational spend and turn procurement insight into measurable financial impact. The platform combines advanced spend analytics, intelligent sourcing automation, and tail spend management to enable procurement teams to identify savings opportunities, execute sourcing strategies efficiently, and improve supplier performance across global operations. 
Built for modern procurement organizations, Simfoni supports Chief Procurement Officers, strategic sourcing leaders, and finance teams who are responsible for driving cost optimization, supplier governance, and operational efficiency. By consolidating procurement data across multiple systems and suppliers, Simfoni provides a unified view of enterprise spend and enables organizations to prioritize sourcing initiatives that deliver measurable savings. Simfoni\u2019s platform integrates spend intelligence with automated sourcing execution, allowing procurement teams to scale sourcing activities without increasing headcount. The system helps organizations manage indirect spend, improve supplier engagement, and strengthen procurement governance through data-driven decision making. Trusted by global enterprises, Simfoni enables organizations to transform procurement from a reactive cost center into a strategic value driver by delivering visibility, automation, and measurable financial outcomes across the procurement lifecycle.","legalName":"Simfoni","foundingDate":"2015-08-25","numberOfEmployees":{"@type":"QuantitativeValue","minValue":"201","maxValue":"500"},"address":{"@id":"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#local-main-place-address"},"telephone":["+1-973-718-7071","+44-208-098-2115"],"openingHoursSpecification":[{"@type":"OpeningHoursSpecification","dayOfWeek":["Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"],"opens":"00:00","closes":"23:59"}],"email":"info@simfoni.com"},{"@type":"Person","@id":"https:\/\/simfoni.com\/#\/schema\/person\/afc4d1749ccc888582602619fc5b02b8","name":"Simfoni","description":"Simfoni Delivers Next-generation Digital Procurement Transformation Through Spend Intelligence, Spend Automation &amp; Spend Analytics 
Software.","sameAs":["https:\/\/simfoni.com\/","https:\/\/www.facebook.com\/SimfoniApps\/","https:\/\/www.instagram.com\/simfoniapps\/","https:\/\/www.linkedin.com\/company\/simfoni\/","https:\/\/x.com\/simfoniapps","https:\/\/www.youtube.com\/@simfoni"]},{"@type":"PostalAddress","@id":"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#local-main-place-address","streetAddress":"90 Washington Valley Road","addressLocality":"Bedminster","postalCode":"07921","addressRegion":"New Jersey","addressCountry":"US"},{"@type":"ImageObject","inLanguage":"en","@id":"https:\/\/simfoni.com\/spend-management\/transformers-more-than-meets-the-eye-in-spend-classification\/#local-main-organization-logo","url":"https:\/\/simfoni.com\/wp-content\/uploads\/2021\/10\/Simfoni.com-Logo.jpg","contentUrl":"https:\/\/simfoni.com\/wp-content\/uploads\/2021\/10\/Simfoni.com-Logo.jpg","width":1000,"height":1000,"caption":"Simfoni"}]},"geo.placename":"Bedminster","geo.region":"United States 
(US)"},"_links":{"self":[{"href":"https:\/\/simfoni.com\/wp-json\/wp\/v2\/posts\/44345","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/simfoni.com\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/simfoni.com\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/simfoni.com\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/simfoni.com\/wp-json\/wp\/v2\/comments?post=44345"}],"version-history":[{"count":0,"href":"https:\/\/simfoni.com\/wp-json\/wp\/v2\/posts\/44345\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/simfoni.com\/wp-json\/wp\/v2\/media\/44348"}],"wp:attachment":[{"href":"https:\/\/simfoni.com\/wp-json\/wp\/v2\/media?parent=44345"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/simfoni.com\/wp-json\/wp\/v2\/categories?post=44345"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/simfoni.com\/wp-json\/wp\/v2\/tags?post=44345"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}