1919# Dict of ingredient name tokens that bypass the usual foundation food matching process. 
2020# We do this because the embedding distance approach sometimes gives poor results when 
2121# the name we're trying to match only has one token. 
22+ # The tokens in the dict keys are stemmed. 
2223FOUNDATION_FOOD_OVERRIDES : dict [tuple [str , ...], FoundationFood ] =  {
2324    ("salt" ,): FoundationFood (
2425        "Salt, table, iodized" , 1 , 746775 , "Spices and Herbs" , "foundation_food" 
2526    ),
27+     (
28+         "sea" ,
29+         "salt" ,
30+     ): FoundationFood (
31+         "Salt, table, iodized" , 1 , 746775 , "Spices and Herbs" , "foundation_food" 
32+     ),
2633    ("egg" ,): FoundationFood (
2734        "Eggs, Grade A, Large, egg whole" ,
2835        1 ,
4451        "Vegetables and Vegetable Products" ,
4552        "foundation_food" ,
4653    ),
54+     ("mayonnais" ,): FoundationFood (
55+         "Mayonnaise, regular" ,
56+         1 ,
57+         2710204 ,
58+         "Mayonnaise" ,
59+         "survey_fndds_food" ,
60+     ),
4761}
4862
4963# List of preferred FDC data types. 
@@ -638,16 +652,18 @@ def get_fuzzy_matcher() -> FuzzyEmbeddingMatcher:
638652    return  FuzzyEmbeddingMatcher (embeddings )
639653
640654
641- # These are stemmed 
642- FDC_PHRASE_SUBSTITUTIONS  =  {
655+ # Phrase and token substitutions to normalise spelling of ingredient name tokens to the 
656+ # spellings used in the FDC ingredient descriptions. 
657+ # All tokens in these dicts are stemmed. 
658+ FDC_PHRASE_SUBSTITUTIONS : dict [tuple [str , ...], list [str ]] =  {
643659    ("doubl" , "cream" ): ["heavi" , "cream" ],
644660    ("glac" , "cherri" ): ["maraschino" , "cherri" ],
645661    ("ice" , "sugar" ): ["powder" , "sugar" ],
646662    ("mang" , "tout" ): ["snow" , "pea" ],
647663    ("plain" , "flour" ): ["all-purpos" , "flour" ],
648664    ("singl" , "cream" ): ["light" , "cream" ],
649665}
650- FDC_TOKEN_SUBSTITUTIONS  =  {
666+ FDC_TOKEN_SUBSTITUTIONS :  dict [ str ,  str ]  =  {
651667    "aubergin" : "eggplant" ,
652668    "beetroot" : "beet" ,
653669    "capsicum" : "bell" ,
@@ -658,7 +674,7 @@ def get_fuzzy_matcher() -> FuzzyEmbeddingMatcher:
658674    "courgett" : "zucchini" ,
659675    "gherkin" : "pickl" ,
660676    "mangetout" : "snowpea" ,
661-     "prawns " : "shrimp" ,
677+     "prawn " : "shrimp" ,
662678    "rocket" : "arugula" ,
663679    "swede" : "rutabaga" ,
664680    "yoghurt" : "yogurt" ,
0 commit comments