Bugfix: "prawns" shouldn't be plural. Add additional ingredients to foundation foods overrides

strangetom · strangetom · commit d9487a49d0f0 · 2025-05-24T16:33:18.000+01:00
diff --git a/ingredient_parser/en/_foundationfoods.py b/ingredient_parser/en/_foundationfoods.py
@@ -19,10 +19,17 @@
 # Dict of ingredient name tokens that bypass the usual foundation food matching process.
 # We do this because the embedding distance approach sometimes gives poor results when
 # the name we're trying to match only has one token.
+# The tokens in the dict keys are stemmed, e.g. "mayonnais" rather than "mayonnaise".
 FOUNDATION_FOOD_OVERRIDES: dict[tuple[str, ...], FoundationFood] = {
     ("salt",): FoundationFood(
         "Salt, table, iodized", 1, 746775, "Spices and Herbs", "foundation_food"
     ),
+    (
+        "sea",
+        "salt",
+    ): FoundationFood(
+        "Salt, table, iodized", 1, 746775, "Spices and Herbs", "foundation_food"
+    ),
     ("egg",): FoundationFood(
         "Eggs, Grade A, Large, egg whole",
         1,
@@ -44,6 +51,13 @@
         "Vegetables and Vegetable Products",
         "foundation_food",
     ),
+    ("mayonnais",): FoundationFood(
+        "Mayonnaise, regular",
+        1,
+        2710204,
+        "Mayonnaise",
+        "survey_fndds_food",
+    ),
 }
 
 # List of preferred FDC data types.
@@ -638,16 +652,18 @@ def get_fuzzy_matcher() -> FuzzyEmbeddingMatcher:
     return FuzzyEmbeddingMatcher(embeddings)
 
 
-# These are stemmed
-FDC_PHRASE_SUBSTITUTIONS = {
+# Phrase and token substitutions to normalise spelling of ingredient name tokens to the
+# spellings used in the FDC ingredient descriptions.
+# All tokens in these dicts are stemmed.
+FDC_PHRASE_SUBSTITUTIONS: dict[tuple[str, ...], list[str]] = {
     ("doubl", "cream"): ["heavi", "cream"],
     ("glac", "cherri"): ["maraschino", "cherri"],
     ("ice", "sugar"): ["powder", "sugar"],
     ("mang", "tout"): ["snow", "pea"],
     ("plain", "flour"): ["all-purpos", "flour"],
     ("singl", "cream"): ["light", "cream"],
 }
-FDC_TOKEN_SUBSTITUTIONS = {
+FDC_TOKEN_SUBSTITUTIONS: dict[str, str] = {
     "aubergin": "eggplant",
     "beetroot": "beet",
     "capsicum": "bell",
@@ -658,7 +674,7 @@ def get_fuzzy_matcher() -> FuzzyEmbeddingMatcher:
     "courgett": "zucchini",
     "gherkin": "pickl",
     "mangetout": "snowpea",
-    "prawns": "shrimp",
+    "prawn": "shrimp",
     "rocket": "arugula",
     "swede": "rutabaga",
     "yoghurt": "yogurt",