Skip to content

Commit d9487a4

Browse files
committed
Bugfix: "prawns" shouldn't be plural. Add additional ingredients to foundation foods overrides
1 parent ba8098d commit d9487a4

File tree

1 file changed

+20
-4
lines changed

1 file changed

+20
-4
lines changed

ingredient_parser/en/_foundationfoods.py

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -19,10 +19,17 @@
1919
# Dict of ingredient name tokens that bypass the usual foundation food matching process.
2020
# We do this because the embedding distance approach sometimes gives poor results when
2121
# the name we're trying to match only has one token.
22+
# The tokens in the dict keys are stemmed.
2223
FOUNDATION_FOOD_OVERRIDES: dict[tuple[str, ...], FoundationFood] = {
2324
("salt",): FoundationFood(
2425
"Salt, table, iodized", 1, 746775, "Spices and Herbs", "foundation_food"
2526
),
27+
(
28+
"sea",
29+
"salt",
30+
): FoundationFood(
31+
"Salt, table, iodized", 1, 746775, "Spices and Herbs", "foundation_food"
32+
),
2633
("egg",): FoundationFood(
2734
"Eggs, Grade A, Large, egg whole",
2835
1,
@@ -44,6 +51,13 @@
4451
"Vegetables and Vegetable Products",
4552
"foundation_food",
4653
),
54+
("mayonnais",): FoundationFood(
55+
"Mayonnaise, regular",
56+
1,
57+
2710204,
58+
"Mayonnaise",
59+
"survey_fndds_food",
60+
),
4761
}
4862

4963
# List of preferred FDC data types.
@@ -638,16 +652,18 @@ def get_fuzzy_matcher() -> FuzzyEmbeddingMatcher:
638652
return FuzzyEmbeddingMatcher(embeddings)
639653

640654

641-
# These are stemmed
642-
FDC_PHRASE_SUBSTITUTIONS = {
655+
# Phrase and token substitutions to normalise spelling of ingredient name tokens to the
656+
# spellings used in the FDC ingredient descriptions.
657+
# All tokens in these dicts are stemmed.
658+
FDC_PHRASE_SUBSTITUTIONS: dict[tuple[str, ...], list[str]] = {
643659
("doubl", "cream"): ["heavi", "cream"],
644660
("glac", "cherri"): ["maraschino", "cherri"],
645661
("ice", "sugar"): ["powder", "sugar"],
646662
("mang", "tout"): ["snow", "pea"],
647663
("plain", "flour"): ["all-purpos", "flour"],
648664
("singl", "cream"): ["light", "cream"],
649665
}
650-
FDC_TOKEN_SUBSTITUTIONS = {
666+
FDC_TOKEN_SUBSTITUTIONS: dict[str, str] = {
651667
"aubergin": "eggplant",
652668
"beetroot": "beet",
653669
"capsicum": "bell",
@@ -658,7 +674,7 @@ def get_fuzzy_matcher() -> FuzzyEmbeddingMatcher:
658674
"courgett": "zucchini",
659675
"gherkin": "pickl",
660676
"mangetout": "snowpea",
661-
"prawns": "shrimp",
677+
"prawn": "shrimp",
662678
"rocket": "arugula",
663679
"swede": "rutabaga",
664680
"yoghurt": "yogurt",

0 commit comments

Comments
 (0)