Refactor: Move LABELS and FORBIDDEN_TRANSITIONS to _constants.py

strangetom · strangetom · commit 7b92034eb601 · 2025-06-09T18:20:40.000+01:00
diff --git a/ingredient_parser/en/_constants.py b/ingredient_parser/en/_constants.py
@@ -422,3 +422,46 @@
     {"tablespoon", "tbsp", "tbs", "tb"},
     {"teaspoon", "tsp"},
 ]
+
+# Set of all labels.
+LABELS = {
+    "B_NAME_TOK",
+    "COMMENT",
+    "I_NAME_TOK",
+    "NAME_MOD",
+    "NAME_SEP",
+    "NAME_VAR",
+    "PREP",
+    "PUNC",
+    "PURPOSE",
+    "QTY",
+    "SIZE",
+    "UNIT",
+}
+
+# Dict of forbidden label transitions.
+# The key is the previous label, the values are the set of labels that cannot be
+# predicted for the next label.
+# These are generated from the training data: these transitions never occur in the
+# training data.
+FORBIDDEN_TRANSITIONS = {
+    "B_NAME_TOK": {"B_NAME_TOK", "NAME_MOD"},
+    "I_NAME_TOK": {"NAME_MOD"},
+    "NAME_MOD": {"COMMENT", "I_NAME_TOK", "PURPOSE", "QTY", "UNIT"},
+    "NAME_SEP": {"I_NAME_TOK", "PURPOSE"},
+    "NAME_VAR": {"COMMENT", "I_NAME_TOK", "NAME_MOD", "PURPOSE", "QTY", "UNIT"},
+    "PREP": {"NAME_SEP"},
+    "PURPOSE": {
+        "B_NAME_TOK",
+        "I_NAME_TOK",
+        "NAME_MOD",
+        "NAME_SEP",
+        "NAME_VAR",
+        "PREP",
+        "QTY",
+        "SIZE",
+        "UNIT",
+    },
+    "QTY": {"NAME_SEP", "PURPOSE"},
+    "SIZE": {"NAME_SEP", "PURPOSE"},
+}
diff --git a/ingredient_parser/en/parser.py b/ingredient_parser/en/parser.py
@@ -4,6 +4,7 @@
 
 from .._common import group_consecutive_idx
 from ..dataclasses import ParsedIngredient, ParserDebugInfo
+from ._constants import FORBIDDEN_TRANSITIONS, LABELS
 from ._loaders import load_parser_model
 from ._utils import pluralise_units
 from .postprocess import PostProcessor
@@ -275,49 +276,6 @@ def guess_ingredient_name(
     return labels, scores
 
 
-# Dict of illegal transitions.
-# The key is the previous label, the values are the set of labels that cannot be
-# predicted for the next label.
-# This are generated from the training data: these transition never occur in the
-# training data.
-ILLEGAL_TRANSITIONS = {
-    "B_NAME_TOK": {"B_NAME_TOK", "NAME_MOD"},
-    "I_NAME_TOK": {"NAME_MOD"},
-    "NAME_MOD": {"COMMENT", "I_NAME_TOK", "PURPOSE", "QTY", "UNIT"},
-    "NAME_SEP": {"I_NAME_TOK", "PURPOSE"},
-    "NAME_VAR": {"COMMENT", "I_NAME_TOK", "NAME_MOD", "PURPOSE", "QTY", "UNIT"},
-    "PREP": {"NAME_SEP"},
-    "PURPOSE": {
-        "B_NAME_TOK",
-        "I_NAME_TOK",
-        "NAME_MOD",
-        "NAME_SEP",
-        "NAME_VAR",
-        "PREP",
-        "QTY",
-        "SIZE",
-        "UNIT",
-    },
-    "QTY": {"NAME_SEP", "PURPOSE"},
-    "SIZE": {"NAME_SEP", "PURPOSE"},
-}
-
-LABELS = [
-    "B_NAME_TOK",
-    "COMMENT",
-    "I_NAME_TOK",
-    "NAME_MOD",
-    "NAME_SEP",
-    "NAME_VAR",
-    "PREP",
-    "PUNC",
-    "PURPOSE",
-    "QTY",
-    "SIZE",
-    "UNIT",
-]
-
-
 def apply_label_constraints(
     TAGGER, labels: list[str], scores: list[float]
 ) -> tuple[list[str], list[float]]:
@@ -376,7 +334,7 @@ def apply_label_constraints(
 
         else:
             prev_label = sequence[-1]
-            forbidden = ILLEGAL_TRANSITIONS.get(prev_label, set())
+            forbidden = FORBIDDEN_TRANSITIONS.get(prev_label, set())
             if label in forbidden:
                 new_score, new_label = select_best_alternative_label(
                     TAGGER, i, forbidden