Skip to content

Commit 7b92034

Browse files
committed
Refactor: Move LABELS and FORBIDDEN_TRANSITIONS to _constants.py
1 parent df0f3de commit 7b92034

File tree

2 files changed

+45
-44
lines changed

2 files changed

+45
-44
lines changed

ingredient_parser/en/_constants.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,3 +422,46 @@
422422
{"tablespoon", "tbsp", "tbs", "tb"},
423423
{"teaspoon", "tsp"},
424424
]
425+
426+
# Set of all labels.
427+
LABELS = {
428+
"B_NAME_TOK",
429+
"COMMENT",
430+
"I_NAME_TOK",
431+
"NAME_MOD",
432+
"NAME_SEP",
433+
"NAME_VAR",
434+
"PREP",
435+
"PUNC",
436+
"PURPOSE",
437+
"QTY",
438+
"SIZE",
439+
"UNIT",
440+
}
441+
442+
# Dict of forbidden label transitions.
443+
# The key is the previous label, the values are the set of labels that cannot be
444+
# predicted for the next label.
445+
# These are generated from the training data: these transitions never occur in the
446+
# training data.
447+
FORBIDDEN_TRANSITIONS = {
448+
"B_NAME_TOK": {"B_NAME_TOK", "NAME_MOD"},
449+
"I_NAME_TOK": {"NAME_MOD"},
450+
"NAME_MOD": {"COMMENT", "I_NAME_TOK", "PURPOSE", "QTY", "UNIT"},
451+
"NAME_SEP": {"I_NAME_TOK", "PURPOSE"},
452+
"NAME_VAR": {"COMMENT", "I_NAME_TOK", "NAME_MOD", "PURPOSE", "QTY", "UNIT"},
453+
"PREP": {"NAME_SEP"},
454+
"PURPOSE": {
455+
"B_NAME_TOK",
456+
"I_NAME_TOK",
457+
"NAME_MOD",
458+
"NAME_SEP",
459+
"NAME_VAR",
460+
"PREP",
461+
"QTY",
462+
"SIZE",
463+
"UNIT",
464+
},
465+
"QTY": {"NAME_SEP", "PURPOSE"},
466+
"SIZE": {"NAME_SEP", "PURPOSE"},
467+
}

ingredient_parser/en/parser.py

Lines changed: 2 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from .._common import group_consecutive_idx
66
from ..dataclasses import ParsedIngredient, ParserDebugInfo
7+
from ._constants import FORBIDDEN_TRANSITIONS, LABELS
78
from ._loaders import load_parser_model
89
from ._utils import pluralise_units
910
from .postprocess import PostProcessor
@@ -275,49 +276,6 @@ def guess_ingredient_name(
275276
return labels, scores
276277

277278

278-
# Dict of illegal transitions.
279-
# The key is the previous label, the values are the set of labels that cannot be
280-
# predicted for the next label.
281-
# These are generated from the training data: these transitions never occur in the
282-
# training data.
283-
ILLEGAL_TRANSITIONS = {
284-
"B_NAME_TOK": {"B_NAME_TOK", "NAME_MOD"},
285-
"I_NAME_TOK": {"NAME_MOD"},
286-
"NAME_MOD": {"COMMENT", "I_NAME_TOK", "PURPOSE", "QTY", "UNIT"},
287-
"NAME_SEP": {"I_NAME_TOK", "PURPOSE"},
288-
"NAME_VAR": {"COMMENT", "I_NAME_TOK", "NAME_MOD", "PURPOSE", "QTY", "UNIT"},
289-
"PREP": {"NAME_SEP"},
290-
"PURPOSE": {
291-
"B_NAME_TOK",
292-
"I_NAME_TOK",
293-
"NAME_MOD",
294-
"NAME_SEP",
295-
"NAME_VAR",
296-
"PREP",
297-
"QTY",
298-
"SIZE",
299-
"UNIT",
300-
},
301-
"QTY": {"NAME_SEP", "PURPOSE"},
302-
"SIZE": {"NAME_SEP", "PURPOSE"},
303-
}
304-
305-
LABELS = [
306-
"B_NAME_TOK",
307-
"COMMENT",
308-
"I_NAME_TOK",
309-
"NAME_MOD",
310-
"NAME_SEP",
311-
"NAME_VAR",
312-
"PREP",
313-
"PUNC",
314-
"PURPOSE",
315-
"QTY",
316-
"SIZE",
317-
"UNIT",
318-
]
319-
320-
321279
def apply_label_constraints(
322280
TAGGER, labels: list[str], scores: list[float]
323281
) -> tuple[list[str], list[float]]:
@@ -376,7 +334,7 @@ def apply_label_constraints(
376334

377335
else:
378336
prev_label = sequence[-1]
379-
forbidden = ILLEGAL_TRANSITIONS.get(prev_label, set())
337+
forbidden = FORBIDDEN_TRANSITIONS.get(prev_label, set())
380338
if label in forbidden:
381339
new_score, new_label = select_best_alternative_label(
382340
TAGGER, i, forbidden

0 commit comments

Comments
 (0)