Skip to content

Commit 6136c4b

Browse files
committed
Bugfix: Allow currency to be suffixed with any number of asterisks, as seen in https://www.budgetbytes.com/chicken-alfredo/
1 parent b84c032 commit 6136c4b

File tree

2 files changed

+10
-4
lines changed

2 files changed

+10
-4
lines changed

ingredient_parser/en/_regex.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,11 @@
5858
)
5959

6060
# Regex pattern to match quantities split by "and" e.g. 1 and 1/2.
61-
# Capture the whole match, and the quantites before and after the "and".
61+
# Capture the whole match, and the quantities before and after the "and".
6262
FRACTION_SPLIT_AND_PATTERN = re.compile(r"((\d+)\sand\s(\d/\d+))")
6363

6464
# Regex pattern to match ranges where the unit appears after both quantities e.g.
65-
# 100 g - 200 g. This assumes the quantites and units have already been seperated
65+
# 100 g - 200 g. This assumes the quantities and units have already been separated
6666
# by a single space and that all numbers are decimals.
6767
# This regex matches:
6868
# <quantity> <unit> - <quantity> <unit>
@@ -117,7 +117,9 @@
117117
FRACTION_TOKEN_PATTERN = re.compile(r"^\d*\#\d+\$\d+(?:\-\d*\#\d+\$\d+)?$")
118118

119119
# Regex pattern to match currency within parentheses e.g. ($1.99)
120-
# Allows optional whitespace after opening parenthesis, before currency symbol, and
120+
# Allows optional white space after opening parenthesis, before currency symbol, and
121121
# before closing parenthesis.
122+
# Also allows the currency to be suffixed with any number of asterisk characters because
123+
# that has been seen on budgetbytes.com.
122124
currency_pattern = "|".join(re.escape(c) for c in ["$", "£", "€", "¥", "₹"])
123-
CURRENCY_PATTERN = re.compile(rf"\(\s*(?:{currency_pattern})\s*[0-9.,]+\s*\)")
125+
CURRENCY_PATTERN = re.compile(rf"\(\s*(?:{currency_pattern})\s*[0-9.,]+\**\s*\)")

tests/preprocess/test_remove_price_annotations.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@ def test_price_annotation_with_mixed_whitespace(self, p):
8484
input_sentence = "1 cup flour ( \t $ 0.20 )"
8585
assert p._remove_price_annotations(input_sentence) == "1 cup flour "
8686

87+
def test_price_annotation_with_asterisk_suffix(self, p):
88+
input_sentence = "1 cup flour ($0.20**)"
89+
assert p._remove_price_annotations(input_sentence) == "1 cup flour "
90+
8791
def test_non_price_parenthetical_remains(self, p):
8892
input_sentence = "1 cup flour (organic)"
8993
assert p._remove_price_annotations(input_sentence) == "1 cup flour (organic)"

0 commit comments

Comments
 (0)