Skip to content

Commit 6eff903

Browse files
committed
Relax required phrase filtering
Only process stopwords for "is_continuous" rules. Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
1 parent e5bff98 commit 6eff903

File tree

1 file changed

+22
-18
lines changed

1 file changed

+22
-18
lines changed

src/licensedcode/match.py

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2272,33 +2272,37 @@ def filter_matches_missing_required_phrases(
22722272
is_valid = False
22732273
break
22742274

2275-
has_same_stopwords_pos = True
2276-
for qpos, ipos in zip(qspan, ispan):
2277-
if qpos not in qkey_span or qpos == qkey_span_end:
2278-
continue
2279-
2280-
if istopwords_by_pos_get(ipos) != qstopwords_by_pos_get(qpos):
2281-
has_same_stopwords_pos = False
2275+
if is_continuous:
2276+
has_same_stopwords_pos = True
2277+
for qpos, ipos in zip(qspan, ispan):
2278+
if qpos not in qkey_span or qpos == qkey_span_end:
2279+
continue
2280+
2281+
if istopwords_by_pos_get(ipos) != qstopwords_by_pos_get(qpos):
2282+
has_same_stopwords_pos = False
2283+
break
2284+
2285+
if not has_same_stopwords_pos:
2286+
logger_debug(
2287+
' ==> DISCARDING, REQUIRED PHRASES PRESENT, BUT STOPWORDS NOT SAME:',
2288+
'qkey_span:', qkey_span, 'qpan:', qspan,
2289+
'istopwords_by_pos:', istopwords_by_pos,
2290+
'qstopwords_by_pos:', qstopwords_by_pos
2291+
)
2292+
2293+
is_valid = False
22822294
break
22832295

2284-
if not has_same_stopwords_pos:
2285-
logger_debug(
2286-
' ==> DISCARDING, REQUIRED PHRASES PRESENT, BUT STOPWORDS NOT SAME:',
2287-
'qkey_span:', qkey_span, 'qpan:', qspan,
2288-
'istopwords_by_pos:', istopwords_by_pos,
2289-
'qstopwords_by_pos:', qstopwords_by_pos
2290-
)
2291-
2292-
is_valid = False
2293-
break
2294-
22952296
if is_valid:
22962297
logger_debug(' ==> KEEPING, REQUIRED PHRASES PRESENT, CONTINUOUS AND NO UNKNOWNS')
22972298
kept_append(match)
22982299
else:
22992300
match.discard_reason = reason
23002301
discarded_append(match)
23012302

2303+
if discarded and not kept:
2304+
logger_debug(' ==> REINSTATING DISCARDED MISSING REQUIRED PHRASES')
2305+
23022306
if trace:
23032307
print()
23042308

0 commit comments

Comments
 (0)