@@ -756,7 +756,7 @@ def matched_text(
756
756
side effects as the caching depends on which index instance is being
757
757
used and this index can change during testing.
758
758
"""
759
- if TRACE_MATCHED_TEXT :
759
+ if TRACE_MATCHED_TEXT and not TRACE_REPR_ALL_MATCHED_TEXTS :
760
760
logger_debug (f'LicenseMatch.matched_text: self.query: { self .query } ' )
761
761
762
762
query = self .query
@@ -2205,7 +2205,7 @@ def filter_matches_missing_required_phrases(
2205
2205
2206
2206
# keep matches as candidate if they contain all required phrase positions in the ispan
2207
2207
if trace :
2208
- print (' CANDIDATE TO KEEP: all ikey_span in match.ispan:' , ikey_spans , ispan )
2208
+ print (' CANDIDATE TO KEEP: all ikey_span in match.ispan: ikey_spans: ' , ikey_spans , 'ispan:' , ispan )
2209
2209
2210
2210
# discard matches that contain required phrases, but interrupted by
2211
2211
# unknown or stop words.
@@ -2219,7 +2219,7 @@ def filter_matches_missing_required_phrases(
2219
2219
istopwords_by_pos_get = istopwords_by_pos .get
2220
2220
2221
2221
# iterate on each required phrase span to ensure that they are continuous
2222
- # and contain no unknown words on the query side
2222
+ # and contain no unknown words or stop words on the query side
2223
2223
2224
2224
is_valid = True
2225
2225
@@ -2239,18 +2239,15 @@ def filter_matches_missing_required_phrases(
2239
2239
2240
2240
qkey_span = Span (qkey_poss )
2241
2241
if len (qkey_span ) != qkey_span .magnitude ():
2242
-
2243
- logger_debug (
2244
- ' ==> DISCARDING, REQUIRED PHRASES PRESENT, BUT NOT CONTINUOUS:' ,
2245
- 'qkey_span:' , qkey_span , 'qpan:' , qspan
2246
- )
2247
-
2242
+ if trace :
2243
+ logger_debug (
2244
+ ' ==> DISCARDING, REQUIRED PHRASES PRESENT, BUT NOT CONTINUOUS:' ,
2245
+ 'qkey_span:' , qkey_span , 'qspan:' , qspan
2246
+ )
2248
2247
is_valid = False
2249
2248
break
2250
2249
2251
- # check that required phrase spans does not contain stop words and does
2252
- # not contain unknown words
2253
-
2250
+ # Check that required phrase spans do not contain unknown words.
2254
2251
# NOTE: we do not check the last qkey_span position of a required phrase
2255
2252
# since unknown is a number of words after a given span position:
2256
2253
# these are pinned to the last position and we would not care for
@@ -2265,34 +2262,36 @@ def filter_matches_missing_required_phrases(
2265
2262
if contains_unknown :
2266
2263
logger_debug (
2267
2264
' ==> DISCARDING, REQUIRED PHRASES PRESENT, BUT UNKNOWNS:' ,
2268
- 'qkey_span:' , qkey_span , 'qpan :' , qspan ,
2265
+ 'qkey_span:' , qkey_span , 'qspan :' , qspan ,
2269
2266
'unknown_by_pos:' , unknown_by_pos
2270
2267
)
2271
2268
2272
2269
is_valid = False
2273
2270
break
2274
2271
2275
- if is_continuous :
2276
- has_same_stopwords_pos = True
2277
- for qpos , ipos in zip (qspan , ispan ):
2278
- if qpos not in qkey_span or qpos == qkey_span_end :
2279
- continue
2280
-
2281
- if istopwords_by_pos_get (ipos ) != qstopwords_by_pos_get (qpos ):
2282
- has_same_stopwords_pos = False
2283
- break
2284
-
2285
- if not has_same_stopwords_pos :
2286
- logger_debug (
2287
- ' ==> DISCARDING, REQUIRED PHRASES PRESENT, BUT STOPWORDS NOT SAME:' ,
2288
- 'qkey_span:' , qkey_span , 'qpan:' , qspan ,
2289
- 'istopwords_by_pos:' , istopwords_by_pos ,
2290
- 'qstopwords_by_pos:' , qstopwords_by_pos
2291
- )
2292
-
2293
- is_valid = False
2272
+ # Check that required phrase spans do not contain stop words. This must be true for
2273
+ # continuous rules or not, as long as we have a key span: it cannot be interrupted
2274
+
2275
+ has_same_stopwords_pos = True
2276
+ for qpos , ipos in zip (qspan , ispan ):
2277
+ if qpos not in qkey_span or qpos == qkey_span_end :
2278
+ continue
2279
+
2280
+ if istopwords_by_pos_get (ipos ) != qstopwords_by_pos_get (qpos ):
2281
+ has_same_stopwords_pos = False
2294
2282
break
2295
2283
2284
+ if not has_same_stopwords_pos :
2285
+ logger_debug (
2286
+ ' ==> DISCARDING, REQUIRED PHRASES PRESENT, BUT STOPWORDS NOT SAME:' ,
2287
+ 'qkey_span:' , qkey_span , 'qspan:' , qspan ,
2288
+ 'istopwords_by_pos:' , istopwords_by_pos ,
2289
+ 'qstopwords_by_pos:' , qstopwords_by_pos
2290
+ )
2291
+
2292
+ is_valid = False
2293
+ break
2294
+
2296
2295
if is_valid :
2297
2296
logger_debug (' ==> KEEPING, REQUIRED PHRASES PRESENT, CONTINUOUS AND NO UNKNOWNS' )
2298
2297
kept_append (match )
0 commit comments