7
7
8
8
from __future__ import annotations
9
9
10
- from re import IGNORECASE , compile , escape
10
+ from operator import itemgetter
11
+ from re import IGNORECASE , escape , finditer , search
12
+ from typing import Iterable , NamedTuple
11
13
12
14
import rich .repr
13
15
from rich .style import Style
14
16
from rich .text import Text
15
17
16
- from textual .cache import LRUCache
18
+
19
class _Search(NamedTuple):
    """Internal structure to keep track of a recursive search.

    Each instance is an immutable snapshot of a partially completed match.
    """

    # Index in the candidate from which the next character lookup starts.
    candidate_offset: int = 0
    # Index of the next query character still to be matched.
    query_offset: int = 0
    # Offsets recorded for the query characters matched so far.
    offsets: tuple[int, ...] = ()

    def branch(self, offset: int) -> tuple[_Search, _Search]:
        """Branch this search when an offset is found.

        Args:
            offset: Offset at which the current query character was found.

        Returns:
            A pair of search objects. The first accepts the hit: it records
            `offset` and advances to the next query character. The second
            skips the hit and keeps looking for the same query character.
            Both resume scanning at `offset + 1`.
        """
        _, query_offset, offsets = self
        resume_at = offset + 1
        return (
            _Search(resume_at, query_offset + 1, offsets + (offset,)),
            _Search(resume_at, query_offset, offsets),
        )

    @property
    def groups(self) -> int:
        """Number of groups in offsets.

        A group is a run of offsets where each one is exactly one greater
        than the one before it. Returns 0 when no offsets have been recorded
        (previously this raised `IndexError`).
        """
        offsets = self.offsets
        if not offsets:
            return 0
        # Every non-adjacent neighbouring pair starts a new group.
        breaks = sum(
            1
            for previous, current in zip(offsets, offsets[1:])
            if current != previous + 1
        )
        return 1 + breaks
51
+
52
+
53
class FuzzySearch:
    """Performs a fuzzy search.

    Unlike a single regex search, this explores alternative ways of matching
    the query inside the candidate and returns the best-scoring one.
    """

    def __init__(
        self, case_sensitive: bool = False, *, cache_size: int = 1024 * 4
    ) -> None:
        """Initialize fuzzy search.

        Args:
            case_sensitive: Is the match case sensitive?
            cache_size: Maximum number of results kept in the cache. The
                oldest entry is dropped when the limit is reached; a value
                of zero or less disables caching.
        """
        self.cache: dict[tuple[str, str, bool], tuple[float, tuple[int, ...]]] = {}
        self.cache_size = cache_size
        self.case_sensitive = case_sensitive

    def match(self, query: str, candidate: str) -> tuple[float, tuple[int, ...]]:
        """Match against a query.

        Args:
            query: The fuzzy query.
            candidate: A candidate to check.

        Returns:
            A pair of (score, tuple of offsets). `(0, ())` for no result,
            which includes the case of an empty query.
        """
        no_match: tuple[float, tuple[int, ...]] = (0.0, ())
        if not query:
            # An empty query has no characters to locate; without this guard
            # the matcher would index into the empty string.
            return no_match

        # Consult the cache before doing any work, including the regex check.
        cache_key = (query, candidate, self.case_sensitive)
        cached = self.cache.get(cache_key)
        if cached is not None:
            return cached

        query_regex = ".*?".join(f"({escape(character)})" for character in query)
        flags = 0 if self.case_sensitive else IGNORECASE
        if search(query_regex, candidate, flags=flags) is None:
            # Bail out early if there is no possibility of a match
            result = no_match
        else:
            result = max(
                self._match(query, candidate), key=itemgetter(0), default=no_match
            )
        self._remember(cache_key, result)
        return result

    def _remember(
        self,
        cache_key: tuple[str, str, bool],
        result: tuple[float, tuple[int, ...]],
    ) -> None:
        """Store a result in the cache, evicting the oldest entries if full.

        Args:
            cache_key: Key identifying the (query, candidate, case) triple.
            result: The match result to store.
        """
        if self.cache_size <= 0:
            return
        cache = self.cache
        if cache_key not in cache:
            # Dicts preserve insertion order, so the first key is the oldest.
            while cache and len(cache) >= self.cache_size:
                del cache[next(iter(cache))]
        cache[cache_key] = result

    def _match(
        self, query: str, candidate: str
    ) -> Iterable[tuple[float, tuple[int, ...]]]:
        """Generator to do the matching.

        Args:
            query: Query to match.
            candidate: Candidate to check against.

        Yields:
            Pairs of score and tuple of offsets.
        """
        if not query:
            # Nothing to match; also avoids indexing an empty string below.
            return

        if not self.case_sensitive:
            query = query.lower()
            candidate = candidate.lower()

        # We need this to give a bonus to first letters.
        first_letters = {match.start() for match in finditer(r"\w+", candidate)}

        def score(state: _Search) -> float:
            """Score a search.

            Args:
                state: Search object with a complete set of offsets.

            Returns:
                Score.
            """
            # This is a heuristic, and can be tweaked for better results
            offsets = state.offsets
            # Boost first letter matches
            points: float = sum(
                (2.0 if offset in first_letters else 1.0) for offset in offsets
            )
            # Boost to favor fewer groups
            offset_count = len(offsets)
            normalized_groups = (offset_count - (state.groups - 1)) / offset_count
            return points * (1 + (normalized_groups**2))

        stack: list[_Search] = [_Search()]
        push = stack.append
        pop = stack.pop
        query_size = len(query)
        find = candidate.find
        # Limit the number of loops out of an abundance of caution.
        # This would be hard to reach without contrived data.
        remaining_loops = 200

        while stack and (remaining_loops := remaining_loops - 1):
            state = pop()
            offset = find(query[state.query_offset], state.candidate_offset)
            if offset != -1:
                advance_branch, branch = state.branch(offset)
                if advance_branch.query_offset == query_size:
                    # Every query character has been placed: report it, then
                    # keep looking for a later position of the last character.
                    yield score(advance_branch), advance_branch.offsets
                    push(branch)
                else:
                    push(advance_branch)
                    push(branch)
17
155
18
156
19
157
@rich .repr .auto
@@ -36,11 +174,8 @@ def __init__(
36
174
"""
37
175
self ._query = query
38
176
self ._match_style = Style (reverse = True ) if match_style is None else match_style
39
- self ._query_regex = compile (
40
- ".*?" .join (f"({ escape (character )} )" for character in query ),
41
- flags = 0 if case_sensitive else IGNORECASE ,
42
- )
43
- self ._cache : LRUCache [str , float ] = LRUCache (1024 * 4 )
177
+ self ._case_sensitive = case_sensitive
178
+ self .fuzzy_search = FuzzySearch ()
44
179
45
180
@property
46
181
def query (self ) -> str :
@@ -52,15 +187,10 @@ def match_style(self) -> Style:
52
187
"""The style that will be used to highlight hits in the matched text."""
53
188
return self ._match_style
54
189
55
- @property
56
- def query_pattern (self ) -> str :
57
- """The regular expression pattern built from the query."""
58
- return self ._query_regex .pattern
59
-
60
190
@property
def case_sensitive(self) -> bool:
    """Whether this matcher treats upper and lower case as distinct.

    Returns:
        The value stored in `_case_sensitive`.
    """
    return self._case_sensitive
64
194
65
195
def match (self , candidate : str ) -> float :
66
196
"""Match the candidate against the query.
@@ -71,27 +201,7 @@ def match(self, candidate: str) -> float:
71
201
Returns:
72
202
Strength of the match from 0 to 1.
73
203
"""
74
- cached = self ._cache .get (candidate )
75
- if cached is not None :
76
- return cached
77
- match = self ._query_regex .search (candidate )
78
- if match is None :
79
- score = 0.0
80
- else :
81
- assert match .lastindex is not None
82
- offsets = [
83
- match .span (group_no )[0 ] for group_no in range (1 , match .lastindex + 1 )
84
- ]
85
- group_count = 0
86
- last_offset = - 2
87
- for offset in offsets :
88
- if offset > last_offset + 1 :
89
- group_count += 1
90
- last_offset = offset
91
-
92
- score = 1.0 - ((group_count - 1 ) / len (candidate ))
93
- self ._cache [candidate ] = score
94
- return score
204
+ return self .fuzzy_search .match (self .query , candidate )[0 ]
95
205
96
206
def highlight (self , candidate : str ) -> Text :
97
207
"""Highlight the candidate with the fuzzy match.
@@ -102,20 +212,11 @@ def highlight(self, candidate: str) -> Text:
102
212
Returns:
103
213
A [rich.text.Text][`Text`] object with highlighted matches.
104
214
"""
105
- match = self ._query_regex .search (candidate )
106
215
text = Text .from_markup (candidate )
107
- if match is None :
216
+ score , offsets = self .fuzzy_search .match (self .query , candidate )
217
+ if not score :
108
218
return text
109
- assert match .lastindex is not None
110
- if self ._query in text .plain :
111
- # Favor complete matches
112
- offset = text .plain .index (self ._query )
113
- text .stylize (self ._match_style , offset , offset + len (self ._query ))
114
- else :
115
- offsets = [
116
- match .span (group_no )[0 ] for group_no in range (1 , match .lastindex + 1 )
117
- ]
118
- for offset in offsets :
219
+ for offset in offsets :
220
+ if not candidate [offset ].isspace ():
119
221
text .stylize (self ._match_style , offset , offset + 1 )
120
-
121
222
return text
0 commit comments