Skip to content
This repository was archived by the owner on Dec 31, 2023. It is now read-only.

Commit 903cf4c

Browse files
authored
Merge pull request #15 from Garulf/add-flow-string-matcher
Add flow string matcher
2 parents c045de0 + fb18004 commit 903cf4c

File tree

2 files changed

+205
-1
lines changed

2 files changed

+205
-1
lines changed

flox/string_matcher.py

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
from typing import List
2+
3+
# This is a Python port of Flow Launcher's string matcher.
# I take no credit for the algorithm; I only translated it to Python.
#
# NOTE: this description is kept as comments on purpose. A string literal
# placed after other statements is not a module docstring; it would simply be
# evaluated and discarded.

# Separator used both to split the query into words and to detect word
# boundaries in the text being searched.
SPACE_CHAR: str = ' '

# Minimum acronym score required for an acronym match to be accepted; also
# reported back as the second element of every matcher result.
USER_SEARCH_PRECISION: int = 50
10+
11+
12+
def string_matcher(query: str, text: str, ignore_case: bool = True) -> tuple:
    """Fuzzy-match ``query`` against ``text``.

    Two strategies run in a single pass over ``text``:

    * acronym matching - every query character must match, in order, a
      character of ``text`` that starts a word, is upper case, or is a digit;
    * substring matching - every space separated word of the query must have
      its characters found, in order, in ``text``.

    Args:
        query: What the user typed. Surrounding whitespace is ignored.
        text: The candidate string to search in.
        ignore_case: Compare lower-cased copies of both strings when true.

    Returns:
        ``(False, USER_SEARCH_PRECISION)`` when nothing matched (including
        empty or whitespace-only input), otherwise
        ``(True, USER_SEARCH_PRECISION, matched_indices, score)`` where
        ``matched_indices`` are positions in ``text`` and ``score`` is a
        number (higher is better).
    """
    if not text or not query:
        # Same shape as every other "no match" result so callers can always
        # index/unpack the return value.
        return (False, USER_SEARCH_PRECISION)

    query = query.strip()
    if not query:
        # Whitespace-only query: nothing left to match after stripping.
        return (False, USER_SEARCH_PRECISION)

    current_acronym_query_index = 0
    acronym_match_data: List[int] = []
    acronyms_total_count: int = 0
    acronyms_matched: int = 0

    full_text_lower: str = text.lower() if ignore_case else text
    query_lower: str = query.lower() if ignore_case else query
    query_length = len(query_lower)

    # Drop empty entries produced by consecutive spaces ("a  b"); an empty
    # substring would otherwise be indexed at position 0 further down.
    query_substrings: List[str] = [
        part for part in query_lower.split(SPACE_CHAR) if part]
    current_query_substring_index: int = 0
    current_query_substring = query_substrings[current_query_substring_index]
    current_query_substring_char_index = 0

    first_match_index = -1
    first_match_index_in_word = -1
    last_match_index = 0
    all_query_substrings_matched: bool = False
    match_found_in_previous_loop: bool = False
    all_substrings_contained_in_text: bool = True

    index_list: List[int] = []
    space_indices: List[int] = []

    for text_index, text_char in enumerate(full_text_lower):
        # The acronym match is complete: only keep counting the remaining
        # acronym positions, which are needed for the acronym score.
        if current_acronym_query_index >= query_length and acronyms_matched == query_length:
            # Use the original-case text: upper-case letters are acronym
            # positions and would be invisible in the lower-cased copy.
            if is_acronym_count(text, text_index):
                acronyms_total_count += 1
            continue

        # Defensive guard so query_lower is never indexed out of range below.
        if current_acronym_query_index >= query_length:
            break

        # NOTE(review): this gate uses the character index inside the current
        # substring. The upstream C# implementation appears to gate on the
        # substring index (record spaces only while matching the first
        # substring) - confirm before changing, as it affects scoring.
        if text_char == SPACE_CHAR and current_query_substring_char_index == 0:
            space_indices.append(text_index)

        # Acronym matching.
        if is_acronym(text, text_index):
            if text_char == query_lower[current_acronym_query_index]:
                acronym_match_data.append(text_index)
                acronyms_matched += 1
                current_acronym_query_index += 1

        if is_acronym_count(text, text_index):
            acronyms_total_count += 1

        # Substring matching.
        if all_query_substrings_matched or text_char != current_query_substring[current_query_substring_char_index]:
            match_found_in_previous_loop = False
            continue

        if first_match_index < 0:
            # The first matched character starts the matched region.
            first_match_index = text_index

        if current_query_substring_char_index == 0:
            # First letter of the current query word.
            match_found_in_previous_loop = True
            first_match_index_in_word = text_index
        elif not match_found_in_previous_loop:
            # The previous text character did not match, so the characters
            # matched so far were scattered. Check whether the text directly
            # before this position holds the whole prefix contiguously.
            start_index_to_verify = text_index - current_query_substring_char_index

            if all_previous_chars_matched(start_index_to_verify, current_query_substring_char_index, full_text_lower, current_query_substring):
                match_found_in_previous_loop = True

                # A better (contiguous) match of the first query word moves
                # the start of the whole match. first_match_index_in_word is
                # left alone: it marks where the scattered matches of this
                # word began, i.e. which indices have to be replaced.
                if current_query_substring_index == 0:
                    first_match_index = start_index_to_verify

                index_list = get_updated_index_list(
                    start_index_to_verify, current_query_substring_char_index, first_match_index_in_word, index_list)

        last_match_index = text_index + 1
        index_list.append(text_index)

        current_query_substring_char_index += 1

        # Finished the current query word.
        if current_query_substring_char_index == len(current_query_substring):
            # One word that only matched scattered makes the whole query
            # count as "not fully contained".
            all_substrings_contained_in_text = match_found_in_previous_loop and all_substrings_contained_in_text

            current_query_substring_index += 1

            all_query_substrings_matched = all_query_substrings_matched_func(
                current_query_substring_index, len(query_substrings))

            if all_query_substrings_matched:
                continue

            # Move on to the next query word.
            current_query_substring = query_substrings[current_query_substring_index]
            current_query_substring_char_index = 0

    # Prefer the acronym match when every query character was matched.
    if acronyms_matched > 0 and acronyms_matched == len(query):
        acronyms_score: float = acronyms_matched * 100 / acronyms_total_count

        if acronyms_score >= USER_SEARCH_PRECISION:
            return (True, USER_SEARCH_PRECISION, acronym_match_data, acronyms_score)

    if all_query_substrings_matched:
        nearest_space_index = calculate_closest_space_index(
            space_indices, first_match_index)

        # first_match_index - nearest_space_index - 1 is the offset of the
        # first matched character inside its word.
        score = calculate_search_score(query, text, first_match_index - nearest_space_index - 1,
                                       space_indices, last_match_index - first_match_index, all_substrings_contained_in_text)

        return (True, USER_SEARCH_PRECISION, index_list, score)

    return (False, USER_SEARCH_PRECISION)
119+
120+
121+
def calculate_search_score(query: str, text: str, first_index: int, space_indices: List[int], match_length: int, all_substrings_contained_in_text: bool):
    """Turn the raw match geometry into a relevance score (higher is better).

    Args:
        query: The query that was matched.
        text: The text it was matched against.
        first_index: Offset of the first matched character inside its word.
        space_indices: Space positions recorded while scanning ``text``.
        match_length: Distance between the first and the last matched char.
        all_substrings_contained_in_text: Whether every query word was found
            as a contiguous run.

    Returns:
        The score as a float.
    """
    # Base score: shrinks the later the match starts and the longer it spans.
    denominator = (1 + first_index) + (match_length + 1)
    score = 100 * (len(query) + 1) / denominator

    # A match at the very start of a word is penalised by one point per
    # recorded space.
    if all_substrings_contained_in_text and first_index == 0:
        score -= len(space_indices)

    # Bonus for texts that are barely longer than the query.
    length_gap = len(text) - len(query)
    if length_gap < 5:
        score += 20
    elif length_gap < 10:
        score += 10

    if not all_substrings_contained_in_text:
        return score

    # Bonus per matched non-space character: 10 points each for the first
    # four, 5 points for every further one.
    threshold = 4
    char_count = len(query.replace(' ', ''))
    full_rate_chars = min(char_count, threshold)
    reduced_rate_chars = max(char_count - threshold, 0)
    score += full_rate_chars * 10 + reduced_rate_chars * 5

    return score
141+
142+
143+
def get_updated_index_list(start_index_to_verify: int, current_query_substring_char_index: int, first_matched_index_in_word: int, index_list: List[int]) -> List[int]:
    """Replace the scattered matches of the current word by a contiguous run.

    Args:
        start_index_to_verify: Text position where the contiguous run starts.
        current_query_substring_char_index: Number of characters in that run.
        first_matched_index_in_word: Every previously recorded index at or
            after this position is discarded.
        index_list: Matched indices recorded so far. It is not modified.

    Returns:
        A new list: the surviving indices followed by the indices of the
        contiguous run.
    """
    # Build a filtered copy instead of popping while iterating: removing
    # items from a list that is being enumerated skips the element that
    # follows each removal, which left stale indices behind.
    updated_list: List[int] = [
        item for item in index_list if item < first_matched_index_in_word]

    updated_list.extend(
        range(start_index_to_verify,
              start_index_to_verify + current_query_substring_char_index))

    return updated_list
156+
157+
158+
def all_query_substrings_matched_func(current_query_substring_index: int, query_substrings_length: int) -> bool:
    """Tell whether the substring cursor has moved past the last query word."""
    words_remaining = current_query_substring_index < query_substrings_length
    return not words_remaining
160+
161+
162+
def all_previous_chars_matched(start_index_to_verify: int, current_query_substring_char_index: int, full_text_lower: str, current_query_substring: str) -> bool:
    """Check that the text holds the query word's prefix at a given position.

    Compares the first ``current_query_substring_char_index`` characters of
    ``current_query_substring`` with the characters of ``full_text_lower``
    starting at ``start_index_to_verify``. Every position is inspected, even
    after a mismatch was found.
    """
    mismatched_offsets = [
        offset
        for offset in range(current_query_substring_char_index)
        if full_text_lower[start_index_to_verify + offset] != current_query_substring[offset]
    ]
    return not mismatched_offsets
169+
170+
171+
def is_acronym(text: str, text_index: int) -> bool:
    """Tell whether the character at ``text_index`` may match an acronym letter.

    That is the case when it qualifies as an acronym character or is a digit.
    """
    return is_acronym_char(text, text_index) or is_acronym_number(text, text_index)
175+
176+
177+
def is_acronym_count(text: str, text_index: int) -> bool:
    """Tell whether the character at ``text_index`` counts towards the total
    number of acronym positions used for the acronym score.

    Acronym characters always count; a digit counts only when it starts the
    text or follows a space.
    """
    if is_acronym_char(text, text_index):
        return True

    if not is_acronym_number(text, text_index):
        return False

    # Digit that is not an acronym character: it only counts at the start of
    # a word.
    starts_text = text_index == 0
    return starts_text or text[text_index - 1] == SPACE_CHAR
184+
185+
186+
def is_acronym_char(text: str, text_index: int) -> bool:
    """Tell whether the character at ``text_index`` is an acronym character.

    True for an upper-case character, for the first character of the text,
    and for any character directly preceded by a space.
    """
    if text[text_index].isupper():
        return True
    if text_index == 0:
        return True
    return text[text_index - 1] == SPACE_CHAR
188+
189+
190+
def is_acronym_number(text: str, text_index: int) -> bool:
    """Tell whether the character at ``text_index`` is a digit."""
    candidate = text[text_index]
    return candidate.isdigit()
192+
193+
194+
def calculate_closest_space_index(space_indices: List[int], first_match_index: int) -> int:
    """Find the last recorded space that lies before the first match.

    ``space_indices`` is scanned in order and the scan stops at the first
    entry that is not smaller than ``first_match_index``.

    Returns:
        The index of that space, or ``-1`` when no entry precedes the match.
    """
    nearest = -1

    for candidate in space_indices:
        if candidate >= first_match_index:
            # Reached the match position: later entries are not inspected.
            break
        nearest = candidate

    return nearest

flox/version

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.18.1
1+
0.19.0

0 commit comments

Comments
 (0)