Skip to content

Commit 1acfbbe

Browse files
committed
Fuzzy search returning a set of matching strings (optional threshold) + tests
1 parent b471431 commit 1acfbbe

File tree

2 files changed

+155
-1
lines changed

2 files changed

+155
-1
lines changed

string-analysis.go

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@ package edlib
22

33
import (
44
"errors"
5+
"fmt"
56
"log"
7+
8+
"github.com/hbollon/go-edlib/internal/orderedmap"
69
)
710

811
// AlgorithMethod is an Integer type used to identify edit distance algorithms
@@ -77,7 +80,7 @@ func FuzzySearch(str string, strList []string, algo AlgorithMethod) string {
7780
}
7881

7982
// FuzzySearchThreshold realize an approximate search on a string list and return the closest one compared
80-
// to the string input. Take an similarity threshold in parameter.
83+
// to the string input. Takes a similarity threshold in parameter.
8184
func FuzzySearchThreshold(str string, strList []string, minSim float32, algo AlgorithMethod) string {
8285
var higherMatchPercent float32
8386
var tmpStr string
@@ -96,3 +99,46 @@ func FuzzySearchThreshold(str string, strList []string, minSim float32, algo Alg
9699
}
97100
return tmpStr
98101
}
102+
103+
// FuzzySearchSet realize an approximate search on a string list and return a set composed with x strings compared
104+
// to the string input sorted by similarity with the base string. Takes the a quantity parameter to define the number of output strings desired (For exemple 3 in the case of the Google Keyborad word suggestion).
105+
func FuzzySearchSet(str string, strList []string, quantity int, algo AlgorithMethod) []string {
106+
sortedMap := make(orderedmap.OrderedMap, quantity)
107+
for _, strToCmp := range strList {
108+
sim, err := StringsSimilarity(str, strToCmp, algo)
109+
if err != nil {
110+
log.Fatal(err)
111+
} else {
112+
fmt.Printf("Sim %s/%s : %f\n", str, strToCmp, sim)
113+
}
114+
115+
if sim > sortedMap[sortedMap.Len()-1].Value {
116+
sortedMap[sortedMap.Len()-1].Key = strToCmp
117+
sortedMap[sortedMap.Len()-1].Value = sim
118+
sortedMap.SortByValues()
119+
}
120+
}
121+
122+
return sortedMap.ToArray()
123+
}
124+
125+
// FuzzySearchSetThreshold realize an approximate search on a string list and return a set composed with x strings compared
126+
// to the string input sorted by similarity with the base string. Take a similarity threshold in parameter. Takes the a quantity parameter to define the number of output strings desired (For exemple 3 in the case of the Google Keyborad word suggestion).
127+
// Takes also a threshold parameter for similarity with base string.
128+
func FuzzySearchSetThreshold(str string, strList []string, quantity int, minSim float32, algo AlgorithMethod) []string {
129+
sortedMap := make(orderedmap.OrderedMap, quantity)
130+
for _, strToCmp := range strList {
131+
sim, err := StringsSimilarity(str, strToCmp, algo)
132+
if err != nil {
133+
log.Fatal(err)
134+
}
135+
136+
if sim >= minSim && sim > sortedMap[sortedMap.Len()-1].Value {
137+
sortedMap[sortedMap.Len()-1].Key = strToCmp
138+
sortedMap[sortedMap.Len()-1].Value = sim
139+
sortedMap.SortByValues()
140+
}
141+
}
142+
143+
return sortedMap.ToArray()
144+
}

tests/string-analysis_test.go

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,26 @@
11
package edlib
22

33
import (
4+
"reflect"
45
"testing"
56

67
"github.com/hbollon/go-edlib"
78
)
89

10+
var strList []string
11+
12+
func init() {
13+
strList = []string{
14+
"test",
15+
"tester",
16+
"tests",
17+
"testers",
18+
"testing",
19+
"tsting",
20+
"sting",
21+
}
22+
}
23+
924
func TestStringsSimilarity(t *testing.T) {
1025
type args struct {
1126
str1 string
@@ -118,3 +133,96 @@ func TestStringsSimilarity(t *testing.T) {
118133
})
119134
}
120135
}
136+
137+
func TestFuzzySearch(t *testing.T) {
138+
type args struct {
139+
str string
140+
strList []string
141+
algo edlib.AlgorithMethod
142+
}
143+
tests := []struct {
144+
name string
145+
args args
146+
want string
147+
}{
148+
{"FuzzySearch 'testing'", args{"testnig", strList, edlib.Levenshtein}, "testing"},
149+
}
150+
for _, tt := range tests {
151+
t.Run(tt.name, func(t *testing.T) {
152+
if got := edlib.FuzzySearch(tt.args.str, tt.args.strList, tt.args.algo); got != tt.want {
153+
t.Errorf("FuzzySearch() = %v, want %v", got, tt.want)
154+
}
155+
})
156+
}
157+
}
158+
159+
func TestFuzzySearchThreshold(t *testing.T) {
160+
type args struct {
161+
str string
162+
strList []string
163+
minSim float32
164+
algo edlib.AlgorithMethod
165+
}
166+
tests := []struct {
167+
name string
168+
args args
169+
want string
170+
}{
171+
{"FuzzySearch 'testing'", args{"testnig", strList, 0.7, edlib.Levenshtein}, "testing"},
172+
{"FuzzySearch 'testing'", args{"hello", strList, 0.7, edlib.Levenshtein}, ""},
173+
}
174+
for _, tt := range tests {
175+
t.Run(tt.name, func(t *testing.T) {
176+
if got := edlib.FuzzySearchThreshold(tt.args.str, tt.args.strList, tt.args.minSim, tt.args.algo); got != tt.want {
177+
t.Errorf("FuzzySearchThreshold() = %v, want %v", got, tt.want)
178+
}
179+
})
180+
}
181+
}
182+
183+
func TestFuzzySearchSet(t *testing.T) {
184+
type args struct {
185+
str string
186+
strList []string
187+
quantity int
188+
algo edlib.AlgorithMethod
189+
}
190+
tests := []struct {
191+
name string
192+
args args
193+
want []string
194+
}{
195+
{"FuzzySearch 'testing'", args{"testnig", strList, 3, edlib.Levenshtein}, []string{"testing", "test", "tester"}},
196+
}
197+
for _, tt := range tests {
198+
t.Run(tt.name, func(t *testing.T) {
199+
if got := edlib.FuzzySearchSet(tt.args.str, tt.args.strList, tt.args.quantity, tt.args.algo); !reflect.DeepEqual(got, tt.want) {
200+
t.Errorf("FuzzySearchSet() = %v, want %v", got, tt.want)
201+
}
202+
})
203+
}
204+
}
205+
206+
func TestFuzzySearchSetThreshold(t *testing.T) {
207+
type args struct {
208+
str string
209+
strList []string
210+
quantity int
211+
minSim float32
212+
algo edlib.AlgorithMethod
213+
}
214+
tests := []struct {
215+
name string
216+
args args
217+
want []string
218+
}{
219+
{"FuzzySearch 'testing'", args{"testnig", strList, 3, 0.7, edlib.Levenshtein}, []string{"testing", "", ""}},
220+
}
221+
for _, tt := range tests {
222+
t.Run(tt.name, func(t *testing.T) {
223+
if got := edlib.FuzzySearchSetThreshold(tt.args.str, tt.args.strList, tt.args.quantity, tt.args.minSim, tt.args.algo); !reflect.DeepEqual(got, tt.want) {
224+
t.Errorf("FuzzySearchSetThreshold() = %v, want %v", got, tt.want)
225+
}
226+
})
227+
}
228+
}

0 commit comments

Comments
 (0)