@@ -12,15 +12,21 @@ import (
1212 "slices"
1313 "strings"
1414
15+ "github.com/coreruleset/wnram"
16+
1517 "github.com/coreruleset/crs-toolchain/v2/utils"
1618)
1719
// FpFinderError is a field-less error type; its Error method reports the
// fixed message "FpFinder error".
type FpFinderError struct{}
1921
const (
	// dictionaryURLFormat is the download URL template for the WordNet
	// archive; its single %s verb is filled with dictionaryBaseFileName.
	// The host and path must not contain whitespace, otherwise the formatted
	// URL is invalid.
	dictionaryURLFormat = "https://wordnetcode.princeton.edu/%s"

	// dictionaryBaseFileName is the archive's file name. It is used both as
	// the final URL path element and as the name of the cached file, so it
	// must not carry leading or trailing whitespace.
	dictionaryBaseFileName = "wn3.1.dict.tar.gz"

	// minSize is the minimum word length considered; shorter input words are
	// skipped during filtering.
	minSize = 3
)
2325
26+ type WordNet interface {
27+ Lookup (criteria wnram.Criteria ) ([]wnram.Lookup , error )
28+ }
29+
2430func (t * FpFinderError ) Error () string {
2531 return "FpFinder error"
2632}
@@ -33,41 +39,35 @@ func NewFpFinder() *FpFinder {
3339
3440func (t * FpFinder ) FpFinder (inputFilePath string , extendedDictionaryFilePath string , englishDictionaryCommitRef string ) error {
3541 // Get the dictionary path in ~/.crs-toolchain
36- dictionaryFileName := fmt .Sprintf ("%s-%s" , englishDictionaryCommitRef , dictionaryBaseFileName )
37- dictionaryPath , err := utils .GetCacheFilePath (dictionaryFileName )
42+ dictionaryPath , err := utils .GetCacheFilePath (dictionaryBaseFileName )
3843 if err != nil {
3944 logger .Fatal ().Err (err ).Msg ("Error getting dictionary path" )
4045 }
4146
4247 // Check if the dictionary exists, if not, download it
4348 if _ , err := os .Stat (dictionaryPath ); os .IsNotExist (err ) {
44- logger .Debug ().Msg ("Dictionary file not found. Downloading..." )
45- dictionaryURL := fmt .Sprintf (dictionaryURLFormat , englishDictionaryCommitRef , dictionaryBaseFileName )
46- if err := utils .DownloadFile (dictionaryPath , dictionaryURL ); err != nil {
49+ logger .Debug ().Msg ("Dictionary folder not found. Downloading..." )
50+ dictionaryArchivePath , err := utils .GetCacheFilePath (dictionaryBaseFileName )
51+ if err != nil {
52+ logger .Fatal ().Err (err ).Msg ("Error getting dictionary path" )
53+ }
54+
55+ dictionaryURL := fmt .Sprintf (dictionaryURLFormat , dictionaryBaseFileName )
56+ logger .Debug ().Msgf ("Downloading dictionary from %s to %s" , dictionaryURL , dictionaryArchivePath )
57+ if err := utils .DownloadFile (dictionaryArchivePath , dictionaryURL ); err != nil {
4758 logger .Fatal ().Err (err ).Msg ("Failed to download dictionary" )
4859 }
4960 logger .Debug ().Msg ("Download complete." )
5061 } else {
51- logger .Debug ().Msg ("Dictionary file found, skipping download." )
62+ logger .Debug ().Msg ("Dictionary folder found, skipping download." )
5263 }
5364
54- // Load dictionary into memory
55- englishDict , err := t .loadDictionary (dictionaryPath , minSize )
56- if err != nil {
57- logger .Fatal ().Err (err ).Msg ("Failed to load english dictionary" )
58- }
59-
60- var dict map [string ]struct {}
65+ var extendedDict map [string ]struct {}
6166 if extendedDictionaryFilePath != "" {
62- extendedDict , err : = t .loadDictionary (extendedDictionaryFilePath , 0 )
67+ extendedDict , err = t .loadDictionary (extendedDictionaryFilePath , 0 )
6368 if err != nil {
6469 logger .Fatal ().Err (err ).Msg ("Failed to load extended dictionary" )
6570 }
66-
67- // Add words from extendedDictionary
68- dict = t .mergeDictionaries (englishDict , extendedDict )
69- } else {
70- dict = englishDict
7171 }
7272
7373 // Load input file into memory
@@ -76,8 +76,10 @@ func (t *FpFinder) FpFinder(inputFilePath string, extendedDictionaryFilePath str
7676 logger .Fatal ().Err (err ).Msg ("Failed to load input file" )
7777 }
7878
79+ wn , _ := wnram .New (dictionaryPath )
80+
7981 // Process words from inputfile, sort the output and remove duplicates
80- filteredWords := t .processWords (inputFile , dict , minSize )
82+ filteredWords := t .processWords (inputFile , wn , extendedDict , minSize )
8183
8284 for _ , str := range filteredWords {
8385 fmt .Println (str )
@@ -102,19 +104,6 @@ func (t *FpFinder) loadDictionary(path string, minWordLength int) (map[string]st
102104 return content , nil
103105}
104106
105- func (t * FpFinder ) mergeDictionaries (a , b map [string ]struct {}) map [string ]struct {} {
106- merged := make (map [string ]struct {})
107-
108- for k := range a {
109- merged [k ] = struct {}{}
110- }
111- for k := range b {
112- merged [k ] = struct {}{}
113- }
114-
115- return merged
116- }
117-
118107func (t * FpFinder ) loadInput (path string ) ([]string , error ) {
119108 if path == "-" {
120109 return t .loadInputFromStdIn ()
@@ -158,9 +147,9 @@ func (t *FpFinder) wordsFromInput(reader io.Reader) ([]string, error) {
158147 return content , nil
159148}
160149
161- func (t * FpFinder ) processWords (inputFile []string , dict map [string ]struct {}, minSize int ) []string {
150+ func (t * FpFinder ) processWords (inputFile []string , wn WordNet , extendedDict map [string ]struct {}, minSize int ) []string {
162151 // Filter words not in the dictionary
163- filteredWords := t .filterContent (inputFile , dict , minSize )
152+ filteredWords := t .filterContent (inputFile , wn , extendedDict , minSize )
164153
165154 // Sort words alphabetically (case-insensitive)
166155 slices .SortFunc (filteredWords , func (a , b string ) int {
@@ -173,7 +162,7 @@ func (t *FpFinder) processWords(inputFile []string, dict map[string]struct{}, mi
173162 return filteredWords
174163}
175164
176- func (t * FpFinder ) filterContent (inputFile []string , dict map [string ]struct {}, minSize int ) []string {
165+ func (t * FpFinder ) filterContent (inputFile []string , wn WordNet , extendedDict map [string ]struct {}, minSize int ) []string {
177166 var commentPattern = regexp .MustCompile (`^\s*#` )
178167 var filteredWords []string
179168 for _ , word := range inputFile {
@@ -184,10 +173,21 @@ func (t *FpFinder) filterContent(inputFile []string, dict map[string]struct{}, m
184173 if word == "" || len (word ) < minSize {
185174 continue
186175 }
176+ // Check if the word exists in WordNet
177+ found , err := wn .Lookup (wnram.Criteria {Matching : word })
178+ if err != nil {
179+ logger .Fatal ().Err (err ).Msg ("Failed to lookup word in WordNet" )
180+ }
187181
188- // If the word is not in the dictionary, add it to the filtered list
189- if _ , found := dict [word ]; ! found {
190- filteredWords = append (filteredWords , word )
182+ // If the word is not in the dictionary and extended dictionary, add it to the filtered list
183+ if len (found ) == 0 {
184+ if _ , found := extendedDict [word ]; ! found {
185+ filteredWords = append (filteredWords , word )
186+ } else {
187+ logger .Debug ().Msgf ("Word '%s' found in extended dictionary" , word )
188+ }
189+ } else {
190+ logger .Debug ().Msgf ("Word '%s' found in WordNet" , word )
191191 }
192192 }
193193
0 commit comments