diff --git a/util/fp_finder.go b/util/fp_finder.go index d426ae1..118d617 100644 --- a/util/fp_finder.go +++ b/util/fp_finder.go @@ -75,11 +75,8 @@ func (t *FpFinder) FpFinder(inputFilePath string, extendedDictionaryFilePath str logger.Fatal().Err(err).Msg("Failed to load input file") } - // Filter words not in dictionary, remove duplicates, and sort alphabetically - filteredWords := t.filterContent(inputFile, dict, minSize) - - // Remove adjacent duplicate words from the sorted list - filteredWords = slices.Compact(filteredWords) + // Process words from inputfile, sort the output and remove duplicates + filteredWords := t.processWords(inputFile, dict, minSize) for _, str := range filteredWords { fmt.Println(str) @@ -140,6 +137,21 @@ func (t *FpFinder) loadFileContent(path string) ([]string, error) { return content, nil } +func (t *FpFinder) processWords(inputFile []string, dict map[string]struct{}, minSize int) []string { + // Filter words not in the dictionary + filteredWords := t.filterContent(inputFile, dict, minSize) + + // Sort words alphabetically (case-insensitive) + slices.SortFunc(filteredWords, func(a, b string) int { + return strings.Compare(strings.ToLower(a), strings.ToLower(b)) + }) + + // Remove adjacent duplicate words from the sorted list + filteredWords = slices.Compact(filteredWords) + + return filteredWords +} + func (t *FpFinder) filterContent(inputFile []string, dict map[string]struct{}, minSize int) []string { var commentPattern = regexp.MustCompile(`^\s*#`) var filteredWords []string diff --git a/util/fp_finder_test.go b/util/fp_finder_test.go index 1e67041..bf1a4fc 100644 --- a/util/fp_finder_test.go +++ b/util/fp_finder_test.go @@ -38,6 +38,31 @@ func (s *fpFinderTestSuite) TestFpFinder_FilterContent() { s.Equal(expected, result) } +func (s *fpFinderTestSuite) TestFpFinder_ProcessWords() { + input := []string{"apple", "banana", "orange", "banana", "pear", "#comment", "banana"} + dict := map[string]struct{}{ + "apple": {}, + "orange": {}, + } + + expected := []string{"banana", "pear"} + + result := NewFpFinder().processWords(input, dict, 3) + + s.Equal(expected, result) +} + +func (s *fpFinderTestSuite) TestFpFinder_ProcessWords_Sorting() { + input := []string{"pear", "Banana", ".hiddenfruit", "kiwi", "banana", "Apple", ".dotfruit"} + dict := map[string]struct{}{} // empty dictionary, so no filtering + + expected := []string{".dotfruit", ".hiddenfruit", "Apple", "Banana", "banana", "kiwi", "pear"} + + result := NewFpFinder().processWords(input, dict, 3) + + s.Equal(expected, result) +} + func (s *fpFinderTestSuite) TestFpFinder_MergeDictionaries() { a := map[string]struct{}{"apple": {}, "banana": {}} b := map[string]struct{}{"cherry": {}, "date": {}}