Skip to content

Commit ddcabaa

Browse files
authored
Merge pull request #19 from varnamproject/improve-search-symbol-table
Improve search symbol table, add all config vars to varnam_config(), deprecated previous config functions
2 parents 8b58943 + 82f958d commit ddcabaa

File tree

10 files changed

+120
-33
lines changed

10 files changed

+120
-33
lines changed

c-shared.go

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,22 +271,25 @@ func varnam_set_indic_digits(varnamHandleID C.int, val C.int) {
271271
varnam_config(varnamHandleID, C.VARNAM_CONFIG_USE_INDIC_DIGITS, val)
272272
}
273273

274-
// TODO move all config to varnam_config()
274+
// Deprecated: use varnam_config() instead.
275275
//export varnam_set_dictionary_suggestions_limit
276276
func varnam_set_dictionary_suggestions_limit(varnamHandleID C.int, val C.int) {
277277
getVarnamHandle(varnamHandleID).varnam.DictionarySuggestionsLimit = int(val)
278278
}
279279

280+
// Deprecated: use varnam_config() instead.
280281
//export varnam_set_pattern_dictionary_suggestions_limit
281282
func varnam_set_pattern_dictionary_suggestions_limit(varnamHandleID C.int, val C.int) {
282283
getVarnamHandle(varnamHandleID).varnam.PatternDictionarySuggestionsLimit = int(val)
283284
}
284285

286+
// Deprecated: use varnam_config() instead.
285287
//export varnam_set_tokenizer_suggestions_limit
286288
func varnam_set_tokenizer_suggestions_limit(varnamHandleID C.int, val C.int) {
287289
getVarnamHandle(varnamHandleID).varnam.TokenizerSuggestionsLimit = int(val)
288290
}
289291

292+
// Deprecated: use varnam_config() instead.
290293
//export varnam_set_dictionary_match_exact
291294
func varnam_set_dictionary_match_exact(varnamHandleID C.int, val C.int) {
292295
if val == 0 {
@@ -404,6 +407,27 @@ func varnam_get_vst_path(varnamHandleID C.int) *C.char {
404407
return C.CString(handle.varnam.VSTPath)
405408
}
406409

410+
//export varnam_new_search_symbol
411+
func varnam_new_search_symbol(resultPointer **C.struct_Symbol_t) C.int {
412+
symbol := govarnam.NewSearchSymbol()
413+
*resultPointer = C.makeSymbol(
414+
C.int(symbol.Identifier),
415+
C.int(symbol.Type),
416+
C.int(symbol.MatchType),
417+
C.CString(symbol.Pattern),
418+
C.CString(symbol.Value1),
419+
C.CString(symbol.Value2),
420+
C.CString(symbol.Value3),
421+
C.CString(symbol.Tag),
422+
C.int(symbol.Weight),
423+
C.int(symbol.Priority),
424+
C.int(symbol.AcceptCondition),
425+
C.int(symbol.Flags),
426+
)
427+
428+
return C.VARNAM_SUCCESS
429+
}
430+
407431
//export varnam_search_symbol_table
408432
func varnam_search_symbol_table(varnamHandleID C.int, id C.int, searchCriteria C.struct_Symbol_t, resultPointer **C.varray) C.int {
409433
ctx, cancel := makeContext(id)
@@ -633,6 +657,18 @@ func varnam_config(varnamHandleID C.int, key C.int, value C.int) C.int {
633657
case C.VARNAM_CONFIG_IGNORE_DUPLICATE_TOKEN:
634658
handle.varnam.VSTMakerConfig.IgnoreDuplicateTokens = cintToBool(value)
635659
break
660+
case C.VARNAM_CONFIG_SET_DICTIONARY_SUGGESTIONS_LIMIT:
661+
handle.varnam.DictionarySuggestionsLimit = int(value)
662+
break
663+
case C.VARNAM_CONFIG_SET_PATTERN_DICTIONARY_SUGGESTIONS_LIMIT:
664+
handle.varnam.PatternDictionarySuggestionsLimit = int(value)
665+
break
666+
case C.VARNAM_CONFIG_SET_TOKENIZER_SUGGESTIONS_LIMIT:
667+
handle.varnam.TokenizerSuggestionsLimit = int(value)
668+
break
669+
case C.VARNAM_CONFIG_SET_DICTIONARY_MATCH_EXACT:
670+
handle.varnam.DictionaryMatchExact = cintToBool(value)
671+
break
636672
}
637673

638674
return C.VARNAM_SUCCESS

c-shared.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,15 @@
88
#define VARNAM_ERROR 2
99
#define VARNAM_CANCELLED 3
1010

11-
#define VARNAM_CONFIG_USE_DEAD_CONSONANTS 100
12-
#define VARNAM_CONFIG_IGNORE_DUPLICATE_TOKEN 101
13-
#define VARNAM_CONFIG_ENABLE_SUGGESTIONS 102
14-
#define VARNAM_CONFIG_USE_INDIC_DIGITS 103
11+
#define VARNAM_CONFIG_USE_DEAD_CONSONANTS 100
12+
#define VARNAM_CONFIG_IGNORE_DUPLICATE_TOKEN 101
13+
// VARNAM_CONFIG_ENABLE_SUGGESTIONS hasn't been implemented yet
14+
#define VARNAM_CONFIG_ENABLE_SUGGESTIONS 102
15+
#define VARNAM_CONFIG_USE_INDIC_DIGITS 103
16+
#define VARNAM_CONFIG_SET_DICTIONARY_SUGGESTIONS_LIMIT 104
17+
#define VARNAM_CONFIG_SET_PATTERN_DICTIONARY_SUGGESTIONS_LIMIT 105
18+
#define VARNAM_CONFIG_SET_TOKENIZER_SUGGESTIONS_LIMIT 106
19+
#define VARNAM_CONFIG_SET_DICTIONARY_MATCH_EXACT 107
1520

1621
typedef struct Suggestion_t {
1722
char* Word;

govarnam/constants.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@ var (
1212
VersionString string
1313
)
1414

15+
// An int field in a Go struct has the default (zero) value 0.
16+
// For the SearchSymbolTable use case this is a problem.
17+
// Hence we use a constructor that sets this value as the default instead.
18+
// https://stackoverflow.com/q/37135193/1372424
19+
const STRUCT_INT_DEFAULT_VALUE = -1
20+
1521
/* General */
1622
const ZWNJ = "\u200c"
1723
const ZWJ = "\u200d"

govarnam/govarnam.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,10 @@ func (varnam *Varnam) ReverseTransliterate(word string) ([]Suggestion, error) {
469469

470470
tokens := varnam.splitTextByConjunct(ctx, word)
471471

472+
if varnam.Debug {
473+
fmt.Println(tokens)
474+
}
475+
472476
for i, token := range tokens {
473477
for j, symbol := range token.symbols {
474478
tokens[i].symbols[j].Value1 = symbol.Pattern

govarnam/govarnam_ml_test.go

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -206,19 +206,25 @@ func TestMLAtomicChil(t *testing.T) {
206206

207207
func TestMLReverseTransliteration(t *testing.T) {
208208
varnam := getVarnamInstance("ml")
209+
oldLimit := varnam.TokenizerSuggestionsLimit
210+
varnam.TokenizerSuggestionsLimit = 30
209211

210212
sugs, err := varnam.ReverseTransliterate("മലയാളം")
211213
checkError(err)
212214

213215
// The expected order below will no longer match if VST weights change
214-
expected := []string{"malayaaLam", "malayALam", "malayaalam", "malayAlam", "malayaLam", "malayalam"}
215-
for i, sug := range sugs {
216-
assertEqual(t, sug.Word, expected[i])
216+
expected := []string{"malayaaLam", "malayaaLam_", "malayALam", "malayALam_", "malayaalam", "malayaalam_", "malayAlam", "malayAlam_", "malayaLam", "malayaLam_", "malayalam", "malayalam_"}
217+
218+
assertEqual(t, len(sugs), len(expected))
219+
for i, expectedWord := range expected {
220+
assertEqual(t, sugs[i].Word, expectedWord)
217221
}
218222

219223
sugs, err = varnam.ReverseTransliterate("2019 ഏപ്രിൽ 17-ന് മലയാളം വിക്കിപീഡിയയിലെ ലേഖനങ്ങളുടെ എണ്ണം 63,000 പിന്നിട്ടു.")
220224

221225
assertEqual(t, sugs[0].Word, "2019 Epril 17-n~ malayaaLam vikkipeeDiyayile lEkhanangaLuTe eNNam 63,000 pinnittu.")
226+
227+
varnam.TokenizerSuggestionsLimit = oldLimit
222228
}
223229

224230
func TestDictionaryLimit(t *testing.T) {
@@ -389,7 +395,7 @@ func TestMLExportAndImport(t *testing.T) {
389395
func TestMLSearchSymbolTable(t *testing.T) {
390396
varnam := getVarnamInstance("ml")
391397

392-
var search Symbol
398+
search := NewSearchSymbol()
393399
search.Value1 = "ക"
394400
results, err := varnam.SearchSymbolTable(context.Background(), search)
395401
checkError(err)

govarnam/govarnam_test.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ var testTempDir string
1717

1818
// assertEqual checks if values are equal
1919
// Thanks https://gist.github.com/samalba/6059502#gistcomment-2710184
20-
func assertEqual(t *testing.T, a interface{}, b interface{}) {
21-
if a == b {
20+
func assertEqual(t *testing.T, value interface{}, expected interface{}) {
21+
if value == expected {
2222
return
2323
}
2424
debug.PrintStack()
25-
t.Errorf("Received %v (type %v), expected %v (type %v)", a, reflect.TypeOf(a), b, reflect.TypeOf(b))
25+
t.Errorf("Received %v (type %v), expected %v (type %v)", value, reflect.TypeOf(value), expected, reflect.TypeOf(expected))
2626
}
2727

2828
func checkError(err error) {
@@ -65,6 +65,9 @@ func getVarnamInstance(schemeID string) *Varnam {
6565
if ok {
6666
return instance
6767
}
68+
69+
log.Fatalf("Varnam instance for %s not found", schemeID)
70+
6871
return nil
6972
}
7073

govarnam/symbol.go

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,23 @@ func removeNonExactTokens(tokens []Token) []Token {
512512
return tokens
513513
}
514514

515+
// NewSearchSymbol is a constructor for making a Symbol.
516+
// We're doing this because the default int value in
517+
// Go structs is 0. This won't work with searching
518+
// because fields can have the value 0.
519+
// https://stackoverflow.com/q/37135193/1372424
520+
func NewSearchSymbol() Symbol {
521+
symbol := Symbol{}
522+
symbol.Identifier = STRUCT_INT_DEFAULT_VALUE
523+
symbol.Type = STRUCT_INT_DEFAULT_VALUE
524+
symbol.MatchType = STRUCT_INT_DEFAULT_VALUE
525+
symbol.Weight = STRUCT_INT_DEFAULT_VALUE
526+
symbol.Priority = STRUCT_INT_DEFAULT_VALUE
527+
symbol.AcceptCondition = STRUCT_INT_DEFAULT_VALUE
528+
symbol.Flags = STRUCT_INT_DEFAULT_VALUE
529+
return symbol
530+
}
531+
515532
// SearchSymbolTable searches the symbol table
516533
func (varnam *Varnam) SearchSymbolTable(ctx context.Context, searchCriteria Symbol) ([]Symbol, error) {
517534
var (
@@ -534,7 +551,7 @@ func (varnam *Varnam) SearchSymbolTable(ctx context.Context, searchCriteria Symb
534551
return
535552
}
536553
} else {
537-
if val.(int) == 0 {
554+
if valInt, ok := val.(int); !ok || valInt == STRUCT_INT_DEFAULT_VALUE {
538555
return
539556
}
540557
}
@@ -587,7 +604,7 @@ func (varnam *Varnam) SearchSymbolTable(ctx context.Context, searchCriteria Symb
587604
}
588605

589606
func (varnam *Varnam) getVirama() (string, error) {
590-
var viramaSymbol Symbol
607+
viramaSymbol := NewSearchSymbol()
591608
viramaSymbol.Pattern = "~"
592609
results, _ := varnam.SearchSymbolTable(context.Background(), viramaSymbol)
593610

govarnam/vst_maker.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ func (varnam *Varnam) vmPersistToken(pattern string, value1 string, value2 strin
225225
return fmt.Errorf("arguments invalid")
226226
}
227227

228-
persisted, err := varnam.vmAlreadyPersisted(pattern, value1, matchType)
228+
persisted, err := varnam.vmAlreadyPersisted(pattern, value1, matchType, acceptCondition)
229229
if err != nil {
230230
return err
231231
}
@@ -260,9 +260,10 @@ func (varnam *Varnam) vmPersistToken(pattern string, value1 string, value2 strin
260260
return nil
261261
}
262262

263-
func (varnam *Varnam) vmAlreadyPersisted(pattern string, value1 string, matchType int) (bool, error) {
264-
var searchCriteria Symbol
263+
func (varnam *Varnam) vmAlreadyPersisted(pattern string, value1 string, matchType int, acceptCondition int) (bool, error) {
264+
searchCriteria := NewSearchSymbol()
265265
searchCriteria.Pattern = pattern
266+
searchCriteria.AcceptCondition = acceptCondition
266267

267268
if matchType == VARNAM_MATCH_EXACT {
268269
searchCriteria.MatchType = matchType

govarnamgo/govarnamgo.go

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,29 @@ func (handle *VarnamHandle) GetVSTPath() string {
633633
return C.GoString(cStr)
634634
}
635635

636+
func makeGoSymbol(cSymbol *C.Symbol) Symbol {
637+
var goSymbol Symbol
638+
goSymbol.Identifier = int(cSymbol.Identifier)
639+
goSymbol.Type = int(cSymbol.Type)
640+
goSymbol.MatchType = int(cSymbol.MatchType)
641+
goSymbol.Pattern = C.GoString(cSymbol.Pattern)
642+
goSymbol.Value1 = C.GoString(cSymbol.Value1)
643+
goSymbol.Value2 = C.GoString(cSymbol.Value2)
644+
goSymbol.Value3 = C.GoString(cSymbol.Value3)
645+
goSymbol.Tag = C.GoString(cSymbol.Tag)
646+
goSymbol.Weight = int(cSymbol.Weight)
647+
goSymbol.Priority = int(cSymbol.Priority)
648+
goSymbol.AcceptCondition = int(cSymbol.AcceptCondition)
649+
goSymbol.Flags = int(cSymbol.Flags)
650+
return goSymbol
651+
}
652+
653+
func NewSearchSymbol() Symbol {
654+
var resultPointer *C.Symbol
655+
C.varnam_new_search_symbol(&resultPointer)
656+
return makeGoSymbol(resultPointer)
657+
}
658+
636659
// SearchSymbolTable searches the VST
637660
func (handle *VarnamHandle) SearchSymbolTable(ctx context.Context, searchCriteria Symbol) []Symbol {
638661
var goResults []Symbol
@@ -671,21 +694,7 @@ func (handle *VarnamHandle) SearchSymbolTable(ctx context.Context, searchCriteri
671694
for i < int(C.varray_length(resultPointer)) {
672695
result := (*C.Symbol)(C.varray_get(resultPointer, C.int(i)))
673696

674-
var goResult Symbol
675-
goResult.Identifier = int(result.Identifier)
676-
goResult.Type = int(result.Type)
677-
goResult.MatchType = int(result.MatchType)
678-
goResult.Pattern = C.GoString(result.Pattern)
679-
goResult.Value1 = C.GoString(result.Value1)
680-
goResult.Value2 = C.GoString(result.Value2)
681-
goResult.Value3 = C.GoString(result.Value3)
682-
goResult.Tag = C.GoString(result.Tag)
683-
goResult.Weight = int(result.Weight)
684-
goResult.Priority = int(result.Priority)
685-
goResult.AcceptCondition = int(result.AcceptCondition)
686-
goResult.Flags = int(result.Flags)
687-
688-
goResults = append(goResults, goResult)
697+
goResults = append(goResults, makeGoSymbol(result))
689698
i++
690699
}
691700

govarnamgo/govarnamgo_ml_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ func TestRecentlyLearnedWords(t *testing.T) {
9191
func TestSearchSymbolTable(t *testing.T) {
9292
varnam := getVarnamInstance("ml")
9393

94-
var symbol Symbol
94+
symbol := NewSearchSymbol()
9595
symbol.Pattern = "la"
9696
result := varnam.SearchSymbolTable(context.Background(), symbol)
9797

0 commit comments

Comments
 (0)