Skip to content

Commit 529ef5e

Browse files
authored
Add simple user dictionary migrations (#25)
1 parent fccc0ee commit 529ef5e

File tree

10 files changed

+279
-134
lines changed

10 files changed

+279
-134
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
module github.com/varnamproject/govarnam
22

3-
go 1.15
3+
go 1.16
44

55
require github.com/mattn/go-sqlite3 v1.14.6

govarnam/dictionary.go

Lines changed: 30 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,18 @@ package govarnam
88

99
import (
1010
"context"
11-
sql "database/sql"
11+
"embed"
1212
"fmt"
13+
"io/fs"
1314
"log"
1415
"os"
1516
"path"
1617
"time"
1718
)
1819

20+
//go:embed migrations/*.sql
21+
var embedFS embed.FS
22+
1923
// DictionaryResult result from dictionary search
2024
type DictionaryResult struct {
2125
// Exactly found starting word matches.
@@ -54,108 +58,42 @@ func (varnam *Varnam) InitDict(dictPath string) error {
5458
var err error
5559

5660
if !fileExists(dictPath) {
57-
log.Printf("Making Varnam Learnings File at %s\n", dictPath)
58-
os.MkdirAll(path.Dir(dictPath), 0750)
59-
60-
varnam.dictConn, err = makeDictionary(dictPath)
61-
} else {
62-
varnam.dictConn, err = openDB(dictPath)
61+
log.Printf("Making Varnam Learnings Dir for %s\n", dictPath)
62+
err := os.MkdirAll(path.Dir(dictPath), 0750)
63+
if err != nil {
64+
return err
65+
}
6366
}
6467

65-
// Since SQLite v3.12.0, default page size is 4096
66-
varnam.dictConn.Exec("PRAGMA page_size=4096;")
67-
// WAL makes writes & reads happen concurrently => significantly fast
68-
varnam.dictConn.Exec("PRAGMA journal_mode=wal;")
68+
varnam.dictConn, err = openDB(dictPath)
69+
if err != nil {
70+
return err
71+
}
6972

7073
varnam.DictPath = dictPath
7174

72-
return err
73-
}
74-
75-
func makeDictionary(dictPath string) (*sql.DB, error) {
76-
conn, err := openDB(dictPath)
75+
// cd into migrations directory
76+
migrationsFS, err := fs.Sub(embedFS, "migrations")
7777
if err != nil {
78-
return nil, err
78+
return err
7979
}
8080

81-
queries := []string{
82-
`
83-
CREATE TABLE IF NOT EXISTS metadata (
84-
key TEXT UNIQUE,
85-
value TEXT
86-
);
87-
`,
88-
`
89-
CREATE TABLE IF NOT EXISTS words (
90-
id INTEGER PRIMARY KEY,
91-
word TEXT UNIQUE,
92-
weight INTEGER DEFAULT 1,
93-
learned_on INTEGER
94-
);
95-
`,
96-
`
97-
CREATE VIRTUAL TABLE IF NOT EXISTS words_fts USING FTS5(
98-
word,
99-
weight UNINDEXED,
100-
learned_on UNINDEXED,
101-
content='words',
102-
content_rowid='id',
103-
tokenize='ascii',
104-
prefix='1 2',
105-
);
106-
`,
107-
`
108-
CREATE TRIGGER words_ai AFTER INSERT ON words
109-
BEGIN
110-
INSERT INTO words_fts (rowid, word)
111-
VALUES (new.id, new.word);
112-
END;
113-
`,
114-
`
115-
CREATE TRIGGER words_ad AFTER DELETE ON words
116-
BEGIN
117-
INSERT INTO words_fts (words_fts, rowid, word)
118-
VALUES ('delete', old.id, old.word);
119-
END;
120-
`,
121-
`
122-
CREATE TRIGGER words_au AFTER UPDATE ON words
123-
BEGIN
124-
INSERT INTO words_fts (words_fts, rowid, word)
125-
VALUES ('delete', old.id, old.word);
126-
INSERT INTO words_fts (rowid, word)
127-
VALUES (new.id, new.word);
128-
END;
129-
`,
130-
`
131-
CREATE TABLE IF NOT EXISTS patterns (
132-
pattern TEXT NOT NULL COLLATE NOCASE,
133-
word_id INTEGER NOT NULL,
134-
FOREIGN KEY(word_id) REFERENCES words(id) ON DELETE CASCADE,
135-
PRIMARY KEY(pattern, word_id)
136-
);
137-
`}
138-
139-
// Note: FTS can't be applied on patterns because
140-
// we require partial word search which FTS doesn't support
141-
142-
for _, query := range queries {
143-
ctx, cancelFunc := context.WithTimeout(context.Background(), 5*time.Second)
144-
defer cancelFunc()
145-
146-
stmt, err := conn.PrepareContext(ctx, query)
147-
if err != nil {
148-
return nil, err
149-
}
150-
defer stmt.Close()
81+
mg, err := InitMigrate(varnam.dictConn, migrationsFS)
82+
if err != nil {
83+
return err
84+
}
15185

152-
_, err = stmt.ExecContext(ctx)
153-
if err != nil {
154-
return nil, err
155-
}
86+
ranMigrations, err := mg.Run()
87+
if ranMigrations != 0 {
88+
log.Printf("ran %d migrations", ranMigrations)
15689
}
15790

158-
return conn, nil
91+
// Since SQLite v3.12.0, default page size is 4096
92+
varnam.dictConn.Exec("PRAGMA page_size=4096;")
93+
// WAL makes writes & reads happen concurrently => significantly fast
94+
varnam.dictConn.Exec("PRAGMA journal_mode=wal;")
95+
96+
return err
15997
}
16098

16199
type searchDictionaryType int32
@@ -372,42 +310,6 @@ func (varnam *Varnam) getMoreFromDictionary(ctx context.Context, words []Suggest
372310
}
373311
}
374312

375-
// A simpler function to get matches from pattern dictionary
376-
// Gets incomplete matches.
377-
// Eg: If pattern = "chin", will return "china"
378-
// TODO better function name ? Ambiguous ?
379-
func (varnam *Varnam) getTrailingFromPatternDictionary(ctx context.Context, pattern string) []Suggestion {
380-
var results []Suggestion
381-
382-
select {
383-
case <-ctx.Done():
384-
return results
385-
default:
386-
rows, err := varnam.dictConn.QueryContext(ctx, "SELECT word, weight FROM words WHERE id IN (SELECT word_id FROM patterns WHERE pattern LIKE ?) ORDER BY weight DESC LIMIT 10", pattern+"%")
387-
388-
if err != nil {
389-
log.Print(err)
390-
return results
391-
}
392-
393-
defer rows.Close()
394-
395-
for rows.Next() {
396-
var item Suggestion
397-
rows.Scan(&item.Word, &item.Weight)
398-
item.Weight += VARNAM_LEARNT_WORD_MIN_WEIGHT
399-
results = append(results, item)
400-
}
401-
402-
err = rows.Err()
403-
if err != nil {
404-
log.Print(err)
405-
}
406-
407-
return results
408-
}
409-
}
410-
411313
// Gets incomplete and complete matches from pattern dictionary
412314
// Eg: If pattern = "chin" or "chinayil", will return "china"
413315
func (varnam *Varnam) getFromPatternDictionary(ctx context.Context, pattern string) []PatternDictionarySuggestion {

govarnam/migrate.go

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
package govarnam
2+
3+
import (
4+
sql "database/sql"
5+
"io/fs"
6+
"strings"
7+
)
8+
9+
// migrate runs the SQL migration files found in fs against db and keeps track
// of what has been applied in the "migrations" table.
type migrate struct {
10+
// db is the database handle every migration statement is executed on.
db *sql.DB
11+
// fs is the file system whose root directory (".") is listed and read for
// migration files.
fs fs.FS
12+
}
13+
14+
// migrationStatus is the snapshot produced by Status and consumed by Run and
// runMigrations.
type migrationStatus struct {
15+
// lastRun is the name read from the newest row (highest id) of the
// migrations table; it stays empty when that query yields no value.
lastRun string
16+
// lastMigration identifies the last entry of the migrations directory
// listing.
lastMigration string
17+
}
18+
19+
func InitMigrate(db *sql.DB, fs fs.FS) (*migrate, error) {
20+
_, err := db.Exec(`
21+
CREATE TABLE IF NOT EXISTS migrations (
22+
id INTEGER PRIMARY KEY,
23+
name VARCHAR(200)
24+
);
25+
`)
26+
if err != nil {
27+
return nil, err
28+
}
29+
30+
return &migrate{db, fs}, nil
31+
}
32+
33+
func (mg *migrate) Status() (*migrationStatus, error) {
34+
var lastRun string = ""
35+
mg.db.QueryRow("SELECT name FROM migrations ORDER BY id DESC LIMIT 1").Scan(&lastRun)
36+
37+
files, err := fs.ReadDir(mg.fs, ".")
38+
if err != nil {
39+
return nil, err
40+
}
41+
42+
lastMigration := files[len(files)-1].Name()
43+
44+
return &migrationStatus{lastRun, lastMigration}, nil
45+
}
46+
47+
func (mg *migrate) Run() (int, error) {
48+
ranMigrations := 0
49+
50+
status, err := mg.Status()
51+
if err != nil {
52+
return 0, err
53+
}
54+
55+
if status.lastRun != status.lastMigration {
56+
ranMigrations, err = mg.runMigrations(status)
57+
if err != nil {
58+
return 0, err
59+
}
60+
}
61+
62+
return ranMigrations, nil
63+
}
64+
65+
func (mg *migrate) runMigrations(status *migrationStatus) (int, error) {
66+
files, err := fs.ReadDir(mg.fs, ".")
67+
if err != nil {
68+
return 0, err
69+
}
70+
71+
ranMigrations := 0
72+
73+
// lastRun will be empty if no migrations have been run
74+
var foundLastRunMigration bool = (status.lastRun == "")
75+
76+
for _, file := range files {
77+
fileNameParts := strings.Split(file.Name(), ".")
78+
migrationName := fileNameParts[0]
79+
80+
// Run all migrations after the last ran migration
81+
if !foundLastRunMigration {
82+
foundLastRunMigration = (status.lastRun == migrationName)
83+
} else {
84+
fileContents, err := fs.ReadFile(mg.fs, file.Name())
85+
if err != nil {
86+
return 0, err
87+
}
88+
89+
tx, err := mg.db.Begin()
90+
if err != nil {
91+
return 0, err
92+
}
93+
94+
_, err = tx.Exec(string(fileContents))
95+
if err != nil {
96+
return 0, err
97+
}
98+
99+
stmt, err := tx.Prepare("INSERT INTO migrations (name) VALUES(?)")
100+
if err != nil {
101+
tx.Rollback()
102+
return 0, err
103+
}
104+
_, err = stmt.Exec(migrationName)
105+
if err != nil {
106+
tx.Rollback()
107+
}
108+
109+
tx.Commit()
110+
111+
ranMigrations++
112+
}
113+
}
114+
115+
return ranMigrations, nil
116+
}

govarnam/migrate_test.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
package govarnam
2+
3+
import (
4+
"database/sql"
5+
"embed"
6+
"io/fs"
7+
"testing"
8+
)
9+
10+
//go:embed testdata/*.sql
11+
var testdataFS embed.FS
12+
13+
// TestMigration runs the testdata migrations and then the embedded migrations
// through one *sql.DB handle. After each run it checks that the reported count
// equals the number of entries in the directory and that a table which could
// not be queried before the run can be queried afterwards.
func TestMigration(t *testing.T) {
14+
db, err := sql.Open("sqlite3", ":memory:")
15+
16+
checkError(err)
17+
18+
testdataDirFS, err := fs.Sub(testdataFS, "testdata")
19+
checkError(err)
20+
21+
dirFiles, err := fs.ReadDir(testdataDirFS, ".")
22+
checkError(err)
23+
24+
mg, err := InitMigrate(db, testdataDirFS)
25+
checkError(err)
26+
27+
// Before any migration has run, querying words is expected to fail.
_, err = db.Query("SELECT * FROM words")
28+
assertEqual(t, err != nil, true)
29+
30+
ranMigrations, err := mg.Run()
31+
assertEqual(t, err, nil)
32+
assertEqual(t, ranMigrations, len(dirFiles))
33+
34+
// NOTE(review): the *sql.Rows returned by db.Query is discarded without
// Close here and again at the end of the test. Presumably that keeps a
// pooled connection checked out, and with the ":memory:" DSN a further
// connection may open a separate, empty database — confirm that Part 2
// really runs against the database migrated above.
_, err = db.Query("SELECT * FROM words")
35+
assertEqual(t, err, nil)
36+
37+
// Part 2 : New Migrations
38+
39+
// words_fts is expected to be missing until the embedded migrations run.
_, err = db.Query("SELECT * FROM words_fts")
40+
assertEqual(t, err != nil, true)
41+
42+
migrationsFS, err := fs.Sub(embedFS, "migrations")
43+
checkError(err)
44+
45+
dirFiles, err = fs.ReadDir(migrationsFS, ".")
46+
checkError(err)
47+
48+
mg, err = InitMigrate(db, migrationsFS)
49+
checkError(err)
50+
51+
ranMigrations, err = mg.Run()
52+
assertEqual(t, err, nil)
53+
assertEqual(t, ranMigrations, len(dirFiles))
54+
55+
_, err = db.Query("SELECT * FROM words_fts")
56+
assertEqual(t, err, nil)
57+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
-- Base schema: creates the metadata, words and patterns tables. Every
-- statement uses IF NOT EXISTS, so tables that already exist are left as-is.

-- metadata: key/value text pairs; each key may appear only once.
CREATE TABLE IF NOT EXISTS metadata (
2+
key TEXT UNIQUE,
3+
value TEXT
4+
);
5+
6+
-- words: one row per distinct word. weight starts at 1 when not supplied.
-- learned_on is presumably a timestamp stored as an integer (TODO confirm
-- the unit against the code that writes it).
CREATE TABLE IF NOT EXISTS words (
7+
id INTEGER PRIMARY KEY,
8+
word TEXT UNIQUE,
9+
weight INTEGER DEFAULT 1,
10+
learned_on INTEGER
11+
);
12+
13+
-- patterns: links a pattern to a row in words; a (pattern, word_id) pair is
-- unique and pattern is compared case-insensitively (COLLATE NOCASE).
-- NOTE(review): ON DELETE CASCADE only takes effect when foreign key
-- enforcement is enabled on the connection — confirm that it is.
CREATE TABLE IF NOT EXISTS patterns (
14+
pattern TEXT NOT NULL COLLATE NOCASE,
15+
word_id INTEGER NOT NULL,
16+
FOREIGN KEY(word_id) REFERENCES words(id) ON DELETE CASCADE,
17+
PRIMARY KEY(pattern, word_id)
18+
);

0 commit comments

Comments
 (0)