Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,15 @@ func (l *lexable) next() (rune, bool) {
*l = (*l)[width:]
return curchar, true
}

return ' ', false
}
// peek reports the next rune in the input without consuming any of it.
// The boolean result is false once the input is exhausted, in which case
// a space rune is returned as a placeholder value.
func (l *lexable) peek() (rune, bool) {
	if l.empty() {
		return ' ', false
	}

	r, _ := utf8.DecodeRuneInString(string(*l))
	return r, true
}

Expand All @@ -50,13 +52,16 @@ func (l *lexable) lexDecimalNumber() (int64, error) {
r, _ := l.next()
number += fmt.Sprintf("%c", r)
}

if len(number) == 0 {
return 0, fmt.Errorf("number not found in string: %s", *l)
}

i, err := strconv.ParseInt(number, 10, 64)
if err != nil {
return 0, err
}

return i, nil
}

Expand All @@ -76,6 +81,7 @@ func (l *lexable) lexWord() (string, error) {
word = append(word, buf[0:x]...)
}
}

return string(word), nil
}

Expand All @@ -85,9 +91,11 @@ func (l *lexable) lexGloss() (string, error) {
if !ok {
return "", fmt.Errorf("definition expected")
}

if r != '|' {
return "", fmt.Errorf("definition expected (want '|' got '%c') [%q]", r, string(*l))
}

return strings.TrimSpace(string(*l)), nil
}

Expand All @@ -101,13 +109,16 @@ func (l *lexable) lexHexNumber() (int64, error) {
r, _ := l.next()
number += fmt.Sprintf("%c", r)
}

if len(number) == 0 {
return 0, fmt.Errorf("number not found in string: %s", *l)
}

i, err := strconv.ParseInt(number, 16, 64)
if err != nil {
return 0, err
}

return i, nil
}

Expand All @@ -116,14 +127,17 @@ func (l *lexable) lexOffset() (string, error) {
if len(*l) < 8 {
return "", fmt.Errorf("invalid offset")
}

for i := 0; i < 8; i++ {
if !unicode.IsDigit(rune((*l)[i])) {
return "", fmt.Errorf("invalid chars in offset: %s", string((*l)[0:8]))
}
}

cpy := make([]byte, 8)
copy(cpy, (*l)[0:8])
*l = (*l)[8:]

return string(cpy), nil
}

Expand All @@ -133,6 +147,7 @@ func (l *lexable) lexPOS() (PartOfSpeech, error) {
if !ok {
return 0, fmt.Errorf("unexpected end of input")
}

switch curchar {
case 'n':
return Noun, nil
Expand All @@ -149,6 +164,7 @@ func (l *lexable) lexPOS() (PartOfSpeech, error) {
case 'r':
return Adverb, nil
}

return 0, fmt.Errorf("invalid part of speech: %c", curchar)
}

Expand All @@ -158,6 +174,7 @@ func (l *lexable) lexRelationType() (Relation, error) {
if err != nil {
return 0, fmt.Errorf("can't read relation type: %s", err)
}

switch word {
case "!":
return Antonym, nil
Expand Down Expand Up @@ -213,6 +230,7 @@ func (l *lexable) lexRelationType() (Relation, error) {
case "~i":
return InstanceHyponym, nil
}

return 0, fmt.Errorf("unrecognized pointer type: %q", word)
}

Expand Down Expand Up @@ -247,25 +265,30 @@ func parseLine(data []byte, line int64) (*parsed, error) {
}
return nil, fmt.Errorf("can't parse line, expected comment or Offset")
}

// file number
filenum, err := l.lexDecimalNumber()
if err != nil {
return nil, fmt.Errorf("filenumber expected: %s", err)
}

pos, err := l.lexPOS()
if err != nil {
return nil, fmt.Errorf("part of speech expected: %s", err)
}

// lexicographer file containing the word
wordcount, err := l.lexHexNumber()
if err != nil {
return nil, fmt.Errorf("wordcount expected: %s", err)
}

p := parsed{
byteOffset: byteOffset,
pos: pos,
fileNum: filenum,
}

for ; wordcount > 0; wordcount-- {
value, err := l.lexWord()
if err != nil {
Expand All @@ -281,10 +304,12 @@ func parseLine(data []byte, line int64) (*parsed, error) {
sense: uint8(sense),
})
}

pcount, err := l.lexDecimalNumber()
if err != nil {
return nil, fmt.Errorf("pointer count expected: %s", err)
}

for ; pcount > 0; pcount-- {
if rt, err := l.lexRelationType(); err != nil {
return nil, err
Expand All @@ -310,6 +335,7 @@ func parseLine(data []byte, line int64) (*parsed, error) {
p.rels = append(p.rels, r)
}
}

// parse optional frame count
frameCount, err := l.lexDecimalNumber()
if err == nil {
Expand All @@ -324,10 +350,12 @@ func parseLine(data []byte, line int64) (*parsed, error) {
}
}
}

gloss, err := l.lexGloss()
if err != nil {
return nil, err
}

p.gloss = gloss

return &p, nil
Expand Down
4 changes: 4 additions & 0 deletions read.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ func inPlaceReadLine(s io.Reader, cb func([]byte, int64, int64) error) error {
offset += int64(len(line))
count++
}

// If we reached end of file and the line contents are empty, don't return an additional line.
if err == io.EOF {
if len(line) > 0 {
Expand All @@ -34,6 +35,7 @@ func inPlaceReadLine(s io.Reader, cb func([]byte, int64, int64) error) error {
} else {
return cb(line, count, offset)
}

return nil
}

Expand All @@ -42,10 +44,12 @@ func inPlaceReadLineFromPath(filePath string, cb func([]byte, int64, int64) erro
if err != nil {
return err
}

defer func() {
if cerr := f.Close(); cerr != nil {
fmt.Println("Error closing file:", cerr)
}
}()

return inPlaceReadLine(f, cb)
}
6 changes: 4 additions & 2 deletions wordnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ func (w *Lookup) Related(r Relation) (relationships []Lookup) {
})
}
}

// next let's look for syntactic relationships
key := normalize(w.word)
for _, word := range w.cluster.words {
Expand Down Expand Up @@ -437,10 +438,11 @@ func wordbase(word string, ender int) string {

// Try to find all possible baseforms (lemmas) of individual word in POS.
func (h *Handle) MorphWord(word string, pos PartOfSpeech) string {
if pos == Adverb {
switch pos {
case Adverb:
// Adverbs are not inflected in WordNet
return ""
} else if pos == Noun {
case Noun:
if strings.HasSuffix(word, "ful") {
return word[:len(word)-3]
} else if strings.HasSuffix(word, "ss") || len(word) <= 2 {
Expand Down
15 changes: 8 additions & 7 deletions wordnet_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package wnram
import (
"path"
"runtime"
"slices"
"testing"
)

Expand Down Expand Up @@ -107,20 +108,15 @@ func TestLemma(t *testing.T) {
}
t.Fatalf("expected one synonym cluster for awesome, got %d", len(found))
}

if found[0].Lemma() != "amazing" {
t.Errorf("incorrect lemma for awesome (%s)", found[0].Lemma())
}
}

func setContains(haystack, needles []string) bool {
for _, n := range needles {
found := false
for _, h := range haystack {
if n == h {
found = true
break
}
}
found := slices.Contains(haystack, n)
if !found {
return false
}
Expand Down Expand Up @@ -160,6 +156,7 @@ func TestAntonyms(t *testing.T) {
antonyms = append(antonyms, a.Word())
}
}

if !setContains(antonyms, []string{"bad", "evil"}) {
t.Errorf("missing antonyms for good")
}
Expand All @@ -178,6 +175,7 @@ func TestHypernyms(t *testing.T) {
hypernyms = append(hypernyms, a.Word())
}
}

if !setContains(hypernyms, []string{"punch"}) {
t.Errorf("missing hypernyms for jab (expected punch, got %v)", hypernyms)
}
Expand All @@ -196,6 +194,7 @@ func TestHyponyms(t *testing.T) {
hyponyms = append(hyponyms, a.Word())
}
}

expected := []string{"chocolate", "cheese", "pasta", "leftovers"}
if !setContains(hyponyms, expected) {
t.Errorf("missing hyponyms for candy (expected %v, got %v)", expected, hyponyms)
Expand All @@ -208,9 +207,11 @@ func TestIterate(t *testing.T) {
count++
return nil
})

if err != nil {
t.Fatalf("Iterate failed: %v", err)
}

if count != 82192 {
t.Errorf("Missing nouns!")
}
Expand Down
Loading