Skip to content
This repository was archived by the owner on Dec 15, 2022. It is now read-only.

Commit e511bfa

Browse files
author
Max Brunsfeld
committed
Merge pull request #27 from atom/mb-add-bulk-checking-method
Add an API for finding all the misspelled words in a given string
2 parents 81b6015 + b47f706 commit e511bfa

16 files changed

+335
-5
lines changed

appveyor.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
environment:
22
nodejs_version: "0.10"
33

4+
matrix:
5+
- {}
6+
- {SPELLCHECKER_PREFER_HUNSPELL: true}
7+
48
install:
59
- ps: Install-Product node $env:nodejs_version
610
- npm --msvs_version=2013 install

binding.gyp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,17 +41,20 @@
4141
}],
4242
['OS=="win"', {
4343
'sources': [
44-
'src/spellchecker_win.cc'
44+
'src/spellchecker_win.cc',
45+
'src/transcoder_win.cc',
4546
],
4647
}],
4748
['OS=="linux"', {
4849
'sources': [
49-
'src/spellchecker_linux.cc'
50+
'src/spellchecker_linux.cc',
51+
'src/transcoder_posix.cc',
5052
],
5153
}],
5254
['OS=="mac"', {
5355
'sources': [
5456
'src/spellchecker_mac.mm',
57+
'src/transcoder_posix.cc',
5558
],
5659
'link_settings': {
5760
'libraries': [

lib/spellchecker.js

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@ var isMisspelled = function() {
2929
return defaultSpellcheck.isMisspelled.apply(defaultSpellcheck, arguments);
3030
};
3131

32+
// Finds the misspelled words of a whole string using the shared default
// spellchecker. All arguments are forwarded unchanged to the default
// instance's `checkSpelling` method and its result is returned as-is.
var checkSpelling = function() {
  // Must run before `defaultSpellcheck` is read below (presumably it creates
  // the shared instance on first use -- confirm against its definition).
  ensureDefaultSpellCheck();

  var checker = defaultSpellcheck;
  return checker.checkSpelling.apply(checker, arguments);
};
37+
3238
var add = function() {
3339
ensureDefaultSpellCheck();
3440

@@ -64,6 +70,7 @@ module.exports = {
6470
setDictionary: setDictionary,
6571
add: add,
6672
isMisspelled: isMisspelled,
73+
checkSpelling: checkSpelling,
6774
getAvailableDictionaries: getAvailableDictionaries,
6875
getCorrectionsForMisspelling: getCorrectionsForMisspelling,
6976
Spellchecker: Spellchecker

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"main": "./lib/spellchecker.js",
33
"name": "spellchecker",
44
"description": "Bindings to native spellchecker",
5-
"version": "3.1.3",
5+
"version": "3.2.0-0",
66
"licenses": [
77
{
88
"type": "MIT",

spec/spellchecker-spec.coffee

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,54 @@ describe "SpellChecker", ->
1111
it "throws an exception when no word specified", ->
1212
expect(-> SpellChecker.isMisspelled()).toThrow()
1313

14+
# Specs for the bulk-check API. Ranges are {start, end} offsets into the
# JavaScript string (UTF-16 code units); `end` is one past the last character.
describe ".checkSpelling(string)", ->
  it "returns an array of character ranges of misspelled words", ->
    text = "cat caat dog dooog"

    expect(SpellChecker.checkSpelling(text)).toEqual [
      {start: 4, end: 8}
      {start: 13, end: 18}
    ]

  it "accounts for UTF16 pairs", ->
    # The emoji occupies two UTF-16 code units, so "caat" starts at 7, not 6.
    text = "😎 cat caat dog dooog"

    expect(SpellChecker.checkSpelling(text)).toEqual [
      {start: 7, end: 11}
      {start: 16, end: 21}
    ]

  it "accounts for other non-word characters", ->
    text = "'cat' (caat. <dog> :dooog)"
    expect(SpellChecker.checkSpelling(text)).toEqual [
      {start: 7, end: 11}
      {start: 20, end: 25}
    ]

  it "does not treat non-english letters as word boundaries", ->
    SpellChecker.add("cliché")
    expect(SpellChecker.checkSpelling("what cliché nonsense")).toEqual []

  it "handles words with apostrophes", ->
    text = "doesn't isn't aint hasn't"
    aintStart = text.indexOf("aint")
    expect(SpellChecker.checkSpelling(text)).toEqual [
      {start: aintStart, end: aintStart + 4}
    ]

    # Quote marks around a word are separators, not part of the word.
    text = "you say you're 'certain', but are you really?"
    expect(SpellChecker.checkSpelling(text)).toEqual []

    text = "you say you're 'sertan', but are you really?"
    expect(SpellChecker.checkSpelling(text)).toEqual [
      {start: text.indexOf("sertan"), end: text.indexOf("',")}
    ]

  it "handles invalid inputs", ->
    expect(SpellChecker.checkSpelling("")).toEqual []
    expect(-> SpellChecker.checkSpelling()).toThrow("Bad argument")
    expect(-> SpellChecker.checkSpelling(null)).toThrow("Bad argument")
    expect(-> SpellChecker.checkSpelling({})).toThrow("Bad argument")
61+
1462
describe ".getCorrectionsForMisspelling(word)", ->
1563
it "returns an array of possible corrections", ->
1664
corrections = SpellChecker.getCorrectionsForMisspelling('worrd')

src/main.cc

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <vector>
12
#include "nan.h"
23
#include "spellchecker.h"
34

@@ -49,6 +50,42 @@ class Spellchecker : public Nan::ObjectWrap {
4950
info.GetReturnValue().Set(Nan::New(that->impl->IsMisspelled(word)));
5051
}
5152

53+
// checkSpelling(string) -> Array of {start, end}
//
// Throws "Bad argument" unless the first argument is a string. Otherwise
// returns an array holding one {start, end} object for every range reported
// by the spellchecker implementation. An empty string yields an empty array
// without consulting the implementation.
static NAN_METHOD(CheckSpelling) {
  Nan::HandleScope scope;

  // Check the type *before* casting: converting a non-string value to a
  // String handle is not a valid cast.
  if (info.Length() < 1 || !info[0]->IsString()) {
    return Nan::ThrowError("Bad argument");
  }
  Handle<String> string = Handle<String>::Cast(info[0]);

  Local<Array> result = Nan::New<Array>();
  info.GetReturnValue().Set(result);

  if (string->Length() == 0) {
    return;
  }

  // One extra, zero-initialized element keeps the buffer NUL-terminated. The
  // terminator is deliberately included in the length handed to the
  // implementation: the Hunspell backend treats a 0 code unit as a word end.
  std::vector<uint16_t> text(string->Length() + 1);
  string->Write(text.data());

  Spellchecker* that = Nan::ObjectWrap::Unwrap<Spellchecker>(info.Holder());
  std::vector<MisspelledRange> misspelled_ranges = that->impl->CheckSpelling(text.data(), text.size());

  // The property keys do not change between iterations, so build them once.
  Local<String> start_key = Nan::New("start").ToLocalChecked();
  Local<String> end_key = Nan::New("end").ToLocalChecked();

  for (size_t i = 0; i < misspelled_ranges.size(); ++i) {
    const MisspelledRange& range = misspelled_ranges[i];

    // Offsets index into a JS string, whose length fits in 32 bits.
    Local<Object> misspelled_range = Nan::New<Object>();
    misspelled_range->Set(start_key, Nan::New<Integer>(static_cast<uint32_t>(range.start)));
    misspelled_range->Set(end_key, Nan::New<Integer>(static_cast<uint32_t>(range.end)));
    result->Set(static_cast<uint32_t>(i), misspelled_range);
  }
}
88+
5289
static NAN_METHOD(Add) {
5390
Nan::HandleScope scope;
5491
if (info.Length() < 1) {
@@ -127,6 +164,7 @@ class Spellchecker : public Nan::ObjectWrap {
127164
Nan::SetMethod(tpl->InstanceTemplate(), "getAvailableDictionaries", Spellchecker::GetAvailableDictionaries);
128165
Nan::SetMethod(tpl->InstanceTemplate(), "getCorrectionsForMisspelling", Spellchecker::GetCorrectionsForMisspelling);
129166
Nan::SetMethod(tpl->InstanceTemplate(), "isMisspelled", Spellchecker::IsMisspelled);
167+
Nan::SetMethod(tpl->InstanceTemplate(), "checkSpelling", Spellchecker::CheckSpelling);
130168
Nan::SetMethod(tpl->InstanceTemplate(), "add", Spellchecker::Add);
131169

132170
exports->Set(Nan::New("Spellchecker").ToLocalChecked(), tpl->GetFunction());

src/spellchecker.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,15 @@
33

44
#include <string>
55
#include <vector>
6+
#include <stdint.h>
67

78
namespace spellchecker {
89

10+
// Location of one misspelled word inside the text handed to CheckSpelling.
// Both offsets count uint16_t elements (UTF-16 code units) of that text and
// form a half-open interval [start, end).
struct MisspelledRange {
  size_t start;  // offset of the word's first code unit
  size_t end;    // offset one past the word's last code unit
};
14+
915
class SpellcheckerImplementation {
1016
public:
1117
virtual bool SetDictionary(const std::string& language, const std::string& path) = 0;
@@ -17,6 +23,8 @@ class SpellcheckerImplementation {
1723
// Returns true if the word is misspelled.
1824
virtual bool IsMisspelled(const std::string& word) = 0;
1925

26+
virtual std::vector<MisspelledRange> CheckSpelling(const uint16_t *text, size_t length) = 0;
27+
2028
// Adds a new word to the dictionary.
2129
// NB: When using Hunspell, this will not modify the .dic file; custom words must be added each
2230
// time the spellchecker is created. Use a custom dictionary file.

src/spellchecker_hunspell.cc

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,21 @@
11
#include <cstdio>
2+
#include <cwctype>
23
#include <algorithm>
34
#include "../vendor/hunspell/src/hunspell/hunspell.hxx"
45
#include "spellchecker_hunspell.h"
56

67
namespace spellchecker {
78

8-
HunspellSpellchecker::HunspellSpellchecker() : hunspell(NULL) { }
9+
// Constructs a checker with no Hunspell instance yet (hunspell starts out
// NULL) and obtains the transcoder used by CheckSpelling from NewTranscoder().
// NOTE(review): the destructor and CheckSpelling both null-check transcoder,
// so NewTranscoder() can presumably fail -- confirm against transcoder.h.
HunspellSpellchecker::HunspellSpellchecker() : hunspell(NULL), transcoder(NewTranscoder()) { }
10+
911
// Releases the Hunspell instance and the transcoder held by this object.
HunspellSpellchecker::~HunspellSpellchecker() {
  // `delete` on a null pointer is a no-op, so no guard is required.
  delete hunspell;

  // FreeTranscoder is only ever handed a non-null transcoder.
  if (!transcoder) {
    return;
  }
  FreeTranscoder(transcoder);
}
1420

1521
bool HunspellSpellchecker::SetDictionary(const std::string& language, const std::string& dirname) {
@@ -48,6 +54,64 @@ bool HunspellSpellchecker::IsMisspelled(const std::string& word) {
4854
return hunspell->spell(word.c_str()) == 0;
4955
}
5056

57+
std::vector<MisspelledRange> HunspellSpellchecker::CheckSpelling(const uint16_t *utf16_text, size_t utf16_length) {
58+
std::vector<MisspelledRange> result;
59+
60+
if (!hunspell || !transcoder) {
61+
return result;
62+
}
63+
64+
std::vector<char> utf8_buffer(256);
65+
66+
enum {
67+
unknown,
68+
in_separator,
69+
in_word,
70+
} state = in_separator;
71+
72+
for (size_t word_start = 0, i = 0; i < utf16_length; i++) {
73+
uint16_t c = utf16_text[i];
74+
75+
switch (state) {
76+
case unknown:
77+
if (iswpunct(c) || iswspace(c)) {
78+
state = in_separator;
79+
}
80+
break;
81+
82+
case in_separator:
83+
if (iswalpha(c)) {
84+
word_start = i;
85+
state = in_word;
86+
} else if (!iswpunct(c) && !iswspace(c)) {
87+
state = unknown;
88+
}
89+
break;
90+
91+
case in_word:
92+
if (c == '\'' && iswalpha(utf16_text[i + 1])) {
93+
i++;
94+
} else if (c == 0 || iswpunct(c) || iswspace(c)) {
95+
state = in_separator;
96+
bool converted = TranscodeUTF16ToUTF8(transcoder, (char *)utf8_buffer.data(), utf8_buffer.size(), utf16_text + word_start, i - word_start);
97+
if (converted) {
98+
if (hunspell->spell(utf8_buffer.data()) == 0) {
99+
MisspelledRange range;
100+
range.start = word_start;
101+
range.end = i;
102+
result.push_back(range);
103+
}
104+
}
105+
} else if (!iswalpha(c)) {
106+
state = unknown;
107+
}
108+
break;
109+
}
110+
}
111+
112+
return result;
113+
}
114+
51115
void HunspellSpellchecker::Add(const std::string& word) {
52116
if (hunspell) {
53117
hunspell->add(word.c_str());

src/spellchecker_hunspell.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define SRC_SPELLCHECKER_HUNSPELL_H_
33

44
#include "spellchecker.h"
5+
#include "transcoder.h"
56

67
class Hunspell;
78

@@ -16,10 +17,12 @@ class HunspellSpellchecker : public SpellcheckerImplementation {
1617
std::vector<std::string> GetAvailableDictionaries(const std::string& path);
1718
std::vector<std::string> GetCorrectionsForMisspelling(const std::string& word);
1819
bool IsMisspelled(const std::string& word);
20+
std::vector<MisspelledRange> CheckSpelling(const uint16_t *text, size_t length);
1921
void Add(const std::string& word);
2022

2123
private:
2224
Hunspell* hunspell;
25+
Transcoder *transcoder;
2326
};
2427

2528
} // namespace spellchecker

src/spellchecker_mac.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ class MacSpellchecker : public SpellcheckerImplementation {
1717
std::vector<std::string> GetAvailableDictionaries(const std::string& path);
1818
std::vector<std::string> GetCorrectionsForMisspelling(const std::string& word);
1919
bool IsMisspelled(const std::string& word);
20+
std::vector<MisspelledRange> CheckSpelling(const uint16_t *text, size_t length);
2021
void Add(const std::string& word);
2122

2223
private:

0 commit comments

Comments
 (0)