Skip to content
This repository was archived by the owner on Dec 15, 2022. It is now read-only.

Commit 531ec95

Browse files
committed
Use std libraries for UTF16 -> UTF8 conversion in hunspell spellchecker
1 parent 9aff496 commit 531ec95

File tree

6 files changed

+120
-14
lines changed

6 files changed

+120
-14
lines changed

binding.gyp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,17 +41,20 @@
4141
}],
4242
['OS=="win"', {
4343
'sources': [
44-
'src/spellchecker_win.cc'
44+
'src/spellchecker_win.cc',
45+
'src/transcoder_win.cc',
4546
],
4647
}],
4748
['OS=="linux"', {
4849
'sources': [
49-
'src/spellchecker_linux.cc'
50+
'src/spellchecker_linux.cc',
51+
'src/transcoder_posix.cc',
5052
],
5153
}],
5254
['OS=="mac"', {
5355
'sources': [
5456
'src/spellchecker_mac.mm',
57+
'src/transcoder_posix.cc',
5558
],
5659
'link_settings': {
5760
'libraries': [

src/spellchecker_hunspell.cc

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,20 @@
22
#include <cwctype>
33
#include <algorithm>
44
#include "../vendor/hunspell/src/hunspell/hunspell.hxx"
5-
#include "../vendor/hunspell/src/hunspell/csutil.hxx"
65
#include "spellchecker_hunspell.h"
76

87
namespace spellchecker {
98

10-
HunspellSpellchecker::HunspellSpellchecker() : hunspell(NULL) { }
9+
// Builds a checker with no Hunspell instance yet (hunspell stays NULL) and
// asks the platform layer for a UTF-16 -> UTF-8 transcoder. NewTranscoder()
// may hand back NULL, which CheckSpelling tests for before using it.
HunspellSpellchecker::HunspellSpellchecker()
    : hunspell(NULL),
      transcoder(NewTranscoder()) {
}
10+
1111
// Releases the Hunspell instance and the transcoder owned by this checker.
HunspellSpellchecker::~HunspellSpellchecker() {
  // delete on a null pointer is a no-op, so no explicit guard is required.
  delete hunspell;

  // The transcoder may never have been created; only release a real handle.
  if (transcoder != NULL) {
    FreeTranscoder(transcoder);
  }
}
1620

1721
bool HunspellSpellchecker::SetDictionary(const std::string& language, const std::string& dirname) {
@@ -53,27 +57,29 @@ bool HunspellSpellchecker::IsMisspelled(const std::string& word) {
5357
std::vector<MisspelledRange> HunspellSpellchecker::CheckSpelling(const uint16_t *utf16_text, size_t utf16_length) {
5458
std::vector<MisspelledRange> result;
5559

56-
if (!hunspell) {
60+
if (!hunspell || !transcoder) {
5761
return result;
5862
}
5963

6064
std::vector<char> utf8_buffer(256);
61-
char *utf8_word = utf8_buffer.data();
6265

6366
size_t word_start = 0;
6467
bool within_word = false;
6568
for (size_t i = 0; i < utf16_length; i++) {
66-
bool is_word_character = iswalpha(utf16_text[i]);
69+
uint16_t c = utf16_text[i];
70+
bool is_word_character = iswalpha(c);
6771
if (within_word) {
6872
if (!is_word_character) {
6973
within_word = false;
70-
const w_char *utf16_word = reinterpret_cast<const w_char *>(utf16_text + word_start);
71-
u16_u8(utf8_word, utf8_buffer.size(), utf16_word, i - word_start);
72-
if (hunspell->spell(utf8_word) == 0) {
73-
MisspelledRange range;
74-
range.start = word_start;
75-
range.end = i;
76-
result.push_back(range);
74+
75+
bool converted = TranscodeUTF16ToUTF8(transcoder, (char *)utf8_buffer.data(), utf8_buffer.size(), utf16_text + word_start, i - word_start);
76+
if (converted) {
77+
if (hunspell->spell(utf8_buffer.data()) == 0) {
78+
MisspelledRange range;
79+
range.start = word_start;
80+
range.end = i;
81+
result.push_back(range);
82+
}
7783
}
7884
}
7985
} else if (is_word_character) {

src/spellchecker_hunspell.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define SRC_SPELLCHECKER_HUNSPELL_H_
33

44
#include "spellchecker.h"
5+
#include "transcoder.h"
56

67
class Hunspell;
78

@@ -21,6 +22,7 @@ class HunspellSpellchecker : public SpellcheckerImplementation {
2122

2223
private:
2324
Hunspell* hunspell;
25+
Transcoder *transcoder;
2426
};
2527

2628
} // namespace spellchecker

src/transcoder.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#ifndef SRC_TRANSCODER_H_
2+
#define SRC_TRANSCODER_H_
3+
4+
#include <stdlib.h>
5+
#include <stdint.h>
6+
7+
namespace spellchecker {
8+
9+
// Opaque conversion handle. The struct is defined separately by each
// platform implementation (transcoder_posix.cc / transcoder_win.cc).
struct Transcoder;
10+
11+
// Creates a transcoder. The result can be NULL (for example when the POSIX
// implementation fails to open its iconv descriptor), so callers must check
// it before use.
Transcoder *NewTranscoder();
12+
// Releases a transcoder obtained from NewTranscoder(). Callers should pass
// only a non-NULL pointer.
void FreeTranscoder(Transcoder *);
13+
// Converts `in_length` UTF-16 code units starting at `in` into UTF-8 written
// to `out`, followed by a NUL terminator. `out_length` is the capacity of
// `out` in bytes. Returns true when output was produced and false when the
// implementation reports a conversion failure.
bool TranscodeUTF16ToUTF8(const Transcoder *, char *out, size_t out_length, const uint16_t *in, size_t in_length);
14+
15+
} // namespace spellchecker
16+
17+
#endif // SRC_TRANSCODER_H_

src/transcoder_posix.cc

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#include <iconv.h>
2+
#include <stdint.h>
3+
#include <stdlib.h>
4+
5+
namespace spellchecker {

// Conversion state for the POSIX build: an iconv descriptor opened for
// host-endian UTF-16 -> UTF-8.
struct Transcoder {
  iconv_t conversion;
};

// Returns non-zero when the host stores the most significant byte first.
static int IsBigEndian(void) {
  const uint16_t probe = 0x0102;
  // Inspecting an object's bytes through unsigned char is well-defined,
  // unlike reading the inactive member of a union.
  return *reinterpret_cast<const unsigned char *>(&probe) == 0x01;
}

// Opens an iconv descriptor for UTF-16 (host byte order) -> UTF-8 and wraps
// it in a heap-allocated Transcoder. Returns NULL if iconv_open fails.
Transcoder *NewTranscoder() {
  const char *to_encoding = "UTF-8";
  const char *from_encoding = IsBigEndian() ? "UTF-16BE" : "UTF-16LE";
  iconv_t conversion = iconv_open(to_encoding, from_encoding);
  // (iconv_t)-1 is the error sentinel documented for iconv_open.
  if (conversion == (iconv_t)-1) {
    return NULL;
  }

  Transcoder *result = new Transcoder();
  result->conversion = conversion;
  return result;
}

// Closes the iconv descriptor and frees the Transcoder. Passing NULL is a
// no-op.
void FreeTranscoder(Transcoder *transcoder) {
  if (transcoder == NULL) {
    return;
  }
  iconv_close(transcoder->conversion);
  delete transcoder;
}

// Converts `in_length` UTF-16 code units from `in` into NUL-terminated UTF-8
// in `out`.
//
// `out_bytes` is the total capacity of `out`, terminator included. Returns
// false when the arguments are unusable, when the input is not valid UTF-16,
// or when the converted text plus terminator does not fit; the contents of
// `out` are unspecified in that case.
bool TranscodeUTF16ToUTF8(const Transcoder *transcoder, char *out, size_t out_bytes, const uint16_t *in, size_t in_length) {
  if (transcoder == NULL || out == NULL || out_bytes == 0) {
    return false;
  }

  // iconv takes a non-const input pointer but does not modify the input.
  char *utf16_word = reinterpret_cast<char *>(const_cast<uint16_t *>(in));
  size_t utf16_bytes = in_length * sizeof(uint16_t);

  // Hold one byte back so the terminator written below always fits, even
  // when the converted text would otherwise fill the buffer exactly.
  size_t utf8_bytes_left = out_bytes - 1;

  size_t iconv_result = iconv(
    transcoder->conversion,
    &utf16_word,
    &utf16_bytes,
    &out,
    &utf8_bytes_left
  );

  if (iconv_result == static_cast<size_t>(-1)) {
    // Put the shared descriptor back into its initial state so a failed
    // word cannot affect the conversion of the next one.
    iconv(transcoder->conversion, NULL, NULL, NULL, NULL);
    return false;
  }

  // iconv advanced `out` past the bytes it produced.
  *out = '\0';
  return true;
}

} // namespace spellchecker

src/transcoder_win.cc

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#include <windows.h>
2+
#include <stdlib.h>
3+
#include "transcoder.h"
4+
5+
namespace spellchecker {
6+
7+
struct Transcoder {};
8+
9+
Transcoder *NewTranscoder() { return NULL; }
10+
11+
void FreeTranscoder(Transcoder *transcoder) {}
12+
13+
bool TranscodeUTF16ToUTF8(const Transcoder *transcoder, char *out, size_t out_length, const uint16_t *in, size_t in_length) {
14+
int length = WideCharToMultiByte(CP_UTF8, 0, reinterpret_cast<const wchar_t *>(in), in_length, out, out_length, NULL, NULL);
15+
out[length] = '\0';
16+
return true;
17+
}
18+
19+
} // namespace spellchecker

0 commit comments

Comments
 (0)