Skip to content

Commit 0680529

Browse files
feat: add variations (#14)
1 parent 67dfedd commit 0680529

File tree

12 files changed

+17013
-11176
lines changed

12 files changed

+17013
-11176
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
高频词汇的释义经过了人工初步校对,其他单词选取使用频率总和大于 50% 的释义(数据来自 [the little dict](http://louischeung.top:225/mdict%E8%AF%8D%E5%85%B8%E5%8C%85/The%20Little%20Dict/)),可以保证一定的准确性,从而减轻不必要的机械记忆负担。
1010

11-
每个单词有异形词(即考纲当中有多种写法的单词)的,计划将其在后面列出,以保证原始数据的准确性。[这个 PR](https://github.com/awxiaoxian2020/NETEMVocabulary/pull/14)
11+
每个单词有异形词(即考纲当中有多种写法的单词)的,一并列出,以保证原始数据的准确性。目前根据[这个数据](https://github.com/awxiaoxian2020/spelling-variations/blob/dev/src/bydictionary.json)进行了初步填充,后续有空时再与考纲校对。
1212

1313
目前正在开发对应的跨端小程序,见 [develop 分支](https://github.com/awxiaoxian2020/NETEMVocabulary/tree/develop)
1414

scripts/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
**/node_modules/
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
import SpellingVariations from "./lib/index.js";
import mysql from "mysql2";

// One-off maintenance script: reads every word from the `vocabulary` table,
// looks up its spelling variations and stores them, comma-separated, in the
// `variant` column of the same row.

// Connection settings. Each value may be overridden through an environment
// variable; the fallbacks are the original local-development defaults.
const connection = mysql.createConnection({
  host: process.env.NETEM_DB_HOST || "127.0.0.1",
  user: process.env.NETEM_DB_USER || "root",
  password: process.env.NETEM_DB_PASSWORD || "root",
  database: process.env.NETEM_DB_NAME || "netem",
});

const UPDATE_VARIANT_SQL =
  "UPDATE `vocabulary` SET `variant` = ? WHERE `word` = ?";

/**
 * Closes the database connection and logs the outcome.
 * Called on every exit path of the SELECT callback so the connection is not
 * left open after a failed query.
 */
function closeConnection() {
  connection.end((err) => {
    if (err) {
      console.error("关闭数据库连接时出错:", err);
    } else {
      console.log("已成功关闭数据库连接");
    }
  });
}

// Open the connection; a failure is only reported here.
connection.connect((err) => {
  if (err) {
    console.error("无法连接到数据库:", err);
    return;
  }
  console.log("已成功连接到数据库");
});

// Fetch all words and write back the variants found for each of them.
connection.query("SELECT word FROM vocabulary", (err, records) => {
  if (err) {
    console.error("查询数据库时出错:", err);
    // Previously this path returned without closing the connection.
    closeConnection();
    return;
  }

  for (const { word } of records) {
    const result = new SpellingVariations(word).analyze();
    if (!result.hasVariations) {
      continue;
    }

    // Drop the word itself and de-duplicate the remaining spellings.
    const uniqueVariants = Array.from(
      new Set(result.variations.filter((variant) => variant !== word))
    ).join(", ");

    const query = connection.query(
      UPDATE_VARIANT_SQL,
      [uniqueVariants, word],
      (updateErr) => {
        if (updateErr) {
          console.error(`更新单词 ${word} 的变体时出错: ${updateErr}`);
        }
      }
    );
    console.log('sql是', query.sql);
  }

  // NOTE(review): relies on the driver running the UPDATEs queued above
  // before the quit command issued by end() — confirm against mysql2 docs.
  closeConnection();
});

scripts/spelling-variations/lib/bypattern.js

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -114,12 +114,12 @@ const patterns = [
114114
},
115115
];
116116

/**
 * Derives the variations array for `word` from the first entry of `patterns`
 * whose regex matches it.
 *
 * @param {string} word - the word to look up
 * @returns {Array|null} null when no pattern matches; otherwise an array in
 *   which the slots listed in the pattern's `originalIndex` hold `word` and
 *   the slots listed in `replacementIndex` hold the replaced spelling.
 */
export default function(word) {
  const matched = patterns.find((candidate) => candidate.regex.test(word));
  if (!matched) {
    return null;
  }

  const slots = [];
  const alternate = word.replace(matched.regex, matched.replacementString);

  matched.originalIndex.forEach((slot) => {
    slots[slot] = word;
  });
  matched.replacementIndex.forEach((slot) => {
    slots[slot] = alternate;
  });

  return slots;
}
Lines changed: 157 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -1,106 +1,161 @@
import bypattern from './bypattern.js';
import fs from 'fs';
import path from 'path';
import { exit } from 'process';
import { fileURLToPath } from 'url';

// ES modules do not provide __filename/__dirname, so derive them from
// import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/**
 * Synchronously reads and parses a JSON file.
 * On a read or parse failure the error is logged and exit(-1) is called.
 *
 * @param {string} filePath - path of the JSON file
 * @returns {*} the parsed JSON value
 */
function readJsonFile(filePath) {
  try {
    const data = fs.readFileSync(filePath, 'utf8');
    return JSON.parse(data);
  } catch (error) {
    console.error(`Error reading JSON file: ${error}`);
    exit(-1);
  }
}

const bydictionary = readJsonFile(path.join(__dirname, 'bydictionary.json'));

/**
 * Returns the non-empty entries of `arr` that differ from `word`.
 *
 * @param {Array} arr - candidate spellings (may contain holes/empty strings)
 * @param {string} word - the spelling to exclude
 * @returns {Array}
 */
function filterOut(arr, word) {
  return arr.filter((x) => x && x !== word);
}

/**
 * Does the heavy lifting: finds the variations of a word and scores it.
 *
 * Kept as a module-level function (not a class method) so that it cannot
 * collide with the `analyse()` accessor of the class, and so that the
 * `analyse`/`analyze` references stored on the result work without a `this`.
 *
 * @param {string} word - the word to analyse; falsy values are treated as ""
 * @returns {Object} the analysis result (see the fields initialised below)
 */
function analyseWord(word) {
  const normalized = (word || "").toLowerCase();

  const result = {
    word: normalized,
    scoreUK: -1,
    scoreUS: -1,
    hasVariations: false,
    UKPreferred: normalized,
    USPreferred: normalized,
    commonVariation: normalized,
    UKVariations: [],
    USVariations: [],
    variations: [],
    analyse: analyseWord,
    analyze: analyseWord,
  };

  // Only own keys count: a plain lookup would also find inherited members
  // such as "constructor", which are not strings and would break split().
  const dictionaryEntry = Object.prototype.hasOwnProperty.call(bydictionary, normalized)
    ? bydictionary[normalized]
    : undefined;

  // The dictionary takes precedence; patterns are the fallback.
  const resultArr = dictionaryEntry
    ? dictionaryEntry.split("|")
    : bypattern(normalized);
  if (!resultArr) return result;

  // resultArr reference:
  // 0: UK1   4: US1
  // 1: UK2   5: US2
  // 2: UK3   6: US3
  // 3: UK4   7: US4   8: UKUS

  result.hasVariations = true;
  result.variations = filterOut(resultArr, normalized);
  result.UKPreferred = resultArr[0];
  result.USPreferred = resultArr[4];
  result.commonVariation = resultArr[8] || "";
  result.UKVariations = resultArr.filter(
    (e, i) => e && (i < 4 || i === 8) && e !== normalized
  );
  result.USVariations = resultArr.filter(
    (e, i) => e && (i > 3 || i === 8) && e !== normalized
  );

  if (resultArr.indexOf(normalized) === 8) {
    // The word is the spelling shared by the UK and the US.
    result.scoreUK = 0.87;
    result.scoreUS = 0.87;
  } else {
    // Rank inside each four-slot group: first slot scores 1, then 0.75,
    // 0.5, 0.25; a word absent from the group scores 0.
    const UKi = resultArr.slice(0, 4).indexOf(normalized);
    const USi = resultArr.slice(4, 8).indexOf(normalized);

    result.scoreUK = UKi === -1 ? 0 : (4 - UKi) * 0.25;
    result.scoreUS = USi === -1 ? 0 : (4 - USi) * 0.25;
  }

  return result;
}

/**
 * Spelling-variation information for a single word. The word is analysed
 * once in the constructor; every method below reads from that stored result.
 */
class SpellingVariations {
  /**
   * @param {string} word - the word to analyse
   */
  constructor(word) {
    this.data = analyseWord(word);
  }

  // @return {Number} how common this variation is in the UK's texts (1-0)
  scoreUK() {
    return this.data.scoreUK;
  }

  // @return {Number} how common this variation is in the US's texts (1-0)
  scoreUS() {
    return this.data.scoreUS;
  }

  // @return {Boolean} the word has variations
  hasVariations() {
    return this.data.hasVariations;
  }

  // @return {Array} US variations of the word
  USVariations() {
    return this.data.USVariations;
  }

  // @return {Array} UK variations of the word
  UKVariations() {
    return this.data.UKVariations;
  }

  // @return {String} UK's preferred variation
  UKPreferred() {
    return this.data.UKPreferred;
  }

  // @return {String} US's preferred variation
  USPreferred() {
    return this.data.USPreferred;
  }

  // @return {Array} All of the word's variations
  variations() {
    return this.data.variations;
  }

  // @return {String} UK and US common variation
  commonVariation() {
    return this.data.commonVariation;
  }

  // @return {String} converts the word spelling to its UK variant
  toUK() {
    return this.data.UKPreferred || this.data.word;
  }

  // @return {String} converts the word spelling to its US variant
  toUS() {
    return this.data.USPreferred || this.data.word;
  }

  // @return {Object} all the info above
  analyse() {
    return this.data;
  }

  // a US alias for the above function :)
  analyze() {
    return this.data;
  }

  // Kept as a method for callers that used it on an instance; delegates to
  // the module-level helper.
  filterOut(arr, word) {
    return filterOut(arr, word);
  }
}

export default SpellingVariations;

0 commit comments

Comments
 (0)