Исправление метода расчёта Дамерау-Левенштейна

anyks · anyks · commit 87df4d8fbe96 · 2020-09-14T21:48:52.000+03:00
diff --git a/ChangeLog.md b/ChangeLog.md
@@ -1,5 +1,7 @@
 # [Change Log](https://github.com/anyks/asc/archive/release.tar.gz)
 
+## [1.1.0](https://github.com/anyks/asc/archive/v1.1.0.tar.gz) Bug fixes
+
 ## [1.0.9](https://github.com/anyks/asc/archive/v1.0.9.tar.gz) Bug fixes
 
 ## [1.0.8](https://github.com/anyks/asc/archive/v1.0.8.tar.gz) Bug fixes
diff --git a/alm b/alm
@@ -1 +1 @@
-Subproject commit 17e85f6ca4180a5f6b2680dd177b187135eb2d3e
+Subproject commit 825ea74ff0ecd6547d2bdf11a27acce76d7799a5
diff --git a/app/asc.hpp b/app/asc.hpp
@@ -9,7 +9,7 @@
 // Название языковой модели
 #define ANYKS_ASC_NAME "asc"
 // Версия приложения
-#define ANYKS_ASC_VERSION "1.0.9"
+#define ANYKS_ASC_VERSION "1.1.0"
 // Версия словаря
 #define ANYKS_ASC_DICT_VERSION "1.0.0"
 // Автор приложения
diff --git a/src/dict.cpp b/src/dict.cpp
@@ -560,7 +560,7 @@ void anyks::Dict::setDate(const time_t date) noexcept {
  */
 void anyks::Dict::setCode(const wstring & code) noexcept {
 	// Если код языка передан
-	if(!code.empty()) this->params.code = code;
+	if(!code.empty()) this->params.code = this->alphabet->toLower(code);
 }
 /**
  * setName Метод установки названия словаря
@@ -848,7 +848,7 @@ const pair <size_t, size_t> anyks::Dict::find(const word_t & word, dumper_t & dm
 	// Если список слов не пустой
 	if(!this->words.empty() && (word.length() <= MAX_WORD_LENGTH)){
 		// Объект левенштейна
-		lev_t levenshtein;
+		lev_t algorithms;
 		// Создаём гипотезу
 		dumper_t::awrd_t hypothesis;
 		// Устанавливаем эталонную фразу
@@ -869,13 +869,13 @@ const pair <size_t, size_t> anyks::Dict::find(const word_t & word, dumper_t & dm
 				// Устанавливаем идентификатор гипотезы
 				hypothesis.idw = idw;
 				// Получаем дистанцию
-				hypothesis.lev = (errors > 1 ? levenshtein.distance(word, it->second) : levenshtein.damerau(word, it->second));
+				hypothesis.lev = (errors > 1 ? algorithms.distance(word, it->second) : algorithms.damerau(word, it->second));
 				// Отфильтровываем ненужные нам слова
 				if(!it->second.empty() && (hypothesis.lev <= errors)){
 					// Извлекаем слово из списка
 					hypothesis.word = it->second;
 					// Устанавливаем значение Танимото
-					hypothesis.tmo = levenshtein.tanimoto(word, hypothesis.word);
+					hypothesis.tmo = algorithms.tanimoto(word, hypothesis.word);
 					// Если расстояние Левенштейна слишком большое, тогда Танимото должен быть не меньше 0.4
 					if((hypothesis.lev <= 3) || (hypothesis.tmo >= 0.4)){
 						// Если вывод отладочной информации разрешён
diff --git a/src/spl.cpp b/src/spl.cpp
@@ -712,7 +712,7 @@ void anyks::ASpell::spell(wstring & text, const u_short options, vector <vector
 	// Если текст передан
 	if(!text.empty() && (this->dict != nullptr) && (this->alm != nullptr) && (this->alphabet != nullptr) && (this->tokenizer != nullptr)){
 		// Объект левенштейна
-		lev_t levenshtein;
+		lev_t algorithms;
 		// Предыдущее слово
 		word_t lastWord = L"";
 		// Позиция добавления слов
@@ -867,9 +867,9 @@ void anyks::ASpell::spell(wstring & text, const u_short options, vector <vector
 						// Устанавливаем полученное слово
 						hypothesis.word = (* this->dict->word(bwrd.first));
 						// Выполняем расчёт дистанции Дамерау-Левенштейна
-						hypothesis.lev = levenshtein.damerau(bwrd.second.first, bwrd.second.second);
+						hypothesis.lev = algorithms.damerau(bwrd.second.first, bwrd.second.second);
 						// Выполняем расчёт значения Танимото
-						hypothesis.tmo = levenshtein.tanimoto(bwrd.second.first, bwrd.second.second);
+						hypothesis.tmo = algorithms.tanimoto(bwrd.second.first, bwrd.second.second);
 						// Добавляем гипотезу в сборщик гипотез
 						dmp.once(hypothesis, parts.size(), pos2);
 					}
@@ -922,7 +922,7 @@ void anyks::ASpell::spell(wstring & text, const u_short options, vector <vector
 								hypotheses.at(i) = hypothesis;
 							}
 							// Добавляем список гипотез в список
-							pos1 = dmp.add(hypotheses, levenshtein.damerau(word, text), levenshtein.tanimoto(word, text), pos1);
+							pos1 = dmp.add(hypotheses, algorithms.damerau(word, text), algorithms.tanimoto(word, text), pos1);
 						}
 					}
 					// Если разрешено выполнять сплиты
@@ -969,7 +969,7 @@ void anyks::ASpell::spell(wstring & text, const u_short options, vector <vector
 								hypotheses.at(i) = hypothesis;
 							}
 							// Добавляем список гипотез в список
-							pos1 = dmp.add(hypotheses, levenshtein.damerau(word, text), levenshtein.tanimoto(word, text), pos1);
+							pos1 = dmp.add(hypotheses, algorithms.damerau(word, text), algorithms.tanimoto(word, text), pos1);
 						}
 					}
 					// Если сплит выполнять не удалось
@@ -1050,7 +1050,7 @@ void anyks::ASpell::analyze(const wstring & text, const u_short options, vector
 		// Очищаем список анализируемых слов
 		info.clear();
 		// Объект левенштейна
-		lev_t levenshtein;
+		lev_t algorithms;
 		// Предыдущее слово
 		word_t lastWord = L"";
 		// Позиция добавления слов
@@ -1216,9 +1216,9 @@ void anyks::ASpell::analyze(const wstring & text, const u_short options, vector
 						// Устанавливаем полученное слово
 						hypothesis.word = (* this->dict->word(bwrd.first));
 						// Выполняем расчёт дистанции Дамерау-Левенштейна
-						hypothesis.lev = levenshtein.damerau(bwrd.second.first, bwrd.second.second);
+						hypothesis.lev = algorithms.damerau(bwrd.second.first, bwrd.second.second);
 						// Выполняем расчёт значения Танимото
-						hypothesis.tmo = levenshtein.tanimoto(bwrd.second.first, bwrd.second.second);
+						hypothesis.tmo = algorithms.tanimoto(bwrd.second.first, bwrd.second.second);
 						// Добавляем гипотезу в сборщик гипотез
 						dmp.once(hypothesis, parts.size(), pos2);
 					}
@@ -1271,7 +1271,7 @@ void anyks::ASpell::analyze(const wstring & text, const u_short options, vector
 								hypotheses.at(i) = hypothesis;
 							}
 							// Добавляем список гипотез в список
-							pos1 = dmp.add(hypotheses, levenshtein.damerau(word, text), levenshtein.tanimoto(word, text), pos1);
+							pos1 = dmp.add(hypotheses, algorithms.damerau(word, text), algorithms.tanimoto(word, text), pos1);
 						}
 					}
 					// Если разрешено выполнять сплиты
@@ -1318,7 +1318,7 @@ void anyks::ASpell::analyze(const wstring & text, const u_short options, vector
 								hypotheses.at(i) = hypothesis;
 							}
 							// Добавляем список гипотез в список
-							pos1 = dmp.add(hypotheses, levenshtein.damerau(word, text), levenshtein.tanimoto(word, text), pos1);
+							pos1 = dmp.add(hypotheses, algorithms.damerau(word, text), algorithms.tanimoto(word, text), pos1);
 						}
 					}
 					// Если сплит выполнять не удалось

Original file line number	Diff line number	Diff line change
`@@ -712,7 +712,7 @@ void anyks::ASpell::spell(wstring & text, const u_short options, vector <vector`
`712`	`712`	`// Если текст передан`
`713`	`713`	`if(!text.empty() && (this->dict != nullptr) && (this->alm != nullptr) && (this->alphabet != nullptr) && (this->tokenizer != nullptr)){`
`714`	`714`	`// Объект левенштейна`
`715`		`- lev_t levenshtein;`
	`715`	`+ lev_t algorithms;`
`716`	`716`	`// Предыдущее слово`
`717`	`717`	`word_t lastWord = L"";`
`718`	`718`	`// Позиция добавления слов`
`@@ -867,9 +867,9 @@ void anyks::ASpell::spell(wstring & text, const u_short options, vector <vector`
`867`	`867`	`// Устанавливаем полученное слово`
`868`	`868`	`hypothesis.word = (* this->dict->word(bwrd.first));`
`869`	`869`	`// Выполняем расчёт дистанции Дамерау-Левенштейна`
`870`		`- hypothesis.lev = levenshtein.damerau(bwrd.second.first, bwrd.second.second);`
	`870`	`+ hypothesis.lev = algorithms.damerau(bwrd.second.first, bwrd.second.second);`
`871`	`871`	`// Выполняем расчёт значения Танимото`
`872`		`- hypothesis.tmo = levenshtein.tanimoto(bwrd.second.first, bwrd.second.second);`
	`872`	`+ hypothesis.tmo = algorithms.tanimoto(bwrd.second.first, bwrd.second.second);`
`873`	`873`	`// Добавляем гипотезу в сборщик гипотез`
`874`	`874`	`dmp.once(hypothesis, parts.size(), pos2);`
`875`	`875`	`}`
`@@ -922,7 +922,7 @@ void anyks::ASpell::spell(wstring & text, const u_short options, vector <vector`
`922`	`922`	`hypotheses.at(i) = hypothesis;`
`923`	`923`	`}`
`924`	`924`	`// Добавляем список гипотез в список`
`925`		`- pos1 = dmp.add(hypotheses, levenshtein.damerau(word, text), levenshtein.tanimoto(word, text), pos1);`
	`925`	`+ pos1 = dmp.add(hypotheses, algorithms.damerau(word, text), algorithms.tanimoto(word, text), pos1);`
`926`	`926`	`}`
`927`	`927`	`}`
`928`	`928`	`// Если разрешено выполнять сплиты`
`@@ -969,7 +969,7 @@ void anyks::ASpell::spell(wstring & text, const u_short options, vector <vector`
`969`	`969`	`hypotheses.at(i) = hypothesis;`
`970`	`970`	`}`
`971`	`971`	`// Добавляем список гипотез в список`
`972`		`- pos1 = dmp.add(hypotheses, levenshtein.damerau(word, text), levenshtein.tanimoto(word, text), pos1);`
	`972`	`+ pos1 = dmp.add(hypotheses, algorithms.damerau(word, text), algorithms.tanimoto(word, text), pos1);`
`973`	`973`	`}`
`974`	`974`	`}`
`975`	`975`	`// Если сплит выполнять не удалось`
`@@ -1050,7 +1050,7 @@ void anyks::ASpell::analyze(const wstring & text, const u_short options, vector`
`1050`	`1050`	`// Очищаем список анализируемых слов`
`1051`	`1051`	`info.clear();`
`1052`	`1052`	`// Объект левенштейна`
`1053`		`- lev_t levenshtein;`
	`1053`	`+ lev_t algorithms;`
`1054`	`1054`	`// Предыдущее слово`
`1055`	`1055`	`word_t lastWord = L"";`
`1056`	`1056`	`// Позиция добавления слов`
`@@ -1216,9 +1216,9 @@ void anyks::ASpell::analyze(const wstring & text, const u_short options, vector`
`1216`	`1216`	`// Устанавливаем полученное слово`
`1217`	`1217`	`hypothesis.word = (* this->dict->word(bwrd.first));`
`1218`	`1218`	`// Выполняем расчёт дистанции Дамерау-Левенштейна`
`1219`		`- hypothesis.lev = levenshtein.damerau(bwrd.second.first, bwrd.second.second);`
	`1219`	`+ hypothesis.lev = algorithms.damerau(bwrd.second.first, bwrd.second.second);`
`1220`	`1220`	`// Выполняем расчёт значения Танимото`
`1221`		`- hypothesis.tmo = levenshtein.tanimoto(bwrd.second.first, bwrd.second.second);`
	`1221`	`+ hypothesis.tmo = algorithms.tanimoto(bwrd.second.first, bwrd.second.second);`
`1222`	`1222`	`// Добавляем гипотезу в сборщик гипотез`
`1223`	`1223`	`dmp.once(hypothesis, parts.size(), pos2);`
`1224`	`1224`	`}`
`@@ -1271,7 +1271,7 @@ void anyks::ASpell::analyze(const wstring & text, const u_short options, vector`
`1271`	`1271`	`hypotheses.at(i) = hypothesis;`
`1272`	`1272`	`}`
`1273`	`1273`	`// Добавляем список гипотез в список`
`1274`		`- pos1 = dmp.add(hypotheses, levenshtein.damerau(word, text), levenshtein.tanimoto(word, text), pos1);`
	`1274`	`+ pos1 = dmp.add(hypotheses, algorithms.damerau(word, text), algorithms.tanimoto(word, text), pos1);`
`1275`	`1275`	`}`
`1276`	`1276`	`}`
`1277`	`1277`	`// Если разрешено выполнять сплиты`
`@@ -1318,7 +1318,7 @@ void anyks::ASpell::analyze(const wstring & text, const u_short options, vector`
`1318`	`1318`	`hypotheses.at(i) = hypothesis;`
`1319`	`1319`	`}`
`1320`	`1320`	`// Добавляем список гипотез в список`
`1321`		`- pos1 = dmp.add(hypotheses, levenshtein.damerau(word, text), levenshtein.tanimoto(word, text), pos1);`
	`1321`	`+ pos1 = dmp.add(hypotheses, algorithms.damerau(word, text), algorithms.tanimoto(word, text), pos1);`
`1322`	`1322`	`}`
`1323`	`1323`	`}`
`1324`	`1324`	`// Если сплит выполнять не удалось`