Skip to content

Commit 3366ab4

Browse files
committed
Levenshtein distance without langchain
Use the Levenshtein implementation from RapidFuzz instead of the one from LangChain, as the latter will be removed in future versions.
1 parent 4462ce7 commit 3366ab4

File tree

1 file changed

+4
-10
lines changed

1 file changed

+4
-10
lines changed

core/cat/utils.py

Lines changed: 4 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,8 @@
 from typing import Dict, Tuple
 from pydantic import BaseModel, ConfigDict

-from langchain.evaluation import StringDistance, load_evaluator, EvaluatorType
+from rapidfuzz.fuzz import ratio
+from rapidfuzz.distance import Levenshtein
 from langchain_core.output_parsers import JsonOutputParser
 from langchain_core.prompts import PromptTemplate
 from langchain_core.utils import get_colored_text
@@ -155,15 +156,8 @@ def deprecation_warning(message: str, skip=3):


 def levenshtein_distance(prediction: str, reference: str) -> int:
-    jaro_evaluator = load_evaluator(
-        EvaluatorType.STRING_DISTANCE, distance=StringDistance.LEVENSHTEIN
-    )
-    result = jaro_evaluator.evaluate_strings(
-        prediction=prediction,
-        reference=reference,
-    )
-    return result["score"]
-
+    res = Levenshtein.normalized_distance(prediction, reference)
+    return res

 def parse_json(json_string: str, pydantic_model: BaseModel = None) -> dict:
     # instantiate parser

0 commit comments

Comments
 (0)