Skip to content
This repository was archived by the owner on Jan 19, 2025. It is now read-only.

Commit 2975efb

Browse files
authored
feat: improve duration time of migration (#1232)
Closes #1231. ### Summary of Changes: add caches for `compute_function_similarity` and `compute_parameter_similarity` (stored in `previous_function_similarity` and `previous_parameter_similarity`); use the enhanced Levenshtein `distance` from the API wherever possible. ### Testing Instructions: run the migrate command and enjoy the shorter runtime.
1 parent 1588ed7 commit 2975efb

File tree

1 file changed

+33
-10
lines changed
  • package-parser/package_parser/processing/migration/model

1 file changed

+33
-10
lines changed

package-parser/package_parser/processing/migration/model/_differ.py

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ class SimpleDiffer(AbstractDiffer):
7373
assigned_by_look_up_similarity: dict[
7474
ParameterAssignment, dict[ParameterAssignment, float]
7575
]
76+
previous_parameter_similarity: dict[str, dict[str, float]] = {}
77+
previous_function_similarity: dict[str, dict[str, float]] = {}
7678

7779
def __init__(self) -> None:
7880
distance_between_implicit_and_explicit = 0.3
@@ -217,6 +219,12 @@ def compute_attribute_similarity(
217219
def compute_function_similarity(
218220
self, function_a: Function, function_b: Function
219221
) -> float:
222+
if (
223+
function_a.id in self.previous_function_similarity
224+
and function_b.id in self.previous_function_similarity[function_a.id]
225+
):
226+
return self.previous_function_similarity[function_a.id][function_b.id]
227+
220228
code_similarity = self._compute_code_similarity(
221229
function_a.code, function_b.code
222230
)
@@ -238,27 +246,38 @@ def are_parameters_similar(
238246

239247
id_similarity = self._compute_id_similarity(function_a.id, function_b.id)
240248

241-
return (
249+
result = (
242250
code_similarity + name_similarity + parameter_similarity + id_similarity
243251
) / 4
252+
if function_a.id not in self.previous_function_similarity:
253+
self.previous_function_similarity[function_a.id] = {}
254+
self.previous_function_similarity[function_a.id][function_b.id] = result
255+
return result
244256

245257
def _compute_code_similarity(self, code_a: str, code_b: str) -> float:
246258
mode = FileMode()
247259
try:
248-
code_a = format_str(code_a, mode=mode)
249-
code_b = format_str(code_b, mode=mode)
260+
code_a_tmp = format_str(code_a, mode=mode)
261+
code_b_tmp = format_str(code_b, mode=mode)
250262
except CannotSplit:
251263
pass
264+
else:
265+
code_a = code_a_tmp
266+
code_b = code_b_tmp
252267
split_a = code_a.split("\n")
253268
split_b = code_b.split("\n")
254-
diff_code = distance_elements(split_a, split_b) / max(
255-
len(split_a), len(split_b), 1
256-
)
269+
diff_code = distance(split_a, split_b) / max(len(split_a), len(split_b), 1)
257270
return 1 - diff_code
258271

259272
def compute_parameter_similarity(
260273
self, parameter_a: Parameter, parameter_b: Parameter
261274
) -> float:
275+
if (
276+
parameter_a.id in self.previous_parameter_similarity
277+
and parameter_b.id in self.previous_parameter_similarity[parameter_a.id]
278+
):
279+
return self.previous_parameter_similarity[parameter_a.id][parameter_b.id]
280+
262281
normalize_similarity = 6
263282
parameter_name_similarity = self._compute_name_similarity(
264283
parameter_a.name, parameter_b.name
@@ -289,14 +308,18 @@ def compute_parameter_similarity(
289308

290309
id_similarity = self._compute_id_similarity(parameter_a.id, parameter_b.id)
291310

292-
return (
311+
result = (
293312
parameter_name_similarity
294313
+ parameter_type_similarity
295314
+ parameter_assignment_similarity
296315
+ parameter_default_value_similarity
297316
+ parameter_documentation_similarity
298317
+ id_similarity
299318
) / normalize_similarity
319+
if parameter_a.id not in self.previous_parameter_similarity:
320+
self.previous_parameter_similarity[parameter_a.id] = {}
321+
self.previous_parameter_similarity[parameter_a.id][parameter_b.id] = result
322+
return result
300323

301324
def _compute_type_similarity(
302325
self, type_a: Optional[AbstractType], type_b: Optional[AbstractType]
@@ -393,9 +416,9 @@ def _compute_parameter_documentation_similarity(
393416
description_a = re.split("[\n ]", documentation_a.description)
394417
description_b = re.split("[\n ]", documentation_b.description)
395418

396-
documentation_similarity = distance_elements(
397-
description_a, description_b
398-
) / max(len(description_a), len(description_b))
419+
documentation_similarity = distance(description_a, description_b) / max(
420+
len(description_a), len(description_b)
421+
)
399422
return 1 - documentation_similarity
400423

401424
def _compute_id_similarity(self, id_a: str, id_b: str) -> float:

0 commit comments

Comments
 (0)