Skip to content
This repository was archived by the owner on Jan 19, 2025. It is now read-only.

Commit 1588ed7

Browse files
authored
feat: Improve Detection of Similarity (#1187)
Closes #1186. ### Summary of Changes Enhance the detection of similarity by: - Formatting the code before measuring similarity - Considering the documentation and default value when detecting similarity between parameters - Considering the id when detecting similarity between classes, functions, or parameters. ### Testing Instructions Run the `migrate` command with and without this additional code and compare the mappings.
1 parent 324ab23 commit 1588ed7

File tree

9 files changed

+1167
-914
lines changed

9 files changed

+1167
-914
lines changed

package-parser/package_parser/processing/migration/model/_differ.py

Lines changed: 215 additions & 44 deletions
Large diffs are not rendered by default.

package-parser/poetry.lock

Lines changed: 888 additions & 817 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package-parser/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ numpydoc = "^1.5"
1515
spacy = "^3.2.5"
1616
scipy = "^1.10.0"
1717
levenshtein = "^0.20.9"
18+
black = "^23.1.0"
1819

1920
[tool.poetry.dependencies.en_core_web_sm]
2021
url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl"

package-parser/tests/data/migration/annotationv2.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,13 @@
3737
"reviewers": [],
3838
"target": "test/test/test_function"
3939
},
40-
"test/test/test_function/test_parameter": {
40+
"test/test.other_package/complete_different_function/test_parameter": {
4141
"authors": ["$autogen$", "migration"],
4242
"comment": "",
4343
"newTodo": "another todo annotation",
4444
"reviewResult": "unsure",
4545
"reviewers": [],
46-
"target": "test/test/test_function/test_parameter"
46+
"target": "test/test.other_package/complete_different_function/test_parameter"
4747
}
4848
},
4949
"valueAnnotations": {}

package-parser/tests/data/migration/apiv1_data.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"is_public": true,
2222
"reexported_by": [],
2323
"documentation": "",
24-
"code": "class TestClass:\n\"\"\" This is a TestClass.\n It has no common use.\"\"\"\n pass",
24+
"code": "class TestClass:\n \"\"\"This is a TestClass.\n It has no common use.\"\"\"\n pass",
2525
"instance_attributes": [
2626
{
2727
"name": "a",
@@ -57,7 +57,7 @@
5757
"id": "test/test/test_function/test_parameter",
5858
"name": "test_parameter",
5959
"qname": "test.test_function.test_parameter",
60-
"default_value": "",
60+
"default_value": "''",
6161
"assigned_by": "POSITION_OR_NAME",
6262
"is_public": false,
6363
"docstring": {
@@ -73,7 +73,7 @@
7373
"reexported_by": [],
7474
"description": "",
7575
"docstring": "",
76-
"code": "def complete_different_function():\n \"\"\"This function's only use is to hold a parameter\"\"\" return None"
76+
"code": "def complete_different_function():\n \"\"\"This function's only use is to hold a parameter\"\"\"\n return None"
7777
},
7878
{
7979
"id": "test/test/test_function",
@@ -85,7 +85,7 @@
8585
"reexported_by": [],
8686
"description": "",
8787
"docstring": "",
88-
"code": "def test_function():\n pass"
88+
"code": "def test_function():\n i=0\n pass"
8989
}
9090
]
9191
}

package-parser/tests/data/migration/apiv2_data.json

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
{
77
"id": "test/test",
88
"name": "test",
9-
"classes": ["test/test/NewTestClass"],
9+
"classes": ["test/test/TestClass"],
1010
"functions": [""]
1111
}
1212
],
@@ -21,7 +21,7 @@
2121
"is_public": true,
2222
"reexported_by": [],
2323
"documentation": "",
24-
"code": "class TestClass:\n\"\"\" This is a TestClass.\n It has no common use.\"\"\"\n pass",
24+
"code": "class TestClass:\n \"\"\"This is a TestClass.\n It has no common use.\"\"\"\n pass",
2525
"instance_attributes": [
2626
{
2727
"name": "a",
@@ -49,14 +49,14 @@
4949
],
5050
"functions": [
5151
{
52-
"id": "test/test/complete_different_function",
53-
"qname": "test.complete_different_function",
52+
"id": "test/test.other_package/complete_different_function",
53+
"qname": "test.other_package.complete_different_function",
5454
"decorators": [],
5555
"parameters": [
5656
{
57-
"id": "test/test/test_function/test_parameter",
57+
"id": "test/test.other_package/complete_different_function/test_parameter",
5858
"name": "test_parameter",
59-
"qname": "test.test_function.test_parameter",
59+
"qname": "test.other_package.complete_different_function.test_parameter",
6060
"default_value": "'new_optional_value'",
6161
"assigned_by": "POSITIONAL_VARARG",
6262
"is_public": false,
@@ -73,7 +73,7 @@
7373
"reexported_by": [],
7474
"description": "",
7575
"docstring": "",
76-
"code": "def complete_different_function():\n \"\"\"This function's only use is to hold a parameter\"\"\" return None"
76+
"code": "def complete_different_function():\n \"\"\"This function's only use is to hold a parameter\"\"\"\n return None"
7777
},
7878
{
7979
"id": "test/test/test_function",
@@ -85,19 +85,19 @@
8585
"reexported_by": [],
8686
"description": "",
8787
"docstring": "",
88-
"code": "def test_function():\n pass"
88+
"code": "def test_function():\n i=0\n pass"
8989
},
9090
{
91-
"id": "test/test/other_test_function",
92-
"qname": "other_test_function",
91+
"id": "test/test.other/other_test_function",
92+
"qname": "test.other.other_test_function",
9393
"decorators": [],
9494
"parameters": [],
9595
"results": [],
9696
"is_public": true,
9797
"reexported_by": [],
9898
"description": "",
9999
"docstring": "",
100-
"code": "def other_test_function():\n \"\"\"This function is longer than the other function\n but have some attributes in common.\n Therfore, they should be in the unsure annotationstore\"\"\""
100+
"code": "def other_test_function():\n \"\"\"This function is longer than the other function\n but have some attributes in common.\n Therfore, they should not be in the any annotationstore\"\"\""
101101
}
102102
]
103103
}

package-parser/tests/data/migration/unsure_annotationv2.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@
88
"expertAnnotations": {},
99
"groupAnnotations": {},
1010
"moveAnnotations": {
11-
"test/test/complete_different_function": {
11+
"test/test.other_package/complete_different_function": {
1212
"authors": ["$autogen$", "migration"],
1313
"comment": "",
1414
"destination": "test/test.moved.package",
1515
"reviewResult": "",
1616
"reviewers": [],
17-
"target": "test/test/complete_different_function"
17+
"target": "test/test.other_package/complete_different_function"
1818
}
1919
},
2020
"pureAnnotations": {},

package-parser/tests/processing/migration/test_differ.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
@pytest.mark.parametrize(
2626
"differ",
2727
differ_list,
28-
)
28+
) # type: ignore
2929
def test_attribute_similarity(differ: AbstractDiffer):
3030
attribute_a = Attribute("test_string", NamedType("str"))
3131
assert differ.compute_attribute_similarity(attribute_a, attribute_a) == 1
@@ -41,8 +41,8 @@ def test_attribute_similarity(differ: AbstractDiffer):
4141
@pytest.mark.parametrize(
4242
"differ",
4343
differ_list,
44-
)
45-
def test_class_similarity(differ: AbstractDiffer):
44+
) # type: ignore
45+
def test_class_similarity(differ: AbstractDiffer) -> None:
4646
code_a = cleandoc(
4747
"""
4848
class Test:
@@ -83,23 +83,23 @@ class newTest:
8383
@pytest.mark.parametrize(
8484
"differ",
8585
differ_list,
86-
)
87-
def test_function_similarity(differ: AbstractDiffer):
86+
) # type: ignore
87+
def test_function_similarity(differ: AbstractDiffer) -> None:
8888
parameters = [
8989
Parameter(
9090
"test/test.Test/test/test_parameter",
9191
"test_parameter",
9292
"test.Test.test.test_parameter",
93-
"str",
93+
"'test_str'",
9494
ParameterAssignment.POSITION_OR_NAME,
9595
True,
96-
ParameterDocumentation("str", "", ""),
96+
ParameterDocumentation("'test_str'", "", ""),
9797
)
9898
]
9999
results: list[Result] = []
100100
code_a = cleandoc(
101101
"""
102-
det test(test_parameter: str):
102+
def test(test_parameter: str):
103103
\"\"\"
104104
This test function is a work
105105
\"\"\"
@@ -124,7 +124,7 @@ def test_function_similarity(differ: AbstractDiffer):
124124

125125
code_b = cleandoc(
126126
"""
127-
det test_method(test_parameter: str):
127+
def test_method(test_parameter: str):
128128
\"\"\"
129129
This test function is a concept.
130130
\"\"\"
@@ -136,10 +136,10 @@ def test_function_similarity(differ: AbstractDiffer):
136136
"test/test.Test/test_method/test_parameter",
137137
"test_parameter",
138138
"test.Test.test_method.test_parameter",
139-
"str",
139+
"'test_str'",
140140
ParameterAssignment.POSITION_OR_NAME,
141141
True,
142-
ParameterDocumentation("str", "", ""),
142+
ParameterDocumentation("'test_str'", "", ""),
143143
)
144144
]
145145
function_b = Function(
@@ -162,45 +162,45 @@ def test_function_similarity(differ: AbstractDiffer):
162162
@pytest.mark.parametrize(
163163
"differ",
164164
differ_list,
165-
)
166-
def test_parameter_similarity(differ: AbstractDiffer):
165+
) # type: ignore
166+
def test_parameter_similarity(differ: AbstractDiffer) -> None:
167167
parameter_a = Parameter(
168168
"test/test.Test/test_method/test_parameter",
169169
"test_parameter",
170170
"test.Test.test_method.test_parameter",
171-
"str",
171+
"'str'",
172172
ParameterAssignment.POSITION_OR_NAME,
173173
True,
174-
ParameterDocumentation("str", "", ""),
174+
ParameterDocumentation("'str'", "", ""),
175175
)
176176
parameter_b = Parameter(
177177
"test/test.Test/test_method/test_parameter",
178178
"test_parameter",
179179
"test.Test.test_method.test_parameter",
180-
"int",
180+
"5",
181181
ParameterAssignment.POSITION_OR_NAME,
182182
True,
183183
ParameterDocumentation("int", "", ""),
184184
)
185-
assert differ.compute_parameter_similarity(parameter_a, parameter_b) > 0.5
185+
assert 0.45 < differ.compute_parameter_similarity(parameter_a, parameter_b) < 0.6
186186

187187
parameter_a = Parameter(
188188
"test/test.Test/test_method/test_parameter_new_name",
189189
"test_parameter_new_name",
190190
"test.Test.test_method.test_parameter_new_name",
191-
"int",
191+
"9",
192192
ParameterAssignment.POSITION_OR_NAME,
193193
True,
194194
ParameterDocumentation("int", "", ""),
195195
)
196-
assert differ.compute_parameter_similarity(parameter_a, parameter_b) > 0.8
196+
assert 0.7 < differ.compute_parameter_similarity(parameter_a, parameter_b) < 0.8
197197

198198

199199
@pytest.mark.parametrize(
200200
"differ",
201201
differ_list,
202-
)
203-
def test_result_similarity(differ: AbstractDiffer):
202+
) # type: ignore
203+
def test_result_similarity(differ: AbstractDiffer) -> None:
204204
result_a = Result("config", ResultDocstring("dict", ""))
205205
assert differ.compute_result_similarity(result_a, result_a) == 1
206206

package-parser/tests/processing/migration/test_mapping.py

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616
@pytest.mark.parametrize(
1717
"differ",
1818
differ_list,
19-
)
20-
def test_one_to_one_mapping(differ: AbstractDiffer):
19+
) # type: ignore
20+
def test_one_to_one_mapping(differ: AbstractDiffer) -> None:
2121
apiv1 = API("test", "test", "1.0")
2222
apiv2 = API("test", "test", "2.0")
2323
class_1 = Class(
@@ -44,8 +44,8 @@ def test_one_to_one_mapping(differ: AbstractDiffer):
4444
@pytest.mark.parametrize(
4545
"differ",
4646
differ_list,
47-
)
48-
def test_one_to_many_and_many_to_one_mappings(differ: AbstractDiffer):
47+
) # type: ignore
48+
def test_one_to_many_and_many_to_one_mappings(differ: AbstractDiffer) -> None:
4949
apiv1, apiv2, class_1, class_2, class_3 = create_apis()
5050

5151
mappings = APIMapping(apiv1, apiv2, differ).map_api()
@@ -67,8 +67,8 @@ def test_one_to_many_and_many_to_one_mappings(differ: AbstractDiffer):
6767
@pytest.mark.parametrize(
6868
"differ",
6969
differ_list,
70-
)
71-
def test_many_to_many_mapping(differ: AbstractDiffer):
70+
) # type: ignore
71+
def test_many_to_many_mapping(differ: AbstractDiffer) -> None:
7272
apiv1, apiv2, class_1, class_2, class_3 = create_apis()
7373
class_4 = Class(
7474
"test/test.TestC",
@@ -94,11 +94,11 @@ def test_many_to_many_mapping(differ: AbstractDiffer):
9494
@pytest.mark.parametrize(
9595
"differ",
9696
differ_list,
97-
)
98-
def test_too_different_mapping(differ: AbstractDiffer):
97+
) # type: ignore
98+
def test_too_different_mapping(differ: AbstractDiffer) -> None:
9999
apiv1 = API("test", "test", "1.0")
100100
class_1 = Class(
101-
"test/test.Test",
101+
"test/test/Test",
102102
"Test",
103103
[],
104104
[],
@@ -111,8 +111,8 @@ def test_too_different_mapping(differ: AbstractDiffer):
111111
apiv1.add_class(class_1)
112112
apiv2 = API("test", "test", "2.0")
113113
class_2 = Class(
114-
"test/test.NotSimilarClass",
115-
"NotSimilarClass",
114+
"test/test.test/NotSimilarClass_",
115+
"NotSimilarClass_",
116116
[],
117117
[],
118118
True,
@@ -122,8 +122,18 @@ def test_too_different_mapping(differ: AbstractDiffer):
122122
),
123123
cleandoc(
124124
"""
125-
class NotSimilar:
125+
126+
class NotSimilarClass:
127+
self.i = 5
128+
129+
self.d = 12.01
130+
131+
self.x = "s"
132+
133+
self.f = ""
134+
126135
pass
136+
127137
"""
128138
),
129139
[],
@@ -138,7 +148,7 @@ class NotSimilar:
138148
assert len(mappings) == 0
139149

140150

141-
def create_apis():
151+
def create_apis() -> tuple[API, API, Class, Class, Class]:
142152
class_1 = Class(
143153
"test/test.Test",
144154
"Test",

0 commit comments

Comments
 (0)