Skip to content

Commit fb0c9ed

Browse files
authored
Merge pull request #13501 from pdeslaur/kubeflow-pipelines-visualization-server
py3-spdx-tools: Apply performance patch
2 parents c036d29 + 93144ce commit fb0c9ed

File tree

2 files changed

+339
-1
lines changed

2 files changed

+339
-1
lines changed

py3-spdx-tools.yaml

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
package:
33
name: py3-spdx-tools
44
version: 0.8.2
5-
epoch: 0
5+
epoch: 1
66
description: SPDX parser and tools.
77
copyright:
88
- license: Apache-2.0
@@ -36,6 +36,10 @@ pipeline:
3636
tag: v${{package.version}}
3737
expected-commit: 32e74cdc8a39bc3b4119bc6e77f3804d09a05418
3838

39+
- uses: patch
40+
with:
41+
patches: performance.patch
42+
3943
- name: Python Build
4044
uses: python/build-wheel
4145

py3-spdx-tools/performance.patch

Lines changed: 334 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,334 @@
1+
From 30ebcf3981a1a1d29260dfefa75690842b0fb2cb Mon Sep 17 00:00:00 2001
2+
From: Jon Johnson <jon.johnson@chainguard.dev>
3+
Date: Mon, 29 Jan 2024 12:12:26 -0800
4+
Subject: [PATCH 1/3] Fix several accidentally quadratic functions
5+
6+
Signed-off-by: Jon Johnson <jon.johnson@chainguard.dev>
7+
---
8+
src/spdx_tools/spdx/model/document.py | 3 +++
9+
src/spdx_tools/spdx/model/relationship.py | 3 +++
10+
.../parser/jsonlikedict/relationship_parser.py | 16 +++++++---------
11+
.../spdx/validation/spdx_id_validators.py | 11 +++++++++--
12+
4 files changed, 22 insertions(+), 11 deletions(-)
13+
14+
diff --git a/src/spdx_tools/spdx/model/document.py b/src/spdx_tools/spdx/model/document.py
15+
index 980c59ca5..e1ec51671 100644
16+
--- a/src/spdx_tools/spdx/model/document.py
17+
+++ b/src/spdx_tools/spdx/model/document.py
18+
@@ -81,3 +81,6 @@ def __init__(
19+
relationships = [] if relationships is None else relationships
20+
extracted_licensing_info = [] if extracted_licensing_info is None else extracted_licensing_info
21+
check_types_and_set_values(self, locals())
22+
+
23+
+ def __hash__(self):
24+
+ return id(self)
25+
diff --git a/src/spdx_tools/spdx/model/relationship.py b/src/spdx_tools/spdx/model/relationship.py
26+
index 02b1326a9..1e6d7ae86 100644
27+
--- a/src/spdx_tools/spdx/model/relationship.py
28+
+++ b/src/spdx_tools/spdx/model/relationship.py
29+
@@ -73,3 +73,6 @@ def __init__(
30+
comment: Optional[str] = None,
31+
):
32+
check_types_and_set_values(self, locals())
33+
+
34+
+ def __hash__(self):
35+
+ return hash("{} -> {} ({})".format(self.spdx_element_id, str(self.related_spdx_element_id), str(self.relationship_type)))
36+
diff --git a/src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py b/src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py
37+
index 17374bef5..6393470e3 100644
38+
--- a/src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py
39+
+++ b/src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py
40+
@@ -1,7 +1,7 @@
41+
# SPDX-FileCopyrightText: 2022 spdx contributors
42+
#
43+
# SPDX-License-Identifier: Apache-2.0
44+
-from beartype.typing import Dict, List, Optional
45+
+from beartype.typing import Dict, List, Optional, Set
46+
47+
from spdx_tools.common.typing.constructor_type_errors import ConstructorTypeErrors
48+
from spdx_tools.spdx.model import Relationship, RelationshipType
49+
@@ -35,9 +35,9 @@ def parse_all_relationships(self, input_doc_dict: Dict) -> List[Relationship]:
50+
document_describes: List[str] = delete_duplicates_from_list(input_doc_dict.get("documentDescribes", []))
51+
doc_spdx_id: Optional[str] = input_doc_dict.get("SPDXID")
52+
53+
- existing_relationships_without_comments: List[Relationship] = self.get_all_relationships_without_comments(
54+
+ existing_relationships_without_comments: Set[Relationship] = set(self.get_all_relationships_without_comments(
55+
relationships
56+
- )
57+
+ ))
58+
relationships.extend(
59+
parse_field_or_log_error(
60+
self.logger,
61+
@@ -52,9 +52,6 @@ def parse_all_relationships(self, input_doc_dict: Dict) -> List[Relationship]:
62+
)
63+
64+
package_dicts: List[Dict] = input_doc_dict.get("packages", [])
65+
- existing_relationships_without_comments: List[Relationship] = self.get_all_relationships_without_comments(
66+
- relationships
67+
- )
68+
69+
relationships.extend(
70+
parse_field_or_log_error(
71+
@@ -110,7 +107,7 @@ def parse_relationship_type(relationship_type_str: str) -> RelationshipType:
72+
return relationship_type
73+
74+
def parse_document_describes(
75+
- self, doc_spdx_id: str, described_spdx_ids: List[str], existing_relationships: List[Relationship]
76+
+ self, doc_spdx_id: str, described_spdx_ids: List[str], existing_relationships: Set[Relationship]
77+
) -> List[Relationship]:
78+
logger = Logger()
79+
describes_relationships = []
80+
@@ -131,10 +128,11 @@ def parse_document_describes(
81+
return describes_relationships
82+
83+
def parse_has_files(
84+
- self, package_dicts: List[Dict], existing_relationships: List[Relationship]
85+
+ self, package_dicts: List[Dict], existing_relationships: Set[Relationship]
86+
) -> List[Relationship]:
87+
# assume existing relationships are stripped of comments
88+
logger = Logger()
89+
+
90+
contains_relationships = []
91+
for package in package_dicts:
92+
package_spdx_id: Optional[str] = package.get("SPDXID")
93+
@@ -160,7 +158,7 @@ def parse_has_files(
94+
return contains_relationships
95+
96+
def check_if_relationship_exists(
97+
- self, relationship: Relationship, existing_relationships: List[Relationship]
98+
+ self, relationship: Relationship, existing_relationships: Set[Relationship]
99+
) -> bool:
100+
# assume existing relationships are stripped of comments
101+
if relationship in existing_relationships:
102+
diff --git a/src/spdx_tools/spdx/validation/spdx_id_validators.py b/src/spdx_tools/spdx/validation/spdx_id_validators.py
103+
index 6441236a9..2ae412ff3 100644
104+
--- a/src/spdx_tools/spdx/validation/spdx_id_validators.py
105+
+++ b/src/spdx_tools/spdx/validation/spdx_id_validators.py
106+
@@ -4,7 +4,9 @@
107+
108+
import re
109+
110+
-from beartype.typing import List
111+
+from beartype.typing import List, Set
112+
+
113+
+from functools import cache
114+
115+
from spdx_tools.spdx.document_utils import get_contained_spdx_element_ids
116+
from spdx_tools.spdx.model import Document, File
117+
@@ -22,11 +24,16 @@ def is_spdx_id_present_in_files(spdx_id: str, files: List[File]) -> bool:
118+
return spdx_id in [file.spdx_id for file in files]
119+
120+
121+
+@cache
122+
def is_spdx_id_present_in_document(spdx_id: str, document: Document) -> bool:
123+
- all_spdx_ids_in_document: List[str] = get_list_of_all_spdx_ids(document)
124+
+ all_spdx_ids_in_document: Set[str] = get_set_of_all_spdx_ids(document)
125+
126+
return spdx_id in all_spdx_ids_in_document
127+
128+
+@cache
129+
+def get_set_of_all_spdx_ids(document: Document) -> Set[str]:
130+
+ return set(get_list_of_all_spdx_ids(document))
131+
+
132+
133+
def get_list_of_all_spdx_ids(document: Document) -> List[str]:
134+
all_spdx_ids_in_document: List[str] = [document.creation_info.spdx_id]
135+
136+
From 1c6cd54e52d6a3097ad9b4e9ce14c1019e8bcf8f Mon Sep 17 00:00:00 2001
137+
From: paulgibert <paulgibert98@gmail.com>
138+
Date: Wed, 14 Feb 2024 10:55:04 -0500
139+
Subject: [PATCH 2/3] Created a document cache decorator and handled
140+
Relationships as tuples in parsing to remove hash methods.
141+
142+
Signed-off-by: paulgibert <paulgibert98@gmail.com>
143+
---
144+
.../common/typing/dataclass_with_properties.py | 6 +++++-
145+
src/spdx_tools/spdx/model/document.py | 18 +++++++++++++++---
146+
src/spdx_tools/spdx/model/relationship.py | 3 ---
147+
.../parser/jsonlikedict/relationship_parser.py | 16 +++++++++++-----
148+
.../spdx/validation/spdx_id_validators.py | 5 +++--
149+
5 files changed, 34 insertions(+), 14 deletions(-)
150+
151+
diff --git a/src/spdx_tools/common/typing/dataclass_with_properties.py b/src/spdx_tools/common/typing/dataclass_with_properties.py
152+
index 3f13950d5..ba343af08 100644
153+
--- a/src/spdx_tools/common/typing/dataclass_with_properties.py
154+
+++ b/src/spdx_tools/common/typing/dataclass_with_properties.py
155+
@@ -1,12 +1,16 @@
156+
# SPDX-FileCopyrightText: 2022 spdx contributors
157+
#
158+
# SPDX-License-Identifier: Apache-2.0
159+
-from dataclasses import dataclass
160+
+from dataclasses import dataclass, astuple
161+
162+
from beartype import beartype
163+
from beartype.roar import BeartypeCallHintException
164+
165+
166+
+def freeze_dataclass_with_properties_list(items):
167+
+ return {astuple(itm) for itm in items}
168+
+
169+
+
170+
def dataclass_with_properties(cls):
171+
"""Decorator to generate a dataclass with properties out of the class' value:type list.
172+
Their getters and setters will be subjected to the @typechecked decorator to ensure type conformity."""
173+
diff --git a/src/spdx_tools/spdx/model/document.py b/src/spdx_tools/spdx/model/document.py
174+
index e1ec51671..4ed1b45ae 100644
175+
--- a/src/spdx_tools/spdx/model/document.py
176+
+++ b/src/spdx_tools/spdx/model/document.py
177+
@@ -1,7 +1,7 @@
178+
# SPDX-FileCopyrightText: 2022 spdx contributors
179+
#
180+
# SPDX-License-Identifier: Apache-2.0
181+
-from dataclasses import field
182+
+from dataclasses import field, astuple
183+
from datetime import datetime
184+
185+
from beartype.typing import List, Optional
186+
@@ -82,5 +82,17 @@ def __init__(
187+
extracted_licensing_info = [] if extracted_licensing_info is None else extracted_licensing_info
188+
check_types_and_set_values(self, locals())
189+
190+
- def __hash__(self):
191+
- return id(self)
192+
+
193+
+def document_cache(func):
194+
+ cache = {}
195+
+
196+
+ def cached_function(document: Document):
197+
+ key = id(document)
198+
+ if key in cache.keys():
199+
+ return cache[key]
200+
+ else:
201+
+ value = func(document)
202+
+ cache[key] = value
203+
+ return value
204+
+
205+
+ return cached_function
206+
\ No newline at end of file
207+
diff --git a/src/spdx_tools/spdx/model/relationship.py b/src/spdx_tools/spdx/model/relationship.py
208+
index 1e6d7ae86..02b1326a9 100644
209+
--- a/src/spdx_tools/spdx/model/relationship.py
210+
+++ b/src/spdx_tools/spdx/model/relationship.py
211+
@@ -73,6 +73,3 @@ def __init__(
212+
comment: Optional[str] = None,
213+
):
214+
check_types_and_set_values(self, locals())
215+
-
216+
- def __hash__(self):
217+
- return hash("{} -> {} ({})".format(self.spdx_element_id, str(self.related_spdx_element_id), str(self.relationship_type)))
218+
diff --git a/src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py b/src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py
219+
index 6393470e3..9ab8a4755 100644
220+
--- a/src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py
221+
+++ b/src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py
222+
@@ -1,9 +1,12 @@
223+
# SPDX-FileCopyrightText: 2022 spdx contributors
224+
#
225+
# SPDX-License-Identifier: Apache-2.0
226+
+from dataclasses import astuple
227+
+
228+
from beartype.typing import Dict, List, Optional, Set
229+
230+
from spdx_tools.common.typing.constructor_type_errors import ConstructorTypeErrors
231+
+from spdx_tools.common.typing.dataclass_with_properties import freeze_dataclass_with_properties_list
232+
from spdx_tools.spdx.model import Relationship, RelationshipType
233+
from spdx_tools.spdx.parser.error import SPDXParsingError
234+
from spdx_tools.spdx.parser.jsonlikedict.dict_parsing_functions import (
235+
@@ -35,9 +38,12 @@ def parse_all_relationships(self, input_doc_dict: Dict) -> List[Relationship]:
236+
document_describes: List[str] = delete_duplicates_from_list(input_doc_dict.get("documentDescribes", []))
237+
doc_spdx_id: Optional[str] = input_doc_dict.get("SPDXID")
238+
239+
- existing_relationships_without_comments: Set[Relationship] = set(self.get_all_relationships_without_comments(
240+
- relationships
241+
- ))
242+
+ relationship_hash = lambda r: hash("{} -> {} ({})" \
243+
+ .format(r.spdx_element_id,
244+
+ str(r.related_spdx_element_id),
245+
+ str(r.relationship_type)))
246+
+ existing_relationships_without_comments: Set[Relationship] = freeze_dataclass_with_properties_list(
247+
+ self.get_all_relationships_without_comments(relationships))
248+
relationships.extend(
249+
parse_field_or_log_error(
250+
self.logger,
251+
@@ -161,10 +167,10 @@ def check_if_relationship_exists(
252+
self, relationship: Relationship, existing_relationships: Set[Relationship]
253+
) -> bool:
254+
# assume existing relationships are stripped of comments
255+
- if relationship in existing_relationships:
256+
+ if astuple(relationship) in existing_relationships:
257+
return True
258+
relationship_inverted: Relationship = self.invert_relationship(relationship)
259+
- if relationship_inverted in existing_relationships:
260+
+ if astuple(relationship_inverted) in existing_relationships:
261+
return True
262+
263+
return False
264+
diff --git a/src/spdx_tools/spdx/validation/spdx_id_validators.py b/src/spdx_tools/spdx/validation/spdx_id_validators.py
265+
index 2ae412ff3..de3a505b4 100644
266+
--- a/src/spdx_tools/spdx/validation/spdx_id_validators.py
267+
+++ b/src/spdx_tools/spdx/validation/spdx_id_validators.py
268+
@@ -10,6 +10,7 @@
269+
270+
from spdx_tools.spdx.document_utils import get_contained_spdx_element_ids
271+
from spdx_tools.spdx.model import Document, File
272+
+from spdx_tools.spdx.model.document import document_cache
273+
274+
275+
def is_valid_internal_spdx_id(spdx_id: str) -> bool:
276+
@@ -24,13 +25,13 @@ def is_spdx_id_present_in_files(spdx_id: str, files: List[File]) -> bool:
277+
return spdx_id in [file.spdx_id for file in files]
278+
279+
280+
-@cache
281+
+# @cache
282+
def is_spdx_id_present_in_document(spdx_id: str, document: Document) -> bool:
283+
all_spdx_ids_in_document: Set[str] = get_set_of_all_spdx_ids(document)
284+
285+
return spdx_id in all_spdx_ids_in_document
286+
287+
-@cache
288+
+@document_cache
289+
def get_set_of_all_spdx_ids(document: Document) -> Set[str]:
290+
return set(get_list_of_all_spdx_ids(document))
291+
292+
293+
From f45032d642aafc46f9aa59bf2572f434f5ba7813 Mon Sep 17 00:00:00 2001
294+
From: Jon Johnson <jon.johnson@chainguard.dev>
295+
Date: Wed, 14 Feb 2024 09:29:07 -0800
296+
Subject: [PATCH 3/3] Fix lints
297+
298+
Signed-off-by: Jon Johnson <jon.johnson@chainguard.dev>
299+
---
300+
src/spdx_tools/spdx/model/document.py | 2 +-
301+
src/spdx_tools/spdx/validation/spdx_id_validators.py | 3 ---
302+
2 files changed, 1 insertion(+), 4 deletions(-)
303+
304+
diff --git a/src/spdx_tools/spdx/model/document.py b/src/spdx_tools/spdx/model/document.py
305+
index 4ed1b45ae..33018c896 100644
306+
--- a/src/spdx_tools/spdx/model/document.py
307+
+++ b/src/spdx_tools/spdx/model/document.py
308+
@@ -95,4 +95,4 @@ def cached_function(document: Document):
309+
cache[key] = value
310+
return value
311+
312+
- return cached_function
313+
\ No newline at end of file
314+
+ return cached_function
315+
diff --git a/src/spdx_tools/spdx/validation/spdx_id_validators.py b/src/spdx_tools/spdx/validation/spdx_id_validators.py
316+
index de3a505b4..d70c36bb7 100644
317+
--- a/src/spdx_tools/spdx/validation/spdx_id_validators.py
318+
+++ b/src/spdx_tools/spdx/validation/spdx_id_validators.py
319+
@@ -6,8 +6,6 @@
320+
321+
from beartype.typing import List, Set
322+
323+
-from functools import cache
324+
-
325+
from spdx_tools.spdx.document_utils import get_contained_spdx_element_ids
326+
from spdx_tools.spdx.model import Document, File
327+
from spdx_tools.spdx.model.document import document_cache
328+
@@ -25,7 +23,6 @@ def is_spdx_id_present_in_files(spdx_id: str, files: List[File]) -> bool:
329+
return spdx_id in [file.spdx_id for file in files]
330+
331+
332+
-# @cache
333+
def is_spdx_id_present_in_document(spdx_id: str, document: Document) -> bool:
334+
all_spdx_ids_in_document: Set[str] = get_set_of_all_spdx_ids(document)

0 commit comments

Comments
 (0)