Skip to content

Fix several accidentally quadratic functions #792

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/spdx_tools/spdx/model/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,6 @@ def __init__(
relationships = [] if relationships is None else relationships
extracted_licensing_info = [] if extracted_licensing_info is None else extracted_licensing_info
check_types_and_set_values(self, locals())

def __hash__(self):
return id(self)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

documents are not immutable and therefore hashing might run into inconsistencies. Most use-cases would probably expect a content dependent hash.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah that's a shame.

I wouldn't expect for the document to get mutated during validation. Would it make sense to have some frozen/immutable variants of document and relationship that could be passed around the validators?

Or do you have any ideas that would be a better approach to take?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree that a hash should only be implemented on immutable objects when it uses mutable properties of the object - most "useful" hashes do. This hash is not based on a mutable property which would make it safe and consistent although not very useful (The default implementation of hash is just id(self) >> 4). Also note that eq should always be defined when hash is [see here]. Why define hash here?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've used functools.lru_cache in the past to speed up slow loops; it watches the parameters and replaces a function call with a table lookup when that makes sense. The nice thing is that it can catch mutations and know to call the function again without calling it every single time. Not sure if that would help in this case; I'll try to take a look this weekend.

3 changes: 3 additions & 0 deletions src/spdx_tools/spdx/model/relationship.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,6 @@ def __init__(
comment: Optional[str] = None,
):
check_types_and_set_values(self, locals())

def __hash__(self):
return hash("{} -> {} ({})".format(self.spdx_element_id, str(self.related_spdx_element_id), str(self.relationship_type)))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think our Relationships are mutable and that does not work well with hashing. This can cause inconsistencies when used in hashsets.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This class needs a definition of eq as well.

16 changes: 7 additions & 9 deletions src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: 2022 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from beartype.typing import Dict, List, Optional
from beartype.typing import Dict, List, Optional, Set

from spdx_tools.common.typing.constructor_type_errors import ConstructorTypeErrors
from spdx_tools.spdx.model import Relationship, RelationshipType
Expand Down Expand Up @@ -35,9 +35,9 @@ def parse_all_relationships(self, input_doc_dict: Dict) -> List[Relationship]:
document_describes: List[str] = delete_duplicates_from_list(input_doc_dict.get("documentDescribes", []))
doc_spdx_id: Optional[str] = input_doc_dict.get("SPDXID")

existing_relationships_without_comments: List[Relationship] = self.get_all_relationships_without_comments(
existing_relationships_without_comments: Set[Relationship] = set(self.get_all_relationships_without_comments(
relationships
)
))
relationships.extend(
parse_field_or_log_error(
self.logger,
Expand All @@ -52,9 +52,6 @@ def parse_all_relationships(self, input_doc_dict: Dict) -> List[Relationship]:
)

package_dicts: List[Dict] = input_doc_dict.get("packages", [])
existing_relationships_without_comments: List[Relationship] = self.get_all_relationships_without_comments(
relationships
)

relationships.extend(
parse_field_or_log_error(
Expand Down Expand Up @@ -110,7 +107,7 @@ def parse_relationship_type(relationship_type_str: str) -> RelationshipType:
return relationship_type

def parse_document_describes(
self, doc_spdx_id: str, described_spdx_ids: List[str], existing_relationships: List[Relationship]
self, doc_spdx_id: str, described_spdx_ids: List[str], existing_relationships: Set[Relationship]
) -> List[Relationship]:
logger = Logger()
describes_relationships = []
Expand All @@ -131,10 +128,11 @@ def parse_document_describes(
return describes_relationships

def parse_has_files(
self, package_dicts: List[Dict], existing_relationships: List[Relationship]
self, package_dicts: List[Dict], existing_relationships: Set[Relationship]
) -> List[Relationship]:
# assume existing relationships are stripped of comments
logger = Logger()

contains_relationships = []
for package in package_dicts:
package_spdx_id: Optional[str] = package.get("SPDXID")
Expand All @@ -160,7 +158,7 @@ def parse_has_files(
return contains_relationships

def check_if_relationship_exists(
self, relationship: Relationship, existing_relationships: List[Relationship]
self, relationship: Relationship, existing_relationships: Set[Relationship]
) -> bool:
# assume existing relationships are stripped of comments
if relationship in existing_relationships:
Expand Down
11 changes: 9 additions & 2 deletions src/spdx_tools/spdx/validation/spdx_id_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@

import re

from beartype.typing import List
from beartype.typing import List, Set

from functools import cache

from spdx_tools.spdx.document_utils import get_contained_spdx_element_ids
from spdx_tools.spdx.model import Document, File
Expand All @@ -22,11 +24,16 @@ def is_spdx_id_present_in_files(spdx_id: str, files: List[File]) -> bool:
return spdx_id in [file.spdx_id for file in files]


@cache
def is_spdx_id_present_in_document(spdx_id: str, document: Document) -> bool:
all_spdx_ids_in_document: List[str] = get_list_of_all_spdx_ids(document)
all_spdx_ids_in_document: Set[str] = get_set_of_all_spdx_ids(document)

return spdx_id in all_spdx_ids_in_document

@cache
def get_set_of_all_spdx_ids(document: Document) -> Set[str]:
return set(get_list_of_all_spdx_ids(document))


def get_list_of_all_spdx_ids(document: Document) -> List[str]:
all_spdx_ids_in_document: List[str] = [document.creation_info.spdx_id]
Expand Down