From 8498b336f8b8b84031a497a99b04a29e3bebea60 Mon Sep 17 00:00:00 2001 From: estelle Date: Tue, 13 May 2025 09:19:37 +0200 Subject: [PATCH 1/2] Fix issue requiring to install spacy and rapidfuzz even if not used --- .../experimental/components/resolver.py | 65 +++++++++++++++---- 1 file changed, 53 insertions(+), 12 deletions(-) diff --git a/src/neo4j_graphrag/experimental/components/resolver.py b/src/neo4j_graphrag/experimental/components/resolver.py index ecff8cd0e..6201d1242 100644 --- a/src/neo4j_graphrag/experimental/components/resolver.py +++ b/src/neo4j_graphrag/experimental/components/resolver.py @@ -12,18 +12,36 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import abc import logging from itertools import combinations -from typing import Any, List, Optional +from typing import Any, List, Optional, TYPE_CHECKING + + +try: + from rapidfuzz import fuzz + from rapidfuzz import utils + + IS_RAPIDFUZZ_INSTALLED = True +except ImportError: + IS_RAPIDFUZZ_INSTALLED = False + +try: + import spacy + from spacy.cli.download import download as spacy_download + from spacy.language import Language + import numpy as np + + IS_SPACY_INSTALLED = True +except ImportError: + IS_SPACY_INSTALLED = False -import numpy as np -import rapidfuzz.fuzz -import spacy -from numpy.typing import NDArray -from rapidfuzz import utils -from spacy.cli.download import download as spacy_download -from spacy.language import Language + +if TYPE_CHECKING: + import numpy as np + from numpy.typing import NDArray import neo4j from neo4j_graphrag.experimental.components.types import ResolutionStats @@ -334,6 +352,11 @@ def __init__( spacy_model: str = "en_core_web_lg", neo4j_database: Optional[str] = None, ) -> None: + if not IS_SPACY_INSTALLED: + raise ImportError("""`spacy` python module needs to be installed to use + the 
SpaCySemanticMatchResolver. Install it with: + `pip install "neo4j-graphrag[nlp]"` + """) super().__init__( driver, filter_query, @@ -398,6 +421,27 @@ class FuzzyMatchResolver(BasePropertySimilarityResolver): and 1. """ + def __init__( + self, + driver: neo4j.Driver, + filter_query: Optional[str] = None, + resolve_properties: Optional[List[str]] = None, + similarity_threshold: float = 0.8, + neo4j_database: Optional[str] = None, + ) -> None: + if not IS_RAPIDFUZZ_INSTALLED: + raise ImportError("""`rapidfuzz` python module needs to be installed to use + the FuzzyMatchResolver. Install it with: + `pip install "neo4j-graphrag[fuzzy-matching]"` + """) + super().__init__( + driver, + filter_query, + resolve_properties, + similarity_threshold, + neo4j_database, + ) + async def run(self) -> ResolutionStats: return await super().run() @@ -406,7 +450,4 @@ def compute_similarity(self, text_a: str, text_b: str) -> float: # normalize the input strings before the comparison is done (processor=utils.default_process) # e.g., lowercase the text, strip whitespace, and remove punctuation # normalize the score to the 0..1 range - return ( - rapidfuzz.fuzz.WRatio(text_a, text_b, processor=utils.default_process) - / 100.0 - ) + return fuzz.WRatio(text_a, text_b, processor=utils.default_process) / 100.0 From 1805bf9f7b5319f79778876760da6c2ce33445b6 Mon Sep 17 00:00:00 2001 From: estelle Date: Tue, 13 May 2025 09:28:21 +0200 Subject: [PATCH 2/2] Update CHANGELOG and doc --- CHANGELOG.md | 4 ++++ docs/source/index.rst | 6 ++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a0533cfa..53c88b143 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Next +### Fixed + +- Fixed a bug where `spacy` and `rapidfuzz` needed to be installed even if not using the relevant entity resolvers. 
+ ## 1.7.0 ### Added diff --git a/docs/source/index.rst b/docs/source/index.rst index d6700ef55..d9bd1749b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -98,10 +98,8 @@ List of extra dependencies: - **pinecone**: store vectors in Pinecone - **qdrant**: store vectors in Qdrant - **experimental**: experimental features mainly from the Knowledge Graph creation pipelines. -- nlp: - - **spaCy**: load spaCy trained models for nlp pipelines, used by `SpaCySemanticMatchResolver` component from the Knowledge Graph creation pipelines. -- fuzzy-matching: - - **rapidfuzz**: apply fuzzy matching using string similarity, used by `FuzzyMatchResolver` component from the Knowledge Graph creation pipelines. +- **nlp**: installs spaCy for nlp pipelines, used by `SpaCySemanticMatchResolver` component from the Knowledge Graph creation pipelines. +- **fuzzy-matching**: installs **rapidfuzz** to apply fuzzy matching using string similarity, used by `FuzzyMatchResolver` component from the Knowledge Graph creation pipelines. ******** Examples