Skip to content

Commit 8c799b2

Browse files
committed
Removed the _remove_lucene_chars function
1 parent 2509967 commit 8c799b2

File tree

4 files changed: 4 additions and 82 deletions.

src/neo4j_graphrag/indexes.py

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -471,35 +471,6 @@ async def async_upsert_vector_on_relationship(
471471
) from e
472472

473473

474-
def _remove_lucene_chars(text: str) -> str:
475-
"""Remove Lucene special characters"""
476-
special_chars = [
477-
"+",
478-
"-",
479-
"&",
480-
"|",
481-
"!",
482-
"(",
483-
")",
484-
"{",
485-
"}",
486-
"[",
487-
"]",
488-
"^",
489-
'"',
490-
"~",
491-
"*",
492-
"?",
493-
":",
494-
"\\",
495-
"/",
496-
]
497-
for char in special_chars:
498-
if char in text:
499-
text = text.replace(char, " ")
500-
return text.strip()
501-
502-
503474
def _sort_by_index_name(
504475
records: List[neo4j.Record], index_name: str
505476
) -> List[neo4j.Record]:

src/neo4j_graphrag/retrievers/hybrid.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
RetrieverInitializationError,
2828
SearchValidationError,
2929
)
30-
from neo4j_graphrag.indexes import _remove_lucene_chars
3130
from neo4j_graphrag.neo4j_queries import get_search_query
3231
from neo4j_graphrag.retrievers.base import Retriever
3332
from neo4j_graphrag.types import (
@@ -184,7 +183,7 @@ def get_search_results(
184183
parameters = validated_data.model_dump(exclude_none=True)
185184
parameters["vector_index_name"] = self.vector_index_name
186185
parameters["fulltext_index_name"] = self.fulltext_index_name
187-
parameters["query_text"] = _remove_lucene_chars(query_text)
186+
parameters["query_text"] = query_text
188187

189188
if query_text and not query_vector:
190189
if not self.embedder:
@@ -344,7 +343,7 @@ def get_search_results(
344343
parameters = validated_data.model_dump(exclude_none=True)
345344
parameters["vector_index_name"] = self.vector_index_name
346345
parameters["fulltext_index_name"] = self.fulltext_index_name
347-
parameters["query_text"] = _remove_lucene_chars(query_text)
346+
parameters["query_text"] = query_text
348347

349348
if query_text and not query_vector:
350349
if not self.embedder:

tests/unit/retrievers/test_hybrid.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
EmbeddingRequiredError,
2222
RetrieverInitializationError,
2323
)
24-
from neo4j_graphrag.indexes import _remove_lucene_chars
2524
from neo4j_graphrag.neo4j_queries import get_search_query
2625
from neo4j_graphrag.retrievers import HybridCypherRetriever, HybridRetriever
2726
from neo4j_graphrag.types import RetrieverResult, RetrieverResultItem, SearchType
@@ -269,7 +268,7 @@ def test_hybrid_search_sanitizes_text(
269268
"vector_index_name": vector_index_name,
270269
"top_k": top_k,
271270
"effective_search_ratio": effective_search_ratio,
272-
"query_text": _remove_lucene_chars(query_text),
271+
"query_text": query_text,
273272
"fulltext_index_name": fulltext_index_name,
274273
"query_vector": embed_query_vector,
275274
},
@@ -599,7 +598,7 @@ def test_hybrid_cypher_search_sanitizes_text(
599598
"vector_index_name": vector_index_name,
600599
"top_k": top_k,
601600
"effective_search_ratio": effective_search_ratio,
602-
"query_text": _remove_lucene_chars(query_text),
601+
"query_text": query_text,
603602
"fulltext_index_name": fulltext_index_name,
604603
"query_vector": embed_query_vector,
605604
},

tests/unit/test_indexes.py

Lines changed: 0 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
create_fulltext_index,
2424
create_vector_index,
2525
drop_index_if_exists,
26-
_remove_lucene_chars,
2726
upsert_vector,
2827
upsert_vector_on_relationship,
2928
)
@@ -288,49 +287,3 @@ def test_upsert_vector_raises_neo4j_insertion_error(
288287
upsert_vector(driver, id, embedding_property, vector)
289288

290289
assert "Upserting vector to Neo4j failed" in str(excinfo)
291-
292-
293-
def test_escaping_lucene() -> None:
294-
"""Test escaping lucene characters"""
295-
assert _remove_lucene_chars("Hello+World") == "Hello World"
296-
assert _remove_lucene_chars("Hello World\\") == "Hello World"
297-
assert (
298-
_remove_lucene_chars("It is the end of the world. Take shelter!")
299-
== "It is the end of the world. Take shelter"
300-
)
301-
assert (
302-
_remove_lucene_chars("It is the end of the world. Take shelter&&")
303-
== "It is the end of the world. Take shelter"
304-
)
305-
assert (
306-
_remove_lucene_chars("Bill&&Melinda Gates Foundation")
307-
== "Bill Melinda Gates Foundation"
308-
)
309-
assert (
310-
_remove_lucene_chars("It is the end of the world. Take shelter(&&)")
311-
== "It is the end of the world. Take shelter"
312-
)
313-
assert (
314-
_remove_lucene_chars("It is the end of the world. Take shelter??")
315-
== "It is the end of the world. Take shelter"
316-
)
317-
assert (
318-
_remove_lucene_chars("It is the end of the world. Take shelter^")
319-
== "It is the end of the world. Take shelter"
320-
)
321-
assert (
322-
_remove_lucene_chars("It is the end of the world. Take shelter+")
323-
== "It is the end of the world. Take shelter"
324-
)
325-
assert (
326-
_remove_lucene_chars("It is the end of the world. Take shelter-")
327-
== "It is the end of the world. Take shelter"
328-
)
329-
assert (
330-
_remove_lucene_chars("It is the end of the world. Take shelter~")
331-
== "It is the end of the world. Take shelter"
332-
)
333-
assert (
334-
_remove_lucene_chars("It is the end of the world. Take shelter/")
335-
== "It is the end of the world. Take shelter"
336-
)

0 commit comments

Comments
 (0)