Skip to content

Strict mode behavior #334

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@

### Added

- Added support for automatic schema extraction from text using LLMs. In the `SimpleKGPipeline`, when the user provides no schema, the automatic schema extraction is enabled by default.
- Added support for automatic schema extraction from text using LLMs. In the `SimpleKGPipeline`, when the user provides no schema, the automatic schema extraction is enabled by default.

### Fixed

- Fixed a bug where `spacy` and `rapidfuzz` needed to be installed even if not using the relevant entity resolvers.

### Changed

- Strict mode in `SimpleKGPipeline`: properties and relationships are now pruned only when the input schema defines them.


## 1.7.0

Expand Down
7 changes: 7 additions & 0 deletions docs/source/user_guide_kg_builder.rst
Original file line number Diff line number Diff line change
Expand Up @@ -901,6 +901,13 @@ Any relation whose start node or end node does not conform to the provided tuple
If a relation's start and end nodes are valid but its direction is incorrect, the direction will be inverted.
If a node is left with no properties, it will also be pruned.

.. note::

If the input schema lacks a certain type of information, pruning is skipped.
For example, if an entity is defined only by a label and has no properties,
property pruning is not performed and all properties returned by the LLM are kept.


.. warning::

Note that if the schema enforcement mode is on but the schema is not provided, no schema enforcement will be applied.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -403,8 +403,13 @@ def _enforce_nodes(
schema_entity = schema.entities.get(node.label)
if not schema_entity:
continue
allowed_props = schema_entity.get("properties", [])
filtered_props = self._enforce_properties(node.properties, allowed_props)
allowed_props = schema_entity.get("properties")
if allowed_props:
filtered_props = self._enforce_properties(
node.properties, allowed_props
)
else:
filtered_props = node.properties
if filtered_props:
valid_nodes.append(
Neo4jNode(
Expand Down Expand Up @@ -434,16 +439,17 @@ def _enforce_relationships(
if self.enforce_schema != SchemaEnforcementMode.STRICT:
return extracted_relationships

if schema.relations is None:
return extracted_relationships

valid_rels = []

valid_nodes = {node.id: node.label for node in filtered_nodes}

potential_schema = schema.potential_schema

for rel in extracted_relationships:
schema_relation = (
schema.relations.get(rel.type) if schema.relations else None
)
schema_relation = schema.relations.get(rel.type)
if not schema_relation:
continue

Expand All @@ -468,8 +474,11 @@ def _enforce_relationships(
if not tuple_valid and not reverse_tuple_valid:
continue

allowed_props = schema_relation.get("properties", [])
filtered_props = self._enforce_properties(rel.properties, allowed_props)
allowed_props = schema_relation.get("properties")
if allowed_props:
filtered_props = self._enforce_properties(rel.properties, allowed_props)
else:
filtered_props = rel.properties

valid_rels.append(
Neo4jRelationship(
Expand Down
2 changes: 1 addition & 1 deletion src/neo4j_graphrag/experimental/components/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ class SchemaConfig(DataModel):
@model_validator(mode="before")
def check_schema(cls, data: Dict[str, Any]) -> Dict[str, Any]:
entities = data.get("entities", {}).keys()
relations = data.get("relations", {}).keys()
relations = (data.get("relations") or {}).keys()
potential_schema = data.get("potential_schema", [])

if potential_schema:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ async def test_extractor_schema_enforcement_valid_nodes_with_empty_props() -> No

result: Neo4jGraph = await extractor.run(chunks, schema=schema)

assert len(result.nodes) == 0
assert len(result.nodes) == 1


@pytest.mark.asyncio
Expand Down Expand Up @@ -564,6 +564,74 @@ async def test_extractor_schema_enforcement_inverted_relation_direction() -> Non
assert result.relationships[0].end_node_id.split(":")[1] == "2"


@pytest.mark.asyncio
async def test_extractor_schema_enforcement_none_relationships_in_schema() -> None:
    """Check STRICT enforcement when the schema is built with ``relations=None``.

    The mocked LLM returns two ``Person`` nodes joined by one ``FRIENDS_WITH``
    relationship; both nodes and the relationship are expected in the result.
    """
    # Raw JSON the mocked LLM hands back to the extractor.
    llm_payload = (
        '{"nodes":[{"id":"1","label":"Person","properties":'
        '{"name":"Alice"}},{"id":"2","label":"Person","properties":'
        '{"name":"Bob"}}],'
        '"relationships":[{"start_node_id":"1","end_node_id":"2",'
        '"type":"FRIENDS_WITH","properties":{}}]}'
    )
    mock_llm = MagicMock(spec=LLMInterface)
    mock_llm.ainvoke.return_value = LLMResponse(content=llm_payload)

    strict_extractor = LLMEntityRelationExtractor(
        llm=mock_llm,
        create_lexical_graph=False,
        enforce_schema=SchemaEnforcementMode.STRICT,
    )

    # Only the entity side of the schema is specified; relations are None.
    person_entity = {
        "label": "Person",
        "properties": [{"name": "name", "type": "STRING"}],
    }
    schema_config = SchemaConfig(
        entities={"Person": person_entity},
        relations=None,
        potential_schema=None,
    )

    text_chunks = TextChunks(chunks=[TextChunk(text="some text", index=0)])

    graph: Neo4jGraph = await strict_extractor.run(text_chunks, schema=schema_config)

    assert len(graph.nodes) == 2
    assert len(graph.relationships) == 1
    assert graph.relationships[0].type == "FRIENDS_WITH"


@pytest.mark.asyncio
async def test_extractor_schema_enforcement_empty_relationships_in_schema() -> None:
    """Check STRICT enforcement when the schema is built with ``relations={}``.

    The mocked LLM returns two ``Person`` nodes joined by one ``FRIENDS_WITH``
    relationship; no relationship is expected in the result.
    """
    # Raw JSON the mocked LLM hands back to the extractor.
    llm_payload = (
        '{"nodes":[{"id":"1","label":"Person","properties":'
        '{"name":"Alice"}},{"id":"2","label":"Person","properties":'
        '{"name":"Bob"}}],'
        '"relationships":[{"start_node_id":"1","end_node_id":"2",'
        '"type":"FRIENDS_WITH","properties":{}}]}'
    )
    mock_llm = MagicMock(spec=LLMInterface)
    mock_llm.ainvoke.return_value = LLMResponse(content=llm_payload)

    strict_extractor = LLMEntityRelationExtractor(
        llm=mock_llm,
        create_lexical_graph=False,
        enforce_schema=SchemaEnforcementMode.STRICT,
    )

    # Relations are given explicitly as an empty mapping (not None).
    person_entity = {
        "label": "Person",
        "properties": [{"name": "name", "type": "STRING"}],
    }
    schema_config = SchemaConfig(
        entities={"Person": person_entity},
        relations={},
        potential_schema=None,
    )

    text_chunks = TextChunks(chunks=[TextChunk(text="some text", index=0)])

    graph: Neo4jGraph = await strict_extractor.run(text_chunks, schema=schema_config)

    assert len(graph.relationships) == 0


def test_fix_invalid_json_empty_result() -> None:
json_string = "invalid json"

Expand Down