
Commit 9391662

stellasia and willtai authored
Ability to create lexical graph only (#127)
* Lexical graph component - code copied
* Use new LexicalGraphBuilder component in entity_relation_extractor.py
* Add tests for LexicalGraphBuilder
* Update documentation and CHANGELOG.md
* Create LexicalGraphConfig model
* Deprecate 'create_lexical_graph' parameter in ERExtractor - add e2e tests
* Fix imports in example - remove constants file (imported only from one location)
* Fix example
* Ruffify
* Ruff
* Reorder constant definition to match the config below
* There is no need to deprecate things at this stage - to be discussed
* Fix e2e test
* Fix links in doc
* Renaming
* Add optional lexical graph config parameter for KG writer
* Fix examples
* Update doc
* Update changelog
* Update docs/source/user_guide_kg_builder.rst
  Co-authored-by: willtai <wtaisen@gmail.com>
* Copyright header was missing
* Improve doc
* Typo
* Improve description of lexical graph
* ChatGPT-fy the doc + remove duplicates by adding links to the user guide when appropriate

---------

Co-authored-by: willtai <wtaisen@gmail.com>
1 parent 4580d5f commit 9391662

File tree

16 files changed: +814 −213 lines changed


CHANGELOG.md

Lines changed: 3 additions & 1 deletion
@@ -5,6 +5,7 @@
 ### Added
 - Made `relations` and `potential_schema` optional in `SchemaBuilder`.
 - Added a check to prevent the use of deprecated Cypher syntax for Neo4j versions 5.23.0 and above.
+- Added a `LexicalGraphBuilder` component to enable the import of the lexical graph (document, chunks) without performing entity and relation extraction.

 ### Changed
 - Vector and Hybrid retrievers used with `return_properties` now also return the node labels (`nodeLabels`) and the node's element ID (`id`).
@@ -100,7 +101,8 @@
 ### IMPORTANT NOTICE
 - The `neo4j-genai` package is now deprecated. Users are advised to switch to the new package `neo4j-graphrag`.
 ### Added
-- Ability to visualise pipeline with `my_pipeline.draw("pipeline.png")`
+- Ability to visualise pipeline with `my_pipeline.draw("pipeline.png")`.
+- `LexicalGraphBuilder` component to create the lexical graph without entity-relation extraction.

 ### Fixed
 - Pipelines now return correct results when the same pipeline is run in parallel.

docs/source/api.rst

Lines changed: 8 additions & 1 deletion
@@ -51,6 +51,13 @@ TextChunkEmbedder
 .. autoclass:: neo4j_graphrag.experimental.components.embedder.TextChunkEmbedder
     :members: run

+LexicalGraphBuilder
+===================
+
+.. autoclass:: neo4j_graphrag.experimental.components.lexical_graph.LexicalGraphBuilder
+    :members:
+    :exclude-members: component_inputs, component_outputs
+
 SchemaBuilder
 =============

@@ -62,7 +69,7 @@ EntityRelationExtractor

 .. autoclass:: neo4j_graphrag.experimental.components.entity_relation_extractor.EntityRelationExtractor
     :members:
-    :undoc-members: component_inputs, component_outputs
+    :exclude-members: component_inputs, component_outputs

 LLMEntityRelationExtractor
 ==========================

docs/source/user_guide_kg_builder.rst

Lines changed: 54 additions & 9 deletions
@@ -22,6 +22,7 @@ A Knowledge Graph (KG) construction pipeline requires a few components:
 - **Document chunker**: split the text into smaller pieces of text, manageable by the LLM context window (token limit).
 - **Chunk embedder** (optional): compute the chunk embeddings.
 - **Schema builder**: provide a schema to ground the LLM extracted entities and relations and obtain an easily navigable KG.
+- **LexicalGraphBuilder**: build the lexical graph (Document, Chunk and their relationships) (optional).
 - **Entity and relation extractor**: extract relevant entities and relations from the text.
 - **Knowledge Graph writer**: save the identified entities and relations.
 - **Entity resolver**: merge similar entities into a single node.
@@ -166,11 +167,43 @@ Example usage:
     os.environ["OPENAI_API_KEY"] = "sk-..."


-If OpenAI is not an option, see :ref:`embedders` to learn how to use sentence-transformers or create your own embedder.
+If OpenAI is not an option, see :ref:`embedders` to learn how to use other supported embedders.

 The embeddings are added to each chunk metadata, and will be saved as a Chunk node property in the graph if
 `create_lexical_graph` is enabled in the `EntityRelationExtractor` (keep reading).

+.. _lexical-graph-builder:
+
+Lexical Graph Builder
+=====================
+
+Once the chunks are extracted and embedded (if required), a graph can be created.
+
+The **lexical graph** contains:
+
+- `Document` node: represent the processed document and have a `path` property.
+- `Chunk` nodes: represent the text chunks. They have a `text` property and, if computed, an `embedding` property.
+- `NEXT_CHUNK` relationships between one chunk node and the next one in the document. It can be used to enhance the context in a RAG application.
+- `FROM_DOCUMENT` relationship between each chunk and the document it was built from.
+
+Example usage:
+
+.. code:: python
+
+    from neo4j_graphrag.experimental.pipeline.components.lexical_graph_builder import LexicalGraphBuilder
+    from neo4j_graphrag.experimental.pipeline.components.types import LexicalGraphConfig
+
+    lexical_graph_builder = LexicalGraphBuilder(config=LexicalGraphConfig(id_prefix="example"))
+    graph = await lexical_graph_builder.run(
+        text_chunks=TextChunks(chunks=[
+            TextChunk(text="some text", index=0),
+            TextChunk(text="some text", index=1),
+        ]),
+        document_info=DocumentInfo(path="my_document.pdf"),
+    )
+
+See :ref:`kg-writer-section` to learn how to write the resulting nodes and relationships to Neo4j.
+

 Schema Builder
 ==============
@@ -292,17 +325,12 @@ This behaviour can be changed by using the `on_error` flag in the `LLMEntityRela
 In this scenario, any failing chunk will make the whole pipeline fail (for all chunks), and no data
 will be saved to Neo4j.

+.. _lexical-graph-in-er-extraction:

 Lexical Graph
 -------------

-By default, the `LLMEntityRelationExtractor` adds some extra nodes and relationships to the extracted graph:
-
-- `Document` node: represent the processed document and have a `path` property.
-- `Chunk` nodes: represent the text chunks. They have a `text` property and, if computed, an `embedding` property.
-- `NEXT_CHUNK` relationships between one chunk node and the next one in the document. It can be used to enhance the context in a RAG application.
-- `FROM_CHUNK` relationship between any extracted entity and the chunk it has been identified into.
-- `FROM_DOCUMENT` relationship between each chunk and the document it was built from.
+By default, the `LLMEntityRelationExtractor` also creates the :ref:`lexical graph<lexical-graph-builder>`.

 If this 'lexical graph' is not desired, set the `created_lexical_graph` to `False` in the extractor constructor:

@@ -314,6 +342,21 @@ If this 'lexical graph' is not desired, set the `created_lexical_graph` to `Fals
     )


+.. note::
+
+    - If `self.create_lexical_graph` is set to `True`, the complete lexical graph
+      will be created, including the document and chunk nodes, along with the relationships
+      between entities and the chunk they were extracted from.
+    - If `self.create_lexical_graph` is set to `False` but `lexical_graph_config`
+      is provided, the document and chunk nodes won't be created. However, relationships
+      between chunks and the entities extracted from them will still be added to the graph.
+
+.. warning::
+
+    If omitting `self.create_lexical_graph` and the chunk does not exist,
+    this will result in no relationship being created in the database by the writer.
+
+
 Customizing the Prompt
 ----------------------

@@ -368,6 +411,8 @@ If more customization is needed, it is possible to subclass the `EntityRelationE
 See :ref:`entityrelationextractor`.


+.. _kg-writer-section:
+
 Knowledge Graph Writer
 ======================

@@ -421,7 +466,7 @@ It is possible to create a custom writer using the `KGWriter` interface:

 .. note::

-    The `validate_call` decorator is required when the input parameter contain a `pydantic` model.
+    The `validate_call` decorator is required when the input parameter contain a `Pydantic` model.


 See :ref:`kgwritermodel` and :ref:`kgwriter` in API reference.
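The two new guide sections above (lexical graph builder and KG writer) can be exercised together. Below is a minimal, hedged sketch of that flow, using the import paths shown in this commit's example files rather than the guide snippet. It assumes that the builder's result exposes a `graph` attribute and that `Neo4jWriter.run` accepts the optional `lexical_graph_config` parameter introduced here (both suggested by the pipeline example further down, not verified against a released API):

import asyncio

import neo4j
from neo4j_graphrag.experimental.components.kg_writer import Neo4jWriter
from neo4j_graphrag.experimental.components.lexical_graph import LexicalGraphBuilder
from neo4j_graphrag.experimental.components.pdf_loader import DocumentInfo
from neo4j_graphrag.experimental.components.types import (
    LexicalGraphConfig,
    TextChunk,
    TextChunks,
)


async def build_and_write_lexical_graph(driver: neo4j.Driver) -> None:
    # build the lexical graph for two small chunks of a hypothetical document
    config = LexicalGraphConfig(id_prefix="example")
    builder = LexicalGraphBuilder(config=config)
    result = await builder.run(
        text_chunks=TextChunks(
            chunks=[
                TextChunk(text="some text", index=0),
                TextChunk(text="some text", index=1),
            ]
        ),
        document_info=DocumentInfo(path="my_document.pdf"),
    )
    # hand the same config to the writer so it knows which labels and
    # relationship types the lexical graph uses (assumed signature)
    writer = Neo4jWriter(driver)
    await writer.run(graph=result.graph, lexical_graph_config=config)


if __name__ == "__main__":
    with neo4j.GraphDatabase.driver(
        "bolt://localhost:7687", auth=("neo4j", "password")
    ) as driver:
        asyncio.run(build_and_write_lexical_graph(driver))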

examples/customize/build_graph/components/extractors/custom_extractor.py

Lines changed: 6 additions & 1 deletion
@@ -5,7 +5,11 @@
     OnError,
 )
 from neo4j_graphrag.experimental.components.pdf_loader import DocumentInfo
-from neo4j_graphrag.experimental.components.types import Neo4jGraph, TextChunks
+from neo4j_graphrag.experimental.components.types import (
+    LexicalGraphConfig,
+    Neo4jGraph,
+    TextChunks,
+)


 class MyExtractor(EntityRelationExtractor):
@@ -27,6 +31,7 @@ async def run(
         self,
         chunks: TextChunks,
         document_info: Optional[DocumentInfo] = None,
+        lexical_graph_config: Optional[LexicalGraphConfig] = None,
         **kwargs: Any,
     ) -> Neo4jGraph:
         # Implement your logic here
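Put together, a custom extractor that accepts the new parameter might look like the sketch below. Assumptions: the import paths shown in this diff, and that `Neo4jGraph` accepts empty `nodes` and `relationships` lists; the body is a placeholder, not this repository's implementation:

from typing import Any, Optional

from neo4j_graphrag.experimental.components.entity_relation_extractor import (
    EntityRelationExtractor,
)
from neo4j_graphrag.experimental.components.pdf_loader import DocumentInfo
from neo4j_graphrag.experimental.components.types import (
    LexicalGraphConfig,
    Neo4jGraph,
    TextChunks,
)


class MyExtractor(EntityRelationExtractor):
    async def run(
        self,
        chunks: TextChunks,
        document_info: Optional[DocumentInfo] = None,
        lexical_graph_config: Optional[LexicalGraphConfig] = None,
        **kwargs: Any,
    ) -> Neo4jGraph:
        # a custom extractor may ignore lexical_graph_config, but honouring it
        # keeps entity-to-chunk links consistent with the lexical graph that
        # LexicalGraphBuilder (or the LLM extractor) has already created
        return Neo4jGraph(nodes=[], relationships=[])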
Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
+from neo4j_graphrag.experimental.components.lexical_graph import (
+    LexicalGraphBuilder,
+)
+from neo4j_graphrag.experimental.components.types import (
+    GraphResult,
+    LexicalGraphConfig,
+    TextChunk,
+    TextChunks,
+)
+
+
+async def main() -> GraphResult:
+    """ """
+    # optionally, define a LexicalGraphConfig object
+    # shown below with default values
+    config = LexicalGraphConfig(
+        id_prefix="",  # used to prefix the chunk and document IDs
+        chunk_node_label="Chunk",
+        document_node_label="Document",
+        chunk_to_document_relationship_type="PART_OF_DOCUMENT",
+        next_chunk_relationship_type="NEXT_CHUNK",
+        node_to_chunk_relationship_type="PART_OF_CHUNK",
+        chunk_embedding_property="embeddings",
+    )
+    builder = LexicalGraphBuilder(
+        config=config,  # optional
+    )
+    graph_result = await builder.run(
+        text_chunks=TextChunks(chunks=[TextChunk(text="....", index=0)]),
+        # document_info={"path": "example"},  # uncomment to create a "Document" node
+    )
+    return graph_result
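As a hedged usage note: the returned `GraphResult` appears to expose both the built graph and the config, since the pipeline example further down maps `lexical_graph_builder.graph` and `lexical_graph_builder.config` by name. If the following driver code were appended to the example above, it might look like this (attribute names are assumptions, not confirmed by this diff):

import asyncio

# hypothetical driver code for the example above; "graph" and "config" are
# assumed from the outputs referenced in the pipeline example below
graph_result = asyncio.run(main())
print(graph_result.graph)   # Neo4jGraph with the Chunk nodes and NEXT_CHUNK relationships
print(graph_result.config)  # the LexicalGraphConfig, handy to forward to a KG writer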

examples/customize/build_graph/components/writers/custom_writer.py

Lines changed: 6 additions & 2 deletions
@@ -4,7 +4,7 @@

 import neo4j
 from neo4j_graphrag.experimental.components.kg_writer import KGWriter, KGWriterModel
-from neo4j_graphrag.experimental.components.types import Neo4jGraph
+from neo4j_graphrag.experimental.components.types import LexicalGraphConfig, Neo4jGraph
 from pydantic import validate_call


@@ -13,7 +13,11 @@ def __init__(self, driver: neo4j.Driver) -> None:
         self.driver = driver

     @validate_call
-    async def run(self, graph: Neo4jGraph) -> KGWriterModel:
+    async def run(
+        self,
+        graph: Neo4jGraph,
+        lexical_graph_config: LexicalGraphConfig = LexicalGraphConfig(),
+    ) -> KGWriterModel:
         try:
             self.driver.execute_query("my query")
             return KGWriterModel(status="SUCCESS")
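For illustration, here is a hedged sketch of a custom writer that actually reads the new parameter. The `chunk_node_label` field comes from the `LexicalGraphConfig` defaults shown earlier in this diff; the Cypher query is a made-up placeholder and the "FAILURE" status value is an assumption:

import neo4j
from neo4j_graphrag.experimental.components.kg_writer import KGWriter, KGWriterModel
from neo4j_graphrag.experimental.components.types import LexicalGraphConfig, Neo4jGraph
from pydantic import validate_call


class ChunkCountingWriter(KGWriter):
    """Hypothetical writer that uses the lexical graph config in its query."""

    def __init__(self, driver: neo4j.Driver) -> None:
        self.driver = driver

    @validate_call
    async def run(
        self,
        graph: Neo4jGraph,
        lexical_graph_config: LexicalGraphConfig = LexicalGraphConfig(),
    ) -> KGWriterModel:
        try:
            # the config tells the writer which label the chunk nodes carry
            # ("Chunk" by default, "TextPart" in the pipeline example below)
            chunk_label = lexical_graph_config.chunk_node_label
            self.driver.execute_query(f"MATCH (c:`{chunk_label}`) RETURN count(c)")
            return KGWriterModel(status="SUCCESS")
        except Exception:
            return KGWriterModel(status="FAILURE")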

examples/customize/build_graph/pipeline/kg_builder_from_text.py

Lines changed: 1 addition & 0 deletions
@@ -44,6 +44,7 @@ async def define_and_run_pipeline(
     """This is where we define and run the KG builder pipeline, instantiating a few
     components:
     - Text Splitter: in this example we use the fixed size text splitter
+    - Chunk Embedder: to embed the chunks' text
     - Schema Builder: this component takes a list of entities, relationships and
       possible triplets as inputs, validate them and return a schema ready to use
       for the rest of the pipeline
Lines changed: 85 additions & 0 deletions
@@ -0,0 +1,85 @@
+from __future__ import annotations
+
+import asyncio
+
+import neo4j
+from neo4j_graphrag.embeddings.openai import OpenAIEmbeddings
+from neo4j_graphrag.experimental.components.embedder import TextChunkEmbedder
+from neo4j_graphrag.experimental.components.kg_writer import Neo4jWriter
+from neo4j_graphrag.experimental.components.lexical_graph import LexicalGraphBuilder
+from neo4j_graphrag.experimental.components.text_splitters.fixed_size_splitter import (
+    FixedSizeSplitter,
+)
+from neo4j_graphrag.experimental.components.types import LexicalGraphConfig
+from neo4j_graphrag.experimental.pipeline import Pipeline
+from neo4j_graphrag.experimental.pipeline.pipeline import PipelineResult
+
+
+async def main(neo4j_driver: neo4j.Driver) -> PipelineResult:
+    """This is where we define and run the Lexical Graph builder pipeline, instantiating
+    a few components:
+
+    - Text Splitter: to split the text into manageable chunks of fixed size
+    - Chunk Embedder: to embed the chunks' text
+    - Lexical Graph Builder: to build the lexical graph, ie creating the chunk nodes and relationships between them
+    - KG writer: save the lexical graph to Neo4j
+    """
+    pipe = Pipeline()
+    # define the components
+    pipe.add_component(
+        FixedSizeSplitter(chunk_size=20, chunk_overlap=1),
+        "splitter",
+    )
+    pipe.add_component(TextChunkEmbedder(embedder=OpenAIEmbeddings()), "chunk_embedder")
+    # optional: define some custom node labels for the lexical graph:
+    lexical_graph_config = LexicalGraphConfig(
+        id_prefix="example",
+        chunk_node_label="TextPart",
+    )
+    pipe.add_component(
+        LexicalGraphBuilder(lexical_graph_config),
+        "lexical_graph_builder",
+    )
+    pipe.add_component(Neo4jWriter(neo4j_driver), "writer")
+    # define the execution order of component
+    # and how the output of previous components must be used
+    pipe.connect("splitter", "chunk_embedder", input_config={"text_chunks": "splitter"})
+    pipe.connect(
+        "chunk_embedder",
+        "lexical_graph_builder",
+        input_config={"text_chunks": "chunk_embedder"},
+    )
+    pipe.connect(
+        "lexical_graph_builder",
+        "writer",
+        input_config={
+            "graph": "lexical_graph_builder.graph",
+            "lexical_graph_config": "lexical_graph_builder.config",
+        },
+    )
+    # user input:
+    # the initial text
+    # and the list of entities and relations we are looking for
+    pipe_inputs = {
+        "splitter": {
+            "text": """Albert Einstein was a German physicist born in 1879 who
+            wrote many groundbreaking papers especially about general relativity
+            and quantum mechanics. He worked for many different institutions, including
+            the University of Bern in Switzerland and the University of Oxford."""
+        },
+        "lexical_graph_builder": {
+            "document_info": {
+                # 'path' can be anything
+                "path": "example/lexical_graph_from_text.py"
+            },
+        },
+    }
+    # run the pipeline
+    return await pipe.run(pipe_inputs)
+
+
+if __name__ == "__main__":
+    with neo4j.GraphDatabase.driver(
+        "bolt://localhost:7687", auth=("neo4j", "password")
+    ) as driver:
+        print(asyncio.run(main(driver)))
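Finally, a hedged sketch of the second half of the "lexical graph only" workflow this commit enables: once the pipeline above has written the chunks, an entity-relation extraction run can reuse the same `LexicalGraphConfig` with `create_lexical_graph=False`, so extracted entities are linked to the existing chunk nodes (per the note added to the user guide). The `OpenAILLM` import and the extractor's constructor arguments are assumptions based on the guide excerpts, not code from this commit:

from neo4j_graphrag.experimental.components.entity_relation_extractor import (
    LLMEntityRelationExtractor,
)
from neo4j_graphrag.experimental.components.types import (
    LexicalGraphConfig,
    Neo4jGraph,
    TextChunks,
)
from neo4j_graphrag.llm import OpenAILLM


async def extract_on_existing_lexical_graph(
    chunks: TextChunks,
    lexical_graph_config: LexicalGraphConfig,
) -> Neo4jGraph:
    extractor = LLMEntityRelationExtractor(
        llm=OpenAILLM(model_name="gpt-4o"),
        create_lexical_graph=False,  # the chunk nodes already exist in Neo4j
    )
    # passing the same config lets the extracted entities be linked to the
    # chunk nodes created by the lexical graph pipeline above
    return await extractor.run(
        chunks=chunks,
        lexical_graph_config=lexical_graph_config,
    )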
