Skip to content

Rag pipeline POC #4

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions examples/question_answering/graphrag_pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import asyncio

import neo4j
from neo4j_graphrag.embeddings import OpenAIEmbeddings
from neo4j_graphrag.experimental.components.rag.generate import Generator
from neo4j_graphrag.experimental.components.rag.prompt_builder import PromptBuilder
from neo4j_graphrag.experimental.components.rag.retrievers import RetrieverWrapper
from neo4j_graphrag.experimental.pipeline import Pipeline
from neo4j_graphrag.experimental.pipeline.pipeline import PipelineResult
from neo4j_graphrag.generation import RagTemplate
from neo4j_graphrag.llm import OpenAILLM
from neo4j_graphrag.retrievers import VectorRetriever

URI = "neo4j+s://demo.neo4jlabs.com"
AUTH = ("recommendations", "recommendations")
DATABASE = "recommendations"
INDEX_NAME = "moviePlotsEmbedding"


async def main() -> PipelineResult:
pipeline = Pipeline()
driver = neo4j.GraphDatabase.driver(URI, auth=AUTH)
llm = OpenAILLM(model_name="gpt-4o")
embedder = OpenAIEmbeddings()
retriever = VectorRetriever(
driver,
index_name=INDEX_NAME,
neo4j_database=DATABASE,
embedder=embedder,
)
pipeline.add_component(RetrieverWrapper(retriever), "retriever")
pipeline.add_component(PromptBuilder(RagTemplate()), "prompt")
pipeline.add_component(Generator(llm), "generate")

pipeline.connect(
"retriever",
"prompt",
{
"context": "retriever.result",
},
)
pipeline.connect(
"prompt",
"generate",
{
"prompt": "prompt.prompt",
},
)

query = "show me a movie with cats"
res = await pipeline.run(
{
"retriever": {"query_text": query},
"prompt": {"query_text": query, "examples": ""},
}
)

driver.close()
await llm.async_client.close()

# context_result = await pipeline.store.get_result_for_component(
# res.run_id, "retriever"
# )
# context = context_result.get("result")
# res.result["context"] = context
return res


if __name__ == "__main__":
print(asyncio.run(main()))
26 changes: 26 additions & 0 deletions examples/question_answering/graphrag_simple_pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from neo4j_graphrag.experimental.pipeline.config.runner import PipelineRunner

if __name__ == "__main__":
import asyncio
import os

os.environ["NEO4J_URI"] = "neo4j+s://demo.neo4jlabs.com"
os.environ["NEO4J_USER"] = "recommendations"
os.environ["NEO4J_PASSWORD"] = "recommendations"

runner = PipelineRunner.from_config_file(
"examples/question_answering/simple_rag_pipeline_config.json"
)
print(
asyncio.run(
runner.run(
dict(
query_text="show me a movie about cats",
retriever_config={
"top_k": 2,
},
# return_context=True,
)
)
)
)
57 changes: 57 additions & 0 deletions examples/question_answering/simple_rag_pipeline_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
{
"version_": "1",
"template_": "SimpleRAGPipeline",
"neo4j_config": {
"params_": {
"uri": {
"resolver_": "ENV",
"var_": "NEO4J_URI"
},
"user": {
"resolver_": "ENV",
"var_": "NEO4J_USER"
},
"password": {
"resolver_": "ENV",
"var_": "NEO4J_PASSWORD"
}
}
},
"llm_config": {
"class_": "OpenAILLM",
"params_": {
"api_key": {
"resolver_": "ENV",
"var_": "OPENAI_API_KEY"
},
"model_name": "gpt-4o",
"model_params": {
"temperature": 0,
"max_tokens": 2000
}
}
},
"embedder_config": {
"class_": "OpenAIEmbeddings",
"params_": {
"api_key": {
"resolver_": "ENV",
"var_": "OPENAI_API_KEY"
}
}
},
"retriever": {
"class_": "VectorRetriever",
"params_": {
"driver": {
"resolver_": "CONFIG_KEY",
"key_": "neo4j_config.default"
},
"index_name": "moviePlotsEmbedding",
"embedder": {
"resolver_": "CONFIG_KEY",
"key_": "embedder_config.default"
}
}
}
}
14 changes: 14 additions & 0 deletions src/neo4j_graphrag/experimental/components/rag/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright (c) "Neo4j"
# Neo4j Sweden AB [https://neo4j.com]
# #
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# #
# https://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
31 changes: 31 additions & 0 deletions src/neo4j_graphrag/experimental/components/rag/generate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright (c) "Neo4j"
# Neo4j Sweden AB [https://neo4j.com]
# #
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# #
# https://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from neo4j_graphrag.experimental.pipeline import Component, DataModel
from neo4j_graphrag.llm import LLMInterface


class GenerationResult(DataModel):
content: str


class Generator(Component):
def __init__(self, llm: LLMInterface) -> None:
self.llm = llm

async def run(self, prompt: str) -> GenerationResult:
llm_response = await self.llm.ainvoke(prompt)
return GenerationResult(
content=llm_response.content,
)
33 changes: 33 additions & 0 deletions src/neo4j_graphrag/experimental/components/rag/prompt_builder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright (c) "Neo4j"
# Neo4j Sweden AB [https://neo4j.com]
# #
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# #
# https://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any

from neo4j_graphrag.experimental.pipeline import Component, DataModel
from neo4j_graphrag.generation import PromptTemplate

# class PromptData(DataModel):
# inputs: dict[str, Any]


class PromptResult(DataModel):
prompt: str


class PromptBuilder(Component):
def __init__(self, template: PromptTemplate):
self.template = template

async def run(self, **kwargs: Any) -> PromptResult:
return PromptResult(prompt=self.template.format(**kwargs))
33 changes: 33 additions & 0 deletions src/neo4j_graphrag/experimental/components/rag/retrievers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright (c) "Neo4j"
# Neo4j Sweden AB [https://neo4j.com]
# #
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# #
# https://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any

from neo4j_graphrag.experimental.pipeline import Component, DataModel
from neo4j_graphrag.retrievers.base import Retriever
from neo4j_graphrag.types import RetrieverResult


class RetrieverWrapperResult(DataModel):
result: RetrieverResult


class RetrieverWrapper(Component):
def __init__(self, retriever: Retriever):
self.retriever = retriever

async def run(self, **kwargs: Any) -> RetrieverWrapperResult:
return RetrieverWrapperResult(
result=self.retriever.search(**kwargs),
)
4 changes: 4 additions & 0 deletions src/neo4j_graphrag/experimental/pipeline/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ def __new__(
for param in sig.parameters.values()
if param.name not in ("self", "kwargs")
}
attrs["anonymous_input_allowed"] = any(
param.name == "kwargs" for param in sig.parameters.values()
)
# extract returned fields from the run method return type hint
return_model = get_type_hints(run_method).get("return")
if return_model is None:
Expand Down Expand Up @@ -76,6 +79,7 @@ class Component(abc.ABC, metaclass=ComponentMeta):
# DO NOT CHANGE
component_inputs: dict[str, dict[str, str | bool]]
component_outputs: dict[str, dict[str, str | bool]]
anonymous_input_allowed: bool

@abc.abstractmethod
async def run(self, *args: Any, **kwargs: Any) -> DataModel:
Expand Down
20 changes: 20 additions & 0 deletions src/neo4j_graphrag/experimental/pipeline/config/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,13 @@
from neo4j_graphrag.experimental.pipeline.config.template_pipeline.simple_kg_builder import (
SimpleKGPipelineConfig,
)
from neo4j_graphrag.experimental.pipeline.config.template_pipeline.simple_rag_pipeline import (
SimpleRAGPipelineConfig,
)
from neo4j_graphrag.experimental.pipeline.config.types import PipelineType
from neo4j_graphrag.experimental.pipeline.pipeline import PipelineResult
from neo4j_graphrag.experimental.pipeline.types import PipelineDefinition
from neo4j_graphrag.generation.types import RagResultModel
from neo4j_graphrag.utils.logging import prettify

logger = logging.getLogger(__name__)
Expand All @@ -68,6 +72,7 @@ class PipelineConfigWrapper(BaseModel):
config: Union[
Annotated[PipelineConfig, Tag(PipelineType.NONE)],
Annotated[SimpleKGPipelineConfig, Tag(PipelineType.SIMPLE_KG_PIPELINE)],
Annotated[SimpleRAGPipelineConfig, Tag(PipelineType.SIMPLE_RAG_PIPELINE)],
] = Field(discriminator=Discriminator(_get_discriminator_value))

def parse(self, resolved_data: dict[str, Any] | None = None) -> PipelineDefinition:
Expand Down Expand Up @@ -136,3 +141,18 @@ async def close(self) -> None:
logger.debug("PIPELINE_RUNNER: cleaning up (closing instantiated drivers...)")
if self.config:
await self.config.close()


class RagPipelineRunner(PipelineRunner):
async def search(self, **kwargs: Any) -> RagResultModel:
result = await self.run(kwargs)
context = None
if kwargs.get("return_context"):
context = await self.pipeline.store.get_result_for_component(
result.run_id, "retriever"
)
context = context.get("result")
return RagResultModel(
answer=result.result["generator"]["content"],
retriever_result=context,
)
Loading
Loading