7 changes: 0 additions & 7 deletions CHANGELOG.md
@@ -4,15 +4,8 @@

### Added

- Document node is now always created when running SimpleKGPipeline, even if `from_pdf=False`.
- Document metadata is exposed in SimpleKGPipeline run method.
- Added automatic rate limiting with retry logic and exponential backoff for all Embedding providers using tenacity. The `RateLimitHandler` interface allows for custom rate limiting strategies, including the ability to disable rate limiting entirely.

### Fixed

- LangChain Chat models compatibility is now working again.


## 1.10.0

### Added
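The removed entry above describes the rate-limiting support added for embedding providers. Purely as a hedged illustration of the `RateLimitHandler` interface it mentions (the import path for `NoOpRateLimitHandler` and the `rate_limit_handler` keyword on the embedder constructor are assumptions, not taken from this diff), disabling rate limiting entirely might look like this:

```python
# Hypothetical sketch, not part of this PR: disable rate limiting on an embedder.
from neo4j_graphrag.embeddings import OpenAIEmbeddings
from neo4j_graphrag.utils.rate_limit import NoOpRateLimitHandler  # assumed location

embedder = OpenAIEmbeddings(
    model="text-embedding-3-small",
    # Assumed keyword: a no-op handler passes calls through with no retry/backoff.
    rate_limit_handler=NoOpRateLimitHandler(),
)
```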
4 changes: 2 additions & 2 deletions examples/README.md
@@ -69,7 +69,7 @@ are listed in [the last section of this file](#customize).
- [OpenAI (GPT)](./customize/llms/openai_llm.py)
- [Azure OpenAI]()
- [VertexAI (Gemini)](./customize/llms/vertexai_llm.py)
- [MistralAI](customize/llms/mistralai_llm.py)
- [MistralAI](./customize/llms/mistalai_llm.py)
- [Cohere](./customize/llms/cohere_llm.py)
- [Anthropic (Claude)](./customize/llms/anthropic_llm.py)
- [Ollama](./customize/llms/ollama_llm.py)
@@ -142,7 +142,7 @@ are listed in [the last section of this file](#customize).

### Answer: GraphRAG

- [LangChain compatibility](customize/answer/langchain_compatibility.py)
- [LangChain compatibility](./customize/answer/langchain_compatiblity.py)
- [Use a custom prompt](./customize/answer/custom_prompt.py)


18 changes: 1 addition & 17 deletions examples/customize/llms/anthropic_llm.py
@@ -1,28 +1,12 @@
from neo4j_graphrag.llm import AnthropicLLM, LLMResponse
from neo4j_graphrag.types import LLMMessage

# set api key here or in the ANTHROPIC_API_KEY env var
api_key = None

messages: list[LLMMessage] = [
{
"role": "system",
"content": "You are a seasoned actor and expert performer, renowned for your one-man shows and comedic talent.",
},
{
"role": "user",
"content": "say something",
},
]


llm = AnthropicLLM(
model_name="claude-3-opus-20240229",
model_params={"max_tokens": 1000}, # max_tokens must be specified
api_key=api_key,
)
res: LLMResponse = llm.invoke(
# "say something",
messages,
)
res: LLMResponse = llm.invoke("say something")
print(res.content)
14 changes: 1 addition & 13 deletions examples/customize/llms/cohere_llm.py
@@ -1,23 +1,11 @@
from neo4j_graphrag.llm import CohereLLM, LLMResponse
from neo4j_graphrag.types import LLMMessage

# set api key here or in the CO_API_KEY env var
api_key = None

messages: list[LLMMessage] = [
{
"role": "system",
"content": "You are a seasoned actor and expert performer, renowned for your one-man shows and comedic talent.",
},
{
"role": "user",
"content": "say something",
},
]

llm = CohereLLM(
model_name="command-r",
api_key=api_key,
)
res: LLMResponse = llm.invoke(input=messages)
res: LLMResponse = llm.invoke("say something")
print(res.content)
25 changes: 18 additions & 7 deletions examples/customize/llms/custom_llm.py
@@ -1,13 +1,14 @@
import random
import string
from typing import Any, Awaitable, Callable, Optional, TypeVar
from typing import Any, Awaitable, Callable, List, Optional, TypeVar, Union

from neo4j_graphrag.llm import LLMInterface, LLMResponse
from neo4j_graphrag.utils.rate_limit import (
RateLimitHandler,
# rate_limit_handler,
# async_rate_limit_handler,
)
from neo4j_graphrag.message_history import MessageHistory
from neo4j_graphrag.types import LLMMessage


@@ -17,27 +18,37 @@ def __init__(
):
super().__init__(model_name, **kwargs)

def _invoke(
# Optional: Apply rate limit handling to synchronous invoke method
# @rate_limit_handler
def invoke(
self,
input: list[LLMMessage],
input: str,
message_history: Optional[Union[List[LLMMessage], MessageHistory]] = None,
system_instruction: Optional[str] = None,
) -> LLMResponse:
content: str = (
self.model_name + ": " + "".join(random.choices(string.ascii_letters, k=30))
)
return LLMResponse(content=content)

async def _ainvoke(
# Optional: Apply rate limit handling to asynchronous ainvoke method
# @async_rate_limit_handler
async def ainvoke(
self,
input: list[LLMMessage],
input: str,
message_history: Optional[Union[List[LLMMessage], MessageHistory]] = None,
system_instruction: Optional[str] = None,
) -> LLMResponse:
raise NotImplementedError()


llm = CustomLLM("")
llm = CustomLLM(
""
) # if rate_limit_handler and async_rate_limit_handler decorators are used, the default rate limit handler will be applied automatically (retry with exponential backoff)
res: LLMResponse = llm.invoke("text")
print(res.content)

# If you want to use a custom rate limit handler
# If rate_limit_handler and async_rate_limit_handler decorators are used and you want to use a custom rate limit handler
# Type variables for function signatures used in rate limit handlers
F = TypeVar("F", bound=Callable[..., Any])
AF = TypeVar("AF", bound=Callable[..., Awaitable[Any]])
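Building on the names defined in `custom_llm.py` above (`RateLimitHandler`, `F`, `AF`, `CustomLLM`), a minimal sketch of a custom handler is shown below. It assumes `RateLimitHandler` exposes `handle_sync`/`handle_async` hooks that wrap the decorated methods, and that the `CustomLLM` constructor forwards a `rate_limit_handler` keyword to `LLMInterface` via `**kwargs`; neither detail is visible in this diff.

```python
class CustomRateLimitHandler(RateLimitHandler):
    # Pass-through strategy: no retry, no backoff.
    def handle_sync(self, func: F) -> F:
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            return func(*args, **kwargs)

        return wrapper  # type: ignore[return-value]

    def handle_async(self, func: AF) -> AF:
        async def wrapper(*args: Any, **kwargs: Any) -> Any:
            return await func(*args, **kwargs)

        return wrapper  # type: ignore[return-value]


# Assumed wiring: the handler is accepted as a keyword and forwarded to LLMInterface.
custom_llm = CustomLLM("", rate_limit_handler=CustomRateLimitHandler())
```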
10 changes: 10 additions & 0 deletions examples/customize/llms/mistalai_llm.py
@@ -0,0 +1,10 @@
from neo4j_graphrag.llm import MistralAILLM

# set api key here or in the MISTRAL_API_KEY env var
api_key = None

llm = MistralAILLM(
model_name="mistral-small-latest",
api_key=api_key,
)
llm.invoke("say something")
32 changes: 0 additions & 32 deletions examples/customize/llms/mistralai_llm.py

This file was deleted.

19 changes: 2 additions & 17 deletions examples/customize/llms/ollama_llm.py
@@ -3,26 +3,11 @@
"""

from neo4j_graphrag.llm import LLMResponse, OllamaLLM
from neo4j_graphrag.types import LLMMessage

messages: list[LLMMessage] = [
{
"role": "system",
"content": "You are a seasoned actor and expert performer, renowned for your one-man shows and comedic talent.",
},
{
"role": "user",
"content": "say something",
},
]


llm = OllamaLLM(
model_name="orca-mini:latest",
model_name="<model_name>",
# model_params={"options": {"temperature": 0}, "format": "json"},
# host="...", # if using a remote server
)
res: LLMResponse = llm.invoke(
messages,
)
res: LLMResponse = llm.invoke("What is the additive color model?")
print(res.content)
22 changes: 1 addition & 21 deletions examples/customize/llms/openai_llm.py
@@ -1,28 +1,8 @@
from neo4j_graphrag.llm import LLMResponse, OpenAILLM
from neo4j_graphrag.message_history import InMemoryMessageHistory
from neo4j_graphrag.types import LLMMessage

# set api key here or in the OPENAI_API_KEY env var
api_key = None

messages: list[LLMMessage] = [
{
"role": "system",
"content": "You are a seasoned actor and expert performer, renowned for your one-man shows and comedic talent.",
},
{
"role": "user",
"content": "say something",
},
]


llm = OpenAILLM(model_name="gpt-4o", api_key=api_key)
res: LLMResponse = llm.invoke(
# "say something",
# messages,
InMemoryMessageHistory(
messages=messages,
)
)
res: LLMResponse = llm.invoke("say something")
print(res.content)
17 changes: 2 additions & 15 deletions examples/customize/llms/vertexai_llm.py
@@ -1,20 +1,6 @@
from neo4j_graphrag.llm import LLMResponse, VertexAILLM
from vertexai.generative_models import GenerationConfig

from neo4j_graphrag.types import LLMMessage

messages: list[LLMMessage] = [
{
"role": "system",
"content": "You are a seasoned actor and expert performer, renowned for your one-man shows and comedic talent.",
},
{
"role": "user",
"content": "say something",
},
]


generation_config = GenerationConfig(temperature=1.0)
llm = VertexAILLM(
model_name="gemini-2.0-flash-001",
@@ -23,6 +9,7 @@
# vertexai.generative_models.GenerativeModel client
)
res: LLMResponse = llm.invoke(
input=messages,
"say something",
system_instruction="You are living in 3000 where AI rules the world",
)
print(res.content)
19 changes: 5 additions & 14 deletions src/neo4j_graphrag/generation/graphrag.py
@@ -27,7 +27,6 @@
from neo4j_graphrag.generation.prompts import RagTemplate
from neo4j_graphrag.generation.types import RagInitModel, RagResultModel, RagSearchModel
from neo4j_graphrag.llm import LLMInterface
from neo4j_graphrag.llm.utils import legacy_inputs_to_messages
from neo4j_graphrag.message_history import MessageHistory
from neo4j_graphrag.retrievers.base import Retriever
from neo4j_graphrag.types import LLMMessage, RetrieverResult
@@ -146,17 +145,12 @@ def search(
prompt = self.prompt_template.format(
query_text=query_text, context=context, examples=validated_data.examples
)

messages = legacy_inputs_to_messages(
prompt,
message_history=message_history,
system_instruction=self.prompt_template.system_instructions,
)

logger.debug(f"RAG: retriever_result={prettify(retriever_result)}")
logger.debug(f"RAG: prompt={prompt}")
llm_response = self.llm.invoke(
messages,
prompt,
message_history,
system_instruction=self.prompt_template.system_instructions,
)
answer = llm_response.content
result: dict[str, Any] = {"answer": answer}
@@ -174,12 +168,9 @@ def _build_query(
summarization_prompt = self._chat_summary_prompt(
message_history=message_history
)
messages = legacy_inputs_to_messages(
summarization_prompt,
system_instruction=summary_system_message,
)
summary = self.llm.invoke(
messages,
input=summarization_prompt,
system_instruction=summary_system_message,
).content
return self.conversation_prompt(summary=summary, current_query=query_text)
return query_text