Skip to content

Commit 857f28d

Browse files
authored
Merge pull request #702 from vedovati-matteo/pre/beta
Reasoning Node added
2 parents e5ac020 + b1ce563 commit 857f28d

File tree

7 files changed

+175
-16
lines changed

7 files changed

+175
-16
lines changed

scrapegraphai/nodes/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,5 @@
2525
from .concat_answers_node import ConcatAnswersNode
2626
from .prompt_refiner_node import PromptRefinerNode
2727
from .html_analyzer_node import HtmlAnalyzerNode
28-
from .generate_code_node import GenerateCodeNode
28+
from .generate_code_node import GenerateCodeNode
29+
from .reasoning_node import ReasoningNode

scrapegraphai/nodes/generate_code_node.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,26 +5,23 @@
55
from langchain.prompts import PromptTemplate
66
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
77
from langchain_core.output_parsers import StrOutputParser
8-
from langchain_core.runnables import RunnableParallel
9-
from langchain_core.utils.pydantic import is_basemodel_subclass
108
from langchain_community.chat_models import ChatOllama
119
import ast
1210
import sys
1311
from io import StringIO
1412
from bs4 import BeautifulSoup
1513
import re
16-
from tqdm import tqdm
17-
from .base_node import BaseNode
14+
import json
15+
from jsonschema import validate, ValidationError
1816
from pydantic import ValidationError
17+
from .base_node import BaseNode
1918
from ..utils import (transform_schema,
2019
extract_code,
2120
syntax_focused_analysis, syntax_focused_code_generation,
2221
execution_focused_analysis, execution_focused_code_generation,
2322
validation_focused_analysis, validation_focused_code_generation,
2423
semantic_focused_analysis, semantic_focused_code_generation,
2524
are_content_equal)
26-
from jsonschema import validate, ValidationError
27-
import json
2825
from ..prompts import (
2926
TEMPLATE_INIT_CODE_GENERATION, TEMPLATE_SEMANTIC_COMPARISON
3027
)

scrapegraphai/nodes/html_analyzer_node.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,7 @@
44
from typing import List, Optional
55
from langchain.prompts import PromptTemplate
66
from langchain_core.output_parsers import StrOutputParser
7-
from langchain_core.runnables import RunnableParallel
8-
from langchain_core.utils.pydantic import is_basemodel_subclass
97
from langchain_community.chat_models import ChatOllama
10-
from tqdm import tqdm
118
from .base_node import BaseNode
129
from ..utils import reduce_html
1310
from ..prompts import (

scrapegraphai/nodes/prompt_refiner_node.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,7 @@
44
from typing import List, Optional
55
from langchain.prompts import PromptTemplate
66
from langchain_core.output_parsers import StrOutputParser
7-
from langchain_core.runnables import RunnableParallel
8-
from langchain_core.utils.pydantic import is_basemodel_subclass
9-
from langchain_openai import ChatOpenAI, AzureChatOpenAI
10-
from langchain_mistralai import ChatMistralAI
117
from langchain_community.chat_models import ChatOllama
12-
from tqdm import tqdm
138
from .base_node import BaseNode
149
from ..utils import transform_schema
1510
from ..prompts import (

scrapegraphai/nodes/reasoning_node.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
"""
2+
ReasoningNode Module
3+
"""
4+
from typing import List, Optional
5+
from langchain.prompts import PromptTemplate
6+
from langchain_core.output_parsers import StrOutputParser
7+
from langchain_community.chat_models import ChatOllama
8+
from .base_node import BaseNode
9+
from ..utils import transform_schema
10+
from ..prompts import (
11+
TEMPLATE_REASONING, TEMPLATE_REASONING_WITH_CONTEXT
12+
)
13+
14+
class ReasoningNode(BaseNode):
    """
    A node that refines the user prompt using the output schema and optional
    additional context, producing a precise reasoning prompt that explicitly
    links elements of the user's original request to their corresponding
    fields in the JSON schema.

    Attributes:
        llm_model: An instance of a language model client, configured for generating answers.
        verbose (bool): A flag indicating whether to show print statements during execution.

    Args:
        input (str): Boolean expression defining the input keys needed from the state.
        output (List[str]): List of output keys to be updated in the state.
        node_config (dict): Additional configuration for the node.
        node_name (str): The unique identifier name for the node, defaulting to "Reasoning".
    """

    def __init__(
        self,
        input: str,
        output: List[str],
        node_config: Optional[dict] = None,
        # Fixed copy-paste default: was "PromptRefiner", which would collide
        # with the real PromptRefinerNode when both appear in one graph.
        node_name: str = "Reasoning",
    ):
        super().__init__(node_name, "node", input, output, 2, node_config)

        self.llm_model = node_config["llm_model"]

        # Ollama chat models must be told explicitly to emit JSON.
        if isinstance(node_config["llm_model"], ChatOllama):
            self.llm_model.format = "json"

        # NOTE: node_config can never actually be None here — the
        # node_config["llm_model"] access above would already have raised —
        # so these fallbacks are purely defensive. Fixed the verbose branch
        # (was `True if node_config is None`) to match the `force` pattern.
        self.verbose = (
            False if node_config is None else node_config.get("verbose", False)
        )
        self.force = (
            False if node_config is None else node_config.get("force", False)
        )

        # Optional extra context injected into the reasoning template.
        self.additional_info = node_config.get("additional_info", None)

        # Pydantic model describing the desired output; .schema() is called in execute().
        self.output_schema = node_config.get("schema")

    def execute(self, state: dict) -> dict:
        """
        Generate a refined prompt for the reasoning task based on the user's
        input and the JSON schema.

        Args:
            state (dict): The current state of the graph. The input keys will be used
                to fetch the correct data from the state.

        Returns:
            dict: The updated state with the output key containing the generated answer.

        Raises:
            KeyError: If the input keys are not found in the state, indicating
                that the necessary information for generating an answer is missing.
        """
        self.logger.info(f"--- Executing {self.node_name} Node ---")

        user_prompt = state['user_prompt']

        # Fixed attribute typo: simplefied -> simplified (only referenced here).
        self.simplified_schema = transform_schema(self.output_schema.schema())

        if self.additional_info is not None:
            prompt = PromptTemplate(
                template=TEMPLATE_REASONING_WITH_CONTEXT,
                partial_variables={"user_input": user_prompt,
                                   "json_schema": str(self.simplified_schema),
                                   "additional_context": self.additional_info})
        else:
            prompt = PromptTemplate(
                template=TEMPLATE_REASONING,
                partial_variables={"user_input": user_prompt,
                                   "json_schema": str(self.simplified_schema)})

        output_parser = StrOutputParser()

        # All template variables are pre-bound via partial_variables,
        # so the chain is invoked with an empty input dict.
        chain = prompt | self.llm_model | output_parser
        refined_prompt = chain.invoke({})

        state.update({self.output[0]: refined_prompt})
        return state

scrapegraphai/prompts/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,5 @@
1818
TEMPLATE_EXECUTION_ANALYSIS, TEMPLATE_EXECUTION_CODE_GENERATION,
1919
TEMPLATE_VALIDATION_ANALYSIS, TEMPLATE_VALIDATION_CODE_GENERATION,
2020
TEMPLATE_SEMANTIC_COMPARISON, TEMPLATE_SEMANTIC_ANALYSIS,
21-
TEMPLATE_SEMANTIC_CODE_GENERATION)
21+
TEMPLATE_SEMANTIC_CODE_GENERATION)
22+
from .reasoning_node_prompts import TEMPLATE_REASONING, TEMPLATE_REASONING_WITH_CONTEXT
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""
2+
Reasoning prompts helper
3+
"""
4+
5+
TEMPLATE_REASONING = """
6+
**Task**: Analyze the user's request and the provided JSON schema to guide an LLM in extracting information directly from a markdown file previously parsed froma a HTML file.
7+
8+
**User's Request**:
9+
{user_input}
10+
11+
**Target JSON Schema**:
12+
```json
13+
{json_schema}
14+
```
15+
16+
**Analysis Instructions**:
17+
1. **Interpret User Request:**
18+
* Identify the key information types or entities the user is seeking.
19+
* Note any specific attributes, relationships, or constraints mentioned.
20+
21+
2. **Map to JSON Schema**:
22+
* For each identified element in the user request, locate its corresponding field in the JSON schema.
23+
* Explain how the schema structure represents the requested information.
24+
* Highlight any relevant schema elements not explicitly mentioned in the user's request.
25+
26+
3. **Data Transformation Guidance**:
27+
* Provide guidance on any necessary transformations to align extracted data with the JSON schema requirements.
28+
29+
This analysis will be used to instruct an LLM that has the HTML content in its context. The LLM will use this guidance to extract the information and return it directly in the specified JSON format.
30+
31+
**Reasoning Output**:
32+
[Your detailed analysis based on the above instructions]
33+
"""
34+
35+
TEMPLATE_REASONING_WITH_CONTEXT = """
36+
**Task**: Analyze the user's request and the provided JSON schema to guide an LLM in extracting information directly from a markdown file previously parsed froma a HTML file.
37+
38+
**User's Request**:
39+
{user_input}
40+
41+
**Target JSON Schema**:
42+
```json
43+
{json_schema}
44+
```
45+
46+
**Additional Context**:
47+
{additional_context}
48+
49+
**Analysis Instructions**:
50+
1. **Interpret User Request and Context:**
51+
* Identify the key information types or entities the user is seeking.
52+
* Note any specific attributes, relationships, or constraints mentioned.
53+
* Incorporate insights from the additional context to refine understanding of the task.
54+
55+
2. **Map to JSON Schema**:
56+
* For each identified element in the user request, locate its corresponding field in the JSON schema.
57+
* Explain how the schema structure represents the requested information.
58+
* Highlight any relevant schema elements not explicitly mentioned in the user's request.
59+
60+
3. **Extraction Strategy**:
61+
* Based on the additional context, suggest specific strategies for locating and extracting the required information from the HTML.
62+
* Highlight any potential challenges or special considerations mentioned in the context.
63+
64+
4. **Data Transformation Guidance**:
65+
* Provide guidance on any necessary transformations to align extracted data with the JSON schema requirements.
66+
* Note any special formatting, validation, or business logic considerations from the additional context.
67+
68+
This analysis will be used to instruct an LLM that has the HTML content in its context. The LLM will use this guidance to extract the information and return it directly in the specified JSON format.
69+
70+
**Reasoning Output**:
71+
[Your detailed analysis based on the above instructions, incorporating insights from the additional context]
72+
"""

0 commit comments

Comments
 (0)