1
- """
1
+ """
2
2
ExploreGraph Module
3
3
"""
4
4
5
- from copy import copy , deepcopy
6
5
from typing import Optional
7
6
from pydantic import BaseModel
8
7
9
8
from .base_graph import BaseGraph
10
9
from .abstract_graph import AbstractGraph
11
- from .smart_scraper_graph import SmartScraperGraph
12
10
13
11
from ..nodes import (
14
12
FetchNode ,
20
18
21
19
22
20
class ExploreGraph (AbstractGraph ):
23
- """
24
- ExploreGraph is a scraping pipeline that searches the internet for answers to a given prompt.
25
- It only requires a user prompt to search the internet and generate an answer.
21
+ """
22
+ ExploreGraph is a scraping pipeline that automates the process of
23
+ extracting information from web pages
24
+ using a natural language model to interpret and answer prompts.
26
25
27
26
Attributes:
28
- prompt (str): The user prompt to search the internet.
29
- llm_model (dict): The configuration for the language model.
30
- embedder_model (dict): The configuration for the embedder model.
31
- headless (bool): A flag to run the browser in headless mode.
32
- verbose (bool): A flag to display the execution information.
33
- model_token (int): The token limit for the language model.
27
+ prompt (str): The prompt for the graph.
28
+ source (str): The source of the graph.
29
+ config (dict): Configuration parameters for the graph.
30
+ schema (str): The schema for the graph output.
31
+ llm_model: An instance of a language model client, configured for generating answers.
32
+ embedder_model: An instance of an embedding model client,
33
+ configured for generating embeddings.
34
+ verbose (bool): A flag indicating whether to show print statements during execution.
35
+ headless (bool): A flag indicating whether to run the graph in headless mode.
34
36
35
37
Args:
36
- prompt (str): The user prompt to search the internet.
38
+ prompt (str): The prompt for the graph.
39
+ source (str): The source of the graph.
37
40
config (dict): Configuration parameters for the graph.
38
- schema (Optional[ str] ): The schema for the graph output.
41
+ schema (str): The schema for the graph output.
39
42
40
43
Example:
41
- >>> search_graph = ExploreGraph(
42
- ... "What is Chioggia famous for?",
44
+ >>> smart_scraper = ExploreGraph(
45
+ ... "List me all the attractions in Chioggia.",
46
+ ... "https://en.wikipedia.org/wiki/Chioggia",
43
47
... {"llm": {"model": "gpt-3.5-turbo"}}
44
48
... )
45
- >>> result = search_graph.run()
49
+ >>> result = smart_scraper.run()
50
+ )
46
51
"""
47
52
48
def __init__(self, prompt: str, source: str, config: dict, schema: Optional[BaseModel] = None):
    """
    Initialize the exploration graph.

    Stores the prompt, source, configuration and output schema on the
    instance via the base class, then derives the key under which the
    source will be handed to the fetch node.

    Args:
        prompt (str): The prompt for the graph.
        source (str): The source of the graph (a URL or a local directory).
        config (dict): Configuration parameters for the graph.
        schema (Optional[BaseModel]): The schema for the graph output.
    """
    super().__init__(prompt, config, source, schema)

    # Web sources are fetched over HTTP(S); anything else is treated as a
    # path on the local filesystem.
    if source.startswith("http"):
        self.input_key = "url"
    else:
        self.input_key = "local_dir"
61
58
def _create_graph (self ) -> BaseGraph :
62
59
"""
63
- Creates the graph of nodes representing the workflow for web scraping and searching .
60
+ Creates the graph of nodes representing the workflow for web scraping.
64
61
65
62
Returns:
66
- BaseGraph: A graph instance representing the web scraping and searching workflow.
63
+ BaseGraph: A graph instance representing the web scraping workflow.
67
64
"""
68
-
69
- # ************************************************
70
- # Create a SmartScraperGraph instance
71
- # ************************************************
72
-
73
65
fetch_node = FetchNode (
74
66
input = "url | local_dir" ,
75
67
output = ["doc" , "link_urls" , "img_urls" ],
@@ -100,7 +92,7 @@ def _create_graph(self) -> BaseGraph:
100
92
"schema" : self .schema ,
101
93
}
102
94
)
103
-
95
+
104
96
search_link_node = SearchLinkNode (
105
97
input = "doc" ,
106
98
output = [{"link" : "description" }],
@@ -114,25 +106,28 @@ def _create_graph(self) -> BaseGraph:
114
106
fetch_node ,
115
107
parse_node ,
116
108
rag_node ,
109
+ search_link_node ,
117
110
generate_answer_node ,
118
111
],
112
+
119
113
edges = [
120
114
(fetch_node , parse_node ),
121
115
(parse_node , rag_node ),
122
- (rag_node , generate_answer_node ),
123
- (generate_answer_node , search_link_node )
116
+ (rag_node , search_link_node ),
117
+ (search_link_node , generate_answer_node )
124
118
],
125
119
entry_point = fetch_node
126
120
)
127
121
128
122
def run(self) -> str:
    """
    Execute the scraping pipeline and return the answer to the prompt.

    Feeds the user prompt and the source (under the key chosen at
    construction time) into the compiled graph, keeps the final state and
    execution info on the instance, and extracts the answer.

    Returns:
        str: The answer to the prompt, or "No answer found." when the
        final state contains no answer.
    """
    graph_inputs = {
        "user_prompt": self.prompt,
        self.input_key: self.source,
    }

    self.final_state, self.execution_info = self.graph.execute(graph_inputs)

    return self.final_state.get("answer", "No answer found.")
0 commit comments