Skip to content

Commit d1c3de7

Browse files
committed
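fixed a bug: the script-creator and smart-scraper graphs now pass the `url` input key to FetchNode, FetchNode's `force` option defaults to False, and a new `script_creator` node_config flag disables the Markdown conversion step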
1 parent 7af411a commit d1c3de7

File tree

3 files changed

+13
-6
lines changed

3 files changed

+13
-6
lines changed

scrapegraphai/graphs/script_creator_graph.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,13 @@ def _create_graph(self) -> BaseGraph:
6464
"""
6565

6666
fetch_node = FetchNode(
67-
input="url_for_script | local_dir",
67+
input="url | local_dir",
6868
output=["doc", "link_urls", "img_urls"],
69+
node_config={
70+
"llm_model": self.llm_model,
71+
"loader_kwargs": self.config.get("loader_kwargs", {}),
72+
"script_creator": True
73+
}
6974
)
7075
parse_node = ParseNode(
7176
input="doc",

scrapegraphai/graphs/smart_scraper_graph.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ def _create_graph(self) -> BaseGraph:
6262
BaseGraph: A graph instance representing the web scraping workflow.
6363
"""
6464
fetch_node = FetchNode(
65-
input="url_for_scraping | local_dir",
65+
input="url| local_dir",
6666
output=["doc", "link_urls", "img_urls"],
6767
node_config={
6868
"llm_model": self.llm_model,

scrapegraphai/nodes/fetch_node.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,10 @@ def __init__(
6262
{} if node_config is None else node_config.get("llm_model", {})
6363
)
6464
self.force = (
65-
{} if node_config is None else node_config.get("force", {})
65+
{} if node_config is None else node_config.get("force", False)
6666
)
67+
self.script_creator = node_config.get("script_creator", False)
68+
6769

6870
def execute(self, state):
6971
"""
@@ -146,7 +148,7 @@ def execute(self, state):
146148

147149
parsed_content = source
148150

149-
if isinstance(self.llm_model, OpenAI) and self.input == "-----" or self.force:
151+
if isinstance(self.llm_model, OpenAI) and not self.script_creator or self.force and not self.script_creator:
150152
parsed_content = convert_to_md(source)
151153

152154
compressed_document = [
@@ -162,7 +164,7 @@ def execute(self, state):
162164

163165
parsed_content = source
164166

165-
if isinstance(self.llm_model, OpenAI) and self.input == "-----" or self.force:
167+
if isinstance(self.llm_model, OpenAI) and not self.script_creator or self.force and not self.script_creator:
166168
parsed_content = convert_to_md(source)
167169
compressed_document = [Document(page_content=parsed_content)]
168170
else:
@@ -184,7 +186,7 @@ def execute(self, state):
184186
raise ValueError("No HTML body content found in the document fetched by ChromiumLoader.")
185187
parsed_content = document[0].page_content
186188

187-
if isinstance(self.llm_model, OpenAI) and self.input == "-----" or self.force:
189+
if isinstance(self.llm_model, OpenAI) and not self.script_creator or self.force and not self.script_creator:
188190
parsed_content = convert_to_md(document[0].page_content)
189191

190192
compressed_document = [

0 commit comments

Comments
 (0)