
Commit ae275ec (1 parent: 5002c71)

feat: add integration with scrape.do

2 files changed (+9, -3 lines)

scrapegraphai/nodes/fetch_node.py (3 additions, 3 deletions)
@@ -270,10 +270,10 @@ def handle_web_source(self, state, source):
         else:
             loader_kwargs = {}

-        if self.node_config is not None:
+        if self.node_config:
             loader_kwargs = self.node_config.get("loader_kwargs", {})

-        if self.browser_base is not None:
+        if self.browser_base:
             try:
                 from ..docloaders.browser_base import browser_base_fetch
             except ImportError:
@@ -285,7 +285,7 @@ def handle_web_source(self, state, source):

             document = [Document(page_content=content,
                                  metadata={"source": source}) for content in data]
-        elif self.scrape_do is not None:
+        elif self.scrape_do:
             from ..docloaders.scrape_do import scrape_do_fetch
             if (self.scrape_do.get("use_proxy") is None) or \
                self.scrape_do.get("geoCode") is None or \

scrapegraphai/nodes/fetch_node_level_k.py (6 additions, 0 deletions)
@@ -57,6 +57,7 @@ def __init__(
         self.headless = node_config.get("headless", True) if node_config else True
         self.loader_kwargs = node_config.get("loader_kwargs", {}) if node_config else {}
         self.browser_base = node_config.get("browser_base", None)
+        self.scrape_do = node_config.get("scrape_do", None)
         self.depth = node_config.get("depth", 1) if node_config else 1
         self.only_inside_links = node_config.get("only_inside_links", False) if node_config else False
         self.min_input_len = 1
@@ -115,6 +116,11 @@ def fetch_content(self, source: str, loader_kwargs) -> Optional[str]:
                                     self.browser_base.get("project_id"), [source])
             document = [Document(page_content=content,
                                  metadata={"source": source}) for content in data]
+        elif self.scrape_do:
+            from ..docloaders.scrape_do import scrape_do_fetch
+            data = scrape_do_fetch(self.scrape_do.get("api_key"), source)
+            document = [Document(page_content=data,
+                                 metadata={"source": source})]
         else:
             loader = ChromiumLoader([source], headless=self.headless, **loader_kwargs)
             document = loader.load()
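For reference, a hypothetical node_config that would exercise the new scrape.do path in FetchNodeLevelK could look like the sketch below; the key names mirror the diff above, while the API key value is a placeholder:

    # Hypothetical configuration for FetchNodeLevelK; key names mirror the
    # diff above, the api_key value is a placeholder.
    node_config = {
        "scrape_do": {"api_key": "YOUR_SCRAPE_DO_API_KEY"},
        "depth": 1,
        "only_inside_links": False,
    }

With scrape_do set, fetch_content bypasses ChromiumLoader and wraps the string returned by scrape_do_fetch in a single Document.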
