Commit 560f079

refactoring of the code
1 parent b820ad6 commit 560f079

37 files changed: +85 −106 lines changed

scrapegraphai/docloaders/browser_base.py

Lines changed: 10 additions & 5 deletions
@@ -13,8 +13,10 @@ def browser_base_fetch(api_key: str, project_id: str, link: List[str], text_cont
     - `api_key`: The API key provided by BrowserBase.
     - `project_id`: The ID of the project on BrowserBase where you want to fetch data from.
     - `link`: The URL or link that you want to fetch data from.
-    - `text_content`: A boolean flag to specify whether to return only the text content (True) or the full HTML (False).
-    - `async_mode`: A boolean flag that determines whether the function runs asynchronously (True) or synchronously (False, default).
+    - `text_content`: A boolean flag to specify whether to return only the
+    text content (True) or the full HTML (False).
+    - `async_mode`: A boolean flag that determines whether the function runs asynchronously
+    (True) or synchronously (False, default).

     It initializes a Browserbase object with the given API key and project ID,
     then uses this object to load the specified link.
@@ -37,8 +39,10 @@ def browser_base_fetch(api_key: str, project_id: str, link: List[str], text_cont
         api_key (str): The API key provided by BrowserBase.
         project_id (str): The ID of the project on BrowserBase where you want to fetch data from.
         link (str): The URL or link that you want to fetch data from.
-        text_content (bool): Whether to return only the text content (True) or the full HTML (False). Defaults to True.
-        async_mode (bool): Whether to run the function asynchronously (True) or synchronously (False). Defaults to False.
+        text_content (bool): Whether to return only the text content
+        (True) or the full HTML (False). Defaults to True.
+        async_mode (bool): Whether to run the function asynchronously
+        (True) or synchronously (False). Defaults to False.

     Returns:
         object: The result of the loading operation.
@@ -47,7 +51,8 @@ def browser_base_fetch(api_key: str, project_id: str, link: List[str], text_cont
     try:
         from browserbase import Browserbase
     except ImportError:
-        raise ImportError("The browserbase module is not installed. Please install it using `pip install browserbase`.")
+        raise ImportError(f"""The browserbase module is not installed.
+                          Please install it using `pip install browserbase`.""")


     browserbase = Browserbase(api_key=api_key, project_id=project_id)
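
For orientation, a minimal usage sketch of the function documented above (not part of the commit; the API key, project ID, and URL are placeholders):

from scrapegraphai.docloaders.browser_base import browser_base_fetch

# Placeholder credentials; BrowserBase issues the real values.
result = browser_base_fetch(
    api_key="bb_live_XXXX",
    project_id="my-project-id",
    link=["https://example.com"],
    text_content=True,   # text only; False returns the full HTML
    async_mode=False,    # synchronous run (the default)
)
print(result)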

scrapegraphai/docloaders/scrape_do.py

Lines changed: 3 additions & 6 deletions
@@ -24,17 +24,14 @@ def scrape_do_fetch(token, target_url, use_proxy=False, geoCode=None, super_prox
     """
     encoded_url = urllib.parse.quote(target_url)
     if use_proxy:
-        # Create proxy mode URL
-        proxyModeUrl = f"http://{token}:@proxy.scrape.do:8080"
+        proxy_mode_url = f"http://{token}:@proxy.scrape.do:8080"
         proxies = {
-            "http": proxyModeUrl,
-            "https": proxyModeUrl,
+            "http": proxy_mode_url,
+            "https": proxy_mode_url,
         }
-        # Add optional geoCode and super proxy parameters if provided
         params = {"geoCode": geoCode, "super": str(super_proxy).lower()} if geoCode else {}
         response = requests.get(target_url, proxies=proxies, verify=False, params=params)
     else:
-        # API Mode URL
         url = f"http://api.scrape.do?token={token}&url={encoded_url}"
         response = requests.get(url)
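
The two branches above are Scrape.do's proxy mode and API mode. A hypothetical pair of calls, with a placeholder token:

from scrapegraphai.docloaders.scrape_do import scrape_do_fetch

TOKEN = "YOUR_SCRAPE_DO_TOKEN"  # placeholder

# API mode (default): GET http://api.scrape.do?token=...&url=...
response_api = scrape_do_fetch(TOKEN, "https://example.com")

# Proxy mode: routes the request through proxy.scrape.do:8080,
# optionally pinning a region via geoCode.
response_proxy = scrape_do_fetch(TOKEN, "https://example.com",
                                 use_proxy=True, geoCode="us")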

scrapegraphai/graphs/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -26,4 +26,4 @@
 from .search_link_graph import SearchLinkGraph
 from .screenshot_scraper_graph import ScreenshotScraperGraph
 from .smart_scraper_multi_concat_graph import SmartScraperMultiConcatGraph
-from .code_generator_graph import CodeGeneratorGraph
+from .code_generator_graph import CodeGeneratorGraph

scrapegraphai/graphs/abstract_graph.py

Lines changed: 1 addition & 1 deletion
@@ -128,7 +128,7 @@ def _create_llm(self, llm_config: dict) -> object:
         if requests_per_second is not None:
             with warnings.catch_warnings():
                 warnings.simplefilter("ignore")
-                llm_params["rate_limiter"] = InMemoryRateLimiter(requests_per_second=requests_per_second)
+                llm_params["rate_limiter"] = InMemoryRateLimiter(requests_per_second=requests_per_second)
         if max_retries is not None:
             llm_params["max_retries"] = max_retries
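
The touched line (a whitespace-only change) is where the rate limiter is attached to the model parameters. A standalone sketch of that mechanism, assuming the class is langchain_core's InMemoryRateLimiter and using a placeholder rate:

from langchain_core.rate_limiters import InMemoryRateLimiter

requests_per_second = 0.5   # placeholder: at most one request every two seconds
llm_params = {}

if requests_per_second is not None:
    llm_params["rate_limiter"] = InMemoryRateLimiter(
        requests_per_second=requests_per_second
    )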

scrapegraphai/graphs/base_graph.py

Lines changed: 2 additions & 1 deletion
@@ -45,7 +45,8 @@ class BaseGraph:
     ... )
     """

-    def __init__(self, nodes: list, edges: list, entry_point: str, use_burr: bool = False, burr_config: dict = None, graph_name: str = "Custom"):
+    def __init__(self, nodes: list, edges: list, entry_point: str,
+                 use_burr: bool = False, burr_config: dict = None, graph_name: str = "Custom"):
         self.nodes = nodes
         self.raw_edges = edges
         self.edges = self._create_edges({e for e in edges})

scrapegraphai/graphs/code_generator_graph.py

Lines changed: 2 additions & 2 deletions
@@ -51,7 +51,7 @@ class CodeGeneratorGraph(AbstractGraph):
     """

     def __init__(self, prompt: str, source: str, config: dict, schema: Optional[BaseModel] = None):
-
+
         super().__init__(prompt, config, source, schema)

         self.input_key = "url" if source.startswith("http") else "local_dir"
@@ -63,7 +63,7 @@ def _create_graph(self) -> BaseGraph:
         Returns:
             BaseGraph: A graph instance representing the web scraping workflow.
         """
-
+
         if self.schema is None:
             raise KeyError("The schema is required for CodeGeneratorGraph")
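
Combined with the new export in scrapegraphai/graphs/__init__.py above, the graph would be driven roughly as follows. This is a hypothetical sketch: the LLM config is a placeholder and the run() entry point is assumed from the other graphs in the package; only the schema requirement comes from this hunk.

from pydantic import BaseModel
from scrapegraphai.graphs import CodeGeneratorGraph

class Product(BaseModel):
    """Illustrative output schema; any pydantic model works here."""
    name: str
    price: float

graph = CodeGeneratorGraph(
    prompt="Extract the product name and price",
    source="https://example.com/product",   # http(s) source -> input_key "url"
    config={"llm": {"model": "openai/gpt-4o-mini"}},  # placeholder LLM config
    schema=Product,   # omitting this raises KeyError, per the hunk above
)
print(graph.run())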

scrapegraphai/graphs/deep_scraper_graph.py

Lines changed: 0 additions & 1 deletion
@@ -134,7 +134,6 @@ def _create_repeated_graph(self) -> BaseGraph:
         )


-
     def _create_graph(self) -> BaseGraph:
         """
         Creates the graph of nodes representing the workflow for web scraping

scrapegraphai/graphs/omni_search_graph.py

Lines changed: 0 additions & 1 deletion
@@ -14,7 +14,6 @@
 )
 from ..utils.copy import safe_deepcopy

-
 class OmniSearchGraph(AbstractGraph):
     """
     OmniSearchGraph is a scraping pipeline that searches the internet for answers to a given prompt.

scrapegraphai/graphs/screenshot_scraper_graph.py

Lines changed: 1 addition & 0 deletions
@@ -47,6 +47,7 @@ def _create_graph(self) -> BaseGraph:
                 "link": self.source
             }
         )
+
         generate_answer_from_image_node = GenerateAnswerFromImageNode(
             input="screenshots",
             output=["answer"],

scrapegraphai/graphs/script_creator_graph.py

Lines changed: 2 additions & 0 deletions
@@ -68,6 +68,7 @@ def _create_graph(self) -> BaseGraph:
                 "script_creator": True
             }
         )
+
         parse_node = ParseNode(
             input="doc",
             output=["parsed_doc"],
@@ -76,6 +77,7 @@ def _create_graph(self) -> BaseGraph:
                 "llm_model": self.llm_model
             }
         )
+
         generate_scraper_node = GenerateScraperNode(
             input="user_prompt & (parsed_doc)",
             output=["answer"],
