From 438b8127db941b5ac71094bf7c06f4dfdf80489a Mon Sep 17 00:00:00 2001
From: Marco Vinciguerra
Date: Sun, 15 Sep 2024 11:20:08 +0200
Subject: [PATCH] refactoring of the code

---
 scrapegraphai/builders/graph_builder.py      |  6 ------
 scrapegraphai/nodes/fetch_node.py            | 10 ++++++----
 scrapegraphai/telemetry/telemetry.py         | 17 +++++++----------
 scrapegraphai/utils/convert_to_md.py         |  1 +
 scrapegraphai/utils/copy.py                  |  3 +++
 scrapegraphai/utils/parse_state_keys.py      |  5 ++---
 scrapegraphai/utils/save_audio_from_bytes.py |  1 -
 scrapegraphai/utils/token_calculator.py      |  1 -
 8 files changed, 19 insertions(+), 25 deletions(-)

diff --git a/scrapegraphai/builders/graph_builder.py b/scrapegraphai/builders/graph_builder.py
index 69ebe492..1bfdab72 100644
--- a/scrapegraphai/builders/graph_builder.py
+++ b/scrapegraphai/builders/graph_builder.py
@@ -65,12 +65,10 @@ def _create_llm(self, llm_config: dict):
             "temperature": 0,
             "streaming": True
         }
-        # Update defaults with any LLM parameters that were provided
         llm_params = {**llm_defaults, **llm_config}
         if "api_key" not in llm_params:
             raise ValueError("LLM configuration must include an 'api_key'.")
 
-        # select the model based on the model name
         if "gpt-" in llm_params["model"]:
             return ChatOpenAI(llm_params)
         elif "gemini" in llm_params["model"]:
@@ -152,17 +150,13 @@ def convert_json_to_graphviz(json_data, format: str = 'pdf'):
     edges = graph_config.get('edges', [])
     entry_point = graph_config.get('entry_point')
 
-    # Add nodes to the graph
     for node in nodes:
-        # If this node is the entry point, use a double circle to denote it
         if node['node_name'] == entry_point:
             graph.node(node['node_name'], shape='doublecircle')
         else:
             graph.node(node['node_name'])
 
-    # Add edges to the graph
     for edge in edges:
-        # An edge could potentially have multiple 'to' nodes if it's from a conditional node
         if isinstance(edge['to'], list):
             for to_node in edge['to']:
                 graph.edge(edge['from'], to_node)
diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py
index f015278d..bbc0abd7 100644
--- a/scrapegraphai/nodes/fetch_node.py
+++ b/scrapegraphai/nodes/fetch_node.py
@@ -252,8 +252,8 @@ def handle_web_source(self, state, source):
         if not self.cut:
             parsed_content = cleanup_html(response, source)
 
-        if ((isinstance(self.llm_model, ChatOpenAI) or isinstance(self.llm_model, AzureChatOpenAI))
-            and not self.script_creator) or (self.force and not self.script_creator):
+        if (isinstance(self.llm_model, (ChatOpenAI, AzureChatOpenAI))
+            and not self.script_creator) or (self.force and not self.script_creator):
             parsed_content = convert_to_md(source, parsed_content)
 
         compressed_document = [Document(page_content=parsed_content)]
@@ -271,7 +271,8 @@ def handle_web_source(self, state, source):
             try:
                 from ..docloaders.browser_base import browser_base_fetch
             except ImportError:
-                raise ImportError("The browserbase module is not installed. Please install it using `pip install browserbase`.")
+                raise ImportError("""The browserbase module is not installed.
+                                  Please install it using `pip install browserbase`.""")
 
             data = browser_base_fetch(self.browser_base.get("api_key"),
                                        self.browser_base.get("project_id"), [source])
@@ -283,7 +284,8 @@ def handle_web_source(self, state, source):
             document = loader.load()
         if not document or not document[0].page_content.strip():
-            raise ValueError("No HTML body content found in the document fetched by ChromiumLoader.")
+            raise ValueError("""No HTML body content found in
+                             the document fetched by ChromiumLoader.""")
 
         parsed_content = document[0].page_content
 
         if (isinstance(self.llm_model, ChatOpenAI) or isinstance(self.llm_model, AzureChatOpenAI)) and not self.script_creator or self.force and not self.script_creator and not self.openai_md_enabled:
diff --git a/scrapegraphai/telemetry/telemetry.py b/scrapegraphai/telemetry/telemetry.py
index 655b2cc5..61af900c 100644
--- a/scrapegraphai/telemetry/telemetry.py
+++ b/scrapegraphai/telemetry/telemetry.py
@@ -14,7 +14,6 @@
 or:
 export SCRAPEGRAPHAI_TELEMETRY_ENABLED=false
 """
-
 import configparser
 import functools
 import importlib.metadata
@@ -68,14 +67,16 @@ def _check_config_and_environ_for_telemetry_flag(
         try:
             telemetry_enabled = config_obj.getboolean("DEFAULT", "telemetry_enabled")
         except ValueError as e:
-            logger.debug(f"Unable to parse value for `telemetry_enabled` from config. Encountered {e}")
+            logger.debug(f"""Unable to parse value for
+                         `telemetry_enabled` from config. Encountered {e}""")
     if os.environ.get("SCRAPEGRAPHAI_TELEMETRY_ENABLED") is not None:
         env_value = os.environ.get("SCRAPEGRAPHAI_TELEMETRY_ENABLED")
         config_obj["DEFAULT"]["telemetry_enabled"] = env_value
         try:
             telemetry_enabled = config_obj.getboolean("DEFAULT", "telemetry_enabled")
         except ValueError as e:
-            logger.debug(f"Unable to parse value for `SCRAPEGRAPHAI_TELEMETRY_ENABLED` from environment. Encountered {e}")
+            logger.debug(f"""Unable to parse value for `SCRAPEGRAPHAI_TELEMETRY_ENABLED`
+                         from environment. Encountered {e}""")
     return telemetry_enabled
 
 
@@ -94,7 +95,6 @@ def _check_config_and_environ_for_telemetry_flag(
     "telemetry_version": "0.0.3",
 }
 
-
 def disable_telemetry():
     """
     function for disabling the telemetries
@@ -102,7 +102,6 @@ def disable_telemetry():
     global g_telemetry_enabled
     g_telemetry_enabled = False
 
-
 def is_telemetry_enabled() -> bool:
     """
     function for checking if a telemetry is enables
@@ -122,7 +121,6 @@ def is_telemetry_enabled() -> bool:
     else:
         return False
 
-
 def _send_event_json(event_json: dict):
     headers = {
         "Content-Type": "application/json",
@@ -141,7 +139,6 @@ def _send_event_json(event_json: dict):
     else:
         logger.debug(f"Telemetry data sent: {data}")
 
-
 def send_event_json(event_json: dict):
     """
     fucntion for sending event json
@@ -154,7 +151,6 @@ def send_event_json(event_json: dict):
     except Exception as e:
         logger.debug(f"Failed to send telemetry data in a thread: {e}")
 
-
 def log_event(event: str, properties: Dict[str, any]):
     """
     function for logging the events
@@ -167,7 +163,6 @@ def log_event(event: str, properties: Dict[str, any]):
     }
     send_event_json(event_json)
 
-
 def log_graph_execution(graph_name: str, source: str, prompt:str, schema:dict,
                         llm_model: str, embedder_model: str, source_type: str,
                         execution_time: float, content: str = None, response: dict = None,
@@ -193,8 +188,10 @@ def log_graph_execution(graph_name: str, source: str, prompt:str, schema:dict,
     }
     log_event("graph_execution", properties)
 
-
 def capture_function_usage(call_fn: Callable) -> Callable:
+    """
+    function that captures the usage
+    """
     @functools.wraps(call_fn)
     def wrapped_fn(*args, **kwargs):
         try:
diff --git a/scrapegraphai/utils/convert_to_md.py b/scrapegraphai/utils/convert_to_md.py
index ff0bbbd7..2f31c3a1 100644
--- a/scrapegraphai/utils/convert_to_md.py
+++ b/scrapegraphai/utils/convert_to_md.py
@@ -24,6 +24,7 @@ def convert_to_md(html: str, url: str = None) -> str:
     h = html2text.HTML2Text()
     h.ignore_links = False
     h.body_width = 0
+
     if url is not None:
         parsed_url = urlparse(url)
         domain = f"{parsed_url.scheme}://{parsed_url.netloc}"
diff --git a/scrapegraphai/utils/copy.py b/scrapegraphai/utils/copy.py
index 838f3c05..0cdda362 100644
--- a/scrapegraphai/utils/copy.py
+++ b/scrapegraphai/utils/copy.py
@@ -1,3 +1,6 @@
+"""
+copy module
+"""
 import copy
 from typing import Any
 
diff --git a/scrapegraphai/utils/parse_state_keys.py b/scrapegraphai/utils/parse_state_keys.py
index f4bd2ea5..79de329c 100644
--- a/scrapegraphai/utils/parse_state_keys.py
+++ b/scrapegraphai/utils/parse_state_keys.py
@@ -3,7 +3,6 @@
 """
 import re
 
-
 def parse_expression(expression, state: dict) -> list:
     """
     Parses a complex boolean expression involving state keys.
@@ -22,7 +21,8 @@ def parse_expression(expression, state: dict) -> list:
 
     Example:
     >>> parse_expression("user_input & (relevant_chunks | parsed_document | document)",
-            {"user_input": None, "document": None, "parsed_document": None, "relevant_chunks": None})
+            {"user_input": None, "document": None,
+            "parsed_document": None, "relevant_chunks": None})
     ['user_input', 'relevant_chunks', 'parsed_document', 'document']
 
     This function evaluates the expression to determine the
@@ -69,7 +69,6 @@ def evaluate_simple_expression(exp):
                 return [elem.strip() for elem in and_segment if elem.strip() in state]
         return []
 
-    # Helper function to evaluate expressions with parentheses
     def evaluate_expression(expression):
         while '(' in expression:
             start = expression.rfind('(')
diff --git a/scrapegraphai/utils/save_audio_from_bytes.py b/scrapegraphai/utils/save_audio_from_bytes.py
index 2bad3106..aeef411c 100644
--- a/scrapegraphai/utils/save_audio_from_bytes.py
+++ b/scrapegraphai/utils/save_audio_from_bytes.py
@@ -4,7 +4,6 @@
 from pathlib import Path
 from typing import Union
 
-
 def save_audio_from_bytes(byte_response: bytes, output_path: Union[str, Path]) -> None:
     """
     Saves the byte response as an audio file to the specified path.
diff --git a/scrapegraphai/utils/token_calculator.py b/scrapegraphai/utils/token_calculator.py
index c5e5fbbb..475a0a14 100644
--- a/scrapegraphai/utils/token_calculator.py
+++ b/scrapegraphai/utils/token_calculator.py
@@ -5,7 +5,6 @@
 import tiktoken
 from ..helpers.models_tokens import models_tokens
 
-
 def truncate_text_tokens(text: str, model: str, encoding_name: str) -> List[str]:
     """
     Truncates text into chunks that are small enough to be processed by specified llm models.