From e53766b16e89254f945f9b54b38445a24f8b81f2 Mon Sep 17 00:00:00 2001
From: VinciGit00
Date: Tue, 14 May 2024 15:20:39 +0200
Subject: [PATCH 1/2] feat: add logger integration

---
 scrapegraphai/nodes/fetch_node.py             |  9 +-
 .../nodes/generate_answer_csv_node.py         |  4 +-
 scrapegraphai/nodes/generate_answer_node.py   |  4 +-
 .../nodes/generate_answer_pdf_node.py         |  4 +-
 scrapegraphai/nodes/generate_scraper_node.py  |  4 +-
 scrapegraphai/nodes/get_probable_tags_node.py | 13 +-
 scrapegraphai/nodes/graph_iterator_node.py    |  2 +-
 scrapegraphai/nodes/image_to_text_node.py     |  4 +-
 scrapegraphai/nodes/merge_answers_node.py     |  5 +-
 scrapegraphai/nodes/parse_node.py             |  5 +-
 scrapegraphai/nodes/rag_node.py               |  6 +-
 scrapegraphai/nodes/robots_node.py            | 11 +-
 scrapegraphai/nodes/search_internet_node.py   |  8 +-
 scrapegraphai/nodes/search_link_node.py       |  5 +-
 scrapegraphai/nodes/text_to_speech_node.py    |  5 +-
 scrapegraphai/utils/__init__.py               |  1 +
 scrapegraphai/utils/logging.py                | 137 ++++++++++++++++++
 17 files changed, 194 insertions(+), 33 deletions(-)
 create mode 100644 scrapegraphai/utils/logging.py

diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py
index 1edefdbd..39463057 100644
--- a/scrapegraphai/nodes/fetch_node.py
+++ b/scrapegraphai/nodes/fetch_node.py
@@ -13,7 +13,7 @@
 from ..docloaders import ChromiumLoader
 from .base_node import BaseNode
 from ..utils.cleanup_html import cleanup_html
-
+from ..utils.logging import get_logger
 
 class FetchNode(BaseNode):
     """
@@ -74,7 +74,8 @@ def execute(self, state):
             necessary information to perform the operation is missing.
         """
         if self.verbose:
-            print(f"--- Executing {self.node_name} Node ---")
+            logger = get_logger("fetch node")
+            logger.info(f"--- Executing {self.node_name} Node ---")
 
         # Interpret input keys based on the provided input expression
         input_keys = self.get_input_keys(state)
@@ -128,7 +129,7 @@ def execute(self, state):
                 cleanedup_html = cleanup_html(response.text, source)
                 compressed_document = [Document(page_content=cleanedup_html)]
             else:
-                print(f"Failed to retrieve contents from the webpage at url: {source}")
+                logger.warning(f"Failed to retrieve contents from the webpage at url: {source}")
 
         else:
             loader_kwargs = {}
@@ -144,4 +145,4 @@ def execute(self, state):
             ]
 
         state.update({self.output[0]: compressed_document})
-        return state
\ No newline at end of file
+        return state
diff --git a/scrapegraphai/nodes/generate_answer_csv_node.py b/scrapegraphai/nodes/generate_answer_csv_node.py
index 53f7121b..f3f5b7ec 100644
--- a/scrapegraphai/nodes/generate_answer_csv_node.py
+++ b/scrapegraphai/nodes/generate_answer_csv_node.py
@@ -9,6 +9,7 @@
 from langchain.prompts import PromptTemplate
 from langchain_core.output_parsers import JsonOutputParser
 from langchain_core.runnables import RunnableParallel
+from ..utils.logging import get_logger
 
 # Imports from the library
 from .base_node import BaseNode
@@ -72,7 +73,8 @@ def execute(self, state):
         """
 
         if self.verbose:
-            print(f"--- Executing {self.node_name} Node ---")
+            logger = get_logger("generate answer csv node")
+            logger.info(f"--- Executing {self.node_name} Node ---")
 
         # Interpret input keys based on the provided input expression
         input_keys = self.get_input_keys(state)
diff --git a/scrapegraphai/nodes/generate_answer_node.py b/scrapegraphai/nodes/generate_answer_node.py
index f554f8d9..beeac15a 100644
--- a/scrapegraphai/nodes/generate_answer_node.py
+++ b/scrapegraphai/nodes/generate_answer_node.py
@@ -10,6 +10,7 @@
 from langchain.prompts import PromptTemplate
 from langchain_core.output_parsers import JsonOutputParser
 from langchain_core.runnables import RunnableParallel
+from ..utils.logging import get_logger
 
 # Imports from the library
 from .base_node import BaseNode
@@ -59,7 +60,8 @@ def execute(self, state: dict) -> dict:
         """
 
         if self.verbose:
-            print(f"--- Executing {self.node_name} Node ---")
+            logger = get_logger("generate answer node")
+            logger.info(f"--- Executing {self.node_name} Node ---")
 
         # Interpret input keys based on the provided input expression
         input_keys = self.get_input_keys(state)
diff --git a/scrapegraphai/nodes/generate_answer_pdf_node.py b/scrapegraphai/nodes/generate_answer_pdf_node.py
index 31839d22..4a42df23 100644
--- a/scrapegraphai/nodes/generate_answer_pdf_node.py
+++ b/scrapegraphai/nodes/generate_answer_pdf_node.py
@@ -9,6 +9,7 @@
 from langchain.prompts import PromptTemplate
 from langchain_core.output_parsers import JsonOutputParser
 from langchain_core.runnables import RunnableParallel
+from ..utils.logging import get_logger
 
 # Imports from the library
 from .base_node import BaseNode
@@ -72,7 +73,8 @@ def execute(self, state):
         """
 
         if self.verbose:
-            print(f"--- Executing {self.node_name} Node ---")
+            logger = get_logger("generate answer pdf node")
+            logger.info(f"--- Executing {self.node_name} Node ---")
 
         # Interpret input keys based on the provided input expression
         input_keys = self.get_input_keys(state)
diff --git a/scrapegraphai/nodes/generate_scraper_node.py b/scrapegraphai/nodes/generate_scraper_node.py
index 804635de..a6a8dc00 100644
--- a/scrapegraphai/nodes/generate_scraper_node.py
+++ b/scrapegraphai/nodes/generate_scraper_node.py
@@ -10,6 +10,7 @@
 from langchain.prompts import PromptTemplate
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnableParallel
+from ..utils.logging import get_logger
 
 # Imports from the library
 from .base_node import BaseNode
@@ -63,7 +64,8 @@ def execute(self, state: dict) -> dict:
         """
 
         if self.verbose:
-            print(f"--- Executing {self.node_name} Node ---")
+            logger = get_logger("generate scraper node")
+            logger.info(f"--- Executing {self.node_name} Node ---")
 
         # Interpret input keys based on the provided input expression
         input_keys = self.get_input_keys(state)
diff --git a/scrapegraphai/nodes/get_probable_tags_node.py b/scrapegraphai/nodes/get_probable_tags_node.py
index e970c285..b0c2b41d 100644
--- a/scrapegraphai/nodes/get_probable_tags_node.py
+++ b/scrapegraphai/nodes/get_probable_tags_node.py
@@ -6,7 +6,7 @@
 from langchain.output_parsers import CommaSeparatedListOutputParser
 from langchain.prompts import PromptTemplate
 from .base_node import BaseNode
-
+from ..utils.logging import get_logger
 
 class GetProbableTagsNode(BaseNode):
     """
@@ -25,11 +25,12 @@ class GetProbableTagsNode(BaseNode):
         node_name (str): The unique identifier name for the node, defaulting to "GetProbableTags".
""" - def __init__(self, input: str, output: List[str], model_config: dict, + def __init__(self, input: str, output: List[str], node_config: dict, node_name: str = "GetProbableTags"): - super().__init__(node_name, "node", input, output, 2, model_config) + super().__init__(node_name, "node", input, output, 2, node_config) - self.llm_model = model_config["llm_model"] + self.llm_model = node_config["llm_model"] + self.verbose = False if node_config is None else node_config.get("verbose", False) def execute(self, state: dict) -> dict: """ @@ -49,7 +50,9 @@ def execute(self, state: dict) -> dict: necessary information for generating tag predictions is missing. """ - print(f"--- Executing {self.node_name} Node ---") + if self.verbose: + logger = get_logger("get probable tags node") + logger.info(f"--- Executing {self.node_name} Node ---") # Interpret input keys based on the provided input expression input_keys = self.get_input_keys(state) diff --git a/scrapegraphai/nodes/graph_iterator_node.py b/scrapegraphai/nodes/graph_iterator_node.py index 8a71319a..b6c7690e 100644 --- a/scrapegraphai/nodes/graph_iterator_node.py +++ b/scrapegraphai/nodes/graph_iterator_node.py @@ -5,7 +5,7 @@ import asyncio import copy from typing import List, Optional - +from ..utils.logging import get_logger from tqdm.asyncio import tqdm from .base_node import BaseNode diff --git a/scrapegraphai/nodes/image_to_text_node.py b/scrapegraphai/nodes/image_to_text_node.py index 27f09016..07ef3be7 100644 --- a/scrapegraphai/nodes/image_to_text_node.py +++ b/scrapegraphai/nodes/image_to_text_node.py @@ -4,6 +4,7 @@ from typing import List, Optional from .base_node import BaseNode +from ..utils.logging import get_logger class ImageToTextNode(BaseNode): @@ -42,7 +43,8 @@ def execute(self, state: dict) -> dict: """ if self.verbose: - print("---GENERATING TEXT FROM IMAGE---") + logger = get_logger("image to text node") + logger.info(f"--- Executing {self.node_name} Node ---") input_keys = self.get_input_keys(state) input_data = [state[key] for key in input_keys] diff --git a/scrapegraphai/nodes/merge_answers_node.py b/scrapegraphai/nodes/merge_answers_node.py index 63ed6afa..da115005 100644 --- a/scrapegraphai/nodes/merge_answers_node.py +++ b/scrapegraphai/nodes/merge_answers_node.py @@ -5,7 +5,7 @@ # Imports from standard library from typing import List, Optional from tqdm import tqdm - +from ..utils.logging import get_logger # Imports from Langchain from langchain.prompts import PromptTemplate from langchain_core.output_parsers import JsonOutputParser @@ -54,7 +54,8 @@ def execute(self, state: dict) -> dict: """ if self.verbose: - print(f"--- Executing {self.node_name} Node ---") + logger = get_logger("fetch node") + logger.info(f"--- Executing {self.node_name} Node ---") # Interpret input keys based on the provided input expression input_keys = self.get_input_keys(state) diff --git a/scrapegraphai/nodes/parse_node.py b/scrapegraphai/nodes/parse_node.py index 2cd7eb33..436cddc4 100644 --- a/scrapegraphai/nodes/parse_node.py +++ b/scrapegraphai/nodes/parse_node.py @@ -6,7 +6,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.document_transformers import Html2TextTransformer from .base_node import BaseNode - +from ..utils.logging import get_logger class ParseNode(BaseNode): """ @@ -49,7 +49,8 @@ def execute(self, state: dict) -> dict: """ if self.verbose: - print(f"--- Executing {self.node_name} Node ---") + logger = get_logger("fetch node") + logger.info(f"--- Executing {self.node_name} 
Node ---") # Interpret input keys based on the provided input expression input_keys = self.get_input_keys(state) diff --git a/scrapegraphai/nodes/rag_node.py b/scrapegraphai/nodes/rag_node.py index 27d97b6e..fdcdd8e8 100644 --- a/scrapegraphai/nodes/rag_node.py +++ b/scrapegraphai/nodes/rag_node.py @@ -8,6 +8,7 @@ from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline from langchain_community.document_transformers import EmbeddingsRedundantFilter from langchain_community.vectorstores import FAISS +from ..utils.logging import get_logger from .base_node import BaseNode @@ -55,9 +56,10 @@ def execute(self, state: dict) -> dict: KeyError: If the input keys are not found in the state, indicating that the necessary information for compressing the content is missing. """ + logger = get_logger("rag node") if self.verbose: - print(f"--- Executing {self.node_name} Node ---") + logger.info(f"--- Executing {self.node_name} Node ---") # Interpret input keys based on the provided input expression input_keys = self.get_input_keys(state) @@ -80,7 +82,7 @@ def execute(self, state: dict) -> dict: chunked_docs.append(doc) if self.verbose: - print("--- (updated chunks metadata) ---") + logger.info("--- (updated chunks metadata) ---") # check if embedder_model is provided, if not use llm_model self.embedder_model = self.embedder_model if self.embedder_model else self.llm_model diff --git a/scrapegraphai/nodes/robots_node.py b/scrapegraphai/nodes/robots_node.py index 7aea6cae..ab0c7919 100644 --- a/scrapegraphai/nodes/robots_node.py +++ b/scrapegraphai/nodes/robots_node.py @@ -9,7 +9,7 @@ from langchain.output_parsers import CommaSeparatedListOutputParser from .base_node import BaseNode from ..helpers import robots_dictionary - +from ..utils.logging import get_logger class RobotsNode(BaseNode): """ @@ -61,9 +61,10 @@ def execute(self, state: dict) -> dict: ValueError: If the website is not scrapeable based on the robots.txt file and scraping is not enforced. 
""" + logger = get_logger("robots node") if self.verbose: - print(f"--- Executing {self.node_name} Node ---") + logger.info(f"--- Executing {self.node_name} Node ---") # Interpret input keys based on the provided input expression input_keys = self.get_input_keys(state) @@ -121,17 +122,17 @@ def execute(self, state: dict) -> dict: if "no" in is_scrapable: if self.verbose: - print("\033[31m(Scraping this website is not allowed)\033[0m") + logger.warning("\033[31m(Scraping this website is not allowed)\033[0m") if not self.force_scraping: raise ValueError( 'The website you selected is not scrapable') else: if self.verbose: - print("\033[33m(WARNING: Scraping this website is not allowed but you decided to force it)\033[0m") + logger.warning("\033[33m(WARNING: Scraping this website is not allowed but you decided to force it)\033[0m") else: if self.verbose: - print("\033[32m(Scraping this website is allowed)\033[0m") + logger.warning("\033[32m(Scraping this website is allowed)\033[0m") state.update({self.output[0]: is_scrapable}) return state diff --git a/scrapegraphai/nodes/search_internet_node.py b/scrapegraphai/nodes/search_internet_node.py index 87f8dcb2..e2443a25 100644 --- a/scrapegraphai/nodes/search_internet_node.py +++ b/scrapegraphai/nodes/search_internet_node.py @@ -7,7 +7,7 @@ from langchain.prompts import PromptTemplate from ..utils.research_web import search_on_web from .base_node import BaseNode - +from ..utils.logging import get_logger class SearchInternetNode(BaseNode): """ @@ -54,9 +54,10 @@ def execute(self, state: dict) -> dict: KeyError: If the input keys are not found in the state, indicating that the necessary information for generating the answer is missing. """ + logger = get_logger("search interne node") if self.verbose: - print(f"--- Executing {self.node_name} Node ---") + logger.info(f"--- Executing {self.node_name} Node ---") input_keys = self.get_input_keys(state) @@ -88,7 +89,8 @@ def execute(self, state: dict) -> dict: search_query = search_answer.invoke({"user_prompt": user_prompt})[0] if self.verbose: - print(f"Search Query: {search_query}") + logger.info(f"Search Query: {search_query}") + answer = search_on_web( query=search_query, max_results=self.max_results) diff --git a/scrapegraphai/nodes/search_link_node.py b/scrapegraphai/nodes/search_link_node.py index bf64b5d9..93c60e4a 100644 --- a/scrapegraphai/nodes/search_link_node.py +++ b/scrapegraphai/nodes/search_link_node.py @@ -5,7 +5,7 @@ # Imports from standard library from typing import List, Optional from tqdm import tqdm - +from ..utils.logging import get_logger # Imports from Langchain from langchain.prompts import PromptTemplate @@ -59,7 +59,8 @@ def execute(self, state: dict) -> dict: """ if self.verbose: - print(f"--- Executing {self.node_name} Node ---") + logger = get_logger("search link node") + logger.info(f"--- Executing {self.node_name} Node ---") # Interpret input keys based on the provided input expression input_keys = self.get_input_keys(state) diff --git a/scrapegraphai/nodes/text_to_speech_node.py b/scrapegraphai/nodes/text_to_speech_node.py index d9fe7ca4..06ed8d5f 100644 --- a/scrapegraphai/nodes/text_to_speech_node.py +++ b/scrapegraphai/nodes/text_to_speech_node.py @@ -4,7 +4,7 @@ from typing import List, Optional from .base_node import BaseNode - +from ..utils.logging import get_logger class TextToSpeechNode(BaseNode): """ @@ -45,7 +45,8 @@ def execute(self, state: dict) -> dict: """ if self.verbose: - print(f"--- Executing {self.node_name} Node ---") + logger = get_logger("text to 
speach node") + logger.info(f"--- Executing {self.node_name} Node ---") # Interpret input keys based on the provided input expression input_keys = self.get_input_keys(state) diff --git a/scrapegraphai/utils/__init__.py b/scrapegraphai/utils/__init__.py index 72a8b96c..ee647466 100644 --- a/scrapegraphai/utils/__init__.py +++ b/scrapegraphai/utils/__init__.py @@ -9,3 +9,4 @@ from .save_audio_from_bytes import save_audio_from_bytes from .sys_dynamic_import import dynamic_import, srcfile_import from .cleanup_html import cleanup_html +from .logging import * \ No newline at end of file diff --git a/scrapegraphai/utils/logging.py b/scrapegraphai/utils/logging.py new file mode 100644 index 00000000..428fb8a7 --- /dev/null +++ b/scrapegraphai/utils/logging.py @@ -0,0 +1,137 @@ +"""A centralized logging system for any library + +source code inspired by https://github.com/huggingface/transformers/blob/main/src/transformers/utils/logging.py +""" +import logging +import os +import sys +import threading +from functools import lru_cache + + +_library_name = __name__.split(".", maxsplit=1)[0] + +_default_handler = None +_default_logging_level = logging.WARNING + +_semaphore = threading.Lock() + + +def _get_library_root_logger() -> logging.Logger: + return logging.getLogger(_library_name) + + +def _set_library_root_logger() -> None: + global _default_handler + + with _semaphore: + if _default_handler: return + + _default_handler = logging.StreamHandler() # sys.stderr as stream + + # https://github.com/pyinstaller/pyinstaller/issues/7334#issuecomment-1357447176 + if sys.stderr is None: + sys.stderr = open(os.devnull, "w") + + _default_handler.flush = sys.stderr.flush + + library_root_logger = _get_library_root_logger() + library_root_logger.addHandler(_default_handler) + library_root_logger.setLevel(_default_logging_level) + library_root_logger.propagate = False + + +def get_logger(name: str | None = None) -> logging.Logger: + _set_library_root_logger() + return logging.getLogger(name or _library_name) + + +def get_verbosity() -> int: + _set_library_root_logger() + return _get_library_root_logger().getEffectiveLevel() + + +def set_verbosity(verbosity: int) -> None: + _set_library_root_logger() + _get_library_root_logger().setLevel(verbosity) + + +def set_verbosity_debug() -> None: + set_verbosity(logging.DEBUG) + + +def set_verbosity_info() -> None: + set_verbosity(logging.INFO) + + +def set_verbosity_warning() -> None: + set_verbosity(logging.WARNING) + + +def set_verbosity_error() -> None: + set_verbosity(logging.ERROR) + + +def set_verbosity_fatal() -> None: + set_verbosity(logging.FATAL) + + +def set_handler(handler: logging.Handler) -> None: + _set_library_root_logger() + + assert handler is not None + + _get_library_root_logger().addHandler(handler) + + +def set_default_handler() -> None: + set_handler(_default_handler) + + +def unset_handler(handler: logging.Handler) -> None: + _set_library_root_logger() + + assert handler is not None + + _get_library_root_logger().removeHandler(handler) + + +def unset_default_handler() -> None: + unset_handler(_default_handler) + + +def set_propagation() -> None: + _get_library_root_logger().propagate = True + + +def unset_propagation() -> None: + _get_library_root_logger().propagate = False + + +def set_formatting() -> None: + """sets formatting for all handlers bound to the root logger + + ``` + [levelname|filename|line number] time >> message + ``` + """ + formatter = logging.Formatter( + "[%(levelname)s|%(filename)s:%(lineno)s] %(asctime)s >> %(message)s" + ) + 
+    for handler in _get_library_root_logger().handlers:
+        handler.setFormatter(formatter)
+
+
+def unset_formatting() -> None:
+    for handler in _get_library_root_logger().handlers:
+        handler.setFormatter(None)
+
+
+@lru_cache(None)
+def warning_once(self, *args, **kwargs):
+    """emits warning logs with the same message only once"""
+    self.warning(*args, **kwargs)
+
+
+logging.Logger.warning_once = warning_once
\ No newline at end of file

From 05890835f5224dd1d8411f44142060ba76d9c3f9 Mon Sep 17 00:00:00 2001
From: VinciGit00
Date: Wed, 15 May 2024 10:54:53 +0200
Subject: [PATCH 2/2] refactoring of loggers

---
 scrapegraphai/graphs/abstract_graph.py          | 5 +++--
 scrapegraphai/nodes/fetch_node.py               | 5 ++---
 scrapegraphai/nodes/generate_answer_csv_node.py | 3 +--
 scrapegraphai/nodes/generate_answer_node.py     | 3 +--
 scrapegraphai/nodes/generate_answer_pdf_node.py | 3 +--
 scrapegraphai/nodes/generate_scraper_node.py    | 3 +--
 scrapegraphai/nodes/get_probable_tags_node.py   | 3 +--
 scrapegraphai/nodes/graph_iterator_node.py      | 3 ++-
 scrapegraphai/nodes/image_to_text_node.py       | 3 +--
 scrapegraphai/nodes/merge_answers_node.py       | 3 +--
 scrapegraphai/nodes/parse_node.py               | 3 +--
 scrapegraphai/nodes/rag_node.py                 | 7 +++----
 scrapegraphai/nodes/robots_node.py              | 6 +++---
 scrapegraphai/nodes/search_internet_node.py     | 5 ++---
 scrapegraphai/nodes/search_link_node.py         | 3 +--
 scrapegraphai/nodes/text_to_speech_node.py      | 3 +--
 16 files changed, 25 insertions(+), 36 deletions(-)

diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py
index 28eb27b2..68652dc8 100644
--- a/scrapegraphai/graphs/abstract_graph.py
+++ b/scrapegraphai/graphs/abstract_graph.py
@@ -10,7 +10,7 @@
 from ..helpers import models_tokens
 from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Ollama, OpenAI, Anthropic
 from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
-
+from ..utils.logging import get_logger
 
 class AbstractGraph(ABC):
     """
@@ -61,6 +61,7 @@ def __init__(self, prompt: str, config: dict, source: Optional[str] = None):
         self.headless = True if config is None else config.get(
             "headless", True)
         self.loader_kwargs = config.get("loader_kwargs", {})
+        self.logger = get_logger("graph")
 
         common_params = {"headless": self.headless,
                          "verbose": self.verbose,
@@ -79,7 +80,7 @@ def set_common_params(self, params: dict, overwrite=False):
 
         for node in self.graph.nodes:
             node.update_config(params, overwrite)
-    
+
     def _set_model_token(self, llm):
 
         if 'Azure' in str(type(llm)):
diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py
index 39463057..6a87d9f4 100644
--- a/scrapegraphai/nodes/fetch_node.py
+++ b/scrapegraphai/nodes/fetch_node.py
@@ -74,8 +74,7 @@ def execute(self, state):
             necessary information to perform the operation is missing.
""" if self.verbose: - logger = get_logger("fetch node") - logger.info(f"--- Executing {self.node_name} Node ---") + self.logger.info(f"--- Executing {self.node_name} Node ---") # Interpret input keys based on the provided input expression input_keys = self.get_input_keys(state) @@ -129,7 +128,7 @@ def execute(self, state): cleanedup_html = cleanup_html(response.text, source) compressed_document = [Document(page_content=cleanedup_html)] else: - logger.warning(f"Failed to retrieve contents from the webpage at url: {source}") + self.logger.warning(f"Failed to retrieve contents from the webpage at url: {source}") else: loader_kwargs = {} diff --git a/scrapegraphai/nodes/generate_answer_csv_node.py b/scrapegraphai/nodes/generate_answer_csv_node.py index f3f5b7ec..cf32b411 100644 --- a/scrapegraphai/nodes/generate_answer_csv_node.py +++ b/scrapegraphai/nodes/generate_answer_csv_node.py @@ -1,4 +1,5 @@ """ +gg Module for generating the answer node """ # Imports from standard library @@ -73,8 +74,7 @@ def execute(self, state): """ if self.verbose: - logger = get_logger("generate_answer csv node") - logger.info(f"--- Executing {self.node_name} Node ---") + self.logger.info(f"--- Executing {self.node_name} Node ---") # Interpret input keys based on the provided input expression input_keys = self.get_input_keys(state) diff --git a/scrapegraphai/nodes/generate_answer_node.py b/scrapegraphai/nodes/generate_answer_node.py index beeac15a..234e339e 100644 --- a/scrapegraphai/nodes/generate_answer_node.py +++ b/scrapegraphai/nodes/generate_answer_node.py @@ -60,8 +60,7 @@ def execute(self, state: dict) -> dict: """ if self.verbose: - logger = get_logger("generate answer node") - logger.info(f"--- Executing {self.node_name} Node ---") + self.logger.info(f"--- Executing {self.node_name} Node ---") # Interpret input keys based on the provided input expression input_keys = self.get_input_keys(state) diff --git a/scrapegraphai/nodes/generate_answer_pdf_node.py b/scrapegraphai/nodes/generate_answer_pdf_node.py index 4a42df23..1e7e0edf 100644 --- a/scrapegraphai/nodes/generate_answer_pdf_node.py +++ b/scrapegraphai/nodes/generate_answer_pdf_node.py @@ -73,8 +73,7 @@ def execute(self, state): """ if self.verbose: - logger = get_logger("generate answer pdf node") - logger.info(f"--- Executing {self.node_name} Node ---") + self.logger.info(f"--- Executing {self.node_name} Node ---") # Interpret input keys based on the provided input expression input_keys = self.get_input_keys(state) diff --git a/scrapegraphai/nodes/generate_scraper_node.py b/scrapegraphai/nodes/generate_scraper_node.py index a6a8dc00..d35db233 100644 --- a/scrapegraphai/nodes/generate_scraper_node.py +++ b/scrapegraphai/nodes/generate_scraper_node.py @@ -64,8 +64,7 @@ def execute(self, state: dict) -> dict: """ if self.verbose: - logger = get_logger("generate scraper node") - logger.info(f"--- Executing {self.node_name} Node ---") + self.logger.info(f"--- Executing {self.node_name} Node ---") # Interpret input keys based on the provided input expression input_keys = self.get_input_keys(state) diff --git a/scrapegraphai/nodes/get_probable_tags_node.py b/scrapegraphai/nodes/get_probable_tags_node.py index b0c2b41d..39b437a5 100644 --- a/scrapegraphai/nodes/get_probable_tags_node.py +++ b/scrapegraphai/nodes/get_probable_tags_node.py @@ -51,8 +51,7 @@ def execute(self, state: dict) -> dict: """ if self.verbose: - logger = get_logger("get probable tags node") - logger.info(f"--- Executing {self.node_name} Node ---") + self.logger.info(f"--- Executing 
{self.node_name} Node ---") # Interpret input keys based on the provided input expression input_keys = self.get_input_keys(state) diff --git a/scrapegraphai/nodes/graph_iterator_node.py b/scrapegraphai/nodes/graph_iterator_node.py index b6c7690e..063466a9 100644 --- a/scrapegraphai/nodes/graph_iterator_node.py +++ b/scrapegraphai/nodes/graph_iterator_node.py @@ -60,7 +60,8 @@ def execute(self, state: dict) -> dict: batchsize = self.node_config.get("batchsize", _default_batchsize) if self.verbose: - print(f"--- Executing {self.node_name} Node with batchsize {batchsize} ---") + self.logger.info(f"--- Executing {self.node_name} Node with batchsize {batchsize} ---") + try: eventloop = asyncio.get_event_loop() diff --git a/scrapegraphai/nodes/image_to_text_node.py b/scrapegraphai/nodes/image_to_text_node.py index 07ef3be7..314e26bc 100644 --- a/scrapegraphai/nodes/image_to_text_node.py +++ b/scrapegraphai/nodes/image_to_text_node.py @@ -43,8 +43,7 @@ def execute(self, state: dict) -> dict: """ if self.verbose: - logger = get_logger("image to text node") - logger.info(f"--- Executing {self.node_name} Node ---") + self.logger.info(f"--- Executing {self.node_name} Node ---") input_keys = self.get_input_keys(state) input_data = [state[key] for key in input_keys] diff --git a/scrapegraphai/nodes/merge_answers_node.py b/scrapegraphai/nodes/merge_answers_node.py index da115005..8d8c4e82 100644 --- a/scrapegraphai/nodes/merge_answers_node.py +++ b/scrapegraphai/nodes/merge_answers_node.py @@ -54,8 +54,7 @@ def execute(self, state: dict) -> dict: """ if self.verbose: - logger = get_logger("fetch node") - logger.info(f"--- Executing {self.node_name} Node ---") + self.ogger.info(f"--- Executing {self.node_name} Node ---") # Interpret input keys based on the provided input expression input_keys = self.get_input_keys(state) diff --git a/scrapegraphai/nodes/parse_node.py b/scrapegraphai/nodes/parse_node.py index 436cddc4..2f49106f 100644 --- a/scrapegraphai/nodes/parse_node.py +++ b/scrapegraphai/nodes/parse_node.py @@ -49,8 +49,7 @@ def execute(self, state: dict) -> dict: """ if self.verbose: - logger = get_logger("fetch node") - logger.info(f"--- Executing {self.node_name} Node ---") + self.logger.info(f"--- Executing {self.node_name} Node ---") # Interpret input keys based on the provided input expression input_keys = self.get_input_keys(state) diff --git a/scrapegraphai/nodes/rag_node.py b/scrapegraphai/nodes/rag_node.py index fdcdd8e8..dae666cf 100644 --- a/scrapegraphai/nodes/rag_node.py +++ b/scrapegraphai/nodes/rag_node.py @@ -56,10 +56,9 @@ def execute(self, state: dict) -> dict: KeyError: If the input keys are not found in the state, indicating that the necessary information for compressing the content is missing. 
""" - logger = get_logger("rag node") if self.verbose: - logger.info(f"--- Executing {self.node_name} Node ---") + self.logger.info(f"--- Executing {self.node_name} Node ---") # Interpret input keys based on the provided input expression input_keys = self.get_input_keys(state) @@ -82,7 +81,7 @@ def execute(self, state: dict) -> dict: chunked_docs.append(doc) if self.verbose: - logger.info("--- (updated chunks metadata) ---") + self.logger.info("--- (updated chunks metadata) ---") # check if embedder_model is provided, if not use llm_model self.embedder_model = self.embedder_model if self.embedder_model else self.llm_model @@ -110,7 +109,7 @@ def execute(self, state: dict) -> dict: compressed_docs = compression_retriever.invoke(user_prompt) if self.verbose: - print("--- (tokens compressed and vector stored) ---") + self.logger.info("--- (tokens compressed and vector stored) ---") state.update({self.output[0]: compressed_docs}) return state diff --git a/scrapegraphai/nodes/robots_node.py b/scrapegraphai/nodes/robots_node.py index ab0c7919..29b71800 100644 --- a/scrapegraphai/nodes/robots_node.py +++ b/scrapegraphai/nodes/robots_node.py @@ -122,17 +122,17 @@ def execute(self, state: dict) -> dict: if "no" in is_scrapable: if self.verbose: - logger.warning("\033[31m(Scraping this website is not allowed)\033[0m") + self.logger.warning("\033[31m(Scraping this website is not allowed)\033[0m") if not self.force_scraping: raise ValueError( 'The website you selected is not scrapable') else: if self.verbose: - logger.warning("\033[33m(WARNING: Scraping this website is not allowed but you decided to force it)\033[0m") + self.logger.warning("\033[33m(WARNING: Scraping this website is not allowed but you decided to force it)\033[0m") else: if self.verbose: - logger.warning("\033[32m(Scraping this website is allowed)\033[0m") + self.logger.warning("\033[32m(Scraping this website is allowed)\033[0m") state.update({self.output[0]: is_scrapable}) return state diff --git a/scrapegraphai/nodes/search_internet_node.py b/scrapegraphai/nodes/search_internet_node.py index e2443a25..9611407d 100644 --- a/scrapegraphai/nodes/search_internet_node.py +++ b/scrapegraphai/nodes/search_internet_node.py @@ -54,10 +54,9 @@ def execute(self, state: dict) -> dict: KeyError: If the input keys are not found in the state, indicating that the necessary information for generating the answer is missing. 
""" - logger = get_logger("search interne node") if self.verbose: - logger.info(f"--- Executing {self.node_name} Node ---") + self.logger.info(f"--- Executing {self.node_name} Node ---") input_keys = self.get_input_keys(state) @@ -89,7 +88,7 @@ def execute(self, state: dict) -> dict: search_query = search_answer.invoke({"user_prompt": user_prompt})[0] if self.verbose: - logger.info(f"Search Query: {search_query}") + self.logger.info(f"Search Query: {search_query}") answer = search_on_web( diff --git a/scrapegraphai/nodes/search_link_node.py b/scrapegraphai/nodes/search_link_node.py index 93c60e4a..a06ccdee 100644 --- a/scrapegraphai/nodes/search_link_node.py +++ b/scrapegraphai/nodes/search_link_node.py @@ -59,8 +59,7 @@ def execute(self, state: dict) -> dict: """ if self.verbose: - logger = get_logger("search link node") - logger.info(f"--- Executing {self.node_name} Node ---") + self.logger.info(f"--- Executing {self.node_name} Node ---") # Interpret input keys based on the provided input expression input_keys = self.get_input_keys(state) diff --git a/scrapegraphai/nodes/text_to_speech_node.py b/scrapegraphai/nodes/text_to_speech_node.py index 06ed8d5f..497b2501 100644 --- a/scrapegraphai/nodes/text_to_speech_node.py +++ b/scrapegraphai/nodes/text_to_speech_node.py @@ -45,8 +45,7 @@ def execute(self, state: dict) -> dict: """ if self.verbose: - logger = get_logger("text to speach node") - logger.info(f"--- Executing {self.node_name} Node ---") + self.logger.info(f"--- Executing {self.node_name} Node ---") # Interpret input keys based on the provided input expression input_keys = self.get_input_keys(state)