Skip to content

feat: add logger integration #237

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions scrapegraphai/graphs/abstract_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ..helpers import models_tokens
from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Ollama, OpenAI, Anthropic
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings

from ..utils.logging import get_logger

class AbstractGraph(ABC):
"""
Expand Down Expand Up @@ -61,6 +61,7 @@ def __init__(self, prompt: str, config: dict, source: Optional[str] = None):
self.headless = True if config is None else config.get(
"headless", True)
self.loader_kwargs = config.get("loader_kwargs", {})
self.logger = get_logger("graph")

common_params = {"headless": self.headless,
"verbose": self.verbose,
Expand All @@ -79,7 +80,7 @@ def set_common_params(self, params: dict, overwrite=False):

for node in self.graph.nodes:
node.update_config(params, overwrite)

def _set_model_token(self, llm):

if 'Azure' in str(type(llm)):
Expand Down
10 changes: 5 additions & 5 deletions scrapegraphai/nodes/fetch_node.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""
""""
FetchNode Module
"""

Expand All @@ -13,7 +13,7 @@
from ..docloaders import ChromiumLoader
from .base_node import BaseNode
from ..utils.cleanup_html import cleanup_html

from ..utils.logging import get_logger

class FetchNode(BaseNode):
"""
Expand Down Expand Up @@ -74,7 +74,7 @@ def execute(self, state):
necessary information to perform the operation is missing.
"""
if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
self.logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down Expand Up @@ -128,7 +128,7 @@ def execute(self, state):
cleanedup_html = cleanup_html(response.text, source)
compressed_document = [Document(page_content=cleanedup_html)]
else:
print(f"Failed to retrieve contents from the webpage at url: {source}")
self.logger.warning(f"Failed to retrieve contents from the webpage at url: {source}")

else:
loader_kwargs = {}
Expand All @@ -144,4 +144,4 @@ def execute(self, state):
]

state.update({self.output[0]: compressed_document})
return state
return state
4 changes: 3 additions & 1 deletion scrapegraphai/nodes/generate_answer_csv_node.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""
gg
Module for generating the answer node
"""
# Imports from standard library
Expand All @@ -9,6 +10,7 @@
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableParallel
from ..utils.logging import get_logger

# Imports from the library
from .base_node import BaseNode
Expand Down Expand Up @@ -72,7 +74,7 @@ def execute(self, state):
"""

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
self.logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down
3 changes: 2 additions & 1 deletion scrapegraphai/nodes/generate_answer_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableParallel
from ..utils.logging import get_logger

# Imports from the library
from .base_node import BaseNode
Expand Down Expand Up @@ -59,7 +60,7 @@ def execute(self, state: dict) -> dict:
"""

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
self.logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down
3 changes: 2 additions & 1 deletion scrapegraphai/nodes/generate_answer_pdf_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableParallel
from ..utils.logging import get_logger

# Imports from the library
from .base_node import BaseNode
Expand Down Expand Up @@ -72,7 +73,7 @@ def execute(self, state):
"""

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
self.logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down
3 changes: 2 additions & 1 deletion scrapegraphai/nodes/generate_scraper_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel
from ..utils.logging import get_logger

# Imports from the library
from .base_node import BaseNode
Expand Down Expand Up @@ -63,7 +64,7 @@ def execute(self, state: dict) -> dict:
"""

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
self.logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down
12 changes: 7 additions & 5 deletions scrapegraphai/nodes/get_probable_tags_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate
from .base_node import BaseNode

from ..utils.logging import get_logger

class GetProbableTagsNode(BaseNode):
"""
Expand All @@ -25,11 +25,12 @@ class GetProbableTagsNode(BaseNode):
node_name (str): The unique identifier name for the node, defaulting to "GetProbableTags".
"""

def __init__(self, input: str, output: List[str], model_config: dict,
def __init__(self, input: str, output: List[str], node_config: dict,
node_name: str = "GetProbableTags"):
super().__init__(node_name, "node", input, output, 2, model_config)
super().__init__(node_name, "node", input, output, 2, node_config)

self.llm_model = model_config["llm_model"]
self.llm_model = node_config["llm_model"]
self.verbose = False if node_config is None else node_config.get("verbose", False)

def execute(self, state: dict) -> dict:
"""
Expand All @@ -49,7 +50,8 @@ def execute(self, state: dict) -> dict:
necessary information for generating tag predictions is missing.
"""

print(f"--- Executing {self.node_name} Node ---")
if self.verbose:
self.logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down
5 changes: 3 additions & 2 deletions scrapegraphai/nodes/graph_iterator_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import asyncio
import copy
from typing import List, Optional

from ..utils.logging import get_logger
from tqdm.asyncio import tqdm

from .base_node import BaseNode
Expand Down Expand Up @@ -60,7 +60,8 @@ def execute(self, state: dict) -> dict:
batchsize = self.node_config.get("batchsize", _default_batchsize)

if self.verbose:
print(f"--- Executing {self.node_name} Node with batchsize {batchsize} ---")
self.logger.info(f"--- Executing {self.node_name} Node with batchsize {batchsize} ---")


try:
eventloop = asyncio.get_event_loop()
Expand Down
3 changes: 2 additions & 1 deletion scrapegraphai/nodes/image_to_text_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from typing import List, Optional
from .base_node import BaseNode
from ..utils.logging import get_logger


class ImageToTextNode(BaseNode):
Expand Down Expand Up @@ -42,7 +43,7 @@ def execute(self, state: dict) -> dict:
"""

if self.verbose:
print("---GENERATING TEXT FROM IMAGE---")
self.logger.info(f"--- Executing {self.node_name} Node ---")

input_keys = self.get_input_keys(state)
input_data = [state[key] for key in input_keys]
Expand Down
4 changes: 2 additions & 2 deletions scrapegraphai/nodes/merge_answers_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# Imports from standard library
from typing import List, Optional
from tqdm import tqdm

from ..utils.logging import get_logger
# Imports from Langchain
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
Expand Down Expand Up @@ -54,7 +54,7 @@ def execute(self, state: dict) -> dict:
"""

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
self.ogger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down
4 changes: 2 additions & 2 deletions scrapegraphai/nodes/parse_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_transformers import Html2TextTransformer
from .base_node import BaseNode

from ..utils.logging import get_logger

class ParseNode(BaseNode):
"""
Expand Down Expand Up @@ -49,7 +49,7 @@ def execute(self, state: dict) -> dict:
"""

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
self.logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down
7 changes: 4 additions & 3 deletions scrapegraphai/nodes/rag_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain_community.vectorstores import FAISS
from ..utils.logging import get_logger

from .base_node import BaseNode

Expand Down Expand Up @@ -57,7 +58,7 @@ def execute(self, state: dict) -> dict:
"""

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
self.logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand All @@ -80,7 +81,7 @@ def execute(self, state: dict) -> dict:
chunked_docs.append(doc)

if self.verbose:
print("--- (updated chunks metadata) ---")
self.logger.info("--- (updated chunks metadata) ---")

# check if embedder_model is provided, if not use llm_model
self.embedder_model = self.embedder_model if self.embedder_model else self.llm_model
Expand Down Expand Up @@ -108,7 +109,7 @@ def execute(self, state: dict) -> dict:
compressed_docs = compression_retriever.invoke(user_prompt)

if self.verbose:
print("--- (tokens compressed and vector stored) ---")
self.logger.info("--- (tokens compressed and vector stored) ---")

state.update({self.output[0]: compressed_docs})
return state
Expand Down
11 changes: 6 additions & 5 deletions scrapegraphai/nodes/robots_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from langchain.output_parsers import CommaSeparatedListOutputParser
from .base_node import BaseNode
from ..helpers import robots_dictionary

from ..utils.logging import get_logger

class RobotsNode(BaseNode):
"""
Expand Down Expand Up @@ -61,9 +61,10 @@ def execute(self, state: dict) -> dict:
ValueError: If the website is not scrapeable based on the robots.txt file and
scraping is not enforced.
"""
logger = get_logger("robots node")

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down Expand Up @@ -121,17 +122,17 @@ def execute(self, state: dict) -> dict:

if "no" in is_scrapable:
if self.verbose:
print("\033[31m(Scraping this website is not allowed)\033[0m")
self.logger.warning("\033[31m(Scraping this website is not allowed)\033[0m")

if not self.force_scraping:
raise ValueError(
'The website you selected is not scrapable')
else:
if self.verbose:
print("\033[33m(WARNING: Scraping this website is not allowed but you decided to force it)\033[0m")
self.logger.warning("\033[33m(WARNING: Scraping this website is not allowed but you decided to force it)\033[0m")
else:
if self.verbose:
print("\033[32m(Scraping this website is allowed)\033[0m")
self.logger.warning("\033[32m(Scraping this website is allowed)\033[0m")

state.update({self.output[0]: is_scrapable})
return state
7 changes: 4 additions & 3 deletions scrapegraphai/nodes/search_internet_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from langchain.prompts import PromptTemplate
from ..utils.research_web import search_on_web
from .base_node import BaseNode

from ..utils.logging import get_logger

class SearchInternetNode(BaseNode):
"""
Expand Down Expand Up @@ -56,7 +56,7 @@ def execute(self, state: dict) -> dict:
"""

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
self.logger.info(f"--- Executing {self.node_name} Node ---")

input_keys = self.get_input_keys(state)

Expand Down Expand Up @@ -88,7 +88,8 @@ def execute(self, state: dict) -> dict:
search_query = search_answer.invoke({"user_prompt": user_prompt})[0]

if self.verbose:
print(f"Search Query: {search_query}")
self.logger.info(f"Search Query: {search_query}")


answer = search_on_web(
query=search_query, max_results=self.max_results)
Expand Down
4 changes: 2 additions & 2 deletions scrapegraphai/nodes/search_link_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# Imports from standard library
from typing import List, Optional
from tqdm import tqdm

from ..utils.logging import get_logger

# Imports from Langchain
from langchain.prompts import PromptTemplate
Expand Down Expand Up @@ -59,7 +59,7 @@ def execute(self, state: dict) -> dict:
"""

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
self.logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down
4 changes: 2 additions & 2 deletions scrapegraphai/nodes/text_to_speech_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from typing import List, Optional
from .base_node import BaseNode

from ..utils.logging import get_logger

class TextToSpeechNode(BaseNode):
"""
Expand Down Expand Up @@ -45,7 +45,7 @@ def execute(self, state: dict) -> dict:
"""

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
self.logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down
1 change: 1 addition & 0 deletions scrapegraphai/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@
from .save_audio_from_bytes import save_audio_from_bytes
from .sys_dynamic_import import dynamic_import, srcfile_import
from .cleanup_html import cleanup_html
from .logging import *
Loading
Loading