From e76a68a782e5bce48d421cb620d0b7bffa412918 Mon Sep 17 00:00:00 2001 From: roryhaung Date: Sun, 13 Oct 2024 23:04:27 +0800 Subject: [PATCH 01/39] fix: remove variable "max_result" not being used in the code --- scrapegraphai/graphs/csv_scraper_multi_graph.py | 1 - scrapegraphai/graphs/json_scraper_multi_graph.py | 1 - scrapegraphai/graphs/script_creator_multi_graph.py | 2 -- scrapegraphai/graphs/smart_scraper_multi_concat_graph.py | 1 - 4 files changed, 5 deletions(-) diff --git a/scrapegraphai/graphs/csv_scraper_multi_graph.py b/scrapegraphai/graphs/csv_scraper_multi_graph.py index e7a028f3..284a49ac 100644 --- a/scrapegraphai/graphs/csv_scraper_multi_graph.py +++ b/scrapegraphai/graphs/csv_scraper_multi_graph.py @@ -44,7 +44,6 @@ class CSVScraperMultiGraph(AbstractGraph): def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None): - self.max_results = config.get("max_results", 3) self.copy_config = safe_deepcopy(config) self.copy_schema = deepcopy(schema) diff --git a/scrapegraphai/graphs/json_scraper_multi_graph.py b/scrapegraphai/graphs/json_scraper_multi_graph.py index 6e5434f0..a6dd22d4 100644 --- a/scrapegraphai/graphs/json_scraper_multi_graph.py +++ b/scrapegraphai/graphs/json_scraper_multi_graph.py @@ -44,7 +44,6 @@ class JSONScraperMultiGraph(AbstractGraph): def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None): - self.max_results = config.get("max_results", 3) self.copy_config = safe_deepcopy(config) self.copy_schema = deepcopy(schema) diff --git a/scrapegraphai/graphs/script_creator_multi_graph.py b/scrapegraphai/graphs/script_creator_multi_graph.py index de1ab6f7..a5598936 100644 --- a/scrapegraphai/graphs/script_creator_multi_graph.py +++ b/scrapegraphai/graphs/script_creator_multi_graph.py @@ -43,8 +43,6 @@ class ScriptCreatorMultiGraph(AbstractGraph): def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None): - self.max_results = config.get("max_results", 3) - self.copy_config = safe_deepcopy(config) self.copy_schema = deepcopy(schema) super().__init__(prompt, config, source, schema) diff --git a/scrapegraphai/graphs/smart_scraper_multi_concat_graph.py b/scrapegraphai/graphs/smart_scraper_multi_concat_graph.py index 0bd84a55..312d6457 100644 --- a/scrapegraphai/graphs/smart_scraper_multi_concat_graph.py +++ b/scrapegraphai/graphs/smart_scraper_multi_concat_graph.py @@ -45,7 +45,6 @@ class SmartScraperMultiConcatGraph(AbstractGraph): def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None): - self.max_results = config.get("max_results", 3) self.copy_config = safe_deepcopy(config) self.copy_schema = deepcopy(schema) From e0fc457d1a850f3306d473fbde55dd800133b404 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Mon, 14 Oct 2024 12:14:59 +0000 Subject: [PATCH 02/39] ci(release): 1.26.6-beta.1 [skip ci] ## [1.26.6-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.26.5...v1.26.6-beta.1) (2024-10-14) ### Bug Fixes * remove variable "max_result" not being used in the code ([e76a68a](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/e76a68a782e5bce48d421cb620d0b7bffa412918)) --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cbe12a5a..3b45f437 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## 
[1.26.6-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.26.5...v1.26.6-beta.1) (2024-10-14) + + +### Bug Fixes + +* remove variable "max_result" not being used in the code ([e76a68a](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/e76a68a782e5bce48d421cb620d0b7bffa412918)) + ## [1.26.5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.26.4...v1.26.5) (2024-10-13) diff --git a/pyproject.toml b/pyproject.toml index 6bade627..c0b58843 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.26.5" +version = "1.26.6b1" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." authors = [ From cacd9cde004dace1a7dcc27981245632a78b95f3 Mon Sep 17 00:00:00 2001 From: ekinsenler Date: Tue, 15 Oct 2024 14:23:02 +0300 Subject: [PATCH 03/39] feat: add conditional node structure to the smart_scraper_graph and implemented a structured way to check condition --- requirements.txt | 3 +- scrapegraphai/graphs/smart_scraper_graph.py | 138 +++++++++++------- scrapegraphai/prompts/__init__.py | 2 +- .../prompts/generate_answer_node_prompts.py | 4 + 4 files changed, 90 insertions(+), 57 deletions(-) diff --git a/requirements.txt b/requirements.txt index c72ad1bb..6c7a0326 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,4 +18,5 @@ undetected-playwright>=0.3.0 semchunk>=1.0.1 langchain-ollama>=0.1.3 simpleeval>=0.9.13 -googlesearch-python>=1.2.5 \ No newline at end of file +googlesearch-python>=1.2.5 +async_timeout>=4.0.3 \ No newline at end of file diff --git a/scrapegraphai/graphs/smart_scraper_graph.py b/scrapegraphai/graphs/smart_scraper_graph.py index 60407624..478f6634 100644 --- a/scrapegraphai/graphs/smart_scraper_graph.py +++ b/scrapegraphai/graphs/smart_scraper_graph.py @@ -2,7 +2,6 @@ SmartScraperGraph Module """ from typing import Optional -import logging from pydantic import BaseModel from .base_graph import BaseGraph from .abstract_graph import AbstractGraph @@ -10,8 +9,10 @@ FetchNode, ParseNode, ReasoningNode, - GenerateAnswerNode + GenerateAnswerNode, + ConditionalNode ) +from ..prompts import REGEN_ADDITIONAL_INFO class SmartScraperGraph(AbstractGraph): """ @@ -89,6 +90,28 @@ def _create_graph(self) -> BaseGraph: } ) + cond_node = None + regen_node = None + if self.config.get("reattempt") is True: + cond_node = ConditionalNode( + input="results", + output=["results"], + node_name="ConditionalNode", + node_config={ + "key_name": "results", + "condition": 'results and results!="NA"', + } + ) + regen_node = GenerateAnswerNode( + input="user_prompt & results", + output=["answer"], + node_config={ + "llm_model": self.llm_model, + "additional_info": REGEN_ADDITIONAL_INFO, + "schema": self.schema, + } + ) + if self.config.get("html_mode") is False: parse_node = ParseNode( input="doc", @@ -99,6 +122,7 @@ def _create_graph(self) -> BaseGraph: } ) + reasoning_node = None if self.config.get("reasoning"): reasoning_node = ReasoningNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", @@ -109,68 +133,72 @@ def _create_graph(self) -> BaseGraph: "schema": self.schema, } ) + + # Define the graph variation configurations + # (html_mode, reasoning, reattempt) + graph_variation_config = { + (False, True, False): { + "nodes": [fetch_node, parse_node, reasoning_node, generate_answer_node], + "edges": [(fetch_node, parse_node), (parse_node, reasoning_node), (reasoning_node, generate_answer_node)] + }, + (True, True, False): { + "nodes": 
[fetch_node, reasoning_node, generate_answer_node], + "edges": [(fetch_node, reasoning_node), (reasoning_node, generate_answer_node)] + }, + (True, False, False): { + "nodes": [fetch_node, generate_answer_node], + "edges": [(fetch_node, generate_answer_node)] + }, + (False, False, False): { + "nodes": [fetch_node, parse_node, generate_answer_node], + "edges": [(fetch_node, parse_node), (parse_node, generate_answer_node)] + }, + (False, True, True): { + "nodes": [fetch_node, parse_node, reasoning_node, generate_answer_node, cond_node, regen_node], + "edges": [(fetch_node, parse_node), (parse_node, reasoning_node), (reasoning_node, generate_answer_node), + (generate_answer_node, cond_node), (cond_node, regen_node), (cond_node, None)] + }, + (True, True, True): { + "nodes": [fetch_node, reasoning_node, generate_answer_node, cond_node, regen_node], + "edges": [(fetch_node, reasoning_node), (reasoning_node, generate_answer_node), + (generate_answer_node, cond_node), (cond_node, regen_node), (cond_node, None)] + }, + (True, False, True): { + "nodes": [fetch_node, generate_answer_node, cond_node, regen_node], + "edges": [(fetch_node, generate_answer_node), (generate_answer_node, cond_node), + (cond_node, regen_node), (cond_node, None)] + }, + (False, False, True): { + "nodes": [fetch_node, parse_node, generate_answer_node, cond_node, regen_node], + "edges": [(fetch_node, parse_node), (parse_node, generate_answer_node), + (generate_answer_node, cond_node), (cond_node, regen_node), (cond_node, None)] + } + } - if self.config.get("html_mode") is False and self.config.get("reasoning") is True: - - return BaseGraph( - nodes=[ - fetch_node, - parse_node, - reasoning_node, - generate_answer_node, - ], - edges=[ - (fetch_node, parse_node), - (parse_node, reasoning_node), - (reasoning_node, generate_answer_node) - ], - entry_point=fetch_node, - graph_name=self.__class__.__name__ - ) - - elif self.config.get("html_mode") is True and self.config.get("reasoning") is True: + # Get the current conditions + html_mode = self.config.get("html_mode", False) + reasoning = self.config.get("reasoning", False) + reattempt = self.config.get("reattempt", False) - return BaseGraph( - nodes=[ - fetch_node, - reasoning_node, - generate_answer_node, - ], - edges=[ - (fetch_node, reasoning_node), - (reasoning_node, generate_answer_node) - ], - entry_point=fetch_node, - graph_name=self.__class__.__name__ - ) + # Retrieve the appropriate graph configuration + config = graph_variation_config.get((html_mode, reasoning, reattempt)) - elif self.config.get("html_mode") is True and self.config.get("reasoning") is False: + if config: return BaseGraph( - nodes=[ - fetch_node, - generate_answer_node, - ], - edges=[ - (fetch_node, generate_answer_node) - ], + nodes=config["nodes"], + edges=config["edges"], entry_point=fetch_node, graph_name=self.__class__.__name__ ) + # Default return if no conditions match return BaseGraph( - nodes=[ - fetch_node, - parse_node, - generate_answer_node, - ], - edges=[ - (fetch_node, parse_node), - (parse_node, generate_answer_node) - ], - entry_point=fetch_node, - graph_name=self.__class__.__name__ - ) - + nodes=[fetch_node, parse_node, generate_answer_node], + edges=[(fetch_node, parse_node), (parse_node, generate_answer_node)], + entry_point=fetch_node, + graph_name=self.__class__.__name__ + ) + def run(self) -> str: """ Executes the scraping process and returns the answer to the prompt. 
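The graph_variation_config table above replaces the earlier chain of if/elif branches: each (html_mode, reasoning, reattempt) tuple maps straight to the node and edge lists of one pipeline variant, and _create_graph falls back to the plain fetch-parse-answer pipeline when no key matches. A minimal, runnable sketch of the same dispatch pattern, using placeholder strings rather than the real node objects:

    # Hypothetical illustration of tuple-keyed graph dispatch; node names are placeholders.
    fetch, parse, answer = "FetchNode", "ParseNode", "GenerateAnswerNode"
    variants = {
        (False, False): {"nodes": [fetch, parse, answer], "edges": [(fetch, parse), (parse, answer)]},
        (True, False): {"nodes": [fetch, answer], "edges": [(fetch, answer)]},
    }
    config = {"html_mode": True}
    key = (config.get("html_mode", False), config.get("reasoning", False))
    variant = variants.get(key) or variants[(False, False)]  # default pipeline if no match
    print(variant["nodes"])  # ['FetchNode', 'GenerateAnswerNode']
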
diff --git a/scrapegraphai/prompts/__init__.py b/scrapegraphai/prompts/__init__.py
index b23374a4..ea916842 100644
--- a/scrapegraphai/prompts/__init__.py
+++ b/scrapegraphai/prompts/__init__.py
@@ -5,7 +5,7 @@
 from .generate_answer_node_prompts import (TEMPLATE_CHUNKS,
                                            TEMPLATE_NO_CHUNKS, TEMPLATE_MERGE,
                                            TEMPLATE_CHUNKS_MD,
-                                           TEMPLATE_NO_CHUNKS_MD, TEMPLATE_MERGE_MD)
+                                           TEMPLATE_NO_CHUNKS_MD, TEMPLATE_MERGE_MD, REGEN_ADDITIONAL_INFO)
 from .generate_answer_node_csv_prompts import (TEMPLATE_CHUKS_CSV,
                                                TEMPLATE_NO_CHUKS_CSV, TEMPLATE_MERGE_CSV)
diff --git a/scrapegraphai/prompts/generate_answer_node_prompts.py b/scrapegraphai/prompts/generate_answer_node_prompts.py
index f9506a7b..a14f27f4 100644
--- a/scrapegraphai/prompts/generate_answer_node_prompts.py
+++ b/scrapegraphai/prompts/generate_answer_node_prompts.py
@@ -86,3 +86,7 @@
 USER QUESTION: {question}\n
 WEBSITE CONTENT: {context}\n
 """
+
+REGEN_ADDITIONAL_INFO = """
+You are a scraper and you have just failed to scrape the requested information from a website. \n
+I want you to try again and provide the missing information. \n"""

From 038d2ef916ff0306c1fa5258a161889281d54235 Mon Sep 17 00:00:00 2001
From: ekinsenler
Date: Tue, 15 Oct 2024 15:23:05 +0300
Subject: [PATCH 04/39] refactor cond node structure to fit with the new implementation

---
 scrapegraphai/graphs/base_graph.py          |  7 ++++++-
 scrapegraphai/graphs/smart_scraper_graph.py | 10 +++++-----
 scrapegraphai/nodes/conditional_node.py     |  2 +-
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/scrapegraphai/graphs/base_graph.py b/scrapegraphai/graphs/base_graph.py
index 5fa9ff34..d3a9cf85 100644
--- a/scrapegraphai/graphs/base_graph.py
+++ b/scrapegraphai/graphs/base_graph.py
@@ -95,7 +95,10 @@ def _set_conditional_node_edges(self):
                 raise ValueError(f"ConditionalNode '{node.node_name}' must have exactly two outgoing edges.")
             # Assign true_node_name and false_node_name
             node.true_node_name = outgoing_edges[0][1].node_name
-            node.false_node_name = outgoing_edges[1][1].node_name
+            try:
+                node.false_node_name = outgoing_edges[1][1].node_name
+            except IndexError:
+                node.false_node_name = None

     def _execute_standard(self, initial_state: dict) -> Tuple[dict, list]:
         """
@@ -221,6 +224,8 @@ def _execute_standard(self, initial_state: dict) -> Tuple[dict, list]:
                     node_names = {node.node_name for node in self.nodes}
                     if result in node_names:
                         current_node_name = result
+                    elif result is None:
+                        current_node_name = None
                     else:
                         raise ValueError(f"Conditional Node returned a node name '{result}' that does not exist in the graph")

diff --git a/scrapegraphai/graphs/smart_scraper_graph.py b/scrapegraphai/graphs/smart_scraper_graph.py
index 478f6634..03e8cd5d 100644
--- a/scrapegraphai/graphs/smart_scraper_graph.py
+++ b/scrapegraphai/graphs/smart_scraper_graph.py
@@ -94,16 +94,16 @@ def _create_graph(self) -> BaseGraph:
         regen_node = None
         if self.config.get("reattempt") is True:
             cond_node = ConditionalNode(
-                input="results",
-                output=["results"],
+                input="answer",
+                output=["answer"],
                 node_name="ConditionalNode",
                 node_config={
-                    "key_name": "results",
-                    "condition": 'results and results!="NA"',
+                    "key_name": "answer",
+                    "condition": 'not answer or answer=="NA"',
                 }
             )
             regen_node = GenerateAnswerNode(
-                input="user_prompt & results",
+                input="user_prompt & answer",
                 output=["answer"],
                 node_config={
                     "llm_model": self.llm_model,

diff --git a/scrapegraphai/nodes/conditional_node.py b/scrapegraphai/nodes/conditional_node.py
index 02ff61e9..c5ff58f3 100644
--- a/scrapegraphai/nodes/conditional_node.py
+++ b/scrapegraphai/nodes/conditional_node.py
@@ -61,7 +61,7 @@ def execute(self, state: dict) -> dict:
             str: The name of the next node to execute based on the presence of the key.
         """
-        if self.true_node_name is None or self.false_node_name is None:
+        if self.true_node_name is None:
             raise ValueError("ConditionalNode's next nodes are not set properly.")

         if self.condition:

From 612c644623fa6f4fe77a64a5f1a6a4d6cd5f4254 Mon Sep 17 00:00:00 2001
From: roryhaung
Date: Wed, 16 Oct 2024 18:37:50 +0800
Subject: [PATCH 05/39] feat: implement ScrapeGraph class for only web scraping automation

---
 scrapegraphai/graphs/scrape_graph.py | 98 ++++++++++++++++++++++
 1 file changed, 98 insertions(+)
 create mode 100644 scrapegraphai/graphs/scrape_graph.py

diff --git a/scrapegraphai/graphs/scrape_graph.py b/scrapegraphai/graphs/scrape_graph.py
new file mode 100644
index 00000000..a08149aa
--- /dev/null
+++ b/scrapegraphai/graphs/scrape_graph.py
@@ -0,0 +1,98 @@
+"""
+ScrapeGraph Module
+"""
+from typing import Optional
+from pydantic import BaseModel
+from .base_graph import BaseGraph
+from .abstract_graph import AbstractGraph
+from ..nodes import (
+    FetchNode,
+    ParseNode,
+)
+
+class ScrapeGraph(AbstractGraph):
+    """
+    ScrapeGraph is a scraping pipeline that automates the process of
+    extracting information from web pages.
+
+    Attributes:
+        prompt (str): The prompt for the graph.
+        source (str): The source of the graph.
+        config (dict): Configuration parameters for the graph.
+        schema (BaseModel): The schema for the graph output.
+        verbose (bool): A flag indicating whether to show print statements during execution.
+        headless (bool): A flag indicating whether to run the graph in headless mode.
+
+    Args:
+        prompt (str): The prompt for the graph.
+        source (str): The source of the graph.
+        config (dict): Configuration parameters for the graph.
+        schema (BaseModel): The schema for the graph output.
+
+    Example:
+        >>> scraper = ScrapeGraph(
+        ...     "https://en.wikipedia.org/wiki/Chioggia",
+        ...     {"llm": {"model": "openai/gpt-3.5-turbo"}}
+        ... )
+        >>> result = scraper.run()
+
+    """
+
+    def __init__(self, source: str, config: dict, prompt: str = "", schema: Optional[BaseModel] = None):
+        super().__init__(prompt, config, source, schema)
+
+        self.input_key = "url" if source.startswith("http") else "local_dir"
+
+    def _create_graph(self) -> BaseGraph:
+        """
+        Creates the graph of nodes representing the workflow for web scraping.
+
+        Returns:
+            BaseGraph: A graph instance representing the web scraping workflow.
+        """
+        fetch_node = FetchNode(
+            input="url | local_dir",
+            output=["doc"],
+            node_config={
+                "llm_model": self.llm_model,
+                "force": self.config.get("force", False),
+                "cut": self.config.get("cut", True),
+                "loader_kwargs": self.config.get("loader_kwargs", {}),
+                "browser_base": self.config.get("browser_base"),
+                "scrape_do": self.config.get("scrape_do")
+            }
+        )
+
+        parse_node = ParseNode(
+            input="doc",
+            output=["parsed_doc"],
+            node_config={
+                "llm_model": self.llm_model,
+                "chunk_size": self.model_token
+            }
+        )
+
+        return BaseGraph(
+            nodes=[
+                fetch_node,
+                parse_node,
+            ],
+            edges=[
+                (fetch_node, parse_node),
+            ],
+            entry_point=fetch_node,
+            graph_name=self.__class__.__name__
+        )
+
+    def run(self) -> str:
+        """
+        Executes the scraping process and returns the scraping content.
+
+        Returns:
+            str: The scraping content.
+ """ + + inputs = {"user_prompt": self.prompt, self.input_key: self.source} + self.final_state, self.execution_info = self.graph.execute(inputs) + + return self.final_state.get("parsed_doc", "No document found.") From 3e3e1b2f3ae8ed803d03b3b44b199e139baa68d4 Mon Sep 17 00:00:00 2001 From: roryhaung Date: Wed, 16 Oct 2024 19:38:53 +0800 Subject: [PATCH 06/39] feat: Implement SmartScraperMultiParseMergeFirstGraph class that scrapes a list of URLs and merge the content first and finally generates answers to a given prompt. (Different from the SmartScraperMultiGraph is that in this case the content is merged before to be processed by the llm.) --- scrapegraphai/graphs/__init__.py | 2 + ...t_scraper_multi_parse_merge_first_graph.py | 103 ++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 scrapegraphai/graphs/smart_scraper_multi_parse_merge_first_graph.py diff --git a/scrapegraphai/graphs/__init__.py b/scrapegraphai/graphs/__init__.py index 5b217bc9..0acec56b 100644 --- a/scrapegraphai/graphs/__init__.py +++ b/scrapegraphai/graphs/__init__.py @@ -25,3 +25,5 @@ from .smart_scraper_multi_concat_graph import SmartScraperMultiConcatGraph from .code_generator_graph import CodeGeneratorGraph from .depth_search_graph import DepthSearchGraph +from .smart_scraper_multi_parse_merge_first_graph import SmartScraperMultiParseMergeFirstGraph +from .scrape_graph import ScrapeGraph diff --git a/scrapegraphai/graphs/smart_scraper_multi_parse_merge_first_graph.py b/scrapegraphai/graphs/smart_scraper_multi_parse_merge_first_graph.py new file mode 100644 index 00000000..860e2ca2 --- /dev/null +++ b/scrapegraphai/graphs/smart_scraper_multi_parse_merge_first_graph.py @@ -0,0 +1,103 @@ +""" +SmartScraperMultiGraph Module +""" +from copy import deepcopy +from typing import List, Optional +from pydantic import BaseModel +from .base_graph import BaseGraph +from .abstract_graph import AbstractGraph +from .scrape_graph import ScrapeGraph +from ..nodes import ( + GraphIteratorNode, + MergeAnswersNode, +) +from ..utils.copy import safe_deepcopy + +class SmartScraperMultiParseMergeFirstGraph(AbstractGraph): + """ + SmartScraperMultiParseMergeFirstGraph is a scraping pipeline that scrapes a + list of URLs and merge the content first and finally generates answers to a given prompt. + It only requires a user prompt and a list of URLs. + The difference with the SmartScraperMultiGraph is that in this case the content is merged + before to be passed to the llm. + + Attributes: + prompt (str): The user prompt to search the internet. + llm_model (dict): The configuration for the language model. + embedder_model (dict): The configuration for the embedder model. + headless (bool): A flag to run the browser in headless mode. + verbose (bool): A flag to display the execution information. + model_token (int): The token limit for the language model. + + Args: + prompt (str): The user prompt to search the internet. + source (List[str]): The source of the graph. + config (dict): Configuration parameters for the graph. + schema (Optional[BaseModel]): The schema for the graph output. + + Example: + >>> search_graph = SmartScraperMultiParseMergeFirstGraph( + ... prompt="Who is Marco Perini?", + ... source= [ + ... "https://perinim.github.io/", + ... "https://perinim.github.io/cv/" + ... ], + ... config={"llm": {"model": "openai/gpt-3.5-turbo"}} + ... 
) + >>> result = search_graph.run() + """ + + def __init__(self, prompt: str, source: List[str], + config: dict, schema: Optional[BaseModel] = None): + + self.copy_config = safe_deepcopy(config) + self.copy_schema = deepcopy(schema) + super().__init__(prompt, config, source, schema) + + def _create_graph(self) -> BaseGraph: + """ + Creates the graph of nodes representing the workflow for web scraping + and parsing and then merge the content and generates answers to a given prompt. + """ + graph_iterator_node = GraphIteratorNode( + input="user_prompt & urls", + output=["parsed_doc"], + node_config={ + "graph_instance": ScrapeGraph, + "scraper_config": self.copy_config, + }, + schema=self.copy_schema + ) + + merge_answers_node = MergeAnswersNode( + input="user_prompt & parsed_doc", + output=["answer"], + node_config={ + "llm_model": self.llm_model, + "schema": self.copy_schema + } + ) + + return BaseGraph( + nodes=[ + graph_iterator_node, + merge_answers_node, + ], + edges=[ + (graph_iterator_node, merge_answers_node), + ], + entry_point=graph_iterator_node, + graph_name=self.__class__.__name__ + ) + + def run(self) -> str: + """ + Executes the web scraping and parsing process first and + then concatenate the content and generates answers to a given prompt. + + Returns: + str: The answer to the prompt. + """ + inputs = {"user_prompt": self.prompt, "urls": self.source} + self.final_state, self.execution_info = self.graph.execute(inputs) + return self.final_state.get("answer", "No answer found.") From cdb3c1100ee1117afedbc70437317acaf7c7c1d3 Mon Sep 17 00:00:00 2001 From: roryhaung Date: Wed, 16 Oct 2024 20:05:03 +0800 Subject: [PATCH 07/39] test: Add scrape_graph test --- tests/graphs/scrape_graph_test.py | 50 +++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 tests/graphs/scrape_graph_test.py diff --git a/tests/graphs/scrape_graph_test.py b/tests/graphs/scrape_graph_test.py new file mode 100644 index 00000000..00d3f4fb --- /dev/null +++ b/tests/graphs/scrape_graph_test.py @@ -0,0 +1,50 @@ +""" +Module for testing the scrape graph class +""" + +import os +import pytest +import pandas as pd +from dotenv import load_dotenv +from scrapegraphai.graphs import ScrapeGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +@pytest.fixture +def graph_config(): + """Configuration of the graph""" + openai_key = os.getenv("OPENAI_APIKEY") + return { + "llm": { + "api_key": openai_key, + "model": "openai/gpt-3.5-turbo", + }, + "verbose": True, + "headless": False, + } + +def test_scraping_pipeline(graph_config): + """Start of the scraping pipeline""" + scrape_graph = ScrapeGraph( + source="https://perinim.github.io/projects/", + config=graph_config, + ) + + result = scrape_graph.run() + + assert result is not None + assert isinstance(result, list) + +def test_get_execution_info(graph_config): + """Get the execution info""" + scrape_graph = ScrapeGraph( + source="https://perinim.github.io/projects/", + config=graph_config, + ) + + scrape_graph.run() + + graph_exec_info = scrape_graph.get_execution_info() + + assert graph_exec_info is not None From 464b8b04ea0d51280849173d5eda92d4d4db8612 Mon Sep 17 00:00:00 2001 From: roryhaung Date: Wed, 16 Oct 2024 20:05:36 +0800 Subject: [PATCH 08/39] test: Add smart_scraper_multi_parse_merge_first_graph test --- ...aper_multi_parse_merge_first_graph_test.py | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 tests/graphs/smart_scraper_multi_parse_merge_first_graph_test.py diff --git 
a/tests/graphs/smart_scraper_multi_parse_merge_first_graph_test.py b/tests/graphs/smart_scraper_multi_parse_merge_first_graph_test.py new file mode 100644 index 00000000..506ce5da --- /dev/null +++ b/tests/graphs/smart_scraper_multi_parse_merge_first_graph_test.py @@ -0,0 +1,59 @@ +""" +Module for testing the smart scraper class +""" + +import os +import pytest +import pandas as pd +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperMultiParseConcatFirstGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +@pytest.fixture +def graph_config(): + """Configuration of the graph""" + openai_key = os.getenv("OPENAI_APIKEY") + + return { + "llm": { + "api_key": openai_key, + "model": "openai/gpt-3.5-turbo", + }, + "verbose": True, + "headless": False, + } + +def test_scraping_pipeline(graph_config): + """Start of the scraping pipeline""" + smart_scraper_multi_parse_concat_first_graph = SmartScraperMultiParseConcatFirstGraph( + prompt="Who is Marco Perini?", + source= [ + "https://perinim.github.io/", + "https://perinim.github.io/cv/" + ], + config=graph_config, + ) + + result = smart_scraper_multi_parse_concat_first_graph.run() + + assert result is not None + assert isinstance(result, dict) + +def test_get_execution_info(graph_config): + """Get the execution info""" + smart_scraper_multi_parse_concat_first_graph = SmartScraperMultiParseConcatFirstGraph( + prompt="Who is Marco Perini?", + source= [ + "https://perinim.github.io/", + "https://perinim.github.io/cv/" + ], + config=graph_config, + ) + + smart_scraper_multi_parse_concat_first_graph.run() + + graph_exec_info = smart_scraper_multi_parse_concat_first_graph.get_execution_info() + + assert graph_exec_info is not None From eaa83edc04b803f2a14c7705549fae62c64275fb Mon Sep 17 00:00:00 2001 From: ekinsenler Date: Wed, 16 Oct 2024 15:21:23 +0300 Subject: [PATCH 09/39] update project requirement and add example --- examples/extras/cond_smartscraper_usage.py | 38 ++++++++++++++++++++++ pyproject.toml | 3 +- 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 examples/extras/cond_smartscraper_usage.py diff --git a/examples/extras/cond_smartscraper_usage.py b/examples/extras/cond_smartscraper_usage.py new file mode 100644 index 00000000..54c40712 --- /dev/null +++ b/examples/extras/cond_smartscraper_usage.py @@ -0,0 +1,38 @@ +""" +Basic example of scraping pipeline using SmartScraperMultiConcatGraph with Groq +""" + +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperGraph + +load_dotenv() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +graph_config = { + "llm": { + "api_key": os.getenv("GROQ_APIKEY"), + "model": "groq/gemma-7b-it", + }, + "verbose": True, + "headless": True, + "reattempt": True #Setting this to True will allow the graph to reattempt the scraping process +} + +# ******************************************************* +# Create the SmartScraperMultiCondGraph instance and run it +# ******************************************************* + +multiple_search_graph = SmartScraperGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + schema=None, + config=graph_config +) + +result = multiple_search_graph.run() +print(json.dumps(result, indent=4)) diff --git a/pyproject.toml b/pyproject.toml index 6bade627..7a374c97 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,8 @@ dependencies = [ 
"async-timeout>=4.0.3", "transformers>=4.44.2", "googlesearch-python>=1.2.5", - "simpleeval>=1.0.0" + "simpleeval>=1.0.0", + "async_timeout>=4.0.3" ] license = "MIT" From 9266a36b2efdf7027470d59aa14b654d68f7cb51 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Wed, 16 Oct 2024 15:54:35 +0000 Subject: [PATCH 10/39] ci(release): 1.27.0-beta.1 [skip ci] ## [1.27.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.26.6-beta.1...v1.27.0-beta.1) (2024-10-16) ### Features * add conditional node structure to the smart_scraper_graph and implemented a structured way to check condition ([cacd9cd](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/cacd9cde004dace1a7dcc27981245632a78b95f3)) --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b45f437..54d4f9d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.27.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.26.6-beta.1...v1.27.0-beta.1) (2024-10-16) + + +### Features + +* add conditional node structure to the smart_scraper_graph and implemented a structured way to check condition ([cacd9cd](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/cacd9cde004dace1a7dcc27981245632a78b95f3)) + ## [1.26.6-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.26.5...v1.26.6-beta.1) (2024-10-14) diff --git a/pyproject.toml b/pyproject.toml index 17488a43..be40f076 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.26.6b1" +version = "1.27.0b1" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." authors = [ From 2512262be81b686f559711584e69c725dd53a187 Mon Sep 17 00:00:00 2001 From: shenghong Date: Thu, 17 Oct 2024 06:46:34 +0800 Subject: [PATCH 11/39] Rename smart_scraper_multi_parse_merge_first_graph_test.py to smart_scraper_multi_parse_merge_first_graph_openai_test.py --- ...=> smart_scraper_multi_parse_merge_first_graph_openai_test.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/graphs/{smart_scraper_multi_parse_merge_first_graph_test.py => smart_scraper_multi_parse_merge_first_graph_openai_test.py} (100%) diff --git a/tests/graphs/smart_scraper_multi_parse_merge_first_graph_test.py b/tests/graphs/smart_scraper_multi_parse_merge_first_graph_openai_test.py similarity index 100% rename from tests/graphs/smart_scraper_multi_parse_merge_first_graph_test.py rename to tests/graphs/smart_scraper_multi_parse_merge_first_graph_openai_test.py From 69ff6495564a5c670b89c0f802ebb1602f0e7cfa Mon Sep 17 00:00:00 2001 From: roryhaung Date: Fri, 18 Oct 2024 01:36:29 +0800 Subject: [PATCH 12/39] fix: fix the example variable name --- scrapegraphai/graphs/smart_scraper_multi_concat_graph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapegraphai/graphs/smart_scraper_multi_concat_graph.py b/scrapegraphai/graphs/smart_scraper_multi_concat_graph.py index 312d6457..a13d8aa1 100644 --- a/scrapegraphai/graphs/smart_scraper_multi_concat_graph.py +++ b/scrapegraphai/graphs/smart_scraper_multi_concat_graph.py @@ -35,11 +35,11 @@ class SmartScraperMultiConcatGraph(AbstractGraph): schema (Optional[BaseModel]): The schema for the graph output. Example: - >>> search_graph = MultipleSearchGraph( + >>> smart_scraper_multi_concat_graph = SmartScraperMultiConcatGraph( ... "What is Chioggia famous for?", ... 
{"llm": {"model": "openai/gpt-3.5-turbo"}} ... ) - >>> result = search_graph.run() + >>> result = smart_scraper_multi_concat_graph.run() """ def __init__(self, prompt: str, source: List[str], From 94d8042c2a510b29138127e1abd4ddd9e0b49ed0 Mon Sep 17 00:00:00 2001 From: roryhaung Date: Fri, 18 Oct 2024 01:39:42 +0800 Subject: [PATCH 13/39] rename smart_scraper_multi_graph to smart_scraper_multi_abstract_graph --- .../smart_scraper_multi_abstract_graph.py | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 scrapegraphai/graphs/smart_scraper_multi_abstract_graph.py diff --git a/scrapegraphai/graphs/smart_scraper_multi_abstract_graph.py b/scrapegraphai/graphs/smart_scraper_multi_abstract_graph.py new file mode 100644 index 00000000..f5ffdf96 --- /dev/null +++ b/scrapegraphai/graphs/smart_scraper_multi_abstract_graph.py @@ -0,0 +1,104 @@ +""" +SmartScraperMultiGraph Module +""" +from copy import deepcopy +from typing import List, Optional +from pydantic import BaseModel +from .base_graph import BaseGraph +from .abstract_graph import AbstractGraph +from .smart_scraper_graph import SmartScraperGraph +from ..nodes import ( + GraphIteratorNode, + MergeAnswersNode +) +from ..utils.copy import safe_deepcopy + +class SmartScraperMultiAbstractGraph(AbstractGraph): + """ + SmartScraperMultiAbstractGraph is a scraping pipeline that scrapes a + list of URLs and generates answers to a given prompt. + It only requires a user prompt and a list of URLs. + The difference with the SmartScraperMultiGraph is that in this case the content will be abstracted + by llm and then merged finally passed to the llm. + + Attributes: + prompt (str): The user prompt to search the internet. + llm_model (dict): The configuration for the language model. + embedder_model (dict): The configuration for the embedder model. + headless (bool): A flag to run the browser in headless mode. + verbose (bool): A flag to display the execution information. + model_token (int): The token limit for the language model. + + Args: + prompt (str): The user prompt to search the internet. + source (List[str]): The source of the graph. + config (dict): Configuration parameters for the graph. + schema (Optional[BaseModel]): The schema for the graph output. + + Example: + >>> smart_scraper_multi_abstract_graph = SmartScraperMultiAbstractGraph( + ... "What is Chioggia famous for?", + ... {"llm": {"model": "openai/gpt-3.5-turbo"}} + ... ) + >>> result = smart_scraper_multi_abstract_graph.run() + """ + + def __init__(self, prompt: str, source: List[str], + config: dict, schema: Optional[BaseModel] = None): + + self.max_results = config.get("max_results", 3) + self.copy_config = safe_deepcopy(config) + self.copy_schema = deepcopy(schema) + + super().__init__(prompt, config, source, schema) + + def _create_graph(self) -> BaseGraph: + """ + Creates the graph of nodes representing the workflow for web scraping and searching. + + Returns: + BaseGraph: A graph instance representing the web scraping and searching workflow. 
+ """ + + graph_iterator_node = GraphIteratorNode( + input="user_prompt & urls", + output=["results"], + node_config={ + "graph_instance": SmartScraperGraph, + "scraper_config": self.copy_config, + }, + schema=self.copy_schema + ) + + merge_answers_node = MergeAnswersNode( + input="user_prompt & results", + output=["answer"], + node_config={ + "llm_model": self.llm_model, + "schema": self.copy_schema + } + ) + + return BaseGraph( + nodes=[ + graph_iterator_node, + merge_answers_node, + ], + edges=[ + (graph_iterator_node, merge_answers_node), + ], + entry_point=graph_iterator_node, + graph_name=self.__class__.__name__ + ) + + def run(self) -> str: + """ + Executes the web scraping and searching process. + + Returns: + str: The answer to the prompt. + """ + inputs = {"user_prompt": self.prompt, "urls": self.source} + self.final_state, self.execution_info = self.graph.execute(inputs) + + return self.final_state.get("answer", "No answer found.") From dfc67c670d871fac5116223461a56c9560959eb9 Mon Sep 17 00:00:00 2001 From: roryhaung Date: Fri, 18 Oct 2024 01:49:54 +0800 Subject: [PATCH 14/39] rename the smart_scraper_multi_parse_merge_first_graph to smart_scraper_multi_graph,so delete this file --- ...t_scraper_multi_parse_merge_first_graph.py | 103 ------------------ 1 file changed, 103 deletions(-) delete mode 100644 scrapegraphai/graphs/smart_scraper_multi_parse_merge_first_graph.py diff --git a/scrapegraphai/graphs/smart_scraper_multi_parse_merge_first_graph.py b/scrapegraphai/graphs/smart_scraper_multi_parse_merge_first_graph.py deleted file mode 100644 index 860e2ca2..00000000 --- a/scrapegraphai/graphs/smart_scraper_multi_parse_merge_first_graph.py +++ /dev/null @@ -1,103 +0,0 @@ -""" -SmartScraperMultiGraph Module -""" -from copy import deepcopy -from typing import List, Optional -from pydantic import BaseModel -from .base_graph import BaseGraph -from .abstract_graph import AbstractGraph -from .scrape_graph import ScrapeGraph -from ..nodes import ( - GraphIteratorNode, - MergeAnswersNode, -) -from ..utils.copy import safe_deepcopy - -class SmartScraperMultiParseMergeFirstGraph(AbstractGraph): - """ - SmartScraperMultiParseMergeFirstGraph is a scraping pipeline that scrapes a - list of URLs and merge the content first and finally generates answers to a given prompt. - It only requires a user prompt and a list of URLs. - The difference with the SmartScraperMultiGraph is that in this case the content is merged - before to be passed to the llm. - - Attributes: - prompt (str): The user prompt to search the internet. - llm_model (dict): The configuration for the language model. - embedder_model (dict): The configuration for the embedder model. - headless (bool): A flag to run the browser in headless mode. - verbose (bool): A flag to display the execution information. - model_token (int): The token limit for the language model. - - Args: - prompt (str): The user prompt to search the internet. - source (List[str]): The source of the graph. - config (dict): Configuration parameters for the graph. - schema (Optional[BaseModel]): The schema for the graph output. - - Example: - >>> search_graph = SmartScraperMultiParseMergeFirstGraph( - ... prompt="Who is Marco Perini?", - ... source= [ - ... "https://perinim.github.io/", - ... "https://perinim.github.io/cv/" - ... ], - ... config={"llm": {"model": "openai/gpt-3.5-turbo"}} - ... 
) - >>> result = search_graph.run() - """ - - def __init__(self, prompt: str, source: List[str], - config: dict, schema: Optional[BaseModel] = None): - - self.copy_config = safe_deepcopy(config) - self.copy_schema = deepcopy(schema) - super().__init__(prompt, config, source, schema) - - def _create_graph(self) -> BaseGraph: - """ - Creates the graph of nodes representing the workflow for web scraping - and parsing and then merge the content and generates answers to a given prompt. - """ - graph_iterator_node = GraphIteratorNode( - input="user_prompt & urls", - output=["parsed_doc"], - node_config={ - "graph_instance": ScrapeGraph, - "scraper_config": self.copy_config, - }, - schema=self.copy_schema - ) - - merge_answers_node = MergeAnswersNode( - input="user_prompt & parsed_doc", - output=["answer"], - node_config={ - "llm_model": self.llm_model, - "schema": self.copy_schema - } - ) - - return BaseGraph( - nodes=[ - graph_iterator_node, - merge_answers_node, - ], - edges=[ - (graph_iterator_node, merge_answers_node), - ], - entry_point=graph_iterator_node, - graph_name=self.__class__.__name__ - ) - - def run(self) -> str: - """ - Executes the web scraping and parsing process first and - then concatenate the content and generates answers to a given prompt. - - Returns: - str: The answer to the prompt. - """ - inputs = {"user_prompt": self.prompt, "urls": self.source} - self.final_state, self.execution_info = self.graph.execute(inputs) - return self.final_state.get("answer", "No answer found.") From 78bd40c3b54cd656e0fe2e789e978b59dcb96d5b Mon Sep 17 00:00:00 2001 From: roryhaung Date: Fri, 18 Oct 2024 01:51:26 +0800 Subject: [PATCH 15/39] modify the graph name --- scrapegraphai/graphs/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapegraphai/graphs/__init__.py b/scrapegraphai/graphs/__init__.py index 0acec56b..bfb8e300 100644 --- a/scrapegraphai/graphs/__init__.py +++ b/scrapegraphai/graphs/__init__.py @@ -13,7 +13,7 @@ from .csv_scraper_graph import CSVScraperGraph from .omni_scraper_graph import OmniScraperGraph from .omni_search_graph import OmniSearchGraph -from .smart_scraper_multi_graph import SmartScraperMultiGraph +from .smart_scraper_multi_abstract_graph import SmartScraperMultiAbstractGraph from .json_scraper_multi_graph import JSONScraperMultiGraph from .csv_scraper_multi_graph import CSVScraperMultiGraph from .xml_scraper_multi_graph import XMLScraperMultiGraph @@ -25,5 +25,5 @@ from .smart_scraper_multi_concat_graph import SmartScraperMultiConcatGraph from .code_generator_graph import CodeGeneratorGraph from .depth_search_graph import DepthSearchGraph -from .smart_scraper_multi_parse_merge_first_graph import SmartScraperMultiParseMergeFirstGraph +from .smart_scraper_multi_graph import SmartScraperMultiGraph from .scrape_graph import ScrapeGraph From 6dbac936683042ef2e517a71b6fb1655508a1568 Mon Sep 17 00:00:00 2001 From: roryhaung Date: Fri, 18 Oct 2024 01:52:39 +0800 Subject: [PATCH 16/39] rename the SmartScraperMultiParseMergeFirstGraph to SmartScraperMultiGraph --- .../graphs/smart_scraper_multi_graph.py | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/scrapegraphai/graphs/smart_scraper_multi_graph.py b/scrapegraphai/graphs/smart_scraper_multi_graph.py index 5dff3277..2f628e81 100644 --- a/scrapegraphai/graphs/smart_scraper_multi_graph.py +++ b/scrapegraphai/graphs/smart_scraper_multi_graph.py @@ -6,18 +6,20 @@ from pydantic import BaseModel from .base_graph import BaseGraph from .abstract_graph 
import AbstractGraph -from .smart_scraper_graph import SmartScraperGraph +from .scrape_graph import ScrapeGraph from ..nodes import ( GraphIteratorNode, - MergeAnswersNode + MergeAnswersNode, ) from ..utils.copy import safe_deepcopy class SmartScraperMultiGraph(AbstractGraph): """ SmartScraperMultiGraph is a scraping pipeline that scrapes a - list of URLs and generates answers to a given prompt. + list of URLs and merge the content first and finally generates answers to a given prompt. It only requires a user prompt and a list of URLs. + The difference with the SmartScraperMultiGraph is that in this case the content is merged + before to be passed to the llm. Attributes: prompt (str): The user prompt to search the internet. @@ -34,42 +36,41 @@ class SmartScraperMultiGraph(AbstractGraph): schema (Optional[BaseModel]): The schema for the graph output. Example: - >>> search_graph = MultipleSearchGraph( - ... "What is Chioggia famous for?", - ... {"llm": {"model": "openai/gpt-3.5-turbo"}} + >>> smart_scraper_multi_graph = SmartScraperMultiGraph( + ... prompt="Who is Marco Perini?", + ... source= [ + ... "https://perinim.github.io/", + ... "https://perinim.github.io/cv/" + ... ], + ... config={"llm": {"model": "openai/gpt-3.5-turbo"}} ... ) - >>> result = search_graph.run() + >>> result = smart_scraper_multi_graph.run() """ def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None): - self.max_results = config.get("max_results", 3) self.copy_config = safe_deepcopy(config) self.copy_schema = deepcopy(schema) - super().__init__(prompt, config, source, schema) def _create_graph(self) -> BaseGraph: """ - Creates the graph of nodes representing the workflow for web scraping and searching. - - Returns: - BaseGraph: A graph instance representing the web scraping and searching workflow. + Creates the graph of nodes representing the workflow for web scraping + and parsing and then merge the content and generates answers to a given prompt. """ - graph_iterator_node = GraphIteratorNode( input="user_prompt & urls", - output=["results"], + output=["parsed_doc"], node_config={ - "graph_instance": SmartScraperGraph, + "graph_instance": ScrapeGraph, "scraper_config": self.copy_config, }, schema=self.copy_schema ) merge_answers_node = MergeAnswersNode( - input="user_prompt & results", + input="user_prompt & parsed_doc", output=["answer"], node_config={ "llm_model": self.llm_model, @@ -91,12 +92,12 @@ def _create_graph(self) -> BaseGraph: def run(self) -> str: """ - Executes the web scraping and searching process. + Executes the web scraping and parsing process first and + then concatenate the content and generates answers to a given prompt. Returns: str: The answer to the prompt. 
""" inputs = {"user_prompt": self.prompt, "urls": self.source} self.final_state, self.execution_info = self.graph.execute(inputs) - return self.final_state.get("answer", "No answer found.") From 974f88a77e853884d8a83c0d44a79c013727cc55 Mon Sep 17 00:00:00 2001 From: roryhaung Date: Fri, 18 Oct 2024 03:01:59 +0800 Subject: [PATCH 17/39] rename SmartScraperMultiGraph to SmartScraperMultiLiteGraph --- ...r_multi_graph.py => smart_scraper_multi_lite_graph.py} | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename scrapegraphai/graphs/{smart_scraper_multi_graph.py => smart_scraper_multi_lite_graph.py} (93%) diff --git a/scrapegraphai/graphs/smart_scraper_multi_graph.py b/scrapegraphai/graphs/smart_scraper_multi_lite_graph.py similarity index 93% rename from scrapegraphai/graphs/smart_scraper_multi_graph.py rename to scrapegraphai/graphs/smart_scraper_multi_lite_graph.py index 2f628e81..14e576d9 100644 --- a/scrapegraphai/graphs/smart_scraper_multi_graph.py +++ b/scrapegraphai/graphs/smart_scraper_multi_lite_graph.py @@ -13,9 +13,9 @@ ) from ..utils.copy import safe_deepcopy -class SmartScraperMultiGraph(AbstractGraph): +class SmartScraperMultiLiteGraph(AbstractGraph): """ - SmartScraperMultiGraph is a scraping pipeline that scrapes a + SmartScraperMultiLiteGraph is a scraping pipeline that scrapes a list of URLs and merge the content first and finally generates answers to a given prompt. It only requires a user prompt and a list of URLs. The difference with the SmartScraperMultiGraph is that in this case the content is merged @@ -36,7 +36,7 @@ class SmartScraperMultiGraph(AbstractGraph): schema (Optional[BaseModel]): The schema for the graph output. Example: - >>> smart_scraper_multi_graph = SmartScraperMultiGraph( + >>> smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph( ... prompt="Who is Marco Perini?", ... source= [ ... "https://perinim.github.io/", @@ -44,7 +44,7 @@ class SmartScraperMultiGraph(AbstractGraph): ... ], ... config={"llm": {"model": "openai/gpt-3.5-turbo"}} ... ) - >>> result = smart_scraper_multi_graph.run() + >>> result = smart_scraper_multi_lite_graph.run() """ def __init__(self, prompt: str, source: List[str], From 3e8f047ab606db4549c5d3b28b681f47b8c08725 Mon Sep 17 00:00:00 2001 From: roryhaung Date: Fri, 18 Oct 2024 03:10:57 +0800 Subject: [PATCH 18/39] Renamed smart_scraper_multi_abstract_graph back to smart_scraper_multi_graph. 
--- scrapegraphai/graphs/__init__.py | 4 ++-- ...t_graph.py => smart_scraper_multi_graph.py} | 18 +++++++++++------- 2 files changed, 13 insertions(+), 9 deletions(-) rename scrapegraphai/graphs/{smart_scraper_multi_abstract_graph.py => smart_scraper_multi_graph.py} (84%) diff --git a/scrapegraphai/graphs/__init__.py b/scrapegraphai/graphs/__init__.py index bfb8e300..9c8bc820 100644 --- a/scrapegraphai/graphs/__init__.py +++ b/scrapegraphai/graphs/__init__.py @@ -13,7 +13,7 @@ from .csv_scraper_graph import CSVScraperGraph from .omni_scraper_graph import OmniScraperGraph from .omni_search_graph import OmniSearchGraph -from .smart_scraper_multi_abstract_graph import SmartScraperMultiAbstractGraph +from .smart_scraper_multi_graph import SmartScraperMultiGraph from .json_scraper_multi_graph import JSONScraperMultiGraph from .csv_scraper_multi_graph import CSVScraperMultiGraph from .xml_scraper_multi_graph import XMLScraperMultiGraph @@ -25,5 +25,5 @@ from .smart_scraper_multi_concat_graph import SmartScraperMultiConcatGraph from .code_generator_graph import CodeGeneratorGraph from .depth_search_graph import DepthSearchGraph -from .smart_scraper_multi_graph import SmartScraperMultiGraph +from .smart_scraper_multi_lite_graph import SmartScraperMultiLiteGraph from .scrape_graph import ScrapeGraph diff --git a/scrapegraphai/graphs/smart_scraper_multi_abstract_graph.py b/scrapegraphai/graphs/smart_scraper_multi_graph.py similarity index 84% rename from scrapegraphai/graphs/smart_scraper_multi_abstract_graph.py rename to scrapegraphai/graphs/smart_scraper_multi_graph.py index f5ffdf96..420dc784 100644 --- a/scrapegraphai/graphs/smart_scraper_multi_abstract_graph.py +++ b/scrapegraphai/graphs/smart_scraper_multi_graph.py @@ -13,12 +13,12 @@ ) from ..utils.copy import safe_deepcopy -class SmartScraperMultiAbstractGraph(AbstractGraph): +class SmartScraperMultiGraph(AbstractGraph): """ - SmartScraperMultiAbstractGraph is a scraping pipeline that scrapes a + SmartScraperMultiGraph is a scraping pipeline that scrapes a list of URLs and generates answers to a given prompt. It only requires a user prompt and a list of URLs. - The difference with the SmartScraperMultiGraph is that in this case the content will be abstracted + The difference with the SmartScraperMultiLiteGraph is that in this case the content will be abstracted by llm and then merged finally passed to the llm. Attributes: @@ -36,11 +36,15 @@ class SmartScraperMultiAbstractGraph(AbstractGraph): schema (Optional[BaseModel]): The schema for the graph output. Example: - >>> smart_scraper_multi_abstract_graph = SmartScraperMultiAbstractGraph( - ... "What is Chioggia famous for?", - ... {"llm": {"model": "openai/gpt-3.5-turbo"}} + >>> smart_scraper_multi_graph = SmartScraperMultiGraph( + ... prompt="Who is Marco Perini?", + ... source= [ + ... "https://perinim.github.io/", + ... "https://perinim.github.io/cv/" + ... ], + ... config={"llm": {"model": "openai/gpt-3.5-turbo"}} ... 
) - >>> result = smart_scraper_multi_abstract_graph.run() + >>> result = smart_scraper_multi_graph.run() """ def __init__(self, prompt: str, source: List[str], From 28dda2b476e1b2da9e39cc212133fcaca7cc5b11 Mon Sep 17 00:00:00 2001 From: roryhaung Date: Fri, 18 Oct 2024 03:14:08 +0800 Subject: [PATCH 19/39] rename graph name --- ...=> smart_scraper_multi_lite_graph_openai_test.py} | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) rename tests/graphs/{smart_scraper_multi_parse_merge_first_graph_openai_test.py => smart_scraper_multi_lite_graph_openai_test.py} (70%) diff --git a/tests/graphs/smart_scraper_multi_parse_merge_first_graph_openai_test.py b/tests/graphs/smart_scraper_multi_lite_graph_openai_test.py similarity index 70% rename from tests/graphs/smart_scraper_multi_parse_merge_first_graph_openai_test.py rename to tests/graphs/smart_scraper_multi_lite_graph_openai_test.py index 506ce5da..0a0e0a69 100644 --- a/tests/graphs/smart_scraper_multi_parse_merge_first_graph_openai_test.py +++ b/tests/graphs/smart_scraper_multi_lite_graph_openai_test.py @@ -6,7 +6,7 @@ import pytest import pandas as pd from dotenv import load_dotenv -from scrapegraphai.graphs import SmartScraperMultiParseConcatFirstGraph +from scrapegraphai.graphs import SmartScraperMultiLiteGraph from scrapegraphai.utils import prettify_exec_info load_dotenv() @@ -27,7 +27,7 @@ def graph_config(): def test_scraping_pipeline(graph_config): """Start of the scraping pipeline""" - smart_scraper_multi_parse_concat_first_graph = SmartScraperMultiParseConcatFirstGraph( + smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph( prompt="Who is Marco Perini?", source= [ "https://perinim.github.io/", @@ -36,14 +36,14 @@ def test_scraping_pipeline(graph_config): config=graph_config, ) - result = smart_scraper_multi_parse_concat_first_graph.run() + result = smart_scraper_multi_lite_graph.run() assert result is not None assert isinstance(result, dict) def test_get_execution_info(graph_config): """Get the execution info""" - smart_scraper_multi_parse_concat_first_graph = SmartScraperMultiParseConcatFirstGraph( + smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph( prompt="Who is Marco Perini?", source= [ "https://perinim.github.io/", @@ -52,8 +52,8 @@ def test_get_execution_info(graph_config): config=graph_config, ) - smart_scraper_multi_parse_concat_first_graph.run() + smart_scraper_multi_lite_graph.run() - graph_exec_info = smart_scraper_multi_parse_concat_first_graph.get_execution_info() + graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info() assert graph_exec_info is not None From da2a3c8ec7d9c3c7e805fd6193035bd1bc284375 Mon Sep 17 00:00:00 2001 From: roryhaung Date: Fri, 18 Oct 2024 03:19:00 +0800 Subject: [PATCH 20/39] add smart_scraper_multi_lite_graph example --- .../openai/smart_scraper_multi_lite_openai.py | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 examples/openai/smart_scraper_multi_lite_openai.py diff --git a/examples/openai/smart_scraper_multi_lite_openai.py b/examples/openai/smart_scraper_multi_lite_openai.py new file mode 100644 index 00000000..69eeafc7 --- /dev/null +++ b/examples/openai/smart_scraper_multi_lite_openai.py @@ -0,0 +1,47 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperMultiLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +# ************************************************ +# Define 
the configuration for the graph
# ************************************************

graph_config = {
    "llm": {
        "api_key": os.getenv("OPENAI_API_KEY"),
        "model": "openai/gpt-4o",
    },
    "verbose": True,
    "headless": False,
}

# ************************************************
# Create the SmartScraperMultiLiteGraph instance and run it
# ************************************************

smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph(
    prompt="Who is Marco Perini?",
    source=[
        "https://perinim.github.io/",
        "https://perinim.github.io/cv/"
    ],
    config=graph_config
)

result = smart_scraper_multi_lite_graph.run()
print(json.dumps(result, indent=4))

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

From d84d29538985ef8d04badfed547c6fdc73d7774d Mon Sep 17 00:00:00 2001
From: semantic-release-bot
Date: Fri, 18 Oct 2024 20:18:25 +0000
Subject: [PATCH 21/39] ci(release): 1.27.0-beta.2 [skip ci]

## [1.27.0-beta.2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.1...v1.27.0-beta.2) (2024-10-18)

### Bug Fixes

* refactoring of gpt2 tokenizer ([44c3f9c](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/44c3f9c98939c44caa86dc582242819a7c6a0f80))

### CI

* **release:** 1.26.6 [skip ci] ([a4634c7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/a4634c73312b5c08581a8d670d53b7eebe8dadc1))
---
 CHANGELOG.md   | 12 ++++++++++++
 pyproject.toml |  2 +-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8f320431..9631d303 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,15 @@
+## [1.27.0-beta.2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.1...v1.27.0-beta.2) (2024-10-18)
+
+
+### Bug Fixes
+
+* refactoring of gpt2 tokenizer ([44c3f9c](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/44c3f9c98939c44caa86dc582242819a7c6a0f80))
+
+
+### CI
+
+* **release:** 1.26.6 [skip ci] ([a4634c7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/a4634c73312b5c08581a8d670d53b7eebe8dadc1))
+
 ## [1.27.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.26.6-beta.1...v1.27.0-beta.1) (2024-10-16)

diff --git a/pyproject.toml b/pyproject.toml
index 46e533e0..a7b15a89 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]

 name = "scrapegraphai"

-version = "1.27.0b1"
+version = "1.27.0b2"

 description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
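Note the two spellings of the version in this release commit: the changelog heading uses the semantic-release tag 1.27.0-beta.2, while pyproject.toml needs the PEP 440 form 1.27.0b2. Both denote the same version; a quick check with the packaging library (assuming it is installed) shows the normalization:

    from packaging.version import Version

    assert Version("1.27.0-beta.2") == Version("1.27.0b2")
    print(Version("1.27.0-beta.2"))  # prints 1.27.0b2: PEP 440 normalizes the -beta.N spelling
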
From 9cd9a874f91bbbb2990444818e8ab2d0855cc361 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Fri, 18 Oct 2024 22:34:42 +0200 Subject: [PATCH 22/39] chore: fix example Committing even though this is not the bug we were looking for --- examples/together/depth_search_graph_together.py | 7 +++---- requirements-dev.lock | 12 +----------- requirements.lock | 6 +----- 3 files changed, 5 insertions(+), 20 deletions(-) diff --git a/examples/together/depth_search_graph_together.py b/examples/together/depth_search_graph_together.py index 7a2e7f3e..fb7b4d9e 100644 --- a/examples/together/depth_search_graph_together.py +++ b/examples/together/depth_search_graph_together.py @@ -7,13 +7,12 @@ load_dotenv() -openai_key = os.getenv("OPENAI_APIKEY") +together_key = os.getenv("TOGETHER_APIKEY") graph_config = { "llm": { - "api_key": "***************************", - "model": "oneapi/qwen-turbo", - "base_url": "http://127.0.0.1:3000/v1", # 设置 OneAPI URL + "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + "api_key": together_key, }, "verbose": True, "headless": False, diff --git a/requirements-dev.lock b/requirements-dev.lock index bca5e9c2..61bd3e2b 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -30,8 +30,6 @@ anyio==4.4.0 astroid==3.2.4 # via pylint async-timeout==4.0.3 - # via aiohttp - # via langchain # via scrapegraphai attrs==24.2.0 # via aiohttp @@ -80,9 +78,6 @@ distro==1.9.0 # via openai docutils==0.19 # via sphinx -exceptiongroup==1.2.2 - # via anyio - # via pytest fastapi==0.112.0 # via burr fastapi-pagination==0.12.26 @@ -136,6 +131,7 @@ graphviz==0.20.3 # via burr greenlet==3.0.3 # via playwright + # via sqlalchemy grpcio==1.65.4 # via google-api-core # via grpcio-status @@ -504,9 +500,6 @@ tokenizers==0.19.1 # via transformers toml==0.10.2 # via streamlit -tomli==2.0.1 - # via pylint - # via pytest tomlkit==0.13.0 # via pylint tornado==6.4.1 @@ -524,8 +517,6 @@ transformers==4.44.2 # via scrapegraphai typing-extensions==4.12.2 # via altair - # via anyio - # via astroid # via fastapi # via fastapi-pagination # via google-generativeai @@ -540,7 +531,6 @@ typing-extensions==4.12.2 # via sqlalchemy # via streamlit # via typing-inspect - # via uvicorn typing-inspect==0.9.0 # via dataclasses-json # via sf-hamilton diff --git a/requirements.lock b/requirements.lock index 38be6e68..c2c40996 100644 --- a/requirements.lock +++ b/requirements.lock @@ -19,8 +19,6 @@ anyio==4.4.0 # via httpx # via openai async-timeout==4.0.3 - # via aiohttp - # via langchain # via scrapegraphai attrs==23.2.0 # via aiohttp @@ -50,8 +48,6 @@ dill==0.3.8 # via multiprocess distro==1.9.0 # via openai -exceptiongroup==1.2.2 - # via anyio fastembed==0.3.6 # via scrapegraphai filelock==3.15.4 @@ -91,6 +87,7 @@ googlesearch-python==1.2.5 # via scrapegraphai greenlet==3.0.3 # via playwright + # via sqlalchemy grpcio==1.65.1 # via google-api-core # via grpcio-status @@ -371,7 +368,6 @@ tqdm==4.66.4 transformers==4.44.2 # via scrapegraphai typing-extensions==4.12.2 - # via anyio # via google-generativeai # via huggingface-hub # via langchain-core From f576afaf0c1dd6d1dbf79fd5e642f6dca9dbe862 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Sun, 20 Oct 2024 08:15:19 +0000 Subject: [PATCH 23/39] ci(release): 1.27.0-beta.3 [skip ci] ## [1.27.0-beta.3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.2...v1.27.0-beta.3) (2024-10-20) ### Features * implement ScrapeGraph class for only web scraping automation 
([612c644](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/612c644623fa6f4fe77a64a5f1a6a4d6cd5f4254))
* Implement SmartScraperMultiParseMergeFirstGraph class that scrapes a list of URLs, merges the content first, and finally generates answers to a given prompt. ([3e3e1b2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/3e3e1b2f3ae8ed803d03b3b44b199e139baa68d4))


### Bug Fixes

* fix the example variable name ([69ff649](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/69ff6495564a5c670b89c0f802ebb1602f0e7cfa))


### chore

* fix example ([9cd9a87](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/9cd9a874f91bbbb2990444818e8ab2d0855cc361))


### Test

* Add scrape_graph test ([cdb3c11](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/cdb3c1100ee1117afedbc70437317acaf7c7c1d3))
* Add smart_scraper_multi_parse_merge_first_graph test ([464b8b0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/464b8b04ea0d51280849173d5eda92d4d4db8612))
---
 CHANGELOG.md   | 24 ++++++++++++++++++++++++
 pyproject.toml |  2 +-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9631d303..6c029ea3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,27 @@
+## [1.27.0-beta.3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.2...v1.27.0-beta.3) (2024-10-20)
+
+
+### Features
+
+* implement ScrapeGraph class for only web scraping automation ([612c644](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/612c644623fa6f4fe77a64a5f1a6a4d6cd5f4254))
+* Implement SmartScraperMultiParseMergeFirstGraph class that scrapes a list of URLs, merges the content first, and finally generates answers to a given prompt. ([3e3e1b2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/3e3e1b2f3ae8ed803d03b3b44b199e139baa68d4))
+
+
+### Bug Fixes
+
+* fix the example variable name ([69ff649](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/69ff6495564a5c670b89c0f802ebb1602f0e7cfa))
+
+
+### chore
+
+* fix example ([9cd9a87](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/9cd9a874f91bbbb2990444818e8ab2d0855cc361))
+
+
+### Test
+
+* Add scrape_graph test ([cdb3c11](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/cdb3c1100ee1117afedbc70437317acaf7c7c1d3))
+* Add smart_scraper_multi_parse_merge_first_graph test ([464b8b0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/464b8b04ea0d51280849173d5eda92d4d4db8612))
+
 ## [1.27.0-beta.2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.1...v1.27.0-beta.2) (2024-10-18)
 
 
diff --git a/pyproject.toml b/pyproject.toml
index a7b15a89..85d7b442 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "scrapegraphai"
 
-version = "1.27.0b2"
+version = "1.27.0b3"
 description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
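As context for the release above: the new ScrapeGraph class runs only the fetch-and-parse stage of the pipeline, with no answer-generation step; a later patch in this series renames it to SmartScraperLiteGraph. A minimal usage sketch, assuming the constructor shown later in this series (source, config, optional prompt and schema); the target URL, model name, and environment variable are illustrative:

import json
import os
from scrapegraphai.graphs import ScrapeGraph

# Illustrative configuration; like the other graphs, ScrapeGraph reads
# its LLM settings from the "llm" entry of the config dict.
graph_config = {
    "llm": {
        "api_key": os.getenv("OPENAI_API_KEY"),
        "model": "openai/gpt-4o",
    },
    "verbose": True,
}

# Fetches and parses the page; no prompt is needed here because no
# question answering is performed at this stage.
scrape_graph = ScrapeGraph(
    source="https://perinim.github.io/",
    config=graph_config,
)

result = scrape_graph.run()
print(json.dumps(result, indent=4))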
From 2991ca8dd207cc83409a84c261a1f87e5da47e01 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Mon, 21 Oct 2024 09:33:40 +0200 Subject: [PATCH 24/39] add examples smart scraper lite --- .../smart_scraper_multi_lite_anthropic.py | 35 ++++++++++++++ .../azure/smart_scraper_multi_lite_azure.py | 35 ++++++++++++++ .../smart_scraper_multi_lite_bedrock.py | 29 ++++++++++++ .../smart_scraper_multi_lite_deepseek.py | 35 ++++++++++++++ .../ernie/smart_scraper_multi_lite_ernie.py | 35 ++++++++++++++ .../smart_scraper_multi_lite_fireworks.py | 35 ++++++++++++++ .../smart_scraper_multi_lite_gemini.py | 0 .../smart_scraper_multi_lite_google_genai.py | 34 ++++++++++++++ ...mart_scraper_multi_lite_google_vertexai.py | 35 ++++++++++++++ .../smart_scraper_multi_lite_vertex.py | 0 .../groq/smart_scraper_multi_lite_groq.py | 35 ++++++++++++++ ...smart_scraper_multi_lite_huggingfacehub.py | 34 ++++++++++++++ ...smart_scraper_multi_lite_uhggingfacehub.py | 0 .../smart_scraper_multi_lite_ollama.py | 45 ++++++++++++++++++ .../smart_scraper_multi_lite_mistral.py | 35 ++++++++++++++ .../smart_scraper_multi_lite_moonshot.py | 34 ++++++++++++++ .../smart_scraper_multi_lite_nemotron.py | 46 +++++++++++++++++++ .../oneapi/smart_scraper_multi_lite_oneapi.py | 43 +++++++++++++++++ .../smart_scraper_multi_lite_together.py | 43 +++++++++++++++++ 19 files changed, 588 insertions(+) create mode 100644 examples/anthropic/smart_scraper_multi_lite_anthropic.py create mode 100644 examples/azure/smart_scraper_multi_lite_azure.py create mode 100644 examples/bedrock/smart_scraper_multi_lite_bedrock.py create mode 100644 examples/deepseek/smart_scraper_multi_lite_deepseek.py create mode 100644 examples/ernie/smart_scraper_multi_lite_ernie.py create mode 100644 examples/fireworks/smart_scraper_multi_lite_fireworks.py create mode 100644 examples/google_genai/smart_scraper_multi_lite_gemini.py create mode 100644 examples/google_genai/smart_scraper_multi_lite_google_genai.py create mode 100644 examples/google_vertexai/smart_scraper_multi_lite_google_vertexai.py create mode 100644 examples/google_vertexai/smart_scraper_multi_lite_vertex.py create mode 100644 examples/groq/smart_scraper_multi_lite_groq.py create mode 100644 examples/huggingfacehub/smart_scraper_multi_lite_huggingfacehub.py create mode 100644 examples/huggingfacehub/smart_scraper_multi_lite_uhggingfacehub.py create mode 100644 examples/local_models/smart_scraper_multi_lite_ollama.py create mode 100644 examples/mistral/smart_scraper_multi_lite_mistral.py create mode 100644 examples/moonshot/smart_scraper_multi_lite_moonshot.py create mode 100644 examples/nemotron/smart_scraper_multi_lite_nemotron.py create mode 100644 examples/oneapi/smart_scraper_multi_lite_oneapi.py create mode 100644 examples/together/smart_scraper_multi_lite_together.py diff --git a/examples/anthropic/smart_scraper_multi_lite_anthropic.py b/examples/anthropic/smart_scraper_multi_lite_anthropic.py new file mode 100644 index 00000000..7cf3c09d --- /dev/null +++ b/examples/anthropic/smart_scraper_multi_lite_anthropic.py @@ -0,0 +1,35 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperMultiLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("ANTHROPIC_API_KEY"), + "model": "anthropic/claude-3-haiku-20240307", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_multi_lite_graph = 
SmartScraperMultiLiteGraph( + prompt="Who is Marco Perini?", + source= [ + "https://perinim.github.io/", + "https://perinim.github.io/cv/" + ], + config=graph_config +) + +result = smart_scraper_multi_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + diff --git a/examples/azure/smart_scraper_multi_lite_azure.py b/examples/azure/smart_scraper_multi_lite_azure.py new file mode 100644 index 00000000..b9046d9f --- /dev/null +++ b/examples/azure/smart_scraper_multi_lite_azure.py @@ -0,0 +1,35 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperMultiLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.environ["AZURE_OPENAI_KEY"], + "model": "azure_openai/gpt-4o" + }, + "verbose": True, + "headless": False +} + +smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph( + prompt="Who is Marco Perini?", + source= [ + "https://perinim.github.io/", + "https://perinim.github.io/cv/" + ], + config=graph_config +) + +result = smart_scraper_multi_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + diff --git a/examples/bedrock/smart_scraper_multi_lite_bedrock.py b/examples/bedrock/smart_scraper_multi_lite_bedrock.py new file mode 100644 index 00000000..5cb26067 --- /dev/null +++ b/examples/bedrock/smart_scraper_multi_lite_bedrock.py @@ -0,0 +1,29 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import json +from scrapegraphai.graphs import SmartScraperMultiLiteGraph +from scrapegraphai.utils import prettify_exec_info + +graph_config = { + "llm": { + "client": "client_name", + "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", + "temperature": 0.0 + } +} + +smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph( + prompt="Who is Marco Perini?", + source= [ + "https://perinim.github.io/", + "https://perinim.github.io/cv/" + ], + config=graph_config +) + +result = smart_scraper_multi_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/deepseek/smart_scraper_multi_lite_deepseek.py b/examples/deepseek/smart_scraper_multi_lite_deepseek.py new file mode 100644 index 00000000..eb5eea01 --- /dev/null +++ b/examples/deepseek/smart_scraper_multi_lite_deepseek.py @@ -0,0 +1,35 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperMultiLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("DEEPSEEK_API_KEY"), + "model": "deepseek/deepseek-coder-33b-instruct", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph( + prompt="Who is Marco Perini?", + source= [ + "https://perinim.github.io/", + "https://perinim.github.io/cv/" + ], + config=graph_config +) + +result = smart_scraper_multi_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + diff --git 
a/examples/ernie/smart_scraper_multi_lite_ernie.py b/examples/ernie/smart_scraper_multi_lite_ernie.py new file mode 100644 index 00000000..777a760e --- /dev/null +++ b/examples/ernie/smart_scraper_multi_lite_ernie.py @@ -0,0 +1,35 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperMultiLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("ERNIE_API_KEY"), + "model": "ernie/ernie-bot-4", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph( + prompt="Who is Marco Perini?", + source= [ + "https://perinim.github.io/", + "https://perinim.github.io/cv/" + ], + config=graph_config +) + +result = smart_scraper_multi_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + diff --git a/examples/fireworks/smart_scraper_multi_lite_fireworks.py b/examples/fireworks/smart_scraper_multi_lite_fireworks.py new file mode 100644 index 00000000..4ffaf6bb --- /dev/null +++ b/examples/fireworks/smart_scraper_multi_lite_fireworks.py @@ -0,0 +1,35 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperMultiLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("FIREWORKS_API_KEY"), + "model": "fireworks/llama-v2-70b-chat", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph( + prompt="Who is Marco Perini?", + source= [ + "https://perinim.github.io/", + "https://perinim.github.io/cv/" + ], + config=graph_config +) + +result = smart_scraper_multi_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + diff --git a/examples/google_genai/smart_scraper_multi_lite_gemini.py b/examples/google_genai/smart_scraper_multi_lite_gemini.py new file mode 100644 index 00000000..e69de29b diff --git a/examples/google_genai/smart_scraper_multi_lite_google_genai.py b/examples/google_genai/smart_scraper_multi_lite_google_genai.py new file mode 100644 index 00000000..e14e2ceb --- /dev/null +++ b/examples/google_genai/smart_scraper_multi_lite_google_genai.py @@ -0,0 +1,34 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperMultiLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("GOOGLE_API_KEY"), + "model": "gemini-pro", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph( + prompt="Who is Marco Perini?", + source= [ + "https://perinim.github.io/", + "https://perinim.github.io/cv/" + ], + config=graph_config +) + +result = smart_scraper_multi_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/google_vertexai/smart_scraper_multi_lite_google_vertexai.py 
b/examples/google_vertexai/smart_scraper_multi_lite_google_vertexai.py new file mode 100644 index 00000000..5c293416 --- /dev/null +++ b/examples/google_vertexai/smart_scraper_multi_lite_google_vertexai.py @@ -0,0 +1,35 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperMultiLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "project": os.getenv("GOOGLE_CLOUD_PROJECT"), + "location": "us-central1", + "model": "text-bison@001", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph( + prompt="Who is Marco Perini?", + source= [ + "https://perinim.github.io/", + "https://perinim.github.io/cv/" + ], + config=graph_config +) + +result = smart_scraper_multi_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/google_vertexai/smart_scraper_multi_lite_vertex.py b/examples/google_vertexai/smart_scraper_multi_lite_vertex.py new file mode 100644 index 00000000..e69de29b diff --git a/examples/groq/smart_scraper_multi_lite_groq.py b/examples/groq/smart_scraper_multi_lite_groq.py new file mode 100644 index 00000000..9c8e4d1d --- /dev/null +++ b/examples/groq/smart_scraper_multi_lite_groq.py @@ -0,0 +1,35 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperMultiLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("GROQ_API_KEY"), + "model": "mixtral-8x7b-32768", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph( + prompt="Who is Marco Perini?", + source= [ + "https://perinim.github.io/", + "https://perinim.github.io/cv/" + ], + config=graph_config +) + +result = smart_scraper_multi_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + diff --git a/examples/huggingfacehub/smart_scraper_multi_lite_huggingfacehub.py b/examples/huggingfacehub/smart_scraper_multi_lite_huggingfacehub.py new file mode 100644 index 00000000..2d7a3a45 --- /dev/null +++ b/examples/huggingfacehub/smart_scraper_multi_lite_huggingfacehub.py @@ -0,0 +1,34 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperMultiLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("HUGGINGFACEHUB_API_TOKEN"), + "model": "huggingfacehub/meta-llama/Llama-2-70b-chat-hf", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph( + prompt="Who is Marco Perini?", + source= [ + "https://perinim.github.io/", + "https://perinim.github.io/cv/" + ], + config=graph_config +) + +result = smart_scraper_multi_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/huggingfacehub/smart_scraper_multi_lite_uhggingfacehub.py 
b/examples/huggingfacehub/smart_scraper_multi_lite_uhggingfacehub.py new file mode 100644 index 00000000..e69de29b diff --git a/examples/local_models/smart_scraper_multi_lite_ollama.py b/examples/local_models/smart_scraper_multi_lite_ollama.py new file mode 100644 index 00000000..f09c4cb4 --- /dev/null +++ b/examples/local_models/smart_scraper_multi_lite_ollama.py @@ -0,0 +1,45 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import json +from scrapegraphai.graphs import SmartScraperMultiLiteGraph +from scrapegraphai.utils import prettify_exec_info + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +graph_config = { + "llm": { + "model": "ollama/llama3.1", + "temperature": 0, + "format": "json", # Ollama needs the format to be specified explicitly + "base_url": "http://localhost:11434", # set ollama URL arbitrarily + }, + "verbose": True, + "headless": False +} + +# ************************************************ +# Create the SmartScraperGraph instance and run it +# ************************************************ + +smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph( + prompt="Who is Marco Perini?", + source= [ + "https://perinim.github.io/", + "https://perinim.github.io/cv/" + ], + config=graph_config +) + +result = smart_scraper_multi_lite_graph.run() +print(json.dumps(result, indent=4)) + +# ************************************************ +# Get graph execution info +# ************************************************ + +graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + diff --git a/examples/mistral/smart_scraper_multi_lite_mistral.py b/examples/mistral/smart_scraper_multi_lite_mistral.py new file mode 100644 index 00000000..ce2d19bf --- /dev/null +++ b/examples/mistral/smart_scraper_multi_lite_mistral.py @@ -0,0 +1,35 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperMultiLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("MISTRAL_API_KEY"), + "model": "mistral/mistral-medium", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph( + prompt="Who is Marco Perini?", + source= [ + "https://perinim.github.io/", + "https://perinim.github.io/cv/" + ], + config=graph_config +) + +result = smart_scraper_multi_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + diff --git a/examples/moonshot/smart_scraper_multi_lite_moonshot.py b/examples/moonshot/smart_scraper_multi_lite_moonshot.py new file mode 100644 index 00000000..b3e2b7be --- /dev/null +++ b/examples/moonshot/smart_scraper_multi_lite_moonshot.py @@ -0,0 +1,34 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperMultiLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("MOONSHOT_API_KEY"), + "model": "moonshot/moonshot-v1-8b", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph( + prompt="Who is Marco Perini?", + source= 
[ + "https://perinim.github.io/", + "https://perinim.github.io/cv/" + ], + config=graph_config +) + +result = smart_scraper_multi_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/nemotron/smart_scraper_multi_lite_nemotron.py b/examples/nemotron/smart_scraper_multi_lite_nemotron.py new file mode 100644 index 00000000..7639d820 --- /dev/null +++ b/examples/nemotron/smart_scraper_multi_lite_nemotron.py @@ -0,0 +1,46 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperMultiLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +graph_config = { + "llm": { + "api_key": os.getenv("NEMOTRON_API_KEY"), + "model": "nemotron/nemotron-3-8b-chat", + }, + "verbose": True, + "headless": False, +} + +# ************************************************ +# Create the SmartScraperGraph instance and run it +# ************************************************ + +smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph( + prompt="Who is Marco Perini?", + source= [ + "https://perinim.github.io/", + "https://perinim.github.io/cv/" + ], + config=graph_config +) + +result = smart_scraper_multi_lite_graph.run() +print(json.dumps(result, indent=4)) + +# ************************************************ +# Get graph execution info +# ************************************************ + +graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/oneapi/smart_scraper_multi_lite_oneapi.py b/examples/oneapi/smart_scraper_multi_lite_oneapi.py new file mode 100644 index 00000000..8cf66dea --- /dev/null +++ b/examples/oneapi/smart_scraper_multi_lite_oneapi.py @@ -0,0 +1,43 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperMultiLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +together_key = os.getenv("TOGETHER_APIKEY") + +graph_config = { + "llm": { + "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + "api_key": together_key, + }, + "verbose": True, +} + +# ************************************************ +# Create the SmartScraperGraph instance and run it +# ************************************************ + +smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph( + prompt="Who is Marco Perini?", + source= [ + "https://perinim.github.io/", + "https://perinim.github.io/cv/" + ], + config=graph_config +) + +result = smart_scraper_multi_lite_graph.run() +print(json.dumps(result, indent=4)) + +# ************************************************ +# Get graph execution info +# ************************************************ + +graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/together/smart_scraper_multi_lite_together.py b/examples/together/smart_scraper_multi_lite_together.py new file mode 100644 index 00000000..8cf66dea --- /dev/null +++ b/examples/together/smart_scraper_multi_lite_together.py @@ -0,0 +1,43 @@ +""" +Basic example of scraping pipeline using SmartScraper 
+""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperMultiLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +together_key = os.getenv("TOGETHER_APIKEY") + +graph_config = { + "llm": { + "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + "api_key": together_key, + }, + "verbose": True, +} + +# ************************************************ +# Create the SmartScraperGraph instance and run it +# ************************************************ + +smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph( + prompt="Who is Marco Perini?", + source= [ + "https://perinim.github.io/", + "https://perinim.github.io/cv/" + ], + config=graph_config +) + +result = smart_scraper_multi_lite_graph.run() +print(json.dumps(result, indent=4)) + +# ************************************************ +# Get graph execution info +# ************************************************ + +graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) From b84883bfd12f4d1b4a0528e2c0503b649ea1e1fb Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Mon, 21 Oct 2024 09:39:17 +0200 Subject: [PATCH 25/39] add smartscraper lite --- .../anthropic/smart_scraper_lite_anthropic.py | 32 +++++++++++++ examples/azure/smart_scraper_lite_azure.py | 31 ++++++++++++ .../bedrock/smart_scraper_lite_bedrock.py | 26 ++++++++++ .../deepseek/smart_scraper_lite_deepseek.py | 31 ++++++++++++ examples/ernie/smart_scraper_lite_ernie.py | 31 ++++++++++++ .../fireworks/smart_scraper_lite_fireworks.py | 31 ++++++++++++ .../smart_scraper_lite_google_genai.py | 31 ++++++++++++ .../smart_scraper_lite_google_vertexai.py | 33 +++++++++++++ .../smart_scraper_multi_lite_vertex.py | 47 +++++++++++++++++++ examples/groq/smart_scraper_lite_groq.py | 31 ++++++++++++ .../smart_scraper_lite_huggingfacehub.py | 31 ++++++++++++ .../local_models/smart_scraper_lite_ollama.py | 30 ++++++++++++ .../mistral/smart_scraper_lite_mistral.py | 31 ++++++++++++ .../moonshot/smart_scraper_lite_moonshot.py | 31 ++++++++++++ .../nemotron/smart_scraper_lite_nemotron.py | 32 +++++++++++++ examples/oneapi/smart_scraper_lite_oneapi.py | 32 +++++++++++++ examples/openai/smart_scraper_lite_openai.py | 32 +++++++++++++ .../together/smart_scraper_lite_together.py | 1 + 18 files changed, 544 insertions(+) create mode 100644 examples/anthropic/smart_scraper_lite_anthropic.py create mode 100644 examples/azure/smart_scraper_lite_azure.py create mode 100644 examples/bedrock/smart_scraper_lite_bedrock.py create mode 100644 examples/deepseek/smart_scraper_lite_deepseek.py create mode 100644 examples/ernie/smart_scraper_lite_ernie.py create mode 100644 examples/fireworks/smart_scraper_lite_fireworks.py create mode 100644 examples/google_genai/smart_scraper_lite_google_genai.py create mode 100644 examples/google_vertexai/smart_scraper_lite_google_vertexai.py create mode 100644 examples/groq/smart_scraper_lite_groq.py create mode 100644 examples/huggingfacehub/smart_scraper_lite_huggingfacehub.py create mode 100644 examples/local_models/smart_scraper_lite_ollama.py create mode 100644 examples/mistral/smart_scraper_lite_mistral.py create mode 100644 examples/moonshot/smart_scraper_lite_moonshot.py create mode 100644 examples/nemotron/smart_scraper_lite_nemotron.py create mode 100644 examples/oneapi/smart_scraper_lite_oneapi.py create mode 100644 examples/openai/smart_scraper_lite_openai.py create mode 100644 
examples/together/smart_scraper_lite_together.py diff --git a/examples/anthropic/smart_scraper_lite_anthropic.py b/examples/anthropic/smart_scraper_lite_anthropic.py new file mode 100644 index 00000000..698623c6 --- /dev/null +++ b/examples/anthropic/smart_scraper_lite_anthropic.py @@ -0,0 +1,32 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("ANTHROPIC_API_KEY"), + "model": "anthropic/claude-3-haiku-20240307", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + diff --git a/examples/azure/smart_scraper_lite_azure.py b/examples/azure/smart_scraper_lite_azure.py new file mode 100644 index 00000000..335c4832 --- /dev/null +++ b/examples/azure/smart_scraper_lite_azure.py @@ -0,0 +1,31 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.environ["AZURE_OPENAI_KEY"], + "model": "azure_openai/gpt-4o" + }, + "verbose": True, + "headless": False +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/bedrock/smart_scraper_lite_bedrock.py b/examples/bedrock/smart_scraper_lite_bedrock.py new file mode 100644 index 00000000..2bf0471c --- /dev/null +++ b/examples/bedrock/smart_scraper_lite_bedrock.py @@ -0,0 +1,26 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import json +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +graph_config = { + "llm": { + "client": "client_name", + "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", + "temperature": 0.0 + } +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/deepseek/smart_scraper_lite_deepseek.py b/examples/deepseek/smart_scraper_lite_deepseek.py new file mode 100644 index 00000000..a70d76b0 --- /dev/null +++ b/examples/deepseek/smart_scraper_lite_deepseek.py @@ -0,0 +1,31 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("DEEPSEEK_API_KEY"), + "model": 
"deepseek/deepseek-coder-33b-instruct", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/ernie/smart_scraper_lite_ernie.py b/examples/ernie/smart_scraper_lite_ernie.py new file mode 100644 index 00000000..5d3ba9d9 --- /dev/null +++ b/examples/ernie/smart_scraper_lite_ernie.py @@ -0,0 +1,31 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("ERNIE_API_KEY"), + "model": "ernie/ernie-bot-4", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/fireworks/smart_scraper_lite_fireworks.py b/examples/fireworks/smart_scraper_lite_fireworks.py new file mode 100644 index 00000000..6c9a7745 --- /dev/null +++ b/examples/fireworks/smart_scraper_lite_fireworks.py @@ -0,0 +1,31 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("FIREWORKS_API_KEY"), + "model": "fireworks/llama-v2-70b-chat", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/google_genai/smart_scraper_lite_google_genai.py b/examples/google_genai/smart_scraper_lite_google_genai.py new file mode 100644 index 00000000..9b776735 --- /dev/null +++ b/examples/google_genai/smart_scraper_lite_google_genai.py @@ -0,0 +1,31 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("GOOGLE_API_KEY"), + "model": "gemini-pro", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/google_vertexai/smart_scraper_lite_google_vertexai.py b/examples/google_vertexai/smart_scraper_lite_google_vertexai.py new file 
mode 100644 index 00000000..eca61bbb --- /dev/null +++ b/examples/google_vertexai/smart_scraper_lite_google_vertexai.py @@ -0,0 +1,33 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "project": os.getenv("GOOGLE_CLOUD_PROJECT"), + "location": "us-central1", + "model": "text-bison@001", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + diff --git a/examples/google_vertexai/smart_scraper_multi_lite_vertex.py b/examples/google_vertexai/smart_scraper_multi_lite_vertex.py index e69de29b..60ff3638 100644 --- a/examples/google_vertexai/smart_scraper_multi_lite_vertex.py +++ b/examples/google_vertexai/smart_scraper_multi_lite_vertex.py @@ -0,0 +1,47 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperMultiLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +graph_config = { + "llm": { + "project": os.getenv("GOOGLE_CLOUD_PROJECT"), + "location": "us-central1", + "model": "text-bison@001", + }, + "verbose": True, + "headless": False, +} + +# ************************************************ +# Create the SmartScraperGraph instance and run it +# ************************************************ + +smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph( + prompt="Who is Marco Perini?", + source= [ + "https://perinim.github.io/", + "https://perinim.github.io/cv/" + ], + config=graph_config +) + +result = smart_scraper_multi_lite_graph.run() +print(json.dumps(result, indent=4)) + +# ************************************************ +# Get graph execution info +# ************************************************ + +graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/groq/smart_scraper_lite_groq.py b/examples/groq/smart_scraper_lite_groq.py new file mode 100644 index 00000000..5fe6022f --- /dev/null +++ b/examples/groq/smart_scraper_lite_groq.py @@ -0,0 +1,31 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("GROQ_API_KEY"), + "model": "mixtral-8x7b-32768", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/huggingfacehub/smart_scraper_lite_huggingfacehub.py 
b/examples/huggingfacehub/smart_scraper_lite_huggingfacehub.py new file mode 100644 index 00000000..4faa8a47 --- /dev/null +++ b/examples/huggingfacehub/smart_scraper_lite_huggingfacehub.py @@ -0,0 +1,31 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("HUGGINGFACEHUB_API_TOKEN"), + "model": "huggingfacehub/meta-llama/Llama-2-70b-chat-hf", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/local_models/smart_scraper_lite_ollama.py b/examples/local_models/smart_scraper_lite_ollama.py new file mode 100644 index 00000000..2cf6c402 --- /dev/null +++ b/examples/local_models/smart_scraper_lite_ollama.py @@ -0,0 +1,30 @@ +""" +Basic example of scraping pipeline using SmartScraper + +""" +import json +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +graph_config = { + "llm": { + "model": "ollama/llama3.1", + "temperature": 0, + "format": "json", + "base_url": "http://localhost:11434", + }, + "verbose": True, + "headless": False +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/mistral/smart_scraper_lite_mistral.py b/examples/mistral/smart_scraper_lite_mistral.py new file mode 100644 index 00000000..390371f9 --- /dev/null +++ b/examples/mistral/smart_scraper_lite_mistral.py @@ -0,0 +1,31 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("MISTRAL_API_KEY"), + "model": "mistral/mistral-medium", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/moonshot/smart_scraper_lite_moonshot.py b/examples/moonshot/smart_scraper_lite_moonshot.py new file mode 100644 index 00000000..509027fb --- /dev/null +++ b/examples/moonshot/smart_scraper_lite_moonshot.py @@ -0,0 +1,31 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("ANTHROPIC_API_KEY"), + "model": "anthropic/claude-3-haiku-20240307", + 
}, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/nemotron/smart_scraper_lite_nemotron.py b/examples/nemotron/smart_scraper_lite_nemotron.py new file mode 100644 index 00000000..6c1d8528 --- /dev/null +++ b/examples/nemotron/smart_scraper_lite_nemotron.py @@ -0,0 +1,32 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("NEMOTRON_API_KEY"), + "model": "nemotron/nemotron-3.5-turbo", + "base_url": "http://127.0.0.1:3000/v1", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/oneapi/smart_scraper_lite_oneapi.py b/examples/oneapi/smart_scraper_lite_oneapi.py new file mode 100644 index 00000000..b271acb3 --- /dev/null +++ b/examples/oneapi/smart_scraper_lite_oneapi.py @@ -0,0 +1,32 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("ONEAPI_API_KEY"), + "model": "oneapi/gpt-3.5-turbo", + "base_url": "http://127.0.0.1:3000/v1", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/openai/smart_scraper_lite_openai.py b/examples/openai/smart_scraper_lite_openai.py new file mode 100644 index 00000000..5de725bb --- /dev/null +++ b/examples/openai/smart_scraper_lite_openai.py @@ -0,0 +1,32 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("OPENAI_API_KEY"), + "model": "openai/gpt-4o", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + diff --git a/examples/together/smart_scraper_lite_together.py b/examples/together/smart_scraper_lite_together.py new file mode 100644 index 
00000000..0519ecba --- /dev/null +++ b/examples/together/smart_scraper_lite_together.py @@ -0,0 +1 @@ + \ No newline at end of file From 52b6bf5fb8c570aa8ef026916230c5d52996f887 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Mon, 21 Oct 2024 10:12:53 +0200 Subject: [PATCH 26/39] feat: refactoring of ScrapeGraph to SmartScraperLiteGraph --- scrapegraphai/graphs/__init__.py | 2 +- .../{scrape_graph.py => smart_scraper_lite_graph.py} | 9 +++++---- scrapegraphai/graphs/smart_scraper_multi_lite_graph.py | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) rename scrapegraphai/graphs/{scrape_graph.py => smart_scraper_lite_graph.py} (92%) diff --git a/scrapegraphai/graphs/__init__.py b/scrapegraphai/graphs/__init__.py index 9c8bc820..2c75f0f7 100644 --- a/scrapegraphai/graphs/__init__.py +++ b/scrapegraphai/graphs/__init__.py @@ -26,4 +26,4 @@ from .code_generator_graph import CodeGeneratorGraph from .depth_search_graph import DepthSearchGraph from .smart_scraper_multi_lite_graph import SmartScraperMultiLiteGraph -from .scrape_graph import ScrapeGraph +from .smart_scraper_lite_graph import SmartScraperLiteGraph diff --git a/scrapegraphai/graphs/scrape_graph.py b/scrapegraphai/graphs/smart_scraper_lite_graph.py similarity index 92% rename from scrapegraphai/graphs/scrape_graph.py rename to scrapegraphai/graphs/smart_scraper_lite_graph.py index a08149aa..77437145 100644 --- a/scrapegraphai/graphs/scrape_graph.py +++ b/scrapegraphai/graphs/smart_scraper_lite_graph.py @@ -10,9 +10,9 @@ ParseNode, ) -class ScrapeGraph(AbstractGraph): +class SmartScraperLiteGraph(AbstractGraph): """ - ScrapeGraph is a scraping pipeline that automates the process of + SmartScraperLiteGraph is a scraping pipeline that automates the process of extracting information from web pages. Attributes: @@ -30,7 +30,7 @@ class ScrapeGraph(AbstractGraph): schema (BaseModel): The schema for the graph output. Example: - >>> scraper = ScraperGraph( + >>> scraper = SmartScraperLiteGraph( ... "https://en.wikipedia.org/wiki/Chioggia", ... {"llm": {"model": "openai/gpt-3.5-turbo"}} ... 
) @@ -38,7 +38,8 @@ class ScrapeGraph(AbstractGraph): ) """ - def __init__(self, source: str, config: dict, prompt: str = "", schema: Optional[BaseModel] = None): + def __init__(self, source: str, config: dict, prompt: str = "", + schema: Optional[BaseModel] = None): super().__init__(prompt, config, source, schema) self.input_key = "url" if source.startswith("http") else "local_dir" diff --git a/scrapegraphai/graphs/smart_scraper_multi_lite_graph.py b/scrapegraphai/graphs/smart_scraper_multi_lite_graph.py index 14e576d9..bb17bd03 100644 --- a/scrapegraphai/graphs/smart_scraper_multi_lite_graph.py +++ b/scrapegraphai/graphs/smart_scraper_multi_lite_graph.py @@ -6,7 +6,7 @@ from pydantic import BaseModel from .base_graph import BaseGraph from .abstract_graph import AbstractGraph -from .scrape_graph import ScrapeGraph +from .smart_scraper_lite_graph import SmartScraperLiteGraph from ..nodes import ( GraphIteratorNode, MergeAnswersNode, @@ -63,7 +63,7 @@ def _create_graph(self) -> BaseGraph: input="user_prompt & urls", output=["parsed_doc"], node_config={ - "graph_instance": ScrapeGraph, + "graph_instance": SmartScraperLiteGraph, "scraper_config": self.copy_config, }, schema=self.copy_schema From 3d6bbcdaa3828ff257adb22f2f7c1a46343de5b5 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Mon, 21 Oct 2024 08:14:25 +0000 Subject: [PATCH 27/39] ci(release): 1.27.0-beta.4 [skip ci] ## [1.27.0-beta.4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.3...v1.27.0-beta.4) (2024-10-21) ### Features * refactoring of ScrapeGraph to SmartScraperLiteGraph ([52b6bf5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/52b6bf5fb8c570aa8ef026916230c5d52996f887)) --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c029ea3..1c2d2eeb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.27.0-beta.4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.3...v1.27.0-beta.4) (2024-10-21) + + +### Features + +* refactoring of ScrapeGraph to SmartScraperLiteGraph ([52b6bf5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/52b6bf5fb8c570aa8ef026916230c5d52996f887)) + ## [1.27.0-beta.3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.2...v1.27.0-beta.3) (2024-10-20) diff --git a/pyproject.toml b/pyproject.toml index 85d7b442..912533e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.27.0b3" +version = "1.27.0b4" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." 
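Beyond the rename, the GraphIteratorNode wiring in the diff above shows the general extension point used by the multi graphs: the node spawns one sub-graph per URL from the class passed as graph_instance. A sketch of that wiring under the same node_config keys shown in the diff; the inline LLM config is illustrative:

from scrapegraphai.nodes import GraphIteratorNode
from scrapegraphai.graphs import SmartScraperLiteGraph

# One SmartScraperLiteGraph instance is created per URL in the state;
# "scraper_config" is the config dict forwarded to each sub-graph.
graph_iterator_node = GraphIteratorNode(
    input="user_prompt & urls",
    output=["parsed_doc"],
    node_config={
        "graph_instance": SmartScraperLiteGraph,
        "scraper_config": {"llm": {"model": "openai/gpt-4o"}},  # illustrative
    },
)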
From 0ea00c078f2811f0d1b356bd84cafde80763c703 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Mon, 21 Oct 2024 10:30:21 +0200 Subject: [PATCH 28/39] feat: refactoring of export functions --- scrapegraphai/utils/__init__.py | 3 +- scrapegraphai/utils/convert_to_csv.py | 55 -------------------------- scrapegraphai/utils/convert_to_json.py | 52 ------------------------ 3 files changed, 1 insertion(+), 109 deletions(-) delete mode 100644 scrapegraphai/utils/convert_to_csv.py delete mode 100644 scrapegraphai/utils/convert_to_json.py diff --git a/scrapegraphai/utils/__init__.py b/scrapegraphai/utils/__init__.py index d5badca9..22f6a4bc 100644 --- a/scrapegraphai/utils/__init__.py +++ b/scrapegraphai/utils/__init__.py @@ -1,8 +1,6 @@ """ __init__.py file for utils folder """ -from .convert_to_csv import convert_to_csv -from .convert_to_json import convert_to_json from .prettify_exec_info import prettify_exec_info from .proxy_rotation import Proxy, parse_or_search_proxy, search_proxy_servers from .save_audio_from_bytes import save_audio_from_bytes @@ -28,3 +26,4 @@ validation_focused_code_generation, semantic_focused_code_generation) from .save_code_to_file import save_code_to_file +from .data_export import export_to_json, export_to_csv, export_to_xml diff --git a/scrapegraphai/utils/convert_to_csv.py b/scrapegraphai/utils/convert_to_csv.py deleted file mode 100644 index e0664541..00000000 --- a/scrapegraphai/utils/convert_to_csv.py +++ /dev/null @@ -1,55 +0,0 @@ -""" -Module that given a filename and a position saves the file in the csv format -""" -import os -import sys -import pandas as pd - -def convert_to_csv(data: dict, filename: str, position: str = None) -> None: - """ - Converts a dictionary to a CSV file and saves it at a specified location. - - Args: - data (dict): The data to be converted into CSV format. - filename (str): The name of the output CSV file, without the '.csv' extension. - position (str, optional): The file path where the CSV should be saved. - Defaults to the directory of the caller script if not provided. - - Returns: - None: The function does not return anything. - - Raises: - FileNotFoundError: If the specified directory does not exist. - PermissionError: If write permissions are lacking for the directory. - TypeError: If `data` is not a dictionary. - Exception: For other issues that may arise during the creation or saving of the CSV file. - - Example: - >>> convert_to_csv({'id': [1, 2], 'value': [10, 20]}, 'output', '/path/to/save') - Saves a CSV file named 'output.csv' at '/path/to/save'. 
- """ - - if ".csv" in filename: - filename = filename.replace(".csv", "") - - if position is None: - caller_dir = os.path.dirname(os.path.abspath(sys.argv[0])) - position = caller_dir - - try: - if not isinstance(data, dict): - raise TypeError("Input data must be a dictionary") - - os.makedirs(position, exist_ok=True) - - df = pd.DataFrame.from_dict(data, orient='index') - df.to_csv(os.path.join(position, f"{filename}.csv"), index=False) - - except FileNotFoundError as fnfe: - raise FileNotFoundError( - f"The specified directory '{position}' does not exist.") from fnfe - except PermissionError as pe: - raise PermissionError( - f"You don't have permission to write to '{position}'.") from pe - except Exception as e: - raise e diff --git a/scrapegraphai/utils/convert_to_json.py b/scrapegraphai/utils/convert_to_json.py deleted file mode 100644 index 4e1711f1..00000000 --- a/scrapegraphai/utils/convert_to_json.py +++ /dev/null @@ -1,52 +0,0 @@ -""" -Convert to json module -""" -import json -import os -import sys - -def convert_to_json(data: dict, filename: str, position: str = None) -> None: - """ - Converts a dictionary to a JSON file and saves it at a specified location. - - Args: - data (dict): The data to be converted into JSON format. - filename (str): The name of the output JSON file, without the '.json' extension. - position (str, optional): The file path where the JSON file should be saved. - Defaults to the directory of the caller script if not provided. - - Returns: - None: The function does not return anything. - - Raises: - ValueError: If 'filename' contains '.json'. - FileNotFoundError: If the specified directory does not exist. - PermissionError: If write permissions are lacking for the directory. - - Example: - >>> convert_to_json({'id': [1, 2], 'value': [10, 20]}, 'output', '/path/to/save') - Saves a JSON file named 'output.json' at '/path/to/save'. - - Notes: - This function automatically ensures the directory exists before - attempting to write the file. - If the directory does not exist, it will attempt to create it. 
- """ - - if ".json" in filename: - filename = filename.replace(".json", "") # Remove .json extension - - if position is None: - caller_dir = os.path.dirname(os.path.abspath(sys.argv[0])) - position = caller_dir - - try: - os.makedirs(position, exist_ok=True) - with open(os.path.join(position, f"{filename}.json"), "w", encoding="utf-8") as f: - f.write(json.dumps(data)) - except FileNotFoundError as fnfe: - raise FileNotFoundError( - f"The specified directory '{position}' does not exist.") from fnfe - except PermissionError as pe: - raise PermissionError( - f"You don't have permission to write to '{position}'.") from pe From 5002c713d5a76b2c2e4313f888d9768e3f3142e1 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Tue, 22 Oct 2024 07:06:26 +0000 Subject: [PATCH 29/39] ci(release): 1.27.0-beta.5 [skip ci] ## [1.27.0-beta.5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.4...v1.27.0-beta.5) (2024-10-22) ### Features * refactoring of export functions ([0ea00c0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/0ea00c078f2811f0d1b356bd84cafde80763c703)) --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c2d2eeb..8d4aea50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.27.0-beta.5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.4...v1.27.0-beta.5) (2024-10-22) + + +### Features + +* refactoring of export functions ([0ea00c0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/0ea00c078f2811f0d1b356bd84cafde80763c703)) + ## [1.27.0-beta.4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.3...v1.27.0-beta.4) (2024-10-21) diff --git a/pyproject.toml b/pyproject.toml index 912533e2..b006de1a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.27.0b4" +version = "1.27.0b5" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." 
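The removed convert_to_csv and convert_to_json helpers are superseded by the new data_export module, which the utils package now re-exports as export_to_json, export_to_csv, and export_to_xml. A usage sketch, assuming the new functions keep a (data, filename)-style signature similar to the removed converters; the exact parameters live in scrapegraphai/utils/data_export.py:

from scrapegraphai.utils import export_to_csv, export_to_json, export_to_xml

result = {"title": "Example page", "links": ["https://example.com"]}

# The signatures below are assumptions mirroring the removed
# convert_to_* helpers; check data_export.py for the real ones.
export_to_json(result, "output.json")
export_to_csv([result], "output.csv")  # assuming CSV export takes a list of row dicts
export_to_xml(result, "output.xml")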
From ae275ec5e86c0bb8fdbeadc2e5f69816d1dea635 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Wed, 23 Oct 2024 12:08:00 +0200 Subject: [PATCH 30/39] feat: add integration with scrape.do --- scrapegraphai/nodes/fetch_node.py | 6 +++--- scrapegraphai/nodes/fetch_node_level_k.py | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index 4cd549a5..d90864e9 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -270,10 +270,10 @@ def handle_web_source(self, state, source): else: loader_kwargs = {} - if self.node_config is not None: + if self.node_config: loader_kwargs = self.node_config.get("loader_kwargs", {}) - if self.browser_base is not None: + if self.browser_base: try: from ..docloaders.browser_base import browser_base_fetch except ImportError: @@ -285,7 +285,7 @@ def handle_web_source(self, state, source): document = [Document(page_content=content, metadata={"source": source}) for content in data] - elif self.scrape_do is not None: + elif self.scrape_do: from ..docloaders.scrape_do import scrape_do_fetch if (self.scrape_do.get("use_proxy") is None) or \ self.scrape_do.get("geoCode") is None or \ diff --git a/scrapegraphai/nodes/fetch_node_level_k.py b/scrapegraphai/nodes/fetch_node_level_k.py index 0f772edf..ce8e4042 100644 --- a/scrapegraphai/nodes/fetch_node_level_k.py +++ b/scrapegraphai/nodes/fetch_node_level_k.py @@ -57,6 +57,7 @@ def __init__( self.headless = node_config.get("headless", True) if node_config else True self.loader_kwargs = node_config.get("loader_kwargs", {}) if node_config else {} self.browser_base = node_config.get("browser_base", None) + self.scrape_do = node_config.get("scrape_do", None) self.depth = node_config.get("depth", 1) if node_config else 1 self.only_inside_links = node_config.get("only_inside_links", False) if node_config else False self.min_input_len = 1 @@ -115,6 +116,11 @@ def fetch_content(self, source: str, loader_kwargs) -> Optional[str]: self.browser_base.get("project_id"), [source]) document = [Document(page_content=content, metadata={"source": source}) for content in data] + elif self.scrape_do: + from ..docloaders.scrape_do import scrape_do_fetch + data = scrape_do_fetch(self.scrape_do.get("api_key"), source) + document = [Document(page_content=data, + metadata={"source": source})] else: loader = ChromiumLoader([source], headless=self.headless, **loader_kwargs) document = loader.load() From 94b9836ef6cd9c24bb8c04d7049d5477cc8ed807 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Wed, 23 Oct 2024 10:09:36 +0000 Subject: [PATCH 31/39] ci(release): 1.27.0-beta.6 [skip ci] ## [1.27.0-beta.6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.5...v1.27.0-beta.6) (2024-10-23) ### Features * add integration with scrape.do ([ae275ec](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/ae275ec5e86c0bb8fdbeadc2e5f69816d1dea635)) --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d4aea50..a3615122 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.27.0-beta.6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.5...v1.27.0-beta.6) (2024-10-23) + + +### Features + +* add integration with scrape.do ([ae275ec](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/ae275ec5e86c0bb8fdbeadc2e5f69816d1dea635)) + ## 
[1.27.0-beta.5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.4...v1.27.0-beta.5) (2024-10-22) diff --git a/pyproject.toml b/pyproject.toml index b006de1a..b3fc2f11 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.27.0b5" +version = "1.27.0b6" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." From f658092dffb20ea111cc00950f617057482788f4 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Wed, 23 Oct 2024 12:15:16 +0200 Subject: [PATCH 32/39] feat: refactoring of get_probable_tags node --- scrapegraphai/nodes/get_probable_tags_node.py | 10 ++-------- scrapegraphai/prompts/__init__.py | 1 + .../prompts/get_probable_tags_node_prompts.py | 12 ++++++++++++ 3 files changed, 15 insertions(+), 8 deletions(-) create mode 100644 scrapegraphai/prompts/get_probable_tags_node_prompts.py diff --git a/scrapegraphai/nodes/get_probable_tags_node.py b/scrapegraphai/nodes/get_probable_tags_node.py index 9ba38283..e34bbbb4 100644 --- a/scrapegraphai/nodes/get_probable_tags_node.py +++ b/scrapegraphai/nodes/get_probable_tags_node.py @@ -4,6 +4,7 @@ from typing import List, Optional from langchain.output_parsers import CommaSeparatedListOutputParser from langchain.prompts import PromptTemplate +from ..prompts import TEMPLATE_GET_PROBABLE_TAGS from ..utils.logging import get_logger from .base_node import BaseNode @@ -68,14 +69,7 @@ def execute(self, state: dict) -> dict: output_parser = CommaSeparatedListOutputParser() format_instructions = output_parser.get_format_instructions() - template = """ - PROMPT: - You are a website scraper that knows all the types of html tags. - You are now asked to list all the html tags where you think you can find the information of the asked question.\n - INSTRUCTIONS: {format_instructions} \n - WEBPAGE: The webpage is: {webpage} \n - QUESTION: The asked question is the following: {question} - """ + template = TEMPLATE_GET_PROBABLE_TAGS tag_prompt = PromptTemplate( template=template, diff --git a/scrapegraphai/prompts/__init__.py b/scrapegraphai/prompts/__init__.py index ea916842..15889108 100644 --- a/scrapegraphai/prompts/__init__.py +++ b/scrapegraphai/prompts/__init__.py @@ -36,3 +36,4 @@ from .reasoning_node_prompts import (TEMPLATE_REASONING, TEMPLATE_REASONING_WITH_CONTEXT) from .merge_generated_scripts_prompts import TEMPLATE_MERGE_SCRIPTS_PROMPT +from .get_probable_tags_node_prompts import TEMPLATE_GET_PROBABLE_TAGS diff --git a/scrapegraphai/prompts/get_probable_tags_node_prompts.py b/scrapegraphai/prompts/get_probable_tags_node_prompts.py new file mode 100644 index 00000000..ed86e163 --- /dev/null +++ b/scrapegraphai/prompts/get_probable_tags_node_prompts.py @@ -0,0 +1,12 @@ +""" +Get probable tags node prompts +""" + +TEMPLATE_GET_PROBABLE_TAGS = """ + PROMPT: + You are a website scraper that knows all the types of html tags. 
+ You are now asked to list all the html tags where you think you can find the information of the asked question.\n + INSTRUCTIONS: {format_instructions} \n + WEBPAGE: The webpage is: {webpage} \n + QUESTION: The asked question is the following: {question} +""" From 407f1ce4eb22fb284ef0624dd3f7bf7ba432fa5c Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Thu, 24 Oct 2024 06:45:14 +0000 Subject: [PATCH 33/39] ci(release): 1.27.0-beta.7 [skip ci] ## [1.27.0-beta.7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.6...v1.27.0-beta.7) (2024-10-24) ### Features * refactoring of get_probable_tags node ([f658092](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f658092dffb20ea111cc00950f617057482788f4)) --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a3615122..346cf772 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.27.0-beta.7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.6...v1.27.0-beta.7) (2024-10-24) + + +### Features + +* refactoring of get_probable_tags node ([f658092](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f658092dffb20ea111cc00950f617057482788f4)) + ## [1.27.0-beta.6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.5...v1.27.0-beta.6) (2024-10-23) diff --git a/pyproject.toml b/pyproject.toml index b3fc2f11..0fab27b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.27.0b6" +version = "1.27.0b7" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." From 4f1ed939e671e46bb546b6b605db87e87c0d66ee Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Thu, 24 Oct 2024 06:55:58 +0000 Subject: [PATCH 34/39] ci(release): 1.27.0-beta.8 [skip ci] ## [1.27.0-beta.8](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.7...v1.27.0-beta.8) (2024-10-24) ### Bug Fixes * removed tokenizer ([a184716](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/a18471688f0b79f06fb7078b01b68eeddc88eae4)) ### CI * **release:** 1.26.7 [skip ci] ([ec9ef2b](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/ec9ef2bcda9aa81f66b943829fcdb22fe265976e)) --- CHANGELOG.md | 12 ++++++++++++ pyproject.toml | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b908800e..71c7f6dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,15 @@ +## [1.27.0-beta.8](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.7...v1.27.0-beta.8) (2024-10-24) + + +### Bug Fixes + +* removed tokenizer ([a184716](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/a18471688f0b79f06fb7078b01b68eeddc88eae4)) + + +### CI + +* **release:** 1.26.7 [skip ci] ([ec9ef2b](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/ec9ef2bcda9aa81f66b943829fcdb22fe265976e)) + ## [1.27.0-beta.7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.6...v1.27.0-beta.7) (2024-10-24) diff --git a/pyproject.toml b/pyproject.toml index 553c574c..e12c8ff7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.27.0b7" +version = "1.27.0b8" From 51c55eb3a2984ba60572edbcdea4c30620e18d76 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Thu, 24 Oct 2024 09:10:51 +0200 Subject: [PATCH 35/39] feat: add model integration gpt4 --- 
scrapegraphai/nodes/generate_answer_from_image_node.py | 4 ++-- scrapegraphai/prompts/description_node_prompts.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scrapegraphai/nodes/generate_answer_from_image_node.py b/scrapegraphai/nodes/generate_answer_from_image_node.py index 7134cabe..9359b2bb 100644 --- a/scrapegraphai/nodes/generate_answer_from_image_node.py +++ b/scrapegraphai/nodes/generate_answer_from_image_node.py @@ -71,10 +71,10 @@ async def execute_async(self, state: dict) -> dict: images = state.get('screenshots', []) analyses = [] - supported_models = ("gpt-4o", "gpt-4o-mini", "gpt-4-turbo") + supported_models = ("gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "gpt-4") if self.node_config["config"]["llm"]["model"].split("/")[-1]not in supported_models: - raise ValueError(f"""Model '{self.node_config['config']['llm']['model']}' + raise ValueError(f"""The model provided is not supported. Supported models are: {', '.join(supported_models)}.""") diff --git a/scrapegraphai/prompts/description_node_prompts.py b/scrapegraphai/prompts/description_node_prompts.py index 86264d0b..944ed24e 100644 --- a/scrapegraphai/prompts/description_node_prompts.py +++ b/scrapegraphai/prompts/description_node_prompts.py @@ -7,4 +7,4 @@ following content from a website. \n Please provide a description summary of maximum of 20 words. \n CONTENT OF THE WEBSITE: {content} -""" \ No newline at end of file +""" From c8a000f1d943734a921b34e91498b2f29c8c9422 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Thu, 24 Oct 2024 10:11:36 +0200 Subject: [PATCH 36/39] fix: fix export function --- scrapegraphai/utils/data_export.py | 53 ++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 scrapegraphai/utils/data_export.py diff --git a/scrapegraphai/utils/data_export.py b/scrapegraphai/utils/data_export.py new file mode 100644 index 00000000..afa05af4 --- /dev/null +++ b/scrapegraphai/utils/data_export.py @@ -0,0 +1,53 @@ +import json +import csv +import xml.etree.ElementTree as ET +from typing import List, Dict, Any + +def export_to_json(data: List[Dict[str, Any]], filename: str) -> None: + """ + Export data to a JSON file. + + :param data: List of dictionaries containing the data to export + :param filename: Name of the file to save the JSON data + """ + with open(filename, 'w', encoding='utf-8') as f: + json.dump(data, f, ensure_ascii=False, indent=4) + print(f"Data exported to {filename}") + +def export_to_csv(data: List[Dict[str, Any]], filename: str) -> None: + """ + Export data to a CSV file. + + :param data: List of dictionaries containing the data to export + :param filename: Name of the file to save the CSV data + """ + if not data: + print("No data to export") + return + + keys = data[0].keys() + with open(filename, 'w', newline='', encoding='utf-8') as f: + writer = csv.DictWriter(f, fieldnames=keys) + writer.writeheader() + writer.writerows(data) + print(f"Data exported to {filename}") + +def export_to_xml(data: List[Dict[str, Any]], filename: str, root_element: str = "data") -> None: + """ + Export data to an XML file. 
+ + :param data: List of dictionaries containing the data to export + :param filename: Name of the file to save the XML data + :param root_element: Name of the root element in the XML structure + """ + root = ET.Element(root_element) + for item in data: + element = ET.SubElement(root, "item") + for key, value in item.items(): + sub_element = ET.SubElement(element, key) + sub_element.text = str(value) + + tree = ET.ElementTree(root) + tree.write(filename, encoding='utf-8', xml_declaration=True) + print(f"Data exported to {filename}") + From 6179ab99a4803c1d086848d72d5966bd184e3087 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Thu, 24 Oct 2024 15:20:36 +0200 Subject: [PATCH 37/39] Update data_export.py --- scrapegraphai/utils/data_export.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scrapegraphai/utils/data_export.py b/scrapegraphai/utils/data_export.py index afa05af4..fbff45e2 100644 --- a/scrapegraphai/utils/data_export.py +++ b/scrapegraphai/utils/data_export.py @@ -1,3 +1,7 @@ +""" +data_export module +This module provides functions to export data to various file formats. +""" import json import csv import xml.etree.ElementTree as ET From fd57cc7c126658960e33b7214c2cc656ea032d8f Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Thu, 24 Oct 2024 22:39:44 +0000 Subject: [PATCH 38/39] ci(release): 1.27.0-beta.9 [skip ci] ## [1.27.0-beta.9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.8...v1.27.0-beta.9) (2024-10-24) ### Features * add model integration gpt4 ([51c55eb](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/51c55eb3a2984ba60572edbcdea4c30620e18d76)) --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71c7f6dd..abeac5ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.27.0-beta.9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.8...v1.27.0-beta.9) (2024-10-24) + + +### Features + +* add model integration gpt4 ([51c55eb](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/51c55eb3a2984ba60572edbcdea4c30620e18d76)) + ## [1.27.0-beta.8](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.7...v1.27.0-beta.8) (2024-10-24) diff --git a/pyproject.toml b/pyproject.toml index e12c8ff7..539ef425 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.27.0b8" +version = "1.27.0b9" From eee131e959a36a4471f72610eefbc1764808b6be Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Fri, 25 Oct 2024 06:45:23 +0000 Subject: [PATCH 39/39] ci(release): 1.27.0-beta.10 [skip ci] ## [1.27.0-beta.10](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.9...v1.27.0-beta.10) (2024-10-25) ### Bug Fixes * fix export function ([c8a000f](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/c8a000f1d943734a921b34e91498b2f29c8c9422)) --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index abeac5ec..58aba1fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.27.0-beta.10](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.9...v1.27.0-beta.10) (2024-10-25) + + +### Bug Fixes + +* fix export function ([c8a000f](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/c8a000f1d943734a921b34e91498b2f29c8c9422)) + ## 
[1.27.0-beta.9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0-beta.8...v1.27.0-beta.9) (2024-10-24) diff --git a/pyproject.toml b/pyproject.toml index 539ef425..be705469 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.27.0b9" +version = "1.27.0b10"
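
The series closes with the beta.10 release packaging the data_export fix.
A short usage sketch of the three helpers added in c8a000f: the import
path mirrors the file created by the patch, and the sample records are
illustrative, not taken from the library.

    from scrapegraphai.utils.data_export import (
        export_to_json,
        export_to_csv,
        export_to_xml,
    )

    # Illustrative records: any list of flat dictionaries works, since
    # export_to_csv derives its header from the first item's keys and
    # export_to_xml turns each key into a child element of <item>.
    records = [
        {"title": "Example A", "url": "https://example.com/a"},
        {"title": "Example B", "url": "https://example.com/b"},
    ]

    export_to_json(records, "results.json")
    export_to_csv(records, "results.csv")
    export_to_xml(records, "results.xml", root_element="pages")

Each helper writes UTF-8 output and prints a "Data exported to ..."
confirmation, matching the implementations in the diff above.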