From 073d226723f5f03b960865d07408905b7a506180 Mon Sep 17 00:00:00 2001
From: Marco Vinciguerra <mvincig11@gmail.com>
Date: Tue, 18 Jun 2024 14:35:13 +0200
Subject: [PATCH 1/8] feat: add new search engine availability and new tests

---
 examples/single_node/search_internet_node.py | 50 +++++++++++++++++
 scrapegraphai/nodes/search_internet_node.py  |  4 +-
 scrapegraphai/utils/research_web.py          |  3 +-
 tests/nodes/search_internet_node_test.py     | 58 ++++++++++++++++++++
 4 files changed, 113 insertions(+), 2 deletions(-)
 create mode 100644 examples/single_node/search_internet_node.py
 create mode 100644 tests/nodes/search_internet_node_test.py

diff --git a/examples/single_node/search_internet_node.py b/examples/single_node/search_internet_node.py
new file mode 100644
index 00000000..8a8149fa
--- /dev/null
+++ b/examples/single_node/search_internet_node.py
@@ -0,0 +1,50 @@
+"""
+Example of custom graph using existing nodes
+"""
+
+from scrapegraphai.models import Ollama
+from scrapegraphai.nodes import SearchInternetNode
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+graph_config = {
+    "llm": {
+        "model": "llama3",
+        "temperature": 0,
+        "streaming": True
+    },
+    "search_engine": "google",
+    "max_results": 3,
+    "verbose": True
+}
+
+# ************************************************
+# Define the node
+# ************************************************
+
+llm_model = Ollama(graph_config["llm"])
+
+search_node = SearchInternetNode(
+    input="user_input",
+    output=["search_results"],
+    node_config={
+        "llm_model": llm_model,
+        "search_engine": graph_config["search_engine"],
+        "max_results": graph_config["max_results"],
+        "verbose": graph_config["verbose"]
+    }
+)
+
+# ************************************************
+# Test the node
+# ************************************************
+
+state = {
+    "user_input": "What is the capital of France?"
+}
+
+result = search_node.execute(state)
+
+print(result)
diff --git a/scrapegraphai/nodes/search_internet_node.py b/scrapegraphai/nodes/search_internet_node.py
index 9fa4a8f5..59c56975 100644
--- a/scrapegraphai/nodes/search_internet_node.py
+++ b/scrapegraphai/nodes/search_internet_node.py
@@ -43,6 +43,7 @@ def __init__(
         self.verbose = (
             False if node_config is None else node_config.get("verbose", False)
         )
+        self.search_engine = node_config.get("search_engine", "google")
         self.max_results = node_config.get("max_results", 3)
 
     def execute(self, state: dict) -> dict:
@@ -97,7 +98,8 @@ def execute(self, state: dict) -> dict:
 
         self.logger.info(f"Search Query: {search_query}")
 
-        answer = search_on_web(query=search_query, max_results=self.max_results)
+        answer = search_on_web(query=search_query, max_results=self.max_results,
+                               search_engine=self.search_engine)
 
         if len(answer) == 0:
             # raise an exception if no answer is found
diff --git a/scrapegraphai/utils/research_web.py b/scrapegraphai/utils/research_web.py
index a839a680..62ffd2ee 100644
--- a/scrapegraphai/utils/research_web.py
+++ b/scrapegraphai/utils/research_web.py
@@ -26,7 +26,8 @@ def search_on_web(query: str, search_engine: str = "Google", max_results: int =
         >>> search_on_web("example query", search_engine="Google", max_results=5)
         ['http://example.com', 'http://example.org', ...]
 
-    This function allows switching between Google and DuckDuckGo to perform internet searches, returning a list of result URLs.
+    This function allows switching between Google and DuckDuckGo to perform 
+    internet searches, returning a list of result URLs.
     """
 
     if search_engine.lower() == "google":
diff --git a/tests/nodes/search_internet_node_test.py b/tests/nodes/search_internet_node_test.py
new file mode 100644
index 00000000..db2cbdee
--- /dev/null
+++ b/tests/nodes/search_internet_node_test.py
@@ -0,0 +1,58 @@
+import unittest
+from scrapegraphai.models import Ollama
+from scrapegraphai.nodes import SearchInternetNode
+
+class TestSearchInternetNode(unittest.TestCase):
+
+    def setUp(self):
+        # Configuration for the graph
+        self.graph_config = {
+            "llm": {
+                "model": "llama3",
+                "temperature": 0,
+                "streaming": True
+            },
+            "search_engine": "google",
+            "max_results": 3,
+            "verbose": True
+        }
+
+        # Define the model
+        self.llm_model = Ollama(self.graph_config["llm"])
+
+        # Initialize the SearchInternetNode
+        self.search_node = SearchInternetNode(
+            input="user_input",
+            output=["search_results"],
+            node_config={
+                "llm_model": self.llm_model,
+                "search_engine": self.graph_config["search_engine"],
+                "max_results": self.graph_config["max_results"],
+                "verbose": self.graph_config["verbose"]
+            }
+        )
+
+    def test_execute_search_node(self):
+        # Initial state
+        state = {
+            "user_input": "What is the capital of France?"
+        }
+
+        # Expected output
+        expected_output = {
+            "user_input": "What is the capital of France?",
+            "search_results": [
+                "https://en.wikipedia.org/wiki/Paris",
+                "https://en.wikipedia.org/wiki/France",
+                "https://en.wikipedia.org/wiki/%C3%8Ele-de-France"
+            ]
+        }
+
+        # Execute the node
+        result = self.search_node.execute(state)
+
+        # Assert the results
+        self.assertEqual(result, expected_output)
+
+if __name__ == "__main__":
+    unittest.main()

From aedda448682ce5a921a62e661bffb02478bab75f Mon Sep 17 00:00:00 2001
From: Jason Vertrees <jason@jasons-mbp-2.lan>
Date: Tue, 18 Jun 2024 12:36:50 -0500
Subject: [PATCH 2/8] fix: updated for schema changes docs: updated for schema
 changes

---
 examples/ernie/smart_scraper_schema_ernie.py  | 39 +++++++++----------
 .../smart_scraper_schema_huggingfacehub.py    | 27 +++++--------
 .../smart_scraper_schema_groq_openai.py       | 31 +++++++--------
 scrapegraphai/graphs/abstract_graph.py        |  2 +-
 .../graphs/csv_scraper_multi_graph.py         |  6 ++-
 scrapegraphai/graphs/deep_scraper_graph.py    |  4 +-
 scrapegraphai/graphs/json_scraper_graph.py    |  4 +-
 .../graphs/json_scraper_multi_graph.py        |  2 +-
 scrapegraphai/graphs/omni_scraper_graph.py    |  4 +-
 scrapegraphai/graphs/omni_search_graph.py     |  2 +-
 scrapegraphai/graphs/pdf_scraper_graph.py     |  4 +-
 .../graphs/pdf_scraper_multi_graph.py         |  2 +-
 scrapegraphai/graphs/script_creator_graph.py  |  4 +-
 .../graphs/script_creator_multi_graph.py      |  6 ++-
 scrapegraphai/graphs/search_graph.py          |  2 +-
 scrapegraphai/graphs/smart_scraper_graph.py   |  4 +-
 .../graphs/smart_scraper_multi_graph.py       |  2 +-
 scrapegraphai/graphs/speech_graph.py          |  4 +-
 scrapegraphai/graphs/xml_scraper_graph.py     |  4 +-
 .../graphs/xml_scraper_multi_graph.py         |  2 +-
 20 files changed, 73 insertions(+), 82 deletions(-)

diff --git a/examples/ernie/smart_scraper_schema_ernie.py b/examples/ernie/smart_scraper_schema_ernie.py
index 65448821..64a74937 100644
--- a/examples/ernie/smart_scraper_schema_ernie.py
+++ b/examples/ernie/smart_scraper_schema_ernie.py
@@ -2,32 +2,31 @@
 Basic example of scraping pipeline using SmartScraper with schema
 """
 
-import os, json
+import json
+import os
+from typing import Dict
+
 from dotenv import load_dotenv
+from pydantic import BaseModel
+
 from scrapegraphai.graphs import SmartScraperGraph
 
+
 load_dotenv()
 
 # ************************************************
 # Define the output schema for the graph
 # ************************************************
 
-schema= """
-    { 
-    "Projects": [
-        "Project #": 
-            { 
-                "title": "...", 
-                "description": "...", 
-            }, 
-        "Project #": 
-            { 
-                "title": "...", 
-                "description": "...", 
-            } 
-        ] 
-    } 
-"""
+
+class Project(BaseModel):
+    title: str
+    description: str
+
+
+class Projects(BaseModel):
+    Projects: Dict[str, Project]
+
 
 # ************************************************
 # Define the configuration for the graph
@@ -37,7 +36,7 @@
 
 graph_config = {
     "llm": {
-        "api_key":openai_key,
+        "api_key": openai_key,
         "model": "gpt-3.5-turbo",
     },
     "verbose": True,
@@ -51,8 +50,8 @@
 smart_scraper_graph = SmartScraperGraph(
     prompt="List me all the projects with their description",
     source="https://perinim.github.io/projects/",
-    schema=schema,
-    config=graph_config
+    schema=Projects,
+    config=graph_config,
 )
 
 result = smart_scraper_graph.run()
diff --git a/examples/huggingfacehub/smart_scraper_schema_huggingfacehub.py b/examples/huggingfacehub/smart_scraper_schema_huggingfacehub.py
index 1e0c94d6..784079e4 100644
--- a/examples/huggingfacehub/smart_scraper_schema_huggingfacehub.py
+++ b/examples/huggingfacehub/smart_scraper_schema_huggingfacehub.py
@@ -4,6 +4,9 @@
 
 import os
 from dotenv import load_dotenv
+from typing import Dict
+
+from pydantic import BaseModel
 from scrapegraphai.graphs import SmartScraperGraph
 from scrapegraphai.utils import prettify_exec_info
 from langchain_community.llms import HuggingFaceEndpoint
@@ -13,22 +16,12 @@
 # Define the output schema for the graph
 # ************************************************
 
-schema= """
-    { 
-    "Projects": [
-        "Project #": 
-            { 
-                "title": "...", 
-                "description": "...", 
-            }, 
-        "Project #": 
-            { 
-                "title": "...", 
-                "description": "...", 
-            } 
-        ] 
-    } 
-"""
+class Project(BaseModel):
+    title: str
+    description: str
+
+class Projects(BaseModel):
+    Projects: Dict[str, Project]
 
 ## required environment variable in .env
 #HUGGINGFACEHUB_API_TOKEN
@@ -61,7 +54,7 @@
 smart_scraper_graph = SmartScraperGraph(
     prompt="List me all the projects with their description",
     source="https://perinim.github.io/projects/",
-    schema=schema,
+    schema=Projects,
     config=graph_config
 )
 result = smart_scraper_graph.run()
diff --git a/examples/mixed_models/smart_scraper_schema_groq_openai.py b/examples/mixed_models/smart_scraper_schema_groq_openai.py
index 321c71b8..f177cb61 100644
--- a/examples/mixed_models/smart_scraper_schema_groq_openai.py
+++ b/examples/mixed_models/smart_scraper_schema_groq_openai.py
@@ -2,8 +2,13 @@
 Basic example of scraping pipeline using SmartScraper with schema
 """
 
-import os, json
+import json
+import os
+from typing import Dict, List
+
 from dotenv import load_dotenv
+from pydantic import BaseModel
+
 from scrapegraphai.graphs import SmartScraperGraph
 from scrapegraphai.utils import prettify_exec_info
 
@@ -13,22 +18,12 @@
 # Define the output schema for the graph
 # ************************************************
 
-schema= """
-    { 
-    "Projects": [
-        "Project #": 
-            { 
-                "title": "...", 
-                "description": "...", 
-            }, 
-        "Project #": 
-            { 
-                "title": "...", 
-                "description": "...", 
-            } 
-        ] 
-    } 
-"""
+class Project(BaseModel):
+    title: str
+    description: str
+
+class Projects(BaseModel):
+    Projects: Dict[str, Project]
 
 # ************************************************
 # Define the configuration for the graph
@@ -60,7 +55,7 @@
     prompt="List me all the projects with their description.",
     # also accepts a string with the already downloaded HTML code
     source="https://perinim.github.io/projects/",
-    schema=schema,
+    schema=Projects,
     config=graph_config
 )
 
diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py
index ed61255c..ef188b27 100644
--- a/scrapegraphai/graphs/abstract_graph.py
+++ b/scrapegraphai/graphs/abstract_graph.py
@@ -39,7 +39,7 @@ class AbstractGraph(ABC):
         prompt (str): The prompt for the graph.
         source (str): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
         llm_model: An instance of a language model client, configured for generating answers.
         embedder_model: An instance of an embedding model client,
                         configured for generating embeddings.
diff --git a/scrapegraphai/graphs/csv_scraper_multi_graph.py b/scrapegraphai/graphs/csv_scraper_multi_graph.py
index fd15f49a..716e9aca 100644
--- a/scrapegraphai/graphs/csv_scraper_multi_graph.py
+++ b/scrapegraphai/graphs/csv_scraper_multi_graph.py
@@ -5,6 +5,8 @@
 from copy import copy, deepcopy
 from typing import List, Optional
 
+from pydantic import BaseModel
+
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
 from .csv_scraper_graph import CSVScraperGraph
@@ -32,7 +34,7 @@ class CSVScraperMultiGraph(AbstractGraph):
         prompt (str): The user prompt to search the internet.
         source (List[str]): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (Optional[str]): The schema for the graph output.
+        schema (Optional[BaseModel]): The schema for the graph output.
 
     Example:
         >>> search_graph = MultipleSearchGraph(
@@ -42,7 +44,7 @@ class CSVScraperMultiGraph(AbstractGraph):
         >>> result = search_graph.run()
     """
 
-    def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None):
+    def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None):
 
         self.max_results = config.get("max_results", 3)
 
diff --git a/scrapegraphai/graphs/deep_scraper_graph.py b/scrapegraphai/graphs/deep_scraper_graph.py
index e9e41771..df04c9ce 100644
--- a/scrapegraphai/graphs/deep_scraper_graph.py
+++ b/scrapegraphai/graphs/deep_scraper_graph.py
@@ -34,7 +34,7 @@ class DeepScraperGraph(AbstractGraph):
         prompt (str): The prompt for the graph.
         source (str): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
         llm_model: An instance of a language model client, configured for generating answers.
         embedder_model: An instance of an embedding model client, 
         configured for generating embeddings.
@@ -45,7 +45,7 @@ class DeepScraperGraph(AbstractGraph):
         prompt (str): The prompt for the graph.
         source (str): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
 
     Example:
         >>> deep_scraper = DeepScraperGraph(
diff --git a/scrapegraphai/graphs/json_scraper_graph.py b/scrapegraphai/graphs/json_scraper_graph.py
index 09a5f02e..4165a194 100644
--- a/scrapegraphai/graphs/json_scraper_graph.py
+++ b/scrapegraphai/graphs/json_scraper_graph.py
@@ -23,7 +23,7 @@ class JSONScraperGraph(AbstractGraph):
         prompt (str): The prompt for the graph.
         source (str): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
         llm_model: An instance of a language model client, configured for generating answers.
         embedder_model: An instance of an embedding model client, 
         configured for generating embeddings.
@@ -34,7 +34,7 @@ class JSONScraperGraph(AbstractGraph):
         prompt (str): The prompt for the graph.
         source (str): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
 
     Example:
         >>> json_scraper = JSONScraperGraph(
diff --git a/scrapegraphai/graphs/json_scraper_multi_graph.py b/scrapegraphai/graphs/json_scraper_multi_graph.py
index 2824c416..48fd8217 100644
--- a/scrapegraphai/graphs/json_scraper_multi_graph.py
+++ b/scrapegraphai/graphs/json_scraper_multi_graph.py
@@ -33,7 +33,7 @@ class JSONScraperMultiGraph(AbstractGraph):
         prompt (str): The user prompt to search the internet.
         source (List[str]): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (Optional[str]): The schema for the graph output.
+        schema (Optional[BaseModel]): The schema for the graph output.
 
     Example:
         >>> search_graph = MultipleSearchGraph(
diff --git a/scrapegraphai/graphs/omni_scraper_graph.py b/scrapegraphai/graphs/omni_scraper_graph.py
index a5eefad2..5b1ad30b 100644
--- a/scrapegraphai/graphs/omni_scraper_graph.py
+++ b/scrapegraphai/graphs/omni_scraper_graph.py
@@ -29,7 +29,7 @@ class OmniScraperGraph(AbstractGraph):
         prompt (str): The prompt for the graph.
         source (str): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
         llm_model: An instance of a language model client, configured for generating answers.
         embedder_model: An instance of an embedding model client, 
         configured for generating embeddings.
@@ -41,7 +41,7 @@ class OmniScraperGraph(AbstractGraph):
         prompt (str): The prompt for the graph.
         source (str): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
 
     Example:
         >>> omni_scraper = OmniScraperGraph(
diff --git a/scrapegraphai/graphs/omni_search_graph.py b/scrapegraphai/graphs/omni_search_graph.py
index df525949..b6f6df59 100644
--- a/scrapegraphai/graphs/omni_search_graph.py
+++ b/scrapegraphai/graphs/omni_search_graph.py
@@ -34,7 +34,7 @@ class OmniSearchGraph(AbstractGraph):
     Args:
         prompt (str): The user prompt to search the internet.
         config (dict): Configuration parameters for the graph.
-        schema (Optional[str]): The schema for the graph output.
+        schema (Optional[BaseModel]): The schema for the graph output.
 
     Example:
         >>> omni_search_graph = OmniSearchGraph(
diff --git a/scrapegraphai/graphs/pdf_scraper_graph.py b/scrapegraphai/graphs/pdf_scraper_graph.py
index 41099d8b..89d8018c 100644
--- a/scrapegraphai/graphs/pdf_scraper_graph.py
+++ b/scrapegraphai/graphs/pdf_scraper_graph.py
@@ -26,7 +26,7 @@ class PDFScraperGraph(AbstractGraph):
         prompt (str): The prompt for the graph.
         source (str): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
         llm_model: An instance of a language model client, configured for generating answers.
         embedder_model: An instance of an embedding model client, 
         configured for generating embeddings.
@@ -38,7 +38,7 @@ class PDFScraperGraph(AbstractGraph):
         prompt (str): The prompt for the graph.
         source (str): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
 
     Example:
         >>> pdf_scraper = PDFScraperGraph(
diff --git a/scrapegraphai/graphs/pdf_scraper_multi_graph.py b/scrapegraphai/graphs/pdf_scraper_multi_graph.py
index e9b5660b..86b2477f 100644
--- a/scrapegraphai/graphs/pdf_scraper_multi_graph.py
+++ b/scrapegraphai/graphs/pdf_scraper_multi_graph.py
@@ -34,7 +34,7 @@ class PdfScraperMultiGraph(AbstractGraph):
         prompt (str): The user prompt to search the internet.
         source (List[str]): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (Optional[str]): The schema for the graph output.
+        schema (Optional[BaseModel]): The schema for the graph output.
 
     Example:
         >>> search_graph = MultipleSearchGraph(
diff --git a/scrapegraphai/graphs/script_creator_graph.py b/scrapegraphai/graphs/script_creator_graph.py
index ce3fa319..83bef2ab 100644
--- a/scrapegraphai/graphs/script_creator_graph.py
+++ b/scrapegraphai/graphs/script_creator_graph.py
@@ -23,7 +23,7 @@ class ScriptCreatorGraph(AbstractGraph):
         prompt (str): The prompt for the graph.
         source (str): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
         llm_model: An instance of a language model client, configured for generating answers.
         embedder_model: An instance of an embedding model client, 
         configured for generating embeddings.
@@ -36,7 +36,7 @@ class ScriptCreatorGraph(AbstractGraph):
         prompt (str): The prompt for the graph.
         source (str): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
 
     Example:
         >>> script_creator = ScriptCreatorGraph(
diff --git a/scrapegraphai/graphs/script_creator_multi_graph.py b/scrapegraphai/graphs/script_creator_multi_graph.py
index 2b36f4ed..a415a82c 100644
--- a/scrapegraphai/graphs/script_creator_multi_graph.py
+++ b/scrapegraphai/graphs/script_creator_multi_graph.py
@@ -5,6 +5,8 @@
 from copy import copy, deepcopy
 from typing import List, Optional
 
+from pydantic import BaseModel
+
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
 from .script_creator_graph import ScriptCreatorGraph
@@ -30,7 +32,7 @@ class ScriptCreatorMultiGraph(AbstractGraph):
         prompt (str): The user prompt to search the internet.
         source (List[str]): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (Optional[str]): The schema for the graph output.
+        schema (Optional[BaseModel]): The schema for the graph output.
     Example:
         >>> script_graph = ScriptCreatorMultiGraph(
         ...     "What is Chioggia famous for?",
@@ -41,7 +43,7 @@ class ScriptCreatorMultiGraph(AbstractGraph):
         >>> result = script_graph.run()
     """
 
-    def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None):
+    def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None):
 
         self.max_results = config.get("max_results", 3)
 
diff --git a/scrapegraphai/graphs/search_graph.py b/scrapegraphai/graphs/search_graph.py
index 6bece062..7efcccc2 100644
--- a/scrapegraphai/graphs/search_graph.py
+++ b/scrapegraphai/graphs/search_graph.py
@@ -33,7 +33,7 @@ class SearchGraph(AbstractGraph):
     Args:
         prompt (str): The user prompt to search the internet.
         config (dict): Configuration parameters for the graph.
-        schema (Optional[str]): The schema for the graph output.
+        schema (Optional[BaseModel]): The schema for the graph output.
 
     Example:
         >>> search_graph = SearchGraph(
diff --git a/scrapegraphai/graphs/smart_scraper_graph.py b/scrapegraphai/graphs/smart_scraper_graph.py
index 9ee0c3cc..cfbfc000 100644
--- a/scrapegraphai/graphs/smart_scraper_graph.py
+++ b/scrapegraphai/graphs/smart_scraper_graph.py
@@ -26,7 +26,7 @@ class SmartScraperGraph(AbstractGraph):
         prompt (str): The prompt for the graph.
         source (str): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
         llm_model: An instance of a language model client, configured for generating answers.
         embedder_model: An instance of an embedding model client, 
         configured for generating embeddings.
@@ -37,7 +37,7 @@ class SmartScraperGraph(AbstractGraph):
         prompt (str): The prompt for the graph.
         source (str): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
 
     Example:
         >>> smart_scraper = SmartScraperGraph(
diff --git a/scrapegraphai/graphs/smart_scraper_multi_graph.py b/scrapegraphai/graphs/smart_scraper_multi_graph.py
index 996beff1..84e028fc 100644
--- a/scrapegraphai/graphs/smart_scraper_multi_graph.py
+++ b/scrapegraphai/graphs/smart_scraper_multi_graph.py
@@ -33,7 +33,7 @@ class SmartScraperMultiGraph(AbstractGraph):
         prompt (str): The user prompt to search the internet.
         source (List[str]): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (Optional[str]): The schema for the graph output.
+        schema (Optional[BaseModel]): The schema for the graph output.
 
     Example:
         >>> search_graph = MultipleSearchGraph(
diff --git a/scrapegraphai/graphs/speech_graph.py b/scrapegraphai/graphs/speech_graph.py
index 1058d127..4816a154 100644
--- a/scrapegraphai/graphs/speech_graph.py
+++ b/scrapegraphai/graphs/speech_graph.py
@@ -28,7 +28,7 @@ class SpeechGraph(AbstractGraph):
         prompt (str): The prompt for the graph.
         source (str): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
         llm_model: An instance of a language model client, configured for generating answers.
         embedder_model: An instance of an embedding model client, configured for generating embeddings.
         verbose (bool): A flag indicating whether to show print statements during execution.
@@ -39,7 +39,7 @@ class SpeechGraph(AbstractGraph):
         prompt (str): The prompt for the graph.
         source (str): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
 
     Example:
         >>> speech_graph = SpeechGraph(
diff --git a/scrapegraphai/graphs/xml_scraper_graph.py b/scrapegraphai/graphs/xml_scraper_graph.py
index dbab0b73..4513422b 100644
--- a/scrapegraphai/graphs/xml_scraper_graph.py
+++ b/scrapegraphai/graphs/xml_scraper_graph.py
@@ -24,7 +24,7 @@ class XMLScraperGraph(AbstractGraph):
         prompt (str): The prompt for the graph.
         source (str): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
         llm_model: An instance of a language model client, configured for generating answers.
         embedder_model: An instance of an embedding model client, 
         configured for generating embeddings.
@@ -36,7 +36,7 @@ class XMLScraperGraph(AbstractGraph):
         prompt (str): The prompt for the graph.
         source (str): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
 
     Example:
         >>> xml_scraper = XMLScraperGraph(
diff --git a/scrapegraphai/graphs/xml_scraper_multi_graph.py b/scrapegraphai/graphs/xml_scraper_multi_graph.py
index e1f4423c..da772647 100644
--- a/scrapegraphai/graphs/xml_scraper_multi_graph.py
+++ b/scrapegraphai/graphs/xml_scraper_multi_graph.py
@@ -34,7 +34,7 @@ class XMLScraperMultiGraph(AbstractGraph):
         prompt (str): The user prompt to search the internet.
         source (List[str]): The source of the graph.
         config (dict): Configuration parameters for the graph.
-        schema (Optional[str]): The schema for the graph output.
+        schema (Optional[BaseModel]): The schema for the graph output.
 
     Example:
         >>> search_graph = MultipleSearchGraph(

From ce0a47aee5edbb26fd82e41f6688a4bc48a10822 Mon Sep 17 00:00:00 2001
From: semantic-release-bot <semantic-release-bot@martynus.net>
Date: Tue, 18 Jun 2024 18:54:59 +0000
Subject: [PATCH 3/8] ci(release): 1.7.0-beta.13 [skip ci]

## [1.7.0-beta.13](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.0-beta.12...v1.7.0-beta.13) (2024-06-18)

### Bug Fixes

* updated for schema changes ([aedda44](https://github.com/VinciGit00/Scrapegraph-ai/commit/aedda448682ce5a921a62e661bffb02478bab75f))
---
 CHANGELOG.md   | 7 +++++++
 pyproject.toml | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fe471b0c..7ca50119 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,10 @@
+## [1.7.0-beta.13](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.0-beta.12...v1.7.0-beta.13) (2024-06-18)
+
+
+### Bug Fixes
+
+* updated for schema changes ([aedda44](https://github.com/VinciGit00/Scrapegraph-ai/commit/aedda448682ce5a921a62e661bffb02478bab75f))
+
 ## [1.7.0-beta.12](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.0-beta.11...v1.7.0-beta.12) (2024-06-17)
 
 
diff --git a/pyproject.toml b/pyproject.toml
index 7901a1a0..b94b3e4c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "scrapegraphai"
 
 
-version = "1.7.0b12"
+version = "1.7.0b13"
 
 
 

From aa2160c108764745a696ffc16038f370e9702c14 Mon Sep 17 00:00:00 2001
From: Marco Vinciguerra <mvincig11@gmail.com>
Date: Tue, 18 Jun 2024 21:28:29 +0200
Subject: [PATCH 4/8] feat: add research with bing + test function

---
 scrapegraphai/utils/research_web.py | 34 ++++++++++++++++++++---------
 tests/utils/research_web_test.py    | 28 ++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 10 deletions(-)
 create mode 100644 tests/utils/research_web_test.py

diff --git a/scrapegraphai/utils/research_web.py b/scrapegraphai/utils/research_web.py
index 62ffd2ee..ac7fc09d 100644
--- a/scrapegraphai/utils/research_web.py
+++ b/scrapegraphai/utils/research_web.py
@@ -1,11 +1,12 @@
 """
-Module for making the request on the web
+research web module
 """
 import re
 from typing import List
 from langchain_community.tools import DuckDuckGoSearchResults
 from googlesearch import search as google_search
-
+import requests
+from bs4 import BeautifulSoup
 
 def search_on_web(query: str, search_engine: str = "Google", max_results: int = 10) -> List[str]:
     """
@@ -13,35 +14,48 @@ def search_on_web(query: str, search_engine: str = "Google", max_results: int =
 
     Args:
         query (str): The search query to find on the internet.
-        search_engine (str, optional): Specifies the search engine to use, options include 'Google' or 'DuckDuckGo'. Default is 'Google'.
+        search_engine (str, optional): Specifies the search engine to use, options include 'Google', 'DuckDuckGo', or 'Bing'. Default is 'Google'.
         max_results (int, optional): The maximum number of search results to return.
 
     Returns:
         List[str]: A list of URLs as strings that are the search results.
 
     Raises:
-        ValueError: If the search engine specified is neither 'Google' nor 'DuckDuckGo'.
+        ValueError: If the search engine specified is neither 'Google', 'DuckDuckGo', nor 'Bing'.
 
     Example:
         >>> search_on_web("example query", search_engine="Google", max_results=5)
         ['http://example.com', 'http://example.org', ...]
 
-    This function allows switching between Google and DuckDuckGo to perform 
+    This function allows switching between Google, DuckDuckGo, and Bing to perform 
     internet searches, returning a list of result URLs.
     """
 
     if search_engine.lower() == "google":
         res = []
-
         for url in google_search(query, stop=max_results):
             res.append(url)
         return res
+
     elif search_engine.lower() == "duckduckgo":
         research = DuckDuckGoSearchResults(max_results=max_results)
         res = research.run(query)
-
         links = re.findall(r'https?://[^\s,\]]+', res)
-
         return links
-    raise ValueError(
-        "The only search engines available are DuckDuckGo or Google")
+
+    elif search_engine.lower() == "bing":
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+        }
+        search_url = f"https://www.bing.com/search?q={query}"
+        response = requests.get(search_url, headers=headers)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, "html.parser")
+
+        search_results = []
+        for result in soup.find_all('li', class_='b_algo', limit=max_results):
+            link = result.find('a')['href']
+            search_results.append(link)
+        return search_results
+
+    raise ValueError("The only search engines available are DuckDuckGo, Google, or Bing")
diff --git a/tests/utils/research_web_test.py b/tests/utils/research_web_test.py
new file mode 100644
index 00000000..46630625
--- /dev/null
+++ b/tests/utils/research_web_test.py
@@ -0,0 +1,28 @@
+import pytest
+from scrapegraphai.utils.research_web import search_on_web  # Replace with actual path to your file
+
+
+def test_google_search():
+    """Tests search_on_web with Google search engine."""
+    results = search_on_web("test query", search_engine="Google", max_results=2)
+    assert len(results) == 2
+    # You can further assert if the results actually contain 'test query' in the title/snippet using additional libraries
+
+def test_bing_search():
+    """Tests search_on_web with Bing search engine."""
+    results = search_on_web("test query", search_engine="Bing", max_results=1)
+    assert results is not None
+    # You can further assert if the results contain '.com' or '.org' in the domain
+
+
+def test_invalid_search_engine():
+    """Tests search_on_web with invalid search engine."""
+    with pytest.raises(ValueError):
+        search_on_web("test query", search_engine="Yahoo", max_results=5)
+
+
+def test_max_results():
+    """Tests search_on_web with different max_results values."""
+    results_5 = search_on_web("test query", max_results=5)
+    results_10 = search_on_web("test query", max_results=10)
+    assert len(results_5) <= len(results_10)

From ec77ff7ea4eb071469c2fb53e5959d4ea1f73ad6 Mon Sep 17 00:00:00 2001
From: semantic-release-bot <semantic-release-bot@martynus.net>
Date: Wed, 19 Jun 2024 08:40:20 +0000
Subject: [PATCH 5/8] ci(release): 1.7.0-beta.14 [skip ci]

## [1.7.0-beta.14](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.0-beta.13...v1.7.0-beta.14) (2024-06-19)

### Features

* add new search engine availability and new tests ([073d226](https://github.com/VinciGit00/Scrapegraph-ai/commit/073d226723f5f03b960865d07408905b7a506180))
* add research with bing + test function ([aa2160c](https://github.com/VinciGit00/Scrapegraph-ai/commit/aa2160c108764745a696ffc16038f370e9702c14))
---
 CHANGELOG.md   | 8 ++++++++
 pyproject.toml | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7ca50119..dc1c5bf8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,11 @@
+## [1.7.0-beta.14](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.0-beta.13...v1.7.0-beta.14) (2024-06-19)
+
+
+### Features
+
+* add new search engine availability and new tests ([073d226](https://github.com/VinciGit00/Scrapegraph-ai/commit/073d226723f5f03b960865d07408905b7a506180))
+* add research with bing + test function ([aa2160c](https://github.com/VinciGit00/Scrapegraph-ai/commit/aa2160c108764745a696ffc16038f370e9702c14))
+
 ## [1.7.0-beta.13](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.0-beta.12...v1.7.0-beta.13) (2024-06-18)
 
 
diff --git a/pyproject.toml b/pyproject.toml
index b94b3e4c..02114c26 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "scrapegraphai"
 
 
-version = "1.7.0b13"
+version = "1.7.0b14"
 
 
 

From f75e0835fc21f7a5acd10980b4b017b35510709c Mon Sep 17 00:00:00 2001
From: Maorsg <maor.gordonguterman@gmail.com>
Date: Mon, 24 Jun 2024 21:31:28 -0400
Subject: [PATCH 6/8] Add a method to the SearchGraph class that lets the user
 retrieve the URLs considered in the search

---
 scrapegraphai/graphs/search_graph.py | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/scrapegraphai/graphs/search_graph.py b/scrapegraphai/graphs/search_graph.py
index 6bece062..1f4e8950 100644
--- a/scrapegraphai/graphs/search_graph.py
+++ b/scrapegraphai/graphs/search_graph.py
@@ -3,7 +3,7 @@
 """
 
 from copy import copy, deepcopy
-from typing import Optional
+from typing import Optional, List
 from pydantic import BaseModel
 
 from .base_graph import BaseGraph
@@ -17,6 +17,7 @@
 )
 
 
+
 class SearchGraph(AbstractGraph):
     """ 
     SearchGraph is a scraping pipeline that searches the internet for answers to a given prompt.
@@ -29,6 +30,7 @@ class SearchGraph(AbstractGraph):
         headless (bool): A flag to run the browser in headless mode.
         verbose (bool): A flag to display the execution information.
         model_token (int): The token limit for the language model.
+        considered_urls (List[str]): A list of URLs considered during the search.
 
     Args:
         prompt (str): The user prompt to search the internet.
@@ -41,10 +43,10 @@ class SearchGraph(AbstractGraph):
         ...     {"llm": {"model": "gpt-3.5-turbo"}}
         ... )
         >>> result = search_graph.run()
+        >>> print(search_graph.get_considered_urls())
     """
 
     def __init__(self, prompt: str, config: dict, schema: Optional[BaseModel] = None):
-
         self.max_results = config.get("max_results", 3)
 
         if all(isinstance(value, str) for value in config.values()):
@@ -53,6 +55,7 @@ def __init__(self, prompt: str, config: dict, schema: Optional[BaseModel] = None
             self.copy_config = deepcopy(config)
         
         self.copy_schema = deepcopy(schema)
+        self.considered_urls = []  # New attribute to store URLs
 
         super().__init__(prompt, config, schema)
 
@@ -64,10 +67,7 @@ def _create_graph(self) -> BaseGraph:
             BaseGraph: A graph instance representing the web scraping and searching workflow.
         """
 
-        # ************************************************
         # Create a SmartScraperGraph instance
-        # ************************************************
-
         smart_scraper_instance = SmartScraperGraph(
             prompt="",
             source="",
@@ -75,10 +75,7 @@ def _create_graph(self) -> BaseGraph:
             schema=self.copy_schema
         )
 
-        # ************************************************
         # Define the graph nodes
-        # ************************************************
-
         search_internet_node = SearchInternetNode(
             input="user_prompt",
             output=["urls"],
@@ -128,4 +125,17 @@ def run(self) -> str:
         inputs = {"user_prompt": self.prompt}
         self.final_state, self.execution_info = self.graph.execute(inputs)
 
+        # Store the URLs after execution
+        if 'urls' in self.final_state:
+            self.considered_urls = self.final_state['urls']
+
         return self.final_state.get("answer", "No answer found.")
+
+    def get_considered_urls(self) -> List[str]:
+        """
+        Returns the list of URLs considered during the search.
+
+        Returns:
+            List[str]: A list of URLs considered during the search.
+        """
+        return self.considered_urls

From bbfbbd93be3c87c5f25e3c75ec7d677832d37467 Mon Sep 17 00:00:00 2001
From: semantic-release-bot <semantic-release-bot@martynus.net>
Date: Tue, 25 Jun 2024 08:49:47 +0000
Subject: [PATCH 7/8] ci(release): 1.8.0-beta.1 [skip ci]

## [1.8.0-beta.1](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.4...v1.8.0-beta.1) (2024-06-25)

### Features

* add new search engine availability and new tests ([073d226](https://github.com/VinciGit00/Scrapegraph-ai/commit/073d226723f5f03b960865d07408905b7a506180))
* add research with bing + test function ([aa2160c](https://github.com/VinciGit00/Scrapegraph-ai/commit/aa2160c108764745a696ffc16038f370e9702c14))

### Bug Fixes

* updated for schema changes ([aedda44](https://github.com/VinciGit00/Scrapegraph-ai/commit/aedda448682ce5a921a62e661bffb02478bab75f))

### CI

* **release:** 1.7.0-beta.13 [skip ci] ([ce0a47a](https://github.com/VinciGit00/Scrapegraph-ai/commit/ce0a47aee5edbb26fd82e41f6688a4bc48a10822))
* **release:** 1.7.0-beta.14 [skip ci] ([ec77ff7](https://github.com/VinciGit00/Scrapegraph-ai/commit/ec77ff7ea4eb071469c2fb53e5959d4ea1f73ad6))
---
 CHANGELOG.md   | 19 +++++++++++++++++++
 pyproject.toml |  2 +-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e276828d..a69adb32 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,22 @@
+## [1.8.0-beta.1](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.4...v1.8.0-beta.1) (2024-06-25)
+
+
+### Features
+
+* add new search engine availability and new tests ([073d226](https://github.com/VinciGit00/Scrapegraph-ai/commit/073d226723f5f03b960865d07408905b7a506180))
+* add research with bing + test function ([aa2160c](https://github.com/VinciGit00/Scrapegraph-ai/commit/aa2160c108764745a696ffc16038f370e9702c14))
+
+
+### Bug Fixes
+
+* updated for schema changes ([aedda44](https://github.com/VinciGit00/Scrapegraph-ai/commit/aedda448682ce5a921a62e661bffb02478bab75f))
+
+
+### CI
+
+* **release:** 1.7.0-beta.13 [skip ci] ([ce0a47a](https://github.com/VinciGit00/Scrapegraph-ai/commit/ce0a47aee5edbb26fd82e41f6688a4bc48a10822))
+* **release:** 1.7.0-beta.14 [skip ci] ([ec77ff7](https://github.com/VinciGit00/Scrapegraph-ai/commit/ec77ff7ea4eb071469c2fb53e5959d4ea1f73ad6))
+
 ## [1.7.4](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.3...v1.7.4) (2024-06-21)
 
 
diff --git a/pyproject.toml b/pyproject.toml
index f9df8d3e..0df19e6f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "scrapegraphai"
 
 
-version = "1.7.4"
+version = "1.8.0b1"
 
 
 

From a3c43c9a9941023ee9a9ddcf8eed7337870cf5aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vin=C3=ADcius=20Feitosa=20da=20Silva?=
 <oviniciusfeitosa@gmail.com>
Date: Thu, 27 Jun 2024 17:53:20 -0300
Subject: [PATCH 8/8] =?UTF-8?q?=F0=9F=90=9B=20Rename=20`user=5Fprompt`=20p?=
 =?UTF-8?q?arameter=20to=20`prompt`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adjustment makes the class consistent with the definition of the arguments.
---
 scrapegraphai/builders/graph_builder.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scrapegraphai/builders/graph_builder.py b/scrapegraphai/builders/graph_builder.py
index ab19a251..e807a0df 100644
--- a/scrapegraphai/builders/graph_builder.py
+++ b/scrapegraphai/builders/graph_builder.py
@@ -40,11 +40,11 @@ class GraphBuilder:
         ValueError: If 'api_key' is not included in llm_config.
     """
 
-    def __init__(self, user_prompt: str, config: dict):
+    def __init__(self, prompt: str, config: dict):
         """
         Initializes the GraphBuilder with a user prompt and language model configuration.
         """
-        self.user_prompt = user_prompt
+        self.prompt = prompt
         self.config = config
         self.llm = self._create_llm(config["llm"])
         self.nodes_description = self._generate_nodes_description()
@@ -122,7 +122,7 @@ def build_graph(self):
         Returns:
             dict: A JSON representation of the graph configuration.
         """
-        return self.chain.invoke(self.user_prompt)
+        return self.chain.invoke(self.prompt)
 
     @staticmethod
     def convert_json_to_graphviz(json_data, format: str = 'pdf'):