ScrapeGraphAI · PeriniM · Feb 22, 2025 · Dec 18, 2024 · Jan 2, 2025 · Jan 2, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,18 @@
+## [1.3.0-beta.1](https://github.com/ScrapeGraphAI/langchain-scrapegraph/compare/v1.2.1-beta.1...v1.3.0-beta.1) (2025-02-22)
+
+
+### Features
+
+* searchscraper ([6a96801](https://github.com/ScrapeGraphAI/langchain-scrapegraph/commit/6a968015d9c8f4ce798111850b0f000c3317c467))
+* updated tests searchscraper ([a771564](https://github.com/ScrapeGraphAI/langchain-scrapegraph/commit/a771564838b637f6aef0277e5ca3d723208d6701))
+
+## [1.2.1-beta.1](https://github.com/ScrapeGraphAI/langchain-scrapegraph/compare/v1.2.0...v1.2.1-beta.1) (2025-01-02)
+
+
+### Bug Fixes
+
+* updated docs url ([f7b640c](https://github.com/ScrapeGraphAI/langchain-scrapegraph/commit/f7b640c29d9780a30212acb19b09247b765a41ff))
+
 ## [1.2.0](https://github.com/ScrapeGraphAI/langchain-scrapegraph/compare/v1.1.0...v1.2.0) (2024-12-18)
 
 

diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 
 [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
 [![Python Support](https://img.shields.io/pypi/pyversions/langchain-scrapegraph.svg)](https://pypi.org/project/langchain-scrapegraph/)
-[![Documentation](https://img.shields.io/badge/Documentation-Latest-green)](https://scrapegraphai.com/documentation)
+[![Documentation](https://img.shields.io/badge/Documentation-Latest-green)](https://docs.scrapegraphai.com/integrations/langchain)
 
 Supercharge your LangChain agents with AI-powered web scraping capabilities. LangChain-ScrapeGraph provides a seamless integration between [LangChain](https://github.com/langchain-ai/langchain) and [ScrapeGraph AI](https://scrapegraphai.com), enabling your agents to extract structured data from websites using natural language.
 
@@ -58,98 +58,76 @@ result = tool.invoke({
 print(result)
 ```
 
-<details>
-<summary>🔍 Using Output Schemas with SmartscraperTool</summary>
-
-You can define the structure of the output using Pydantic models:
+### 🌐 SearchscraperTool
+Search and extract structured information from the web using natural language prompts.
 
 ```python
-from typing import List
-from pydantic import BaseModel, Field
-from langchain_scrapegraph.tools import SmartScraperTool
+from langchain_scrapegraph.tools import SearchScraperTool
 
-class WebsiteInfo(BaseModel):
-    title: str = Field(description="The main title of the webpage")
-    description: str = Field(description="The main description or first paragraph")
-    urls: List[str] = Field(description="The URLs inside the webpage")
-
-# Initialize with schema
-tool = SmartScraperTool(llm_output_schema=WebsiteInfo)
+# Initialize the tool (uses SGAI_API_KEY from environment)
+tool = SearchScraperTool()
 
-# The output will conform to the WebsiteInfo schema
+# Search and extract information using natural language
 result = tool.invoke({
-    "website_url": "https://www.example.com",
-    "user_prompt": "Extract the website information"
+    "user_prompt": "What are the key features and pricing of ChatGPT Plus?"
 })
 
 print(result)
 # {
-#     "title": "Example Domain",
-#     "description": "This domain is for use in illustrative examples...",
-#     "urls": ["https://www.iana.org/domains/example"]
+#     "product": {
+#         "name": "ChatGPT Plus",
+#         "description": "Premium version of ChatGPT..."
+#     },
+#     "features": [...],
+#     "pricing": {...},
+#     "reference_urls": [
+#         "https://openai.com/chatgpt",
+#         ...
+#     ]
 # }
 ```
-</details>
-
-### 💻 LocalscraperTool
-Extract information from HTML content using AI.
-
-```python
-from langchain_scrapegraph.tools import LocalScraperTool
-
-tool = LocalScraperTool()
-result = tool.invoke({
-    "user_prompt": "Extract all contact information",
-    "website_html": "<html>...</html>"
-})
-
-print(result)
-```
 
 <details>
-<summary>🔍 Using Output Schemas with LocalscraperTool</summary>
+<summary>🔍 Using Output Schemas with SearchscraperTool</summary>
 
 You can define the structure of the output using Pydantic models:
 
 ```python
-from typing import Optional
+from typing import Any, Dict, List
 from pydantic import BaseModel, Field
-from langchain_scrapegraph.tools import LocalScraperTool
+from langchain_scrapegraph.tools import SearchScraperTool
 
-class CompanyInfo(BaseModel):
-    name: str = Field(description="The company name")
-    description: str = Field(description="The company description")
-    email: Optional[str] = Field(description="Contact email if available")
-    phone: Optional[str] = Field(description="Contact phone if available")
+class ProductInfo(BaseModel):
+    name: str = Field(description="Product name")
+    features: List[str] = Field(description="List of product features")
+    pricing: Dict[str, Any] = Field(description="Pricing information")
+    reference_urls: List[str] = Field(description="Source URLs for the information")
 
 # Initialize with schema
-tool = LocalScraperTool(llm_output_schema=CompanyInfo)
-
-html_content = """
-<html>
-    <body>
-        <h1>TechCorp Solutions</h1>
-        <p>We are a leading AI technology company.</p>
-        <div class="contact">
-            <p>Email: contact@techcorp.com</p>
-            <p>Phone: (555) 123-4567</p>
-        </div>
-    </body>
-</html>
-"""
-
-# The output will conform to the CompanyInfo schema
+tool = SearchScraperTool(llm_output_schema=ProductInfo)
+
+# The output will conform to the ProductInfo schema
 result = tool.invoke({
-    "website_html": html_content,
-    "user_prompt": "Extract the company information"
+    "user_prompt": "What are the key features and pricing of ChatGPT Plus?"
 })
 
 print(result)
 # {
-#     "name": "TechCorp Solutions",
-#     "description": "We are a leading AI technology company.",
-#     "email": "contact@techcorp.com",
-#     "phone": "(555) 123-4567"
+#     "name": "ChatGPT Plus",
+#     "features": [
+#         "GPT-4 access",
+#         "Faster response speed",
+#         ...
+#     ],
+#     "pricing": {
+#         "amount": 20,
+#         "currency": "USD",
+#         "period": "monthly"
+#     },
+#     "reference_urls": [
+#         "https://openai.com/chatgpt",
+#         ...
+#     ]
 # }
 ```
 </details>

diff --git a/examples/agent_example.py b/examples/agent_example.py
@@ -11,7 +11,7 @@
 
 from langchain_scrapegraph.tools import (
     GetCreditsTool,
-    LocalScraperTool,
+    SearchScraperTool,
     SmartScraperTool,
 )
 
@@ -20,8 +20,8 @@
 # Initialize the tools
 tools = [
     SmartScraperTool(),
-    LocalScraperTool(),
     GetCreditsTool(),
+    SearchScraperTool(),
 ]
 
 # Create the prompt template

diff --git a/examples/localscraper_tool.py b/examples/localscraper_tool.py
diff --git a/examples/localscraper_tool_schema.py b/examples/localscraper_tool_schema.py
diff --git a/examples/searchscraper_tool.py b/examples/searchscraper_tool.py
@@ -0,0 +1,16 @@
+from scrapegraph_py.logger import sgai_logger
+
+from langchain_scrapegraph.tools import SearchScraperTool
+
+sgai_logger.set_logging(level="INFO")
+
+# Will automatically get SGAI_API_KEY from environment
+tool = SearchScraperTool()
+
+# Example prompt
+user_prompt = "What are the key features and pricing of ChatGPT Plus?"
+
+# Use the tool
+result = tool.invoke({"user_prompt": user_prompt})
+
+print("\nResult:", result)
diff --git a/examples/searchscraper_tool_schema.py b/examples/searchscraper_tool_schema.py
@@ -0,0 +1,41 @@
+from typing import Dict, List
+
+from pydantic import BaseModel, Field
+from scrapegraph_py.logger import sgai_logger
+
+from langchain_scrapegraph.tools import SearchScraperTool
+
+
+class Feature(BaseModel):
+    name: str = Field(description="Name of the feature")
+    description: str = Field(description="Description of the feature")
+
+
+class PricingPlan(BaseModel):
+    name: str = Field(description="Name of the pricing plan")
+    price: Dict[str, str] = Field(
+        description="Price details including amount, currency, and period"
+    )
+    features: List[str] = Field(description="List of features included in the plan")
+
+
+class ProductInfo(BaseModel):
+    name: str = Field(description="Name of the product")
+    description: str = Field(description="Description of the product")
+    features: List[Feature] = Field(description="List of product features")
+    pricing: Dict[str, List[PricingPlan]] = Field(description="Pricing information")
+    reference_urls: List[str] = Field(description="Source URLs for the information")
+
+
+sgai_logger.set_logging(level="INFO")
+
+# Initialize with Pydantic model class
+tool = SearchScraperTool(llm_output_schema=ProductInfo)
+
+# Example prompt
+user_prompt = "What are the key features and pricing of ChatGPT Plus?"
+
+# Use the tool - output will conform to ProductInfo schema
+result = tool.invoke({"user_prompt": user_prompt})
+
+print("\nResult:", result)
diff --git a/examples/smartscraper_tool_schema.py b/examples/smartscraper_tool_schema.py
@@ -17,10 +17,31 @@ class WebsiteInfo(BaseModel):
 # Initialize with Pydantic model class
 tool = SmartScraperTool(llm_output_schema=WebsiteInfo)
 
-# Example website and prompt
+# Example 1: Using website URL
 website_url = "https://www.example.com"
 user_prompt = "Extract info about the website"
 
-# Use the tool - output will conform to WebsiteInfo schema
-result = tool.invoke({"website_url": website_url, "user_prompt": user_prompt})
-print(result)
+# Use the tool with URL
+result_url = tool.invoke({"website_url": website_url, "user_prompt": user_prompt})
+print("\nResult from URL:", result_url)
+
+# Example 2: Using HTML content directly
+html_content = """
+<html>
+    <body>
+        <h1>Example Domain</h1>
+        <p>This domain is for use in illustrative examples.</p>
+        <a href="https://www.iana.org/domains/example">More information...</a>
+    </body>
+</html>
+"""
+
+# Use the tool with HTML content
+result_html = tool.invoke(
+    {
+        "website_url": website_url,  # Still required but will be overridden
+        "website_html": html_content,
+        "user_prompt": user_prompt,
+    }
+)
+print("\nResult from HTML:", result_html)
diff --git a/langchain_scrapegraph/tools/__init__.py b/langchain_scrapegraph/tools/__init__.py
@@ -1,6 +1,6 @@
 from .credits import GetCreditsTool
-from .localscraper import LocalScraperTool
 from .markdownify import MarkdownifyTool
+from .searchscraper import SearchScraperTool
 from .smartscraper import SmartScraperTool
 
-__all__ = ["SmartScraperTool", "GetCreditsTool", "MarkdownifyTool", "LocalScraperTool"]
+__all__ = ["SmartScraperTool", "GetCreditsTool", "MarkdownifyTool", "SearchScraperTool"]