Skip to content

Fix schema option not working - Unit Tests #947

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions tests/graphs/abstract_graph_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,11 @@ class TestAbstractGraph:
),
({"model": "ollama/llama2"}, ChatOllama),
({"model": "oneapi/qwen-turbo", "api_key": "oneapi-api-key"}, OneApi),
(
{"model": "deepseek/deepseek-coder", "api_key": "deepseek-api-key"},
DeepSeek,
),
(
{
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
"region_name": "IDK",
"temperature": 0.7
},
ChatBedrock,
),
Expand Down Expand Up @@ -134,6 +131,7 @@ def test_create_llm_unknown_provider(self):
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
"region_name": "IDK",
"rate_limit": {"requests_per_second": 1},
"temperature": 0.7
},
ChatBedrock,
),
Expand Down
56 changes: 45 additions & 11 deletions tests/nodes/fetch_node_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,41 @@
from scrapegraphai.nodes import FetchNode


def test_fetch_html(mocker):
def test_fetch_html(monkeypatch):
title = "ScrapeGraph AI"
link_url = "https://github.com/VinciGit00/Scrapegraph-ai"
img_url = "https://raw.githubusercontent.com/VinciGit00/Scrapegraph-ai/main/docs/assets/scrapegraphai_logo.png"
content = f"""
<html>
<head>
<head>
<title>{title}</title>
</head>
<body>
</head>
<body>
<a href="{link_url}">ScrapeGraphAI: You Only Scrape Once</a>
<img src="{img_url}" alt="Scrapegraph-ai Logo">
</body>
</body>
</html>
"""
mock_loader_cls = mocker.patch("scrapegraphai.nodes.fetch_node.ChromiumLoader")
mock_loader = mock_loader_cls.return_value
mock_loader.load.return_value = [Document(page_content=content)]
# Define a fake ChromiumLoader that returns our fixed content
class FakeChromiumLoader:
def __init__(self, sources, headless, storage_state, **loader_kwargs):
self.sources = sources
self.headless = headless
self.storage_state = storage_state
self.loader_kwargs = loader_kwargs

def load(self):
return [Document(page_content=content)]

# Use monkeypatch to replace ChromiumLoader with FakeChromiumLoader
monkeypatch.setattr("scrapegraphai.nodes.fetch_node.ChromiumLoader", FakeChromiumLoader)
node = FetchNode(
input="url | local_dir",
output=["doc", "links", "images"],
node_config={"headless": False},
)
result = node.execute({"url": "https://scrapegraph-ai.com/example"})

mock_loader.load.assert_called_once()
doc = result["doc"][0]
assert result is not None
assert "ScrapeGraph AI" in doc.page_content
Expand All @@ -40,6 +49,11 @@ def test_fetch_html(mocker):


def test_fetch_json():
"""Test fetching content from a JSON file by creating a dummy JSON file"""
import os
os.makedirs("inputs", exist_ok=True)
with open("inputs/example.json", "w", encoding="utf-8") as f:
f.write('{"test": "json content"}')
node = FetchNode(
input="json",
output=["doc"],
Expand All @@ -49,6 +63,11 @@ def test_fetch_json():


def test_fetch_xml():
"""Test fetching content from an XML file by creating a dummy XML file"""
import os
os.makedirs("inputs", exist_ok=True)
with open("inputs/books.xml", "w", encoding="utf-8") as f:
f.write("<books><book>Test Book</book></books>")
node = FetchNode(
input="xml",
output=["doc"],
Expand All @@ -58,6 +77,16 @@ def test_fetch_xml():


def test_fetch_csv():
"""Test fetching content from a CSV file by creating a dummy CSV file and mocking pandas if necessary"""
import os
os.makedirs("inputs", exist_ok=True)
with open("inputs/username.csv", "w", encoding="utf-8") as f:
f.write("col1,col2\nvalue1,value2")
import sys, types
if "pandas" not in sys.modules:
dummy_pandas = types.ModuleType("pandas")
dummy_pandas.read_csv = lambda path: {"col1": ["value1"], "col2": ["value2"]}
sys.modules["pandas"] = dummy_pandas
node = FetchNode(
input="csv",
output=["doc"],
Expand All @@ -67,10 +96,15 @@ def test_fetch_csv():


def test_fetch_txt():
"""Test fetching content from a plain text file by creating a dummy text file with HTML content"""
import os
os.makedirs("inputs", exist_ok=True)
with open("inputs/plain_html_example.txt", "w", encoding="utf-8") as f:
f.write("<html><body>Test plain HTML content</body></html>")
node = FetchNode(
input="txt",
input="local_dir",
output=["doc", "links", "images"],
)
with open("inputs/plain_html_example.txt") as f:
result = node.execute({"txt": f.read()})
result = node.execute({"local_dir": f.read()})
assert result is not None
84 changes: 84 additions & 0 deletions tests/test_generate_answer_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,3 +268,87 @@ def test_init_chat_ollama_format():
"dummy_input", ["output"], node_config=node_config_with_schema
)
assert node2.llm_model.format == "dummy_schema_json"

import time
import json
from requests.exceptions import Timeout

# New dummy chain classes and dummy llms for testing invoke_with_timeout and ChatBedrock behavior.
class DummySlowChain:
    """A dummy chain standing in for a slow response.

    ``invoke`` itself returns immediately; the apparent slowness comes from
    the calling test patching ``time.time`` rather than from any real delay.
    """

    def invoke(self, inputs):
        """Ignore ``inputs`` and return the fixed string ``"response"``."""
        return "response"

class DummyChainException:
    """A dummy chain that simulates an exception during invoke."""

    def invoke(self, inputs):
        """Ignore ``inputs`` and always raise ``Exception("dummy error")``."""
        raise Exception("dummy error")

class DummyChatBedrock:
    """A dummy ChatBedrock chain that returns a predefined answer.

    It should be used when output_parser is None.
    """

    def __call__(self, *args, **kwargs):
        """Return the canned answer dict, whatever arguments are passed."""
        return {"content": "bedrock answer"}

    def __or__(self, other):
        """Support ``self | other`` by returning this same object."""
        return self

# New tests to increase test coverage.
def test_invoke_with_timeout_exceeded(dummy_node):
    """
    Test that invoke_with_timeout raises a Timeout exception when the chain.invoke
    takes longer than the specified timeout.

    No real waiting takes place: time.time is temporarily replaced with a
    scripted clock whose second reading lies past the timeout.
    """
    real_time = time.time
    # The first call returns the start time, the next call returns
    # start time + timeout + 1, i.e. just past the deadline.
    scripted_times = [100, 100 + dummy_node.timeout + 1]

    def fake_time():
        # Fall back to the real clock once the scripted readings are used up.
        return scripted_times.pop(0) if scripted_times else real_time()

    time.time = fake_time
    try:
        with pytest.raises(Timeout):
            dummy_node.invoke_with_timeout(
                DummySlowChain(), {"key": "value"}, dummy_node.timeout
            )
    finally:
        # Always restore the real clock so later tests are unaffected.
        time.time = real_time

def test_invoke_with_timeout_exception_propagates(dummy_node):
    """
    Test that invoke_with_timeout properly propagates exceptions raised by the chain.invoke call.
    """
    # `match` verifies the message as part of the raises check, so the
    # verification cannot be skipped by the exception leaving the block.
    with pytest.raises(Exception, match="dummy error"):
        dummy_node.invoke_with_timeout(
            DummyChainException(), {"key": "value"}, dummy_node.timeout
        )

@pytest.mark.parametrize(
    "content_key", ["relevant_chunks", "parsed_doc", "doc", "content"]
)
def test_process_with_alternative_content_keys(dummy_node_for_process, content_key):
    """
    Test that process() successfully uses alternative content keys.

    Runs once per ``content_key``; each run places the content under that
    key alongside the user prompt.
    """
    state = {"user_prompt": "What is the answer?", content_key: "Alternative content"}
    dummy_node_for_process.chain = DummyChain()
    # Replace the timeout wrapper with a direct call to chain.invoke so the
    # test exercises process() only.
    dummy_node_for_process.invoke_with_timeout = (
        lambda chain, inputs, timeout: chain.invoke(inputs)
    )
    result_state = dummy_node_for_process.process(state)
    assert result_state["output"] == {"content": "successful answer"}

def test_execute_single_chunk_with_bedrock():
    """
    Test execute() for the single chunk branch using a DummyChatBedrock instance.
    This verifies that if the llm_model is a ChatBedrock, no output_parser is applied.
    """
    node_config = {"llm_model": DummyChatBedrock(), "verbose": False, "timeout": 480}
    node = GenerateAnswerNode("dummy_input & doc", ["output"], node_config=node_config)
    node.logger = DummyLogger()
    # Pin the resolved input keys so the test does not depend on input parsing.
    node.get_input_keys = lambda state: ["dummy_input", "doc"]
    state = {"dummy_input": "What is the answer?", "doc": ["Only one chunk bedrock"]}

    # Simulate a simple chain invocation (in the single-chunk branch): the
    # expected answer is returned only when the inputs carry a "question" key.
    def fake_invoke_with_timeout(chain, inputs, timeout):
        if "question" in inputs:
            return {"content": "bedrock answer"}
        return {"content": "unexpected response"}

    node.invoke_with_timeout = fake_invoke_with_timeout
    output_state = node.execute(state)
    assert output_state["output"] == {"content": "bedrock answer"}
3 changes: 2 additions & 1 deletion tests/utils/test_proxy_rotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def test_is_ipv4_address():
assert is_ipv4_address("no-address") is False


def test_parse_or_search_proxy_success():
def test_parse_or_search_proxy_success(monkeypatch):
proxy = {
"server": "192.168.1.1:8080",
"username": "username",
Expand All @@ -91,6 +91,7 @@ def test_parse_or_search_proxy_success():
"timeout": 10.0,
},
}
monkeypatch.setattr("scrapegraphai.utils.proxy_rotation._search_proxy", lambda proxy: {"server": "dummy_proxy:8080"})

found_proxy = parse_or_search_proxy(proxy_broker)

Expand Down