"""
Module for scraping JSON documents
"""
import json
import os

import pytest

from scrapegraphai.graphs import JSONScraperGraph

# Name of the JSON file that holds the graph configurations under test.
CONFIG_FILE = "config.json"


def _load_config(file_name):
    """
    Load the test configuration from a JSON file

    A relative ``file_name`` is looked up in the current working directory
    first and, when it is not found there, next to this test module. The
    second location keeps test collection independent of the directory
    pytest is started from.

    Raises:
        FileNotFoundError: if the file exists in none of the locations.
    """
    candidates = [file_name]
    if not os.path.isabs(file_name):
        candidates.append(os.path.join(os.path.dirname(__file__), file_name))

    for candidate in candidates:
        if os.path.isfile(candidate):
            # Explicit encoding so parsing does not depend on the locale.
            with open(candidate, "r", encoding="utf-8") as file:
                return json.load(file)

    raise FileNotFoundError(
        f"Configuration file not found; looked in: {', '.join(candidates)}"
    )


# Loaded once at import time because the parametrized fixture below needs the
# list of configurations while pytest is still collecting the tests.
CONFIG = _load_config(CONFIG_FILE)


@pytest.fixture
def sample_json():
    """
    Read the sample JSON file

    Returns the content of ``inputs/example.json`` (located next to this
    module) as text.
    """
    file_path = os.path.join(os.path.dirname(__file__), "inputs", "example.json")
    with open(file_path, "r", encoding="utf-8") as file:
        return file.read()


@pytest.fixture(params=CONFIG["graph_configs"])
def graph_config(request):
    """
    Load graph configuration

    Parametrized over every entry of ``CONFIG["graph_configs"]``, so each
    test that uses this fixture runs once per configuration.
    """
    return request.param


def test_scraping_pipeline(sample_json, graph_config):
    """
    Test the scraping pipeline

    Runs a JSONScraperGraph over the sample document and checks that the
    result is a list holding exactly the expected titles, in any order.
    """
    # TODO: placeholder values - replace with the titles that are actually
    # present in inputs/example.json.
    expected_titles = ["Title 1", "Title 2", "Title 3"]

    smart_scraper_graph = JSONScraperGraph(
        prompt="List me all the titles",
        source=sample_json,
        config=graph_config
    )

    result = smart_scraper_graph.run()

    assert result is not None
    assert isinstance(result, list)
    # Compare sorted copies so the order of the returned titles is irrelevant.
    assert sorted(result) == sorted(expected_titles)