Skip to content

test: Enhance JSON scraping pipeline test #352

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 7, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 24 additions & 30 deletions tests/graphs/scrape_json_ollama.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,50 @@
"""
Module for scraping JSON documents
"""
import os
import json
import pytest

from scrapegraphai.graphs import JSONScraperGraph

# Load the graph configurations from a JSON file. The path is relative, so it
# is resolved against the current working directory of the test run.
CONFIG_FILE = "config.json"
# Pass the encoding explicitly (JSON text is UTF-8) instead of relying on the
# platform's locale default; this also matches how the sample file is opened.
with open(CONFIG_FILE, "r", encoding="utf-8") as f:
    CONFIG = json.load(f)

# Fixture to read the sample JSON file
@pytest.fixture
def sample_json():
    """
    Read the sample JSON file

    Returns:
        The full text of ``inputs/example.json``, looked up relative to the
        directory of this test module and decoded as UTF-8.
    """
    # Anchor the path on this module's directory so the fixture does not
    # depend on the directory pytest was started from.
    file_path = os.path.join(os.path.dirname(__file__), "inputs", "example.json")
    with open(file_path, "r", encoding="utf-8") as file:
        text = file.read()

    return text


# Parametrized fixture to load graph configurations
@pytest.fixture(params=CONFIG["graph_configs"])
def graph_config(request):
    """
    Load graph configuration

    The fixture is parametrized over ``CONFIG["graph_configs"]``, so a test
    that requests it runs once for each entry found there.

    Args:
        request: pytest's fixture request object; ``request.param`` carries
            the entry selected for the current run.

    Returns:
        The graph configuration entry for the current parametrized run.
    """
    return request.param

# Test function for the scraping pipeline
def test_scraping_pipeline(sample_json, graph_config):
    """
    Test the scraping pipeline

    Builds a JSONScraperGraph from the sample JSON text and the supplied
    graph configuration, runs it, and checks the result against the expected
    titles regardless of order.

    Args:
        sample_json: text provided by the ``sample_json`` fixture.
        graph_config: configuration provided by the ``graph_config`` fixture.
    """
    # NOTE(review): placeholder values; replace with the titles actually
    # present in inputs/example.json.
    expected_titles = ["Title 1", "Title 2", "Title 3"]  # Replace with expected titles

    smart_scraper_graph = JSONScraperGraph(
        prompt="List me all the titles",
        source=sample_json,
        config=graph_config,
    )

    result = smart_scraper_graph.run()

    assert result is not None
    assert isinstance(result, list)
    # Compare sorted copies so the order of the returned titles does not matter.
    assert sorted(result) == sorted(expected_titles)
Loading