From 17dd936af7cfd1d0822202d908e50ab11893bddd Mon Sep 17 00:00:00 2001
From: iamgodot
Date: Wed, 12 Jun 2024 22:45:43 -0700
Subject: [PATCH] test: fix tests for fetch node with proper mock&refactor

---
NOTE(review): the HTML tags inside the `content` f-string of
test_fetch_html are reconstructed. The test asserts that link_url and
img_url are extracted from the mocked page, so the page must reference
both; the tag layout and indentation below are a best-effort restoration
that keeps the hunk's line counts (50 insertions, 83 deletions) intact.
Presumably this matches blob ea0c995d -- TODO confirm against the
upstream commit before applying.

 tests/nodes/fetch_node_test.py | 133 +++++++++++++--------------------
 1 file changed, 50 insertions(+), 83 deletions(-)

diff --git a/tests/nodes/fetch_node_test.py b/tests/nodes/fetch_node_test.py
index b3f61706..ea0c995d 100644
--- a/tests/nodes/fetch_node_test.py
+++ b/tests/nodes/fetch_node_test.py
@@ -1,104 +1,71 @@
-import os
-import pytest
-from unittest.mock import patch, MagicMock
 from scrapegraphai.nodes import FetchNode
+from langchain_core.documents import Document
 
-def get_file_path(file_name):
-    """
-    Helper function to get the absolute file path.
-    """
-    curr_dir = os.path.dirname(os.path.realpath(__file__))
-    file_path = os.path.join(curr_dir, file_name)
-    return file_path
 
-@patch('scrapegraphai.nodes.FetchNode.execute')
-def test_fetch_node_html(mock_execute):
-    """
-    Test FetchNode with HTML input.
+def test_fetch_html(mocker):
+    title = "ScrapeGraph AI"
+    link_url = "https://github.com/VinciGit00/Scrapegraph-ai"
+    img_url = "https://raw.githubusercontent.com/VinciGit00/Scrapegraph-ai/main/docs/assets/scrapegraphai_logo.png"
+    content = f"""
+    <html>
+      <head>
+        <title>{title}</title>
+      </head>
+      <body>
+        <a href="{link_url}">ScrapeGraphAI: You Only Scrape Once</a>
+        <img src="{img_url}" alt="Scrapegraph-ai Logo">
+      </body>
+    </html>
     """
-    mock_execute.return_value = MagicMock()
-    fetch_node = FetchNode(
+    mock_loader_cls = mocker.patch("scrapegraphai.nodes.fetch_node.ChromiumLoader")
+    mock_loader = mock_loader_cls.return_value
+    mock_loader.load.return_value = [Document(page_content=content)]
+    node = FetchNode(
         input="url | local_dir",
-        output=["doc"],
-        node_config={
-            "headless": False
-        }
+        output=["doc", "links", "images"],
+        node_config={"headless": False},
     )
-    state = {
-        "url": "https://twitter.com/home"
-    }
-    result = fetch_node.execute(state)
-    assert result is not None
-    mock_execute.assert_called_once_with(state)
+    result = node.execute({"url": "https://scrapegraph-ai.com/example"})
 
-@patch('scrapegraphai.nodes.FetchNode.execute')
-def test_fetch_node_json(mock_execute):
-    """
-    Test FetchNode with JSON input.
-    """
-    mock_execute.return_value = MagicMock()
-    file_path_json = get_file_path("inputs/example.json")
-    state_json = {
-        "json": file_path_json
-    }
-    fetch_node_json = FetchNode(
+    mock_loader.load.assert_called_once()
+    doc = result["doc"][0]
+    assert title in doc.page_content
+    assert link_url in result["links"]
+    assert img_url in result["images"]
+
+
+def test_fetch_json():
+    node = FetchNode(
         input="json",
         output=["doc"],
     )
-    result_json = fetch_node_json.execute(state_json)
-    assert result_json is not None
-    mock_execute.assert_called_once_with(state_json)
+    result = node.execute({"json": "tests/nodes/inputs/example.json"})
+    assert result is not None
 
-@patch('scrapegraphai.nodes.FetchNode.execute')
-def test_fetch_node_xml(mock_execute):
-    """
-    Test FetchNode with XML input.
-    """
-    mock_execute.return_value = MagicMock()
-    file_path_xml = get_file_path("inputs/books.xml")
-    state_xml = {
-        "xml": file_path_xml
-    }
-    fetch_node_xml = FetchNode(
+
+def test_fetch_xml():
+    node = FetchNode(
         input="xml",
         output=["doc"],
     )
-    result_xml = fetch_node_xml.execute(state_xml)
-    assert result_xml is not None
-    mock_execute.assert_called_once_with(state_xml)
+    result = node.execute({"xml": "tests/nodes/inputs/books.xml"})
+    assert result is not None
 
-@patch('scrapegraphai.nodes.FetchNode.execute')
-def test_fetch_node_csv(mock_execute):
-    """
-    Test FetchNode with CSV input.
-    """
-    mock_execute.return_value = MagicMock()
-    file_path_csv = get_file_path("inputs/username.csv")
-    state_csv = {
-        "csv": file_path_csv
-    }
-    fetch_node_csv = FetchNode(
+
+def test_fetch_csv():
+    node = FetchNode(
         input="csv",
         output=["doc"],
     )
-    result_csv = fetch_node_csv.execute(state_csv)
-    assert result_csv is not None
-    mock_execute.assert_called_once_with(state_csv)
+    result = node.execute({"csv": "tests/nodes/inputs/username.csv"})
+    assert result is not None
 
-@patch('scrapegraphai.nodes.FetchNode.execute')
-def test_fetch_node_txt(mock_execute):
-    """
-    Test FetchNode with TXT input.
-    """
-    mock_execute.return_value = MagicMock()
-    file_path_txt = get_file_path("inputs/plain_html_example.txt")
-    state_txt = {
-        "txt": file_path_txt
-    }
-    fetch_node_txt = FetchNode(
+
+def test_fetch_txt():
+    node = FetchNode(
         input="txt",
-        output=["doc"],
+        output=["doc", "links", "images"],
     )
-    result_txt = fetch_node_txt.execute(state_txt)
-    assert result_txt is not None
-    mock_execute.assert_called_once_with(state_txt)
+    with open("tests/nodes/inputs/plain_html_example.txt") as f:
+        result = node.execute({"txt": f.read()})
+    assert result is not None