Skip to content

Pre/beta #922

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits
Feb 13, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 23 additions & 42 deletions tests/test_scrape_do.py
Original file line number Diff line number Diff line change
@@ -1,51 +1,32 @@
import urllib.parse
import pytest

from unittest.mock import patch, Mock
from scrapegraphai.docloaders.scrape_do import scrape_do_fetch
from unittest.mock import Mock, patch

class TestScrapeDoFetch:
@patch('scrapegraphai.docloaders.scrape_do.requests.get')
@patch('scrapegraphai.docloaders.scrape_do.os.getenv')
def test_scrape_do_fetch_with_proxy_geocode_and_super_proxy(self, mock_getenv, mock_get):
"""
Test scrape_do_fetch function with proxy mode, geoCode, and super_proxy enabled.
This test verifies that the function correctly handles proxy settings,
geoCode parameter, and super_proxy flag when making a request.
"""
# Mock environment variable
mock_getenv.return_value = "proxy.scrape.do:8080"

# Mock the response
mock_response = Mock()
mock_response.text = "Mocked response content"
mock_get.return_value = mock_response

# Test parameters
token = "test_token"
target_url = "https://example.com"
use_proxy = True
geoCode = "US"
super_proxy = True

# Call the function
result = scrape_do_fetch(token, target_url, use_proxy, geoCode, super_proxy)
def test_scrape_do_fetch_without_proxy():
"""
Test scrape_do_fetch function using API mode (without proxy).

# Assertions
assert result == "Mocked response content"
mock_get.assert_called_once()
call_args = mock_get.call_args
This test verifies that:
1. The function correctly uses the API mode when use_proxy is False.
2. The correct URL is constructed with the token and encoded target URL.
3. The function returns the expected response text.
"""
token = "test_token"
target_url = "https://example.com"
encoded_url = urllib.parse.quote(target_url)
expected_response = "Mocked API response"

with patch("requests.get") as mock_get:
mock_response = Mock()
mock_response.text = expected_response
mock_get.return_value = mock_response

# Check if the URL is correct
assert call_args[0][0] == target_url
result = scrape_do_fetch(token, target_url, use_proxy=False)

# Check if proxies are set correctly
assert call_args[1]['proxies'] == {
"http": f"http://{token}:@proxy.scrape.do:8080",
"https": f"http://{token}:@proxy.scrape.do:8080",
}
expected_url = f"http://api.scrape.do?token={token}&url={encoded_url}"
mock_get.assert_called_once_with(expected_url)

# Check if verify is False
assert call_args[1]['verify'] is False
assert result == expected_response

# Check if params are set correctly
assert call_args[1]['params'] == {"geoCode": "US", "super": "true"}
Loading