From 379a7cd1878059a1fce8b9247e6f9fce8091fbca Mon Sep 17 00:00:00 2001 From: Evgeny Date: Sun, 2 Feb 2025 03:01:04 +0700 Subject: [PATCH] Allow to customize scrape.do proxy/api endpoints --- scrapegraphai/docloaders/scrape_do.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scrapegraphai/docloaders/scrape_do.py b/scrapegraphai/docloaders/scrape_do.py index 6f64d9f2..be37e3f7 100644 --- a/scrapegraphai/docloaders/scrape_do.py +++ b/scrapegraphai/docloaders/scrape_do.py @@ -5,6 +5,7 @@ import urllib.parse import requests +import os import urllib3 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) @@ -29,7 +30,8 @@ def scrape_do_fetch( """ encoded_url = urllib.parse.quote(target_url) if use_proxy: - proxy_mode_url = f"http://{token}:@proxy.scrape.do:8080" + proxy_scrape_do_url = os.getenv("PROXY_SCRAPE_DO_URL", "proxy.scrape.do:8080") + proxy_mode_url = f"http://{token}:@{proxy_scrape_do_url}" proxies = { "http": proxy_mode_url, "https": proxy_mode_url, @@ -41,7 +43,8 @@ def scrape_do_fetch( target_url, proxies=proxies, verify=False, params=params ) else: - url = f"http://api.scrape.do?token={token}&url={encoded_url}" + api_scrape_do_url = os.getenv("API_SCRAPE_DO_URL", "api.scrape.do") + url = f"http://{api_scrape_do_url}?token={token}&url={encoded_url}" response = requests.get(url) return response.text