Skip to content

Commit 6ea26c3

Browse files
committed
add transformation function for read_mode
1 parent 79a2f51 commit 6ea26c3

File tree

3 files changed

+36
-0
lines changed

3 files changed

+36
-0
lines changed

scrapegraphai/utils/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@
1010
from .sys_dynamic_import import dynamic_import, srcfile_import
1111
from .cleanup_html import cleanup_html
1212
from .logging import *
13+
from .read_mode import transform_link

scrapegraphai/utils/read_mode.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
"""
2+
read_mode module
3+
"""
4+
import hashlib
5+
from urllib.parse import urlparse, quote
6+
7+
def transform_link(original_link):
    """
    Transforms the given link into a specific format used by the chrome-distiller.

    The result has the shape
    ``chrome-distiller://<zero-uuid>_<sha256-hex-of-link>/?url=<quoted-link>``.
    The link is hashed and percent-quoted exactly as given; no URL
    validation or normalisation is performed.

    Args:
        original_link (str): The original URL to be transformed.

    Returns:
        str: The transformed URL in the chrome-distiller format.
    """
    base_part = 'chrome-distiller://00000000-0000-0000-0000-000000000000_'
    # Digest is computed over the encoded bytes of the link (str.encode()
    # defaults to UTF-8) and rendered as lowercase hex.
    hash_value = hashlib.sha256(original_link.encode()).hexdigest()
    # quote() leaves "/" unescaped by default, so "https://" becomes
    # "https%3A//" in the query value.
    return f"{base_part}{hash_value}/?url={quote(original_link)}"

tests/utils/read_mode.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import pytest
2+
from scrapegraphai.utils.read_mode import transform_link
3+
4+
def test_transform_link():
    """Check that transform_link produces the expected chrome-distiller URL."""
    source_url = 'https://android.stackexchange.com/questions/218970'

    # Pieces of the URL the test expects, assembled in order below.
    prefix = 'chrome-distiller://00000000-0000-0000-0000-000000000000_'
    digest = '07036109224c60335e35e3b4c22dd02cf775f69d4430245c4c454aff570d6787'
    quoted_url = 'https%3A//android.stackexchange.com/questions/218970'
    wanted = prefix + digest + '/?url=' + quoted_url

    actual = transform_link(source_url)

    assert actual == wanted, f"Expected {wanted}, but got {actual}"
12+
13+
if __name__ == "__main__":
    # Target this file explicitly: its name does not match pytest's default
    # ``test_*.py`` / ``*_test.py`` discovery patterns, so a bare
    # ``pytest.main()`` would run discovery from the current directory and
    # not collect it. Propagate pytest's exit status to the shell.
    raise SystemExit(pytest.main([__file__]))

0 commit comments

Comments
 (0)