22import os
33import requests
44from bs4 import BeautifulSoup
5- from services .redis_store import RedisStore
65from urllib .parse import urljoin , urlparse
6+ from models .utils import pwd
77
# Module-level logger, named after this module so its records can be filtered
# per module by the application's logging configuration.
logger = logging.getLogger(__name__)
# Emit INFO and above from this module; the logger.debug() calls below are
# suppressed at this level.
logger.setLevel(logging.INFO)
@@ -25,19 +25,23 @@ def base(url):
2525 return url
2626
2727
def favicon_filename(url):
    """Return the file name under which the favicon for *url* is stored.

    The name is the value of ``normalize_domain(url)`` followed by the
    ``.favicon.ico`` suffix.
    """
    domain = normalize_domain(url)
    return f"{domain}.favicon.ico"
3030
3131
def favicon_failed_filename(url):
    """Return the name of the marker file recording a failed download for *url*.

    The name is the value of ``normalize_domain(url)`` followed by the
    ``.failed`` suffix.
    """
    domain = normalize_domain(url)
    return f"{domain}.failed"
34+
35+
def make_request(url):
    """Issue a GET request for *url* and return the ``requests`` response.

    A desktop Chrome User-Agent header is sent with the request. Redirects
    are followed and the request uses a 5 second timeout.
    """
    browser_user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
    response = requests.get(
        url,
        headers={'User-Agent': browser_user_agent},
        allow_redirects=True,
        timeout=5,
    )
    return response
3741
3842
def favicon_path(icon_path, favicon_filename):
    """Return the path of *favicon_filename* inside the *icon_path* directory.

    Args:
        icon_path: Directory that holds the icon files.
        favicon_filename: File name of the icon (already computed by the
            caller); it is joined onto ``icon_path`` unchanged.

    Returns:
        ``os.path.join(icon_path, favicon_filename)``.
    """
    # The parameter is already the file name. It also shadows the module-level
    # favicon_filename() function inside this scope, so it must not be called,
    # and there is no ``url`` available here to derive a name from.
    return os.path.join(icon_path, favicon_filename)
4246
4347
@@ -66,22 +70,22 @@ def download_favicon(url, icon_dir):
6670
6771
def _mark_failed(url, icon_dir, reason):
    """Write the ``.failed`` marker file for *url*, containing *reason*."""
    # NOTE(review): pwd comes from models.utils; joinpath is assumed to build
    # a path from its arguments - confirm against that module.
    filename = pwd.joinpath(icon_dir, favicon_failed_filename(url))
    # Text mode on purpose: ``reason`` is a str, and writing a str to a file
    # opened with 'wb' raises TypeError.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(reason)
    logger.debug(f"Marking {url} as failed with {filename}")


def _download(url, icon_dir, icon_url):
    """Fetch *icon_url* and store it as the favicon for *url* in *icon_dir*.

    On a 200 response whose content-type starts with ``image/`` the response
    body is written to the favicon file for *url*. On any other response, or
    when an exception is raised while fetching or saving, a ``.failed``
    marker file describing the reason is written instead.

    Args:
        url: Site URL the favicon belongs to; used to derive the file names.
        icon_dir: Directory the favicon or the failure marker is written to.
        icon_url: URL the icon is actually requested from.
    """
    failure_reason = None
    try:
        response = make_request(icon_url)
        content_type = response.headers.get('content-type', '')
        if response.status_code == 200 and content_type.lower().startswith('image/'):
            filename = pwd.joinpath(icon_dir, favicon_filename(url))
            # Binary mode: response.content is the raw bytes of the image.
            with open(filename, 'wb') as file:
                file.write(response.content)
            logger.debug(f"saving {url} as {filename}")
        else:
            failure_reason = f'response_code: {response.status_code} content-type: {content_type}'
    except Exception as ex:
        # Best-effort download: any failure is recorded rather than propagated.
        failure_reason = f'Error: {ex}'

    # The marker is written once, outside the try block, so an error while
    # writing it is not caught and retried by the handler above.
    if failure_reason is not None:
        _mark_failed(url, icon_dir, failure_reason)

    logger.debug(f"_download({icon_url}) completed")
0 commit comments