@@ -83,17 +83,17 @@ def print_list(snapshots):
83
83
84
84
85
85
# create filelist
86
- def query_list (snapshots : sc .SnapshotCollection , url : str , range : int , mode : str ):
86
+ def query_list (snapshots : sc .SnapshotCollection , url : str , range : int , explicit : bool , mode : str ):
87
87
try :
88
88
v .write ("\n Querying snapshots..." )
89
89
if range :
90
90
range = datetime .now ().year - range
91
91
range = "&from=" + str (range )
92
92
else :
93
93
range = ""
94
- cdxQuery = f"https://web.archive.org/cdx/search/xd?output=json&url=*.{ url } /*{ range } &fl=timestamp,original&filter=!statuscode:200"
94
+ cdx_url = f"*.{ url } /*" if not explicit else f"{ url } "
95
+ cdxQuery = f"https://web.archive.org/cdx/search/xd?output=json&url={ cdx_url } { range } &fl=timestamp,original&filter=!statuscode:200"
95
96
cdxResult = requests .get (cdxQuery )
96
- if cdxResult .status_code != 200 : v .write (f"\n -----> ERROR: could not query snapshots, status code: { cdxResult .status_code } " ); exit ()
97
97
snapshots .create_full (cdxResult )
98
98
if mode == "current" : snapshots .create_current ()
99
99
v .write (f"\n -----> { snapshots .count_list ()} snapshots found" )
@@ -142,6 +142,9 @@ def download_list(snapshots, output, retry, worker):
142
142
"""
143
143
Download a list of urls in format: [{"timestamp": "20190815104545", "url": "https://www.google.com/"}]
144
144
"""
145
+ if snapshots .count_list () == 0 :
146
+ v .write ("\n No snapshots found to download" )
147
+ return
145
148
v .write ("\n Downloading latest snapshots of each file..." , progress = 0 )
146
149
download_list = snapshots .CDX_LIST
147
150
if worker > 1 :
0 commit comments