Skip to content

Commit 6bb6076

Browse files
committed
Merge branch 'f/start-end-timestamp-range'
1 parent ecab7ec commit 6bb6076

File tree

5 files changed

+23
-14
lines changed

5 files changed

+23
-14
lines changed

README.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ This script allows you to download content from the Wayback Machine (archive.org
3434
### Arguments
3535

3636
- `-h`, `--help`: Show the help message and exit.
37-
- `-v`, `--version`: Show the script's version.
37+
- `-a`, `--about`: Show information about the script and exit.
3838

3939
#### Required Arguments
4040

@@ -50,9 +50,15 @@ This script allows you to download content from the Wayback Machine (archive.org
5050

5151
- `-l`, `--list`: Only print the snapshots available within the specified range. Does not download the snapshots.
5252
- `-e`, `--explicit`: Only download the explicitly given URL. No wildcard subdomains or paths.
53-
- `-r RANGE`, `--range RANGE`: Specify the range in years for which to search and download snapshots.
5453
- `-o OUTPUT`, `--output OUTPUT`: The folder where downloaded files will be saved.
5554

55+
- **Range Selection:**<br>
56+
Specify the range in years, or give a specific timestamp for the start, the end, or both. If you specify the `range` argument, the `start` and `end` arguments will be ignored. Timestamp format: YYYYMMDDhhmmss. You may give only a year, or increase the precision by adding further fields from left to right.<br>
57+
(year 2019, year+month 201901, year+month+day 20190101, year+month+day+hour 2019010112)
58+
- `-r RANGE`, `--range RANGE`: Specify the range in years for which to search and download snapshots.
59+
- `--start`: Timestamp to start searching.
60+
- `--end`: Timestamp to end searching.
61+
5662
#### Additional
5763

5864
- `--verbosity [LEVEL]`: Set the verbosity: json (print json response), progress (show progress bar) or standard (default).

pywaybackup/SnapshotCollection.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def __init__(self):
1515

1616
def create_full(self, cdxResult):
1717
self.CDX_JSON = cdxResult.json()[1:]
18-
self.CDX_LIST = [{"id": i, "timestamp": snapshot[0], "url": snapshot[1]} for i, snapshot in enumerate(self.CDX_JSON)]
18+
self.CDX_LIST = [{"timestamp": snapshot[0], "url": snapshot[1]} for i, snapshot in enumerate(self.CDX_JSON)]
1919
self.CDX_LIST = sorted(self.CDX_LIST, key=lambda k: k['timestamp'], reverse=True)
2020

2121
def create_current(self):
@@ -38,7 +38,7 @@ def create_entry(self, cdx_entry: dict, output: str) -> dict:
3838
else: download_dir = os.path.join(output, domain, timestamp, subdir)
3939
download_file = os.path.join(download_dir, filename)
4040
cdx_entry = {
41-
"id": cdx_entry["id"],
41+
"id": len(self.SNAPSHOT_COLLECTION),
4242
"url": download_url,
4343
"file": download_file,
4444
"timestamp": timestamp,

pywaybackup/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.6.2"
1+
__version__ = "0.6.3"

pywaybackup/archive.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,14 +83,15 @@ def print_list(snapshots):
8383

8484

8585
# create filelist
86-
def query_list(snapshots: sc.SnapshotCollection, url: str, range: int, explicit: bool, mode: str):
86+
# timestamp format: YYYYMMDDhhmmss
87+
def query_list(snapshots: sc.SnapshotCollection, url: str, range: int, start: int, end: int, explicit: bool, mode: str):
8788
try:
8889
v.write("\nQuerying snapshots...")
89-
if range:
90-
range = datetime.now().year - range
91-
range = "&from=" + str(range)
92-
else:
93-
range = ""
90+
range = ""
91+
if not range:
92+
if start: range = range + f"&from={start}"
93+
if end: range = range + f"&to={end}"
94+
else: range = "&from=" + str(datetime.now().year - range)
9495
cdx_url = f"*.{url}/*" if not explicit else f"{url}"
9596
cdxQuery = f"https://web.archive.org/cdx/search/xd?output=json&url={cdx_url}{range}&fl=timestamp,original&filter=!statuscode:200"
9697
cdxResult = requests.get(cdxQuery)

pywaybackup/main.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,12 @@ def main():
1919
optional = parser.add_argument_group('optional')
2020
optional.add_argument('-l', '--list', action='store_true', help='Only print snapshots (opt range in y)')
2121
optional.add_argument('-e', '--explicit', action='store_true', help='Search only for the explicit given url')
22-
optional.add_argument('-r', '--range', type=int, help='Range in years to search')
2322
optional.add_argument('-o', '--output', type=str, help='Output folder')
24-
optional.add_argument('-v', '--verbosity', type=str, default="standard", choices=["standard", "progress", "json"], help='Verbosity level')
23+
optional.add_argument('-r', '--range', type=int, help='Range in years to search')
24+
optional.add_argument('--start', type=int, help='Start timestamp format: YYYYMMDDhhmmss')
25+
optional.add_argument('--end', type=int, help='End timestamp format: YYYYMMDDhhmmss')
2526
special = parser.add_argument_group('special')
27+
special.add_argument('--verbosity', type=str, default="standard", choices=["standard", "progress", "json"], help='Verbosity level')
2628
special.add_argument('--retry', type=int, default=0, metavar="X-TIMES", help='Retry failed downloads (opt tries as int, else infinite)')
2729
special.add_argument('--worker', type=int, default=1, metavar="AMOUNT", help='Number of worker (simultaneous downloads)')
2830

@@ -40,7 +42,7 @@ def main():
4042
else:
4143
if args.output is None:
4244
args.output = os.path.join(os.getcwd(), "waybackup_snapshots")
43-
archive.query_list(snapshots, args.url, args.range, args.explicit, mode)
45+
archive.query_list(snapshots, args.url, args.range, args.start, args.end, args.explicit, mode)
4446
if args.list:
4547
archive.print_list(snapshots)
4648
else:

0 commit comments

Comments
 (0)