Skip to content

Commit 7048488

Browse files
committed
Merge branch 'h/bugfixes-for-db-filtering'
1 parent a759be9 commit 7048488

File tree

4 files changed

+20
-17
lines changed

4 files changed

+20
-17
lines changed

README.md

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,4 @@ Exceptions will be written into `waybackup_error.log` (each run overwrites the f
273273
## Contributing
274274

275275
I'm always happy to receive feature requests that improve the usability of this tool.
276-
Feel free to give suggestions and report issues. Project is still far from being perfect.
277-
278-
> Please PR from dev into dev.
276+
Feel free to give suggestions and report issues. The project is still far from perfect.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ packages = ["pywaybackup"]
77

88
[project]
99
name = "pywaybackup"
10-
version = "3.3.0"
10+
version = "3.3.1"
1111
description = "Query and download archive.org as simple as possible."
1212
authors = [
1313
{ name = "bitdruid", email = "bitdruid@outlook.com" }

pywaybackup/SnapshotCollection.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -70,15 +70,21 @@ def process_cdx(cls, cdxfile, csvfile):
7070
cls.db.set_index_complete()
7171
else:
7272
vb.write(verbose=True, content="\nAlready indexed snapshots")
73-
if cls.MODE_LAST or cls.MODE_FIRST:
74-
if not cls.db.get_filter_complete():
75-
vb.write(content="\nFiltering snapshots (last or first version)...")
76-
cls.filter_snapshots() # filter: keep newest or oldest based on MODE
77-
cls.db.set_filter_complete()
78-
else:
79-
vb.write(verbose=True, content="\nAlready filtered snapshots (last or first version)")
73+
if not cls.db.get_filter_complete():
74+
vb.write(content="\nFiltering snapshots (last or first version)...")
75+
cls.filter_snapshots() # filter: keep newest or oldest based on MODE
76+
cls.db.set_filter_complete()
77+
else:
78+
vb.write(verbose=True, content="\nAlready filtered snapshots (last or first version)")
8079

8180
cls.skip_set(csvfile) # set response to NULL or read csv file and write values into db
81+
82+
83+
84+
85+
86+
@classmethod
87+
def calculate(cls):
8288
cls.SNAPSHOT_UNHANDLED = cls.count_totals(unhandled=True) # count all unhandled in db
8389
cls.SNAPSHOT_HANDLED = cls.count_totals(handled=True) # count all handled in db
8490
cls.SNAPSHOT_TOTAL = cls.count_totals(total=True) # count all in db
@@ -96,7 +102,8 @@ def process_cdx(cls, cdxfile, csvfile):
96102
if cls.FILTER_RESPONSE > 0:
97103
vb.write(content=f"-----> {'skip statuscode'.ljust(18)}: {cls.FILTER_RESPONSE}")
98104

99-
vb.write(content=f"\n-----> {'to utilize'.ljust(18)}: {cls.SNAPSHOT_UNHANDLED:,}")
105+
if cls.SNAPSHOT_UNHANDLED > 0:
106+
vb.write(content=f"\n-----> {'to utilize'.ljust(18)}: {cls.SNAPSHOT_UNHANDLED:,}")
100107

101108

102109

@@ -179,9 +186,6 @@ def csv_create(cls, csvfile):
179186
cls.db.cursor.execute("UPDATE snapshot_tbl SET response = NULL WHERE response = 'LOCK'") # reset locked to unprocessed
180187
cls.db.cursor.execute("SELECT * FROM csv_view WHERE response IS NOT NULL") # only write processed snapshots
181188
headers = [description[0] for description in cls.db.cursor.description]
182-
if "snapshot_id" in headers:
183-
snapshot_id_index = headers.index("snapshot_id")
184-
headers.pop(snapshot_id_index)
185189
with open(csvfile, "w", encoding="utf-8") as f:
186190
writer = csv.writer(f)
187191
writer.writerow(headers)
@@ -341,9 +345,9 @@ def count_totals(cls, total=False, handled=False, unhandled=False, success=False
341345
if unhandled:
342346
return cls.db.cursor.execute("SELECT COUNT(rowid) FROM snapshot_tbl WHERE response IS NULL").fetchone()[0]
343347
if success:
344-
return cls.db.cursor.execute("SELECT COUNT(rowid) FROM snapshot_tbl WHERE file IS NOT NULL").fetchone()[0]
348+
return cls.db.cursor.execute("SELECT COUNT(rowid) FROM snapshot_tbl WHERE file IS NOT NULL AND file != ''").fetchone()[0]
345349
if fail:
346-
return cls.db.cursor.execute("SELECT COUNT(rowid) FROM snapshot_tbl WHERE file IS NULL").fetchone()[0]
350+
return cls.db.cursor.execute("SELECT COUNT(rowid) FROM snapshot_tbl WHERE file IS NULL OR file = ''").fetchone()[0]
347351

348352
@staticmethod
349353
def modify_snapshot(connection, snapshot_id, column, value):

pywaybackup/archive_download.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ def run_query(cdxfile: str, cdxquery: str) -> None:
115115
cdxquery = create_query(queryrange, limit, filter_filetype, filter_statuscode, start, end, explicit)
116116
cdxfile = run_query(cdxfile, cdxquery)
117117
sc.process_cdx(cdxfile, csvfile)
118+
sc.calculate()
118119

119120

120121

0 commit comments

Comments
 (0)