Skip to content

Commit 3028d19

Browse files
committed
INTEGRITY: Add timestamp field in scan.dat and filtering support via modification time
1 parent 6ea2bba commit 3028d19

File tree

1 file changed

+49
-7
lines changed

1 file changed

+49
-7
lines changed

compute_hash.py

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import struct
55
import sys
66
from enum import Enum
7+
from datetime import datetime, date
8+
from collections import defaultdict
79

810
class FileType(Enum):
911
NON_MAC = "non_mac"
@@ -154,7 +156,6 @@ def is_actual_resource_fork_mac(filepath):
154156
""" Returns boolean, checking the actual mac fork if it exists. """
155157

156158
resource_fork_path = os.path.join(filepath, "..namedfork", "rsrc")
157-
print(resource_fork_path)
158159
return os.path.exists(resource_fork_path)
159160

160161
def is_appledouble(file_byte_stream):
@@ -505,7 +506,7 @@ def file_filter(files):
505506
for file in to_be_deleted:
506507
del files[file]
507508

508-
def compute_hash_of_dirs(root_directory, depth, size=0, alg="md5"):
509+
def compute_hash_of_dirs(root_directory, depth, size=0, limit_timestamps_date=None, alg="md5"):
509510
""" Return dictionary containing checksums of all files in directory """
510511
res = []
511512

@@ -518,10 +519,14 @@ def compute_hash_of_dirs(root_directory, depth, size=0, alg="md5"):
518519
for root, _, contents in os.walk(directory):
519520
files.extend([os.path.join(root, f) for f in contents])
520521

522+
# Filter out the files based on user input date - limit_timestamps_date
523+
filtered_file_map = filter_files_by_timestamp(files, limit_timestamp_date)
524+
521525
# Produce filetype and filename(name to be used in game entry) for each file
522-
for filepath in files:
526+
for filepath in filtered_file_map:
523527
file_collection[filepath] = file_classification(filepath)
524528

529+
525530
# Remove extra entries of macfiles to avoid extra checksum calculation in form of non mac files
526531
# Checksum for both the forks are calculated using a single file, so other files should be removed from the collection
527532
file_filter(file_collection)
@@ -538,11 +543,45 @@ def compute_hash_of_dirs(root_directory, depth, size=0, alg="md5"):
538543
relative_dir = os.path.dirname(os.path.dirname(relative_path))
539544
relative_path = os.path.join(relative_dir, base_name)
540545

541-
hash_of_dir[relative_path] = file_checksum(file_path, alg, size, file_info)
546+
hash_of_dir[relative_path] = file_checksum(file_path, alg, size, file_info) + (filtered_file_map[file_path],)
542547

543548
res.append(hash_of_dir)
544549
return res
545550

551+
552+
def validate_date(date_str):
    """
    Parse a user supplied date limit and return it as a ``datetime.date``.

    Accepted spellings, tried from most to least specific, are
    YYYY-MM-DD, YYYY-MM and YYYY. A month or day that is left out
    takes strptime's default value of 1.

    Raises ValueError when the string matches none of the accepted formats.
    """
    for pattern in ("%Y-%m-%d", "%Y-%m", "%Y"):
        try:
            parsed = datetime.strptime(date_str, pattern)
        except ValueError:
            # Not this format; fall through to the next, less specific one.
            continue
        return parsed.date()

    raise ValueError("Invalid date format. Use YYYY, YYYY-MM, or YYYY-MM-DD")
564+
565+
566+
def filter_files_by_timestamp(files, limit_timestamps_date):
    """
    Map each kept file path to its modification date (as a string).

    files: iterable of file paths to examine.
    limit_timestamps_date: date limit as a string in YYYY, YYYY-MM or
        YYYY-MM-DD form, or None to disable filtering and keep every file.

    When a limit is given, files modified after that date are dropped,
    except files modified today, which are always kept.

    Returns a defaultdict(str) of filepath -> str(modification date).
    Raises ValueError (from validate_date) if the limit is not in an
    accepted format.
    """

    # Kept as defaultdict(str) so callers indexing the map behave as before.
    filtered_file_map = defaultdict(str)

    # Only parse the limit when one was supplied: passing None on to
    # validate_date would reach datetime.strptime and raise TypeError.
    user_date = None
    if limit_timestamps_date is not None:
        user_date = validate_date(limit_timestamps_date)
    today = date.today()

    for filepath in files:
        mtime = datetime.fromtimestamp(os.path.getmtime(filepath)).date()
        if user_date is None or mtime <= user_date or mtime == today:
            filtered_file_map[filepath] = str(mtime)

    return filtered_file_map
583+
584+
546585
def create_dat_file(hash_of_dirs, path, checksum_size=0):
547586
with open(f"{os.path.basename(path)}.dat", "w") as file:
548587
# Header
@@ -556,8 +595,8 @@ def create_dat_file(hash_of_dirs, path, checksum_size=0):
556595
# Game files
557596
for hash_of_dir in hash_of_dirs:
558597
file.write("game (\n")
559-
for filename, (hashes, filesize) in hash_of_dir.items():
560-
data = f"name \"{filename}\" size {filesize}"
598+
for filename, (hashes, filesize, timestamp) in hash_of_dir.items():
599+
data = f"name \"{filename}\" size {filesize} timestamp {timestamp}"
561600
for key, value in hashes:
562601
data += f" {key} {value}"
563602

@@ -579,10 +618,13 @@ def error(self, message):
579618
help="Depth from root to game directories")
580619
parser.add_argument("--size",
581620
help="Use first n bytes of file to calculate checksum")
621+
parser.add_argument("--limit-timestamps",
622+
help="Format - YYYY-MM-DD or YYYY-MM or YYYY. Filters out the files those were modified after the given timestamp. Note that if the modification time is today, it would not be filtered out.")
582623
args = parser.parse_args()
583624
path = os.path.abspath(args.directory) if args.directory else os.getcwd()
584625
depth = int(args.depth) if args.depth else 0
585626
checksum_size = int(args.size) if args.size else 0
627+
limit_timestamp_date = str(args.limit_timestamps) if args.limit_timestamps else None
586628

587629
create_dat_file(compute_hash_of_dirs(
588-
path, depth, checksum_size), path, checksum_size)
630+
path, depth, checksum_size, limit_timestamp_date), path, checksum_size)

0 commit comments

Comments
 (0)