Skip to content

Commit bd3f2f4

Browse files
committed
INTEGRITY: Add modification timestamps for macfiles
1 parent 8970cd6 commit bd3f2f4

File tree

1 file changed

+72
-5
lines changed

1 file changed

+72
-5
lines changed

compute_hash.py

Lines changed: 72 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import struct
55
import sys
66
from enum import Enum
7-
from datetime import datetime, date
7+
from datetime import datetime, date, timedelta
88
from collections import defaultdict
99

1010
class FileType(Enum):
@@ -75,9 +75,10 @@ def get_dirs_at_depth(directory, depth):
7575
if depth == num_sep_this - num_sep:
7676
yield root
7777

78-
def read_be_32(byte_stream, signed=False):
    """
    Return the first four bytes of byte_stream decoded as a big-endian 32-bit integer.

    byte_stream -- bytes-like object; only its first 4 bytes are looked at.
    signed      -- when True the value is decoded as a signed integer
                   (struct format ">i"), otherwise as unsigned (">I").

    Raises struct.error if fewer than 4 bytes are available.
    """
    # Named "fmt" (not "format") so the builtin format() is not shadowed.
    fmt = ">i" if signed else ">I"
    (value,) = struct.unpack(fmt, byte_stream[:32 // 8])
    return value
8283

8384
def read_be_16(byte_stream):
@@ -534,7 +535,6 @@ def compute_hash_of_dirs(root_directory, depth, size=0, limit_timestamps_date=No
534535
for filepath in filtered_file_map:
535536
file_collection[filepath] = file_classification(filepath)
536537

537-
538538
# Remove extra entries of macfiles to avoid extra checksum calculation in form of non mac files
539539
# Checksum for both the forks are calculated using a single file, so other files should be removed from the collection
540540
file_filter(file_collection)
@@ -557,6 +557,70 @@ def compute_hash_of_dirs(root_directory, depth, size=0, limit_timestamps_date=No
557557
return res
558558

559559

560+
def extract_macbin_mtime(file_byte_stream):
    """
    Returns the modification date of a macbinary file, read from its header.

    Doc - +$5f / 4: modification date/time.
    Doc - Timestamps are unsigned 32-bit values indicating the time in seconds
          since midnight on Jan 1, 1904, in local time.

    file_byte_stream -- bytes-like content of the file; only the first
                        128 bytes (the header) are inspected.

    Returns a datetime.date.
    Raises ValueError if the data ends before the 4-byte field at offset 0x5f.
    """
    macbin_epoch = datetime(1904, 1, 1)
    mtime_offset = 0x5F
    header = file_byte_stream[:128]
    try:
        # Decode the field in place; big-endian unsigned 32-bit.
        (macbin_seconds,) = struct.unpack_from(">I", header, mtime_offset)
    except struct.error as err:
        # Report truncated input the same way the AppleDouble parser does
        # (ValueError) instead of leaking a low-level struct.error.
        raise ValueError("MacBinary header is too short to hold a modification time.") from err
    return (macbin_epoch + timedelta(seconds=macbin_seconds)).date()
570+
571+
572+
def extract_mtime_appledouble(file_byte_stream):
    """
    Returns the modification date of an appledouble file.

    Doc 1 - The File Dates Info entry (ID=8) consists of the file creation, modification, backup
    and access times (see Figure 2-1), stored as a signed number of seconds before
    or after 12:00 a.m. (midnight), January 1, 2000 Greenwich Mean Time (GMT)

    Doc 2 -
    struct ASFileDates /* entry ID 8, file dates info */
    {
        sint32 create; /* file creation date/time */
        sint32 modify; /* last modification date/time */
        sint32 backup; /* last backup date/time */
        sint32 access; /* last access date/time */
    }; /* ASFileDates */

    file_byte_stream -- bytes-like content of the whole file. The 16-bit entry
                        count is read at offset 24 and the 12-byte entry
                        descriptors (id, offset, length) start at offset 26.

    Returns a datetime.date, or None when no entry with ID 8 is present.
    Raises ValueError if the header or entry table is truncated, or if the
    File Dates Info block holds fewer than 16 bytes.
    """
    appledouble_epoch = datetime(2000, 1, 1)
    file_dates_entry_id = 8

    # unpack_from decodes in place, so the (possibly large) file content is
    # never copied just to read a few header bytes.
    try:
        (entry_count,) = struct.unpack_from(">H", file_byte_stream, 24)
    except struct.error as err:
        raise ValueError("AppleDouble header is too short.") from err

    for entry in range(entry_count):
        start_index = 26 + entry * 12
        try:
            entry_id, offset, length = struct.unpack_from(">III", file_byte_stream, start_index)
        except struct.error as err:
            raise ValueError("AppleDouble entry table is truncated.") from err

        if entry_id != file_dates_entry_id:
            continue

        date_info_data = file_byte_stream[offset:offset + length]
        if len(date_info_data) < 16:
            raise ValueError("FileDatesInfo block is too short.")
        # "modify" is the second sint32 of ASFileDates, i.e. bytes 4..8.
        (modify_seconds,) = struct.unpack_from(">i", date_info_data, 4)
        return (appledouble_epoch + timedelta(seconds=modify_seconds)).date()

    return None
604+
605+
606+
def macfile_timestamp(filepath):
    """
    Returns the modification date stored inside a mac file: from the header
    for macbinary files, or from the File Dates Info entry for appledouble files.
    If the file is not a macfile (or an appledouble file carries no date
    entry), it returns None.
    """
    # Classify first so that ordinary files are not read into memory at all.

    # Macbinary
    if is_macbin(filepath):
        with open(filepath, "rb") as f:
            # extract_macbin_mtime only inspects the first 128 bytes.
            return extract_macbin_mtime(f.read(128))

    # Appledouble
    if is_appledouble_rsrc(filepath) or is_appledouble_in_dot_(filepath) or is_appledouble_in_macosx(filepath):
        with open(filepath, "rb") as f:
            # Entry offsets may point anywhere in the file, so read all of it.
            return extract_mtime_appledouble(f.read())

    return None
622+
623+
560624
def validate_date(date_str):
561625
"""
562626
Confirms if the user provided timestamp is in a valid format.
@@ -579,12 +643,15 @@ def filter_files_by_timestamp(files, limit_timestamps_date):
579643
"""
580644

581645
filtered_file_map = defaultdict(str)
646+
582647
if limit_timestamp_date is not None:
583648
user_date = validate_date(limit_timestamps_date)
584649
today = date.today()
585650

586651
for filepath in files:
587-
mtime = datetime.fromtimestamp(os.path.getmtime(filepath)).date()
652+
mtime = macfile_timestamp(filepath)
653+
if mtime is None:
654+
mtime = datetime.fromtimestamp(os.path.getmtime(filepath)).date()
588655
if limit_timestamps_date is None or (limit_timestamps_date is not None and (mtime <= user_date or mtime == today)):
589656
filtered_file_map[filepath] = str(mtime)
590657

0 commit comments

Comments
 (0)