Skip to content

Commit 8575f8e

Browse files
committed
INTEGRITY: Add punycode encoding for scan utility.
1 parent bd3f2f4 commit 8575f8e

File tree

1 file changed

+80
-19
lines changed

1 file changed

+80
-19
lines changed

compute_hash.py

Lines changed: 80 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ class FileType(Enum):
1818

1919
script_version = "0.1"
2020

21+
# Characters that my_escape_string() escapes and that make
# punycode_need_encode() report True (in addition to control chars < 0x20).
SPECIAL_SYMBOLS = '/":*|\\?%<>\x7f'
22+
2123
# CRC table
2224
CRC16_XMODEM_TABLE = [
2325
0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
@@ -75,6 +77,83 @@ def get_dirs_at_depth(directory, depth):
7577
if depth == num_sep_this - num_sep:
7678
yield root
7779

80+
81+
def my_escape_string(s: str) -> str:
    """
    Escape characters that are not allowed in a filename.

    Every escaped character is replaced by the escape char 0x81 followed by
    one more character:

    - the escape char 0x81 itself becomes 0x81 0x79
    - unallowed filename chars (SPECIAL_SYMBOLS, see
      https://en.wikipedia.org/wiki/Filename#Reserved_characters_and_words)
      and control chars < 0x20 become 0x81 followed by the char shifted
      up by 0x80

    All other characters are copied unchanged.

    :param s: the string to escape
    :return: the escaped string
    """
    # Collect the pieces and join once instead of growing a string with +=.
    pieces = []
    for char in s:
        if char == "\x81":
            pieces.append("\x81\x79")
        elif ord(char) < 0x20 or char in SPECIAL_SYMBOLS:
            pieces.append("\x81" + chr(0x80 + ord(char)))
        else:
            pieces.append(char)
    return "".join(pieces)
99+
100+
101+
def encode_punycode(orig):
    """
    Punyencode a single name if it needs it.

    The name is first passed through my_escape_string() and then through
    Python's "punycode" codec. The encoded form, prefixed with "xn--", is
    returned when

    - escaping/encoding changed the name (it had special characters), or
    - the name ends in a space or a dot.

    Otherwise the name is returned unchanged. An empty name is returned
    unchanged as well.

    :param orig: the name to encode
    :return: "xn--" + encoded name, or orig when no encoding is needed
    """
    s = my_escape_string(orig)
    encoded = s.encode("punycode").decode("ascii")
    # punyencoding adds a '-' at the end when there are no special chars;
    # don't use it for comparing
    compare = encoded[:-1] if encoded.endswith("-") else encoded
    # endswith() instead of compare[-1] so that an empty name does not
    # raise IndexError.
    if orig != compare or compare.endswith((" ", ".")):
        return "xn--" + encoded
    return orig
118+
119+
120+
def punycode_need_encode(orig):
    """
    Tell whether a filename needs to be punyencoded.

    A filename needs to be punyencoded when it:

    - contains a char that should be escaped (outside 0x20..0x7f, or one of
      SPECIAL_SYMBOLS) or
    - ends with a dot or a space.

    Names longer than 4 chars that already start with "xn--" are treated as
    already encoded. An empty name never needs encoding.

    :param orig: the filename to check
    :return: True when the name must be encoded, False otherwise
    """
    # Empty name: nothing to escape and no last char to inspect
    # (the original orig[-1] lookup raised IndexError here).
    if not orig:
        return False
    if len(orig) > 4 and orig.startswith("xn--"):
        return False
    if not all((0x20 <= ord(c) < 0x80) and c not in SPECIAL_SYMBOLS for c in orig):
        return True
    return orig.endswith((" ", "."))
134+
135+
136+
def split_path_recursive(path):
    """
    Split a path into the list of its components using os.path.split().

    A leading root (e.g. "/") is kept as the first component. Trailing
    separators are ignored, so "a/b/" yields ["a", "b"]. An empty path
    yields an empty list.

    :param path: the path to split
    :return: list of path components, outermost first
    """
    # Components are collected innermost-first and reversed once at the end,
    # which avoids the repeated O(n) insert(0, ...).
    parts = []
    while True:
        head, tail = os.path.split(path)
        if tail:
            parts.append(tail)
        elif head == path:
            # Nothing more can be split off: head is empty or a root.
            if head:
                parts.append(head)
            break
        # Either a component was consumed or only a trailing separator was
        # stripped; in both cases head is strictly shorter than path.
        path = head
    parts.reverse()
    return parts
148+
149+
def encode_path_components(filepath):
    """
    Puny encodes all separate components of filepath

    Each component returned by split_path_recursive() is passed through
    encode_punycode() when punycode_need_encode() says so, and the
    components are joined again with os.path.join().

    A component without a basename (a root such as "/") is kept as-is:
    it consists of separator characters, which would otherwise be escaped.
    A path with no components (e.g. the empty string) is returned unchanged.

    :param filepath: the path whose components should be encoded
    :return: the path with every component that needs it punyencoded
    """
    parts = split_path_recursive(filepath)
    if not parts:
        # os.path.join() requires at least one argument.
        return filepath
    encoded_parts = [
        encode_punycode(p) if os.path.basename(p) and punycode_need_encode(p) else p
        for p in parts
    ]
    return os.path.join(*encoded_parts)
156+
78157
def read_be_32(byte_stream, signed=False):
79158
""" Return unsigned integer of size_in_bits, assuming the data is big-endian """
80159
format = ">i" if signed else ">I"
@@ -202,25 +281,6 @@ def macbin_get_datafork(file_byte_stream):
202281
(datalen,) = struct.unpack(">I", file_byte_stream[0x53:0x57])
203282
return file_byte_stream[0x80: 0x80 + datalen]
204283

205-
def is_appledouble(file_byte_stream):
    """
    Return True when file_byte_stream starts with the AppleDouble signature.

    Appledouble Structure -

    Header:
    +$00 / 4: signature (0x00 0x05 0x16 0x07)
    +$04 / 4: version (0x00 0x01 0x00 0x00 (v1) -or- 0x00 0x02 0x00 0x00 (v2))
    +$08 /16: home file system string (v1) -or- zeroes (v2)
    +$18 / 2: number of entries

    Entries:
    +$00 / 4: entry ID (1-15)
    +$04 / 4: offset to data from start of file
    +$08 / 4: length of entry in bytes; may be zero
    """
    # A missing/empty stream, or one shorter than the 4-byte signature,
    # cannot be an AppleDouble file.
    if not file_byte_stream or len(file_byte_stream) < 4:
        return False
    return read_be_32(file_byte_stream) == 0x00051607
224284

225285
def appledouble_get_resfork_data(file_byte_stream):
226286
""" Returns the resource fork's data section as bytes, size of resource fork (size-r) and size of data section of resource fork (size-rd) of an appledouble file"""
@@ -672,6 +732,7 @@ def create_dat_file(hash_of_dirs, path, checksum_size=0):
672732
for hash_of_dir in hash_of_dirs:
673733
file.write("game (\n")
674734
for filename, (hashes, size, size_r, size_rd, timestamp) in hash_of_dir.items():
735+
filename = encode_path_components(filename)
675736
data = f"name \"{filename}\" size {size} size-r {size_r} size-rd {size_rd} timestamp {timestamp}"
676737
for key, value in hashes:
677738
data += f" {key} {value}"

0 commit comments

Comments
 (0)