Skip to content

Commit 3694362

Browse files
committed
fix encoding issue in get_file_from_iso_fp
1 parent 68a4f30 commit 3694362

File tree

1 file changed

+28
-19
lines changed

1 file changed

+28
-19
lines changed

pycdlib/pycdlib.py

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -734,8 +734,8 @@ def _find_iso_record(self, iso_path, encoding='utf-8'):
734734
return _find_dr_record_by_name(self.pvd, iso_path, encoding)
735735

736736
@lru_cache(maxsize=256)
737-
def _find_rr_record(self, rr_path):
738-
# type: (bytes) -> dr.DirectoryRecord
737+
def _find_rr_record(self, rr_path, encoding='utf-8'):
738+
# type: (bytes, str) -> dr.DirectoryRecord
739739
"""
740740
An internal method to find a directory record on the ISO given a Rock
741741
Ridge path. If the entry is found, it returns the directory record
@@ -755,7 +755,7 @@ def _find_rr_record(self, rr_path):
755755

756756
splitpath = utils.split_path(rr_path)
757757

758-
currpath = splitpath.pop(0).decode('utf-8').encode('utf-8')
758+
currpath = splitpath.pop(0).decode('utf-8').encode(encoding)
759759

760760
entry = root_dir_record
761761

@@ -806,13 +806,13 @@ def _find_rr_record(self, rr_path):
806806
if not child.is_dir():
807807
break
808808
entry = child
809-
currpath = splitpath.pop(0).decode('utf-8').encode('utf-8')
809+
currpath = splitpath.pop(0).decode('utf-8').encode(encoding)
810810

811811
raise pycdlibexception.PyCdlibInvalidInput('Could not find path')
812812

813813
@lru_cache(maxsize=256)
814-
def _find_joliet_record(self, joliet_path):
815-
# type: (bytes) -> dr.DirectoryRecord
814+
def _find_joliet_record(self, joliet_path, encoding='utf-16_be'):
815+
# type: (bytes, str) -> dr.DirectoryRecord
816816
"""
817817
An internal method to find a directory record on the ISO given a Joliet
818818
path. If the entry is found, it returns the directory record object
@@ -826,7 +826,7 @@ def _find_joliet_record(self, joliet_path):
826826
"""
827827
if self.joliet_vd is None:
828828
raise pycdlibexception.PyCdlibInternalError('Joliet path requested on non-Joliet ISO')
829-
return _find_dr_record_by_name(self.joliet_vd, joliet_path, 'utf-16_be')
829+
return _find_dr_record_by_name(self.joliet_vd, joliet_path, encoding)
830830

831831
@lru_cache(maxsize=256)
832832
def _find_udf_record(self, udf_path):
@@ -2425,8 +2425,8 @@ def _udf_get_file_from_iso_fp(self, outfp, blocksize, udf_path):
24252425
utils.copy_data(data_len, blocksize, data_fp, outfp)
24262426

24272427
def _get_file_from_iso_fp(self, outfp, blocksize, iso_path, rr_path,
2428-
joliet_path):
2429-
# type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes]) -> None
2428+
joliet_path, encoding=None):
2429+
# type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes], Optional[str]) -> None
24302430
"""
24312431
An internal method to fetch a single file from the ISO and write it out
24322432
to the file object.
@@ -2446,13 +2446,19 @@ def _get_file_from_iso_fp(self, outfp, blocksize, iso_path, rr_path,
24462446
if joliet_path is not None:
24472447
if self.joliet_vd is None:
24482448
raise pycdlibexception.PyCdlibInvalidInput('Cannot fetch a joliet_path from a non-Joliet ISO')
2449-
found_record = self._find_joliet_record(joliet_path)
2449+
if not encoding:
2450+
encoding = 'utf-16_be'
2451+
found_record = self._find_joliet_record(joliet_path, encoding)
24502452
elif rr_path is not None:
24512453
if not self.rock_ridge:
24522454
raise pycdlibexception.PyCdlibInvalidInput('Cannot fetch a rr_path from a non-Rock Ridge ISO')
2453-
found_record = self._find_rr_record(rr_path)
2455+
if not encoding:
2456+
encoding = 'utf-8'
2457+
found_record = self._find_rr_record(rr_path, encoding)
24542458
elif iso_path is not None:
2455-
found_record = self._find_iso_record(iso_path)
2459+
if not encoding:
2460+
encoding = 'utf-8'
2461+
found_record = self._find_iso_record(iso_path, encoding)
24562462
else:
24572463
raise pycdlibexception.PyCdlibInternalError('Invalid path passed to get_file_from_iso_fp')
24582464

@@ -3502,7 +3508,7 @@ def _get_iso_entry(self, iso_path, encoding='utf-8'):
35023508

35033509
return self._find_iso_record(iso_path, encoding)
35043510

3505-
def _get_rr_entry(self, rr_path):
3511+
def _get_rr_entry(self, rr_path, encoding='utf-8'):
35063512
# type: (bytes, str) -> dr.DirectoryRecord
35073513
"""
35083514
Internal method to get the directory record for a Rock Ridge path.
@@ -3516,9 +3522,9 @@ def _get_rr_entry(self, rr_path):
35163522
if self._needs_reshuffle:
35173523
self._reshuffle_extents()
35183524

3519-
return self._find_rr_record(rr_path)
3525+
return self._find_rr_record(rr_path, encoding)
35203526

3521-
def _get_joliet_entry(self, joliet_path):
3527+
def _get_joliet_entry(self, joliet_path, encoding='utf-16_be'):
35223528
# type: (bytes, str) -> dr.DirectoryRecord
35233529
"""
35243530
Internal method to get the directory record for a Joliet path.
@@ -3532,7 +3538,7 @@ def _get_joliet_entry(self, joliet_path):
35323538
if self._needs_reshuffle:
35333539
self._reshuffle_extents()
35343540

3535-
return self._find_joliet_record(joliet_path)
3541+
return self._find_joliet_record(joliet_path, encoding)
35363542

35373543
def _get_udf_entry(self, udf_path):
35383544
# type: (str) -> udfmod.UDFFileEntry
@@ -4199,6 +4205,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
41994205
iso_path = None
42004206
rr_path = None
42014207
udf_path = None
4208+
encoding = None
42024209
num_paths = 0
42034210
for key, value in kwargs.items():
42044211
if key == 'blocksize':
@@ -4229,6 +4236,8 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
42294236
num_paths += 1
42304237
elif value is not None:
42314238
raise pycdlibexception.PyCdlibInvalidInput('udf_path must be a string')
4239+
elif key == 'encoding':
4240+
encoding = value
42324241
else:
42334242
raise pycdlibexception.PyCdlibInvalidInput('Unknown keyword %s' % (key))
42344243

@@ -4239,7 +4248,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
42394248
self._udf_get_file_from_iso_fp(outfp, blocksize, udf_path)
42404249
else:
42414250
self._get_file_from_iso_fp(outfp, blocksize, iso_path, rr_path,
4242-
joliet_path)
4251+
joliet_path, encoding)
42434252

42444253
def get_and_write(self, iso_path, local_path, blocksize=8192):
42454254
# type: (str, str, int) -> None
@@ -5494,9 +5503,9 @@ def list_children(self, **kwargs):
54945503
else:
54955504
use_rr = False
54965505
if 'joliet_path' in kwargs:
5497-
rec = self._get_joliet_entry(self._normalize_joliet_path(kwargs['joliet_path']))
5506+
rec = self._get_joliet_entry(self._normalize_joliet_path(kwargs['joliet_path']), kwargs['encoding'])
54985507
elif 'rr_path' in kwargs:
5499-
rec = self._get_rr_entry(utils.normpath(kwargs['rr_path']))
5508+
rec = self._get_rr_entry(utils.normpath(kwargs['rr_path']), kwargs['encoding'])
55005509
use_rr = True
55015510
else:
55025511
rec = self._get_iso_entry(utils.normpath(kwargs['iso_path']), kwargs['encoding'])

0 commit comments

Comments
 (0)