@@ -705,8 +705,8 @@ def _seek_to_extent(self, extent):
705
705
self ._cdfp .seek (extent * self .logical_block_size )
706
706
707
707
@functools .lru_cache (maxsize = 256 )
708
- def _find_iso_record (self , iso_path ):
709
- # type: (bytes) -> dr.DirectoryRecord
708
+ def _find_iso_record (self , iso_path , encoding = 'utf-8' ):
709
+ # type: (bytes, str ) -> dr.DirectoryRecord
710
710
"""
711
711
An internal method to find a directory record on the ISO given an ISO
712
712
path. If the entry is found, it returns the directory record object
@@ -718,11 +718,11 @@ def _find_iso_record(self, iso_path):
718
718
Returns:
719
719
The directory record entry representing the entry on the ISO.
720
720
"""
721
- return _find_dr_record_by_name (self .pvd , iso_path , 'utf-8' )
721
+ return _find_dr_record_by_name (self .pvd , iso_path , encoding )
722
722
723
723
@functools .lru_cache (maxsize = 256 )
724
- def _find_rr_record (self , rr_path ):
725
- # type: (bytes) -> dr.DirectoryRecord
724
+ def _find_rr_record (self , rr_path , encoding = 'utf-8' ):
725
+ # type: (bytes, str ) -> dr.DirectoryRecord
726
726
"""
727
727
An internal method to find a directory record on the ISO given a Rock
728
728
Ridge path. If the entry is found, it returns the directory record
@@ -742,7 +742,7 @@ def _find_rr_record(self, rr_path):
742
742
743
743
splitpath = utils .split_path (rr_path )
744
744
745
- currpath = splitpath .pop (0 ).decode ('utf-8' ).encode ('utf-8' )
745
+ currpath = splitpath .pop (0 ).decode ('utf-8' ).encode (encoding )
746
746
747
747
entry = root_dir_record
748
748
@@ -793,13 +793,13 @@ def _find_rr_record(self, rr_path):
793
793
if not child .is_dir ():
794
794
break
795
795
entry = child
796
- currpath = splitpath .pop (0 ).decode ('utf-8' ).encode ('utf-8' )
796
+ currpath = splitpath .pop (0 ).decode ('utf-8' ).encode (encoding )
797
797
798
798
raise pycdlibexception .PyCdlibInvalidInput ('Could not find path' )
799
799
800
800
@functools .lru_cache (maxsize = 256 )
801
- def _find_joliet_record (self , joliet_path ):
802
- # type: (bytes) -> dr.DirectoryRecord
801
+ def _find_joliet_record (self , joliet_path , encoding = 'utf-16_be' ):
802
+ # type: (bytes, str ) -> dr.DirectoryRecord
803
803
"""
804
804
An internal method to find a directory record on the ISO given a Joliet
805
805
path. If the entry is found, it returns the directory record object
@@ -813,7 +813,7 @@ def _find_joliet_record(self, joliet_path):
813
813
"""
814
814
if self .joliet_vd is None :
815
815
raise pycdlibexception .PyCdlibInternalError ('Joliet path requested on non-Joliet ISO' )
816
- return _find_dr_record_by_name (self .joliet_vd , joliet_path , 'utf-16_be' )
816
+ return _find_dr_record_by_name (self .joliet_vd , joliet_path , encoding )
817
817
818
818
@functools .lru_cache (maxsize = 256 )
819
819
def _find_udf_record (self , udf_path ):
@@ -2412,8 +2412,8 @@ def _udf_get_file_from_iso_fp(self, outfp, blocksize, udf_path):
2412
2412
utils .copy_data (data_len , blocksize , data_fp , outfp )
2413
2413
2414
2414
def _get_file_from_iso_fp (self , outfp , blocksize , iso_path , rr_path ,
2415
- joliet_path ):
2416
- # type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes]) -> None
2415
+ joliet_path , encoding = None ):
2416
+ # type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes], Optional[str]) -> None
2417
2417
"""
2418
2418
An internal method to fetch a single file from the ISO and write it out
2419
2419
to the file object.
@@ -2433,13 +2433,16 @@ def _get_file_from_iso_fp(self, outfp, blocksize, iso_path, rr_path,
2433
2433
if joliet_path is not None :
2434
2434
if self .joliet_vd is None :
2435
2435
raise pycdlibexception .PyCdlibInvalidInput ('Cannot fetch a joliet_path from a non-Joliet ISO' )
2436
- found_record = self ._find_joliet_record (joliet_path )
2436
+ encoding = encoding or 'utf-16_be'
2437
+ found_record = self ._find_joliet_record (joliet_path , encoding )
2437
2438
elif rr_path is not None :
2438
2439
if not self .rock_ridge :
2439
2440
raise pycdlibexception .PyCdlibInvalidInput ('Cannot fetch a rr_path from a non-Rock Ridge ISO' )
2440
- found_record = self ._find_rr_record (rr_path )
2441
+ encoding = encoding or 'utf-8'
2442
+ found_record = self ._find_rr_record (rr_path , encoding )
2441
2443
elif iso_path is not None :
2442
- found_record = self ._find_iso_record (iso_path )
2444
+ encoding = encoding or 'utf-8'
2445
+ found_record = self ._find_iso_record (iso_path , encoding )
2443
2446
else :
2444
2447
raise pycdlibexception .PyCdlibInternalError ('Invalid path passed to get_file_from_iso_fp' )
2445
2448
@@ -3471,8 +3474,8 @@ def _rm_joliet_dir(self, joliet_path):
3471
3474
3472
3475
return num_bytes_to_remove
3473
3476
3474
- def _get_iso_entry (self , iso_path ):
3475
- # type: (bytes) -> dr.DirectoryRecord
3477
+ def _get_iso_entry (self , iso_path , encoding = 'utf-8' ):
3478
+ # type: (bytes, str ) -> dr.DirectoryRecord
3476
3479
"""
3477
3480
Internal method to get the directory record for an ISO path.
3478
3481
@@ -3484,10 +3487,10 @@ def _get_iso_entry(self, iso_path):
3484
3487
if self ._needs_reshuffle :
3485
3488
self ._reshuffle_extents ()
3486
3489
3487
- return self ._find_iso_record (iso_path )
3490
+ return self ._find_iso_record (iso_path , encoding )
3488
3491
3489
- def _get_rr_entry (self , rr_path ):
3490
- # type: (bytes) -> dr.DirectoryRecord
3492
+ def _get_rr_entry (self , rr_path , encoding = 'utf-8' ):
3493
+ # type: (bytes, str ) -> dr.DirectoryRecord
3491
3494
"""
3492
3495
Internal method to get the directory record for a Rock Ridge path.
3493
3496
@@ -3500,10 +3503,10 @@ def _get_rr_entry(self, rr_path):
3500
3503
if self ._needs_reshuffle :
3501
3504
self ._reshuffle_extents ()
3502
3505
3503
- return self ._find_rr_record (rr_path )
3506
+ return self ._find_rr_record (rr_path , encoding )
3504
3507
3505
- def _get_joliet_entry (self , joliet_path ):
3506
- # type: (bytes) -> dr.DirectoryRecord
3508
+ def _get_joliet_entry (self , joliet_path , encoding = 'utf-16_be' ):
3509
+ # type: (bytes, str ) -> dr.DirectoryRecord
3507
3510
"""
3508
3511
Internal method to get the directory record for a Joliet path.
3509
3512
@@ -3516,7 +3519,7 @@ def _get_joliet_entry(self, joliet_path):
3516
3519
if self ._needs_reshuffle :
3517
3520
self ._reshuffle_extents ()
3518
3521
3519
- return self ._find_joliet_record (joliet_path )
3522
+ return self ._find_joliet_record (joliet_path , encoding )
3520
3523
3521
3524
def _get_udf_entry (self , udf_path ):
3522
3525
# type: (str) -> udfmod.UDFFileEntry
@@ -4172,6 +4175,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
4172
4175
with iso_path, rr_path, and udf_path).
4173
4176
udf_path - The absolute UDF path to lookup on the ISO (exclusive with
4174
4177
iso_path, rr_path, and joliet_path).
4178
+ encoding - The encoding to use when looking up filenames; if not given, 'utf-16_be' is used for joliet_path and 'utf-8' for iso_path and rr_path.
4175
4179
Returns:
4176
4180
Nothing.
4177
4181
"""
@@ -4183,6 +4187,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
4183
4187
iso_path = None
4184
4188
rr_path = None
4185
4189
udf_path = None
4190
+ encoding = None
4186
4191
num_paths = 0
4187
4192
for key , value in kwargs .items ():
4188
4193
if key == 'blocksize' :
@@ -4213,6 +4218,8 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
4213
4218
num_paths += 1
4214
4219
elif value is not None :
4215
4220
raise pycdlibexception .PyCdlibInvalidInput ('udf_path must be a string' )
4221
+ elif key == 'encoding' :
4222
+ encoding = value
4216
4223
else :
4217
4224
raise pycdlibexception .PyCdlibInvalidInput ('Unknown keyword %s' % (key ))
4218
4225
@@ -4223,7 +4230,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
4223
4230
self ._udf_get_file_from_iso_fp (outfp , blocksize , udf_path )
4224
4231
else :
4225
4232
self ._get_file_from_iso_fp (outfp , blocksize , iso_path , rr_path ,
4226
- joliet_path )
4233
+ joliet_path , encoding )
4227
4234
4228
4235
def get_and_write (self , iso_path , local_path , blocksize = 8192 ):
4229
4236
# type: (str, str, int) -> None
@@ -5459,6 +5466,8 @@ def list_children(self, **kwargs):
5459
5466
if key in ('joliet_path' , 'rr_path' , 'iso_path' , 'udf_path' ):
5460
5467
if value is not None :
5461
5468
num_paths += 1
5469
+ elif key in ('encoding' ):
5470
+ continue
5462
5471
else :
5463
5472
raise pycdlibexception .PyCdlibInvalidInput ("Invalid keyword, must be one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'" )
5464
5473
@@ -5476,12 +5485,15 @@ def list_children(self, **kwargs):
5476
5485
else :
5477
5486
use_rr = False
5478
5487
if 'joliet_path' in kwargs :
5479
- rec = self ._get_joliet_entry (self ._normalize_joliet_path (kwargs ['joliet_path' ]))
5488
+ kwargs ['encoding' ] = kwargs .get ('encoding' , None ) or 'utf-16_be'
5489
+ rec = self ._get_joliet_entry (self ._normalize_joliet_path (kwargs ['joliet_path' ]), kwargs ['encoding' ])
5480
5490
elif 'rr_path' in kwargs :
5481
- rec = self ._get_rr_entry (utils .normpath (kwargs ['rr_path' ]))
5491
+ kwargs ['encoding' ] = kwargs .get ('encoding' , None ) or 'utf-8'
5492
+ rec = self ._get_rr_entry (utils .normpath (kwargs ['rr_path' ]), kwargs ['encoding' ])
5482
5493
use_rr = True
5483
5494
else :
5484
- rec = self ._get_iso_entry (utils .normpath (kwargs ['iso_path' ]))
5495
+ kwargs ['encoding' ] = kwargs .get ('encoding' , None ) or 'utf-8'
5496
+ rec = self ._get_iso_entry (utils .normpath (kwargs ['iso_path' ]), kwargs ['encoding' ])
5485
5497
5486
5498
for c in _yield_children (rec , use_rr ):
5487
5499
yield c
@@ -5626,8 +5638,8 @@ def rm_isohybrid(self):
5626
5638
5627
5639
self .isohybrid_mbr = None
5628
5640
5629
- def full_path_from_dirrecord (self , rec , rockridge = False ):
5630
- # type: (Union[dr.DirectoryRecord, udfmod.UDFFileEntry], bool) -> str
5641
+ def full_path_from_dirrecord (self , rec , rockridge = False , user_encoding = None ):
5642
+ # type: (Union[dr.DirectoryRecord, udfmod.UDFFileEntry], bool, Optional[str]) -> str
5631
5643
"""
5632
5644
Get the absolute path of a directory record.
5633
5645
@@ -5646,6 +5658,8 @@ def full_path_from_dirrecord(self, rec, rockridge=False):
5646
5658
if self .joliet_vd is not None and id (rec .vd ) == id (self .joliet_vd ):
5647
5659
encoding = 'utf-16_be'
5648
5660
5661
+ if user_encoding :
5662
+ encoding = user_encoding
5649
5663
# A root entry has no Rock Ridge entry, even on a Rock Ridge ISO.
5650
5664
# Always return / here.
5651
5665
if rec .is_root :
@@ -5685,6 +5699,8 @@ def full_path_from_dirrecord(self, rec, rockridge=False):
5685
5699
encoding = rec .file_ident .encoding
5686
5700
else :
5687
5701
encoding = 'utf-8'
5702
+ if user_encoding :
5703
+ encoding = user_encoding
5688
5704
udf_rec = rec # type: Optional[udfmod.UDFFileEntry]
5689
5705
while udf_rec is not None :
5690
5706
ident = udf_rec .file_identifier ()
@@ -5893,13 +5909,13 @@ def walk(self, **kwargs):
5893
5909
while dirs :
5894
5910
dir_record = dirs .popleft ()
5895
5911
5896
- relpath = self .full_path_from_dirrecord (dir_record ,
5897
- rockridge = path_type == 'rr_path' )
5912
+ relpath = self .full_path_from_dirrecord (dir_record , rockridge = path_type == 'rr_path' ,
5913
+ user_encoding = user_encoding )
5898
5914
dirlist = []
5899
5915
filelist = []
5900
5916
dirdict = {}
5901
5917
5902
- for child in reversed (list (self .list_children (** {path_type : relpath }))):
5918
+ for child in reversed (list (self .list_children (** {path_type : relpath , 'encoding' : kwargs . get ( 'encoding' , None ) }))):
5903
5919
if child is None or child .is_dot () or child .is_dotdot ():
5904
5920
continue
5905
5921
0 commit comments