@@ -495,7 +495,6 @@ def _find_dr_record_by_name(vd, path, encoding):
495
495
return root_dir_record
496
496
497
497
splitpath = utils .split_path (path )
498
-
499
498
currpath = splitpath .pop (0 ).decode ('utf-8' ).encode (encoding )
500
499
501
500
entry = root_dir_record
@@ -518,7 +517,6 @@ def _find_dr_record_by_name(vd, path, encoding):
518
517
index = lo
519
518
if index != len (thelist ) and thelist [index ].file_ident == currpath :
520
519
child = thelist [index ]
521
-
522
520
if child is None :
523
521
# We failed to find this component of the path, so break out of the
524
522
# loop and fail.
@@ -533,7 +531,6 @@ def _find_dr_record_by_name(vd, path, encoding):
533
531
# We found the last child we are looking for; return it.
534
532
if not splitpath :
535
533
return child
536
-
537
534
if not child .is_dir ():
538
535
break
539
536
entry = child
@@ -718,8 +715,8 @@ def _seek_to_extent(self, extent):
718
715
self ._cdfp .seek (extent * self .logical_block_size )
719
716
720
717
@lru_cache (maxsize = 256 )
721
- def _find_iso_record (self , iso_path ):
722
- # type: (bytes) -> dr.DirectoryRecord
718
+ def _find_iso_record (self , iso_path , encoding = 'utf-8' ):
719
+ # type: (bytes, str ) -> dr.DirectoryRecord
723
720
"""
724
721
An internal method to find a directory record on the ISO given an ISO
725
722
path. If the entry is found, it returns the directory record object
@@ -731,11 +728,11 @@ def _find_iso_record(self, iso_path):
731
728
Returns:
732
729
The directory record entry representing the entry on the ISO.
733
730
"""
734
- return _find_dr_record_by_name (self .pvd , iso_path , 'utf-8' )
731
+ return _find_dr_record_by_name (self .pvd , iso_path , encoding )
735
732
736
733
@lru_cache (maxsize = 256 )
737
- def _find_rr_record (self , rr_path ):
738
- # type: (bytes) -> dr.DirectoryRecord
734
+ def _find_rr_record (self , rr_path , encoding = 'utf-8' ):
735
+ # type: (bytes, str ) -> dr.DirectoryRecord
739
736
"""
740
737
An internal method to find a directory record on the ISO given a Rock
741
738
Ridge path. If the entry is found, it returns the directory record
@@ -755,7 +752,7 @@ def _find_rr_record(self, rr_path):
755
752
756
753
splitpath = utils .split_path (rr_path )
757
754
758
- currpath = splitpath .pop (0 ).decode ('utf-8' ).encode ('utf-8' )
755
+ currpath = splitpath .pop (0 ).decode ('utf-8' ).encode (encoding )
759
756
760
757
entry = root_dir_record
761
758
@@ -806,13 +803,13 @@ def _find_rr_record(self, rr_path):
806
803
if not child .is_dir ():
807
804
break
808
805
entry = child
809
- currpath = splitpath .pop (0 ).decode ('utf-8' ).encode ('utf-8' )
806
+ currpath = splitpath .pop (0 ).decode ('utf-8' ).encode (encoding )
810
807
811
808
raise pycdlibexception .PyCdlibInvalidInput ('Could not find path' )
812
809
813
810
@lru_cache (maxsize = 256 )
814
- def _find_joliet_record (self , joliet_path ):
815
- # type: (bytes) -> dr.DirectoryRecord
811
+ def _find_joliet_record (self , joliet_path , encoding = 'utf-16_be' ):
812
+ # type: (bytes, str ) -> dr.DirectoryRecord
816
813
"""
817
814
An internal method to find a directory record on the ISO given a Joliet
818
815
path. If the entry is found, it returns the directory record object
@@ -826,7 +823,7 @@ def _find_joliet_record(self, joliet_path):
826
823
"""
827
824
if self .joliet_vd is None :
828
825
raise pycdlibexception .PyCdlibInternalError ('Joliet path requested on non-Joliet ISO' )
829
- return _find_dr_record_by_name (self .joliet_vd , joliet_path , 'utf-16_be' )
826
+ return _find_dr_record_by_name (self .joliet_vd , joliet_path , encoding )
830
827
831
828
@lru_cache (maxsize = 256 )
832
829
def _find_udf_record (self , udf_path ):
@@ -2425,8 +2422,8 @@ def _udf_get_file_from_iso_fp(self, outfp, blocksize, udf_path):
2425
2422
utils .copy_data (data_len , blocksize , data_fp , outfp )
2426
2423
2427
2424
def _get_file_from_iso_fp (self , outfp , blocksize , iso_path , rr_path ,
2428
- joliet_path ):
2429
- # type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes]) -> None
2425
+ joliet_path , encoding = None ):
2426
+ # type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes], Optional[str] ) -> None
2430
2427
"""
2431
2428
An internal method to fetch a single file from the ISO and write it out
2432
2429
to the file object.
@@ -2446,13 +2443,16 @@ def _get_file_from_iso_fp(self, outfp, blocksize, iso_path, rr_path,
2446
2443
if joliet_path is not None :
2447
2444
if self .joliet_vd is None :
2448
2445
raise pycdlibexception .PyCdlibInvalidInput ('Cannot fetch a joliet_path from a non-Joliet ISO' )
2449
- found_record = self ._find_joliet_record (joliet_path )
2446
+ encoding = encoding or 'utf-16_be'
2447
+ found_record = self ._find_joliet_record (joliet_path , encoding )
2450
2448
elif rr_path is not None :
2451
2449
if not self .rock_ridge :
2452
2450
raise pycdlibexception .PyCdlibInvalidInput ('Cannot fetch a rr_path from a non-Rock Ridge ISO' )
2453
- found_record = self ._find_rr_record (rr_path )
2451
+ encoding = encoding or 'utf-8'
2452
+ found_record = self ._find_rr_record (rr_path , encoding )
2454
2453
elif iso_path is not None :
2455
- found_record = self ._find_iso_record (iso_path )
2454
+ encoding = encoding or 'utf-8'
2455
+ found_record = self ._find_iso_record (iso_path , encoding )
2456
2456
else :
2457
2457
raise pycdlibexception .PyCdlibInternalError ('Invalid path passed to get_file_from_iso_fp' )
2458
2458
@@ -3487,8 +3487,8 @@ def _rm_joliet_dir(self, joliet_path):
3487
3487
3488
3488
return num_bytes_to_remove
3489
3489
3490
- def _get_iso_entry (self , iso_path ):
3491
- # type: (bytes) -> dr.DirectoryRecord
3490
+ def _get_iso_entry (self , iso_path , encoding = 'utf-8' ):
3491
+ # type: (bytes, str ) -> dr.DirectoryRecord
3492
3492
"""
3493
3493
Internal method to get the directory record for an ISO path.
3494
3494
@@ -3500,10 +3500,10 @@ def _get_iso_entry(self, iso_path):
3500
3500
if self ._needs_reshuffle :
3501
3501
self ._reshuffle_extents ()
3502
3502
3503
- return self ._find_iso_record (iso_path )
3503
+ return self ._find_iso_record (iso_path , encoding )
3504
3504
3505
- def _get_rr_entry (self , rr_path ):
3506
- # type: (bytes) -> dr.DirectoryRecord
3505
+ def _get_rr_entry (self , rr_path , encoding = 'utf-8' ):
3506
+ # type: (bytes, str ) -> dr.DirectoryRecord
3507
3507
"""
3508
3508
Internal method to get the directory record for a Rock Ridge path.
3509
3509
@@ -3516,10 +3516,10 @@ def _get_rr_entry(self, rr_path):
3516
3516
if self ._needs_reshuffle :
3517
3517
self ._reshuffle_extents ()
3518
3518
3519
- return self ._find_rr_record (rr_path )
3519
+ return self ._find_rr_record (rr_path , encoding )
3520
3520
3521
- def _get_joliet_entry (self , joliet_path ):
3522
- # type: (bytes) -> dr.DirectoryRecord
3521
+ def _get_joliet_entry (self , joliet_path , encoding = 'utf-16_be' ):
3522
+ # type: (bytes, str ) -> dr.DirectoryRecord
3523
3523
"""
3524
3524
Internal method to get the directory record for a Joliet path.
3525
3525
@@ -3532,7 +3532,7 @@ def _get_joliet_entry(self, joliet_path):
3532
3532
if self ._needs_reshuffle :
3533
3533
self ._reshuffle_extents ()
3534
3534
3535
- return self ._find_joliet_record (joliet_path )
3535
+ return self ._find_joliet_record (joliet_path , encoding )
3536
3536
3537
3537
def _get_udf_entry (self , udf_path ):
3538
3538
# type: (str) -> udfmod.UDFFileEntry
@@ -4199,6 +4199,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
4199
4199
iso_path = None
4200
4200
rr_path = None
4201
4201
udf_path = None
4202
+ encoding = None
4202
4203
num_paths = 0
4203
4204
for key , value in kwargs .items ():
4204
4205
if key == 'blocksize' :
@@ -4229,6 +4230,8 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
4229
4230
num_paths += 1
4230
4231
elif value is not None :
4231
4232
raise pycdlibexception .PyCdlibInvalidInput ('udf_path must be a string' )
4233
+ elif key == 'encoding' :
4234
+ encoding = value
4232
4235
else :
4233
4236
raise pycdlibexception .PyCdlibInvalidInput ('Unknown keyword %s' % (key ))
4234
4237
@@ -4239,7 +4242,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
4239
4242
self ._udf_get_file_from_iso_fp (outfp , blocksize , udf_path )
4240
4243
else :
4241
4244
self ._get_file_from_iso_fp (outfp , blocksize , iso_path , rr_path ,
4242
- joliet_path )
4245
+ joliet_path , encoding )
4243
4246
4244
4247
def get_and_write (self , iso_path , local_path , blocksize = 8192 ):
4245
4248
# type: (str, str, int) -> None
@@ -5475,6 +5478,8 @@ def list_children(self, **kwargs):
5475
5478
if key in ('joliet_path' , 'rr_path' , 'iso_path' , 'udf_path' ):
5476
5479
if value is not None :
5477
5480
num_paths += 1
5481
+ elif key in ('encoding' ):
5482
+ continue
5478
5483
else :
5479
5484
raise pycdlibexception .PyCdlibInvalidInput ("Invalid keyword, must be one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'" )
5480
5485
@@ -5492,12 +5497,15 @@ def list_children(self, **kwargs):
5492
5497
else :
5493
5498
use_rr = False
5494
5499
if 'joliet_path' in kwargs :
5495
- rec = self ._get_joliet_entry (self ._normalize_joliet_path (kwargs ['joliet_path' ]))
5500
+ kwargs ['encoding' ] = kwargs .get ('encoding' , None ) or 'utf-16_be'
5501
+ rec = self ._get_joliet_entry (self ._normalize_joliet_path (kwargs ['joliet_path' ]), kwargs ['encoding' ])
5496
5502
elif 'rr_path' in kwargs :
5497
- rec = self ._get_rr_entry (utils .normpath (kwargs ['rr_path' ]))
5503
+ kwargs ['encoding' ] = kwargs .get ('encoding' , None ) or 'utf-8'
5504
+ rec = self ._get_rr_entry (utils .normpath (kwargs ['rr_path' ]), kwargs ['encoding' ])
5498
5505
use_rr = True
5499
5506
else :
5500
- rec = self ._get_iso_entry (utils .normpath (kwargs ['iso_path' ]))
5507
+ kwargs ['encoding' ] = kwargs .get ('encoding' , None ) or 'utf-8'
5508
+ rec = self ._get_iso_entry (utils .normpath (kwargs ['iso_path' ]), kwargs ['encoding' ])
5501
5509
5502
5510
for c in _yield_children (rec , use_rr ):
5503
5511
yield c
@@ -5642,8 +5650,8 @@ def rm_isohybrid(self):
5642
5650
5643
5651
self .isohybrid_mbr = None
5644
5652
5645
- def full_path_from_dirrecord (self , rec , rockridge = False ):
5646
- # type: (Union[dr.DirectoryRecord, udfmod.UDFFileEntry], bool) -> str
5653
+ def full_path_from_dirrecord (self , rec , rockridge = False , user_encoding = None ):
5654
+ # type: (Union[dr.DirectoryRecord, udfmod.UDFFileEntry], bool, Optional[str] ) -> str
5647
5655
"""
5648
5656
Get the absolute path of a directory record.
5649
5657
@@ -5662,6 +5670,8 @@ def full_path_from_dirrecord(self, rec, rockridge=False):
5662
5670
if self .joliet_vd is not None and id (rec .vd ) == id (self .joliet_vd ):
5663
5671
encoding = 'utf-16_be'
5664
5672
5673
+ if user_encoding :
5674
+ encoding = user_encoding
5665
5675
# A root entry has no Rock Ridge entry, even on a Rock Ridge ISO.
5666
5676
# Always return / here.
5667
5677
if rec .is_root :
@@ -5701,6 +5711,8 @@ def full_path_from_dirrecord(self, rec, rockridge=False):
5701
5711
encoding = rec .file_ident .encoding
5702
5712
else :
5703
5713
encoding = 'utf-8'
5714
+ if user_encoding :
5715
+ encoding = user_encoding
5704
5716
udf_rec = rec # type: Optional[udfmod.UDFFileEntry]
5705
5717
while udf_rec is not None :
5706
5718
ident = udf_rec .file_identifier ()
@@ -5913,13 +5925,13 @@ def walk(self, **kwargs):
5913
5925
while dirs :
5914
5926
dir_record = dirs .popleft ()
5915
5927
5916
- relpath = self .full_path_from_dirrecord (dir_record ,
5917
- rockridge = path_type == 'rr_path' )
5928
+ relpath = self .full_path_from_dirrecord (dir_record , rockridge = path_type == 'rr_path' ,
5929
+ user_encoding = user_encoding )
5918
5930
dirlist = []
5919
5931
filelist = []
5920
5932
dirdict = {}
5921
5933
5922
- for child in reversed (list (self .list_children (** {path_type : relpath }))):
5934
+ for child in reversed (list (self .list_children (** {path_type : relpath , 'encoding' : kwargs . get ( 'encoding' , None ) }))):
5923
5935
if child is None or child .is_dot () or child .is_dotdot ():
5924
5936
continue
5925
5937
0 commit comments