54
54
else :
55
55
from pipes import quote as shlex_quote
56
56
57
-
58
57
"""
59
58
Low level support for p/7zip-based archive extraction.
60
59
"""
61
60
62
-
63
61
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Debug switches: when any of them is True, logging is sent to stdout and this
# module logger is set to the DEBUG level.
TRACE = False
TRACE_DEEP = False
TRACE_ENTRIES = False

if any((TRACE, TRACE_DEEP, TRACE_ENTRIES)):
    import sys
    logging.basicConfig(stream=sys.stdout)
    logger.setLevel(logging.DEBUG)
@@ -518,45 +517,6 @@ def list_entries(location, arch_type='*'):
518
517
return parse_7z_listing (stdout , utf ), error_messages
519
518
520
519
521
- def as_entry (infos ):
522
- """
523
- Return an Entry built from a 7zip path listing data in the `infos` mapping.
524
- """
525
- is_symlink = False
526
- is_hardlink = False
527
- link_target = None
528
-
529
- sl = infos .get ('Symbolic Link' )
530
-
531
- if sl :
532
- is_symlink = True
533
- link_target = sl
534
-
535
- hl = infos .get ('Hard Link' )
536
- if hl :
537
- is_hardlink = True
538
- link_target = hl
539
-
540
- if sl and hl :
541
- from pprint import pformat
542
- raise ExtractWarningIncorrectEntry (
543
- 'A symlink cannot be also a hardlink: {}' .format (pformat (infos )))
544
-
545
- is_dir = infos .get ('Folder' , False ) == '+'
546
-
547
- e = Entry (
548
- path = infos .get ('Path' ),
549
- size = infos .get ('Size' , 0 ),
550
- date = infos .get ('Modified' , None ),
551
- is_dir = is_dir ,
552
- is_file = not is_dir ,
553
- is_symlink = is_symlink ,
554
- is_hardlink = is_hardlink ,
555
- link_target = link_target ,
556
- )
557
- return e
558
-
559
-
560
520
def parse_7z_listing (location , utf = False ):
561
521
"""
562
522
Return a list of Entry objects from parsing a long format 7zip listing from a
@@ -567,16 +527,18 @@ def parse_7z_listing(location, utf=False):
567
527
568
528
The 7zip -slt format looks like this:
569
529
530
+ 1. a header with:
570
531
- copyright and version details
571
532
- '--' line
572
533
- archive header info, varying based on the archive types and subtype
573
534
- lines of key=value pairs
574
- - Errors: followed by one or more message lines
575
- - Warnings: followed by one or more message lines
576
- - Open Warning: : followed by one or more message lines
577
- - sometimes a '---' line
535
+ - ERRORS: followed by one or more message lines
536
+ - WARNINGS: followed by one or more message lines
578
537
- blank line
579
- - '----------' line
538
+
539
+ 2. blocks of path aka. entry data, one for each path with:
540
+
541
+ - '----------' line once as the indicator of path blocks starting
580
542
- for each archive member:
581
543
- lines of either
582
544
- key = value pairs, with a possible twist that the Path may
@@ -585,162 +547,94 @@ def parse_7z_listing(location, utf=False):
585
547
- Warnings: followed by one or more message lines
586
548
- Open Warning: : followed by one or more message lines
587
549
- blank line
588
- - two blank lines
550
+
551
+ 3. a footer
552
+ - blank line
589
553
- footer sometimes with lines with summary stats
590
554
such as Warnings: 1 Errors: 1
591
555
- a line with two or more dashes or an empty line
556
+
557
+ We ignore the header and footer in a listing.
592
558
"""
593
559
594
560
if utf or py3 :
595
561
# read to unicode
596
562
with io .open (location , 'r' , encoding = 'utf-8' ) as listing :
597
563
text = listing .read ()
598
- if TRACE_DEEP :
599
- print ('=====================================================' )
600
- print (text )
601
- print ('=====================================================' )
602
-
603
564
text = text .replace (u'\r \n ' , u'\n ' )
604
565
605
- header_sep = u'\n ----------\n '
606
- empty = u''
607
- body_sep = u'\n \n \n '
608
- path_block_sep = u'Path ='
609
- msg_sep = u':'
610
- equal_sep = u'='
611
- errror_line_starters = 'Open Warning:' , 'Errors:' , 'Warnings:'
612
- line_sep = u'\n '
566
+ end_of_header = u'----------\n '
567
+ path_key = u'Path'
568
+ kv_sep = u'='
569
+ path_blocks_sep = u'\n \n '
570
+ line_sep = u'\n '
613
571
614
572
else :
615
573
# read to bytes
616
574
with io .open (location , 'rb' ) as listing :
617
575
text = listing .read ()
618
576
text = text .replace (b'\r \n ' , b'\n ' )
619
577
620
- header_sep = b'\n ----------\n '
621
- empty = b''
622
- body_sep = b'\n \n \n '
623
- path_block_sep = b'Path ='
624
- msg_sep = b':'
625
- equal_sep = b'='
626
- errror_line_starters = b'Open Warning:' , b'Errors:' , b'Warnings:'
627
- line_sep = b'\n '
578
+ end_of_header = b'----------\n '
579
+ path_key = b'Path'
580
+ kv_sep = b'='
581
+ path_blocks_sep = b'\n \n '
582
+ line_sep = b'\n '
628
583
629
584
if TRACE :
630
585
logger .debug ('parse_7z_listing: initial text: type: ' + repr (type (text )))
631
586
print ('--------------------------------------' )
632
587
print (text )
633
588
print ('--------------------------------------' )
634
589
635
- header_tail = re .split (header_sep , text , flags = re .MULTILINE ) # NOQA
636
- if len (header_tail ) != 2 :
637
- # we more than one a header, confusion entails.
638
- raise ExtractWarningIncorrectEntry (
639
- 'Incorrect 7zip listing with multiple headers: {}' .format (repr (header_tail )))
590
+ # for now we ignore the header
591
+ _header , _ , paths = text .rpartition (end_of_header )
640
592
641
- if len ( header_tail ) == 1 :
593
+ if not paths :
642
594
# we have only a header, likely an error condition or an empty archive
643
595
return []
644
596
645
- # FIXME: do something with header and footer?
646
- _header , body = header_tail
647
- body_and_footer = re .split (body_sep , body , flags = re .MULTILINE ) # NOQA
648
- no_footer = len (body_and_footer ) == 1
649
- multiple_footers = len (body_and_footer ) > 2
650
- _footer = empty
651
-
652
- if no_footer :
653
- body = body_and_footer [0 ]
654
- elif multiple_footers :
655
- raise ExtractWarningIncorrectEntry (
656
- 'Incorrect 7zip listing with multiple footers: {}' .format (repr (body_and_footer )))
657
- else :
658
- body , _footer == body_and_footer
659
-
660
- entries = []
597
+ # each block representing one path or file:
598
+ # - starts with a "Path = <some/path>" key/value
599
+ # - continues with key = value pairs each on a single line
600
+ # (unless there is a \n in file name which is an error condition)
601
+ # - ends with an empty line
602
+ # then we have a global footer
661
603
662
- if TRACE :
663
- logger .debug ('parse_7z_listing: body:' )
664
- print (body )
604
+ path_blocks = [pb for pb in paths .split (path_blocks_sep ) if pb and path_key in pb ]
665
605
666
- path_blocks = [pb .strip () for pb in
667
- re .split (path_block_sep , body , flags = re .MULTILINE ) if pb and pb .strip ()] # NOQA
668
-
669
- if TRACE_DEEP :
670
- logger .debug ('parse_7z_listing: path_blocks:' )
671
- pprint .pprint (path_blocks )
606
+ entries = []
672
607
673
608
for path_block in path_blocks :
674
- if TRACE :
675
- logger .debug ('parse_7z_listing: path_block: {}' .format (path_block ))
676
-
677
- errors = []
678
- infos = {}
679
-
680
- lines = path_block .splitlines (False )
681
-
682
- if len (lines ) == 1 :
683
- # a temp macOS debug statement
684
- raise Exception (text )
685
-
686
- # the first line is the Path line
687
- path_line = lines .pop (0 ).strip ()
688
- if 'Path =' in path_line :
689
- _ , _ , path = path_line .partition ('Path =' )
690
- path = path .lstrip ()
691
- else :
692
- path = path_line
693
-
694
- second = lines [0 ]
695
-
696
- if equal_sep not in second :
697
- # the path contain line breaks and the next line continues the name
698
- path = line_sep .join ([path , second ])
699
- lines .pop (0 )
700
-
701
- infos ['Path' ] = path
702
-
703
- is_err = False
704
-
705
- # process the remainining non-path lines
706
- for line in lines :
707
- if TRACE_DEEP :
708
- logger .debug ('parse_7z_listing: line: "{}"' .format (line ))
709
-
710
- line = line .strip ()
711
-
712
- if not line :
713
- continue
714
-
715
- if line .startswith (errror_line_starters ):
716
- is_err = True
717
- messages = line .split (msg_sep , 1 )
718
- errors .append (messages )
719
- continue
720
-
721
- if equal_sep not in line and is_err :
722
- # not a key = value line, an error message
723
- errors .append (line )
724
- continue
725
-
726
- parts = line .split (equal_sep , 1 )
727
-
728
- if len (parts ) != 2 :
729
- raise ExtractWarningIncorrectEntry (
730
- 'Incorrect 7zip listing line with no key=value: {}' .format (repr (line )))
731
-
732
- is_err = False
733
- key , value = parts
734
- key = key .strip ()
735
- value = value .strip ()
736
- assert key not in infos , 'Duplicate keys in 7zip listing'
737
- infos [key ] = value or empty
738
-
739
- if infos :
740
- entr = as_entry (infos )
741
- entries .append (entr )
742
-
743
- if TRACE_DEEP :
609
+ # we ignore empty lines as well as lines that do not contain a key
610
+ lines = [line .strip () for line in path_block .splitlines (False ) if line .strip ()]
611
+ if not lines :
612
+ continue
613
+ # we have a weird case of path with line returns in the file name
614
+ # we concatenate these in the first Path line
615
+ while len (lines ) > 1 and lines [0 ].startswith (path_key ) and kv_sep not in lines [1 ]:
616
+ first_line = lines [0 ]
617
+ second_line = lines .pop (1 )
618
+ first_line = line_sep .join ([first_line , second_line ])
619
+ lines [0 ] = first_line
620
+
621
+ dangling_lines = [line for line in lines if kv_sep not in line ]
622
+ entry_errors = []
623
+ if dangling_lines :
624
+ emsg = 'Invalid 7z listing path block missing "=" as key/value separator: {}' .format (repr (path_block ))
625
+ entry_errors .append (emsg )
626
+
627
+ entry_attributes = {}
628
+ key_lines = [line for line in lines if kv_sep in line ]
629
+ for line in key_lines :
630
+ k , _ , v = line .partition (kv_sep )
631
+ k = k .strip ()
632
+ v = v .strip ()
633
+ entry_attributes [k ] = v
634
+
635
+ entries .append (Entry .from_dict (infos = entry_attributes , errors = entry_errors ))
636
+
637
+ if TRACE_ENTRIES :
744
638
logger .debug ('parse_7z_listing: entries# {}\n ' .format (len (entries )))
745
639
for entry in entries :
746
640
logger .debug (' ' + repr (entry .to_dict ()))
@@ -777,12 +671,9 @@ class Entry(object):
777
671
link_target = attr .ib (default = None )
778
672
errors = attr .ib (default = attr .Factory (list ))
779
673
780
- def parent (self ):
781
- return posixpath .dirname (self .path .rstrip ('/' ))
782
-
783
674
def to_dict(self, full=False):
    """
    Return a mapping of this Entry attributes as built by attr.asdict().
    The 'date' key is dropped from the mapping unless `full` is True.
    """
    data = attr.asdict(self)
    if full:
        return data
    # short form: the date is not reported
    data.pop('date', None)
    return data
@@ -795,3 +686,56 @@ def is_relative_path(self):
795
686
796
687
def is_empty(self):
    """
    Return True if the `size` of this entry is falsy and False otherwise.
    """
    if self.size:
        return False
    return True
689
+
690
@classmethod
def from_dict(cls, infos, errors=None):
    """
    Return an Entry built from a 7zip path listing data in the `infos` mapping.

    `infos` maps 7zip listing attribute names to their values. The attributes
    used here are 'Path', 'Size', 'Modified', 'Mode', 'Folder', 'Attributes',
    'Symbolic Link' and 'Hard Link'. `errors` is an optional list of error
    messages to attach to the entry; an empty list is used when it is not
    provided or empty.

    Raise an ExtractWarningIncorrectEntry if `infos` has both a 'Symbolic
    Link' and a 'Hard Link' value.
    """
    symlink_target = infos.get('Symbolic Link')
    hardlink_target = infos.get('Hard Link')

    # validate first: an entry cannot be both kinds of link at once
    if symlink_target and hardlink_target:
        from pprint import pformat
        raise ExtractWarningIncorrectEntry(
            'A symlink cannot be also a hardlink: {}'.format(pformat(infos)))

    is_symlink = bool(symlink_target)
    is_hardlink = bool(hardlink_target)
    # at most one of the two targets is set at this stage
    link_target = hardlink_target or symlink_target or None

    # Depending on the type of archive, the file vs. directory flag is found
    # in different attributes. A missing or empty attribute value is treated
    # as an empty string so that the checks below never fail on None.
    mode = infos.get('Mode') or ''
    folder = infos.get('Folder') or ''
    attributes = infos.get('Attributes') or ''

    is_dir = (
        # in some listings we have this: Mode = drwxrwxr-x
        mode.lower().startswith('d')
        # in cpio and a few more we have a Folder attribute
        or folder.startswith('+')
        # in 7z listings we have this: Attributes = D_ drwxrwxr-x
        or attributes.lower().startswith('d_')
    )

    return cls(
        path=infos.get('Path'),
        size=infos.get('Size', 0),
        date=infos.get('Modified', None),
        is_dir=is_dir,
        is_file=not is_dir,
        is_symlink=is_symlink,
        is_hardlink=is_hardlink,
        link_target=link_target,
        errors=errors or [],
    )
0 commit comments