aboutcode-org
diff --git a/‎extractcode.ABOUT
Lines changed: 4 additions & 4 deletions b/‎extractcode.ABOUT
Lines changed: 4 additions & 4 deletions
diff --git a/‎setup.cfg
Lines changed: 22 additions & 1 deletion b/‎setup.cfg
Lines changed: 22 additions & 1 deletion
diff --git a/‎src/extractcode/__init__.py
Lines changed: 4 additions & 4 deletions b/‎src/extractcode/__init__.py
Lines changed: 4 additions & 4 deletions
diff --git a/‎src/extractcode/archive.py
Lines changed: 58 additions & 17 deletions b/‎src/extractcode/archive.py
Lines changed: 58 additions & 17 deletions
diff --git a/‎src/extractcode/cli.py
Lines changed: 84 additions & 18 deletions b/‎src/extractcode/cli.py
Lines changed: 84 additions & 18 deletions
@@ -1,9 +1,9 @@
 about_resource: .
 copyright: copyright (c) nexB. Inc. and others
-description: A mostly universal archive extractor using z7zip, libarchve, other 
- libraries and the Python standard library for reliable archive extraction.
- It is used by ScanCode toolkit and related projects 
-keywords: archive, extraction, libarchive, 7zip, scancode-toolkit
+description: A mostly universal archive extractor using 7zip, libarchive and the
+ Python standard library for reliable archive extraction on Linux, Windows and
+ macOS. It is used by ScanCode toolkit and related projects. 
+keywords: archive, extraction, libarchive, 7zip, gzip, xz, lzma, bzip2, tar, ar, cpio, scancode-toolkit
 homepage_url: https://github.com/nexB/extractcode
 holder: nexB. Inc. and others
 holder_contact: info@aboutcode.org
 
@@ -10,7 +10,7 @@ author_email = info@aboutcode.org
 license = Apache-2.0
 
 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390
-description = A mostly universal archive extractor using z7zip, libarchve, other libraries and the Python standard library for reliable archive extraction.
+description = A mostly universal archive extractor using 7zip, libarchive and the Python standard library for reliable archive extraction.
 long_description = file:README.rst
 url = https://github.com/nexB/extractcode
 classifiers =
@@ -26,6 +26,27 @@ keywords =
     extraction
     libarchive
     7zip
+    7z
+    gzip
+    bzip2
+    xz
+    lzma
+    lz4
+    lzip
+    zstd
+    Z
+    tar
+    xar
+    ar
+    cpio
+    vmdk
+    qcow2
+    vhd
+    iso
+    deb
+    cab
+    rpm
+    patch
     scancode-toolkit
 
 [options]
 
@@ -137,11 +137,11 @@ def remove_backslashes_and_dotdots(directory):
 def new_name(location, is_dir=False):
     """
     Return a new non-existing location from a `location` usable to write a file
-    or create directory without overwriting existing files or directories in the same
-    parent directory, ignoring the case of the filename.
+    or create a directory without overwriting existing files or directories in
+    the same parent directory, ignoring the case of the filename.
 
-    The case of the filename is ignored to ensure that similar results are returned
-    across case sensitive (*nix) and case insensitive file systems.
+    The case of the filename is ignored to ensure that similar results are
+    returned across case sensitive (*nix) and case insensitive file systems.
 
     To find a new unique filename, this tries new names this way:
      * pad a directory name with _X where X is an incremented number.
 
@@ -15,7 +15,6 @@
 from commoncode import filetype
 from commoncode import functional
 from commoncode.ignore import is_ignored
-
 from typecode import contenttype
 
 from extractcode import all_kinds
@@ -204,7 +203,9 @@ def get_handlers(location):
         mtype = T.mimetype_file
 
         if TRACE_DEEP:
-            logger.debug('get_handlers: processing %(location)s: ftype: %(ftype)s, mtype: %(mtype)s ' % locals())
+            logger.debug(
+                'get_handlers: processing %(location)s: '
+                'ftype: %(ftype)s, mtype: %(mtype)s ' % locals())
         for handler in archive_handlers:
             if not handler.extractors:
                 continue
@@ -223,9 +224,19 @@ def get_handlers(location):
                 extension_matched = exts and location.lower().endswith(exts)
 
             if TRACE_DEEP:
-                print(f'  get_handlers: matched type: {type_matched}, mime: {mime_matched}, ext: {extension_matched}' % locals())
-
-            if handler.strict and not (type_matched and mime_matched and extension_matched):
+                print(
+                    f'  get_handlers: matched type: {type_matched}, '
+                    f'mime: {mime_matched}, ext: {extension_matched}' % locals()
+                  )
+
+            if (
+                handler.strict
+                and not (
+                    type_matched
+                    and mime_matched
+                    and extension_matched
+                )
+            ):
                 if TRACE_DEEP:
                     print(f'  get_handlers: skip strict: {handler.name}')
                 continue
@@ -449,17 +460,30 @@ def try_to_extract(location, target_dir, extractor):
 
 extract_deb = libarchive2.extract
 
-# sevenzip is best for windows lib formats and works fine otherwise. libarchive works on standard ar formats.
-extract_ar = functional.partial(extract_with_fallback, extractor1=libarchive2.extract, extractor2=sevenzip.extract)
+# sevenzip is best for windows lib formats and works fine otherwise. libarchive
+# works on standard ar formats.
+extract_ar = functional.partial(
+    extract_with_fallback,
+    extractor1=libarchive2.extract,
+    extractor2=sevenzip.extract,
+)
 
 extract_msi = sevenzip.extract
 extract_cpio = libarchive2.extract
 
 # sevenzip should be best at extracting 7zip but most often libarchive is better first
-extract_7z = functional.partial(extract_with_fallback, extractor1=libarchive2.extract, extractor2=sevenzip.extract)
+extract_7z = functional.partial(
+    extract_with_fallback,
+    extractor1=libarchive2.extract,
+    extractor2=sevenzip.extract,
+)
 
 # libarchive is best for the run of the mill zips, but sevenzip sometimes is better
-extract_zip = functional.partial(extract_with_fallback, extractor1=libarchive2.extract, extractor2=sevenzip.extract)
+extract_zip = functional.partial(
+    extract_with_fallback,
+    extractor1=libarchive2.extract,
+    extractor2=sevenzip.extract,
+)
 
 extract_springboot = functional.partial(try_to_extract, extractor=extract_zip)
 
@@ -515,7 +539,12 @@ def try_to_extract(location, target_dir, extractor):
 
 OfficeDocHandler = Handler(
     name='Office doc',
-    filetypes=('zip archive', 'microsoft word 2007+', 'microsoft excel 2007+', 'microsoft powerpoint 2007+'),
+    filetypes=(
+        'zip archive',
+        'microsoft word 2007+',
+        'microsoft excel 2007+',
+        'microsoft powerpoint 2007+',
+    ),
     mimetypes=('application/zip', 'application/vnd.openxmlformats',),
     # Extensions of office documents that are zip files too
     extensions=(
@@ -553,7 +582,7 @@ def try_to_extract(location, target_dir, extractor):
     strict=True
 )
 
-    # see http://tools.android.com/tech-docs/new-build-system/aar-formats
+# see http://tools.android.com/tech-docs/new-build-system/aar-formats
 AndroidLibHandler = Handler(
     name='Android library',
     filetypes=('zip archive',),
@@ -827,8 +856,16 @@ def try_to_extract(location, target_dir, extractor):
     name='Tar bzip2',
     filetypes=('bzip2 compressed',),
     mimetypes=('application/x-bzip2',),
-    extensions=('.tar.bz2', '.tar.bz', '.tar.bzip', '.tar.bzip2',
-          '.tbz', '.tbz2', '.tb2', '.tarbz2',),
+    extensions=(
+        '.tar.bz2',
+        '.tar.bz',
+        '.tar.bzip',
+        '.tar.bzip2',
+        '.tbz',
+        '.tbz2',
+        '.tb2',
+        '.tarbz2',
+    ),
     kind=regular_nested,
     extractors=[extract_tar],
     strict=False
@@ -876,10 +913,11 @@ def try_to_extract(location, target_dir, extractor):
 
 NugetHandler = Handler(
     name='Nuget',
-    # weirdly enough the detection by libmagic is sometimes wrong
-    # TODO file a bug upstream
-    # this is due to this: https://en.wikipedia.org/wiki/Open_Packaging_Conventions#File_formats_using_the_OPC
+    # TODO: file a bug upstream
+    # Weirdly enough the detection by libmagic is sometimes wrong: because of
+    # the Open Packaging Conventions (OPC) layout, a .nupkg may end up
     # being recognized by libmagic as an OOXML file
+    # See https://en.wikipedia.org/wiki/Open_Packaging_Conventions#File_formats_using_the_OPC
     filetypes=('zip archive', 'microsoft ooxml',),
     mimetypes=('application/zip', 'application/octet-stream',),
     extensions=('.nupkg',),
@@ -921,7 +959,10 @@ def try_to_extract(location, target_dir, extractor):
 DebHandler = Handler(
     name='Debian package',
     filetypes=('debian binary package',),
-    mimetypes=('application/vnd.debian.binary-package', 'application/x-archive',),
+    mimetypes=(
+        'application/vnd.debian.binary-package',
+        'application/x-archive',
+    ),
     extensions=('.deb', '.udeb',),
     kind=package,
     extractors=[extract_deb],
 
@@ -84,28 +84,85 @@ class ExtractCommand(cliutils.BaseCommand):
 @click.command(name='extractcode', epilog=epilog_text, cls=ExtractCommand)
 @click.pass_context
 
-@click.argument('input', metavar='<input>', type=click.Path(exists=True, readable=True))
-
-@click.option('--verbose', is_flag=True, default=False, help='Print verbose file-by-file progress messages.')
-@click.option('--quiet', is_flag=True, default=False, help='Do not print any summary or progress message.')
-@click.option('--shallow', is_flag=True, default=False, help='Do not extract recursively nested archives (e.g. not archives in archives).')
-@click.option('--replace-originals', is_flag=True, default=False, help='Replace extracted archives by the extracted content.')
-@click.option('--ignore', default=[], multiple=True, help='Ignore files/directories following a glob-pattern.')
-@click.option('--all-formats', is_flag=True, default=False, help='Extract archives from all known formats.')
+@click.argument(
+    'input',
+    metavar='<input>',
+    type=click.Path(exists=True, readable=True),
+)
+
+@click.option(
+    '--verbose',
+    is_flag=True,
+    help='Print verbose file-by-file progress messages.',
+)
+@click.option(
+    '--quiet',
+    is_flag=True,
+    help='Do not print any summary or progress message.',
+)
+@click.option(
+    '--shallow',
+    is_flag=True,
+    help='Do not extract recursively nested archives in archives.',
+)
+@click.option(
+    '--replace-originals',
+    is_flag=True,
+    help='Replace extracted archives by the extracted content.',
+)
+@click.option(
+    '--ignore',
+    default=[],
+    multiple=True,
+    help='Ignore files/directories matching this glob pattern.',
+)
+
+@click.option(
+    '--all-formats',
+    is_flag=True,
+    help='Extract archives from all known formats.',
+)
 
 @click.help_option('-h', '--help')
-@click.option('--about', is_flag=True, is_eager=True, callback=print_about, help='Show information about ExtractCode and licensing and exit.')
-@click.option('--version', is_flag=True, is_eager=True, callback=print_version, help='Show the version and exit.')
-def extractcode(ctx, input, verbose, quiet, shallow, replace_originals, ignore, all_formats, *args, **kwargs):  # NOQA
-    """extract archives and compressed files found in the <input> file or directory tree.
+@click.option(
+    '--about',
+    is_flag=True,
+    is_eager=True,
+    callback=print_about,
+    help='Show information about ExtractCode and its licensing and exit.',
+)
+@click.option(
+    '--version',
+    is_flag=True,
+    is_eager=True,
+    callback=print_version,
+    help='Show the version and exit.',
+)
+def extractcode(
+    ctx,
+    input,  # NOQA
+    verbose,
+    quiet,
+    shallow,
+    replace_originals,
+    ignore,
+    all_formats,
+    *args,
+    **kwargs,
+):
+    """extract archives and compressed files in the <input> file or directory tree.
 
     Archives found inside an extracted archive are extracted recursively.
     Use --shallow for a shallow extraction.
     Extraction for each archive is done in-place in a new directory named
     '<archive file name>-extract' created side-by-side with an archive.
     """
 
-    abs_location = fileutils.as_posixpath(os.path.abspath(os.path.expanduser(input)))
+    abs_location = fileutils.as_posixpath(
+        os.path.abspath(
+            os.path.expanduser(input)
+        )
+    )
 
     def extract_event(item):
         """
@@ -159,10 +216,16 @@ def display_extract_summary():
                 )
 
             for e in xev.errors:
-                echo_stderr('ERROR extracting: %(source)s: %(e)s' % locals(), fg='red')
+                echo_stderr(
+                    'ERROR extracting: %(source)s: %(e)s' % locals(),
+                    fg='red'
+                )
 
             for warn in xev.warnings:
-                echo_stderr('WARNING extracting: %(source)s: %(warn)s' % locals(), fg='yellow')
+                echo_stderr(
+                    'WARNING extracting: %(source)s: %(warn)s' % locals(),
+                    fg='yellow'
+                )
 
         summary_color = 'green'
         if has_warnings:
@@ -190,6 +253,7 @@ def display_extract_summary():
 
     if not quiet:
         echo_stderr('Extracting archives...', fg='green')
+
         with cliutils.progressmanager(extractibles,
             item_show_func=extract_event, verbose=verbose) as extraction_events:
 
@@ -199,7 +263,9 @@ def display_extract_summary():
                     if repr(xev) not in unique_extract_events_with_errors:
                         extract_result_with_errors.append(xev)
                         unique_extract_events_with_errors.add(repr(xev))
+
         display_extract_summary()
+
     else:
         for xev in extractibles:
             if xev.done and (xev.warnings or xev.errors):
@@ -211,9 +277,9 @@ def display_extract_summary():
 
 def get_relative_path(path, len_base_path, base_is_dir):
     """
-    Return a posix relative path from the posix 'path' relative to a
-    base path of `len_base_path` length where the base is a directory if
-    `base_is_dir` True or a file otherwise.
+    Return a posix relative path from the posix 'path' relative to a base path
+    of `len_base_path` length where the base is a directory if `base_is_dir`
+    is True or a file otherwise.
     """
     path = os.fsdecode(path)
     if base_is_dir: