Skip to content

Commit e872181

Browse files
committed
Remove Python2 support
Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
1 parent 9f0e9d6 commit e872181

17 files changed

+420
-799
lines changed

src/extractcode/__init__.py

Lines changed: 34 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,22 @@
11
#
2-
# Copyright (c) 2018 nexB Inc. and others. All rights reserved.
3-
# http://nexb.com and https://github.com/nexB/scancode-toolkit/
4-
# The ScanCode software is licensed under the Apache License version 2.0.
5-
# Data generated with ScanCode require an acknowledgment.
2+
# Copyright (c) nexB Inc. and others.
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
# Visit https://aboutcode.org and https://github.com/nexB/ for support and download.
66
# ScanCode is a trademark of nexB Inc.
77
#
8-
# You may not use this software except in compliance with the License.
9-
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10-
# Unless required by applicable law or agreed to in writing, software distributed
11-
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12-
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13-
# specific language governing permissions and limitations under the License.
8+
# Licensed under the Apache License, Version 2.0 (the "License");
9+
# you may not use this file except in compliance with the License.
10+
# You may obtain a copy of the License at
1411
#
15-
# When you publish or redistribute any data created with ScanCode or any ScanCode
16-
# derivative work, you must accompany this data with the following acknowledgment:
12+
# http://www.apache.org/licenses/LICENSE-2.0
13+
#
14+
# Unless required by applicable law or agreed to in writing, software
15+
# distributed under the License is distributed on an "AS IS" BASIS,
16+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
# See the License for the specific language governing permissions and
18+
# limitations under the License.
1719
#
18-
# Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
19-
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
20-
# ScanCode should be considered or used as legal advice. Consult an Attorney
21-
# for any legal advice.
22-
# ScanCode is a free software code scanning tool from nexB Inc. and others.
23-
# Visit https://github.com/nexB/scancode-toolkit/ for support and download.
24-
25-
from __future__ import absolute_import
26-
from __future__ import print_function
27-
from __future__ import unicode_literals
2820

2921
import logging
3022
import os
@@ -33,17 +25,16 @@
3325
import shutil
3426
import sys
3527

28+
from os.path import dirname
29+
from os.path import join
30+
from os.path import exists
31+
3632
from commoncode.fileutils import as_posixpath
3733
from commoncode.fileutils import create_dir
3834
from commoncode.fileutils import file_name
39-
from commoncode.fileutils import fsencode
4035
from commoncode.fileutils import parent_directory
4136
from commoncode.text import toascii
4237
from commoncode.system import on_linux
43-
from commoncode.system import py2
44-
from os.path import dirname
45-
from os.path import join
46-
from os.path import exists
4738

4839
logger = logging.getLogger(__name__)
4940
DEBUG = False
@@ -53,16 +44,8 @@
5344

5445
root_dir = join(dirname(__file__), 'bin')
5546

56-
POSIX_PATH_SEP = b'/' if (on_linux and py2) else '/'
57-
WIN_PATH_SEP = b'\\' if (on_linux and py2) else '\\'
58-
PATHS_SEPS = POSIX_PATH_SEP + WIN_PATH_SEP
59-
EMPTY_STRING = b'' if (on_linux and py2) else ''
60-
DOT = b'.' if (on_linux and py2) else '.'
61-
DOTDOT = DOT + DOT
62-
UNDERSCORE = b'_' if (on_linux and py2) else '_'
63-
6447
# Suffix added to extracted target_dir paths
65-
EXTRACT_SUFFIX = b'-extract' if (on_linux and py2) else r'-extract'
48+
EXTRACT_SUFFIX = '-extract'
6649

6750
# high level archive "kinds"
6851
docs = 1
@@ -103,60 +86,46 @@ def is_extraction_path(path):
10386
"""
10487
Return True if the path points to an extraction path.
10588
"""
106-
if on_linux and py2:
107-
path = fsencode(path)
108-
109-
return path and path.rstrip(PATHS_SEPS).endswith(EXTRACT_SUFFIX)
89+
return path and path.rstrip('\\/').endswith(EXTRACT_SUFFIX)
11090

11191

11292
def is_extracted(location):
11393
"""
11494
Return True if the location is already extracted to the corresponding
11595
extraction location.
11696
"""
117-
if on_linux and py2:
118-
location = fsencode(location)
11997
return location and exists(get_extraction_path(location))
12098

12199

122100
def get_extraction_path(path):
123101
"""
124102
Return a path where to extract.
125103
"""
126-
if on_linux and py2:
127-
path = fsencode(path)
128-
return path.rstrip(PATHS_SEPS) + EXTRACT_SUFFIX
104+
return path.rstrip('\\/') + EXTRACT_SUFFIX
129105

130106

131107
def remove_archive_suffix(path):
132108
"""
133109
Remove all the extraction suffixes from a path.
134110
"""
135-
if on_linux and py2:
136-
path = fsencode(path)
137-
return re.sub(EXTRACT_SUFFIX, EMPTY_STRING, path)
111+
return re.sub(EXTRACT_SUFFIX, '', path)
138112

139113

140114
def remove_backslashes_and_dotdots(directory):
141115
"""
142116
Walk a directory and rename the files if their names contain backslashes or "..".
143117
Return a list of errors if any.
144118
"""
145-
if on_linux and py2:
146-
directory = fsencode(directory)
147119
errors = []
148120
for top, _, files in os.walk(directory):
149121
for filename in files:
150-
if not (WIN_PATH_SEP in filename or DOTDOT in filename):
122+
if not ('\\' in filename or '..' in filename):
151123
continue
152124
try:
153-
new_path = as_posixpath(filename)
154-
new_path = new_path.strip(POSIX_PATH_SEP)
155-
new_path = posixpath.normpath(new_path)
156-
new_path = new_path.replace(DOTDOT, POSIX_PATH_SEP)
157-
new_path = new_path.strip(POSIX_PATH_SEP)
125+
new_path = as_posixpath(filename).strip('/')
126+
new_path = posixpath.normpath(new_path).replace('..', '/').strip('/')
158127
new_path = posixpath.normpath(new_path)
159-
segments = new_path.split(POSIX_PATH_SEP)
128+
segments = new_path.split('/')
160129
directory = join(top, *segments[:-1])
161130
create_dir(directory)
162131
shutil.move(join(top, filename), join(top, *segments))
@@ -180,9 +149,7 @@ def new_name(location, is_dir=False):
180149
the extension unchanged.
181150
"""
182151
assert location
183-
if on_linux and py2:
184-
location = fsencode(location)
185-
location = location.rstrip(PATHS_SEPS)
152+
location = location.rstrip('\\/')
186153
assert location
187154

188155
parent = parent_directory(location)
@@ -193,8 +160,8 @@ def new_name(location, is_dir=False):
193160
filename = file_name(location)
194161

195162
# corner case
196-
if filename in (DOT, DOT):
197-
filename = UNDERSCORE
163+
if filename in ('.', '..'):
164+
filename = '_'
198165

199166
# if unique, return this
200167
if filename.lower() not in siblings_lower:
@@ -204,19 +171,19 @@ def new_name(location, is_dir=False):
204171
if is_dir:
205172
# directories do not have an "extension"
206173
base_name = filename
207-
ext = EMPTY_STRING
174+
ext = ''
208175
else:
209-
base_name, dot, ext = filename.partition(DOT)
176+
base_name, dot, ext = filename.partition('.')
210177
if dot:
211-
ext = dot + ext
178+
ext = f'.{ext}'
212179
else:
213180
base_name = filename
214-
ext = EMPTY_STRING
181+
ext = ''
215182

216183
# find a unique filename, adding a counter int to the base_name
217184
counter = 1
218185
while 1:
219-
filename = base_name + UNDERSCORE + str(counter) + ext
186+
filename = f'{base_name}_{counter}{ext}'
220187
if filename.lower() not in siblings_lower:
221188
break
222189
counter += 1

src/extractcode/api.py

Lines changed: 14 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,22 @@
11
#
2-
# Copyright (c) nexB Inc. and others. All rights reserved.
3-
# http://nexb.com and https://github.com/nexB/scancode-toolkit/
4-
# The ScanCode software is licensed under the Apache License version 2.0.
5-
# Data generated with ScanCode require an acknowledgment.
2+
# Copyright (c) nexB Inc. and others.
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
# Visit https://aboutcode.org and https://github.com/nexB/ for support and download.
66
# ScanCode is a trademark of nexB Inc.
77
#
8-
# You may not use this software except in compliance with the License.
9-
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10-
# Unless required by applicable law or agreed to in writing, software distributed
11-
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12-
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13-
# specific language governing permissions and limitations under the License.
8+
# Licensed under the Apache License, Version 2.0 (the "License");
9+
# you may not use this file except in compliance with the License.
10+
# You may obtain a copy of the License at
1411
#
15-
# When you publish or redistribute any data created with ScanCode or any ScanCode
16-
# derivative work, you must accompany this data with the following acknowledgment:
12+
# http://www.apache.org/licenses/LICENSE-2.0
13+
#
14+
# Unless required by applicable law or agreed to in writing, software
15+
# distributed under the License is distributed on an "AS IS" BASIS,
16+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
# See the License for the specific language governing permissions and
18+
# limitations under the License.
1719
#
18-
# Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
19-
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
20-
# ScanCode should be considered or used as legal advice. Consult an Attorney
21-
# for any legal advice.
22-
# ScanCode is a free software code scanning tool from nexB Inc. and others.
23-
# Visit https://github.com/nexB/scancode-toolkit/ for support and download.
24-
25-
from __future__ import absolute_import
26-
from __future__ import division
27-
from __future__ import print_function
28-
from __future__ import unicode_literals
2920

3021

3122
"""

src/extractcode/archive.py

Lines changed: 21 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,32 @@
11
#
2-
# Copyright (c) 2018 nexB Inc. and others. All rights reserved.
3-
# http://nexb.com and https://github.com/nexB/scancode-toolkit/
4-
# The ScanCode software is licensed under the Apache License version 2.0.
5-
# Data generated with ScanCode require an acknowledgment.
2+
# Copyright (c) nexB Inc. and others.
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
# Visit https://aboutcode.org and https://github.com/nexB/ for support and download.
66
# ScanCode is a trademark of nexB Inc.
77
#
8-
# You may not use this software except in compliance with the License.
9-
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10-
# Unless required by applicable law or agreed to in writing, software distributed
11-
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12-
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13-
# specific language governing permissions and limitations under the License.
8+
# Licensed under the Apache License, Version 2.0 (the "License");
9+
# you may not use this file except in compliance with the License.
10+
# You may obtain a copy of the License at
1411
#
15-
# When you publish or redistribute any data created with ScanCode or any ScanCode
16-
# derivative work, you must accompany this data with the following acknowledgment:
12+
# http://www.apache.org/licenses/LICENSE-2.0
13+
#
14+
# Unless required by applicable law or agreed to in writing, software
15+
# distributed under the License is distributed on an "AS IS" BASIS,
16+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
# See the License for the specific language governing permissions and
18+
# limitations under the License.
1719
#
18-
# Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
19-
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
20-
# ScanCode should be considered or used as legal advice. Consult an Attorney
21-
# for any legal advice.
22-
# ScanCode is a free software code scanning tool from nexB Inc. and others.
23-
# Visit https://github.com/nexB/scancode-toolkit/ for support and download.
24-
25-
from __future__ import absolute_import
26-
from __future__ import print_function
27-
from __future__ import unicode_literals
2820

2921
from collections import namedtuple
3022
import logging
3123
import os
3224

33-
from commoncode import compat
3425
from commoncode import fileutils
3526
from commoncode import filetype
3627
from commoncode import functional
3728
from commoncode.ignore import is_ignored
3829
from commoncode.system import on_linux
39-
from commoncode.system import py2
4030

4131
from typecode import contenttype
4232

@@ -154,8 +144,6 @@ def get_best_handler(location, kinds=all_kinds):
154144
"""
155145
Return the best handler or None for the file at location.
156146
"""
157-
if on_linux and py2:
158-
location = fileutils.fsencode(location)
159147
location = os.path.abspath(os.path.expanduser(location))
160148
if not filetype.is_file(location):
161149
return
@@ -173,9 +161,6 @@ def get_handlers(location):
173161
Return an iterable of (handler, type_matched, mime_matched,
174162
extension_matched,) for this `location`.
175163
"""
176-
if on_linux and py2:
177-
location = fileutils.fsencode(location)
178-
179164
if filetype.is_file(location):
180165

181166
T = contenttype.get_type(location)
@@ -197,8 +182,6 @@ def get_handlers(location):
197182
mime_matched = handler.mimetypes and any(m in mtype for m in handler.mimetypes)
198183
exts = handler.extensions
199184
if exts:
200-
if on_linux and py2:
201-
exts = tuple(fileutils.fsencode(e) for e in exts)
202185
extension_matched = exts and location.lower().endswith(exts)
203186

204187
if TRACE_DEEP:
@@ -326,13 +309,10 @@ def extract_twice(location, target_dir, extractor1, extractor2):
326309
hard to trace and debug very quickly. A depth of two is simple and sane and
327310
covers most common cases.
328311
"""
329-
if on_linux and py2:
330-
location = fileutils.fsencode(location)
331-
target_dir = fileutils.fsencode(target_dir)
332312
abs_location = os.path.abspath(os.path.expanduser(location))
333-
abs_target_dir = compat.unicode(os.path.abspath(os.path.expanduser(target_dir)))
313+
abs_target_dir = str(os.path.abspath(os.path.expanduser(target_dir)))
334314
# extract first the intermediate payload to a temp dir
335-
temp_target = compat.unicode(fileutils.get_temp_dir(prefix='extractcode-extract-'))
315+
temp_target = str(fileutils.get_temp_dir(prefix='extractcode-extract-'))
336316
warnings = extractor1(abs_location, temp_target)
337317
if TRACE:
338318
logger.debug('extract_twice: temp_target: %(temp_target)r' % locals())
@@ -364,17 +344,17 @@ def extract_with_fallback(location, target_dir, extractor1, extractor2):
364344
and a fallback extractor will succeed.
365345
"""
366346
abs_location = os.path.abspath(os.path.expanduser(location))
367-
abs_target_dir = compat.unicode(os.path.abspath(os.path.expanduser(target_dir)))
347+
abs_target_dir = str(os.path.abspath(os.path.expanduser(target_dir)))
368348
# attempt extract first to a temp dir
369-
temp_target1 = compat.unicode(fileutils.get_temp_dir(prefix='extractcode-extract1-'))
349+
temp_target1 = str(fileutils.get_temp_dir(prefix='extractcode-extract1-'))
370350
try:
371351
warnings = extractor1(abs_location, temp_target1)
372352
if TRACE:
373353
logger.debug('extract_with_fallback: temp_target1: %(temp_target1)r' % locals())
374354
fileutils.copytree(temp_target1, abs_target_dir)
375355
except:
376356
try:
377-
temp_target2 = compat.unicode(fileutils.get_temp_dir(prefix='extractcode-extract2-'))
357+
temp_target2 = str(fileutils.get_temp_dir(prefix='extractcode-extract2-'))
378358
warnings = extractor2(abs_location, temp_target2)
379359
if TRACE:
380360
logger.debug('extract_with_fallback: temp_target2: %(temp_target2)r' % locals())
@@ -395,8 +375,8 @@ def try_to_extract(location, target_dir, extractor):
395375
but do not care if this fails.
396376
"""
397377
abs_location = os.path.abspath(os.path.expanduser(location))
398-
abs_target_dir = compat.unicode(os.path.abspath(os.path.expanduser(target_dir)))
399-
temp_target = compat.unicode(fileutils.get_temp_dir(prefix='extractcode-extract1-'))
378+
abs_target_dir = str(os.path.abspath(os.path.expanduser(target_dir)))
379+
temp_target = str(fileutils.get_temp_dir(prefix='extractcode-extract1-'))
400380
warnings = []
401381
try:
402382
warnings = extractor(abs_location, temp_target)

0 commit comments

Comments
 (0)