Skip to content

Commit 43f5287

Browse files
committed
Use lockfile on read too
We need to use locks also on read, and not only on write. Windows experiences race conditions otherwise. Reference: #215 Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
1 parent 1ac83be commit 43f5287

File tree

2 files changed

+43
-30
lines changed

2 files changed

+43
-30
lines changed

src/_packagedcode/pypi.py

Lines changed: 37 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -457,7 +457,7 @@ def parse_metadata(location, datasource_id, package_type):
457457
name=name,
458458
version=version,
459459
description=get_description(metainfo=meta, location=str(location)),
460-
#TODO: https://github.com/aboutcode-org/scancode-toolkit/issues/3014
460+
# TODO: https://github.com/aboutcode-org/scancode-toolkit/issues/3014
461461
declared_license=get_declared_license(meta),
462462
keywords=get_keywords(meta),
463463
parties=get_parties(meta),
@@ -521,19 +521,25 @@ class PypiWheelHandler(BasePypiHandler):
521521

522522
@classmethod
523523
def parse(cls, location):
524-
with zipfile.ZipFile(location) as zf:
525-
for path in ZipPath(zf).iterdir():
526-
if not path.name.endswith(META_DIR_SUFFIXES):
527-
continue
528-
for metapath in path.iterdir():
529-
if not metapath.name.endswith('METADATA'):
530-
continue
531524

532-
yield parse_metadata(
533-
location=metapath,
534-
datasource_id=cls.datasource_id,
535-
package_type=cls.default_package_type,
536-
)
525+
from python_inspector import lockfile
526+
from python_inspector.utils_pypi import PYINSP_CACHE_LOCK_TIMEOUT
527+
lock_file = os.path.join(f"{location}.lockfile")
528+
with lockfile.FileLock(lock_file).locked(timeout=PYINSP_CACHE_LOCK_TIMEOUT):
529+
530+
with zipfile.ZipFile(location) as zf:
531+
for path in ZipPath(zf).iterdir():
532+
if not path.name.endswith(META_DIR_SUFFIXES):
533+
continue
534+
for metapath in path.iterdir():
535+
if not metapath.name.endswith('METADATA'):
536+
continue
537+
538+
yield parse_metadata(
539+
location=metapath,
540+
datasource_id=cls.datasource_id,
541+
package_type=cls.default_package_type,
542+
)
537543

538544

539545
class PypiEggHandler(BasePypiHandler):
@@ -547,20 +553,26 @@ class PypiEggHandler(BasePypiHandler):
547553

548554
@classmethod
549555
def parse(cls, location):
550-
with zipfile.ZipFile(location) as zf:
551-
for path in ZipPath(zf).iterdir():
552-
if not path.name.endswith(META_DIR_SUFFIXES):
553-
continue
554556

555-
for metapath in path.iterdir():
556-
if not metapath.name.endswith('PKG-INFO'):
557+
from python_inspector import lockfile
558+
from python_inspector.utils_pypi import PYINSP_CACHE_LOCK_TIMEOUT
559+
lock_file = os.path.join(f"{location}.lockfile")
560+
with lockfile.FileLock(lock_file).locked(timeout=PYINSP_CACHE_LOCK_TIMEOUT):
561+
562+
with zipfile.ZipFile(location) as zf:
563+
for path in ZipPath(zf).iterdir():
564+
if not path.name.endswith(META_DIR_SUFFIXES):
557565
continue
558566

559-
yield parse_metadata(
560-
location=metapath,
561-
datasource_id=cls.datasource_id,
562-
package_type=cls.default_package_type,
563-
)
567+
for metapath in path.iterdir():
568+
if not metapath.name.endswith('PKG-INFO'):
569+
continue
570+
571+
yield parse_metadata(
572+
location=metapath,
573+
datasource_id=cls.datasource_id,
574+
package_type=cls.default_package_type,
575+
)
564576

565577

566578
class PypiSdistArchiveHandler(BasePypiHandler):
@@ -765,7 +777,6 @@ def parse(cls, location):
765777
)
766778
]
767779

768-
769780
yield models.PackageData(
770781
datasource_id=cls.datasource_id,
771782
type=cls.default_package_type,
@@ -818,6 +829,7 @@ def get_resolved_purl(purl: PackageURL, specifiers: SpecifierSet):
818829
is_resolved=is_resolved,
819830
)
820831

832+
821833
class PipfileHandler(BaseDependencyFileHandler):
822834
datasource_id = 'pipfile'
823835
path_patterns = ('*Pipfile',)

src/python_inspector/utils_pypi.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -196,7 +196,6 @@ def get_python_dot_version(version):
196196
DEFAULT_PYTHON_VERSION = settings.DEFAULT_PYTHON_VERSION
197197
CACHE_THIRDPARTY_DIR = settings.CACHE_THIRDPARTY_DIR
198198

199-
200199
################################################################################
201200

202201
EXTENSIONS_SDIST = (
@@ -316,9 +315,10 @@ async def get_supported_and_valid_wheels(
316315
):
317316
continue
318317
if TRACE_DEEP:
318+
durl = await wheel.download_url(repo)
319319
print(
320320
f""" get_supported_and_valid_wheels: Getting wheel from index (or cache):
321-
{await wheel.download_url(repo)}"""
321+
{durl}"""
322322
)
323323
wheels.append(wheel)
324324
return wheels
@@ -1685,6 +1685,7 @@ async def get(
16851685
# the cache key is a hash of the normalized path
16861686
cache_key = self.sha256_hash(quote_plus(path_or_url.strip("/")))
16871687
cached = os.path.join(self.directory, cache_key)
1688+
lock_file = f"{cached}.lockfile"
16881689

16891690
if force or not os.path.exists(cached):
16901691
if TRACE_DEEP:
@@ -1699,16 +1700,16 @@ async def get(
16991700
wmode = "w" if as_text else "wb"
17001701

17011702
# acquire lock and wait until timeout to get a lock or die
1702-
lock_file = os.path.join(self.directory, f"{cache_key}.lockfile")
1703-
17041703
with lockfile.FileLock(lock_file).locked(timeout=PYINSP_CACHE_LOCK_TIMEOUT):
17051704
async with aiofiles.open(cached, mode=wmode) as fo:
17061705
await fo.write(content)
17071706
return content, cached
17081707
else:
17091708
if TRACE_DEEP:
17101709
print(f" FILE CACHE HIT: {path_or_url}")
1711-
return await get_local_file_content(path=cached, as_text=as_text), cached
1710+
# also lock on read to avoid race conditions
1711+
with lockfile.FileLock(lock_file).locked(timeout=PYINSP_CACHE_LOCK_TIMEOUT):
1712+
return await get_local_file_content(path=cached, as_text=as_text), cached
17121713

17131714

17141715
CACHE = Cache()

0 commit comments

Comments
 (0)