|
22 | 22 |
|
23 | 23 | import json
|
24 | 24 | import logging
|
| 25 | +import time |
| 26 | +from collections import defaultdict |
25 | 27 |
|
26 | 28 | from django.conf import settings
|
27 | 29 |
|
|
30 | 32 | from univers.version_range import RANGE_CLASS_BY_SCHEMES
|
31 | 33 | from univers.version_range import InvalidVersionRange
|
32 | 34 |
|
| 35 | +from scanpipe.models import AbstractTaskFieldsModel |
33 | 36 | from scanpipe.pipes import LoopProgress
|
| 37 | +from scanpipe.pipes import flag |
| 38 | +from scanpipe.pipes.output import to_json |
| 39 | + |
| 40 | + |
class PurlDBException(Exception):
    """Error raised by this module when a PurlDB operation cannot complete."""
| 43 | + |
34 | 44 |
|
35 | 45 | label = "PurlDB"
|
36 | 46 | logger = logging.getLogger(__name__)
|
@@ -100,9 +110,11 @@ def request_get(url, payload=None, timeout=DEFAULT_TIMEOUT):
|
100 | 110 | logger.debug(f"{label} [Exception] {exception}")
|
101 | 111 |
|
102 | 112 |
|
103 |
| -def request_post(url, data, headers=None, timeout=DEFAULT_TIMEOUT): |
| 113 | +def request_post(url, data=None, headers=None, files=None, timeout=DEFAULT_TIMEOUT): |
104 | 114 | try:
|
105 |
| - response = session.post(url, data=data, timeout=timeout, headers=headers) |
| 115 | + response = session.post( |
| 116 | + url, data=data, timeout=timeout, headers=headers, files=files |
| 117 | + ) |
106 | 118 | response.raise_for_status()
|
107 | 119 | return response.json()
|
108 | 120 | except (requests.RequestException, ValueError, TypeError) as exception:
|
@@ -320,3 +332,111 @@ def populate_purldb_with_discovered_dependencies(project, logger=logger.info):
|
320 | 332 | chunk_size=10,
|
321 | 333 | logger=logger,
|
322 | 334 | )
|
| 335 | + |
| 336 | + |
def send_project_json_to_matchcode(
    project, timeout=DEFAULT_TIMEOUT, api_url=PURLDB_API_URL
):
    """
    Given a `project`, write its JSON output with `to_json` and upload that
    file to the `matching/` endpoint located under `api_url`.

    Return the URL, as found in the response, of the first run listed for the
    submitted match request.
    """
    scan_location = to_json(project)
    matching_url = f"{api_url}matching/"
    # The upload has to happen while the file handle is still open.
    with open(scan_location, "rb") as scan_file:
        response = request_post(
            url=matching_url,
            timeout=timeout,
            files={"upload_file": scan_file},
        )
    first_run = response["runs"][0]
    return first_run["url"]
| 355 | + |
| 356 | + |
def poll_until_success(run_url, sleep=10):
    """
    Given a URL to a scancode.io run instance, `run_url`, return True when the
    run instance has completed successfully.

    The run status is requested again every `sleep` seconds for as long as the
    run has not reached a final status, or when the request returned no
    response.

    Raise a PurlDBException when the run instance has failed, stopped, or gone
    stale.
    """
    run_status = AbstractTaskFieldsModel.Status
    failed_statuses = [
        run_status.FAILURE,
        run_status.STOPPED,
        run_status.STALE,
    ]
    while True:
        response = request_get(run_url)
        if response:
            status = response["status"]
            if status == run_status.SUCCESS:
                return True

            if status in failed_statuses:
                log = response["log"]
                msg = f"Matching run has stopped:\n\n{log}"
                raise PurlDBException(msg)

        # Every non-final outcome (not started, queued, running, or an empty
        # response) must reach this sleep. Skipping it with `continue` would
        # poll the remote server in a tight loop without any delay.
        time.sleep(sleep)
| 390 | + |
| 391 | + |
def get_match_results(run_url):
    """
    Given the `run_url` for a pipeline running the matchcode matching pipeline,
    return the match results for that run.

    The results are fetched from the `/results/` endpoint of the project URL
    found under the "project" key of the run data.
    """
    response = request_get(run_url)
    project_url = response["project"]
    # `project_url` can have params, such as "?format=json".
    # `partition` keeps only the part before the first "?", and does not fail
    # when the query string itself contains another "?".
    project_url, _, _ = project_url.partition("?")
    project_url = project_url.rstrip("/")
    results_url = project_url + "/results/"
    return request_get(results_url)
| 405 | + |
| 406 | + |
def map_match_results(match_results):
    """
    Given `match_results`, a mapping of ScanCode.io codebase results, group
    the paths of its "files" entries by the package_uid values listed in each
    entry's "for_packages".

    Return a defaultdict(list) keyed by package_uid, where each value is the
    list of resource paths associated with that package_uid.
    """
    paths_by_uid = defaultdict(list)
    # One (package_uid, path) pair per association. The path is only read for
    # entries that list at least one package.
    associations = (
        (uid, entry["path"])
        for entry in match_results.get("files", [])
        for uid in entry.get("for_packages", [])
    )
    for uid, path in associations:
        paths_by_uid[uid].append(path)
    return paths_by_uid
| 421 | + |
| 422 | + |
def create_packages_from_match_results(project, match_results):
    """
    Given `match_results`, a mapping of ScanCode.io codebase results, create
    one package in `project` for each entry under its "packages" key.

    Each package is created through `create_package_from_purldb_data`, together
    with the `project` codebase resources whose paths are associated with the
    package's package_uid in `match_results`.
    """
    # Imported at call time rather than at module level.
    from scanpipe.pipes.d2d import create_package_from_purldb_data

    paths_by_uid = map_match_results(match_results)
    for package_data in match_results.get("packages", []):
        uid = package_data["package_uid"]
        matched_paths = paths_by_uid[uid]
        matched_resources = project.codebaseresources.filter(
            path__in=matched_paths
        )
        create_package_from_purldb_data(
            project,
            resources=matched_resources,
            package_data=package_data,
            status=flag.MATCHED_TO_PURLDB_PACKAGE,
        )
0 commit comments