Update refresh_project_copyrights.py

LuckySkyWalker · web-flow · commit 90f1db34a136 · 2025-05-27T15:06:31.000+01:00
Counting origins and clearer debug output
diff --git a/examples/client/refresh_project_copyrights.py b/examples/client/refresh_project_copyrights.py
@@ -4,6 +4,7 @@
 # Ian Ashworth, May 2025
 #
 import http.client
+import signal
 from sys import api_version
 import sys
 import csv
@@ -18,11 +19,18 @@
 
 http.client._MAXHEADERS = 1000
 
+job_status = 0
+
 logging.basicConfig(
     level=logging.INFO,
     format="[%(asctime)s] {%(module)s:%(lineno)d} %(levelname)s - %(message)s"
 )
 
+# initialise
+all_my_comp_data = []
+my_statistics = {}
+
+
 def RepDebug(level, msg):
     if hasattr(args, 'debug') and level <= args.debug:
         print("dbg{" + str(level) + "} " + msg)
@@ -33,6 +41,59 @@ def RepWarning(msg):
     print("WARNING: " + msg)
     return True
 
+def CompleteTask(job_status):
+    """Print the run summary and, if requested, dump the collected data.
+
+    Stores job_status in my_statistics['_jobStatus'], prints the finish time
+    and the statistics, then (when args.dump_data is set) writes
+    all_my_comp_data to args.csv_file, or prints it with pprint if no file is set.
+    """
+    now = datetime.datetime.now()
+    my_statistics['_jobStatus'] = job_status
+
+    print('Finished: %s' % now.strftime("%Y-%m-%d %H:%M:%S"))
+    print('Summary:')
+    pprint(my_statistics)
+
+    # nothing more to do unless dumping data
+    if not args.dump_data:
+        return
+
+    # no CSV file given - print to screen
+    if not args.csv_file:
+        pprint(all_my_comp_data)
+        return
+
+    # Note: See the BD API doc and in particular .../api-doc/public.html#_bom_vulnerability_endpoints
+    # for a complete list of the fields available. The below code shows a subset of them just to
+    # illustrate how to write out the data into a CSV format.
+    logging.info(f"Exporting {len(all_my_comp_data)} records to CSV file {args.csv_file}")
+    # The csv writer emits '\r\n' itself; without newline='' text mode would
+    # translate that to '\r\r\n' on Windows.
+    with open(args.csv_file, 'w', newline='') as csv_f:
+        field_names = ['Component', 'Component Version', 'Status', 'Url']
+        writer = csv.DictWriter(csv_f, fieldnames=field_names)
+        writer.writeheader()
+        for my_comp_data in all_my_comp_data:
+            writer.writerow({
+                'Component': my_comp_data['componentName'],
+                'Component Version': my_comp_data['componentVersion'],
+                'Status': my_comp_data['status'],
+                'Url': my_comp_data['url'],
+            })
+
+def SignalHandler(sig, frame):
+    """SIGINT handler: report progress so far, then exit with status 1."""
+    print("Ctrl+C detected!")
+    # The module-level job_status is never updated (it stays 0), so exiting
+    # with it would report success; use the same status given to CompleteTask.
+    CompleteTask(1)
+    sys.exit(1)
+
+# ------------------------------------------------------------------------------
+# register the signal handler
+signal.signal(signal.SIGINT, SignalHandler)
+
 
 # Parse command line arguments
 parser = argparse.ArgumentParser("Refresh copyrights for project/version components")
@@ -46,10 +107,12 @@ def RepWarning(msg):
 parser.add_argument("--project", dest='project_name', help="Project name")
 parser.add_argument("--version", dest='version_name', help="Version name")
 
-parser.add_argument("--max-projects", dest='max_projects', type=int, help="Maximum projects to inspect else all")
+parser.add_argument("--max-projects", dest='max_projects', type=int, help="Maximum number of projects to inspect else all")
 parser.add_argument("--max-versions-per-project", dest='max_versions_per_project', type=int, help="Maximum versions per project to inspect else all")
 parser.add_argument("--max-components", dest='max_components', type=int, help="Maximum components to inspect in total else all")
 
+parser.add_argument("--skip-projects", dest='skip_projects', type=int, help="Skip first 'n' projects to inspect")
+
 parser.add_argument("--debug", dest='debug', type=int, default=0, help="Debug verbosity (0=none 'n'=level)")
 parser.add_argument("--dryrun", dest='dry_run', type=int, default=0, help="Dry run test (0=no 1=yes)")
 
@@ -72,11 +135,10 @@ def RepWarning(msg):
     retries=args.retries,
 )
 
-# initialise
-all_my_comp_data = []
-my_statistics = {}
 
 
+str_unknown = "n/a"
+
 # version of components API to call
 comp_api_version = 6
 
@@ -118,10 +180,13 @@ def RepWarning(msg):
 my_statistics['_cntProjects'] = 0
 my_statistics['_cntVersions'] = 0
 my_statistics['_cntComponents'] = 0
+my_statistics['_cntOrigins'] = 0
+
 my_statistics['_cntRefresh'] = 0
 my_statistics['_cntNoOrigins'] = 0
 my_statistics['_cntNoIDs'] = 0
-
+my_statistics['_cntSkippedProjects'] = 0
+my_statistics['_jobStatus'] = 0
 
 # record any control values
 if args.project_name:
@@ -152,18 +217,33 @@ def RepWarning(msg):
     # all projects are in scope
     projects = bd.get_resource('projects')
 
+
+cnt_project = 0
+cnt_call = 0
+
 # loop through projects list
 for this_project in projects:
 
+    cnt_project += 1
+
+    # check if we are skipping over this project
+    if args.skip_projects and cnt_project <= args.skip_projects:
+        my_statistics['_cntSkippedProjects'] += 1
+        RepDebug(1, 'Skipping project [%d] [%s]' % (cnt_project, this_project['name']))
+        continue
+
     # check if we have hit any limit
     if args.max_components and my_statistics['_cntComponents'] >= args.max_components:
+        RepDebug(1, 'Reached component limit [%d]' % args.max_components)
         break
 
     if args.max_projects and my_statistics['_cntProjects'] >= args.max_projects:
+        RepDebug(1, 'Reached project limit [%d]' % args.max_projects)
         break
 
+    # process this project
     my_statistics['_cntProjects'] += 1
-    RepDebug(1, '## Project %d: %s' % (my_statistics['_cntProjects'], this_project['name']))
+    RepDebug(1, '## Project: [%d] [%s]' % (cnt_project, this_project['name']))
 
     if args.version_name:
         # note the specific project version of interest
@@ -184,60 +264,80 @@ def RepWarning(msg):
 
         # check if we have hit any limit
         if args.max_components and my_statistics['_cntComponents'] >= args.max_components:
-            # exit component loop - at the limit
+            RepDebug(1, 'Reached component limit [%d]' % args.max_components)
             break
 
         if args.max_versions_per_project and nVersionsPerProject >= args.max_versions_per_project:
-            # exit loop - at the version per project limit
+            RepDebug(1, 'Reached versions per project limit [%d]' % args.max_versions_per_project)
             break
 
         nVersionsPerProject += 1
         my_statistics['_cntVersions'] += 1
 
         # Announce
 #        logging.debug(f"Found {this_project['name']}:{this_version['versionName']}")
-        RepDebug(3, '   Version: %s' % this_version['versionName'])
+        RepDebug(3, '   Version: [%s]' % this_version['versionName'])
 
 
         # iterate through all components for this project version
         for this_comp_data in bd.get_resource('components', this_version, **comp_kwargs):
 
             if args.max_components and my_statistics['_cntComponents'] >= args.max_components:
-                # exit component loop - at the limit
                 break
 
             my_statistics['_cntComponents'] += 1
-            comp_label = "{} ({})".format(this_comp_data['componentName'], this_comp_data['componentVersionName'])
 
-            RepDebug(4, '     Component: %s' % comp_label)
+            if this_comp_data.get("componentName"):
+                comp_name = this_comp_data['componentName']
+            else:
+                comp_name = str_unknown
+
+            if this_comp_data.get("componentVersionName"):
+                comp_version_name = this_comp_data['componentVersionName']
+            else:
+                comp_version_name = str_unknown
+
+            comp_label = "{} ({})".format(comp_name, comp_version_name)
+
+            RepDebug(4, '     Component: [%s]' % comp_label)
 
             if this_comp_data['inputExternalIds'].__len__() > 0:
                 inputExternalIds = this_comp_data['inputExternalIds'][0]
             else:
                 my_statistics['_cntNoIDs'] += 1
-                inputExternalIds = "n/a"
-            RepDebug(2, '       ID: %s' % inputExternalIds)
+                inputExternalIds = str_unknown
+            RepDebug(2, '       ID: [%s]' % inputExternalIds)
 
 
-            # refresh the copyrights for this component
+            # refresh the copyrights for this component-origin
             if this_comp_data['origins'].__len__() > 0:
 
                 n_origin = 0
 
                 for this_origin in this_comp_data['origins']:
 
                     n_origin += 1
-                    origin_id = this_origin['externalId']
+                    my_statistics['_cntOrigins'] += 1
+
+                    if this_origin.get('externalId'):
+                        origin_id = this_origin['externalId']
+                    else:
+                        origin_id = str_unknown
+
                     url = this_origin['origin']
 
                     # refresh with end point
                     url += "/copyrights-refresh"
 
                     status = -1
+                    cnt_call += 1
+                    call_id = "{}.{}".format(cnt_project, cnt_call)
 
                     if args.dry_run != 0:
-                        RepDebug(1, "DryRun: no=%d origin=%s url=%s" % (n_origin, origin_id, url))
+                        RepDebug(2, '         DryRun: %s - origin - no [%d]  id [%s]  url [%s]' % (call_id, n_origin, origin_id, url))
                     else:
+                        RepDebug(3,
+                                 '       Origin: %s - origin - no [%d]  id [%s]  url [%s]' % (call_id, n_origin, origin_id, url))
                         try:
                             response = bd.session.put(url, data=None, **refresh_kwargs)
                             RepDebug(5,'Refresh response: origin [%s] [%s]' % (this_origin, response))
@@ -274,8 +374,8 @@ def RepWarning(msg):
                 # if recording the data
                 if args.dump_data:
                     my_data = {}
-                    my_data['componentName'] = this_comp_data['componentName']
-                    my_data['componentVersion'] = this_comp_data['componentVersionName']
+                    my_data['componentName'] = comp_name
+                    my_data['componentVersion'] = comp_version_name
                     my_data['status'] = status
                     my_data['url'] = url
 
@@ -287,42 +387,6 @@ def RepWarning(msg):
 
 # end of processing loop
 
-now = datetime.datetime.now()
-print('Finished: %s' % now.strftime("%Y-%m-%d %H:%M:%S"))
-print('Summary:')
-pprint(my_statistics)
-
-# if dumping data
-if args.dump_data:
-    # if outputting to a CSV file
-    if args.csv_file:
-        '''Note: See the BD API doc and in particular .../api-doc/public.html#_bom_vulnerability_endpoints
-            for a complete list of the fields available. The below code shows a subset of them just to
-            illustrate how to write out the data into a CSV format.
-        '''
-        logging.info(f"Exporting {len(all_my_comp_data)} records to CSV file {args.csv_file}")
-
-        with open(args.csv_file, 'w') as csv_f:
-            field_names = [
-                'Component',
-                'Component Version',
-                'Status',
-                'Url'
-            ]
-
-            writer = csv.DictWriter(csv_f, fieldnames=field_names)
-            writer.writeheader()
-
-            for my_comp_data in all_my_comp_data:
-                row_data = {
-                    'Component': my_comp_data['componentName'],
-                    'Component Version': my_comp_data['componentVersion'],
-                    'Status': my_comp_data['status'],
-                    'Url': my_comp_data['url']
-                }
-                writer.writerow(row_data)
-    else:
-        # print to screen
-        pprint(all_my_comp_data)
+CompleteTask(0)
 
 #end