|
23 | 23 | from collections import Counter
|
24 | 24 | from collections import defaultdict
|
25 | 25 | from contextlib import suppress
|
| 26 | +from dataclasses import dataclass |
26 | 27 | from pathlib import Path
|
| 28 | +from re import match as regex_match |
27 | 29 |
|
28 | 30 | from django.contrib.postgres.aggregates.general import ArrayAgg
|
29 | 31 | from django.core.exceptions import MultipleObjectsReturned
|
|
43 | 45 | from scanpipe import pipes
|
44 | 46 | from scanpipe.models import CodebaseRelation
|
45 | 47 | from scanpipe.models import CodebaseResource
|
| 48 | +from scanpipe.models import convert_glob_to_django_regex |
46 | 49 | from scanpipe.pipes import LoopProgress
|
47 | 50 | from scanpipe.pipes import flag
|
48 | 51 | from scanpipe.pipes import get_resource_diff_ratio
|
@@ -781,94 +784,263 @@ def _map_javascript_resource(
|
781 | 784 | resource.update(status=flag.MAPPED)
|
782 | 785 |
|
783 | 786 |
|
784 |
| -def _map_about_file_resource(project, about_file_resource, to_resources): |
785 |
| - about_file_location = str(about_file_resource.location_path) |
786 |
| - package_data = resolve.resolve_about_package(about_file_location) |
@dataclass
class AboutFileIndexes:
    """
    About file indexes are used to create packages from
    About files and map the resources described in them
    to the respective packages created, using regex path
    patterns and other About file data.

    All the mappings below are keyed by the path of the About file
    resource, and are meant to be built with ``create_indexes``.
    """

    # Mapping of About file paths and the regex pattern
    # string for the files documented
    regex_by_about_path: dict
    # Mapping of About file paths and a list of regex pattern
    # strings, for the files to be ignored. Only About files that
    # declare at least one ignored resource have an entry here.
    ignore_regex_by_about_path: dict
    # Resource objects for About files present in the codebase,
    # by their path
    about_resources_by_path: dict
    # Mapping of package data present in the About file, by path
    about_pkgdata_by_path: dict
    # List of mapped resources for each About file, by path
    mapped_resources_by_aboutpath: dict

    @classmethod
    def create_indexes(cls, project, from_about_files, logger=None):
        """
        Return an ABOUT file index, containing path pattern mappings,
        package data, and resources, created from `from_about_files`,
        the About file resources.

        An error is added to the `project` for each About file whose
        package data cannot be resolved, or that has no "filename"
        (about_resource) value; such files are left out of the
        pattern indexes. When `logger` is provided, it is called once
        with the count of indexed About files.
        """
        about_pkgdata_by_path = {}
        regex_by_about_path = {}
        ignore_regex_by_about_path = {}
        about_resources_by_path = {}
        mapped_resources_by_aboutpath = {}

        count_indexed_about_files = 0

        for about_file_resource in from_about_files:
            about_path = about_file_resource.path
            package_data = resolve.resolve_about_package(
                input_location=str(about_file_resource.location_path)
            )
            error_message_details = {
                "path": about_path,
                "package_data": package_data,
            }
            if not package_data:
                project.add_error(
                    description="Cannot create package from ABOUT file",
                    model="map_about_files",
                    details=error_message_details,
                )
                continue

            about_pkgdata_by_path[about_path] = package_data
            files_pattern = package_data.get("filename")
            if not files_pattern:
                # Cannot map anything without the about_resource value.
                project.add_error(
                    description="ABOUT file does not have about_resource",
                    model="map_about_files",
                    details=error_message_details,
                )
                continue

            count_indexed_about_files += 1
            regex_by_about_path[about_path] = convert_glob_to_django_regex(
                files_pattern
            )

            # Both "extra_data" and "ignored_resources" are optional and may
            # be present with an empty/None value, hence the ``or`` fallbacks.
            extra_data = package_data.get("extra_data") or {}
            ignore_regex = [
                convert_glob_to_django_regex(pattern)
                for pattern in extra_data.get("ignored_resources") or []
            ]
            if ignore_regex:
                ignore_regex_by_about_path[about_path] = ignore_regex

            about_resources_by_path[about_path] = about_file_resource
            # Start with an empty list so that About files without any mapped
            # resource can be reported in ``create_about_packages_relations``.
            mapped_resources_by_aboutpath[about_path] = []

        if logger:
            logger(
                f"Created mapping index from {count_indexed_about_files:,d} .ABOUT "
                f"files in the from/ codebase."
            )

        return cls(
            about_pkgdata_by_path=about_pkgdata_by_path,
            regex_by_about_path=regex_by_about_path,
            ignore_regex_by_about_path=ignore_regex_by_about_path,
            about_resources_by_path=about_resources_by_path,
            mapped_resources_by_aboutpath=mapped_resources_by_aboutpath,
        )

    def get_matched_about_path(self, to_resource):
        """
        Map `to_resource` using the about file index, and if
        mapped, return the path string to the About file it
        was mapped to, and if not mapped or ignored, return
        None.

        NOTE: the first About file (in index order) whose pattern
        matches the resource path decides the outcome. If that About
        file ignores the resource, None is returned and the remaining
        About files are not consulted.
        """
        resource_path = to_resource.path

        matched_about_path = None
        for about_path, regex_pattern in self.regex_by_about_path.items():
            if regex_match(pattern=regex_pattern, string=resource_path):
                matched_about_path = about_path
                break

        if matched_about_path is None:
            return None

        ignore_regex_patterns = self.ignore_regex_by_about_path.get(
            matched_about_path, []
        )
        is_ignored = any(
            regex_match(pattern=ignore_regex_pattern, string=resource_path)
            for ignore_regex_pattern in ignore_regex_patterns
        )
        if is_ignored:
            return None

        return matched_about_path

    def map_deployed_to_devel_using_about(self, to_resources):
        """
        Return mapped resources which are mapped using the
        path patterns in About file indexes. Resources are
        mapped for each About file in the index, and
        their status is updated accordingly.

        Each mapped resource is also recorded in
        ``mapped_resources_by_aboutpath`` under the About file path
        it was matched to.
        """
        mapped_to_resources = []

        for to_resource in to_resources:
            about_path = self.get_matched_about_path(to_resource)
            if not about_path:
                continue

            self.mapped_resources_by_aboutpath.setdefault(about_path, []).append(
                to_resource
            )
            mapped_to_resources.append(to_resource)
            to_resource.update(status=flag.ABOUT_MAPPED)

        return mapped_to_resources

    def get_about_file_companions(self, about_path):
        """
        Given an ``about_path`` path string to an About file,
        get CodebaseResource objects for the companion license
        and notice files.

        The companions are the siblings of the About file resource
        whose name is the "license_file" or "notice_file" value of
        the About file "extra_data". About files without "extra_data",
        or without these values, get an empty list of names to filter on.
        """
        about_file_resource = self.about_resources_by_path.get(about_path)
        package_data = self.about_pkgdata_by_path.get(about_path) or {}
        # "extra_data" is optional in the package data: fallback to an empty
        # mapping rather than failing on a missing value.
        about_file_extra_data = package_data.get("extra_data") or {}

        # Keep only the companion names actually declared in the About file.
        about_file_companion_names = [
            companion_name
            for companion_name in (
                about_file_extra_data.get("license_file"),
                about_file_extra_data.get("notice_file"),
            )
            if companion_name
        ]
        about_file_companions = about_file_resource.siblings().filter(
            name__in=about_file_companion_names
        )
        return about_file_companions

    def create_about_packages_relations(self, project):
        """
        Create packages using About file package data, if the About file
        has mapped resources on the to/ codebase and creates the mappings
        for the package created and mapped resources.

        Return a 2-tuple of the set of purls of the packages created or
        updated, and the list of About file resources that had mapped
        resources. A warning is added to the `project` for each About
        file without any mapped resource.
        """
        about_purls = set()
        mapped_about_resources = []

        for about_path, mapped_resources in self.mapped_resources_by_aboutpath.items():
            about_file_resource = self.about_resources_by_path[about_path]
            package_data = self.about_pkgdata_by_path[about_path]

            if not mapped_resources:
                # If there's nothing mapped on the ``to/`` side, do not create
                # the package.
                error_message_details = {
                    "path": about_path,
                    "package_data": package_data,
                }
                project.add_warning(
                    description=(
                        "Resource paths listed at about_resource is not found"
                        " in the to/ codebase"
                    ),
                    model="map_about_files",
                    details=error_message_details,
                )
                continue

            # Create the Package using .ABOUT data and assign related codebase_resources
            about_package = pipes.update_or_create_package(
                project=project,
                package_data=package_data,
                codebase_resources=mapped_resources,
            )
            about_purls.add(about_package.purl)
            mapped_about_resources.append(about_file_resource)

            # Map the .ABOUT file resource to all related resources in the ``to/`` side.
            for mapped_resource in mapped_resources:
                pipes.make_relation(
                    from_resource=about_file_resource,
                    to_resource=mapped_resource,
                    map_type="about_file",
                )

            about_file_resource.update(status=flag.ABOUT_MAPPED)

            about_file_companions = self.get_about_file_companions(about_path)
            about_file_companions.update(status=flag.ABOUT_MAPPED)

        return about_purls, mapped_about_resources
848 | 1003 |
|
849 | 1004 |
|
def map_about_files(project, logger=None):
    """
    Map ``from/`` .ABOUT files to their related ``to/`` resources.

    Build the About file indexes from the ``from/`` .ABOUT files, map the
    ``to/`` resources without a status using these indexes, then create the
    packages and relations for the About files that have mapped resources.
    Return early when there are no .ABOUT files in the ``from/`` codebase.
    When provided, `logger` is called with a progress message at each step.
    """
    project_resources = project.codebaseresources
    from_about_files = (
        project_resources.files().from_codebase().filter(extension=".ABOUT")
    )
    if not from_about_files.exists():
        return

    if logger:
        logger(
            f"Mapping {from_about_files.count():,d} .ABOUT files found in the from/ "
            f"codebase."
        )

    # Forward the logger so that the index creation message is emitted too.
    indexes = AboutFileIndexes.create_indexes(
        project=project,
        from_about_files=from_about_files,
        logger=logger,
    )

    # Ignoring empty or ignored files as they are not relevant anyway
    to_resources = project_resources.to_codebase().no_status()
    mapped_to_resources = indexes.map_deployed_to_devel_using_about(
        to_resources=to_resources,
    )
    if logger:
        logger(
            f"Mapped {len(mapped_to_resources):,d} resources from the "
            f"to/ codebase to the About files in the from/ codebase."
        )

    about_purls, mapped_about_resources = indexes.create_about_packages_relations(
        project=project,
    )
    if logger:
        logger(
            f"Created {len(about_purls):,d} new packages from "
            f"{len(mapped_about_resources):,d} About files which "
            f"were mapped to resources in the to/ side."
        )
|
871 |
| - about_file_companions.update(status=flag.ABOUT_MAPPED) |
872 | 1044 |
|
873 | 1045 |
|
874 | 1046 | def map_javascript_post_purldb_match(project, logger=None):
|
|
0 commit comments