diff --git a/scanpipe/pipes/scancode.py b/scanpipe/pipes/scancode.py index 822b3842b..725e89de4 100644 --- a/scanpipe/pipes/scancode.py +++ b/scanpipe/pipes/scancode.py @@ -320,7 +320,7 @@ def scan_for_application_packages(project): resource_qs = project.codebaseresources.no_status() # Collect detected Package data and save it to the CodebaseResource it was - # detected from. + # detected from _scan_and_save( resource_qs=resource_qs, scan_func=scan_for_package_data, @@ -328,19 +328,20 @@ def scan_for_application_packages(project): ) # Iterate through CodebaseResources with Package data and handle them using - # the proper Package handler from packagedcode. + # the proper Package handler from packagedcode assemble_packages(project=project) def add_to_package(package_uid, resource, project): """ - Relate a DiscoveredPackage to `resource` from `project` using `package_uid`. + Relate a DiscoveredPackage to `resource` from `project` using `package_uid` """ if not package_uid: return - - resource_package = resource.discovered_packages.filter(package_uid=package_uid) - if not resource_package.exists(): + package_associated_with_resource = resource.discovered_packages.filter( + package_uid=package_uid + ).exists() + if not package_associated_with_resource: package = project.discoveredpackages.get(package_uid=package_uid) resource.discovered_packages.add(package) @@ -350,20 +351,22 @@ def assemble_packages(project): Create instances of DiscoveredPackage and DiscoveredDependency for `project` from the parsed package data present in the CodebaseResources of `project`. """ - logger.info(f"Project {project} assemble_packages:") + logger.info(f"Project: {project}:\n" "Function: assemble_packages\n") seen_resource_paths = set() - for resource in project.codebaseresources.has_package_data(): if resource.path in seen_resource_paths: continue - logger.info(f" Processing: {resource.path}") + logger.info(f"Processing: CodebaseResource {resource.path}\n") + for package_mapping in resource.package_data: pd = packagedcode_models.PackageData.from_dict(mapping=package_mapping) - logger.info(f" Package data: {pd.purl}") + + logger.info(f"Processing: PackageData {pd.purl}\n") handler = get_package_handler(pd) - logger.info(f" Selected package handler: {handler.__name__}") + + logger.info(f"Selected: Package handler {handler}\n") items = handler.assemble( package_data=pd, @@ -372,9 +375,14 @@ def assemble_packages(project): package_adder=add_to_package, ) + dependencies = [] + resources = [] for item in items: - logger.info(f" Processing item: {item}") + logger.info(f"Processing: item {item}\n") if isinstance(item, packagedcode_models.Package): + # In order to relate CodebaseResources to + # DiscoveredPackages, we create DiscoveredPackages from + # yielded `Package`s first package_data = item.to_dict() pipes.update_or_create_package(project, package_data) elif isinstance(item, packagedcode_models.Dependency): @@ -382,9 +390,21 @@ def assemble_packages(project): # DiscoveredDependency model pass elif isinstance(item, CodebaseResource): - seen_resource_paths.add(item.path) + # We store CodebaseResources in `resources` so we can + # process them after we create all Packages assembled from + # `resource` + resources.append(item) else: - logger.info(f"Unknown Package assembly item type: {item!r}") + logger.info(f"Unknown Package assembly item type: {item!r}\n") + + for item in resources + dependencies: + logger.info(f"Processing: item {item}\n") + if isinstance(item, packagedcode_models.Dependency): + # We will handle Dependencies when we properly implement the + # DiscoveredDependency model + pass + elif isinstance(item, CodebaseResource): + seen_resource_paths.add(item.path) def run_scancode(location, output_file, options, raise_on_error=False):