Skip to content

Commit

Permalink
Upgrade ScanCode-toolkit to latest v32 #569 (#715)
Browse files Browse the repository at this point in the history
Signed-off-by: Thomas Druez <[email protected]>
  • Loading branch information
tdruez authored May 18, 2023
1 parent e4488e8 commit 657c1af
Show file tree
Hide file tree
Showing 61 changed files with 16,692 additions and 7,378 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ Changelog
v33.0.0 (unreleased)
--------------------

- Upgrade ScanCode-toolkit to latest v32.0.0
Warning: This upgrade requires schema and data migrations (both included).
It is recommended to reset and re-run the pipelines to benefit from the latest
ScanCode detection improvements.
Refer to https://github.com/nexB/scancode-toolkit/blob/develop/CHANGELOG.rst#v3200-next-roadmap
for the full list of changes.
https://github.com/nexB/scancode.io/issues/569

- Add a new ``deploy_to_develop`` pipeline specialized in creating relations between
the development source code and binaries or deployed code.
This pipeline is expecting 2 archive files with "from-" and "to-" filename prefixes
Expand Down
18 changes: 9 additions & 9 deletions docs/custom-pipelines.rst
Original file line number Diff line number Diff line change
Expand Up @@ -141,20 +141,20 @@ the file's directory in the :ref:`scancodeio_settings_pipelines_dirs`.
def report_licenses_with_resources(self):
"""
Retrieves codebase resources filtered by license categories,
Generates a licenses report file from a template.
Retrieves codebase resources and generates a licenses report file using
a Jinja template.
"""
categories = ["Commercial", "Copyleft"]
resources = self.project.codebaseresources.licenses_categories(categories)
resources = self.project.codebaseresources.has_license_detections()
resources_by_licenses = defaultdict(list)
resources_by_matched_text = defaultdict(list)
for resource in resources:
for license_data in resource.licenses:
matched_text = license_data.get("matched_text")
resources_by_licenses[matched_text].append(resource.path)
for detection_data in resource.license_detections:
for match in detection_data.get("matches", []):
matched_text = match.get("matched_text")
resources_by_matched_text[matched_text].append(resource.path)
template = Template(self.report_template, lstrip_blocks=True, trim_blocks=True)
report_stream = template.stream(resources=resources_by_licenses)
report_stream = template.stream(resources=resources_by_matched_text)
report_file = self.project.get_output_file_path("license-report", "txt")
report_stream.dump(str(report_file))
Expand Down
2 changes: 0 additions & 2 deletions docs/output-files.rst
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,6 @@ similar to the following:
"license_expression": "mit",
"declared_license": "MIT",
"notice_text": "",
"manifest_path": "",
"contains_source_code": null,
"missing_resources": [
"/lib/libc.musl-x86_64.so.1"
],
Expand Down
2 changes: 1 addition & 1 deletion scancodeio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import warnings
from pathlib import Path

__version__ = "32.2.0"
__version__ = "33.0.0-dev"

SCAN_NOTICE = Path(__file__).resolve().parent.joinpath("scan.NOTICE").read_text()

Expand Down
19 changes: 13 additions & 6 deletions scanpipe/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,11 @@ class Meta:
"is_archive",
"is_media",
"is_key_file",
"licenses",
"license_expressions",
"detected_license_expression",
"detected_license_expression_spdx",
"license_detections",
"license_clues",
"percentage_of_license_text",
"compliance_alert",
"copyrights",
"holders",
Expand Down Expand Up @@ -297,14 +300,18 @@ class Meta:
"sha256",
"sha512",
"copyright",
"license_expression",
"declared_license",
"holder",
"declared_license_expression",
"declared_license_expression_spdx",
"license_detections",
"other_license_expression",
"other_license_expression_spdx",
"other_license_detections",
"extracted_license_statement",
"notice_text",
"source_packages",
"extra_data",
"package_uid",
"manifest_path",
"contains_source_code",
"datasource_id",
"file_references",
"missing_resources",
Expand Down
29 changes: 12 additions & 17 deletions scanpipe/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ class ResourceFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
"type",
"size",
"name",
"detected_license_expression",
"extension",
"programming_language",
"mime_type",
Expand All @@ -331,16 +332,8 @@ class ResourceFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
"related_from__from_resource__path",
],
)
license_key = JSONContainsFilter(
label="License key",
field_name="licenses",
)
license_category = JSONContainsFilter(
label="License category",
field_name="licenses",
)
compliance_alert = django_filters.ChoiceFilter(
choices=CodebaseResource.Compliance.choices + [("EMPTY", "EMPTY")]
choices=CodebaseResource.Compliance.choices + [("_EMPTY_", "EMPTY")]
)
in_package = InPackageFilter(label="In a Package")
status = StatusFilter(empty_label="All")
Expand Down Expand Up @@ -373,9 +366,11 @@ class Meta:
"copyrights",
"holders",
"authors",
"licenses",
"license_category",
"license_expressions",
"detected_license_expression",
"detected_license_expression_spdx",
"license_detections",
"license_clues",
"percentage_of_license_text",
"emails",
"urls",
"in_package",
Expand Down Expand Up @@ -414,7 +409,8 @@ class PackageFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
sort = django_filters.OrderingFilter(
label="Sort",
fields=[
"license_expression",
"declared_license_expression",
"other_license_expression",
"copyright",
"primary_language",
],
Expand Down Expand Up @@ -444,11 +440,10 @@ class Meta:
"code_view_url",
"vcs_url",
"type",
"license_expression",
"declared_license",
"declared_license_expression",
"other_license_expression",
"extracted_license_statement",
"copyright",
"manifest_path",
"contains_source_code",
]


Expand Down
121 changes: 121 additions & 0 deletions scanpipe/migrations/0030_scancode_toolkit_v32_model_updates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# Generated by Django 4.2 on 2023-05-05 06:52

from django.db import migrations, models


class Migration(migrations.Migration):
    """
    Schema changes on ``discoveredpackage`` and ``codebaseresource`` license
    fields for the ScanCode-toolkit v32 upgrade.

    On ``discoveredpackage``: one index is removed, two existing fields are
    renamed (and their definition altered), and seven fields are added.
    On ``codebaseresource``: five fields are added.

    This migration only contains schema operations (no ``RunPython``).
    """

    dependencies = [
        ("scanpipe", "0029_codebaseresource_scanpipe_co_type_ea1dd7_idx_and_more"),
    ]

    operations = [
        # --- discoveredpackage: index removal ---------------------------------
        # NOTE(review): presumably this index covers ``license_expression``,
        # which is renamed right below -- confirm against the model Meta.
        migrations.RemoveIndex(
            model_name="discoveredpackage",
            name="scanpipe_di_license_e8ce32_idx",
        ),
        # --- discoveredpackage: renames (each followed by an AlterField) ------
        # license_expression -> declared_license_expression
        migrations.RenameField(
            model_name="discoveredpackage",
            old_name="license_expression",
            new_name="declared_license_expression",
        ),
        migrations.AlterField(
            model_name="discoveredpackage",
            name="declared_license_expression",
            field=models.TextField(
                blank=True,
                help_text="The license expression for this package typically derived from its extracted_license_statement or from some other type-specific routine or convention.",
            ),
        ),
        # declared_license -> extracted_license_statement
        migrations.RenameField(
            model_name="discoveredpackage",
            old_name="declared_license",
            new_name="extracted_license_statement",
        ),
        migrations.AlterField(
            model_name="discoveredpackage",
            name="extracted_license_statement",
            field=models.TextField(
                blank=True,
                help_text="The license statement mention, tag or text as found in a package manifest and extracted. This can be a string, a list or dict of strings possibly nested, as found originally in the manifest.",
            ),
        ),
        # --- discoveredpackage: new fields ------------------------------------
        migrations.AddField(
            model_name="discoveredpackage",
            name="declared_license_expression_spdx",
            field=models.TextField(
                blank=True,
                help_text="The SPDX license expression for this package converted from its declared_license_expression.",
            ),
        ),
        migrations.AddField(
            model_name="discoveredpackage",
            name="holder",
            field=models.TextField(
                blank=True,
                help_text="Holders for this package. Typically one per line.",
            ),
        ),
        # JSON fields use ``default=list`` (a callable), not a literal ``[]``.
        migrations.AddField(
            model_name="discoveredpackage",
            name="license_detections",
            field=models.JSONField(
                blank=True,
                default=list,
                help_text="A list of LicenseDetection mappings typically derived from its extracted_license_statement or from some other type-specific routine or convention.",
            ),
        ),
        migrations.AddField(
            model_name="discoveredpackage",
            name="other_license_detections",
            field=models.JSONField(
                blank=True,
                default=list,
                help_text="A list of LicenseDetection mappings which is different from the declared_license_expression, (i.e. not the primary license) These are detections for the detection for the license expressions in other_license_expression. ",
            ),
        ),
        migrations.AddField(
            model_name="discoveredpackage",
            name="other_license_expression",
            field=models.TextField(
                blank=True,
                help_text="The license expression for this package which is different from the declared_license_expression, (i.e. not the primary license) routine or convention.",
            ),
        ),
        migrations.AddField(
            model_name="discoveredpackage",
            name="other_license_expression_spdx",
            field=models.TextField(
                blank=True,
                help_text="The other SPDX license expression for this package converted from its other_license_expression.",
            ),
        ),
        # --- codebaseresource: new fields -------------------------------------
        migrations.AddField(
            model_name="codebaseresource",
            name="detected_license_expression",
            field=models.TextField(
                blank=True,
                help_text="The license expression summarizing the license info for this resource, combined from all the license detections",
            ),
        ),
        migrations.AddField(
            model_name="codebaseresource",
            name="detected_license_expression_spdx",
            field=models.TextField(
                blank=True,
                help_text="The detected license expression for this file, with SPDX license keys",
            ),
        ),
        migrations.AddField(
            model_name="codebaseresource",
            name="license_detections",
            field=models.JSONField(
                blank=True,
                default=list,
                help_text="List of license detection details.",
            ),
        ),
        migrations.AddField(
            model_name="codebaseresource",
            name="license_clues",
            field=models.JSONField(
                blank=True,
                default=list,
                help_text="List of license matches that are not proper detections and potentially just clues to licenses or likely false positives. Those are not included in computing the detected license expression for the resource.",
            ),
        ),
        # The only nullable field added by this migration.
        migrations.AddField(
            model_name="codebaseresource",
            name="percentage_of_license_text",
            field=models.FloatField(
                blank=True,
                help_text="Percentage of file words detected as license text or notice.",
                null=True,
            ),
        ),
    ]
Loading

0 comments on commit 657c1af

Please sign in to comment.