Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More License Detection changes #3154

Merged
merged 11 commits into from
Dec 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/explanations/license-detection-reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,7 @@ After::
"text": "Apache License\nVersion 2.0, {Truncated text}"
}
],
"rule_references": [
"license_rule_references": [
{
"license_expression": "apache-2.0",
"rule_identifier": "apache-2.0_65.RULE",
Expand Down
1 change: 0 additions & 1 deletion setup-mini.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,6 @@ scancode_post_scan =
is-license-text = licensedcode.plugin_license_text:IsLicenseText
filter-clues = cluecode.plugin_filter_clues:RedundantCluesFilter
consolidate = summarycode.plugin_consolidate:Consolidator
licenses-reference = licensedcode.plugin_licenses_reference:LicensesReference


# scancode_output_filter is the entry point for filter plugins executed after
Expand Down
1 change: 0 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,6 @@ scancode_post_scan =
is-license-text = licensedcode.plugin_license_text:IsLicenseText
filter-clues = cluecode.plugin_filter_clues:RedundantCluesFilter
consolidate = summarycode.plugin_consolidate:Consolidator
licenses-reference = licensedcode.plugin_licenses_reference:LicensesReference


# scancode_output_filter is the entry point for filter plugins executed after
Expand Down
2 changes: 1 addition & 1 deletion src/cluecode/plugin_copyright.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class CopyrightScanner(ScanPlugin):
('authors',attr.ib(default=attr.Factory(list))),
])

sort_order = 4
sort_order = 6

options = [
PluggableCommandLineOption(('-c', '--copyright',),
Expand Down
2 changes: 1 addition & 1 deletion src/cluecode/plugin_email.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class EmailScanner(ScanPlugin):
"""
resource_attributes = dict(emails=attr.ib(default=attr.Factory(list)))

sort_order = 8
sort_order = 7

options = [
PluggableCommandLineOption(('-e', '--email',),
Expand Down
4 changes: 1 addition & 3 deletions src/cluecode/plugin_filter_clues.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,8 @@ def process_codebase(self, codebase, **kwargs):

from licensedcode.cache import get_index

rules_by_id = {r.identifier: r for r in get_index().rules_by_rid}

for resource in codebase.walk():
filtered = filter_ignorable_resource_clues(resource, rules_by_id)
filtered = filter_ignorable_resource_clues(resource, get_index().rules_by_id)
if filtered:
filtered.save(codebase)

Expand Down
2 changes: 1 addition & 1 deletion src/cluecode/plugin_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class UrlScanner(ScanPlugin):

resource_attributes = dict(urls=attr.ib(default=attr.Factory(list)))

sort_order = 10
sort_order = 8

options = [
PluggableCommandLineOption(('-u', '--url',),
Expand Down
18 changes: 11 additions & 7 deletions src/formattedcode/output_debian.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from plugincode.output import output_impl
from plugincode.output import OutputPlugin
from licensedcode.detection import get_matches_from_detection_mappings

from licensedcode.licenses_reference import get_matched_text_from_reference_data
from scancode import notice

"""
Expand Down Expand Up @@ -106,7 +106,7 @@ def build_copyright_paragraphs(codebase, **kwargs):
if scanned_file['type'] == 'directory':
continue
dfiles = scanned_file['path']
dlicense = build_license(scanned_file)
dlicense = build_license(codebase, scanned_file)
dcopyright = build_copyright_field(scanned_file)

file_para = CopyrightFilesParagraph.from_dict(dict(
Expand All @@ -132,7 +132,7 @@ def build_copyright_field(scanned_file):
return '\n'.join(statements)


def build_license(scanned_file):
def build_license(codebase, scanned_file):
"""
Return Debian-like text where the first line is the expression and the
remaining lines are the license text from licenses detected in
Expand All @@ -146,11 +146,11 @@ def build_license(scanned_file):
return

licenses = scanned_file.get('license_detections', [])
text = '\n'.join(get_texts(licenses))
text = '\n'.join(get_texts(codebase, licenses))
return f'{expression}\n{text}'


def get_texts(detected_licenses):
def get_texts(codebase, detected_licenses):
"""
Yield license texts detected in this file.

Expand Down Expand Up @@ -179,8 +179,12 @@ def get_texts(detected_licenses):
# set of (start line, end line, matched_rule identifier)
seen = set()
for lic in get_matches_from_detection_mappings(detected_licenses):
matched_text = get_matched_text_from_reference_data(
codebase=codebase,
rule_identifier=lic['rule_identifier']
)
key = lic['start_line'], lic['end_line'], lic['rule_identifier']
if key not in seen:
yield lic['matched_text']
if matched_text != None:
yield matched_text
seen.add(key)

29 changes: 23 additions & 6 deletions src/formattedcode/output_spdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,15 @@
from spdx.utils import SPDXNone
from spdx.version import Version

from license_expression import Licensing
from commoncode.cliutils import OUTPUT_GROUP
from commoncode.cliutils import PluggableCommandLineOption
from commoncode.fileutils import file_name
from commoncode.fileutils import parent_directory
from commoncode.text import python_safe_name
from formattedcode import FileOptionType
from licensedcode.detection import get_matches_from_detection_mappings
from licensedcode.licenses_reference import get_matched_text_from_reference_data
from plugincode.output import output_impl
from plugincode.output import OutputPlugin
import scancode_config
Expand Down Expand Up @@ -170,6 +172,7 @@ def _process_codebase(
package_name = build_package_name(input_path)

write_spdx(
codebase=codebase,
output_file=output_file,
files=files,
tool_name=tool_name,
Expand Down Expand Up @@ -208,6 +211,7 @@ def check_sha1(codebase):


def write_spdx(
codebase,
output_file,
files,
tool_name,
Expand All @@ -229,6 +233,10 @@ def write_spdx(
producing this SPDX document.
Use ``package_name`` as a Package name and as a namespace prefix base.
"""
from licensedcode import cache
licenses = cache.get_licenses_db()
licensing = Licensing()

as_rdf = not as_tagvalue
_patch_license_list()

Expand Down Expand Up @@ -282,11 +290,20 @@ def write_spdx(
if license_matches:
all_files_have_no_license = False
for match in license_matches:
file_licenses = match["licenses"]
for file_license in file_licenses:
license_key = file_license.get('key')

spdx_id = file_license.get('spdx_license_key')
file_license_expression = match["license_expression"]
file_license_keys = licensing.license_keys(
expression=file_license_expression,
unique=True
)
matched_text = get_matched_text_from_reference_data(
codebase=codebase,
rule_identifier=match["rule_identifier"],
)
for license_key in file_license_keys:
file_license = licenses.get(license_key)
license_key = file_license.key

spdx_id = file_license.spdx_license_key
if not spdx_id:
spdx_id = f'LicenseRef-scancode-{license_key}'
is_license_ref = spdx_id.lower().startswith('licenseref-')
Expand All @@ -295,7 +312,7 @@ def write_spdx(
spdx_license = License.from_identifier(spdx_id)
else:
spdx_license = ExtractedLicense(spdx_id)
spdx_license.name = file_license.get('short_name')
spdx_license.name = file_license.short_name
# FIXME: replace this with the licensedb URL
comment = (
f'See details at https://github.com/nexB/scancode-toolkit'
Expand Down
Loading