Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support multiple input path in CLI #875 #1397 #1399

Merged
merged 9 commits into from
Mar 7, 2019
113 changes: 113 additions & 0 deletions etc/scripts/genlicspdx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2019 nexB Inc. and others. All rights reserved.
# http://nexb.com and https://github.com/nexB/scancode-toolkit/
# The ScanCode software is licensed under the Apache License version 2.0.
# Data generated with ScanCode require an acknowledgment.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# When you publish or redistribute any data created with ScanCode or any ScanCode
# derivative work, you must accompany this data with the following acknowledgment:
#
# Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
# ScanCode is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode-toolkit/ for support and download.

from __future__ import absolute_import
from __future__ import unicode_literals
from __future__ import print_function

import os

import click
click.disable_unicode_literals_warning = True

from licensedcode.models import load_licenses
from scancode.cli import run_scan


"""
Generate an SPDX document for each license known in ScanCode that is not listed
at SPDX.
Run python genlicspdx.py -h for help.

NOTE: this is rather inefficient as it is starting a new command line process
for each license, taking a few seconds each time.
Upcoming code to call a scan function instead will be more efficient.
"""

# License categories treated as FOSS: licenses in any other category are
# skipped when generating SPDX documents.
FOSS_CATEGORIES = {
    'Copyleft',
    'Copyleft Limited',
    'Patent License',
    'Permissive',
    'Public Domain',
}


@click.command()
@click.argument(
    'license_dir',
    type=click.Path(
        file_okay=False, exists=True, writable=True,
        allow_dash=False, resolve_path=True),
    metavar='DIR')
@click.option(
    '-v', '--verbose', is_flag=True, default=False,
    help='Print execution messages.')
@click.help_option('-h', '--help')
def cli(license_dir, verbose):
    """
    Create one SPDX tag-value document for each non-SPDX ScanCode license.
    Store these in the DIR directory.
    """
    # NOTE: the docstring above is also the help text displayed by click.

    # Scan options shared by every per-license scan.
    base_kwargs = dict(
        license=True, license_diag=True, license_text=True, info=True,
        strip_root=True, quiet=True, return_results=False)

    licenses_by_key = load_licenses(with_deprecated=False)

    for lic in licenses_by_key.values():
        # Mapping of license attributes used to format the messages and the
        # output file name.
        ld = lic.to_dict()

        # Only FOSS licenses that have a text and are not already known at
        # SPDX get a document: anything else is skipped.
        if lic.spdx_license_key:
            if verbose:
                click.echo(
                    'Skipping ScanCode: {key} that is an SPDX license: '
                    '{spdx_license_key}'.format(**ld))
            continue

        if not lic.text_file or not os.path.exists(lic.text_file):
            if verbose:
                click.echo(
                    'Skipping license without text: {key}'.format(**ld))
            continue

        if lic.category not in FOSS_CATEGORIES:
            if verbose:
                click.echo(
                    'Skipping non FOSS license: {key}'.format(**ld))
            continue

        output = 'licenseref-scancode-{key}.spdx'.format(**ld)
        output = os.path.join(license_dir, output)

        if verbose:
            click.echo(
                'Creating SPDX document for license: {key}'.format(**ld))
            click.echo('at: {output}'.format(output=output))

        # The license text file is the scan input and the opened file is
        # handed to run_scan as the `spdx_tv` option.
        with open(output, 'wb') as output_file:
            kwargs = dict(input=lic.text_file, spdx_tv=output_file)
            kwargs.update(base_kwargs)
            run_scan(**kwargs)


# Run the click command when this file is executed as a script.
if __name__ == '__main__':
    cli()
74 changes: 74 additions & 0 deletions etc/scripts/scancli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#
# Copyright (c) 2019 nexB Inc. and others. All rights reserved.
# http://nexb.com and https://github.com/nexB/scancode-toolkit/
# The ScanCode software is licensed under the Apache License version 2.0.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import json
from os.path import abspath
from os.path import dirname
from os.path import join
from os.path import normpath

import execnet

import scanserv

"""
This is a module designed to be called from Python 2 or 3 and is the client
side. See scanserv for the backend server module that runs on Python 2 and
effectively runs scancode.
"""


def scan(locations, deserialize=False, scancode_root_dir=None):
    """
    Scan each path of the `locations` list and yield the results of each scan
    as a JSON string. If `deserialize` is True, yield the deserialized Python
    data instead.
    Each location is scanned independently.

    `scancode_root_dir` is the directory that contains the `bin/python`
    interpreter used to run the scans. If not provided, it is derived from the
    location of this file by stripping its last three path segments.

    This is a generator: the server process is only started when the iteration
    begins and is shut down when the iteration ends, including when it is
    abandoned early or fails.
    """
    if not locations:
        # Nothing to scan: do not start a server process for nothing.
        return

    if not scancode_root_dir:
        scancode_root_dir = abspath(normpath(__file__))
        scancode_root_dir = dirname(dirname(dirname(scancode_root_dir)))
    python2 = join(scancode_root_dir, 'bin', 'python')
    spec = 'popen//python={python2}'.format(python2=python2)
    gateway = execnet.makegateway(spec)  # NOQA
    try:
        channel = gateway.remote_exec(scanserv)

        for location in locations:
            # build a mapping of options to use for this scan
            scan_kwargs = dict(
                location=location,
                license=True,
                license_text=True,
                license_diag=True,
                copyright=True,
                info=True,
                processes=0,
            )

            channel.send(scan_kwargs)  # execute func-call remotely
            results = channel.receive()
            if deserialize:
                results = json.loads(results)
            yield results
    finally:
        # Always release the gateway so that its subprocess does not linger
        # once all the locations are scanned or the caller stops iterating.
        gateway.exit()


if __name__ == '__main__':
    # Scan each path provided as a command line argument and print the
    # results of each scan as they come.
    import sys  # NOQA
    for scanned in scan(sys.argv[1:]):
        print(scanned)
29 changes: 29 additions & 0 deletions etc/scripts/scanserv.README
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
A simple proof of concept for Python3 remoting with execnet.

See ticket #1400 for more.

This is an example of how to call Scancode as a function from Python2 or Python3.
The benefits are that when the server process has loaded the license index,
and imported its modules there is no per-call import/loading penalty anymore.

This is using execnet which is the multiprocessing library used by
py.test and therefore a rather stable and high quality engine.

To test, do this::

1. checkout scancode and run ./configure in a first shell. This is for a plain
ScanCode using Python 2 that will be used as a "server".

2. in another shell, create a virtualenv with Python 3 in another
location. Activate that venv, and `pip install simplejson execnet`

3. Change dir to the installed scancode-toolkit/etc/scripts directory where the
scancli.py and scanserv.py scripts are. Then run::

python3 scancli.py ../../NOTICE ../../setup.py

This will effectively make remote function calls to the Python2
scancode and get the results back in Python3. It also allows making
multiple calls that reuse the same process, hence amortizing any startup
costs. Here this will run two scans: one on NOTICE and another on setup.py.
It could have been directories too.
55 changes: 55 additions & 0 deletions etc/scripts/scanserv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#
# Copyright (c) 2019 nexB Inc. and others. All rights reserved.
# http://nexb.com and https://github.com/nexB/scancode-toolkit/
# The ScanCode software is licensed under the Apache License version 2.0.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

"""
Python2 "server side" of the scan server. In a given execnet session, this
process will hold a loaded license index and can be invoked multiple times
without the index load penalty on each call.
"""


def as_json(results, pretty=True):
    """
    Return a JSON string, terminated by a newline, built from a `results` data
    structure. The JSON is indented with two spaces if `pretty` is True and
    uses compact separators otherwise.
    """
    # simplejson can serialize arbitrary iterables as JSON arrays, which is why
    # it is used here.
    import simplejson

    if pretty:
        formatting = dict(indent=2 * b' ')
    else:
        formatting = dict(separators=(b',', b':',))

    dumped = simplejson.dumps(
        results, iterable_as_array=True, encoding='utf-8', **formatting)
    return dumped + b'\n'


def run_scan(location, **kwargs):
    """
    Run a scan on `location`, passing `kwargs` as options to the scancode
    `cli.run_scan` function, and return the results as a JSON string.
    The optional `pretty` keyword (True by default) is not passed to the scan:
    it only selects the JSON formatting.
    """
    from scancode import cli

    pretty = kwargs.pop('pretty', True)
    scanned = cli.run_scan(location, **kwargs)
    return as_json(scanned, pretty=pretty)


if __name__ == '__channelexec__':
    # Serve scan requests received on the `channel` for as long as it yields
    # items. `channel` is not defined in this module: it is expected to be
    # provided by execnet when this module is executed remotely.
    for request in channel:  # NOQA
        if isinstance(request, dict):
            # a mapping of run_scan keyword arguments
            response = run_scan(**request)
        elif isinstance(request, (str, unicode)):  # NOQA
            # a single location string
            response = run_scan(request)
        else:
            raise Exception('Unknown arguments type: ' + repr(request))
        channel.send(response)  # NOQA
Loading