diff --git a/extractcode b/extractcode
new file mode 100755
index 00000000000..b77f8ae99ec
--- /dev/null
+++ b/extractcode
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (c) 2015 nexB Inc. http://www.nexb.com/ - All rights reserved.
+#
+
+# A minimal shell wrapper to the CLI entry point
+
+SCANCODE_ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+cd "$SCANCODE_ROOT_DIR"
+
+# Every path expansion is quoted: the install directory may contain spaces
+SCANCODE_CONFIGURED_PYTHON="$SCANCODE_ROOT_DIR/bin/python"
+if [ ! -f "$SCANCODE_CONFIGURED_PYTHON" ]; then
+    echo "* Configuring ScanCode ..."
+    CONFIGURE_QUIET=1 "$SCANCODE_ROOT_DIR/configure" etc/conf
+fi
+
+"$SCANCODE_ROOT_DIR/bin/extractcode" "$@"
diff --git a/extractcode.bat b/extractcode.bat
new file mode 100644
index 00000000000..d3143e0a94a
--- /dev/null
+++ b/extractcode.bat
@@ -0,0 +1,36 @@
+@echo OFF
+@rem Copyright (c) 2015 nexB Inc. http://www.nexb.com/ - All rights reserved.
+@rem
+
+
+@rem A minimal shell wrapper to the CLI entry point
+
+set SCANCODE_ROOT_DIR=%~dp0
+@rem /d also switches the drive; quotes protect paths that contain spaces
+cd /d "%SCANCODE_ROOT_DIR%"
+
+set SCANCODE_CMD_LINE_ARGS=
+set SCANCODE_CONFIGURED_PYTHON=%SCANCODE_ROOT_DIR%\bin\python.exe
+
+@rem Collect all command line arguments in a variable
+:collectarg
+    if ""%1""=="""" goto continue
+    call set SCANCODE_CMD_LINE_ARGS=%SCANCODE_CMD_LINE_ARGS% %1
+    shift
+    goto collectarg
+
+:continue
+
+
+if not exist "%SCANCODE_CONFIGURED_PYTHON%" goto configure
+goto scancode
+
+:configure
+    echo * Configuring ScanCode ...
+    set CONFIGURE_QUIET=1
+    call "%SCANCODE_ROOT_DIR%\configure" etc/conf
+
+:scancode
+"%SCANCODE_ROOT_DIR%\bin\extractcode" %SCANCODE_CMD_LINE_ARGS%
+
+:EOS
diff --git a/setup.py b/setup.py
index bd0242f7966..4afca32cea2 100644
--- a/setup.py
+++ b/setup.py
@@ -115,6 +115,7 @@ def read(*names, **kwargs):
     entry_points={
         'console_scripts': [
             'scancode = scancode.cli:scancode',
+            'extractcode = scancode.extract_cli:extractcode',
         ],
     },
 )
diff --git a/src/scancode/extract_cli.py b/src/scancode/extract_cli.py
new file mode 100644
index 00000000000..8f2fed6b36f
--- /dev/null
+++ b/src/scancode/extract_cli.py
@@ -0,0 +1,176 @@
+#
+# Copyright (c) 2015 nexB Inc. and others. All rights reserved.
+# http://nexb.com and https://github.com/nexB/scancode-toolkit/
+# The ScanCode software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode require an acknowledgment.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# When you publish or redistribute any data created with ScanCode or any ScanCode
+# derivative work, you must accompany this data with the following acknowledgment:
+#
+# Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+# ScanCode is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/nexB/scancode-toolkit/ for support and download.
+
+from __future__ import print_function, absolute_import
+
+import os
+
+import click
+
+from scancode.cli import version
+from scancode.cli import print_about
+
+from scancode.api import extract_archives
+from scancode.utils import BaseCommand
+from commoncode import fileutils
+from scancode import utils
+from click.termui import style
+from commoncode.fileutils import as_posixpath
+
+
+def print_version(ctx, param, value):
+    """
+    Click callback for the eager --version option: print the version and exit.
+    """
+    if not value or ctx.resilient_parsing:
+        return
+    click.secho('ScanCode extractcode version ' + version)
+    ctx.exit()
+
+
+epilog_text = '''\b\bExamples:
+
+(Note for Windows: use '\\' backslash instead of '/' slash for paths.)
+
+\b
+Extract all archives found in the 'samples' directory tree:
+
+    extractcode samples
+
+Note: If an archive contains other archives, all contained archives will be
+extracted recursively. Extraction is done directly in the 'samples' directory,
+side-by-side with each archive. Files are extracted in a directory named after
+the archive with an '-extract' suffix added to its name, created side-by-side
+with the corresponding archive file.
+
+\b
+Extract a single archive. Files are extracted in the directory
+'samples/arch/zlib.tar.gz-extract/':
+
+    extractcode samples/arch/zlib.tar.gz
+'''
+
+
+class ExtractCommand(BaseCommand):
+    short_usage_help = '''
+Try 'extractcode --help' for help on options and arguments.'''
+
+
+@click.command(name='extractcode', epilog=epilog_text, cls=ExtractCommand)
+@click.pass_context
+@click.argument(
+    'input', metavar='<input>',
+    type=click.Path(exists=True, readable=True)
+)
+@click.option(
+    '--verbose', is_flag=True, default=False,
+    help='Print verbose file-by-file progress messages.'
+)
+@click.help_option('-h', '--help')
+@click.option(
+    '--about', is_flag=True, is_eager=True, callback=print_about,
+    help='Show information about ScanCode and licensing and exit.'
+)
+@click.option(
+    '--version', is_flag=True, is_eager=True, callback=print_version,
+    help='Show the version and exit.'
+)
+def extractcode(ctx, input, verbose, *args, **kwargs):  # @ReservedAssignment
+    """extract archives and compressed files found in the <input> file or directory tree.
+
+    Use this command before scanning proper, as a preparation step.
+    Archives found inside an extracted archive are extracted recursively.
+    Extraction is done in-place in a directory named '-extract' side-by-side with an archive.
+    """
+    abs_input = as_posixpath(os.path.abspath(os.path.expanduser(input)))
+    rc = extract_with_progress(abs_input, verbose)
+    ctx.exit(rc)
+
+
+def extract_with_progress(input, verbose=False):  # @ReservedAssignment
+    """
+    Extract archives found at `input` and display progress.
+
+    Return 1 if any extraction reported errors, 0 otherwise (warnings alone
+    do not change the return code).
+    """
+    # note: we use inner functions so they can close on local variables
+
+    def extract_start():
+        """Return the message shown when extraction starts."""
+        return style('Extracting archives...', fg='green')
+
+    def extract_event(item):
+        """Return the progress line to show for an extract event."""
+        if not item:
+            return ''
+        if verbose:
+            # in verbose mode, events flagged as done are not shown
+            if item.done:
+                return ''
+            line = item.source or ''
+        else:
+            line = fileutils.file_name(item.source) or ''
+        return 'Extracting: %s' % (line,)
+
+    def extract_end():
+        """Return a summary of the collected warnings and errors, if any."""
+        has_warnings = False
+        has_errors = False
+        summary = []
+        for xev in extract_results:
+            has_errors = has_errors or bool(xev.errors)
+            has_warnings = has_warnings or bool(xev.warnings)
+            for e in xev.errors:
+                msg = 'ERROR extracting: %s: %r' % (xev.source, e)
+                summary.append(style(msg, fg='red', reset=False))
+            for warn in xev.warnings:
+                msg = 'WARNING extracting: %s: %r' % (xev.source, warn)
+                summary.append(style(msg, fg='yellow', reset=False))
+
+        # errors take precedence over warnings for the summary color
+        summary_color = 'green'
+        if has_warnings:
+            summary_color = 'yellow'
+        if has_errors:
+            summary_color = 'red'
+
+        summary.append(style('Extracting done.', fg=summary_color, reset=True))
+        return '\n'.join(summary)
+
+    extract_results = []
+    has_extract_errors = False
+
+    with utils.progressmanager(
+        extract_archives(input),
+        item_show_func=extract_event,
+        start_show_func=extract_start,
+        finish_show_func=extract_end,
+        verbose=verbose,
+    ) as extraction_events:
+        for xev in extraction_events:
+            if xev.done and (xev.warnings or xev.errors):
+                has_extract_errors = has_extract_errors or bool(xev.errors)
+                extract_results.append(xev)
+    return 1 if has_extract_errors else 0
diff --git a/tests/scancode/test_extract_cli.py b/tests/scancode/test_extract_cli.py
new file mode 100644
index 00000000000..fa2b005461e
--- /dev/null
+++ b/tests/scancode/test_extract_cli.py
@@ -0,0 +1,137 @@
+#
+# Copyright (c) 2015 nexB Inc. and others. All rights reserved.
+# http://nexb.com and https://github.com/nexB/scancode-toolkit/
+# The ScanCode software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode require an acknowledgment.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# When you publish or redistribute any data created with ScanCode or any ScanCode
+# derivative work, you must accompany this data with the following acknowledgment:
+#
+# Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+# ScanCode is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/nexB/scancode-toolkit/ for support and download.
+
+from __future__ import absolute_import, print_function
+
+import os
+
+import click
+from click.testing import CliRunner
+
+from commoncode.fileutils import as_posixpath
+
+from scancode import extract_cli
+from commoncode.testcase import FileDrivenTesting
+
+test_env = FileDrivenTesting()
+test_env.test_data_dir = os.path.join(os.path.dirname(__file__), 'data')
+
+
+# These CLI tests are dependent on py.test monkeypatch to ensure we are testing
+# the actual command outputs as if using a TTY or not.
+
+
+def test_extractcode_command_can_take_an_empty_directory(monkeypatch):
+    test_dir = test_env.get_temp_dir()
+    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
+    runner = CliRunner()
+    result = runner.invoke(extract_cli.extractcode, [test_dir])
+    assert result.exit_code == 0
+    assert 'Extracting archives...' in result.output
+    assert 'Extracting done' in result.output
+
+
+def test_extractcode_command_does_extract_verbose(monkeypatch):
+    test_dir = test_env.get_test_loc('extract', copy=True)
+    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
+    runner = CliRunner()
+    result = runner.invoke(extract_cli.extractcode, ['--verbose', test_dir])
+    assert result.exit_code == 1
+    assert os.path.exists(os.path.join(test_dir, 'some.tar.gz-extract'))
+    expected = [
+        'Extracting archives...',
+        '/some.tar.gz',
+        '/broken.tar.gz',
+        '/tarred_gzipped.tgz',
+        'ERROR extracting',
+        "/broken.tar.gz: 'Unrecognized archive format'",
+        'Extracting done.',
+    ]
+    for e in expected:
+        assert e in result.output
+
+
+def test_extractcode_command_does_no_show_anything_if_not_using_a_tty(monkeypatch):
+    test_dir = test_env.get_test_loc('extract/some.tar.gz', copy=True)
+    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: False)
+    runner = CliRunner()
+    result = runner.invoke(extract_cli.extractcode, ['--verbose', test_dir])
+    assert '' == result.output
+    result = runner.invoke(extract_cli.extractcode, [test_dir])
+    assert '' == result.output
+
+
+def test_extractcode_command_works_with_relative_paths(monkeypatch):
+    # The setup is a tad complex because we want to have a relative dir
+    # to the base dir where we run tests from, ie the scancode-toolkit/ dir
+    # To use relative paths, we use our tmp dir at the root of the code tree
+    from os.path import dirname, join, abspath
+    from commoncode import fileutils
+    import extractcode
+    import tempfile
+    import shutil
+
+    test_src_dir = None  # stays None if the setup fails before mkdtemp
+    try:
+        scancode_root = dirname(dirname(dirname(__file__)))
+        scancode_tmp = join(scancode_root, 'tmp')
+        fileutils.create_dir(scancode_tmp)
+        scancode_root_abs = abspath(scancode_root)
+        test_src_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(scancode_root_abs, '').strip('\\/')
+        test_file = test_env.get_test_loc('extract_relative_path/basic.zip')
+        shutil.copy(test_file, test_src_dir)
+        test_src_file = join(test_src_dir, 'basic.zip')
+        test_tgt_dir = join(scancode_root, test_src_file) + extractcode.EXTRACT_SUFFIX
+
+        runner = CliRunner()
+        monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
+        result = runner.invoke(extract_cli.extractcode, [test_src_file])
+        assert result.exit_code == 0
+        assert 'Extracting done' in result.output
+        assert 'WARNING' not in result.output
+        assert 'ERROR' not in result.output
+        expected = ['/c/a/a.txt', '/c/b/a.txt', '/c/c/a.txt']
+        file_result = [as_posixpath(f.replace(test_tgt_dir, '')) for f in fileutils.file_iter(test_tgt_dir)]
+        assert sorted(expected) == sorted(file_result)
+    finally:
+        if test_src_dir:  # avoid a NameError masking an early setup failure
+            fileutils.delete(test_src_dir)
+
+
+def test_usage_and_help_return_a_correct_script_name_on_all_platforms(monkeypatch):
+    runner = CliRunner()
+    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
+    result = runner.invoke(extract_cli.extractcode, ['--help'])
+    assert 'Usage: extractcode [OPTIONS]' in result.output
+    # this was showing up on Windows
+    assert 'extractcode-script.py' not in result.output
+
+    result = runner.invoke(extract_cli.extractcode, [])
+    assert 'Usage: extractcode [OPTIONS]' in result.output
+    # this was showing up on Windows
+    assert 'extractcode-script.py' not in result.output
+
+    result = runner.invoke(extract_cli.extractcode, ['-xyz'])
+    # this was showing up on Windows
+    assert 'extractcode-script.py' not in result.output