diff --git a/extractcode b/extractcode
new file mode 100755
index 00000000000..b77f8ae99ec
--- /dev/null
+++ b/extractcode
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright (c) 2015 nexB Inc. http://www.nexb.com/ - All rights reserved.
+#
+
+# A minimal shell wrapper to the CLI entry point. Paths are quoted so that an
+# install directory containing spaces works; a failed cd aborts the script.
+SCANCODE_ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+cd "$SCANCODE_ROOT_DIR" || exit 1
+
+SCANCODE_CONFIGURED_PYTHON="$SCANCODE_ROOT_DIR/bin/python"
+if [ ! -f "$SCANCODE_CONFIGURED_PYTHON" ]; then
+    echo "* Configuring ScanCode ..."
+    CONFIGURE_QUIET=1 "$SCANCODE_ROOT_DIR/configure" etc/conf
+fi
+
+"$SCANCODE_ROOT_DIR/bin/extractcode" "$@"
diff --git a/extractcode.bat b/extractcode.bat
new file mode 100644
index 00000000000..d3143e0a94a
--- /dev/null
+++ b/extractcode.bat
@@ -0,0 +1,35 @@
+@echo OFF
+@rem Copyright (c) 2015 nexB Inc. http://www.nexb.com/ - All rights reserved.
+@rem
+
+
+@rem A minimal shell wrapper to the CLI entry point
+@rem Paths are quoted so an install directory containing spaces works.
+set SCANCODE_ROOT_DIR=%~dp0
+@rem /d also switches the current drive when the script lives on another one
+cd /d "%SCANCODE_ROOT_DIR%"
+set SCANCODE_CMD_LINE_ARGS=
+set SCANCODE_CONFIGURED_PYTHON=%SCANCODE_ROOT_DIR%\bin\python.exe
+
+@rem Collect all command line arguments in a variable
+:collectarg
+ if ""%1""=="""" goto continue
+ call set SCANCODE_CMD_LINE_ARGS=%SCANCODE_CMD_LINE_ARGS% %1
+ shift
+ goto collectarg
+
+:continue
+
+@rem Run the one-time configuration when bin\python.exe does not exist
+if not exist "%SCANCODE_CONFIGURED_PYTHON%" goto configure
+goto scancode
+
+:configure
+ echo * Configuring ScanCode ...
+ set CONFIGURE_QUIET=1
+ call "%SCANCODE_ROOT_DIR%\configure" etc/conf
+
+:scancode
+"%SCANCODE_ROOT_DIR%\bin\extractcode" %SCANCODE_CMD_LINE_ARGS%
+
+:EOS
diff --git a/setup.py b/setup.py
index bd0242f7966..4afca32cea2 100644
--- a/setup.py
+++ b/setup.py
@@ -115,6 +115,7 @@ def read(*names, **kwargs):
entry_points={
'console_scripts': [
'scancode = scancode.cli:scancode',
+ 'extractcode = scancode.extract_cli:extractcode',
],
},
)
diff --git a/src/scancode/extract_cli.py b/src/scancode/extract_cli.py
new file mode 100644
index 00000000000..8f2fed6b36f
--- /dev/null
+++ b/src/scancode/extract_cli.py
@@ -0,0 +1,176 @@
+#
+# Copyright (c) 2015 nexB Inc. and others. All rights reserved.
+# http://nexb.com and https://github.com/nexB/scancode-toolkit/
+# The ScanCode software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode require an acknowledgment.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# When you publish or redistribute any data created with ScanCode or any ScanCode
+# derivative work, you must accompany this data with the following acknowledgment:
+#
+# Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+# ScanCode is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/nexB/scancode-toolkit/ for support and download.
+
+from __future__ import print_function, absolute_import
+
+import os
+
+import click
+
+from scancode.cli import version
+from scancode.cli import print_about
+
+from scancode.api import extract_archives
+from scancode.utils import BaseCommand
+from commoncode import fileutils
+from scancode import utils
+from click.termui import style
+from commoncode.fileutils import as_posixpath
+
+
+def print_version(ctx, param, value):
+ if not value or ctx.resilient_parsing:
+ return
+ click.secho('ScanCode extractcode version ' + version)
+ ctx.exit()
+
+# Epilog given to click.command() for extractcode; the '\b' lines are click's no-rewrap markers.
+epilog_text = '''\b\bExamples:
+
+(Note for Windows: use '\\' backslash instead of '/' slash for paths.)
+
+\b
+Extract all archives found in the 'samples' directory tree:
+
+ extractcode samples
+
+Note: If an archive contains other archives, all contained archives will be
+extracted recursively. Extraction is done directly in the 'samples' directory,
+side-by-side with each archive. Files are extracted in a directory named after
+the archive with an '-extract' suffix added to its name, created side-by-side
+with the corresponding archive file.
+
+\b
+Extract a single archive. Files are extracted in the directory
+'samples/arch/zlib.tar.gz-extract/':
+
+ extractcode samples/arch/zlib.tar.gz
+'''
+
+
+class ExtractCommand(BaseCommand):
+ short_usage_help = '''
+Try 'extractcode --help' for help on options and arguments.'''
+
+
+@click.command(name='extractcode', epilog=epilog_text, cls=ExtractCommand)
+@click.pass_context
+@click.argument('input', metavar='',
+type=click.Path(exists=True, readable=True)
+)
+# --verbose is a boolean flag; its value is handed to extract_with_progress() below.
+@click.option(
+ '--verbose', is_flag=True, default=False,
+ help='Print verbose file-by-file progress messages.'
+)
+# Expose the help option as both -h and --help.
+@click.help_option('-h', '--help')
+# is_eager=True on --about/--version: per the click docs, eager options are handled first.
+@click.option('--about',
+ is_flag=True, is_eager=True, callback=print_about,
+ help='Show information about ScanCode and licensing and exit.'
+)
+# print_version echoes the version string and then calls ctx.exit().
+@click.option('--version',
+ is_flag=True, is_eager=True, callback=print_version,
+ help='Show the version and exit.'
+)
+# NOTE(review): *args/**kwargs presumably soak up the about/version values -- confirm.
+def extractcode(ctx, input, verbose, *args, **kwargs): # @ReservedAssignment
+ """extract archives and compressed files found in the file or directory tree.
+
+ Use this command before scanning proper, as an preparation step.
+ Archives found inside an extracted archive are extracted recursively.
+ Extraction is done in-place in a directory named '-extract' side-by-side with an archive.
+ """
+ # Expand '~', make the path absolute, pass it through as_posixpath(), then exit with the returned code.
+ abs_input = as_posixpath(os.path.abspath(os.path.expanduser(input)))
+ rc = extract_with_progress(abs_input, verbose)
+ ctx.exit(rc)
+
+
+def extract_with_progress(input, verbose=False): # @ReservedAssignment
+ """
+ Extract the archives found at `input` while reporting progress. Return 1 if a finished extraction event carried errors, else 0.
+ """
+ # note: we use inner functions so they can close on local variables
+ # (`verbose`, and `extract_results`, which is only assigned further down)
+ def extract_start():
+ return style('Extracting archives...', fg='green')
+
+ def extract_event(item):
+ """
+ Return the progress text for one event: in verbose mode the full source (nothing once done), else fileutils.file_name() of it.
+ """
+ if not item:
+ return ''
+ if verbose:
+ if item.done:
+ return ''
+ line = item.source or ''
+ else:
+ line = fileutils.file_name(item.source) or ''
+ return 'Extracting: %(line)s' % locals()
+
+ # extract_end is handed to the progress manager as finish_show_func
+ def extract_end():
+ """
+ Return one line per collected error and warning, then a final 'Extracting done.' line colored by the worst outcome.
+ """
+ has_warnings = False
+ has_errors = False
+ summary = []
+ for xev in extract_results:
+ has_errors = has_errors or bool(xev.errors)
+ has_warnings = has_warnings or bool(xev.warnings)
+ source = xev.source
+ for e in xev.errors:
+ summary.append(style('ERROR extracting: %(source)s: %(e)r' % locals(), fg='red', reset=False))
+ for warn in xev.warnings:
+ summary.append(style('WARNING extracting: %(source)s: %(warn)r' % locals(), fg='yellow', reset=False))
+ # errors take precedence over warnings: the 'red' assignment runs last
+ summary_color = 'green'
+ if has_warnings:
+ summary_color = 'yellow'
+ if has_errors:
+ summary_color = 'red'
+
+ summary.append(style('Extracting done.', fg=summary_color, reset=True))
+ return '\n'.join(summary)
+
+ # extract_results holds the events that extract_end() summarizes
+ extract_results = []
+ has_extract_errors = False
+ # NOTE(review): progressmanager presumably calls the three *_show_func callbacks to render output -- confirm in scancode.utils
+ with utils.progressmanager(extract_archives(input),
+ item_show_func=extract_event,
+ start_show_func=extract_start,
+ finish_show_func=extract_end,
+ verbose=verbose,
+ ) as extraction_events:
+ for xev in extraction_events:
+ if xev.done and (xev.warnings or xev.errors):
+ has_extract_errors = has_extract_errors or xev.errors
+ extract_results.append(xev)
+ return 1 if has_extract_errors else 0
diff --git a/tests/scancode/test_extract_cli.py b/tests/scancode/test_extract_cli.py
new file mode 100644
index 00000000000..fa2b005461e
--- /dev/null
+++ b/tests/scancode/test_extract_cli.py
@@ -0,0 +1,137 @@
+#
+# Copyright (c) 2015 nexB Inc. and others. All rights reserved.
+# http://nexb.com and https://github.com/nexB/scancode-toolkit/
+# The ScanCode software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode require an acknowledgment.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# When you publish or redistribute any data created with ScanCode or any ScanCode
+# derivative work, you must accompany this data with the following acknowledgment:
+#
+# Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+# ScanCode is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/nexB/scancode-toolkit/ for support and download.
+
+from __future__ import absolute_import, print_function
+
+import os
+
+import click
+from click.testing import CliRunner
+
+from commoncode.fileutils import as_posixpath
+
+from scancode import extract_cli
+from commoncode.testcase import FileDrivenTesting
+
+test_env = FileDrivenTesting()
+test_env.test_data_dir = os.path.join(os.path.dirname(__file__), 'data')
+
+
+"""
+These CLI tests are dependent on py.test monkeypatch to ensure we are testing
+the actual command outputs as if using a TTY or not.
+"""
+
+
+def test_extractcode_command_can_take_an_empty_directory(monkeypatch):
+ test_dir = test_env.get_temp_dir()
+ monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
+ runner = CliRunner()
+ result = runner.invoke(extract_cli.extractcode, [test_dir])
+ assert result.exit_code == 0
+ assert 'Extracting archives...' in result.output
+ assert 'Extracting done' in result.output
+
+
+def test_extractcode_command_does_extract_verbose(monkeypatch):
+ test_dir = test_env.get_test_loc('extract', copy=True)
+ monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
+ runner = CliRunner()
+ result = runner.invoke(extract_cli.extractcode, ['--verbose', test_dir])
+ assert result.exit_code == 1
+ assert os.path.exists(os.path.join(test_dir, 'some.tar.gz-extract'))
+ expected = [
+ 'Extracting archives...',
+ '/some.tar.gz',
+ '/broken.tar.gz',
+ '/tarred_gzipped.tgz',
+ 'ERROR extracting',
+ "/broken.tar.gz: 'Unrecognized archive format'",
+ 'Extracting done.',
+ ]
+ for e in expected:
+ assert e in result.output
+
+
+def test_extractcode_command_does_no_show_anything_if_not_using_a_tty(monkeypatch):
+ test_dir = test_env.get_test_loc('extract/some.tar.gz', copy=True)
+ monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: False)
+ runner = CliRunner()
+ result = runner.invoke(extract_cli.extractcode, ['--verbose', test_dir])
+ assert '' == result.output
+ result = runner.invoke(extract_cli.extractcode, [test_dir])
+ assert '' == result.output
+
+
+def test_extractcode_command_works_with_relative_paths(monkeypatch):
+ # The setup is a tad complex because we want to have a relative dir
+ # to the base dir where we run tests from, ie the scancode-toolkit/ dir
+ # To use relative paths, we use our tmp dir at the root of the code tree
+ from os.path import dirname, join, abspath
+ from commoncode import fileutils
+ import extractcode
+ import tempfile
+ import shutil
+
+ try:
+ scancode_root = dirname(dirname(dirname(__file__)))
+ scancode_tmp = join(scancode_root, 'tmp')
+ fileutils.create_dir(scancode_tmp)
+ scancode_root_abs = abspath(scancode_root)
+ test_src_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(scancode_root_abs, '').strip('\\/')
+ test_file = test_env.get_test_loc('extract_relative_path/basic.zip')
+ shutil.copy(test_file, test_src_dir)
+ test_src_file = join(test_src_dir, 'basic.zip')
+ test_tgt_dir = join(scancode_root, test_src_file) + extractcode.EXTRACT_SUFFIX
+
+ runner = CliRunner()
+ monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
+ result = runner.invoke(extract_cli.extractcode, [test_src_file])
+ assert result.exit_code == 0
+ assert 'Extracting done' in result.output
+ assert not 'WARNING' in result.output
+ assert not 'ERROR' in result.output
+ expected = ['/c/a/a.txt', '/c/b/a.txt', '/c/c/a.txt']
+ file_result = [as_posixpath(f.replace(test_tgt_dir, '')) for f in fileutils.file_iter(test_tgt_dir)]
+ assert sorted(expected) == sorted(file_result)
+ finally:
+ fileutils.delete(test_src_dir)
+
+
+def test_usage_and_help_return_a_correct_script_name_on_all_platforms(monkeypatch):
+ runner = CliRunner()
+ monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
+ result = runner.invoke(extract_cli.extractcode, ['--help'])
+ assert 'Usage: extractcode [OPTIONS]' in result.output
+ # this was showing up on Windows
+ assert 'extractcode-script.py' not in result.output
+
+ result = runner.invoke(extract_cli.extractcode, [])
+ assert 'Usage: extractcode [OPTIONS]' in result.output
+ # this was showing up on Windows
+ assert 'extractcode-script.py' not in result.output
+
+ result = runner.invoke(extract_cli.extractcode, ['-xyz'])
+ # this was showing up on Windows
+ assert 'extractcode-script.py' not in result.output