Skip to content

Commit

Permalink
Created new extractcode standalone command for #52
Browse files Browse the repository at this point in the history
  • Loading branch information
pombredanne authored and jdaguil committed Nov 24, 2015
1 parent 5706d30 commit 2a38e29
Show file tree
Hide file tree
Showing 5 changed files with 366 additions and 0 deletions.
17 changes: 17 additions & 0 deletions extractcode
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
#
# Copyright (c) 2015 nexB Inc. http://www.nexb.com/ - All rights reserved.
#

# A minimal shell wrapper to the CLI entry point.
# Every path built from SCANCODE_ROOT_DIR is quoted so that an installation
# directory containing spaces or glob characters still works.

# Absolute path of the directory that contains this script.
SCANCODE_ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# configure is called below with the relative "etc/conf" argument, so run from
# the root directory; stop if that directory cannot be entered.
cd "$SCANCODE_ROOT_DIR" || exit 1

SCANCODE_CONFIGURED_PYTHON="$SCANCODE_ROOT_DIR/bin/python"
# Configure on first use: bin/python is missing until configure has run.
if [ ! -f "$SCANCODE_CONFIGURED_PYTHON" ]; then
    echo "* Configuring ScanCode ..."
    CONFIGURE_QUIET=1 "$SCANCODE_ROOT_DIR/configure" etc/conf
fi

# Last command of the script: its exit status becomes the wrapper's exit status.
"$SCANCODE_ROOT_DIR/bin/extractcode" "$@"
35 changes: 35 additions & 0 deletions extractcode.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
@echo OFF
@rem Copyright (c) 2015 nexB Inc. http://www.nexb.com/ - All rights reserved.
@rem


@rem A minimal shell wrapper to the CLI entry point
@rem Every path built from SCANCODE_ROOT_DIR is quoted so that an installation
@rem directory containing spaces still works.

@rem %~dp0 is the drive and directory of this script
set "SCANCODE_ROOT_DIR=%~dp0"
@rem /d also changes the current drive when the script is on another drive
cd /d "%SCANCODE_ROOT_DIR%"

set SCANCODE_CMD_LINE_ARGS=
set "SCANCODE_CONFIGURED_PYTHON=%SCANCODE_ROOT_DIR%\bin\python.exe"

@rem Collect all command line arguments in a variable
:collectarg
if ""%1""=="""" goto continue
call set SCANCODE_CMD_LINE_ARGS=%SCANCODE_CMD_LINE_ARGS% %1
shift
goto collectarg

:continue


@rem Configure on first use: bin\python.exe is missing until configure has run
if not exist "%SCANCODE_CONFIGURED_PYTHON%" goto configure
goto scancode

:configure
echo * Configuring ScanCode ...
set CONFIGURE_QUIET=1
call "%SCANCODE_ROOT_DIR%\configure" etc/conf

:scancode
"%SCANCODE_ROOT_DIR%\bin\extractcode" %SCANCODE_CMD_LINE_ARGS%

:EOS
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def read(*names, **kwargs):
entry_points={
'console_scripts': [
'scancode = scancode.cli:scancode',
'extractcode = scancode.extract_cli:extractcode',
],
},
)
176 changes: 176 additions & 0 deletions src/scancode/extract_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
#
# Copyright (c) 2015 nexB Inc. and others. All rights reserved.
# http://nexb.com and https://github.com/nexB/scancode-toolkit/
# The ScanCode software is licensed under the Apache License version 2.0.
# Data generated with ScanCode require an acknowledgment.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# When you publish or redistribute any data created with ScanCode or any ScanCode
# derivative work, you must accompany this data with the following acknowledgment:
#
# Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
# ScanCode is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode-toolkit/ for support and download.

from __future__ import print_function, absolute_import

import os

import click

from scancode.cli import version
from scancode.cli import print_about

from scancode.api import extract_archives
from scancode.utils import BaseCommand
from commoncode import fileutils
from scancode import utils
from click.termui import style
from commoncode.fileutils import as_posixpath


def print_version(ctx, param, value):
    """
    Click option callback for --version: print the extractcode version and
    exit. Do nothing when the flag is not set or when click is only doing
    resilient parsing.
    """
    should_print = value and not ctx.resilient_parsing
    if should_print:
        click.secho('ScanCode extractcode version ' + version)
        ctx.exit()


# Usage examples passed as `epilog=` to the extractcode click command.
# NOTE: this is not a raw string: '\b' is a literal backspace character and
# '\\' is a single backslash in the resulting text.
# NOTE(review): the '\b' lines are presumably click's marker that disables
# re-wrapping of the paragraph that follows -- confirm against the click docs.
epilog_text = '''\b\bExamples:
(Note for Windows: use '\\' backslash instead of '/' slash for paths.)
\b
Extract all archives found in the 'samples' directory tree:
extractcode samples
Note: If an archive contains other archives, all contained archives will be
extracted recursively. Extraction is done directly in the 'samples' directory,
side-by-side with each archive. Files are extracted in a directory named after
the archive with an '-extract' suffix added to its name, created side-by-side
with the corresponding archive file.
\b
Extract a single archive. Files are extracted in the directory
'samples/arch/zlib.tar.gz-extract/':
extractcode samples/arch/zlib.tar.gz
'''


class ExtractCommand(BaseCommand):
    # Command class used by the extractcode click command (passed as `cls=`).
    # NOTE(review): short_usage_help is presumably read by BaseCommand when it
    # prints a short usage message -- confirm in scancode.utils.
    short_usage_help = (
        "\n"
        "Try 'extractcode --help' for help on options and arguments."
    )


# Command line entry point of the `extractcode` command: a click command built
# with the ExtractCommand class and the usage examples of epilog_text.
@click.command(name='extractcode', epilog=epilog_text, cls=ExtractCommand)
@click.pass_context
# <input> must be an existing, readable path.
@click.argument('input', metavar='<input>',
type=click.Path(exists=True, readable=True)
)

@click.option(
'--verbose', is_flag=True, default=False,
help='Print verbose file-by-file progress messages.'
)

@click.help_option('-h', '--help')

# --about and --version are eager flags handled by their callbacks;
# print_version prints the version and then calls ctx.exit().
@click.option('--about',
is_flag=True, is_eager=True, callback=print_about,
help='Show information about ScanCode and licensing and exit.'
)

@click.option('--version',
is_flag=True, is_eager=True, callback=print_version,
help='Show the version and exit.'
)

# NOTE(review): *args/**kwargs presumably absorb the values of the --about and
# --version options that click passes along -- confirm before removing them.
def extractcode(ctx, input, verbose, *args, **kwargs): # @ReservedAssignment
"""extract archives and compressed files found in the <input> file or directory tree.
Use this command before scanning proper, as an <input> preparation step.
Archives found inside an extracted archive are extracted recursively.
Extraction is done in-place in a directory named '-extract' side-by-side with an archive.
"""

# Expand a leading '~' and make the path absolute before converting it with
# as_posixpath (presumably to forward slashes -- confirm in commoncode.fileutils).
abs_input = as_posixpath(os.path.abspath(os.path.expanduser(input)))
# rc is 1 if any extraction error was reported, else 0; it becomes the exit code.
rc = extract_with_progress(abs_input, verbose)
ctx.exit(rc)


def extract_with_progress(input, verbose=False):  # @ReservedAssignment
    """
    Extract the archives found at `input` while displaying progress.
    Return 1 if any extraction event reported errors, 0 otherwise.
    """
    # Finished events that carry warnings or errors. The main loop below fills
    # this list; extract_end() reads it through the closure once done.
    extract_results = []

    def extract_start():
        """Return the message displayed when extraction starts."""
        return style('Extracting archives...', fg='green')

    def extract_event(item):
        """Return the progress message for a single extraction event."""
        if not item:
            return ''
        if not verbose:
            # terse mode: show only the file name
            line = fileutils.file_name(item.source) or ''
        elif item.done:
            # verbose mode: nothing to show when an item completes
            return ''
        else:
            line = item.source or ''
        return 'Extracting: %(line)s' % {'line': line}

    def extract_end():
        """
        Return the closing message: one line per collected error and warning,
        then a final line colored after the worst severity seen.
        """
        summary = []
        for result in extract_results:
            source = result.source
            summary.extend(
                style('ERROR extracting: %(source)s: %(e)r'
                      % {'source': source, 'e': error},
                      fg='red', reset=False)
                for error in result.errors
            )
            summary.extend(
                style('WARNING extracting: %(source)s: %(warn)r'
                      % {'source': source, 'warn': warning},
                      fg='yellow', reset=False)
                for warning in result.warnings
            )

        # errors take precedence over warnings for the final color
        if any(result.errors for result in extract_results):
            summary_color = 'red'
        elif any(result.warnings for result in extract_results):
            summary_color = 'yellow'
        else:
            summary_color = 'green'

        summary.append(style('Extracting done.', fg=summary_color, reset=True))
        return '\n'.join(summary)

    has_extract_errors = False

    with utils.progressmanager(
            extract_archives(input),
            item_show_func=extract_event,
            start_show_func=extract_start,
            finish_show_func=extract_end,
            verbose=verbose) as extraction_events:
        for result in extraction_events:
            if not result.done:
                continue
            if result.errors:
                has_extract_errors = True
            if result.warnings or result.errors:
                extract_results.append(result)

    return 1 if has_extract_errors else 0
137 changes: 137 additions & 0 deletions tests/scancode/test_extract_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#
# Copyright (c) 2015 nexB Inc. and others. All rights reserved.
# http://nexb.com and https://github.com/nexB/scancode-toolkit/
# The ScanCode software is licensed under the Apache License version 2.0.
# Data generated with ScanCode require an acknowledgment.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# When you publish or redistribute any data created with ScanCode or any ScanCode
# derivative work, you must accompany this data with the following acknowledgment:
#
# Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
# ScanCode is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode-toolkit/ for support and download.

from __future__ import absolute_import, print_function

import os

import click
from click.testing import CliRunner

from commoncode.fileutils import as_posixpath

from scancode import extract_cli
from commoncode.testcase import FileDrivenTesting

# Shared test helper used by the tests below; its test data is looked up in the
# 'data' directory located next to this test module.
test_env = FileDrivenTesting()
test_env.test_data_dir = os.path.join(os.path.dirname(__file__), 'data')


"""
These CLI tests are dependent on py.test monkeypatch to ensure we are testing
the actual command outputs as if using a TTY or not.
"""


def test_extractcode_command_can_take_an_empty_directory(monkeypatch):
    # Running on an empty directory succeeds and still shows the start and end
    # messages. isatty is patched to report a TTY.
    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
    empty_dir = test_env.get_temp_dir()
    result = CliRunner().invoke(extract_cli.extractcode, [empty_dir])
    assert result.exit_code == 0
    for message in ('Extracting archives...', 'Extracting done'):
        assert message in result.output


def test_extractcode_command_does_extract_verbose(monkeypatch):
    # In verbose mode each archive path is reported, the broken archive is
    # reported as an error and the command exits with 1.
    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
    test_dir = test_env.get_test_loc('extract', copy=True)
    result = CliRunner().invoke(extract_cli.extractcode, ['--verbose', test_dir])
    assert result.exit_code == 1

    extracted_dir = os.path.join(test_dir, 'some.tar.gz-extract')
    assert os.path.exists(extracted_dir)

    expected_fragments = (
        'Extracting archives...',
        '/some.tar.gz',
        '/broken.tar.gz',
        '/tarred_gzipped.tgz',
        'ERROR extracting',
        "/broken.tar.gz: 'Unrecognized archive format'",
        'Extracting done.',
    )
    output = result.output
    missing = [fragment for fragment in expected_fragments if fragment not in output]
    assert missing == []


def test_extractcode_command_does_no_show_anything_if_not_using_a_tty(monkeypatch):
    # With isatty patched to report no TTY, nothing is printed, with or
    # without --verbose.
    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: False)
    archive = test_env.get_test_loc('extract/some.tar.gz', copy=True)
    runner = CliRunner()
    for cli_args in (['--verbose', archive], [archive]):
        result = runner.invoke(extract_cli.extractcode, cli_args)
        assert '' == result.output


def test_extractcode_command_works_with_relative_paths(monkeypatch):
    # The setup is a tad complex because we want to have a relative dir
    # to the base dir where we run tests from, ie the scancode-toolkit/ dir
    # To use relative paths, we use our tmp dir at the root of the code tree
    from os.path import dirname, join, abspath
    from commoncode import fileutils
    import extractcode
    import tempfile
    import shutil

    # Bound before the try block so that the cleanup in `finally` cannot raise
    # an UnboundLocalError (and hide the real failure) when setup fails early.
    test_src_dir = None
    try:
        scancode_root = dirname(dirname(dirname(__file__)))
        scancode_tmp = join(scancode_root, 'tmp')
        fileutils.create_dir(scancode_tmp)
        scancode_root_abs = abspath(scancode_root)
        # strip the absolute root prefix to get a path relative to the root
        test_src_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(scancode_root_abs, '').strip('\\/')
        test_file = test_env.get_test_loc('extract_relative_path/basic.zip')
        shutil.copy(test_file, test_src_dir)
        test_src_file = join(test_src_dir, 'basic.zip')
        test_tgt_dir = join(scancode_root, test_src_file) + extractcode.EXTRACT_SUFFIX

        runner = CliRunner()
        monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
        result = runner.invoke(extract_cli.extractcode, [test_src_file])
        assert result.exit_code == 0
        assert 'Extracting done' in result.output
        assert 'WARNING' not in result.output
        assert 'ERROR' not in result.output
        expected = ['/c/a/a.txt', '/c/b/a.txt', '/c/c/a.txt']
        file_result = [as_posixpath(f.replace(test_tgt_dir, '')) for f in fileutils.file_iter(test_tgt_dir)]
        assert sorted(expected) == sorted(file_result)
    finally:
        # only clean up what was actually created
        if test_src_dir:
            fileutils.delete(test_src_dir)


def test_usage_and_help_return_a_correct_script_name_on_all_platforms(monkeypatch):
    # The usage line must name the command 'extractcode' and never the
    # 'extractcode-script.py' name that was showing up on Windows.
    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
    runner = CliRunner()
    usage_line = 'Usage: extractcode [OPTIONS]'
    windows_script_name = 'extractcode-script.py'

    # (command line arguments, whether the usage line is asserted)
    scenarios = (
        (['--help'], True),
        ([], True),
        (['-xyz'], False),
    )
    for cli_args, check_usage in scenarios:
        result = runner.invoke(extract_cli.extractcode, cli_args)
        if check_usage:
            assert usage_line in result.output
        # this was showing up on Windows
        assert windows_script_name not in result.output

0 comments on commit 2a38e29

Please sign in to comment.