diff --git a/etc/scripts/genlicspdx.py b/etc/scripts/genlicspdx.py
new file mode 100644
index 00000000000..c4355844759
--- /dev/null
+++ b/etc/scripts/genlicspdx.py
@@ -0,0 +1,113 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2019 nexB Inc. and others. All rights reserved.
+# http://nexb.com and https://github.com/nexB/scancode-toolkit/
+# The ScanCode software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode require an acknowledgment.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# When you publish or redistribute any data created with ScanCode or any ScanCode
+# derivative work, you must accompany this data with the following acknowledgment:
+#
+#  Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+#  OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+#  ScanCode should be considered or used as legal advice. Consult an Attorney
+#  for any legal advice.
+#  ScanCode is a free software code scanning tool from nexB Inc. and others.
+#  Visit https://github.com/nexB/scancode-toolkit/ for support and download.
+
+from __future__ import absolute_import
+from __future__ import unicode_literals
+from __future__ import print_function
+
+import os
+
+import click
+click.disable_unicode_literals_warning = True
+
+from licensedcode.models import load_licenses
+from scancode.cli import run_scan
+
+
+"""
+Generate an SPDX document for each license known in ScanCode that is not listed
+at SPDX.
+Run python genlicspdx.py -h for help.
+
+NOTE: this is rather inefficient as it is starting a new command line process
+for each license, taking a few seconds each time.
+Upcoming code to call a scan function instead will be more efficient.
+"""
+
+FOSS_CATEGORIES = set([
+    'Copyleft',
+    'Copyleft Limited',
+    'Patent License',
+    'Permissive',
+    'Public Domain',
+])
+
+
+@click.command()
+@click.argument('license_dir',
+    type=click.Path(file_okay=False, exists=True, writable=True,
+                    allow_dash=False, resolve_path=True),
+    metavar='DIR')
+@click.option('-v', '--verbose', is_flag=True, default=False, help='Print execution messages.')
+@click.help_option('-h', '--help')
+def cli(license_dir, verbose):
+    """
+    Create one SPDX tag-value document for each non-SPDX ScanCode licenses.
+    Store these in the DIR directory
+    """
+
+    base_kwargs = dict(
+        license=True, license_diag=True, license_text=True, info=True,
+        strip_root=True, quiet=True, return_results=False)
+
+    licenses_by_key = load_licenses(with_deprecated=False)
+
+    # only FOSS licenses that have a text file and no SPDX key get a document
+    for lic in licenses_by_key.values():
+        ld = lic.to_dict()
+
+        if lic.spdx_license_key:
+            if verbose:
+                click.echo(
+                    'Skipping ScanCode: {key} that is an SPDX license: {spdx_license_key}'.format(**ld))
+            continue
+
+        if not lic.text_file or not os.path.exists(lic.text_file):
+            if verbose:
+                click.echo(
+                    'Skipping license without text: {key}'.format(**ld))
+            continue
+
+        if lic.category not in FOSS_CATEGORIES:
+            if verbose:
+                click.echo(
+                    'Skipping non FOSS license: {key}'.format(**ld))
+            continue
+
+        output = 'licenseref-scancode-{key}.spdx'.format(**ld)
+        output = os.path.join(license_dir, output)
+
+        if verbose:
+            click.echo('Creating SPDX document for license: {key}'.format(**ld))
+            click.echo('at: {output}'.format(output=output))
+
+        with open(output, 'wb') as output_file:
+            kwargs = dict(input=lic.text_file, spdx_tv=output_file)
+            kwargs.update(base_kwargs)
+            run_scan(**kwargs)  # the scan output goes to the opened output_file
+
+
+if __name__ == '__main__':
+    cli()
diff --git a/etc/scripts/scancli.py b/etc/scripts/scancli.py
new file mode 100644
index 00000000000..d5829a421fd
--- /dev/null
+++ b/etc/scripts/scancli.py
@@ -0,0 +1,74 @@
+#
+# Copyright (c) 2019 nexB Inc. and others. All rights reserved.
+# http://nexb.com and https://github.com/nexB/scancode-toolkit/
+# The ScanCode software is licensed under the Apache License version 2.0.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import json
+from os.path import abspath
+from os.path import dirname
+from os.path import join
+from os.path import normpath
+
+import execnet
+
+import scanserv
+
+"""
+This is a module designed to be called from Python 2 or 3 and is the client
+side. See scanserv for the backend server module that runs on Python 2 and
+effectively runs scancode.
+"""
+
+
+def scan(locations, deserialize=False, scancode_root_dir=None):
+    """
+    Scan the list of paths at `locations` and return the results as an iterable
+    of JSON strings. If `deserialize` is True the iterable contains Python data
+    instead. Each location is scanned independently. The remote gateway is
+    terminated when the iteration ends, fails or the generator is closed.
+    """
+    if not scancode_root_dir:
+        scancode_root_dir = abspath(normpath(__file__))
+        scancode_root_dir = dirname(dirname(dirname(scancode_root_dir)))
+    python2 = join(scancode_root_dir, 'bin', 'python')
+    spec = 'popen//python={python2}'.format(python2=python2)
+    gateway = execnet.makegateway(spec)  # NOQA
+    try:
+        channel = gateway.remote_exec(scanserv)
+        for location in locations:
+            # build a mapping of options to use for this scan
+            scan_kwargs = dict(
+                location=location,
+                license=True,
+                license_text=True,
+                license_diag=True,
+                copyright=True,
+                info=True,
+                processes=0,
+            )
+            channel.send(scan_kwargs)  # execute func-call remotely
+            results = channel.receive()
+            yield json.loads(results) if deserialize else results
+    finally:
+        # always stop the spawned Python process, even on error or early exit
+        gateway.exit()
+
+
+if __name__ == '__main__':
+    import sys  # NOQA
+    args = sys.argv[1:]
+    for s in scan(args):
+        print(s)
diff --git a/etc/scripts/scanserv.README b/etc/scripts/scanserv.README
new file mode 100644
index 00000000000..1da44b34312
--- /dev/null
+++ b/etc/scripts/scanserv.README
@@ -0,0 +1,29 @@
+A simple proof of concept for Python3 remoting with execnet.
+
+See ticket #1400 for more.
+
+This is an example of how to call Scancode as a function from Python2 or Python3.
+The benefits are that when the server process has loaded the license index,
+and imported its modules there is no per-call import/loading penalty anymore.
+
+This is using execnet which is the multiprocessing library used by
+py.test and therefore a rather stable and high quality engine.
+
+To test, do this::
+
+1. checkout scancode and run ./configure in a first shell. This is for a plain
+ScanCode using Python 2 that will be used as a "server".
+
+2. in another shell, create a virtualenv with Python 3 in another
+ location.  Activate that venv, and `pip install simplejson execnet`
+
+3. Change dir to the installed scancode-toolkit/etc/scripts where the scancli.py
+and scanserv.py scripts are. Then run::
+
+    python3 scancli.py ../../NOTICE  ../../setup.py
+
+This will effectively make remote function calls to the Python2
+scancode and get the results back in Python3. It also allows making
+multiple calls that reuse the same process, hence amortizing any startup
+costs. Here this will run two scans: one on NOTICE and another on setup.py.
+It could have been directories too.
diff --git a/etc/scripts/scanserv.py b/etc/scripts/scanserv.py
new file mode 100644
index 00000000000..fa49bb73605
--- /dev/null
+++ b/etc/scripts/scanserv.py
@@ -0,0 +1,55 @@
+#
+# Copyright (c) 2019 nexB Inc. and others. All rights reserved.
+# http://nexb.com and https://github.com/nexB/scancode-toolkit/
+# The ScanCode software is licensed under the Apache License version 2.0.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+"""
+Python2 "server side" of the scan server. In a given execnet session, this
+process will hold a loaded license index and can be invoked multiple times
+without the index load penalty on each call.
+"""
+
+
+def as_json(results, pretty=True):
+    """
+    Serialize the `results` data structure and return it as a JSON string.
+    """
+    # simplejson is used for its ability to serialize iterables as arrays.
+    import simplejson
+
+    if pretty:
+        layout = dict(indent=2 * b' ')
+    else:
+        layout = dict(separators=(b',', b':',))
+    dumped = simplejson.dumps(results, iterable_as_array=True, encoding='utf-8', **layout)
+    return dumped + b'\n'
+
+
+def run_scan(location, **kwargs):  # scan `location`, return the results as JSON
+    from scancode import cli  # imported lazily, only when a scan is requested
+    pretty = kwargs.pop('pretty', True)  # formatting flag: not a scan option
+    return as_json(cli.run_scan(location, **kwargs), pretty=pretty)
+
+
+if __name__ == '__channelexec__':  # run through execnet remote_exec()
+    for kwargs in channel:  # NOQA
+        # each item is either a location string or a mapping of run_scan kwargs
+        if isinstance(kwargs, (str, unicode)):
+            channel.send(run_scan(kwargs))  # NOQA
+        elif isinstance(kwargs, dict):
+            channel.send(run_scan(**kwargs))  # NOQA
+        else:
+            raise Exception('Unknown arguments type: ' + repr(kwargs))
diff --git a/src/commoncode/fileset.py b/src/commoncode/fileset.py
index f01c6db5ae4..aad58783e57 100644
--- a/src/commoncode/fileset.py
+++ b/src/commoncode/fileset.py
@@ -28,21 +28,26 @@
 
 import fnmatch
 import os
-import logging
 
 from commoncode import fileutils
 from commoncode import paths
 from commoncode.system import on_linux
 
-DEBUG = False
-logger = logging.getLogger(__name__)
-# import sys
-# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
-# logger.setLevel(logging.DEBUG)
+
+TRACE = False
+if TRACE:
+    import logging
+    import sys
+
+    logger = logging.getLogger(__name__)
+    logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
+    logger.setLevel(logging.DEBUG)
+
 
 POSIX_PATH_SEP = b'/' if on_linux else '/'
 EMPTY_STRING = b'' if on_linux else ''
 
+
 """
 Match files and directories paths based on inclusion and exclusion glob-style
 patterns.
@@ -79,33 +84,44 @@
 """
 
 
-def match(path, includes, excludes):
+def is_included(path, includes=None, excludes=None):
     """
-    Return a matching pattern value (e.g. a reason message) or False if `path` is matched or not.
-    If the `path` is empty, return False.
+    Return True if `path` is included based on mappings of `includes` and
+    `excludes` glob patterns. If the `path` is empty, return False.
 
     Matching is done based on the set of `includes` and `excludes` patterns maps
-    of {fnmtch pattern -> value} where value can be a message string or some other
-    object.
-    The order of the includes and excludes items does not matter and if a map is
-    empty , it is not used for matching.
+    of {fnmatch pattern: message}. If `includes` are provided they are tested
+    first. The `excludes` are tested second if provided.
+
+    The ordering of the includes and excludes items does not matter and if a map
+    is empty, it is not used for matching.
     """
-    includes = includes or {}
-    excludes = excludes or {}
     if not path or not path.strip():
         return False
 
-    included = get_matches(path, includes, all_matches=False)
-    excluded = get_matches(path, excludes, all_matches=False)
-    if DEBUG:
-        logger.debug('in_fileset: path: %(path)r included:%(included)r, '
-                     'excluded:%(excluded)r .' % locals())
-    if excluded:
-        return False
-    elif included:
-        return included
-    else:
-        return False
+    if not includes and not excludes:  # no patterns at all: nothing to test
+        return True
+
+    includes = includes or {}
+    includes = {k: v for k, v in includes.items() if k}  # drop empty patterns
+    excludes = excludes or {}
+    excludes = {k: v for k, v in excludes.items() if k}  # drop empty patterns
+
+    if includes:
+        included = get_matches(path, includes, all_matches=False)
+        if TRACE:
+            logger.debug('in_fileset: path: %(path)r included:%(included)r' % locals())
+        if not included:
+            return False
+
+    if excludes:
+        excluded = get_matches(path, excludes, all_matches=False)
+        if TRACE:
+            logger.debug('in_fileset: path: %(path)r excluded:%(excluded)r .' % locals())
+        if excluded:
+            return False
+
+    return True
 
 
 def get_matches(path, patterns, all_matches=False):
@@ -122,13 +138,17 @@ def get_matches(path, patterns, all_matches=False):
     pathstripped = path.lstrip(POSIX_PATH_SEP)
     if not pathstripped:
         return False
+
     segments = paths.split(pathstripped)
-    if DEBUG:
+
+    if TRACE:
         logger.debug('_match: path: %(path)r patterns:%(patterns)r.' % locals())
+
     matches = []
     if not isinstance(patterns, dict):
         assert isinstance(patterns, (list, tuple)), 'Invalid patterns: {}'.format(patterns)
         patterns = {p: p for p in patterns}
+
     for pat, value in patterns.items():
         if not pat or not pat.strip():
             continue
@@ -146,8 +166,9 @@ def get_matches(path, patterns, all_matches=False):
             matches.append(value)
             if not all_matches:
                 break
-    if DEBUG:
+    if TRACE:
         logger.debug('_match: matches: %(matches)r' % locals())
+
     if not all_matches:
         if matches:
             return matches[0]
@@ -183,6 +204,7 @@ def includes_excludes(patterns, message):
     excluded = {}
     if not patterns:
         return included, excluded
+
     for pat in patterns:
         pat = pat.strip()
         if not pat or pat.startswith(POUND):
diff --git a/src/commoncode/fileutils.py b/src/commoncode/fileutils.py
index dd8c8494ba4..5debd750582 100644
--- a/src/commoncode/fileutils.py
+++ b/src/commoncode/fileutils.py
@@ -91,6 +91,8 @@ def logger_debug(*args):
 ALL_SEPS = POSIX_PATH_SEP + WIN_PATH_SEP
 EMPTY_STRING = b'' if on_linux else ''
 DOT = b'.' if on_linux else '.'
+PATH_SEP = bytes(os.sep) if on_linux else unicode(os.sep)
+
 
 """
 File, paths and directory utility functions.
diff --git a/src/commoncode/ignore.py b/src/commoncode/ignore.py
index d04e4892342..58f9e8831f4 100644
--- a/src/commoncode/ignore.py
+++ b/src/commoncode/ignore.py
@@ -44,7 +44,7 @@ def is_ignored(location, ignores, unignores=None, skip_special=True):
     """
     if skip_special and filetype.is_special(location):
         return True
-    return fileset.match(location, includes=ignores, excludes=unignores)
+    return not fileset.is_included(location, includes=unignores, excludes=ignores)
 
 
 def is_ignore_file(location):
diff --git a/src/formattedcode/output_json.py b/src/formattedcode/output_json.py
index 220dc3802f8..926ba86acca 100644
--- a/src/formattedcode/output_json.py
+++ b/src/formattedcode/output_json.py
@@ -76,8 +76,8 @@ def is_enabled(self, output_json, **kwargs):
         return output_json
 
     def process_codebase(self, codebase, output_json, **kwargs):
-        files = self.get_files(codebase, **kwargs)
-        write_json(codebase, files, output_file=output_json, pretty=False)
+        results = get_results(codebase, as_list=False, **kwargs)
+        write_json(results, output_file=output_json, pretty=False)
 
 
 @output_impl
@@ -96,35 +96,47 @@ def is_enabled(self, output_json_pp, **kwargs):
         return output_json_pp
 
     def process_codebase(self, codebase, output_json_pp, **kwargs):
-        files = self.get_files(codebase, **kwargs)
-        write_json(codebase, files, output_file=output_json_pp, pretty=True, **kwargs)
+        results = get_results(codebase, as_list=False, **kwargs)
+        write_json(results, output_file=output_json_pp, pretty=True)
 
 
-def write_json(codebase, files, output_file,
-               include_summary=False, include_score=False,
-               pretty=False, **kwargs):
-    # NOTE: we write as binary, not text
+def write_json(results, output_file, pretty=False, **kwargs):
+    """
+    Serialize `results` as JSON to the `output_file` opened file-like object.
+    """
+    # NOTE: bytes are written, not unicode text; extra `kwargs` are ignored
+    if pretty:
+        layout = dict(indent=2 * b' ')
+    else:
+        layout = dict(separators=(b',', b':',))
+    dumped = simplejson.dumps(results, iterable_as_array=True, encoding='utf-8', **layout)
+    output_file.write(dumped)
+    output_file.write(b'\n')
+
+
+def get_results(codebase, as_list=False, **kwargs):
+    """
+    Return an ordered mapping of scan results collected from a `codebase`.
+    If `as_list` is true, consume the "files" iterator into a list.
+    """
 
     codebase.add_files_count_to_current_header()
-    scan = OrderedDict([(b'headers', codebase.get_headers()), ])
+    results = OrderedDict([('headers', codebase.get_headers()), ])
 
     # add codebase toplevel attributes such as summaries
     if codebase.attributes:
-        scan.update(codebase.attributes.to_dict())
+        results.update(codebase.attributes.to_dict())
+
+    files = OutputPlugin.get_files(codebase, **kwargs)
+    if as_list:
+        files = list(files)
 
     if TRACE:
-        logger_debug('write_json: files')
+        logger_debug('get_results: files')
         files = list(files)
         from pprint import pformat
         logger_debug(pformat(files))
 
-    scan[b'files'] = files
-
-    kwargs = dict(iterable_as_array=True, encoding='utf-8')
-    if pretty:
-        kwargs.update(dict(indent=2 * b' '))
-    else:
-        kwargs.update(dict(separators=(b',', b':',)))
+    results['files'] = files  # set last: TRACE may have listified the iterator
+    return results
 
-    output_file.write(simplejson.dumps(scan, **kwargs))
-    output_file.write(b'\n')
diff --git a/src/formattedcode/output_jsonlines.py b/src/formattedcode/output_jsonlines.py
index 8914e64ef4c..770988b667c 100644
--- a/src/formattedcode/output_jsonlines.py
+++ b/src/formattedcode/output_jsonlines.py
@@ -53,6 +53,7 @@ class JsonLinesOutput(OutputPlugin):
     def is_enabled(self, output_json_lines, **kwargs):
         return output_json_lines
 
+    # TODO: reuse the json output code and merge that in a single plugin
     def process_codebase(self, codebase, output_json_lines, **kwargs):
         #NOTE: we write as binary, not text
         files = self.get_files(codebase, **kwargs)
diff --git a/src/plugincode/__init__.py b/src/plugincode/__init__.py
index 7ce3ec12d5a..74e7bab3779 100644
--- a/src/plugincode/__init__.py
+++ b/src/plugincode/__init__.py
@@ -189,8 +189,9 @@ def load_plugins(cls):
         for stage, manager in cls.managers.items():
             mgr_setup = manager.setup()
             if not mgr_setup:
+                from scancode import ScancodeError
                 msg = 'Cannot load ScanCode plugins for stage: %(stage)s' % locals()
-                raise Exception(msg)
+                raise ScancodeError(msg)
             mplugin_classes, mplugin_options = mgr_setup
             plugin_classes.extend(mplugin_classes)
             plugin_options.extend(mplugin_options)
@@ -202,7 +203,7 @@ def setup(self):
         all plugin classes).
 
         Load and validate available plugins for this PluginManager from its
-        assigned `entrypoint`. Raise an Exception if a plugin is not valid such
+        assigned `entrypoint`. Raise a ScancodeError if a plugin is not valid such
         that when it does not subcclass the manager `plugin_base_class`.
         Must be called once to setup the plugins of this manager.
         """
@@ -215,7 +216,7 @@ def setup(self):
         entrypoint = self.entrypoint
         try:
             self.manager.load_setuptools_entrypoints(entrypoint)
-        except ImportError, e:
+        except ImportError as e:
             raise e
         stage = self.stage
 
@@ -227,7 +228,8 @@ def setup(self):
             if not issubclass(plugin_class, self.plugin_base_class):
                 qname = '%(stage)s:%(name)s' % locals()
                 plugin_base_class = self.plugin_base_class
-                raise Exception(
+                from scancode import ScancodeError #NOQA
+                raise ScancodeError(
                     'Invalid plugin: %(qname)r: %(plugin_class)r '
                     'must extend %(plugin_base_class)r.' % locals())
 
@@ -236,7 +238,8 @@ def setup(self):
                     qname = '%(stage)s:%(name)s' % locals()
                     oname = option.name
                     clin = CommandLineOption
-                    raise Exception(
+                    from scancode import ScancodeError #NOQA
+                    raise ScancodeError(
                         'Invalid plugin: %(qname)r: option %(oname)r '
                         'must extend %(clin)r.' % locals())
                 plugin_options.append(option)
diff --git a/src/scancode/__init__.py b/src/scancode/__init__.py
index 271c66ae7ec..0115e81fcc6 100644
--- a/src/scancode/__init__.py
+++ b/src/scancode/__init__.py
@@ -72,6 +72,15 @@ def logger_debug(*args):
         return logger.debug(' '.join(isinstance(a, (unicode, str))
                                      and a or repr(a) for a in args))
 
+
+class ScancodeError(Exception):
+    """Base exception for scancode errors"""
+
+
+class ScancodeCliUsageError(ScancodeError, click.UsageError):
+    """Exception for command line usage errors"""
+
+
 # CLI help groups
 SCAN_GROUP = 'primary scans'
 SCAN_OPTIONS_GROUP = 'scan options'
diff --git a/src/scancode/cli.py b/src/scancode/cli.py
index abc80a72a30..4a4e088845e 100644
--- a/src/scancode/cli.py
+++ b/src/scancode/cli.py
@@ -37,6 +37,7 @@
 from collections import OrderedDict
 from functools import partial
 from itertools import imap
+import os
 import sys
 from time import time
 import traceback
@@ -47,7 +48,9 @@
 # import early
 from scancode_config import __version__ as scancode_version
 
+from commoncode.fileutils import as_posixpath
 from commoncode.fileutils import PATH_TYPE
+from commoncode.fileutils import POSIX_PATH_SEP
 from commoncode.timeutils import time2tstamp
 
 from plugincode import PluginManager
@@ -59,6 +62,8 @@
 from plugincode import output_filter
 from plugincode import output
 
+from scancode import ScancodeError
+from scancode import ScancodeCliUsageError
 from scancode import CORE_GROUP
 from scancode import DOC_GROUP
 from scancode import MISC_GROUP
@@ -260,7 +265,7 @@ def print_options(ctx, param, value):
 @click.pass_context
 
 # ensure that the input path is bytes on Linux, unicode elsewhere
-@click.argument('input', metavar='<input> <OUTPUT FORMAT OPTION(s)>',
+@click.argument('input', metavar='<OUTPUT FORMAT OPTION(s)> <input>...', nargs=-1,
     type=click.Path(exists=True, readable=True, path_type=PATH_TYPE))
 
 @click.option('--strip-root',
@@ -382,6 +387,7 @@ def scancode(ctx, input,  # NOQA
              max_in_memory,
              test_mode,
              keep_temp_files,
+             echo_func=echo_stderr,
              *args, **kwargs):
     """scan the <input> file or directory for license, origin and packages and save results to FILE(s) using one or more output format option.
 
@@ -449,6 +455,114 @@ def scancode(ctx, input,  # NOQA
     through Click context machinery.
     """
 
+    success = False
+    try:
+        # Validate CLI UI options dependencies and other CLI-specific inits
+        if TRACE_DEEP:
+            logger_debug('scancode: ctx.params:')
+            for co in sorted(ctx.params.items()):
+                logger_debug('  scancode: ctx.params:', co)
+
+        validate_option_dependencies(ctx)
+        pretty_params = get_pretty_params(ctx, generic_paths=test_mode)
+
+        # run proper
+        success, _results = run_scan(
+            input=input,
+            from_json=from_json,
+            strip_root=strip_root, full_root=full_root,
+            processes=processes, timeout=timeout,
+            quiet=quiet, verbose=verbose,
+            timing=timing, max_in_memory=max_in_memory,
+            test_mode=test_mode,
+            keep_temp_files=keep_temp_files,
+            pretty_params=pretty_params,
+            # results are saved to file, no need to get them back in a cli context
+            return_results=False,
+            echo_func=echo_func,
+            *args, **kwargs)
+
+    except click.UsageError:
+        # let click report the usage error and exit: keep the traceback intact
+        raise
+
+    except ScancodeError as se:
+        # TODO :consider raising a usage error?
+        echo_func(se.message, fg='red')
+        ctx.exit(2)
+
+    rc = 0 if success else 1
+    ctx.exit(rc)
+
+
+def run_scan(
+        input,  # NOQA
+        from_json=None,
+        strip_root=False,
+        full_root=False,
+        max_in_memory=10000,
+        processes=1,
+        timeout=120,
+        quiet=True,
+        verbose=False,
+        echo_func=None,
+        timing=False,
+        keep_temp_files=False,
+        return_results=True,
+        test_mode=False,
+        pretty_params=None,
+        *args, **kwargs):
+    """
+    Run a scan on `input` path (or a list of input paths) and return a tuple of
+    (success, results) where success is a boolean and results is a list of
+    "files" items using the same data structure as the "files" in the JSON scan
+    results but as native Python. Raise Exceptions (e.g. ScancodeError) on
+    error. See scancode() for arguments details.
+    """
+
+    if not echo_func:
+        def echo_func(*args, **kwargs): pass
+
+    if not isinstance(input, (list, tuple)):
+        # a single path: nothing else to do
+        assert isinstance(input, (bytes, unicode))
+
+    elif len(input) == 1:
+        # we received a single input path, so we treat this as a single path
+        input = input[0]  # NOQA
+    else:
+        # we received several input paths: we can handle this IFF they share
+        # a common root directory and none is an absolute path
+
+        if any(os.path.isabs(p) for p in input):
+            msg = ('ERROR: invalid inputs: input paths must be relative and '
+                  'share a common parent when using multiple inputs.')
+            raise ScancodeError(msg)  # no active exception: no traceback to add
+
+        # find the common prefix directory (note that this is a pure string
+        # operation hence it may return non-existing paths)
+        common_prefix = os.path.commonprefix(input)
+
+        if not common_prefix:
+            # we have no common prefix, but all relative. therefore the
+            # parent/root is the current directory
+            common_prefix = PATH_TYPE('.')
+
+        elif not os.path.isdir(common_prefix):
+            msg = 'ERROR: invalid inputs: all input paths must share a common parent directory.'
+            raise ScancodeError(msg)  # no active exception: no traceback to add
+
+        # and we craft a list of synthetic --include path pattern options from
+        # the input list of paths
+        included_paths = [as_posixpath(path).rstrip(POSIX_PATH_SEP) for path in input]
+        # FIXME: this is a hack as this "include" is from an external plugin!
+        include = list(kwargs.get('include', []) or [])
+        include.extend(included_paths)
+        kwargs['include'] = include
+
+        # ... and use the common prefix as our new input
+        input = common_prefix  # NOQA
+
+
     # build mappings of all kwargs to pass down to plugins
     standard_kwargs = dict(
         input=input,
@@ -466,6 +580,7 @@ def scancode(ctx, input,  # NOQA
     kwargs.update(standard_kwargs)
 
     success = True
+    results = None
     codebase = None
     processing_start = time()
 
@@ -473,27 +588,16 @@ def scancode(ctx, input,  # NOQA
 
     if not quiet:
         if not processes:
-            echo_stderr('Disabling multi-processing for debugging.', fg='yellow')
+            echo_func('Disabling multi-processing for debugging.', fg='yellow')
 
         elif processes == -1:
-            echo_stderr('Disabling multi-processing '
-                        'and multi-threading for debugging.', fg='yellow')
+            echo_func('Disabling multi-processing '
+                      'and multi-threading for debugging.', fg='yellow')
 
     try:
-
-        ########################################################################
-        # Validate UI options deps
-        ########################################################################
-        validate_option_dependencies(ctx)
-
         ########################################################################
         # Find and create known plugin instances and collect the enabled
         ########################################################################
-        if TRACE_DEEP:
-            ctx_params = sorted(ctx.params.items())
-            logger_debug('scancode: ctx.params:')
-            for co in ctx.params:
-                logger_debug('  scancode: ctx.params:', co)
 
         enabled_plugins_by_stage = OrderedDict()
         all_enabled_plugins_by_qname = {}
@@ -506,16 +610,20 @@ def scancode(ctx, input,  # NOQA
                     name = plugin_cls.name
                     qname = plugin_cls.qname()
                     plugin = plugin_cls(**kwargs)
-                    if plugin.is_enabled(**kwargs):
+                    is_enabled = False
+                    try:
+                        is_enabled = plugin.is_enabled(**kwargs)
+                    except TypeError as te:
+                        if not 'takes exactly' in str(te):
+                            raise te
+                    if is_enabled:
                         stage_plugins.append(plugin)
                         all_enabled_plugins_by_qname[qname] = plugin
                     else:
                         non_enabled_plugins_by_qname[qname] = plugin
                 except:
                     msg = 'ERROR: failed to load plugin: %(qname)s:' % locals()
-                    echo_stderr(msg, fg='red')
-                    echo_stderr(traceback.format_exc())
-                    ctx.exit(2)
+                    raise ScancodeError(msg + '\n' + traceback.format_exc())
 
         # NOTE: these are list of plugin instances, not classes!
         pre_scan_plugins = enabled_plugins_by_stage[pre_scan.stage]
@@ -526,12 +634,12 @@ def scancode(ctx, input,  # NOQA
 
         if from_json and scanner_plugins:
             msg = ('Data loaded from JSON: no scan options can be selected.')
-            raise click.UsageError(msg)
+            raise ScancodeCliUsageError(msg)
 
-        if not output_plugins:
+        if not output_plugins and not return_results:
             msg = ('Missing output option(s): at least one output '
                    'option is required to save scan results.')
-            raise click.UsageError(msg)
+            raise ScancodeCliUsageError(msg)
 
         ########################################################################
         # Get required and enabled plugins instance so we can run their setup
@@ -559,7 +667,7 @@ def scancode(ctx, input,  # NOQA
             for qn, requestors in requestors_by_missing_qname.items():
                 rqs = ', '.join(sorted(requestors))
                 msg += '  Plugin: {qn} is required by plugins: {rqs}.\n'.format(**locals())
-            raise Exception(msg)
+            raise ScancodeError(msg)
 
         if TRACE_DEEP:
             logger_debug('scancode: plugins_to_setup: from required:', plugins_to_setup)
@@ -578,7 +686,7 @@ def scancode(ctx, input,  # NOQA
         plugins_setup_start = time()
 
         if not quiet and not verbose:
-            echo_stderr('Setup plugins...', fg='green')
+            echo_func('Setup plugins...', fg='green')
 
         # TODO: add progress indicator
         for plugin in plugins_to_setup:
@@ -586,15 +694,13 @@ def scancode(ctx, input,  # NOQA
             stage = plugin.stage
             name = plugin.name
             if verbose:
-                echo_stderr(' Setup plugin: %(stage)s:%(name)s...' % locals(),
+                echo_func(' Setup plugin: %(stage)s:%(name)s...' % locals(),
                             fg='green')
             try:
                 plugin.setup(**kwargs)
             except:
                 msg = 'ERROR: failed to setup plugin: %(stage)s:%(name)s:' % locals()
-                echo_stderr(msg, fg='red')
-                echo_stderr(traceback.format_exc())
-                ctx.exit(2)
+                raise ScancodeError(msg + '\n' + traceback.format_exc())
 
             timing_key = 'setup_%(stage)s:%(name)s' % locals()
             setup_timings[timing_key] = time() - plugin_setup_start
@@ -621,9 +727,7 @@ def scancode(ctx, input,  # NOQA
                 except:
                     msg = ('ERROR: failed to collect resource_attributes for plugin: '
                            '%(stage)s:%(name)s:' % locals())
-                    echo_stderr(msg, fg='red')
-                    echo_stderr(traceback.format_exc())
-                    ctx.exit(2)
+                    raise ScancodeError(msg + '\n' + traceback.format_exc())
 
         resource_attributes = OrderedDict()
         for _, name, attribs in sorted(sortable_resource_attributes):
@@ -659,9 +763,7 @@ def scancode(ctx, input,  # NOQA
                 except:
                     msg = ('ERROR: failed to collect codebase_attributes for plugin: '
                            '%(stage)s:%(name)s:' % locals())
-                    echo_stderr(msg, fg='red')
-                    echo_stderr(traceback.format_exc())
-                    ctx.exit(2)
+                    raise ScancodeError(msg + '\n' + traceback.format_exc())
 
         codebase_attributes = OrderedDict()
         for _, name, attribs in sorted(sortable_codebase_attributes):
@@ -685,7 +787,7 @@ def scancode(ctx, input,  # NOQA
         inventory_start = time()
 
         if not quiet:
-            echo_stderr('Collect file inventory...', fg='green')
+            echo_func('Collect file inventory...', fg='green')
 
         if from_json:
             codebase_class = VirtualCodebase
@@ -708,9 +810,7 @@ def scancode(ctx, input,  # NOQA
             )
         except:
             msg = 'ERROR: failed to collect codebase at: %(input)r' % locals()
-            echo_stderr(msg, fg='red')
-            echo_stderr(traceback.format_exc())
-            ctx.exit(2)
+            raise ScancodeError(msg + '\n' + traceback.format_exc())
 
         # update headers
         cle = codebase.get_or_create_current_header()
@@ -718,7 +818,7 @@ def scancode(ctx, input,  # NOQA
         cle.tool_name = 'scancode-toolkit'
         cle.tool_version = scancode_version
         cle.notice = notice
-        cle.options = get_pretty_params(ctx, generic_paths=test_mode)
+        cle.options = pretty_params or {}
 
         # TODO: this is weird: may be the timings should NOT be stored on the
         # codebase, since they exist in abstract of it??
@@ -735,10 +835,10 @@ def scancode(ctx, input,  # NOQA
 
         # TODO: add progress indicator
         pre_scan_success = run_codebase_plugins(
-            ctx, stage='pre-scan', plugins=pre_scan_plugins, codebase=codebase,
+            stage='pre-scan', plugins=pre_scan_plugins, codebase=codebase,
             stage_msg='Run %(stage)ss...',
             plugin_msg=' Run %(stage)s: %(name)s...',
-            quiet=quiet, verbose=verbose, kwargs=kwargs,
+            quiet=quiet, verbose=verbose, kwargs=kwargs, echo_func=echo_func,
         )
         success = success and pre_scan_success
 
@@ -747,9 +847,9 @@ def scancode(ctx, input,  # NOQA
         ########################################################################
 
         scan_success = run_scanners(
-            ctx, stage='scan', plugins=scanner_plugins, codebase=codebase,
+            stage='scan', plugins=scanner_plugins, codebase=codebase,
             processes=processes, timeout=timeout, timing=timeout,
-            quiet=quiet, verbose=verbose, kwargs=kwargs,
+            quiet=quiet, verbose=verbose, kwargs=kwargs, echo_func=echo_func,
         )
         success = success and scan_success
 
@@ -759,9 +859,9 @@ def scancode(ctx, input,  # NOQA
 
         # TODO: add progress indicator
         post_scan_success = run_codebase_plugins(
-            ctx, stage='post-scan', plugins=post_scan_plugins, codebase=codebase,
+            stage='post-scan', plugins=post_scan_plugins, codebase=codebase,
             stage_msg='Run %(stage)ss...', plugin_msg=' Run %(stage)s: %(name)s...',
-            quiet=quiet, verbose=verbose, kwargs=kwargs,
+            quiet=quiet, verbose=verbose, kwargs=kwargs, echo_func=echo_func,
         )
         success = success and post_scan_success
 
@@ -771,9 +871,9 @@ def scancode(ctx, input,  # NOQA
 
         # TODO: add progress indicator
         output_filter_success = run_codebase_plugins(
-            ctx, stage='output-filter', plugins=output_filter_plugins, codebase=codebase,
+            stage='output-filter', plugins=output_filter_plugins, codebase=codebase,
             stage_msg='Apply %(stage)ss...', plugin_msg=' Apply %(stage)s: %(name)s...',
-            quiet=quiet, verbose=verbose, kwargs=kwargs,
+            quiet=quiet, verbose=verbose, kwargs=kwargs, echo_func=echo_func,
         )
         success = success and output_filter_success
 
@@ -793,14 +893,17 @@ def scancode(ctx, input,  # NOQA
         errors = collect_errors(codebase, verbose)
         cle.errors = errors
 
-        # TODO: add progress indicator
-        output_success = run_codebase_plugins(
-            ctx, stage='output', plugins=output_plugins, codebase=codebase,
-            stage_msg='Save scan results...',
-            plugin_msg=' Save scan results as: %(name)s...',
-            quiet=quiet, verbose=verbose, kwargs=kwargs,
-        )
-        success = success and output_success
+        # When called from Python with return_results, there may be no output
+        # plugin at all: only run the output stage if some plugin is enabled.
+        if output_plugins:
+            # TODO: add progress indicator
+            output_success = run_codebase_plugins(
+                stage='output', plugins=output_plugins, codebase=codebase,
+                stage_msg='Save scan results...',
+                plugin_msg=' Save scan results as: %(name)s...',
+                quiet=quiet, verbose=verbose, kwargs=kwargs, echo_func=echo_func,
+            )
+            success = success and output_success
 
         ########################################################################
         # 9. display summary
@@ -810,8 +913,17 @@ def scancode(ctx, input,  # NOQA
         # TODO: compute summary for output plugins too??
         if not quiet:
             scan_names = ', '.join(p.name for p in scanner_plugins)
-            echo_stderr('Scanning done.', fg='green' if success else 'red')
-            display_summary(codebase, scan_names, processes, errors=errors, verbose=verbose)
+            echo_func('Scanning done.', fg='green' if success else 'red')
+            display_summary(codebase, scan_names, processes, errors=errors,
+                            verbose=verbose, echo_func=echo_func)
+
+        ########################################################################
+        # 10. optionally assemble results to return
+        ########################################################################
+        if return_results:
+            # the structure is exactly the same as the JSON output
+            from formattedcode.output_json import get_results
+            results = get_results(codebase, as_list=True, **kwargs)
 
     finally:
         # remove temporary files
@@ -819,24 +931,24 @@ def scancode(ctx, input,  # NOQA
         if keep_temp_files:
             if not quiet:
                 msg = 'Keeping temporary files in: "{}".'.format(scancode_temp_dir)
-                echo_stderr(msg, fg='green' if success else 'red')
+                echo_func(msg, fg='green' if success else 'red')
         else:
             if not quiet:
-                echo_stderr('Removing temporary files...', fg='green', nl=False)
+                echo_func('Removing temporary files...', fg='green', nl=False)
 
             from commoncode import fileutils
             fileutils.delete(scancode_temp_dir)
 
             if not quiet:
-                echo_stderr('done.', fg='green')
+                echo_func('done.', fg='green')
 
-    rc = 0 if success else 1
-    ctx.exit(rc)
+    return success, results
 
 
-def run_codebase_plugins(ctx, stage, plugins, codebase,
+def run_codebase_plugins(stage, plugins, codebase,
                          stage_msg='', plugin_msg='',
-                         quiet=False, verbose=False, kwargs=None):
+                         quiet=False, verbose=False, kwargs=None,
+                         echo_func=echo_stderr):
     """
     Run the list of `stage` `plugins` on `codebase`.
     Display errors and messages based on the `stage_msg`and `plugin_msg` strings
@@ -849,7 +961,7 @@ def run_codebase_plugins(ctx, stage, plugins, codebase,
 
     stage_start = time()
     if verbose and plugins:
-        echo_stderr(stage_msg % locals(), fg='green')
+        echo_func(stage_msg % locals(), fg='green')
 
     success = True
     # TODO: add progress indicator
@@ -858,7 +970,7 @@ def run_codebase_plugins(ctx, stage, plugins, codebase,
         plugin_start = time()
 
         if verbose:
-            echo_stderr(plugin_msg % locals(), fg='green')
+            echo_func(plugin_msg % locals(), fg='green')
 
         try:
             if TRACE_DEEP:
@@ -871,9 +983,9 @@ def run_codebase_plugins(ctx, stage, plugins, codebase,
 
         except Exception as _e:
             msg = 'ERROR: failed to run %(stage)s plugin: %(name)s:' % locals()
-            echo_stderr(msg, fg='red')
+            echo_func(msg, fg='red')
             tb = traceback.format_exc()
-            echo_stderr(tb)
+            echo_func(tb)
             codebase.errors.append(msg + '\n' + tb)
             success = False
 
@@ -884,9 +996,10 @@ def run_codebase_plugins(ctx, stage, plugins, codebase,
     return success
 
 
-def run_scanners(ctx, stage, plugins, codebase,
+def run_scanners(stage, plugins, codebase,
                  processes, timeout, timing,
-                 quiet=False, verbose=False, kwargs=None):
+                 quiet=False, verbose=False, kwargs=None,
+                 echo_func=echo_stderr):
     """
     Run the list of `stage` ScanPlugin `plugins` on `codebase`.
     Use multiple `processes` and limit the runtime of a single scanner function
@@ -916,7 +1029,7 @@ def run_scanners(ctx, stage, plugins, codebase,
 
     progress_manager = None
     if not quiet:
-        echo_stderr('Scan files for: %(scan_names)s '
+        echo_func('Scan files for: %(scan_names)s '
                     'with %(processes)d process(es)...' % locals())
         item_show_func = partial(path_progress_message, verbose=verbose)
         progress_manager = partial(progressmanager,
@@ -931,7 +1044,7 @@ def run_scanners(ctx, stage, plugins, codebase,
     # TODO: add progress indicator
     # run the process codebase of each scan plugin (most often a no-op)
     scan_process_codebase_success = run_codebase_plugins(
-        ctx, stage, plugins, codebase,
+        stage, plugins, codebase,
         stage_msg='Filter %(stage)ss...',
         plugin_msg=' Filter %(stage)s: %(name)s...',
-        quiet=quiet, verbose=verbose, kwargs=kwargs,
+        quiet=quiet, verbose=verbose, kwargs=kwargs, echo_func=echo_func,
@@ -956,7 +1069,7 @@ def run_scanners(ctx, stage, plugins, codebase,
 
 
 def scan_codebase(codebase, scanners, processes=1, timeout=DEFAULT_TIMEOUT,
-                  with_timing=False, progress_manager=None):
+                  with_timing=False, progress_manager=None, echo_func=echo_stderr):
     """
     Run the `scanners` Scanner objects on the `codebase` Codebase. Return True
     on success or False otherwise.
@@ -1062,7 +1175,7 @@ def scan_codebase(codebase, scanners, processes=1, timeout=DEFAULT_TIMEOUT,
             except StopIteration:
                 break
             except KeyboardInterrupt:
-                echo_stderr('\nAborted with Ctrl+C!', fg='red')
+                echo_func('\nAborted with Ctrl+C!', fg='red')
                 success = False
                 if pool:
                     pool.terminate()
@@ -1139,7 +1252,7 @@ def scan_resource(location_rid, scanners, timeout=DEFAULT_TIMEOUT,
     return location, rid, scan_errors, scan_time, results, timings
 
 
-def display_summary(codebase, scan_names, processes, errors, verbose):
+def display_summary(codebase, scan_names, processes, errors, verbose, echo_func=echo_stderr):
     """
     Display a scan summary.
     """
@@ -1213,39 +1326,39 @@ def display_summary(codebase, scan_names, processes, errors, verbose):
 
     errors_count = len(errors)
     if errors:
-        echo_stderr('Some files failed to scan properly:', fg='red')
+        echo_func('Some files failed to scan properly:', fg='red')
         for error in errors:
             for me in error.splitlines(False):
-                echo_stderr(me , fg='red')
+                echo_func(me , fg='red')
 
     ######################################################################
 
-    echo_stderr('Summary:        %(scan_names)s with %(processes)d process(es)' % locals())
-    echo_stderr('Errors count:   %(errors_count)d' % locals())
-    echo_stderr('Scan Speed:     %(scan_file_speed).2f files/sec. %(scan_size_speed)s' % locals())
+    echo_func('Summary:        %(scan_names)s with %(processes)d process(es)' % locals())
+    echo_func('Errors count:   %(errors_count)d' % locals())
+    echo_func('Scan Speed:     %(scan_file_speed).2f files/sec. %(scan_size_speed)s' % locals())
     if prescan_scan_time:
-        echo_stderr('Early Scanners Speed:     %(prescan_scan_file_speed).2f '
+        echo_func('Early Scanners Speed:     %(prescan_scan_file_speed).2f '
                     'files/sec. %(prescan_scan_size_speed)s' % locals())
 
-    echo_stderr('Initial counts: %(initial_res_count)d resource(s): '
-                                '%(initial_files_count)d file(s) '
-                                'and %(initial_dirs_count)d directorie(s) '
-                                '%(initial_size_count)s' % locals())
+    echo_func('Initial counts: %(initial_res_count)d resource(s): '
+                               '%(initial_files_count)d file(s) '
+                               'and %(initial_dirs_count)d directorie(s) '
+                               '%(initial_size_count)s' % locals())
 
-    echo_stderr('Final counts:   %(final_res_count)d resource(s): '
-                                '%(final_files_count)d file(s) '
-                                'and %(final_dirs_count)d directorie(s) '
-                                '%(final_size_count)s' % locals())
+    echo_func('Final counts:   %(final_res_count)d resource(s): '
+                               '%(final_files_count)d file(s) '
+                               'and %(final_dirs_count)d directorie(s) '
+                               '%(final_size_count)s' % locals())
 
-    echo_stderr('Timings:')
+    echo_func('Timings:')
 
     cle = codebase.get_or_create_current_header().to_dict()
-    echo_stderr('  scan_start: {start_timestamp}'.format(**cle))
-    echo_stderr('  scan_end:   {end_timestamp}'.format(**cle))
+    echo_func('  scan_start: {start_timestamp}'.format(**cle))
+    echo_func('  scan_end:   {end_timestamp}'.format(**cle))
 
     for name, value, in codebase.timings.items():
         if value > 0.1:
-            echo_stderr('  %(name)s: %(value).2fs' % locals())
+            echo_func('  %(name)s: %(value).2fs' % locals())
 
     # TODO: if timing was requested display top per-scan/per-file stats?
 
diff --git a/src/scancode/plugin_ignore.py b/src/scancode/plugin_ignore.py
index 3f5da101089..961f18721b4 100644
--- a/src/scancode/plugin_ignore.py
+++ b/src/scancode/plugin_ignore.py
@@ -27,17 +27,40 @@
 
 from functools import partial
 
-from commoncode.fileset import match
+from commoncode.fileset import is_included
 from plugincode.pre_scan import PreScanPlugin
 from plugincode.pre_scan import pre_scan_impl
 from scancode import CommandLineOption
 from scancode import PRE_SCAN_GROUP
 
 
+# Tracing flags
+TRACE = False
+
+
+def logger_debug(*args):
+    pass
+
+
+if TRACE:
+    import logging
+    import sys
+
+    logger = logging.getLogger(__name__)
+    # logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
+    logging.basicConfig(stream=sys.stdout)
+    logger.setLevel(logging.DEBUG)
+
+    def logger_debug(*args):  # real tracer: replaces the no-op when TRACE is on
+        return logger.debug(
+            ' '.join(a if isinstance(a, unicode) else repr(a) for a in args))
+
+
+
 @pre_scan_impl
 class ProcessIgnore(PreScanPlugin):
     """
-    Ignore files matching the supplied pattern.
+    Include or ignore files matching patterns.
     """
 
     options = [
@@ -46,48 +69,69 @@ class ProcessIgnore(PreScanPlugin):
            metavar='<pattern>',
            help='Ignore files matching <pattern>.',
            sort_order=10,
+           help_group=PRE_SCAN_GROUP),
+        CommandLineOption(('--include',),
+           multiple=True,
+           metavar='<pattern>',
+           help='Include files matching <pattern>.',
+           sort_order=11,
            help_group=PRE_SCAN_GROUP)
     ]
 
-    def is_enabled(self, ignore, **kwargs):
-        return ignore
+    def is_enabled(self, ignore, include, **kwargs):
+        return ignore or include
 
-    def process_codebase(self, codebase, ignore=(), **kwargs):
+    def process_codebase(self, codebase, ignore=(), include=(), **kwargs):
         """
-        Remove ignored Resources from the resource tree.
+        Keep only included and non-ignored Resources in the codebase.
         """
 
-        if not ignore:
+        if not (ignore or include):
             return
 
-        ignores = {
+        excludes = {
             pattern: 'User ignore: Supplied by --ignore' for pattern in ignore
         }
 
-        ignorable = partial(is_ignored, ignores=ignores)
-        rids_to_remove = []
-        remove_resource = codebase.remove_resource
+        includes = {
+            pattern: 'User include: Supplied by --include' for pattern in include
+        }
+
+        included = partial(is_included, includes=includes, excludes=excludes)
+
+        rids_to_remove = set()
+        rids_to_remove_add = rids_to_remove.add
+        rids_to_remove_discard = rids_to_remove.discard
 
         # First, walk the codebase from the top-down and collect the rids of
         # Resources that can be removed.
         for resource in codebase.walk(topdown=True):
-            if ignorable(resource.path):
+            if resource.is_root:
+                continue
+            resource_rid = resource.rid
+
+            if not included(resource.path):
                 for child in resource.children(codebase):
-                    rids_to_remove.append(child.rid)
-                rids_to_remove.append(resource.rid)
+                    rids_to_remove_add(child.rid)
+                rids_to_remove_add(resource_rid)
+            else:
+                # This resource may have been queued for removal as the child
+                # of a non-included parent, yet it is itself included: keep it.
+                rids_to_remove_discard(resource_rid)
+        if TRACE:
+            logger_debug('process_codebase: rids_to_remove')
+            logger_debug(rids_to_remove)
+            for rid in sorted(rids_to_remove):
+                logger_debug(codebase.get_resource(rid))
 
-        # Then, walk bottom-up and remove the ignored Resources from the
+        remove_resource = codebase.remove_resource
+        
+        # Then, walk bottom-up and remove the non-included Resources from the
         # Codebase if the Resource's rid is in our list of rid's to remove.
         for resource in codebase.walk(topdown=False):
             resource_rid = resource.rid
+            if resource.is_root:
+                continue
             if resource_rid in rids_to_remove:
-                rids_to_remove.remove(resource_rid)
+                rids_to_remove_discard(resource_rid)
                 remove_resource(resource)
-
-
-def is_ignored(location, ignores):
-    """
-    Return a tuple of (pattern , message) if a file at location is ignored or
-    False otherwise. `ignores` is a mappings of patterns to a reason.
-    """
-    return match(location, includes=ignores, excludes={})
diff --git a/tests/commoncode/test_fileset.py b/tests/commoncode/test_fileset.py
index e776b29fa6a..b20b68a0c99 100644
--- a/tests/commoncode/test_fileset.py
+++ b/tests/commoncode/test_fileset.py
@@ -39,65 +39,65 @@ def test_load(self):
         result = fileset.load(irf)
         assert ['/foo/*', '!/foobar/*', 'bar/*', '#comment'] == result
 
-    def test_match_basic(self):
-        assert not fileset.match('/common/src/', {}, {})
-        assert not fileset.match('/common/src/', None, None)
-        assert not fileset.match(None, None, None)
+    def test_is_included_basic(self):
+        assert fileset.is_included('/common/src/', {}, {})
+        assert fileset.is_included('/common/src/', None, None)
+        assert not fileset.is_included(None, None, None)
 
-    def test_in_fileset(self):
+    def test_is_included_in_fileset(self):
         incs = {'/common/src/*': '.scanignore'}
         excs = {'/common/src/*.so':'.scanignore'}
-        assert not fileset.match(None, incs, excs)
-        assert not fileset.match('', incs, excs)
-        assert not fileset.match('/', incs, excs)
-        assert fileset.match('/common/src/', incs, excs)
-        assert not fileset.match('/common/bin/', incs, excs)
+        assert not fileset.is_included(None, incs, excs)
+        assert not fileset.is_included('', incs, excs)
+        assert not fileset.is_included('/', incs, excs)
+        assert fileset.is_included('/common/src/', incs, excs)
+        assert not fileset.is_included('/common/bin/', incs, excs)
 
-    def test_in_fileset_2(self):
+    def test_is_included_in_fileset_2(self):
         incs = {'src*': '.scanignore'}
         excs = {'src/ab': '.scanignore'}
-        assert not fileset.match(None, incs, excs)
-        assert not fileset.match('', incs, excs)
-        assert not fileset.match('/', incs, excs)
-        assert fileset.match('/common/src/', incs, excs)
-        assert not fileset.match('src/ab', incs, excs)
-        assert fileset.match('src/abbab', incs, excs)
-
-    def test_match_exclusions(self):
+        assert not fileset.is_included(None, incs, excs)
+        assert not fileset.is_included('', incs, excs)
+        assert not fileset.is_included('/', incs, excs)
+        assert fileset.is_included('/common/src/', incs, excs)
+        assert not fileset.is_included('src/ab', incs, excs)
+        assert fileset.is_included('src/abbab', incs, excs)
+
+    def test_is_included_is_included_exclusions(self):
         incs = {'/src/*': '.scanignore'}
         excs = {'/src/*.so':'.scanignore'}
-        assert not fileset.match('/src/dist/build/mylib.so', incs, excs)
+        assert not fileset.is_included('/src/dist/build/mylib.so', incs, excs)
 
-    def test_match_exclusions_2(self):
+    def test_is_included_is_included_exclusions_2(self):
         incs = {'src': '.scanignore'}
         excs = {'src/*.so':'.scanignore'}
-        assert fileset.match('/some/src/this/that', incs, excs)
-        assert not fileset.match('/src/dist/build/mylib.so', incs, excs)
+        assert fileset.is_included('/some/src/this/that', incs, excs)
+        assert not fileset.is_included('/src/dist/build/mylib.so', incs, excs)
 
-    def test_match_empty_exclusions(self):
+    def test_is_included_empty_exclusions(self):
         incs = {'/src/*': '.scanignore'}
         excs = {'': '.scanignore'}
-        assert fileset.match('/src/dist/build/mylib.so', incs, excs)
+        assert fileset.is_included('/src/dist/build/mylib.so', incs, excs)
 
-    def test_match_sources(self):
+    def test_is_included_sources(self):
         incs = {'/home/elf/elf-0.5/*': '.scanignore'}
         excs = {'/home/elf/elf-0.5/src/elf': '.scanignore',
                 '/home/elf/elf-0.5/src/elf.o': '.scanignore'}
-        assert not fileset.match('/home/elf/elf-0.5/src/elf', incs, excs)
+        assert not fileset.is_included('/home/elf/elf-0.5/src/elf', incs, excs)
 
-    def test_match_dot_svn(self):
+    def test_is_included_dot_svn(self):
         incs = {'*/.svn/*': '.scanignore'}
         excs = {}
-        assert fileset.match('home/common/tools/elf/.svn/', incs, excs)
-        assert fileset.match('home/common/tools/.svn/this', incs, excs)
-        assert not fileset.match('home/common/tools/this', incs, excs)
+        assert fileset.is_included('home/common/tools/elf/.svn/', incs, excs)
+        assert fileset.is_included('home/common/tools/.svn/this', incs, excs)
+        assert not fileset.is_included('home/common/tools/this', incs, excs)
 
-    def test_match_dot_svn_with_excludes(self):
+    def test_is_included_dot_svn_with_excludes(self):
         incs = {'*/.svn/*': '.scanignore'}
         excs = {'*/.git/*': '.scanignore'}
-        assert fileset.match('home/common/tools/elf/.svn/', incs, excs)
-        assert fileset.match('home/common/tools/.svn/this', incs, excs)
-        assert not fileset.match('home/common/.git/this', incs, excs)
+        assert fileset.is_included('home/common/tools/elf/.svn/', incs, excs)
+        assert fileset.is_included('home/common/tools/.svn/this', incs, excs)
+        assert not fileset.is_included('home/common/.git/this', incs, excs)
 
     def test_get_matches(self):
         patterns = {'*/.svn/*': '.scanignore'}
diff --git a/tests/commoncode/test_ignore.py b/tests/commoncode/test_ignore.py
index 0ec21989397..e8322610621 100644
--- a/tests/commoncode/test_ignore.py
+++ b/tests/commoncode/test_ignore.py
@@ -43,64 +43,56 @@ def test_is_ignored_default_ignores_eclipse1(self):
         test_base = os.path.join(test_dir, 'eclipse')
 
         test = os.path.join(test_base, '.settings')
-        result = ignore.is_ignored(test, ignore.default_ignores, {})
-        assert 'Default ignore: Eclipse IDE artifact' == result
+        assert ignore.is_ignored(test, ignore.default_ignores, {})
 
     def test_is_ignored_default_ignores_eclipse2(self):
         test_dir = self.extract_test_tar('ignore/excludes/eclipse.tgz')
         test_base = os.path.join(test_dir, 'eclipse')
 
         test = os.path.join(test_base, '.settings/somefile')
-        result = ignore.is_ignored(test, ignore.default_ignores, {})
-        assert 'Default ignore: Eclipse IDE artifact' == result
+        assert ignore.is_ignored(test, ignore.default_ignores, {})
 
     def test_is_ignored_default_ignores_eclipse3(self):
         test_dir = self.extract_test_tar('ignore/excludes/eclipse.tgz')
         test_base = os.path.join(test_dir, 'eclipse')
 
         test = os.path.join(test_base, '.project')
-        result = ignore.is_ignored(test, ignore.default_ignores, {})
-        assert 'Default ignore: Eclipse IDE artifact' == result
+        assert ignore.is_ignored(test, ignore.default_ignores, {})
 
     def test_is_ignored_default_ignores_eclipse4(self):
         test_dir = self.extract_test_tar('ignore/excludes/eclipse.tgz')
         test_base = os.path.join(test_dir, 'eclipse')
 
         test = os.path.join(test_base, '.pydevproject')
-        result = ignore.is_ignored(test, ignore.default_ignores, {})
-        assert 'Default ignore: Eclipse IDE artifact' == result
+        assert ignore.is_ignored(test, ignore.default_ignores, {})
 
     def test_is_ignored_default_ignores_mac1(self):
         test_dir = self.extract_test_tar('ignore/excludes/mac.tgz')
         test_base = os.path.join(test_dir, 'mac')
 
         test = os.path.join(test_base, '__MACOSX')
-        result = ignore.is_ignored(test, ignore.default_ignores, {})
-        assert 'Default ignore: MacOSX artifact' == result
+        assert ignore.is_ignored(test, ignore.default_ignores, {})
 
     def test_is_ignored_default_ignores_mac2(self):
         test_dir = self.extract_test_tar('ignore/excludes/mac.tgz')
         test_base = os.path.join(test_dir, 'mac')
 
         test = os.path.join(test_base, '__MACOSX/comp_match/smallrepo/._jetty_1.0_index.csv')
-        result = ignore.is_ignored(test, ignore.default_ignores, {})
-        assert 'Default ignore: MacOSX artifact' == result
+        assert ignore.is_ignored(test, ignore.default_ignores, {})
 
     def test_is_ignored_default_ignores_mac3(self):
         test_dir = self.extract_test_tar('ignore/excludes/mac.tgz')
         test_base = os.path.join(test_dir, 'mac')
 
         test = os.path.join(test_base, '.DS_Store')
-        result = ignore.is_ignored(test, ignore.default_ignores, {})
-        assert 'Default ignore: MacOSX artifact' == result
+        assert ignore.is_ignored(test, ignore.default_ignores, {})
 
     def test_is_ignored_default_ignores_mac4(self):
         test_dir = self.extract_test_tar('ignore/excludes/mac.tgz')
         test_base = os.path.join(test_dir, 'mac')
 
         test = os.path.join(test_base, '.DS_Store/a')
-        result = ignore.is_ignored(test, ignore.default_ignores, {})
-        assert 'Default ignore: MacOSX artifact' == result
+        assert ignore.is_ignored(test, ignore.default_ignores, {})
 
     @skipIf(on_mac, 'Return different result on Mac for reasons to investigate')
     def test_is_ignored_default_ignores_mac5(self):
@@ -108,16 +100,16 @@ def test_is_ignored_default_ignores_mac5(self):
         test_base = os.path.join(test_dir, 'mac')
 
         test = os.path.join(test_base, '._.DS_Store')
-        result = ignore.is_ignored(test, ignore.default_ignores, {})
         # this is really weird as a behavior
-        assert 'Default ignore: MacOSX artifact' == result
+        # 'Default ignore: MacOSX artifact'
+        assert ignore.is_ignored(test, ignore.default_ignores, {})
 
     @skipIf(on_mac, 'Return different result on Mac for reasons to investigate')
     def test_is_ignored_default_ignores_msft(self):
         test_dir = self.extract_test_tar('ignore/excludes/msft-vs.tgz')
         test = os.path.join(test_dir, 'msft-vs/tst.sluo')
-        result = ignore.is_ignored(test, ignore.default_ignores, {})
-        assert 'Default ignore: Microsoft VS project artifact' == result
+        # 'Default ignore: Microsoft VS project artifact' ??
+        assert ignore.is_ignored(test, ignore.default_ignores, {})
 
     @skipIf(on_mac, 'Return different result on Mac for reasons to investigate')
     def test_is_ignored_skip_vcs_files_and_dirs(self):
@@ -144,26 +136,51 @@ def test_is_ignored_skip_vcs_files_and_dirs(self):
 
         expected = [
             ('/vcs', False),
-            ('/vcs/.bzr', u'Default ignore: Bazaar artifact'),
-            ('/vcs/.git', u'Default ignore: Git artifact'),
-            ('/vcs/.hg', u'Default ignore: Mercurial artifact'),
-            ('/vcs/.repo', u'Default ignore: Multiple Git repository artifact'),
-            ('/vcs/.svn', u'Default ignore: SVN artifact'),
-            ('/vcs/CVS', u'Default ignore: CVS artifact'),
-            ('/vcs/_darcs', u'Default ignore: Darcs artifact'),
-            ('/vcs/_MTN', u'Default ignore: Monotone artifact'),
-            ('/vcs/.bzrignore', u'Default ignore: Bazaar config artifact'),
-            ('/vcs/.cvsignore', u'Default ignore: CVS config artifact'),
-            ('/vcs/.gitignore', u'Default ignore: Git config artifact'),
-            ('/vcs/.hgignore', u'Default ignore: Mercurial config artifact'),
-            ('/vcs/.svnignore', u'Default ignore: SVN config artifact'),
-            ('/vcs/vssver.scc', u'Default ignore: Visual Source Safe artifact'),
+            ('/vcs/.bzr', True),
+            ('/vcs/.git', True),
+            ('/vcs/.hg', True),
+            ('/vcs/.repo', True),
+            ('/vcs/.svn', True),
+            ('/vcs/CVS', True),
+            ('/vcs/_darcs', True),
+            ('/vcs/_MTN', True),
+            ('/vcs/.bzrignore', True),
+            ('/vcs/.cvsignore', True),
+            ('/vcs/.gitignore', True),
+            ('/vcs/.hgignore', True),
+            ('/vcs/.svnignore', True),
+            ('/vcs/vssver.scc', True),
         ]
         assert sorted(expected) == sorted(result)
 
-    def test_fileset_match_default_ignore_does_not_skip_one_char_names(self):
+    def test_fileset_is_included_with_default_ignore_does_not_skip_one_char_names(self):
         # use fileset directly to work on strings not locations
         from commoncode import fileset
         tests = [c for c in 'HFS+ Private Data'] + 'HFS+ Private Data'.split()
-        for test in tests:
-            assert False == fileset.match(test, includes=ignore.default_ignores, excludes={})
+        result = [(t,
+            fileset.is_included(t, excludes=ignore.default_ignores, includes={}))
+            for t in tests]
+        expected = [
+            ('H', True),
+            ('F', True),
+            ('S', True),
+            ('+', True),
+            (' ', False),
+            ('P', True),
+            ('r', True),
+            ('i', True),
+            ('v', True),
+            ('a', True),
+            ('t', True),
+            ('e', True),
+            (' ', False),
+            ('D', True),
+            ('a', True),
+            ('t', True),
+            ('a', True),
+            ('HFS+', True),
+            ('Private', True),
+            ('Data', True)
+        ]
+
+        assert expected == result
diff --git a/tests/scancode/data/help/help.txt b/tests/scancode/data/help/help.txt
index 9f81b83beb7..1dafec753d3 100644
--- a/tests/scancode/data/help/help.txt
+++ b/tests/scancode/data/help/help.txt
@@ -1,4 +1,4 @@
-Usage: scancode [OPTIONS] <input> <OUTPUT FORMAT OPTION(s)>
+Usage: scancode [OPTIONS] <OUTPUT FORMAT OPTION(s)> <input>...
 
   scan the <input> file or directory for license, origin and packages and save
   results to FILE(s) using one or more output format option.
@@ -73,6 +73,7 @@ Options:
 
   pre-scan:
     --ignore <pattern>         Ignore files matching <pattern>.
+    --include <pattern>        Include files matching <pattern>.
     --classify                 Classify files with flags telling if the file is a
                                legal, or readme or test file, etc.
     --facet <facet>=<pattern>  Add the <facet> to files with a path matching
diff --git a/tests/scancode/test_cli.py b/tests/scancode/test_cli.py
index 2215859d2fa..7c704096c04 100644
--- a/tests/scancode/test_cli.py
+++ b/tests/scancode/test_cli.py
@@ -630,7 +630,7 @@ def test_scan_does_scan_rpm():
     check_json_scan(expected_file, result_file, regen=False)
 
 
-def test_scan_cli_help(regen=False):
+def test_scan_cli_help(regen=False):
     expected_file = test_env.get_test_loc('help/help.txt')
     result = run_scan_click(['--help'])
     if regen:
diff --git a/tests/scancode/test_plugin_ignore.py b/tests/scancode/test_plugin_ignore.py
index b1851397185..79094756ca2 100644
--- a/tests/scancode/test_plugin_ignore.py
+++ b/tests/scancode/test_plugin_ignore.py
@@ -29,9 +29,9 @@
 from os.path import join
 
 from commoncode.testcase import FileDrivenTesting
+from commoncode.fileset import is_included
 from scancode.cli_test_utils import run_scan_click
 from scancode.cli_test_utils import load_json_result
-from scancode.plugin_ignore import is_ignored
 from scancode.plugin_ignore import ProcessIgnore
 from scancode.resource import Codebase
 
@@ -40,30 +40,30 @@ class TestPluginIgnoreFiles(FileDrivenTesting):
 
     test_data_dir = join(dirname(__file__), 'data')
 
-    def test_is_ignored_glob_path(self):
+    def test_is_included_glob_path(self):
         location = 'common/src/test/sample.txt'
-        ignores = {'*/src/test/*': 'test ignore'}
-        assert is_ignored(location=location, ignores=ignores)
+        excludes = {'*/src/test/*': 'test ignore'}
+        assert not is_included(location, excludes=excludes)
 
-    def test_is_ignored_single_path(self):
+    def test_is_included_single_path(self):
         location = 'common/src/test/sample.txt'
-        ignores = {'common/src/test/sample.txt': 'test ignore'}
-        assert is_ignored(location=location, ignores=ignores)
+        excludes = {'common/src/test/sample.txt': 'test ignore'}
+        assert not is_included(location, excludes=excludes)
 
-    def test_is_ignored_single_path_not_matching(self):
+    def test_is_included_single_path_not_matching(self):
         location = 'common/src/test/sample.txt'
-        ignores = {'src/test/sample.txt': 'test ignore'}
-        assert not is_ignored(location=location, ignores=ignores)
+        excludes = {'src/test/sample.txt': 'test ignore'}
+        assert is_included(location, excludes=excludes)
 
-    def test_is_ignored_single_file(self):
+    def test_is_included_single_file(self):
         location = 'common/src/test/sample.txt'
-        ignores = {'sample.txt': 'test ignore'}
-        assert is_ignored(location=location, ignores=ignores)
+        excludes = {'sample.txt': 'test ignore'}
+        assert not is_included(location, excludes=excludes)
 
-    def test_is_ignored_glob_file(self):
+    def test_is_included_glob_file(self):
         location = 'common/src/test/sample.txt'
-        ignores = {'*.txt': 'test ignore'}
-        assert is_ignored(location=location, ignores=ignores)
+        excludes = {'*.txt': 'test ignore'}
+        assert not is_included(location, excludes=excludes)
 
     def check_ProcessIgnore(self, test_dir, expected, ignore):
         codebase = Codebase(test_dir, strip_root=True)
diff --git a/tests/scancode/test_resource.py b/tests/scancode/test_resource.py
index 5a220a10dcc..0687cf40504 100644
--- a/tests/scancode/test_resource.py
+++ b/tests/scancode/test_resource.py
@@ -437,17 +437,7 @@ def test_compute_counts_when_using_disk_cache(self):
 
     def test_low_max_in_memory_does_not_raise_exception_when_ignoring_files(self):
 
-        def is_ignored(location, ignores):
-            """
-            Return a tuple of (pattern , message) if a file at location is ignored or
-            False otherwise. `ignores` is a mappings of patterns to a reason.
-
-            Taken from scancode/plugin_ignore.py
-            """
-            from commoncode.fileset import match
-            return match(location, includes=ignores, excludes={})
-
-        from functools import partial
+        from commoncode.fileset import is_included
 
         test_codebase = self.get_test_loc('resource/client')
         codebase = Codebase(test_codebase, strip_root=True, max_in_memory=1)
@@ -456,14 +446,14 @@ def is_ignored(location, ignores):
         ignores = {
             '*.gif': 'User ignore: Supplied by --ignore'
         }
-        ignorable = partial(is_ignored, ignores=ignores)
         remove_resource = codebase.remove_resource
 
         for resource in codebase.walk(topdown=True):
-            if ignorable(resource.path):
+            if not is_included(resource.path, excludes=ignores):
                 for child in resource.children(codebase):
                     remove_resource(child)
-                remove_resource(resource)
+                if not resource.is_root:
+                    remove_resource(resource)
 
         # Walk through the codebase and save each Resource,
         # UnknownResource exception should not be raised