Skip to content

Commit

Permalink
Cache infos when they are computed #736
Browse files Browse the repository at this point in the history
Signed-off-by: Yash D. Saraf <[email protected]>
  • Loading branch information
yashdsaraf authored and pombredanne committed Sep 22, 2017
1 parent 04b6067 commit a0ca6bd
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 13 deletions.
17 changes: 16 additions & 1 deletion src/scancode/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ class Resource(object):
such as infos and path
"""

def __init__(self, abs_path, base_is_dir, len_base_path):
def __init__(self, scan_cache_class, abs_path, base_is_dir, len_base_path):
self.scan_cache_class = scan_cache_class()
self.is_cached = False
self.abs_path = abs_path
self.base_is_dir = base_is_dir
posix_path = as_posixpath(abs_path)
Expand All @@ -56,6 +58,19 @@ def __init__(self, abs_path, base_is_dir, len_base_path):
self.infos = OrderedDict()
self.infos['path'] = self.rel_path

def put_info(self, infos):
    """
    Merge `infos` into this resource's own infos mapping, then store the
    merged mapping in the scan cache keyed by the resource relative path.

    The value returned by the cache is recorded in `is_cached` (True if
    the info was already cached, False otherwise).
    """
    self.infos.update(infos)
    cache = self.scan_cache_class
    self.is_cached = cache.put_info(self.rel_path, self.infos)

def get_info(self):
    """
    Return whatever the scan cache holds for this resource, looked up by
    the resource relative path.
    """
    cached_infos = self.scan_cache_class.get_info(self.rel_path)
    return cached_infos


def extract_archives(location, recurse=True):
"""
Expand Down
13 changes: 6 additions & 7 deletions src/scancode/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,7 @@ def scan(input_path,

pool = None

resources = resource_paths(input_path, diag, pre_scan_plugins=pre_scan_plugins)
resources = resource_paths(input_path, diag, scans_cache_class, pre_scan_plugins=pre_scan_plugins)
paths_with_error = []
files_count = 0

Expand Down Expand Up @@ -740,7 +740,6 @@ def _scanit(resource, scanners, scans_cache_class, diag, timeout=DEFAULT_TIMEOUT
"""
success = True
scans_cache = scans_cache_class()
is_cached = scans_cache.put_info(resource.rel_path, resource.infos)

# note: "flag and function" expressions return the function if flag is True
# note: the order of the scans matters to show things in logical order
Expand All @@ -756,7 +755,7 @@ def _scanit(resource, scanners, scans_cache_class, diag, timeout=DEFAULT_TIMEOUT
if any(scanner_functions):
# Skip other scans if already cached
# FIXME: ENSURE we only do this for files not directories
if not is_cached:
if not resource.is_cached:
# run the scan as an interruptible task
scans_runner = partial(scan_one, resource.abs_path, scanners, diag)
success, scan_result = interrupter(scans_runner, timeout=timeout)
Expand All @@ -766,7 +765,7 @@ def _scanit(resource, scanners, scans_cache_class, diag, timeout=DEFAULT_TIMEOUT
# "scan" key is used for these errors
scan_result = {'scan_errors': [scan_result]}

scans_cache.put_scan(resource.rel_path, resource.infos, scan_result)
scans_cache.put_scan(resource.rel_path, resource.get_info(), scan_result)

# do not report success if some other errors happened
if scan_result.get('scan_errors'):
Expand All @@ -791,7 +790,7 @@ def build_ignorer(ignores, unignores):
return partial(ignore.is_ignored, ignores=ignores, unignores=unignores)


def resource_paths(base_path, diag, pre_scan_plugins=()):
def resource_paths(base_path, diag, scans_cache_class, pre_scan_plugins=()):
"""
Yield a Resource object (holding the absolute path and the base_path-relative path) for all the files found
at base_path (either a directory or file) given an absolute base_path. Only yield
Expand Down Expand Up @@ -821,9 +820,9 @@ def resource_paths(base_path, diag, pre_scan_plugins=()):
resources = fileutils.resource_iter(base_path, ignored=ignorer)

for abs_path in resources:
resource = Resource(abs_path, base_is_dir, len_base_path)
resource = Resource(scans_cache_class, abs_path, base_is_dir, len_base_path)
# always fetch infos and cache.
resource.infos.update(scan_infos(abs_path, diag=diag))
resource.put_info(scan_infos(abs_path, diag=diag))
yield resource


Expand Down
12 changes: 7 additions & 5 deletions tests/scancode/test_ignore_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,15 @@

from commoncode.testcase import FileBasedTesting
from commoncode.ignore import is_ignored
from scancode.cache import get_scans_cache_class
from scancode.cli import resource_paths
from scancode.plugin_ignore import ProcessIgnore


class TestIgnoreFiles(FileBasedTesting):

test_data_dir = path.join(path.dirname(__file__), 'data')
scan_cache_class = get_scans_cache_class()

def test_ignore_glob_path(self):
test = (
Expand Down Expand Up @@ -80,7 +82,7 @@ def test_resource_paths_with_single_file(self):
'user/src/test',
'user/src/test/sample.txt'
]
test = [resource.rel_path for resource in resource_paths(test_dir, False, [test_plugin])]
test = [resource.rel_path for resource in resource_paths(test_dir, False, self.scan_cache_class, [test_plugin])]
assert expected == sorted(test)

def test_resource_paths_with_multiple_files(self):
Expand All @@ -93,7 +95,7 @@ def test_resource_paths_with_multiple_files(self):
'user/src/test/sample.doc',
'user/src/test/sample.txt'
]
test = [resource.rel_path for resource in resource_paths(test_dir, False, [test_plugin])]
test = [resource.rel_path for resource in resource_paths(test_dir, False, self.scan_cache_class, [test_plugin])]
assert expected == sorted(test)

def test_resource_paths_with_glob_file(self):
Expand All @@ -105,7 +107,7 @@ def test_resource_paths_with_glob_file(self):
'user/src/test',
'user/src/test/sample.txt'
]
test = [resource.rel_path for resource in resource_paths(test_dir, False, [test_plugin])]
test = [resource.rel_path for resource in resource_paths(test_dir, False, self.scan_cache_class, [test_plugin])]
assert expected == sorted(test)

def test_resource_paths_with_glob_path(self):
Expand All @@ -117,7 +119,7 @@ def test_resource_paths_with_glob_path(self):
'user/src',
'user/src/ignore.doc'
]
test = [resource.rel_path for resource in resource_paths(test_dir, False, [test_plugin])]
test = [resource.rel_path for resource in resource_paths(test_dir, False, self.scan_cache_class, [test_plugin])]
assert expected == sorted(test)

def test_resource_paths_with_multiple_plugins(self):
Expand All @@ -131,5 +133,5 @@ def test_resource_paths_with_multiple_plugins(self):
'user/src',
'user/src/test'
]
test = [resource.rel_path for resource in resource_paths(test_dir, False, test_plugins)]
test = [resource.rel_path for resource in resource_paths(test_dir, False, self.scan_cache_class, test_plugins)]
assert expected == sorted(test)

0 comments on commit a0ca6bd

Please sign in to comment.