diff --git a/sklearn/externals/joblib/__init__.py b/sklearn/externals/joblib/__init__.py index a14aa72321b33..5c2d87f90f310 100755 --- a/sklearn/externals/joblib/__init__.py +++ b/sklearn/externals/joblib/__init__.py @@ -60,6 +60,7 @@ inputs and outputs: Python functions. Joblib can save their computation to disk and rerun it only if necessary:: + >>> import numpy as np >>> from sklearn.externals.joblib import Memory >>> mem = Memory(cachedir='/tmp/joblib') >>> import numpy as np @@ -101,7 +102,7 @@ """ -__version__ = '0.6.3' +__version__ = '0.6.4' from .memory import Memory diff --git a/sklearn/externals/joblib/func_inspect.py b/sklearn/externals/joblib/func_inspect.py index 9a84cc38a2dd4..10eebc7b2e610 100755 --- a/sklearn/externals/joblib/func_inspect.py +++ b/sklearn/externals/joblib/func_inspect.py @@ -207,7 +207,7 @@ def filter_args(func, ignore_lst, args=(), kwargs=dict()): ) varkwargs = dict() - for arg_name, arg_value in kwargs.iteritems(): + for arg_name, arg_value in sorted(kwargs.items()): if arg_name in arg_dict: arg_dict[arg_name] = arg_value elif arg_keywords is not None: diff --git a/sklearn/externals/joblib/memory.py b/sklearn/externals/joblib/memory.py index b8fc0c196d108..f0a0252f42276 100755 --- a/sklearn/externals/joblib/memory.py +++ b/sklearn/externals/joblib/memory.py @@ -159,10 +159,15 @@ def __init__(self, func, cachedir, ignore=None, mmap_mode=None, def __call__(self, *args, **kwargs): # Compare the function code with the previous to see if the # function code has changed - output_dir, _ = self.get_output_dir(*args, **kwargs) + output_dir, argument_hash = self.get_output_dir(*args, **kwargs) # FIXME: The statements below should be try/excepted if not (self._check_previous_func_code(stacklevel=3) and os.path.exists(output_dir)): + if self._verbose > 10: + _, name = get_func_name(self.func) + self.warn('Computing func %s, argument hash %s in ' + 'directory %s' + % (name, argument_hash, output_dir)) return self.call(*args, **kwargs) else: try: @@ -287,6 +292,10 @@ def _check_previous_func_code(self, stacklevel=2): # The function has changed, wipe the cache directory. # XXX: Should be using warnings, and giving stacklevel + if self._verbose > 10: + _, func_name = get_func_name(self.func, resolv_alias=False) + self.warn("Function %s (stored in %s) has changed." % + (func_name, func_dir)) self.clear(warn=True) return False @@ -308,12 +317,11 @@ def call(self, *args, **kwargs): persist the output values. """ start_time = time.time() + output_dir, argument_hash = self.get_output_dir(*args, **kwargs) if self._verbose: print self.format_call(*args, **kwargs) - output_dir, argument_hash = self.get_output_dir(*args, **kwargs) output = self.func(*args, **kwargs) self._persist_output(output, output_dir) - input_repr = self._persist_input(output_dir, *args, **kwargs) duration = time.time() - start_time if self._verbose: _, name = get_func_name(self.func) @@ -368,6 +376,8 @@ def _persist_output(self, output, dir): mkdirp(dir) filename = os.path.join(dir, 'output.pkl') numpy_pickle.dump(output, filename, compress=self.compress) + if self._verbose > 10: + print 'Persisting in %s' % dir except OSError: " Race condition in the creation of the directory " @@ -398,10 +408,17 @@ def load_output(self, output_dir): """ if self._verbose > 1: t = time.time() - self.timestamp - print '[Memory]% 16s: Loading %s...' % ( + if self._verbose < 10: + print '[Memory]% 16s: Loading %s...' % ( format_time(t), self.format_signature(self.func)[0] ) + else: + print '[Memory]% 16s: Loading %s from %s' % ( + format_time(t), + self.format_signature(self.func)[0], + output_dir + ) filename = os.path.join(output_dir, 'output.pkl') return numpy_pickle.load(filename, mmap_mode=self.mmap_mode) diff --git a/sklearn/externals/joblib/test/test_hashing.py b/sklearn/externals/joblib/test/test_hashing.py index 02ae4d9c39c1a..7fdf8228528ac 100755 --- a/sklearn/externals/joblib/test/test_hashing.py +++ b/sklearn/externals/joblib/test/test_hashing.py @@ -93,7 +93,8 @@ def test_hash_methods(): def test_hash_numpy(): """ Test hashing with numpy arrays. """ - arr1 = np.random.random((10, 10)) + rnd = np.random.RandomState(0) + arr1 = rnd.random_sample((10, 10)) arr2 = arr1.copy() arr3 = arr2.copy() arr3[0] += 1 @@ -160,7 +161,8 @@ def test_hash_numpy_performance(): In [26]: %timeit hash(a) 100 loops, best of 3: 20.8 ms per loop """ - a = np.random.random(1000000) + rnd = np.random.RandomState(0) + a = rnd.random_sample(1000000) md5_hash = lambda x: hashlib.md5(np.getbuffer(x)).hexdigest() relative_diff = relative_time(md5_hash, hash, a) diff --git a/sklearn/externals/joblib/test/test_memory.py b/sklearn/externals/joblib/test/test_memory.py index 028670ee00898..356f366cbc50c 100755 --- a/sklearn/externals/joblib/test/test_memory.py +++ b/sklearn/externals/joblib/test/test_memory.py @@ -325,8 +325,10 @@ def n(l=None): verbose=0) memory.clear(warn=False) cached_n = memory.cache(n) + + rnd = np.random.RandomState(0) for i in range(3): - a = np.random.random((10, 10)) + a = rnd.random_sample((10, 10)) for _ in range(3): yield nose.tools.assert_true, np.all(cached_n(a) == a) yield nose.tools.assert_equal, len(accumulator), i + 1 diff --git a/sklearn/externals/joblib/test/test_numpy_pickle.py b/sklearn/externals/joblib/test/test_numpy_pickle.py index b8069615c90f1..f5d34cc136479 100755 --- a/sklearn/externals/joblib/test/test_numpy_pickle.py +++ b/sklearn/externals/joblib/test/test_numpy_pickle.py @@ -134,7 +134,8 @@ def test_value_error(): @with_numpy def test_numpy_persistence(): filename = env['filename'] - a = np.random.random((10, 2)) + rnd = np.random.RandomState(0) + a = rnd.random_sample((10, 2)) for compress, cache_size in ((0, 0), (1, 0), (1, 10)): # We use 'a.T' to have a non C-contiguous array. for index, obj in enumerate(((a,), (a.T,), (a, a), [a, a, a])): @@ -183,7 +184,8 @@ def test_numpy_persistence(): @with_numpy def test_memmap_persistence(): - a = np.random.random(10) + rnd = np.random.RandomState(0) + a = rnd.random_sample(10) filename = env['filename'] + str(random.randint(0, 1000)) numpy_pickle.dump(a, filename) b = numpy_pickle.load(filename, mmap_mode='r') @@ -195,7 +197,8 @@ def test_memmap_persistence(): def test_masked_array_persistence(): # The special-case picker fails, because saving masked_array # not implemented, but it just delegates to the standard pickler. - a = np.random.random(10) + rnd = np.random.RandomState(0) + a = rnd.random_sample(10) a = np.ma.masked_greater(a, 0.5) filename = env['filename'] + str(random.randint(0, 1000)) numpy_pickle.dump(a, filename) @@ -210,3 +213,26 @@ def test_z_file(): numpy_pickle.write_zfile(file(filename, 'wb'), data) data_read = numpy_pickle.read_zfile(file(filename, 'rb')) nose.tools.assert_equal(data, data_read) + +################################################################################ +# Test dumping array subclasses +if np is not None: + + class SubArray(np.ndarray): + + def __reduce__(self): + return (_load_sub_array, (np.asarray(self), )) + + + def _load_sub_array(arr): + d = SubArray(arr.shape) + d[:] = arr + return d + +@with_numpy +def test_numpy_subclass(): + filename = env['filename'] + a = SubArray((10,)) + numpy_pickle.dump(a, filename) + c = numpy_pickle.load(filename) + nose.tools.assert_true(isinstance(c, SubArray))