From d3cbee730139b2d0117a1de1474a581844505196 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Fri, 19 Apr 2019 13:38:02 +0200 Subject: [PATCH 01/11] Initial implementation to work with intermediate outputs --- mlblocks/mlpipeline.py | 82 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 71 insertions(+), 11 deletions(-) diff --git a/mlblocks/mlpipeline.py b/mlblocks/mlpipeline.py index 058737ee..d5928b69 100644 --- a/mlblocks/mlpipeline.py +++ b/mlblocks/mlpipeline.py @@ -166,7 +166,7 @@ def _get_block_args(self, block_name, block_args, context): return kwargs - def _get_outputs(self, block_name, outputs, block_outputs): + def _extract_outputs(self, block_name, outputs, block_outputs): # TODO: type validation and/or transformation should be done here if not isinstance(outputs, tuple): @@ -188,7 +188,40 @@ def _get_outputs(self, block_name, outputs, block_outputs): return output_dict - def fit(self, X=None, y=None, **kwargs): + def _get_block_name(self, index): + return list(self.blocks.keys())[index] + + def _get_output_spec(self, output): + if output is None: + return None, None + + if isinstance(output, int): + output = self._get_block_name(output) + + if output in self.blocks: + return output, None + + if '.' in output: + output_block, output_variable = output.rsplit('.', 1) + if output_block not in self.blocks: + raise ValueError('Unknown block name: {}'.format(output_block)) + + return output_block, output_variable + + last_block_name = self._get_block_name(-1) + return last_block_name, output + + def _get_output(self, output_variable, context): + if output_variable: + if output_variable not in context: + raise ValueError('Output variable {} not found in context' + .format(output_variable)) + + return context[output_variable] + else: + return context + + def fit(self, X=None, y=None, output=None, skip_to=None, **kwargs): """Fit the blocks of this pipeline. Sequentially call the `fit` and the `produce` methods of each block, @@ -213,8 +246,19 @@ def fit(self, X=None, y=None, **kwargs): } context.update(kwargs) - last_block_name = list(self.blocks.keys())[-1] + output_block, output_variable = self._get_output_spec(output) + last_block_name = self._get_block_name(-1) + + if isinstance(skip_to, int): + skip_to = self._get_block_name(skip_to) + for block_name, block in self.blocks.items(): + if block_name == skip_to: + skip_to = False + elif skip_to: + LOGGER.debug("Skipping block %s fit", block_name) + continue + LOGGER.debug("Fitting block %s", block_name) try: fit_args = self._get_block_args(block_name, block.fit_args, context) @@ -223,19 +267,22 @@ def fit(self, X=None, y=None, **kwargs): LOGGER.exception("Exception caught fitting MLBlock %s", block_name) raise - if block_name != last_block_name: + if (block_name != last_block_name) or (block_name == output_block): LOGGER.debug("Producing block %s", block_name) try: produce_args = self._get_block_args(block_name, block.produce_args, context) outputs = block.produce(**produce_args) - output_dict = self._get_outputs(block_name, outputs, block.produce_output) + output_dict = self._extract_outputs(block_name, outputs, block.produce_output) context.update(output_dict) except Exception: LOGGER.exception("Exception caught producing MLBlock %s", block_name) raise - def predict(self, X=None, **kwargs): + if block_name == output_block: + return self._get_output(output_variable, context) + + def predict(self, X=None, output='y', skip_to=None, **kwargs): """Produce predictions using the blocks of this pipeline. 
Sequentially call the `produce` method of each block, capturing the
@@ -256,22 +303,35 @@ def predict(self, X=None, **kwargs):
         }
         context.update(kwargs)
 
-        last_block_name = list(self.blocks.keys())[-1]
+        output_block, output_variable = self._get_output_spec(output)
+
+        if isinstance(skip_to, int):
+            skip_to = self._get_block_name(skip_to)
+
         for block_name, block in self.blocks.items():
+            if block_name == skip_to:
+                skip_to = False
+            elif skip_to:
+                LOGGER.debug("Skipping block %s produce", block_name)
+                continue
+
             LOGGER.debug("Producing block %s", block_name)
 
             try:
                 produce_args = self._get_block_args(block_name, block.produce_args, context)
                 outputs = block.produce(**produce_args)
+                output_dict = self._extract_outputs(block_name, outputs, block.produce_output)
+                context.update(output_dict)
 
-                if block_name != last_block_name:
-                    output_dict = self._get_outputs(block_name, outputs, block.produce_output)
-                    context.update(output_dict)
+                if block_name == output_block:
+                    return self._get_output(output_variable, context)
 
             except Exception:
                 LOGGER.exception("Exception caught producing MLBlock %s", block_name)
                 raise
 
-        return outputs
+        if skip_to:
+            # We skipped all the blocks up to the end
+            raise ValueError('Unknown block name: {}'.format(skip_to))
 
     def to_dict(self):
         """Return all the details of this MLPipeline in a dict.

From 59fae909d44afb78005425c6c4a24de567391eb5 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Mon, 6 May 2019 22:48:38 +0200 Subject: [PATCH 02/11] Update contributing guide to match the current release workflow --- CONTRIBUTING.rst | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 2db74080..4fce53bf 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -172,24 +172,26 @@ The process of releasing a new version involves several steps combining both ``g
 
 1. Merge what is in ``master`` branch into ``stable`` branch.
 2. Update the version in ``setup.cfg``, ``mlblocks/__init__.py`` and ``HISTORY.md`` files.
-3. Create a new TAG pointing at the correspoding commit in ``stable`` branch.
+3. Create a new git tag pointing at the corresponding commit in ``stable`` branch.
 4. Merge the new commit from ``stable`` into ``master``.
-5. Update the version in ``setup.cfg`` and ``mlblocks/__init__.py`` to open the next
-   development interation.
+5. Update the version in ``setup.cfg`` and ``mlblocks/__init__.py``
+   to open the next development iteration.
 
-**Note:** Before starting the process, make sure that ``HISTORY.md`` has a section titled
-**Unreleased** with the list of changes that will be included in the new version, and that
-these changes are committed and available in ``master`` branch.
-Normally this is just a list of the Pull Requests that have been merged since the latest version.
+.. note:: Before starting the process, make sure that ``HISTORY.md`` has been updated with a new
+   entry that explains the changes that will be included in the new version.
+   Normally this is just a list of the Pull Requests that have been merged to master
+   since the last release.
 
-Once this is done, just run the following commands::
+Once this is done, run one of the following commands:
+
+1. If you are releasing a patch version::
 
-    git checkout stable
-    git merge --no-ff master   # This creates a merge commit
-    bumpversion release   # This creates a new commit and a TAG
-    git push --tags origin stable
     make release
-    git checkout master
-    git merge stable
-    bumpversion --no-tag patch
-    git push
+
+2. If you are releasing a minor version::
+
+      make release-minor
+
+3. If you are releasing a major version::
+
+      make release-major

From e768037076387fcb9a33e494c9c89421f0c657a8 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Mon, 6 May 2019 22:49:47 +0200 Subject: [PATCH 03/11] Update docs config --- Makefile | 1 - docs/changelog.rst | 2 +- docs/conf.py | 20 +++++++------------- setup.py | 1 - 4 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/Makefile b/Makefile
index dc62e90d..c2d2aaa4 100644
--- a/Makefile
+++ b/Makefile
@@ -122,7 +122,6 @@ coverage: ## check code coverage quickly with the default Python
 .PHONY: docs
 docs: clean-docs ## generate Sphinx HTML documentation, including API docs
 	$(MAKE) -C docs html
-	touch docs/_build/html/.nojekyll
 
 .PHONY: view-docs
 view-docs: docs ## view docs in browser
diff --git a/docs/changelog.rst b/docs/changelog.rst
index fcd2eb2d..d26e5be8 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -1 +1 @@
-.. include:: ../HISTORY.md
+.. mdinclude:: ../HISTORY.md
diff --git a/docs/conf.py b/docs/conf.py
index 8659996f..9b4595ec 100755
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -18,18 +18,9 @@
 # relative to the documentation root, use os.path.abspath to make it
 # absolute, like shown here.
 
-import os
-import sys
-
 import sphinx_rtd_theme  # For read the docs theme
-from recommonmark.parser import CommonMarkParser
-# from recommonmark.transform import AutoStructify
-
-# sys.path.insert(0, os.path.abspath('..'))
 import mlblocks
-#
-# mlblocks.add_primitives_path('../mlblocks_primitives')
 
 # -- General configuration ---------------------------------------------
 
@@ -40,8 +31,11 @@
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 extensions = [
-    'sphinx.ext.napoleon',
+    'm2r',
+    'sphinx.ext.autodoc',
     'sphinx.ext.githubpages',
+    'sphinx.ext.viewcode',
+    'sphinx.ext.napoleon',
     'sphinx.ext.graphviz',
     'IPython.sphinxext.ipython_console_highlighting',
     'IPython.sphinxext.ipython_directive',
@@ -56,9 +50,9 @@
 # You can specify multiple suffix as a list of string:
 source_suffix = ['.rst', '.md', '.ipynb']
 
-source_parsers = {
-    '.md': CommonMarkParser,
-}
+# source_parsers = {
+#     '.md': CommonMarkParser,
+# }
 
 # The master toctree document.
 master_doc = 'index'
diff --git a/setup.py b/setup.py
index a8ac84d7..f6991ab1 100644
--- a/setup.py
+++ b/setup.py
@@ -43,7 +43,6 @@
     'graphviz==0.9',
     'ipython==6.5.0',
     'matplotlib==2.2.3',
-    'recommonmark>=0.4.0',
 
     # style check
     'flake8>=3.5.0',

From 080580d45c9b47680fbc31d30aee4e8478292711 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Mon, 6 May 2019 22:50:08 +0200 Subject: [PATCH 04/11] Remove spaces --- setup.cfg | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index e976dec7..62ced521 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -3,13 +3,13 @@ current_version = 0.3.1-dev
 commit = True
 tag = True
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+))?
-serialize = 
+serialize =
 	{major}.{minor}.{patch}-{release}
 	{major}.{minor}.{patch}
 
 [bumpversion:part:release]
 optional_value = release
-values = 
+values =
 	dev
 	release
 
@@ -45,4 +45,3 @@ collect_ignore = ['setup.py']
 
 [tool:pylint]
 good-names = X,y
-

From e25fa6d3ac3af2f20b205ed73d91d28124bc8c16 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Mon, 6 May 2019 22:50:32 +0200 Subject: [PATCH 05/11] Add docstrings --- mlblocks/mlpipeline.py | 127 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 113 insertions(+), 14 deletions(-)

diff --git a/mlblocks/mlpipeline.py b/mlblocks/mlpipeline.py
index d5928b69..abbac922 100644
--- a/mlblocks/mlpipeline.py
+++ b/mlblocks/mlpipeline.py
@@ -69,6 +69,7 @@ class MLPipeline():
     """
 
     def _get_tunable_hyperparameters(self):
+        """Get the tunable hyperparameters from all the blocks in this pipeline."""
         tunable = {}
         for block_name, block in self.blocks.items():
             tunable[block_name] = block.get_tunable_hyperparameters()
@@ -140,6 +141,24 @@ def set_hyperparameters(self, hyperparameters):
             self.blocks[block_name].set_hyperparameters(block_hyperparams)
 
     def _get_block_args(self, block_name, block_args, context):
+        """Get the arguments expected by the block method from the context.
+
+        The arguments will be taken from the context using both the method
+        arguments specification and the `input_names` given when the pipeline
+        was created.
+
+        Args:
+            block_name (str): Name of this block. Used to find the corresponding
+                input_names.
+            block_args (list): list of method argument specifications from the
+                primitive.
+            context (dict): current context dictionary.
+
+        Returns:
+            dict:
+                A dictionary containing the argument names and values to pass
+                to the method.
+        """
         # TODO: type validation and/or transformation should be done here
 
         input_names = self.input_names.get(block_name, dict())
@@ -167,6 +186,7 @@ def _get_block_args(self, block_name, block_args, context):
         return kwargs
 
     def _extract_outputs(self, block_name, outputs, block_outputs):
+        """Extract the outputs of the method as a dict to be set into the context."""
         # TODO: type validation and/or transformation should be done here
 
         if not isinstance(outputs, tuple):
@@ -189,9 +209,36 @@ def _extract_outputs(self, block_name, outputs, block_outputs):
         return output_dict
 
     def _get_block_name(self, index):
+        """Get the name of the block in the `index` position."""
         return list(self.blocks.keys())[index]
 
     def _get_output_spec(self, output):
+        """Parsre the output specification and get a block name and a variable name.
+
+        The output specification can be of two types: int and str.
+
+        If it is an integer, it is interpreted as a block index, and the variable name
+        is considered to be ``None``, which means that the whole context will be returned.
+
+        If it is a string, it is interpreted as the block name, and it has to match a block
+        name exactly, including its hash and counter number ``#n``. Optionally, a variable
+        name can be passed at the end using a ``'.'`` as a separator.
+        In this case, the format of the string is `{block_name}.{variable_name}`. Note
+        that the block name can also contain dots, so only the rightmost dot will be
+        considered, and only if the complete string does not match exactly a block name.
+
+        Args:
+            output (str or int): Output specification as either a string or an integer.
+
+        Returns:
+            tuple:
+                The output is a tuple containing:
+                    * block_name (str): name of the block from which the output will be
+                      returned, including its counter number.
+                    * variable_name (str): Name of the variable to extract from the context.
+                      It can be ``None``, which means that the whole context is to be
+                      returned.
+        """
         if output is None:
             return None, None
 
@@ -212,6 +259,10 @@ def _get_output_spec(self, output):
         return last_block_name, output
 
     def _get_output(self, output_variable, context):
+        """Get the specified output variable from the context.
+
+        If the variable name is ``None``, return the entire context.
+        """
         if output_variable:
             if output_variable not in context:
                 raise ValueError('Output variable {} not found in context'
@@ -221,7 +272,7 @@ def _get_output(self, output_variable, context):
         else:
             return context
 
-    def fit(self, X=None, y=None, output=None, skip_to=None, **kwargs):
+    def fit(self, X=None, y=None, output=None, start_on=None, **kwargs):
         """Fit the blocks of this pipeline.
 
         Sequentially call the `fit` and the `produce` methods of each block,
@@ -237,8 +288,32 @@ def fit(self, X=None, y=None, output=None, start_on=None, **kwargs):
             X: Fit Data, which the pipeline will learn from.
             y: Fit Data labels, which the pipeline will use to learn how to
                 behave.
+            output (str or int): Output specification, which can be a string or an integer.
+                If an integer is given, it is interpreted as the block number, and the whole
+                context after running the specified block will be returned.
+                If a string is given, it is expected to be the name of one block, including
+                its counter number at the end. Optionally, a variable name can be included
+                at the end after the counter number using a ``'.'`` as a separator between the
+                block name and the variable name. If the variable name is given, this will be
+                extracted from the context and returned. Otherwise, the whole context will
+                be returned.
+            start_on (str or int): Block index or block name to start processing from. The
+                value can either be an integer, which will be interpreted as a block index,
+                or the name of a block, including the counter number at the end.
+                If given, the execution of the pipeline will start on the specified block,
+                and all the blocks before that one will be skipped.
             **kwargs: Any additional keyword arguments will be directly added
                 to the context dictionary and available for the blocks.
+
+        Returns:
+            None or dict or object:
+                * If no output is specified, nothing will be returned.
+                * If an output block has been specified without an output variable, the
+                  context dictionary will be returned after the produce method of that block
+                  has been called.
+                * If both an output block and an output variable have been specified,
+                  the value of that variable from the context will be extracted and returned
+                  after the produce method of that block has been called.
""" context = { 'X': X, @@ -249,13 +324,13 @@ def fit(self, X=None, y=None, output=None, skip_to=None, **kwargs): output_block, output_variable = self._get_output_spec(output) last_block_name = self._get_block_name(-1) - if isinstance(skip_to, int): - skip_to = self._get_block_name(skip_to) + if isinstance(start_on, int): + start_on = self._get_block_name(start_on) for block_name, block in self.blocks.items(): - if block_name == skip_to: - skip_to = False - elif skip_to: + if block_name == start_on: + start_on = False + elif start_on: LOGGER.debug("Skipping block %s fit", block_name) continue @@ -282,7 +357,7 @@ def fit(self, X=None, y=None, output=None, skip_to=None, **kwargs): if block_name == output_block: return self._get_output(output_variable, context) - def predict(self, X=None, output='y', skip_to=None, **kwargs): + def predict(self, X=None, output='y', start_on=None, **kwargs): """Produce predictions using the blocks of this pipeline. Sequentially call the `produce` method of each block, capturing the @@ -295,8 +370,32 @@ def predict(self, X=None, output='y', skip_to=None, **kwargs): Args: X: Data which the pipeline will use to make predictions. + output (str or int): Output specification, which can be a string or an integer. + If an integer is given, it is interpreted as the block number, and the whole + context after running the specified block will be returned. + If a string is given, it is expected to be the name of one block, including + its counter number at the end. Optionally, a variable name can be included + at the end after the counter number using a ``'.'`` as a separator between the + block name and the variable name. If the variable name is given, this will be + extracted from the context and returned. Otherwise, the whole context will + be returned. + start_on (str or int): Block index or block name to start processing from. The + value can either be an integer, which will be interpreted as a block index, + or the name of a block, including the conter number at the end. + If given, the execution of the pipeline will start on the specified block, + and all the blocks before that one will be skipped. **kwargs: Any additional keyword arguments will be directly added to the context dictionary and available for the blocks. + + Returns: + None or dict or object: + * If no output is specified, the output of the last block will be returned. + * If an output block has been specified without and output variable, the + context dictionary will be returned after the produce method of that block + has been called. + * If both an output block and an output variable have been specified, + the value of that variable from the context will extracted and returned + after the produce method of that block has been called. 
""" context = { 'X': X @@ -305,13 +404,13 @@ def predict(self, X=None, output='y', skip_to=None, **kwargs): output_block, output_variable = self._get_output_spec(output) - if isinstance(skip_to, int): - skip_to = self._get_block_name(skip_to) + if isinstance(start_on, int): + start_on = self._get_block_name(start_on) for block_name, block in self.blocks.items(): - if block_name == skip_to: - skip_to = False - elif skip_to: + if block_name == start_on: + start_on = False + elif start_on: LOGGER.debug("Skipping block %s produce", block_name) continue @@ -329,9 +428,9 @@ def predict(self, X=None, output='y', skip_to=None, **kwargs): LOGGER.exception("Exception caught producing MLBlock %s", block_name) raise - if skip_to: + if start_on: # We skipped all the blocks up to the end - raise ValueError('Unknown block name: {}'.format(skip_to)) + raise ValueError('Unknown block name: {}'.format(start_on)) def to_dict(self): """Return all the details of this MLPipeline in a dict. From 5e9be7aa7188d38ca6eafb684c24171b9e61f322 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Mon, 6 May 2019 22:51:09 +0200 Subject: [PATCH 06/11] Update primitive names to match the latest versions of MLPrimitives --- docs/getting_started/quickstart.rst | 2 +- docs/pipeline_examples/graph.rst | 4 ++-- docs/pipeline_examples/text.rst | 22 +++++++++++----------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/getting_started/quickstart.rst b/docs/getting_started/quickstart.rst index 2e00ece6..2115fcef 100644 --- a/docs/getting_started/quickstart.rst +++ b/docs/getting_started/quickstart.rst @@ -24,7 +24,7 @@ them to the `MLPipeline class`_: from mlblocks import MLPipeline primitives = [ - 'mlprimitives.feature_extraction.StringVectorizer', + 'mlprimitives.custom.feature_extraction.StringVectorizer', 'sklearn.ensemble.RandomForestClassifier', ] pipeline = MLPipeline(primitives) diff --git a/docs/pipeline_examples/graph.rst b/docs/pipeline_examples/graph.rst index 5503e739..54ef85a1 100644 --- a/docs/pipeline_examples/graph.rst +++ b/docs/pipeline_examples/graph.rst @@ -39,7 +39,7 @@ additional information not found inside `X`. primitives = [ 'networkx.link_prediction_feature_extraction', - 'mlprimitives.feature_extraction.CategoricalEncoder', + 'mlprimitives.custom.feature_extraction.CategoricalEncoder', 'sklearn.preprocessing.StandardScaler', 'xgboost.XGBClassifier' ] @@ -69,6 +69,6 @@ additional information not found inside `X`. .. _NetworkX Link Prediction: https://networkx.github.io/documentation/networkx-1.10/reference/algorithms.link_prediction.html -.. _CategoricalEncoder from MLPrimitives: https://github.com/HDI-Project/MLPrimitives/blob/master/mlblocks_primitives/mlprimitives.feature_extraction.CategoricalEncoder.json +.. _CategoricalEncoder from MLPrimitives: https://github.com/HDI-Project/MLPrimitives/blob/master/mlblocks_primitives/mlprimitives.custom.feature_extraction.CategoricalEncoder.json .. _StandardScaler from scikit-learn: http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html .. _XGBClassifier: https://xgboost.readthedocs.io/en/latest/python/python_api.html#module-xgboost.sklearn diff --git a/docs/pipeline_examples/text.rst b/docs/pipeline_examples/text.rst index df8a9d5a..03472ea3 100644 --- a/docs/pipeline_examples/text.rst +++ b/docs/pipeline_examples/text.rst @@ -40,31 +40,31 @@ for later ones. 
# set up the pipeline primitives = [ - "mlprimitives.counters.UniqueCounter", - "mlprimitives.text.TextCleaner", - "mlprimitives.counters.VocabularyCounter", + "mlprimitives.custom.counters.UniqueCounter", + "mlprimitives.custom.text.TextCleaner", + "mlprimitives.custom.counters.VocabularyCounter", "keras.preprocessing.text.Tokenizer", "keras.preprocessing.sequence.pad_sequences", "keras.Sequential.LSTMTextClassifier" ] input_names = { - "mlprimitives.counters.UniqueCounter#1": { + "mlprimitives.custom.counters.UniqueCounter#1": { "X": "y" } } output_names = { - "mlprimitives.counters.UniqueCounter#1": { + "mlprimitives.custom.counters.UniqueCounter#1": { "counts": "classes" }, - "mlprimitives.counters.VocabularyCounter#1": { + "mlprimitives.custom.counters.VocabularyCounter#1": { "counts": "vocabulary_size" } } init_params = { - "mlprimitives.counters.VocabularyCounter#1": { + "mlprimitives.custom.counters.VocabularyCounter#1": { "add": 1 }, - "mlprimitives.text.TextCleaner#1": { + "mlprimitives.custom.text.TextCleaner#1": { "language": "en" }, "keras.preprocessing.sequence.pad_sequences#1": { @@ -116,12 +116,12 @@ to encode all the string features, and go directly into the nltk.download('stopwords') primitives = [ - 'mlprimitives.text.TextCleaner', - 'mlprimitives.feature_extraction.StringVectorizer', + 'mlprimitives.custom.text.TextCleaner', + 'mlprimitives.custom.feature_extraction.StringVectorizer', 'sklearn.ensemble.RandomForestClassifier', ] init_params = { - 'mlprimitives.text.TextCleaner': { + 'mlprimitives.custom.text.TextCleaner': { 'column': 'text', 'language': 'nl' }, From 9f0ae6a3fa000896d8f530b72f6da46d23c31e4b Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Tue, 7 May 2019 17:12:33 +0200 Subject: [PATCH 07/11] Add random state to datasets get_splits --- mlblocks/datasets.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mlblocks/datasets.py b/mlblocks/datasets.py index b5ed6b46..fb32df9c 100644 --- a/mlblocks/datasets.py +++ b/mlblocks/datasets.py @@ -141,7 +141,7 @@ def _get_split(data, index): else: return data[index] - def get_splits(self, n_splits=1): + def get_splits(self, n_splits=1, random_state=0): """Return splits of this dataset ready for Cross Validation. If n_splits is 1, a tuple containing the X for train and test @@ -166,12 +166,13 @@ def get_splits(self, n_splits=1): self.data, self.target, shuffle=self._shuffle, - stratify=stratify + stratify=stratify, + random_state=random_state ) else: cv_class = StratifiedKFold if self._stratify else KFold - cv = cv_class(n_splits=n_splits, shuffle=self._shuffle) + cv = cv_class(n_splits=n_splits, shuffle=self._shuffle, random_state=random_state) splits = list() for train, test in cv.split(self.data, self.target): From 5aea64755b7b7f9b4e68f6faa9a0912c1a55033a Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Tue, 7 May 2019 17:12:58 +0200 Subject: [PATCH 08/11] Rename output and start arguments --- mlblocks/mlpipeline.py | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/mlblocks/mlpipeline.py b/mlblocks/mlpipeline.py index abbac922..91e44341 100644 --- a/mlblocks/mlpipeline.py +++ b/mlblocks/mlpipeline.py @@ -272,7 +272,7 @@ def _get_output(self, output_variable, context): else: return context - def fit(self, X=None, y=None, output=None, start_on=None, **kwargs): + def fit(self, X=None, y=None, output_=None, start_=None, **kwargs): """Fit the blocks of this pipeline. 
Sequentially call the `fit` and the `produce` methods of each block,
@@ -288,7 +288,7 @@ def fit(self, X=None, y=None, output_=None, start_=None, **kwargs):
             X: Fit Data, which the pipeline will learn from.
             y: Fit Data labels, which the pipeline will use to learn how to
                 behave.
-            output (str or int): Output specification, which can be a string or an integer.
+            output_ (str or int): Output specification, which can be a string or an integer.
                 If an integer is given, it is interpreted as the block number, and the whole
                 context after running the specified block will be returned.
                 If a string is given, it is expected to be the name of one block, including
@@ -297,7 +297,7 @@ def fit(self, X=None, y=None, output_=None, start_=None, **kwargs):
                 block name and the variable name. If the variable name is given, this will be
                 extracted from the context and returned. Otherwise, the whole context will
                 be returned.
-            start_on (str or int): Block index or block name to start processing from. The
+            start_ (str or int): Block index or block name to start processing from. The
                 value can either be an integer, which will be interpreted as a block index,
                 or the name of a block, including the counter number at the end.
If given, the execution of the pipeline will start on the specified block,
                 and all the blocks before that one will be skipped.
             **kwargs: Any additional keyword arguments will be directly added
                 to the context dictionary and available for the blocks.
@@ -321,16 +321,16 @@ def fit(self, X=None, y=None, output_=None, start_=None, **kwargs):
         }
         context.update(kwargs)
 
-        output_block, output_variable = self._get_output_spec(output)
+        output_block, output_variable = self._get_output_spec(output_)
         last_block_name = self._get_block_name(-1)
 
-        if isinstance(start_on, int):
-            start_on = self._get_block_name(start_on)
+        if isinstance(start_, int):
+            start_ = self._get_block_name(start_)
 
         for block_name, block in self.blocks.items():
-            if block_name == start_on:
-                start_on = False
-            elif start_on:
+            if block_name == start_:
+                start_ = False
+            elif start_:
                 LOGGER.debug("Skipping block %s fit", block_name)
                 continue
 
@@ -357,7 +357,7 @@ def fit(self, X=None, y=None, output_=None, start_=None, **kwargs):
             if block_name == output_block:
                 return self._get_output(output_variable, context)
 
-    def predict(self, X=None, output='y', start_on=None, **kwargs):
+    def predict(self, X=None, output_='y', start_=None, **kwargs):
         """Produce predictions using the blocks of this pipeline.
 
         Sequentially call the `produce` method of each block, capturing the
@@ -370,7 +370,7 @@ def predict(self, X=None, output_='y', start_=None, **kwargs):
 
         Args:
             X: Data which the pipeline will use to make predictions.
-            output (str or int): Output specification, which can be a string or an integer.
+            output_ (str or int): Output specification, which can be a string or an integer.
                 If an integer is given, it is interpreted as the block number, and the whole
                 context after running the specified block will be returned.
                 If a string is given, it is expected to be the name of one block, including
@@ -379,7 +379,7 @@ def predict(self, X=None, output_='y', start_=None, **kwargs):
                 block name and the variable name. If the variable name is given, this will be
                 extracted from the context and returned. Otherwise, the whole context will
                 be returned.
-            start_on (str or int): Block index or block name to start processing from. The
+            start_ (str or int): Block index or block name to start processing from. The
                 value can either be an integer, which will be interpreted as a block index,
                 or the name of a block, including the counter number at the end.
                 If given, the execution of the pipeline will start on the specified block,
                 and all the blocks before that one will be skipped.
@@ -402,15 +402,15 @@ def predict(self, X=None, output_='y', start_=None, **kwargs):
         }
         context.update(kwargs)
 
-        output_block, output_variable = self._get_output_spec(output)
+        output_block, output_variable = self._get_output_spec(output_)
 
-        if isinstance(start_on, int):
-            start_on = self._get_block_name(start_on)
+        if isinstance(start_, int):
+            start_ = self._get_block_name(start_)
 
         for block_name, block in self.blocks.items():
-            if block_name == start_on:
-                start_on = False
-            elif start_on:
+            if block_name == start_:
+                start_ = False
+            elif start_:
                 LOGGER.debug("Skipping block %s produce", block_name)
                 continue
 
@@ -428,9 +428,9 @@ def predict(self, X=None, output_='y', start_=None, **kwargs):
                 LOGGER.exception("Exception caught producing MLBlock %s", block_name)
                 raise
 
-        if start_on:
+        if start_:
             # We skipped all the blocks up to the end
-            raise ValueError('Unknown block name: {}'.format(start_on))
+            raise ValueError('Unknown block name: {}'.format(start_))
 
     def to_dict(self):
         """Return all the details of this MLPipeline in a dict.

From 4607b3898aa9767774f872b936f2311492179746 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Tue, 7 May 2019 17:13:12 +0200 Subject: [PATCH 09/11] Add unit tests for partial outputs feature --- tests/features/test_partial_outputs.py | 133 +++++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 tests/features/test_partial_outputs.py

diff --git a/tests/features/test_partial_outputs.py b/tests/features/test_partial_outputs.py
new file mode 100644
index 00000000..ce28d457
--- /dev/null
+++ b/tests/features/test_partial_outputs.py
@@ -0,0 +1,133 @@
+from unittest import TestCase
+from unittest.mock import Mock
+
+import numpy as np
+
+from mlblocks.datasets import load_iris
+from mlblocks.mlpipeline import MLPipeline
+
+
+def almost_equal(obj1, obj2):
+    if isinstance(obj1, dict):
+        if not isinstance(obj2, dict):
+            raise AssertionError("{} is not equal to {}".format(type(obj2), dict))
+
+        for key, value in obj1.items():
+            if key not in obj2:
+                raise AssertionError("{} not in {}".format(key, obj2))
+            almost_equal(value, obj2[key])
+
+    else:
+        np.testing.assert_almost_equal(obj1, obj2)
+
+
+class TestPartialOutputs(TestCase):
+    def setUp(self):
+        dataset = load_iris()
+
+        self.X_train, self.X_test, self.y_train, self.y_test = dataset.get_splits(1)
+
+    def test_fit_output(self):
+
+        # Setup variables
+        primitives = [
+            'sklearn.preprocessing.StandardScaler',
+            'sklearn.linear_model.LogisticRegression'
+        ]
+        pipeline = MLPipeline(primitives)
+
+        int_block = 0
+        invalid_int = 10
+        str_block = 'sklearn.preprocessing.StandardScaler#1'
+        invalid_block = 'InvalidBlockName'
+        str_block_variable = 'sklearn.preprocessing.StandardScaler#1.y'
+        invalid_variable = 'sklearn.preprocessing.StandardScaler#1.invalid'
+
+        # Run
+        int_out = pipeline.fit(self.X_train[0:5], self.y_train[0:5], output_=int_block)
+        str_out = pipeline.fit(self.X_train[0:5], self.y_train[0:5], output_=str_block)
+        str_out_variable = pipeline.fit(self.X_train[0:5], self.y_train[0:5],
+                                        output_=str_block_variable)
+        no_output = pipeline.fit(self.X_train, self.y_train)
+
+        # Assert successful calls
+        X = np.array([
+            [0.71269665, -1.45152899, 0.55344946, 0.31740553],
+            [0.26726124, 1.23648766, -1.1557327, -1.0932857],
+            [-1.95991577, 0.967686, -1.1557327, -1.0932857],
+            [0.71269665, -0.645124, 0.39067021, 0.31740553],
+            [0.26726124, -0.10752067, 1.36734573, 1.55176035]
+        ])
+        y = np.array([1, 0, 0, 1, 2])
+        context = {
+            'X': X,
+            'y': y
+        }
+        almost_equal(context, int_out)
+        almost_equal(context, str_out)
+
+        almost_equal(y, str_out_variable)
+
+        assert no_output is None
+
+        # Run asserting exceptions
+        with self.assertRaises(IndexError):
+            pipeline.fit(self.X_train[0:5], self.y_train[0:5], output_=invalid_int)
+
+        with self.assertRaises(ValueError):
+            pipeline.fit(self.X_train[0:5], self.y_train[0:5], output_=invalid_block)
+
+        with self.assertRaises(ValueError):
+            pipeline.fit(self.X_train[0:5], self.y_train[0:5], output_=invalid_variable)
+
+    def test_fit_start(self):
+        # Setup variables
+        primitives = [
+            'sklearn.preprocessing.StandardScaler',
+            'sklearn.linear_model.LogisticRegression'
+        ]
+        pipeline = MLPipeline(primitives)
+
+        # Mock the first block
+        block_mock = Mock()
+        pipeline.blocks['sklearn.preprocessing.StandardScaler#1'] = block_mock
+
+        # Run first block
+        context = {
+            'X': self.X_train,
+            'y': self.y_train
+        }
+        int_start = 1
+        str_start = 'sklearn.linear_model.LogisticRegression#1'
+
+        pipeline.fit(start_=int_start, **context)
+        pipeline.fit(start_=str_start, **context)
+
+        # Assert that mock has not been called
+        block_mock.fit.assert_not_called()
+
+    def test_predict_start(self):
+        # Setup variables
+        primitives = [
+            'sklearn.preprocessing.StandardScaler',
+            'sklearn.linear_model.LogisticRegression'
+        ]
+        pipeline = MLPipeline(primitives)
+        pipeline.fit(self.X_train, self.y_train)
+
+        # Mock the first block
+        block_mock = Mock()
+        pipeline.blocks['sklearn.preprocessing.StandardScaler#1'] = block_mock
+
+        # Run first block
+        context = {
+            'X': self.X_train,
+        }
+        int_start = 1
+        str_start = 'sklearn.linear_model.LogisticRegression#1'
+
+        pipeline.predict(start_=int_start, **context)
+        pipeline.predict(start_=str_start, **context)
+
+        # Assert that mock has not been called
+        block_mock.produce.assert_not_called()

From 980794b67165e286d49cb81cf742ea44fd760365 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Thu, 9 May 2019 15:14:23 +0200 Subject: [PATCH 10/11] Improve docstrings and add toc in autogenerated API reference --- Makefile | 5 + docs/conf.py | 9 +- mlblocks/datasets.py | 12 +- mlblocks/mlblock.py | 79 +++++++------ mlblocks/mlpipeline.py | 256 +++++++++++++++++++++++++++-------------- mlblocks/primitives.py | 3 +- setup.cfg | 6 + setup.py | 4 + 8 files changed, 234 insertions(+), 140 deletions(-)

diff --git a/Makefile b/Makefile
index c2d2aaa4..6266033f 100644
--- a/Makefile
+++ b/Makefile
@@ -98,6 +98,11 @@ fix-lint: ## fix lint issues using autoflake, autopep8, and isort
 	autopep8 --in-place --recursive --aggressive tests
 	isort --apply --atomic --recursive tests
 
+.PHONY: lint-docs
+lint-docs: ## check docs formatting with doc8 and pydocstyle
+	doc8 mlblocks/
+	pydocstyle mlblocks/
+
 
 # TEST TARGETS
 
diff --git a/docs/conf.py b/docs/conf.py
index 9b4595ec..95653914 100755
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -39,8 +39,13 @@
     'sphinx.ext.graphviz',
     'IPython.sphinxext.ipython_console_highlighting',
     'IPython.sphinxext.ipython_directive',
+    'autodocsumm',
 ]
 
+autodoc_default_options = {
+    'autosummary': True,
+}
+
 ipython_execlines = ["import pandas as pd", "pd.set_option('display.width', 1000000)"]
 
 # Add any paths that contain templates here, relative to this directory.
@@ -50,10 +55,6 @@
 # You can specify multiple suffix as a list of string:
 source_suffix = ['.rst', '.md', '.ipynb']
 
-# source_parsers = {
-#     '.md': CommonMarkParser,
-# }
-
 # The master toctree document.
master_doc = 'index'
 
diff --git a/mlblocks/datasets.py b/mlblocks/datasets.py
index fb32df9c..0c69afda 100644
--- a/mlblocks/datasets.py
+++ b/mlblocks/datasets.py
@@ -100,6 +100,7 @@ class Dataset():
         **kwargs: Any additional keyword argument passed on initialization
             will be made available as instance attributes.
     """
+
     def __init__(self, description, data, target, score, shuffle=True, stratify=False, **kwargs):
 
         self.name = description.splitlines()[0]
@@ -115,10 +116,10 @@ def __init__(self, description, data, target, score, shuffle=True, stratify=False, **kwargs):
         self.__dict__.update(kwargs)
 
     def score(self, *args, **kwargs):
-        """Scoring function for this dataset.
+        r"""Scoring function for this dataset.
 
         Args:
-            \\*args, \\*\\*kwargs: Any given arguments and keyword arguments will be
+            \*args, \*\*kwargs: Any given arguments and keyword arguments will be
                 directly passed to the given scoring function.
 
         Returns:
@@ -315,7 +316,6 @@ def load_dic28():
     There exist 52,652 words (vertices in a network) having 2 up to 8 characters
     in the dictionary. The obtained network has 89038 edges.
     """
-
     dataset_path = _load('dic28')
 
     X = _load_csv(dataset_path, 'data')
@@ -344,7 +344,6 @@ def load_nomination():
     Data consists of one graph whose nodes contain two attributes, attr1 and attr2.
     Associated with each node is a label that has to be learned and predicted.
     """
-
     dataset_path = _load('nomination')
 
     X = _load_csv(dataset_path, 'data')
@@ -363,7 +362,6 @@ def load_amazon():
     co-purchased with product j, the graph contains an undirected edge from i to j.
     Each product category provided by Amazon defines each ground-truth community.
    """
-
     dataset_path = _load('amazon')
 
     X = _load_csv(dataset_path, 'data')
@@ -383,7 +381,6 @@ def load_jester():
         source: "University of California Berkeley, CA"
         sourceURI: "http://eigentaste.berkeley.edu/dataset/"
     """
-
     dataset_path = _load('jester')
 
     X = _load_csv(dataset_path, 'data')
@@ -393,7 +390,7 @@ def load_jester():
 
 
 def load_wikiqa():
-    """A Challenge Dataset for Open-Domain Question Answering.
+    """Challenge Dataset for Open-Domain Question Answering.
 
     WikiQA dataset is a publicly available set of question and sentence (QS) pairs,
     collected and annotated for research on open-domain question answering.
 
     source: "Microsoft"
     sourceURI: "https://www.microsoft.com/en-us/research/publication/wikiqa-a-challenge-dataset-for-open-domain-question-answering/#"
     """  # noqa
-
     dataset_path = _load('wikiqa')
 
     data = _load_csv(dataset_path, 'data', set_index=True)
diff --git a/mlblocks/mlblock.py b/mlblocks/mlblock.py
index a5cdb6a4..c3878e68 100644
--- a/mlblocks/mlblock.py
+++ b/mlblocks/mlblock.py
@@ -25,32 +25,34 @@ class MLBlock():
     as wrapping them and providing a common interface to run them.
 
     Attributes:
-        name (str): Name given to this MLBlock.
-        primitive (object): the actual function or instance which this MLBlock
-            wraps.
-        fit_args (dict): specification of the arguments expected by the `fit`
-            method.
-        fit_method (str): name of the primitive method to call on `fit`.
-            `None` if the primitive is a function.
-        produce_args (dict): specification of the arguments expected by the
-            `predict` method.
-        produce_output (dict): specification of the outputs of the `produce`
-            method.
-        produce_method (str): name of the primitive method to call on
-            `produce`. `None` if the primitive is a function.
+        name (str):
+            Name given to this MLBlock.
+        primitive (object):
+            the actual function or instance which this MLBlock wraps.
+ fit_args (dict): + specification of the arguments expected by the `fit` method. + fit_method (str): + name of the primitive method to call on `fit`. `None` if the primitive is a function. + produce_args (dict): + specification of the arguments expected by the `predict` method. + produce_output (dict): + specification of the outputs of the `produce` method. + produce_method (str): + name of the primitive method to call on `produce`. `None` if the primitive is a + function. Args: - name (str): Name given to this MLBlock. - **kwargs: Any additional arguments that will be used as - hyperparameters or passed to the `fit` or `produce` - methods. + name (str): + Name given to this MLBlock. + **kwargs: + Any additional arguments that will be used as hyperparameters or passed to the + `fit` or `produce` methods. Raises: - TypeError: A `TypeError` is raised if a required argument is not - found within the `kwargs` or if an unexpected - argument has been given. - """ - # pylint: disable=too-many-instance-attributes + TypeError: + A `TypeError` is raised if a required argument is not found within the `kwargs` + or if an unexpected argument has been given. + """ # pylint: disable=too-many-instance-attributes def _extract_params(self, kwargs, hyperparameters): """Extract init, fit and produce params from kwargs. @@ -63,16 +65,16 @@ def _extract_params(self, kwargs, hyperparameters): have been given and that nothing unexpected exists in the input. Args: - kwargs (dict): dict containing the Keyword arguments that have - been passed to the `__init__` method upon - initialization. - hyperparameters (dict): hyperparameters dictionary, as found in - the JSON annotation. + kwargs (dict): + dict containing the Keyword arguments that have been passed to the `__init__` + method upon initialization. + hyperparameters (dict): + hyperparameters dictionary, as found in the JSON annotation. Raises: - TypeError: A `TypeError` is raised if a required argument is not - found in the `kwargs` dict, or if an unexpected - argument has been given. + TypeError: + A `TypeError` is raised if a required argument is not found in the `kwargs` dict, + or if an unexpected argument has been given. """ init_params = dict() fit_params = dict() @@ -138,7 +140,6 @@ def _get_tunable(cls, hyperparameters, init_params): return tunable def __init__(self, name, **kwargs): - self.name = name metadata = load_primitive(name) @@ -174,6 +175,7 @@ def __init__(self, name, **kwargs): self.set_hyperparameters(default) def __str__(self): + """Return a string that represents this block.""" return 'MLBlock - {}'.format(self.name) def get_tunable_hyperparameters(self): @@ -210,9 +212,9 @@ def set_hyperparameters(self, hyperparameters): If necessary, a new instance of the primitive is created. Args: - hyperparameters (dict): Dictionary containing as keys the name - of the hyperparameters and as values - the values to be used. + hyperparameters (dict): + Dictionary containing as keys the name of the hyperparameters and as + values the values to be used. """ self._hyperparameters.update(hyperparameters) @@ -233,12 +235,13 @@ def fit(self, **kwargs): the primitive is a simple function, this will be a noop. Args: - **kwargs: Any given keyword argument will be directly passed - to the primitive fit method. + **kwargs: + Any given keyword argument will be directly passed to the primitive fit method. Raises: - TypeError: A `TypeError` might be raised if any argument not - expected by the primitive fit method is given. 
+                method is given.
+        """
         if self.fit_method is not None:
             fit_args = self._fit_params.copy()
diff --git a/mlblocks/mlpipeline.py b/mlblocks/mlpipeline.py
index 91e44341..eddb442e 100644
--- a/mlblocks/mlpipeline.py
+++ b/mlblocks/mlpipeline.py
@@ -34,38 +34,35 @@ class MLPipeline():
     results, which will be returned as the prediction of the pipeline.
 
     Attributes:
-        primitives (list): List of the names of the primitives that compose
-            this pipeline.
-        blocks (list): OrderedDict of the block names and the corresponding
-            MLBlock instances.
-        init_params (dict): init_params dictionary, as given when the instance
-            was created.
-        input_names (dict): input_names dictionary, as given when the instance
-            was created.
-        output_names (dict): output_names dictionary, as given when the instance
-            was created.
+        primitives (list):
+            List of the names of the primitives that compose this pipeline.
+        blocks (list):
+            OrderedDict of the block names and the corresponding MLBlock instances.
+        init_params (dict):
+            init_params dictionary, as given when the instance was created.
+        input_names (dict):
+            input_names dictionary, as given when the instance was created.
+        output_names (dict):
+            output_names dictionary, as given when the instance was created.
 
     Args:
-        primitives (list): List with the names of the primitives that will
-            compose this pipeline.
-        init_params (dict): dictionary containing initialization arguments to
-            be passed when creating the MLBlocks instances.
-            The dictionary keys must be the corresponding
-            primitive names and the values must be another
-            dictionary that will be passed as `**kargs` to the
-            MLBlock instance.
-        input_names (dict): dictionary that maps input variable names with the
-            actual names expected by each primitive. This
-            allows reusing the same input argument for multiple
-            primitives that name it differently, as well as
-            passing different values to primitives that expect
-            arguments named similary.
-        output_names (dict): dictionary that maps output variable names with
-            the name these variables will be given when stored
-            in the context dictionary. This allows storing
-            the output of different primitives in different
-            variables, even if the primitive output name is
-            the same one.
+        primitives (list):
+            List with the names of the primitives that will compose this pipeline.
+        init_params (dict):
+            dictionary containing initialization arguments to be passed when creating the
+            MLBlocks instances. The dictionary keys must be the corresponding primitive names
+            and the values must be another dictionary that will be passed as `**kwargs` to the
+            MLBlock instance.
+        input_names (dict):
+            dictionary that maps input variable names with the actual names expected by each
+            primitive. This allows reusing the same input argument for multiple primitives that
+            name it differently, as well as passing different values to primitives that expect
+            arguments named similarly.
+        output_names (dict):
+            dictionary that maps output variable names with the name these variables will be
+            given when stored in the context dictionary. This allows storing the output of
+            different primitives in different variables, even if the primitive output name is
+            the same one.
     """
 
     def _get_tunable_hyperparameters(self):
@@ -130,9 +130,9 @@ def set_hyperparameters(self, hyperparameters):
         """Set new hyperparameter values for some blocks.
Args:
-            hyperparameters (dict): A dictionary containing the block names as
-                keys and the new hyperparameters dictionary
-                as values.
+            hyperparameters (dict):
+                A dictionary containing the block names as keys and the new hyperparameters
+                dictionary as values.
         """
         for block_name, block_hyperparams in hyperparameters.items():
             self.blocks[block_name].set_hyperparameters(block_hyperparams)
@@ -148,11 +145,12 @@ def _get_block_args(self, block_name, block_args, context):
         was created.
 
         Args:
-            block_name (str): Name of this block. Used to find the corresponding
-                input_names.
-            block_args (list): list of method argument specifications from the
-                primitive.
-            context (dict): current context dictionary.
+            block_name (str):
+                Name of this block. Used to find the corresponding input_names.
+            block_args (list):
+                list of method argument specifications from the primitive.
+            context (dict):
+                current context dictionary.
 
         Returns:
             dict:
@@ -213,22 +211,40 @@ def _get_block_name(self, index):
         return list(self.blocks.keys())[index]
 
     def _get_output_spec(self, output):
-        """Parsre the output specification and get a block name and a variable name.
+        """Parse the output specification and get a block name and a variable name.
 
         The output specification can be of two types: int and str.
 
         If it is an integer, it is interpreted as a block index, and the variable name
         is considered to be ``None``, which means that the whole context will be returned.
 
-        If it is a string, it is interpreted as the block name, and it has to match a block
-        name exactly, including its hash and counter number ``#n``. Optionally, a variable
-        name can be passed at the end using a ``'.'`` as a separator.
-        In this case, the format of the string is `{block_name}.{variable_name}`. Note
-        that the block name can also contain dots, so only the rightmost dot will be
-        considered, and only if the complete string does not match exactly a block name.
+        If it is a string, it can be interpreted in three ways:
+
+            * **block name**: If the string matches a block name exactly, including
+              its hash and counter number ``#n`` at the end, the whole context will be
+              returned after that block is produced.
+            * **variable_name**: If the string does not match any block name and does
+              not contain any dot character, ``'.'``, it will be considered a variable
+              name. In this case, the indicated variable will be extracted from the
+              context and returned after the last block has been produced.
+            * **block_name + variable_name**: If the complete string does not match a
+              block name but it contains at least one dot, ``'.'``, it will be split
+              in two parts on the last dot. If the first part of the string matches a
+              block name exactly, the second part of the string will be considered a
+              variable name, assuming the format ``{block_name}.{variable_name}``, and
+              the indicated variable will be extracted from the context and returned
+              after the block has been produced. Otherwise, if the extracted
+              ``block_name`` does not match a block name exactly, a ``ValueError``
+              will be raised.
 
         Args:
-            output (str or int): Output specification as either a string or an integer.
+            output (str or int):
+                Output specification as either a string or an integer.
+
+        Raises:
+            ValueError:
+                If the output string contains dots but it does not match a block
+                name exactly.
 
         Returns:
             tuple:
                 The output is a tuple containing:
                     * block_name (str): name of the block from which the output will be
                       returned, including its counter number.
                     * variable_name (str): Name of the variable to extract from the context.
                       It can be ``None``, which means that the whole context is to be
                       returned.
""" + # If None is given, both block and varialbe are None if output is None: return None, None + # If an int is given, it is a block index and there is no variable if isinstance(output, int): output = self._get_block_name(output) + return output, None + # If the string matches a block name, there is no variable if output in self.blocks: return output, None + # If there is at least one dot in the output, but it did not match + # a block name, it is considered to be {block_name}.{variable_name} if '.' in output: output_block, output_variable = output.rsplit('.', 1) if output_block not in self.blocks: @@ -255,6 +277,9 @@ def _get_output_spec(self, output): return output_block, output_variable + # If the given string is not a block name and it has no dots, + # it is considered to be a variable name to be extracted + # from the context after the last block has been produced last_block_name = self._get_block_name(-1) return last_block_name, output @@ -285,25 +310,48 @@ def fit(self, X=None, y=None, output_=None, start_=None, **kwargs): `produce` calls will be taken. Args: - X: Fit Data, which the pipeline will learn from. - y: Fit Data labels, which the pipeline will use to learn how to - behave. - output_ (str or int): Output specification, which can be a string or an integer. - If an integer is given, it is interpreted as the block number, and the whole - context after running the specified block will be returned. - If a string is given, it is expected to be the name of one block, including - its counter number at the end. Optionally, a variable name can be included - at the end after the counter number using a ``'.'`` as a separator between the - block name and the variable name. If the variable name is given, this will be - extracted from the context and returned. Otherwise, the whole context will - be returned. - start_ (str or int): Block index or block name to start processing from. The + X: + Fit Data, which the pipeline will learn from. + + y: + Fit Data labels, which the pipeline will use to learn how to + behave. + + output_ (str or int or None): + Output specification, which can be a string or an integer or None. + + * If it is None (default), nothing will be returned + * If an integer is given, it is interpreted as the block number, and the whole + context after running the specified block will be returned. + * If it is a string, it can be interpreted in three ways: + + * **block name**: If the string matches a block name exactly, including + its hash and counter number ``#n`` at the end, the whole context will be + returned after that block is produced. + * **variable_name**: If the string does not match any block name and does + not contain any dot characted, ``'.'``, it will be considered a variable + name. In this case, the indicated variable will be extracted from the + context and returned after the last block has been produced. + * **block_name + variable_name**: If the complete string does not match a + block name but it contains at least one dot, ``'.'``, it will be split + in two parts on the last dot. If the first part of the string matches a + block name exactly, the second part of the string will be considered a + variable name, assuming the format ``{block_name}.{variable_name}``, and + the indicated variable will be extracted from the context and returned + after the block has been produced. Otherwise, if the extracted + ``block_name`` does not match a block name exactly, a ``ValueError`` + will be raised. 
+
+            start_ (str or int or None):
+                Block index or block name to start processing from. The
                 value can either be an integer, which will be interpreted as a block index,
                 or the name of a block, including the counter number at the end.
                 If given, the execution of the pipeline will start on the specified block,
                 and all the blocks before that one will be skipped.
-            **kwargs: Any additional keyword arguments will be directly added
-                to the context dictionary and available for the blocks.
+
+            **kwargs:
+                Any additional keyword arguments will be directly added
+                to the context dictionary and available for the blocks.
 
         Returns:
             None or dict or object:
@@ -328,11 +376,12 @@ def fit(self, X=None, y=None, output_=None, start_=None, **kwargs):
             start_ = self._get_block_name(start_)
 
         for block_name, block in self.blocks.items():
-            if block_name == start_:
-                start_ = False
-            elif start_:
-                LOGGER.debug("Skipping block %s fit", block_name)
-                continue
+            if start_:
+                if block_name == start_:
+                    start_ = False
+                else:
+                    LOGGER.debug("Skipping block %s fit", block_name)
+                    continue
 
         LOGGER.debug("Fitting block %s", block_name)
         try:
@@ -357,7 +406,11 @@ def fit(self, X=None, y=None, output_=None, start_=None, **kwargs):
             if block_name == output_block:
                 return self._get_output(output_variable, context)
 
-    def predict(self, X=None, output_='y', start_=None, **kwargs):
+        if start_:
+            # We skipped all the blocks up to the end
+            raise ValueError('Unknown block name: {}'.format(start_))
+
+    def predict(self, X=None, output_=None, start_=None, **kwargs):
         """Produce predictions using the blocks of this pipeline.
 
         Sequentially call the `produce` method of each block, capturing the
@@ -369,23 +422,43 @@ def predict(self, X=None, output_=None, start_=None, **kwargs):
         will be taken.
 
         Args:
-            X: Data which the pipeline will use to make predictions.
-            output_ (str or int): Output specification, which can be a string or an integer.
-                If an integer is given, it is interpreted as the block number, and the whole
-                context after running the specified block will be returned.
-                If a string is given, it is expected to be the name of one block, including
-                its counter number at the end. Optionally, a variable name can be included
-                at the end after the counter number using a ``'.'`` as a separator between the
-                block name and the variable name. If the variable name is given, this will be
-                extracted from the context and returned. Otherwise, the whole context will
-                be returned.
+            X:
+                Data which the pipeline will use to make predictions.
+
+            output_ (str or int or None):
+                Output specification, which can be a string or an integer or None.
+                * If it is None (default), the output of the last block will be returned.
+                * If an integer is given, it is interpreted as the block number, and the whole
+                  context after running the specified block will be returned.
+                * If it is a string, it can be interpreted in three ways:
+
+                    * **block name**: If the string matches a block name exactly, including
+                      its hash and counter number ``#n`` at the end, the whole context will be
+                      returned after that block is produced.
+                    * **variable_name**: If the string does not match any block name and does
+                      not contain any dot character, ``'.'``, it will be considered a variable
+                      name. In this case, the indicated variable will be extracted from the
+                      context and returned after the last block has been produced.
+                    * **block_name + variable_name**: If the complete string does not match a
+                      block name but it contains at least one dot, ``'.'``, it will be split
+                      in two parts on the last dot. If the first part of the string matches a
+                      block name exactly, the second part of the string will be considered a
+                      variable name, assuming the format ``{block_name}.{variable_name}``, and
+                      the indicated variable will be extracted from the context and returned
+                      after the block has been produced. Otherwise, if the extracted
+                      ``block_name`` does not match a block name exactly, a ``ValueError``
+                      will be raised.
+
+            start_ (str or int or None):
+                Block index or block name to start processing from. The
                 value can either be an integer, which will be interpreted as a block index,
                 or the name of a block, including the counter number at the end.
                 If given, the execution of the pipeline will start on the specified block,
                 and all the blocks before that one will be skipped.
-            **kwargs: Any additional keyword arguments will be directly added
-                to the context dictionary and available for the blocks.
+
+            **kwargs:
+                Any additional keyword arguments will be directly added
+                to the context dictionary and available for the blocks.
 
         Returns:
             None or dict or object:
@@ -408,11 +481,12 @@ def predict(self, X=None, output_=None, start_=None, **kwargs):
             start_ = self._get_block_name(start_)
 
         for block_name, block in self.blocks.items():
-            if block_name == start_:
-                start_ = False
-            elif start_:
-                LOGGER.debug("Skipping block %s produce", block_name)
-                continue
+            if start_:
+                if block_name == start_:
+                    start_ = False
+                else:
+                    LOGGER.debug("Skipping block %s produce", block_name)
+                    continue
 
             LOGGER.debug("Producing block %s", block_name)
             try:
@@ -432,6 +506,9 @@ def predict(self, X=None, output_=None, start_=None, **kwargs):
             # We skipped all the blocks up to the end
             raise ValueError('Unknown block name: {}'.format(start_))
 
+        if output_ is None:
+            return outputs
+
     def to_dict(self):
         """Return all the details of this MLPipeline in a dict.
@@ -487,7 +564,8 @@ def save(self, path):
         The content of the JSON file is the dict returned by the `to_dict` method.
 
         Args:
-            path (str): Path to the JSON file to write.
+            path (str):
+                Path to the JSON file to write.
         """
         with open(path, 'w') as out_file:
             json.dump(self.to_dict(), out_file, indent=4)
@@ -499,7 +577,8 @@ def from_dict(cls, metadata):
         The dict structure is the same as the one created by the `to_dict` method.
 
         Args:
-            metadata (dict): Dictionary containing the pipeline specification.
+            metadata (dict):
+                Dictionary containing the pipeline specification.
 
         Returns:
             MLPipeline:
@@ -531,7 +610,8 @@ def load(cls, path):
         The JSON file format is the same as the one created by the `to_dict` method.
 
         Args:
-            path (str): Path of the JSON file to load.
+            path (str):
+                Path of the JSON file to load.
 
         Returns:
             MLPipeline:
diff --git a/mlblocks/primitives.py b/mlblocks/primitives.py
index 9bca6a5d..f2300f67 100644
--- a/mlblocks/primitives.py
+++ b/mlblocks/primitives.py
@@ -37,6 +37,7 @@ def add_primitives_path(path):
 
     Raises:
         ValueError: A `ValueError` will be raised if the path is not valid.
+
     """
     if path not in _PRIMITIVES_PATHS:
         if not os.path.isdir(path):
@@ -68,7 +69,6 @@ def get_primitives_paths():
         list:
             The list of folders.
     """
-
     primitives_paths = list()
     entry_points = pkg_resources.iter_entry_points('mlprimitives')
     for entry_point in entry_points:
@@ -99,7 +99,6 @@ def load_primitive(name):
         ValueError:
             A `ValueError` will be raised if the primitive cannot be found.
""" - for base_path in get_primitives_paths(): parts = name.split('.') number_of_parts = len(parts) diff --git a/setup.cfg b/setup.cfg index 62ced521..17244565 100644 --- a/setup.cfg +++ b/setup.cfg @@ -45,3 +45,9 @@ collect_ignore = ['setup.py'] [tool:pylint] good-names = X,y + +[doc8] +max-line-length = 99 + +[pydocstyle] +add-ignore = D403,D413,D105,D107 diff --git a/setup.py b/setup.py index f6991ab1..c73eb0a6 100644 --- a/setup.py +++ b/setup.py @@ -59,6 +59,10 @@ # Advanced testing 'tox>=2.9.1', 'coverage>=4.5.1', + + # Documentation style + 'doc8==0.8.0', + 'pydocstyle==3.0.0' ] From 711201650e50e7ef0c3861347ac89abfa1a5c77d Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Thu, 9 May 2019 15:42:10 +0200 Subject: [PATCH 11/11] Add missing dependency --- setup.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index c73eb0a6..f355be93 100644 --- a/setup.py +++ b/setup.py @@ -40,9 +40,10 @@ 'm2r>=0.2.0', 'Sphinx>=1.7.1', 'sphinx_rtd_theme>=0.2.4', - 'graphviz==0.9', - 'ipython==6.5.0', - 'matplotlib==2.2.3', + 'graphviz>=0.9', + 'ipython>=6.5.0', + 'matplotlib>=2.2.3', + 'autodocsumm>=0.1.10', # style check 'flake8>=3.5.0', @@ -61,8 +62,8 @@ 'coverage>=4.5.1', # Documentation style - 'doc8==0.8.0', - 'pydocstyle==3.0.0' + 'doc8>=0.8.0', + 'pydocstyle>=3.0.0' ]