feat: Add support for Numpy docstrings

Issue: #7. PR: #87. Co-authored-by: Achille Murangira <[email protected]>. Co-authored-by: Timothée Mazzucotelli <[email protected]>.
mkdocstrings · Feb 14, 2021 · de0424a · de0424a
1 parent 4052eab
commit de0424a
Show file tree

Hide file tree

Showing 7 changed files with 765 additions and 7 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -85,7 +85,7 @@ jobs:
     - name: Set up the project
       run: |
         pip install poetry
-        poetry install -vvv || { rm -rf .venv; poetry install -vvv; }
+        poetry install -vvv -E numpy-style || { rm -rf .venv; poetry install -vvv -E numpy-style; }
 
     - name: Run the test suite
       run: poetry run duty test
diff --git a/CREDITS.md b/CREDITS.md
@@ -13,6 +13,7 @@ These projects were used to build `pytkdocs`. **Thank you!**
 [`autoflake`](https://github.com/myint/autoflake) |
 [`black`](https://github.com/psf/black) |
 [`dataclasses`](https://github.com/ericvsmith/dataclasses) |
+[`docstring_parser`](https://github.com/rr-/docstring_parser) |
 [`duty`](https://github.com/pawamoy/duty) |
 [`flake8-black`](https://github.com/peterjc/flake8-black) |
 [`flake8-builtins`](https://github.com/gforcada/flake8-builtins) |
@@ -52,7 +53,6 @@ These projects were used to build `pytkdocs`. **Thank you!**
 [`attrs`](https://www.attrs.org/) |
 [`backcall`](https://github.com/takluyver/backcall) |
 [`bandit`](https://bandit.readthedocs.io/en/latest/) |
-[`beautifulsoup4`](http://www.crummy.com/software/BeautifulSoup/bs4/) |
 [`certifi`](https://certifiio.readthedocs.io/en/latest/) |
 [`chardet`](https://github.com/chardet/chardet) |
 [`click`](https://palletsprojects.com/p/click/) |
@@ -61,6 +61,7 @@ These projects were used to build `pytkdocs`. **Thank you!**
 [`coverage`](https://github.com/nedbat/coveragepy) |
 [`darglint`](None) |
 [`decorator`](https://github.com/micheles/decorator) |
+[`docstring-parser`](https://github.com/rr-/docstring_parser) |
 [`docutils`](http://docutils.sourceforge.net/) |
 [`entrypoints`](https://github.com/takluyver/entrypoints) |
 [`eradicate`](https://github.com/myint/eradicate) |
@@ -122,15 +123,14 @@ These projects were used to build `pytkdocs`. **Thank you!**
 [`pymdown-extensions`](https://github.com/facelessuser/pymdown-extensions) |
 [`pyparsing`](https://github.com/pyparsing/pyparsing/) |
 [`pytest-forked`](https://github.com/pytest-dev/pytest-forked) |
-[`PyYAML`](https://github.com/yaml/pyyaml) |
+[`PyYAML`](https://pyyaml.org/) |
 [`regex`](https://bitbucket.org/mrabarnett/mrab-regex) |
 [`restructuredtext-lint`](https://github.com/twolfson/restructuredtext-lint) |
 [`rfc3986`](http://rfc3986.readthedocs.io) |
 [`six`](https://github.com/benjaminp/six) |
 [`smmap`](https://github.com/gitpython-developers/smmap) |
 [`sniffio`](https://github.com/python-trio/sniffio) |
 [`snowballstemmer`](https://github.com/snowballstem/snowball) |
-[`soupsieve`](https://github.com/facelessuser/soupsieve) |
 [`stevedore`](https://docs.openstack.org/stevedore/latest/) |
 [`termcolor`](http://pypi.python.org/pypi/termcolor) |
 [`testfixtures`](https://github.com/Simplistix/testfixtures) |

diff --git a/README.md b/README.md
@@ -193,16 +193,17 @@ The configuration options available are:
 
 - `inherited_members`: true or false (default). When enabled, inherited members will be selected as well.
 
-- `docstring_style`: the docstring style to use when parsing the docstring. `google` and `restructured-text`<sup>1</sup>.
+- `docstring_style`: the docstring style to use when parsing the docstring. `google`, `restructured-text`<sup>1</sup> and `numpy`<sup>2</sup>.
 
 - `docstring_options`: options to pass to the docstring parser.
     - `google` accepts a `replace_admonitions` boolean option (default: true). When enabled, this option will
       replace titles of an indented block by their Markdown admonition equivalent:
       `AdmonitionType: Title` will become `!!! admonitiontype "Title"`.
     - `restructured-text` does not accept any options.
+    - `numpy` does not accept any options.
 
 <sup>1</sup>: reStructured Text parsing is in active development and is not feature complete yet.
-
+<sup>2</sup>: The following sections are currently not supported: `Notes`, `See Also`, `Warns` and `References`.
 ### Details on `new_path_syntax`
 
 Example:

diff --git a/pyproject.toml b/pyproject.toml
@@ -22,8 +22,12 @@ include = [
 python = "^3.6"
 cached-property = {version = "^1.5.2", python = "<3.8"}
 dataclasses = {version = ">=0.7,<0.9", python = "3.6"}
+docstring_parser = {version = "^0.7.3", optional = true}
 typing-extensions = {version = "^3.7.4.3", python = "<3.8"}
 
+[tool.poetry.extras]
+numpy-style = ["docstring_parser"]
+
 [tool.poetry.dev-dependencies]
 autoflake = "^1.4"
 black = "^20.8b1"

diff --git a/src/pytkdocs/parsers/docstrings/__init__.py b/src/pytkdocs/parsers/docstrings/__init__.py
@@ -4,6 +4,11 @@
 
 from pytkdocs.parsers.docstrings.base import Parser
 from pytkdocs.parsers.docstrings.google import Google
 from pytkdocs.parsers.docstrings.restructured_text import RestructuredText
 
-PARSERS: Dict[str, Type[Parser]] = {"google": Google, "restructured-text": RestructuredText}
+# Docstring style name -> parser class.
+PARSERS: Dict[str, Type[Parser]] = {
+    "google": Google,
+    "restructured-text": RestructuredText,
+}
+
+# The Numpy parser imports `docstring_parser`, which is an optional dependency
+# (installed with the `numpy-style` extra). Importing it unconditionally would make
+# this whole package fail to import when the extra is missing, so the "numpy" style
+# is only registered when the import succeeds.
+try:
+    from pytkdocs.parsers.docstrings.numpy import Numpy
+except ImportError:
+    pass
+else:
+    PARSERS["numpy"] = Numpy
diff --git a/src/pytkdocs/parsers/docstrings/numpy.py b/src/pytkdocs/parsers/docstrings/numpy.py
@@ -0,0 +1,274 @@
+"""This module defines functions and classes to parse docstrings into structured data."""
+import re
+from typing import List, Optional
+
+import docstring_parser
+from docstring_parser import parse
+
+from pytkdocs.parsers.docstrings.base import AnnotatedObject, Attribute, Parameter, Parser, Section, empty
+
+
+class Numpy(Parser):
+    """
+    A Numpy-style docstrings parser.
+
+    The docstring text is parsed by `docstring_parser.parse`; the `read_*_section` methods
+    of this class convert the parsed result into `Section` objects.
+    """
+
+    def __init__(self) -> None:
+        """Initialize the parser and map each supported section type to its reader method."""
+        super().__init__()
+        self.section_reader = {
+            Section.Type.PARAMETERS: self.read_parameters_section,
+            Section.Type.EXCEPTIONS: self.read_exceptions_section,
+            Section.Type.EXAMPLES: self.read_examples_section,
+            Section.Type.ATTRIBUTES: self.read_attributes_section,
+            Section.Type.RETURN: self.read_return_section,
+        }
+
+    def parse_sections(self, docstring: str) -> List[Section]:  # noqa: D102
+        """
+        Parse a docstring into a list of sections.
+
+        The "signature", "annotation" and "attributes" keys of `self.context` are filled with
+        defaults when missing (the first two are read from `self.context["obj"]`).
+
+        Arguments:
+            docstring: The docstring to parse.
+
+        Returns:
+            A Markdown section built from the short and long descriptions (when not empty),
+            followed by every non-empty section returned by the registered readers.
+        """
+        if "signature" not in self.context:
+            self.context["signature"] = getattr(self.context["obj"], "signature", None)
+        if "annotation" not in self.context:
+            self.context["annotation"] = getattr(self.context["obj"], "type", empty)
+        if "attributes" not in self.context:
+            self.context["attributes"] = {}
+
+        docstring_obj = parse(docstring)
+        description_all = (
+            none_str_cast(docstring_obj.short_description) + "\n\n" + none_str_cast(docstring_obj.long_description)
+        ).strip()
+        sections = [Section(Section.Type.MARKDOWN, description_all)] if description_all else []
+        # The "return" reader only takes the parsed object; every other reader also
+        # receives the raw docstring.
+        sections_other = [
+            reader(docstring_obj)  # type: ignore
+            if sec == Section.Type.RETURN
+            else reader(docstring, docstring_obj)  # type: ignore
+            for (sec, reader) in self.section_reader.items()
+        ]
+        # Readers return `None` when their section is absent or empty: drop those.
+        sections.extend([sec for sec in sections_other if sec])
+        return sections
+
+    def read_parameters_section(
+        self,
+        docstring: str,
+        docstring_obj: docstring_parser.common.Docstring,
+    ) -> Optional[Section]:
+        """
+        Parse a "parameters" section.
+
+        Arguments:
+            docstring: The raw docstring, only used to detect an empty "Parameters" section.
+            docstring_obj: Docstring object parsed by docstring_parser.
+
+        Returns:
+            A `Section` holding the list of parameters, or `None` if no parameter was found.
+        """
+        parameters = []
+
+        # `docstring_obj.params` is also filtered for attributes in `read_attributes_section`:
+        # only keep the entries tagged as "param" here.
+        docstring_params = [p for p in docstring_obj.params if p.args[0] == "param"]
+
+        for param in docstring_params:
+            name = param.arg_name
+            kind = None
+            type_name = param.type_name
+            default = param.default or empty
+            try:
+                # Leading stars are stripped before the lookup in the signature.
+                signature_param = self.context["signature"].parameters[name.lstrip("*")]  # type: ignore
+            except (AttributeError, KeyError):
+                # Either there is no signature in the context, or it has no such parameter.
+                self.error(f"No type annotation for parameter '{name}'")
+            else:
+                # Annotation and default from the signature override the docstring ones.
+                if signature_param.annotation is not empty:
+                    type_name = signature_param.annotation
+                if signature_param.default is not empty:
+                    default = signature_param.default
+                kind = signature_param.kind
+            parameters.append(
+                Parameter(
+                    name=param.arg_name,
+                    annotation=type_name,
+                    description=param.description,
+                    default=default,
+                    kind=kind,
+                )
+            )
+
+        if parameters:
+            return Section(Section.Type.PARAMETERS, parameters)
+        # A "Parameters" title without any parsed parameter is reported as an error.
+        if re.search("Parameters\n", docstring):
+            self.error("Empty parameter section")
+        return None
+
+    def read_attributes_section(
+        self,
+        docstring: str,
+        docstring_obj: docstring_parser.common.Docstring,
+    ) -> Optional[Section]:
+        """
+        Parse an "attributes" section.
+
+        Arguments:
+            docstring: The raw docstring, only used to detect an empty "Attributes" section.
+            docstring_obj: Docstring object parsed by docstring_parser.
+
+        Returns:
+            A `Section` holding the list of attributes, or `None` if no attribute was found.
+        """
+        attributes = []
+        # Only keep the entries of `docstring_obj.params` tagged as "attribute".
+        docstring_attributes = [p for p in docstring_obj.params if p.args[0] == "attribute"]
+
+        for attr in docstring_attributes:
+            attributes.append(
+                Attribute(
+                    name=attr.arg_name,
+                    annotation=attr.type_name,
+                    description=attr.description,
+                )
+            )
+
+        if attributes:
+            return Section(Section.Type.ATTRIBUTES, attributes)
+        # An "Attributes" title without any parsed attribute is reported as an error.
+        if re.search("Attributes\n", docstring):
+            self.error("Empty attributes section")
+        return None
+
+    def read_exceptions_section(
+        self,
+        docstring: str,
+        docstring_obj: docstring_parser.common.Docstring,
+    ) -> Optional[Section]:
+        """
+        Parse an "exceptions" section.
+
+        Arguments:
+            docstring: The raw docstring, only used to detect an empty "Raises" section.
+            docstring_obj: Docstring object parsed by docstring_parser.
+
+        Returns:
+            A `Section` holding the list of exceptions, or `None` if no exception was found.
+        """
+        exceptions = []
+        except_obj = docstring_obj.raises
+
+        for exception in except_obj:
+            exceptions.append(AnnotatedObject(exception.type_name, exception.description))
+
+        if exceptions:
+            return Section(Section.Type.EXCEPTIONS, exceptions)
+        # A "Raises" title without any parsed exception is reported as an error.
+        if re.search("Raises\n", docstring):
+            self.error("Empty exceptions section")
+        return None
+
+    def read_return_section(
+        self,
+        docstring_obj: docstring_parser.common.Docstring,
+    ) -> Optional[Section]:
+        """
+        Parse a "returns" section.
+
+        Arguments:
+            docstring_obj: Docstring object parsed by docstring_parser.
+
+        Returns:
+            A `Section` holding the return annotation and description, or `None` if the
+            docstring has no return entry, or if its annotation or description is missing.
+        """
+        return_obj = docstring_obj.returns if docstring_obj.returns else []
+        text = return_obj.description if return_obj else ""
+
+        # Prefer the return annotation of the signature; without a signature,
+        # use the annotation stored in the context.
+        if self.context["signature"]:
+            annotation = self.context["signature"].return_annotation
+        else:
+            annotation = self.context["annotation"]
+
+        # No annotation found so far: fall back to the type written in the docstring.
+        if annotation is empty:
+            if text:
+                annotation = return_obj.type_name or empty
+                text = return_obj.description
+            elif return_obj and annotation is empty:
+                self.error("No return type annotation")
+
+        if return_obj and not text:
+            self.error("Empty return description")
+        if not return_obj or annotation is empty or not text:
+            return None
+        return Section(Section.Type.RETURN, AnnotatedObject(annotation, text))
+
+    def read_examples_section(
+        self,
+        docstring: str,
+        docstring_obj: docstring_parser.common.Docstring,
+    ) -> Optional[Section]:
+        """
+        Parse an "examples" section.
+
+        The text of the section is split into sub-sections: a line starting with `>>>` opens
+        a code example that runs until the next blank line, and everything else is kept as
+        Markdown text. Lines between two "```" fences are always kept as text, even when
+        they start with `>>>`.
+
+        Arguments:
+            docstring: The raw docstring, only used to detect an empty "Examples" section.
+            docstring_obj: Docstring object parsed by docstring_parser.
+
+        Returns:
+            A `Section` holding a list of `(section type, text)` tuples,
+            or `None` if no example or text was found.
+        """
+        # Description of the first meta entry tagged as "examples", or an empty string.
+        text = next(
+            (
+                meta.description
+                for meta in docstring_obj.meta
+                if isinstance(meta, docstring_parser.common.DocstringMeta) and meta.args[0] == "examples"
+            ),
+            "",
+        )
+
+        sub_sections = []
+        in_code_example = False
+        in_code_block = False
+        current_text: List[str] = []
+        current_example: List[str] = []
+
+        if text:
+            for line in text.split("\n"):
+                if is_empty_line(line):
+                    if in_code_example:
+                        # A blank line closes the current code example.
+                        if current_example:
+                            sub_sections.append((Section.Type.EXAMPLES, "\n".join(current_example)))
+                            current_example = []
+                        in_code_example = False
+                    else:
+                        current_text.append(line)
+
+                elif in_code_example:
+                    current_example.append(line)
+
+                elif line.startswith("```"):
+                    # Opening or closing fence of a Markdown code block.
+                    in_code_block = not in_code_block
+                    current_text.append(line)
+
+                elif in_code_block:
+                    current_text.append(line)
+
+                elif line.startswith(">>>"):
+                    # A code example starts: flush the text accumulated before it.
+                    if current_text:
+                        sub_sections.append((Section.Type.MARKDOWN, "\n".join(current_text)))
+                        current_text = []
+                    in_code_example = True
+                    current_example.append(line)
+                else:
+                    current_text.append(line)
+        # Flush what is left once all lines have been read.
+        if current_text:
+            sub_sections.append((Section.Type.MARKDOWN, "\n".join(current_text)))
+        elif current_example:
+            sub_sections.append((Section.Type.EXAMPLES, "\n".join(current_example)))
+        if sub_sections:
+            return Section(Section.Type.EXAMPLES, sub_sections)
+        # An "Examples" title without any content is reported as an error.
+        if re.search("Examples\n", docstring):
+            self.error("Empty examples section")
+        return None
+
+
+def is_empty_line(line: str) -> bool:
+    """
+    Check whether a line has no visible content.
+
+    Arguments:
+        line: The line to check.
+
+    Returns:
+        True when the line is empty or contains only whitespace, False otherwise.
+    """
+    stripped = line.strip()
+    return len(stripped) == 0
+
+
+def none_str_cast(string: Optional[str]) -> str:
+    """
+    Turn `None` into an empty string.
+
+    Arguments:
+        string: The string to cast, possibly `None`.
+
+    Returns:
+        The string itself, or an empty string when it is `None` (or already empty).
+    """
+    return string or ""