From de0424a33e94f7dfdfd3b613c500a6fb428406aa Mon Sep 17 00:00:00 2001 From: Achille M Date: Sun, 14 Feb 2021 18:47:27 +0100 Subject: [PATCH] feat: Add support for Numpy docstrings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue #7: https://github.com/mkdocstrings/pytkdocs/issues/7 PR #87: https://github.com/mkdocstrings/pytkdocs/pull/87 Co-authored-by: Achille Murangira Co-authored-by: Timothée Mazzucotelli --- .github/workflows/ci.yml | 2 +- CREDITS.md | 6 +- README.md | 5 +- pyproject.toml | 4 + src/pytkdocs/parsers/docstrings/__init__.py | 7 +- src/pytkdocs/parsers/docstrings/numpy.py | 274 ++++++++++ .../test_docstrings/test_numpy.py | 474 ++++++++++++++++++ 7 files changed, 765 insertions(+), 7 deletions(-) create mode 100644 src/pytkdocs/parsers/docstrings/numpy.py create mode 100644 tests/test_parsers/test_docstrings/test_numpy.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 13e1ac6..4462c28 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -85,7 +85,7 @@ jobs: - name: Set up the project run: | pip install poetry - poetry install -vvv || { rm -rf .venv; poetry install -vvv; } + poetry install -vvv -E numpy-style || { rm -rf .venv; poetry install -vvv -E numpy-style; } - name: Run the test suite run: poetry run duty test diff --git a/CREDITS.md b/CREDITS.md index b5d0cda..72b0f43 100644 --- a/CREDITS.md +++ b/CREDITS.md @@ -13,6 +13,7 @@ These projects were used to build `pytkdocs`. **Thank you!** [`autoflake`](https://github.com/myint/autoflake) | [`black`](https://github.com/psf/black) | [`dataclasses`](https://github.com/ericvsmith/dataclasses) | +[`docstring_parser`](https://github.com/rr-/docstring_parser) | [`duty`](https://github.com/pawamoy/duty) | [`flake8-black`](https://github.com/peterjc/flake8-black) | [`flake8-builtins`](https://github.com/gforcada/flake8-builtins) | @@ -52,7 +53,6 @@ These projects were used to build `pytkdocs`. 
**Thank you!** [`attrs`](https://www.attrs.org/) | [`backcall`](https://github.com/takluyver/backcall) | [`bandit`](https://bandit.readthedocs.io/en/latest/) | -[`beautifulsoup4`](http://www.crummy.com/software/BeautifulSoup/bs4/) | [`certifi`](https://certifiio.readthedocs.io/en/latest/) | [`chardet`](https://github.com/chardet/chardet) | [`click`](https://palletsprojects.com/p/click/) | @@ -61,6 +61,7 @@ These projects were used to build `pytkdocs`. **Thank you!** [`coverage`](https://github.com/nedbat/coveragepy) | [`darglint`](None) | [`decorator`](https://github.com/micheles/decorator) | +[`docstring-parser`](https://github.com/rr-/docstring_parser) | [`docutils`](http://docutils.sourceforge.net/) | [`entrypoints`](https://github.com/takluyver/entrypoints) | [`eradicate`](https://github.com/myint/eradicate) | @@ -122,7 +123,7 @@ These projects were used to build `pytkdocs`. **Thank you!** [`pymdown-extensions`](https://github.com/facelessuser/pymdown-extensions) | [`pyparsing`](https://github.com/pyparsing/pyparsing/) | [`pytest-forked`](https://github.com/pytest-dev/pytest-forked) | -[`PyYAML`](https://github.com/yaml/pyyaml) | +[`PyYAML`](https://pyyaml.org/) | [`regex`](https://bitbucket.org/mrabarnett/mrab-regex) | [`restructuredtext-lint`](https://github.com/twolfson/restructuredtext-lint) | [`rfc3986`](http://rfc3986.readthedocs.io) | @@ -130,7 +131,6 @@ These projects were used to build `pytkdocs`. 
**Thank you!** [`smmap`](https://github.com/gitpython-developers/smmap) | [`sniffio`](https://github.com/python-trio/sniffio) | [`snowballstemmer`](https://github.com/snowballstem/snowball) | -[`soupsieve`](https://github.com/facelessuser/soupsieve) | [`stevedore`](https://docs.openstack.org/stevedore/latest/) | [`termcolor`](http://pypi.python.org/pypi/termcolor) | [`testfixtures`](https://github.com/Simplistix/testfixtures) | diff --git a/README.md b/README.md index 06f619c..d4676e0 100644 --- a/README.md +++ b/README.md @@ -193,16 +193,17 @@ The configuration options available are: - `inherited_members`: true or false (default). When enabled, inherited members will be selected as well. -- `docstring_style`: the docstring style to use when parsing the docstring. `google` and `restructured-text`1. +- `docstring_style`: the docstring style to use when parsing the docstring. Supported styles are `google`, `restructured-text`1 and `numpy`2. - `docstring_options`: options to pass to the docstring parser. - `google` accepts a `replace_admonitions` boolean option (default: true). When enabled, this option will replace titles of an indented block by their Markdown admonition equivalent: `AdmonitionType: Title` will become `!!! admonitiontype "Title"`. - `restructured-text` does not accept any options. + - `numpy` does not accept any options. 1: reStructured Text parsing is in active development and is not feature complete yet. - +2: The following sections are currently not supported: `Notes`, `See Also`, `Warns` and `References`. 
### Details on `new_path_syntax` Example: diff --git a/pyproject.toml b/pyproject.toml index f2b93d6..1dedc4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,8 +22,12 @@ include = [ python = "^3.6" cached-property = {version = "^1.5.2", python = "<3.8"} dataclasses = {version = ">=0.7,<0.9", python = "3.6"} +docstring_parser = {version = "^0.7.3", optional = true} typing-extensions = {version = "^3.7.4.3", python = "<3.8"} +[tool.poetry.extras] +numpy-style = ["docstring_parser"] + [tool.poetry.dev-dependencies] autoflake = "^1.4" black = "^20.8b1" diff --git a/src/pytkdocs/parsers/docstrings/__init__.py b/src/pytkdocs/parsers/docstrings/__init__.py index 257192c..554d5f3 100644 --- a/src/pytkdocs/parsers/docstrings/__init__.py +++ b/src/pytkdocs/parsers/docstrings/__init__.py @@ -4,6 +4,11 @@ from pytkdocs.parsers.docstrings.base import Parser from pytkdocs.parsers.docstrings.google import Google +from pytkdocs.parsers.docstrings.numpy import Numpy from pytkdocs.parsers.docstrings.restructured_text import RestructuredText -PARSERS: Dict[str, Type[Parser]] = {"google": Google, "restructured-text": RestructuredText} +PARSERS: Dict[str, Type[Parser]] = { + "google": Google, + "restructured-text": RestructuredText, + "numpy": Numpy, +} diff --git a/src/pytkdocs/parsers/docstrings/numpy.py b/src/pytkdocs/parsers/docstrings/numpy.py new file mode 100644 index 0000000..a12e3e5 --- /dev/null +++ b/src/pytkdocs/parsers/docstrings/numpy.py @@ -0,0 +1,274 @@ +"""This module defines functions and classes to parse docstrings into structured data.""" +import re +from typing import List, Optional + +import docstring_parser +from docstring_parser import parse + +from pytkdocs.parsers.docstrings.base import AnnotatedObject, Attribute, Parameter, Parser, Section, empty + + +class Numpy(Parser): + """A Numpy-style docstrings parser.""" + + def __init__(self) -> None: + """ + Initialize the objects. 
+ """ + super().__init__() + self.section_reader = { + Section.Type.PARAMETERS: self.read_parameters_section, + Section.Type.EXCEPTIONS: self.read_exceptions_section, + Section.Type.EXAMPLES: self.read_examples_section, + Section.Type.ATTRIBUTES: self.read_attributes_section, + Section.Type.RETURN: self.read_return_section, + } + + def parse_sections(self, docstring: str) -> List[Section]: # noqa: D102 + if "signature" not in self.context: + self.context["signature"] = getattr(self.context["obj"], "signature", None) + if "annotation" not in self.context: + self.context["annotation"] = getattr(self.context["obj"], "type", empty) + if "attributes" not in self.context: + self.context["attributes"] = {} + + docstring_obj = parse(docstring) + description_all = ( + none_str_cast(docstring_obj.short_description) + "\n\n" + none_str_cast(docstring_obj.long_description) + ).strip() + sections = [Section(Section.Type.MARKDOWN, description_all)] if description_all else [] + sections_other = [ + reader(docstring_obj) # type: ignore + if sec == Section.Type.RETURN + else reader(docstring, docstring_obj) # type: ignore + for (sec, reader) in self.section_reader.items() + ] + sections.extend([sec for sec in sections_other if sec]) + return sections + + def read_parameters_section( + self, + docstring: str, + docstring_obj: docstring_parser.common.Docstring, + ) -> Optional[Section]: + """ + Parse a "parameters" section. + + Arguments: + docstring: The raw docstring text, searched to report an empty "Parameters" section. + docstring_obj: Docstring object parsed by docstring_parser. + + Returns: + A `Section` of parameters, or `None` when no parameter is documented. 
+ """ + parameters = [] + + docstring_params = [p for p in docstring_obj.params if p.args[0] == "param"] + + for param in docstring_params: + name = param.arg_name + kind = None + type_name = param.type_name + default = param.default or empty + try: + signature_param = self.context["signature"].parameters[name.lstrip("*")] # type: ignore + except (AttributeError, KeyError): + self.error(f"No type annotation for parameter '{name}'") + else: + if signature_param.annotation is not empty: + type_name = signature_param.annotation + if signature_param.default is not empty: + default = signature_param.default + kind = signature_param.kind + parameters.append( + Parameter( + name=param.arg_name, + annotation=type_name, + description=param.description, + default=default, + kind=kind, + ) + ) + + if parameters: + return Section(Section.Type.PARAMETERS, parameters) + if re.search("Parameters\n", docstring): + self.error("Empty parameter section") + return None + + def read_attributes_section( + self, + docstring: str, + docstring_obj: docstring_parser.common.Docstring, + ) -> Optional[Section]: + """ + Parse an "attributes" section. + + Arguments: + docstring_obj: Docstring object parsed by docstring_parser. + + Returns: + A `Section` of attributes, or `None` when no attribute is documented. + """ + attributes = [] + docstring_attributes = [p for p in docstring_obj.params if p.args[0] == "attribute"] + + for attr in docstring_attributes: + attributes.append( + Attribute( + name=attr.arg_name, + annotation=attr.type_name, + description=attr.description, + ) + ) + + if attributes: + return Section(Section.Type.ATTRIBUTES, attributes) + if re.search("Attributes\n", docstring): + self.error("Empty attributes section") + return None + + def read_exceptions_section( + self, + docstring: str, + docstring_obj: docstring_parser.common.Docstring, + ) -> Optional[Section]: + """ + Parse an "exceptions" section. + + Arguments: + docstring_obj: Docstring object parsed by docstring_parser. 
+ + Returns: + A `Section` of exceptions, or `None` when no exception is documented. + """ + exceptions = [] + except_obj = docstring_obj.raises + + for exception in except_obj: + exceptions.append(AnnotatedObject(exception.type_name, exception.description)) + + if exceptions: + return Section(Section.Type.EXCEPTIONS, exceptions) + if re.search("Raises\n", docstring): + self.error("Empty exceptions section") + return None + + def read_return_section( + self, + docstring_obj: docstring_parser.common.Docstring, + ) -> Optional[Section]: + """ + Parse a "returns" section. + + Arguments: + docstring_obj: Docstring object parsed by docstring_parser. + + Returns: + A `Section` with the annotated return value, or `None` when its description or type is missing. + """ + return_obj = docstring_obj.returns if docstring_obj.returns else [] + text = return_obj.description if return_obj else "" + + if self.context["signature"]: + annotation = self.context["signature"].return_annotation + else: + annotation = self.context["annotation"] + + if annotation is empty: + if text: + annotation = return_obj.type_name or empty + text = return_obj.description + elif return_obj and annotation is empty: + self.error("No return type annotation") + + if return_obj and not text: + self.error("Empty return description") + if not return_obj or annotation is empty or not text: + return None + return Section(Section.Type.RETURN, AnnotatedObject(annotation, text)) + + def read_examples_section( + self, + docstring: str, + docstring_obj: docstring_parser.common.Docstring, + ) -> Optional[Section]: + """ + Parse an "examples" section. + + Arguments: + docstring_obj: Docstring object parsed by docstring_parser. + + Returns: + A `Section` of text and example sub-sections, or `None` when there are none. 
+ """ + text = next( + ( + meta.description + for meta in docstring_obj.meta + if isinstance(meta, docstring_parser.common.DocstringMeta) and meta.args[0] == "examples" + ), + "", + ) + + sub_sections = [] + in_code_example = False + in_code_block = False + current_text: List[str] = [] + current_example: List[str] = [] + + if text: + for line in text.split("\n"): + if is_empty_line(line): + if in_code_example: + if current_example: + sub_sections.append((Section.Type.EXAMPLES, "\n".join(current_example))) + current_example = [] + in_code_example = False + else: + current_text.append(line) + + elif in_code_example: + current_example.append(line) + + elif line.startswith("```"): + in_code_block = not in_code_block + current_text.append(line) + + elif in_code_block: + current_text.append(line) + + elif line.startswith(">>>"): + if current_text: + sub_sections.append((Section.Type.MARKDOWN, "\n".join(current_text))) + current_text = [] + in_code_example = True + current_example.append(line) + else: + current_text.append(line) + if current_text: + sub_sections.append((Section.Type.MARKDOWN, "\n".join(current_text))) + elif current_example: + sub_sections.append((Section.Type.EXAMPLES, "\n".join(current_example))) + if sub_sections: + return Section(Section.Type.EXAMPLES, sub_sections) + if re.search("Examples\n", docstring): + self.error("Empty examples section") + return None + + +def is_empty_line(line: str) -> bool: + """ + Tell if a line is empty. + + Arguments: + line: The line to check. + + Returns: + True if the line is empty or composed of blanks only, False otherwise. 
+ """ + return not line.strip() + + +def none_str_cast(string: Optional[str]) -> str: + return string or "" diff --git a/tests/test_parsers/test_docstrings/test_numpy.py b/tests/test_parsers/test_docstrings/test_numpy.py new file mode 100644 index 0000000..57d8c85 --- /dev/null +++ b/tests/test_parsers/test_docstrings/test_numpy.py @@ -0,0 +1,474 @@ +"""Tests for [the `parsers.docstrings.numpy` module][pytkdocs.parsers.docstrings.numpy].""" + +import inspect +from textwrap import dedent + +from pytkdocs.loader import Loader +from pytkdocs.parsers.docstrings.numpy import Numpy + + +class DummyObject: + path = "o" + + +def parse(docstring, signature=None, return_type=inspect.Signature.empty): + """Helper to parse a docstring.""" + return Numpy().parse( + dedent(docstring).strip(), + {"obj": DummyObject(), "signature": signature, "type": return_type}, + ) + + +def test_simple_docstring(): + """Parse a simple docstring.""" + sections, errors = parse("A simple docstring.") + assert len(sections) == 1 + assert not errors + + +def test_multi_line_docstring(): + """Parse a multi-line docstring.""" + sections, errors = parse( + """ + A somewhat longer docstring. + + Blablablabla. + """ + ) + assert len(sections) == 1 + assert not errors + + +def test_sections_without_signature(): + """Parse a docstring without a signature.""" + # type of return value always required + sections, errors = parse( + """ + Sections without signature. + + Parameters + ---------- + void : + SEGFAULT. + niet : + SEGFAULT. + nada : + SEGFAULT. + rien : + SEGFAULT. + + Raises + ------ + GlobalError + when nothing works as expected. + + Returns + ------- + bool + Itself. 
+ """ + ) + assert len(sections) == 4 + assert len(errors) == 4 # missing annotations for params + for error in errors: + assert "param" in error + + +def test_property_docstring(): + """Parse a property docstring.""" + class_ = Loader().get_object_documentation("tests.fixtures.parsing.docstrings.NotDefinedYet") + prop = class_.attributes[0] + sections, errors = prop.docstring_sections, prop.docstring_errors + assert len(sections) == 2 + assert not errors + + +def test_function_without_annotations(): + """Parse a function docstring without signature annotations.""" + + def f(x, y): + """ + This function has no annotations. + + Parameters + ---------- + x: + X value. + y: + Y value. + + Returns + ------- + float + Sum X + Y. + """ + return x + y + + sections, errors = parse(inspect.getdoc(f), inspect.signature(f)) + assert len(sections) == 3 + assert not errors + + +def test_function_with_annotations(): + """Parse a function docstring with signature annotations.""" + + def f(x: int, y: int) -> int: + """ + This function has annotations. + + Parameters + ---------- + x: + X value. + y: + Y value. + + Returns + ------- + int + Sum X + Y. + """ + return x + y + + sections, errors = parse(inspect.getdoc(f), inspect.signature(f)) + assert len(sections) == 3 + assert not errors + + +def test_function_with_examples(): + """Parse a function docstring with an examples section.""" + + def f(x: int, y: int) -> int: + """ + This function has annotations. + + Examples + -------- + Some examples that will create an unified code block: + + >>> 2 + 2 == 5 + False + >>> print("examples") + "examples" + + This is just a random comment in the examples section. + + These examples will generate two different code blocks. Note the blank line. + + >>> print("I'm in the first code block!") + "I'm in the first code block!" + + >>> print("I'm in other code block!") + "I'm in other code block!" 
+ + We also can write multiline examples: + + >>> x = 3 + 2 + >>> y = x + 10 + >>> y + 15 + + This is just a typical Python code block: + + ```python + print("examples") + return 2 + 2 + ``` + + Even if it contains doctests, the following block is still considered a normal code-block. + + ```python + >>> print("examples") + "examples" + >>> 2 + 2 + 4 + ``` + + The blank line before an example is optional. + >>> x = 3 + >>> y = "apple" + >>> z = False + >>> l = [x, y, z] + >>> my_print_list_function(l) + 3 + "apple" + False + """ + return x + y + + sections, errors = parse(inspect.getdoc(f), inspect.signature(f)) + assert len(sections) == 2 + assert len(sections[1].value) == 9 + assert not errors + + +def test_types_in_docstring(): + """Parse types in docstring.""" + + def f(x, y): + """ + The types are written in the docstring. + + Parameters + ---------- + x : int + X value. + y : int + Y value. + + Returns + ------- + int + Sum X + Y. + """ + return x + y + + sections, errors = parse(inspect.getdoc(f), inspect.signature(f)) + assert len(sections) == 3 + assert not errors + + x, y = sections[1].value + r = sections[2].value + + assert x.name == "x" + assert x.annotation == "int" + assert x.description == "X value." + assert x.kind is inspect.Parameter.POSITIONAL_OR_KEYWORD + assert x.default is inspect.Signature.empty + + assert y.name == "y" + assert y.annotation == "int" + assert y.description == "Y value." + assert y.kind is inspect.Parameter.POSITIONAL_OR_KEYWORD + assert y.default is inspect.Signature.empty + + assert r.annotation == "int" + assert r.description == "Sum X + Y." + + +def test_types_and_optional_in_docstring(): + """Parse optional types in docstring.""" + + def f(x=1, y=None): + """ + The types are written in the docstring. + + Parameters + ---------- + x : int + X value. + y : int, optional + Y value. + + Returns + ------- + int + Sum X + Y. 
+ """ + return x + (y or 1) + + sections, errors = parse(inspect.getdoc(f), inspect.signature(f)) + assert len(sections) == 3 + assert not errors + + x, y = sections[1].value + + assert x.name == "x" + assert x.annotation == "int" + assert x.description == "X value." + assert x.kind is inspect.Parameter.POSITIONAL_OR_KEYWORD + assert x.default == 1 + + assert y.name == "y" + assert y.annotation == "int" + assert y.description == "Y value." + assert y.kind is inspect.Parameter.POSITIONAL_OR_KEYWORD + assert y.default is None + + +def test_types_in_signature_and_docstring(): + """Parse types in both signature and docstring.""" + + def f(x: int, y: int) -> int: + """ + The types are written both in the signature and in the docstring. + + Parameters + ---------- + x : int + X value. + y : int + Y value. + + Returns + ------- + int + Sum X + Y. + """ + return x + y + + sections, errors = parse(inspect.getdoc(f), inspect.signature(f)) + assert len(sections) == 3 + assert not errors + + +def test_close_sections(): + """Parse sections without blank lines in between.""" + + def f(x, y, z): + """ + Parameters + ---------- + x : + X + y : + Y + z : + Z + Raises + ------ + Error2 + error. + Error1 + error. + Returns + ------- + str + value + """ + return x + y + z + + sections, errors = parse(inspect.getdoc(f), inspect.signature(f)) + assert len(sections) == 3 + assert not errors + + +# test_code_blocks was removed as docstrings within a code block +# are not applicable to numpy docstrings + + +def test_extra_parameter(): + """Warn on extra parameter in docstring.""" + + def f(x): + """ + Parameters + ---------- + x : + Integer. + y : + Integer. 
+ """ + return x + + sections, errors = parse(inspect.getdoc(f), inspect.signature(f)) + assert len(sections) == 1 + assert len(errors) == 1 + assert "No type" in errors[0] + + +def test_missing_parameter(): + """Don't warn on missing parameter in docstring.""" + # FIXME: could warn + def f(x, y): + """ + Parameters + ---------- + x : + Integer. + """ + return x + y + + sections, errors = parse(inspect.getdoc(f), inspect.signature(f)) + assert len(sections) == 1 + assert not errors + + +def test_multiple_lines_in_sections_items(): + """Parse multi-line item description.""" + + def f(p: str, q: str): + """ + Hi. + + Parameters + ---------- + p : + This argument + has a description + spawning on multiple lines. + + It even has blank lines in it. + Some of these lines + are indented for no reason. + q : + What if the first line is blank? + """ + return p + q + + sections, errors = parse(inspect.getdoc(f), inspect.signature(f)) + assert len(sections) == 2 + assert len(sections[1].value) == 2 + # numpy docstring parameter descriptions can be parsed even if misindented + assert not errors + + +def test_parse_args_kwargs(): + """Parse args and kwargs.""" + + def f(a, *args, **kwargs): + """ + Parameters + ---------- + a : + a parameter. + *args : + args parameters. + **kwargs : + kwargs parameters. + """ + return 1 + + sections, errors = parse(inspect.getdoc(f), inspect.signature(f)) + assert len(sections) == 1 + expected_parameters = { + "a": "a parameter.", + "*args": "args parameters.", + "**kwargs": "kwargs parameters.", + } + for param in sections[0].value: + assert param.name in expected_parameters + assert expected_parameters[param.name] == param.description + assert not errors + + +def test_different_indentation(): + """Parse different indentations without reporting errors.""" + + def f(): + """ + Hello. + + Raises + ------ + StartAt5 + this section's items starts with x spaces of indentation. + Well indented continuation line. 
+ Badly indented continuation line (will not trigger an error). + + Empty lines are preserved, as well as extra-indentation (this line is a code block). + AnyOtherLine + ...starting with exactly 5 spaces is a new item. + """ + + sections, errors = parse(inspect.getdoc(f), inspect.signature(f)) + assert len(sections) == 2 + assert len(sections[1].value) == 2 + assert sections[1].value[0].description == ( + "this section's items starts with x spaces of indentation.\n" + "Well indented continuation line.\n" + " Badly indented continuation line (will not trigger an error).\n" + "\n" + " Empty lines are preserved, as well as extra-indentation (this line is a code block)." + ) + assert not errors