From d427bccbfd619f65ae2d12559fcd6f1f1649d036 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= Date: Wed, 29 Apr 2020 21:18:57 +0200 Subject: [PATCH] refactor: Layout a docstring parser base --- src/pytkdocs/cli.py | 6 +- src/pytkdocs/loader.py | 17 +- src/pytkdocs/objects.py | 28 +-- src/pytkdocs/parsers/attributes.py | 2 +- src/pytkdocs/parsers/docstrings/__init__.py | 6 + src/pytkdocs/parsers/docstrings/base.py | 127 ++++++++++++++ .../{docstrings.py => docstrings/google.py} | 166 +++--------------- src/pytkdocs/serializer.py | 4 +- tests/test_cli.py | 24 +-- .../test_parsers/test_docstrings/__init__.py | 0 .../test_google.py} | 4 +- 11 files changed, 202 insertions(+), 182 deletions(-) create mode 100644 src/pytkdocs/parsers/docstrings/__init__.py create mode 100644 src/pytkdocs/parsers/docstrings/base.py rename src/pytkdocs/parsers/{docstrings.py => docstrings/google.py} (67%) create mode 100644 tests/test_parsers/test_docstrings/__init__.py rename tests/test_parsers/{test_docstrings.py => test_docstrings/test_google.py} (98%) diff --git a/src/pytkdocs/cli.py b/src/pytkdocs/cli.py index b22d4a2..6e6ba75 100644 --- a/src/pytkdocs/cli.py +++ b/src/pytkdocs/cli.py @@ -21,9 +21,9 @@ import traceback from typing import Dict, List, Optional, Sequence -from .loader import Loader -from .objects import Object -from .serializer import serialize_object +from pytkdocs.loader import Loader +from pytkdocs.objects import Object +from pytkdocs.serializer import serialize_object def process_config(config: dict) -> dict: diff --git a/src/pytkdocs/loader.py b/src/pytkdocs/loader.py index 37b49db..8e83322 100644 --- a/src/pytkdocs/loader.py +++ b/src/pytkdocs/loader.py @@ -14,9 +14,10 @@ from pathlib import Path from typing import Any, List, Optional, Set, Union -from .objects import Attribute, Class, Function, Method, Module, Object, Source -from .parsers.attributes import get_attributes -from .properties import RE_SPECIAL +from pytkdocs.objects import 
Attribute, Class, Function, Method, Module, Object, Source +from pytkdocs.parsers.attributes import get_attributes +from pytkdocs.parsers.docstrings import PARSERS +from pytkdocs.properties import RE_SPECIAL class ObjectNode: @@ -173,7 +174,12 @@ class Loader: Any error that occurred during collection of the objects and their documentation is stored in the `errors` list. """ - def __init__(self, filters: Optional[List[str]] = None): + def __init__( + self, + filters: Optional[List[str]] = None, + docstring_style: str = "google", + docstring_options: Optional[dict] = None, + ) -> None: """ Arguments: filters: A list of regular expressions to fine-grain select members. It is applied recursively. @@ -182,6 +188,7 @@ def __init__(self, filters: Optional[List[str]] = None): filters = [] self.filters = [(f, re.compile(f.lstrip("!"))) for f in filters] + self.docstring_parser = PARSERS[docstring_style](**(docstring_options or {})) # type: ignore self.errors: List[str] = [] def get_object_documentation(self, dotted_path: str, members: Optional[Union[Set[str], bool]] = None) -> Object: @@ -236,7 +243,7 @@ def get_object_documentation(self, dotted_path: str, members: Optional[Union[Set filtered.append(attribute) root_object.dispatch_attributes(filtered) - root_object.parse_all_docstring() + root_object.parse_all_docstring(self.docstring_parser) return root_object diff --git a/src/pytkdocs/objects.py b/src/pytkdocs/objects.py index 6ec23d0..40fbf0a 100644 --- a/src/pytkdocs/objects.py +++ b/src/pytkdocs/objects.py @@ -22,9 +22,8 @@ from pathlib import Path from typing import List, Optional, Union -from pytkdocs.parsers.docstrings import parse - -from .properties import NAME_CLASS_PRIVATE, NAME_PRIVATE, NAME_SPECIAL, ApplicableNameProperty +from pytkdocs.parsers.docstrings.base import Parser, Section +from pytkdocs.properties import NAME_CLASS_PRIVATE, NAME_PRIVATE, NAME_SPECIAL, ApplicableNameProperty class Source: @@ -92,6 +91,10 @@ def __init__( """The file path of the 
object's direct parent module.""" self.docstring = docstring """The object's docstring.""" + self.docstring_sections: List[Section] = [] + """The object's docstring parsed into sections.""" + self.docstring_errors: List[str] = [] + """The errors detected while parsing the docstring.""" self.properties = properties or [] """The object's properties.""" self.parent: Optional[Object] = None @@ -283,23 +286,20 @@ def dispatch_attributes(self, attributes: List["Attribute"]) -> None: attach_to.children.append(attribute) attribute.parent = attach_to - def parse_all_docstring(self) -> None: + def parse_all_docstring(self, parser: Parser) -> None: """ Recursively parse the docstring of this object and its children. I hope we can get rid of this code at some point as parsing docstring is not really our purpose. """ - signature = None - if hasattr(self, "signature"): - signature = self.signature # type: ignore - attr_type = None - if hasattr(self, "type"): - attr_type = self.type # type: ignore - sections, errors = parse(self.path, self.docstring, signature, attr_type) - self.docstring_sections = sections - self.docstring_errors = errors + self.docstring_sections, self.docstring_errors = parser.parse( + self.docstring, + object_path=self.path, + object_signature=getattr(self, "signature", None), + object_type=getattr(self, "type", None), + ) for child in self.children: - child.parse_all_docstring() + child.parse_all_docstring(parser) @lru_cache() def has_contents(self) -> bool: diff --git a/src/pytkdocs/parsers/attributes.py b/src/pytkdocs/parsers/attributes.py index 088246b..8ccdb32 100644 --- a/src/pytkdocs/parsers/attributes.py +++ b/src/pytkdocs/parsers/attributes.py @@ -5,7 +5,7 @@ from types import ModuleType from typing import Any, Iterable, List, Optional, Union -from ..objects import Attribute +from pytkdocs.objects import Attribute RECURSIVE_NODES = (ast.If, ast.IfExp, ast.Try, ast.With, ast.ExceptHandler) diff --git a/src/pytkdocs/parsers/docstrings/__init__.py 
b/src/pytkdocs/parsers/docstrings/__init__.py new file mode 100644 index 0000000..18ab552 --- /dev/null +++ b/src/pytkdocs/parsers/docstrings/__init__.py @@ -0,0 +1,6 @@ +from typing import Dict, Type + +from pytkdocs.parsers.docstrings.base import Parser +from pytkdocs.parsers.docstrings.google import Google + +PARSERS: Dict[str, Type[Parser]] = {"google": Google} diff --git a/src/pytkdocs/parsers/docstrings/base.py b/src/pytkdocs/parsers/docstrings/base.py new file mode 100644 index 0000000..ef7b660 --- /dev/null +++ b/src/pytkdocs/parsers/docstrings/base.py @@ -0,0 +1,127 @@ +import inspect +from abc import ABCMeta, abstractmethod +from typing import Any, List, Optional, Tuple + +empty = inspect.Signature.empty + + +class AnnotatedObject: + """A helper class to store information about an annotated object.""" + + def __init__(self, annotation, description): + self.annotation = annotation + self.description = description + + +class Parameter(AnnotatedObject): + """A helper class to store information about a signature parameter.""" + + def __init__(self, name, annotation, description, kind, default=empty): + super().__init__(annotation, description) + self.name = name + self.kind = kind + self.default = default + + def __str__(self): + return self.name + + def __repr__(self): + return f"" + + @property + def is_optional(self): + return self.default is not empty + + @property + def is_required(self): + return not self.is_optional + + @property + def is_args(self): + return self.kind is inspect.Parameter.VAR_POSITIONAL + + @property + def is_kwargs(self): + return self.kind is inspect.Parameter.VAR_KEYWORD + + @property + def default_string(self): + if self.is_kwargs: + return "{}" + elif self.is_args: + return "()" + elif self.is_required: + return "" + return repr(self.default) + + +class Section: + """A helper class to store a docstring section.""" + + class Type: + MARKDOWN = "markdown" + PARAMETERS = "parameters" + EXCEPTIONS = "exceptions" + RETURN = "return" + 
+ def __init__(self, section_type, value): + self.type = section_type + self.value = value + + def __str__(self): + return self.type + + def __repr__(self): + return f"" + + +class Parser(metaclass=ABCMeta): + """ + A class to parse docstrings. + + Its `parse` method is given a docstring together with the object's path, signature and type. + + The `parse` method then returns structured data, + in the form of a list of [`Section`][pytkdocs.parsers.docstrings.base.Section]s. + It also returns the list of errors that occurred during parsing. + """ + + def __init__(self) -> None: + """Initialization method.""" + self.object_path = "" + self.object_signature: Optional[inspect.Signature] = None + self.object_type = None + self.errors: List[str] = [] + + def set_state( + self, object_path: str, object_signature: Optional[inspect.Signature], object_type: Optional[Any], + ): + self.errors = [] + self.object_path = object_path + self.object_signature = object_signature + self.object_type = object_type + + def reset_state(self): + self.object_path = "" + self.object_signature = None + self.object_type = None + + def parse( + self, + docstring: str, + object_path: str, + object_signature: Optional[inspect.Signature] = None, + object_type: Optional[Any] = None, + ) -> Tuple[List[Section], List[str]]: + self.set_state(object_path, object_signature, object_type) + sections = self.parse_sections(docstring) + errors = self.errors + self.reset_state() + return sections, errors + + def error(self, message): + self.errors.append(f"{self.object_path}: {message}") + + @abstractmethod + def parse_sections(self, docstring: str) -> List[Section]: + raise NotImplementedError diff --git a/src/pytkdocs/parsers/docstrings.py b/src/pytkdocs/parsers/docstrings/google.py similarity index 67% rename from src/pytkdocs/parsers/docstrings.py rename to src/pytkdocs/parsers/docstrings/google.py index 9ecacbe..a8c746d 100644 --- a/src/pytkdocs/parsers/docstrings.py +++ b/src/pytkdocs/parsers/docstrings/google.py @@ -1,11 
+1,8 @@ """This module defines functions and classes to parse docstrings into structured data.""" - -import inspect import re from typing import Any, List, Optional, Pattern, Sequence, Tuple -empty = inspect.Signature.empty - +from pytkdocs.parsers.docstrings.base import AnnotatedObject, Parameter, Parser, Section, empty TITLES_PARAMETERS: Sequence[str] = ("args:", "arguments:", "params:", "parameters:") """Titles to match for "parameters" sections.""" @@ -21,124 +18,20 @@ """Regular expressions to match lines starting admonitions, of the form `TYPE: [TITLE]`.""" -class AnnotatedObject: - """A helper class to store information about an annotated object.""" - - def __init__(self, annotation, description): - self.annotation = annotation - self.description = description - - -class Parameter(AnnotatedObject): - """A helper class to store information about a signature parameter.""" - - def __init__(self, name, annotation, description, kind, default=empty): - super().__init__(annotation, description) - self.name = name - self.kind = kind - self.default = default - - def __str__(self): - return self.name - - def __repr__(self): - return f"" - - @property - def is_optional(self): - return self.default is not empty - - @property - def is_required(self): - return not self.is_optional - - @property - def is_args(self): - return self.kind is inspect.Parameter.VAR_POSITIONAL - - @property - def is_kwargs(self): - return self.kind is inspect.Parameter.VAR_KEYWORD - - @property - def default_string(self): - if self.is_kwargs: - return "{}" - elif self.is_args: - return "()" - elif self.is_required: - return "" - return repr(self.default) - - -class Section: - """A helper class to store a docstring section.""" - - class Type: - MARKDOWN = "markdown" - PARAMETERS = "parameters" - EXCEPTIONS = "exceptions" - RETURN = "return" - - def __init__(self, section_type, value): - self.type = section_type - self.value = value - - def __str__(self): - return self.type +class Google(Parser): + 
"""A Google-style docstrings parser.""" - def __repr__(self): - return f"" + def __init__(self, replace_admonitions: bool = True) -> None: + super().__init__() + self.replace_admonitions = replace_admonitions - -class DocstringParser: - """ - A class to parse docstrings. - - It is instantiated with an object's path, docstring, signature and return type. - - The `parse` method then returns structured data, - in the form of a list of [`Section`][pytkdocs.parsers.docstrings.Section]s. - It also return the list of errors that occurred during parsing. - """ - - def __init__( - self, - path: str, - docstring: str, - signature: Optional[inspect.Signature] = None, - return_type: Optional[Any] = empty, - ) -> None: - """ - Arguments: - path: An object's dotted-path, used to improve error messages. - docstring: An object's docstring: the docstring to parse. - signature: An object's signature if any. - return_type: An object's return type if any. Can be a string or a type. - """ - self.path = path - self.docstring = docstring or "" - self.signature = signature - self.return_type = return_type - self.parsing_errors: List[str] = [] - - def parse(self, admonitions: bool = True) -> List[Section]: - """ - Parse a docstring. - - Arguments: - admonitions: Whether to transform "Google-Style admonitions" to "Markdown admonitions" - by transforming `Type: [Title]` to `!!! type: ["Title"]`. - - Returns: - A tuple containing the list of parsed sections and the errors that occurred during parsing. 
- """ + def parse_sections(self, docstring: str) -> List[Section]: sections = [] current_section = [] in_code_block = False - lines = self.docstring.split("\n") + lines = docstring.split("\n") i = 0 while i < len(lines): @@ -181,7 +74,7 @@ def parse(self, admonitions: bool = True) -> List[Section]: current_section.append(lines[i]) else: - if admonitions and not in_code_block and i + 1 < len(lines): + if self.replace_admonitions and not in_code_block and i + 1 < len(lines): match = RE_GOOGLE_STYLE_ADMONITION.match(lines[i]) if match: groups = match.groupdict() @@ -250,8 +143,8 @@ def read_block_items(self, lines: List[str], start_index: int) -> Tuple[List[str # indent between initial and continuation: append but add error cont_indent = len(line) - len(line.lstrip()) current_item.append(line[cont_indent:]) - self.parsing_errors.append( - f"{self.path}: Confusing indentation for continuation line {i+1} in docstring, " + self.error( + f"Confusing indentation for continuation line {i+1} in docstring, " f"should be {indent} * 2 = {indent*2} spaces, not {cont_indent}" ) @@ -333,7 +226,7 @@ def read_parameters_section(self, lines: List[str], start_index: int) -> Tuple[O try: name_with_type, description = param_line.split(":", 1) except ValueError: - self.parsing_errors.append(f"{self.path}: Failed to get 'name: description' pair from '{param_line}'") + self.error(f"Failed to get 'name: description' pair from '{param_line}'") continue description = description.lstrip() @@ -352,9 +245,9 @@ def read_parameters_section(self, lines: List[str], start_index: int) -> Tuple[O kind = None try: - signature_param = self.signature.parameters[name.lstrip("*")] # type: ignore + signature_param = self.object_signature.parameters[name.lstrip("*")] # type: ignore except (AttributeError, KeyError): - self.parsing_errors.append(f"{self.path}: No type annotation for parameter '{name}'") + self.error(f"No type annotation for parameter '{name}'") else: if signature_param.annotation is not empty: 
annotation = signature_param.annotation @@ -369,7 +262,7 @@ def read_parameters_section(self, lines: List[str], start_index: int) -> Tuple[O if parameters: return Section(Section.Type.PARAMETERS, parameters), i - self.parsing_errors.append(f"{self.path}: Empty parameters section at line {start_index}") + self.error(f"Empty parameters section at line {start_index}") return None, i def read_exceptions_section(self, lines: List[str], start_index: int) -> Tuple[Optional[Section], int]: @@ -390,16 +283,14 @@ def read_exceptions_section(self, lines: List[str], start_index: int) -> Tuple[O try: annotation, description = exception_line.split(": ", 1) except ValueError: - self.parsing_errors.append( - f"{self.path}: Failed to get 'exception: description' pair from '{exception_line}'" - ) + self.error(f"Failed to get 'exception: description' pair from '{exception_line}'") else: exceptions.append(AnnotatedObject(annotation, description.lstrip(" "))) if exceptions: return Section(Section.Type.EXCEPTIONS, exceptions), i - self.parsing_errors.append(f"{self.path}: Empty exceptions section at line {start_index}") + self.error(f"Empty exceptions section at line {start_index}") return None, i def read_return_section(self, lines: List[str], start_index: int) -> Tuple[Optional[Section], int]: @@ -415,36 +306,25 @@ def read_return_section(self, lines: List[str], start_index: int) -> Tuple[Optio """ text, i = self.read_block(lines, start_index) - if self.signature: - annotation = self.signature.return_annotation + if self.object_signature: + annotation = self.object_signature.return_annotation else: - annotation = self.return_type + annotation = self.object_type if annotation is empty: if not text: - self.parsing_errors.append(f"{self.path}: No return type annotation") + self.error("No return type annotation") else: try: type_, text = text.split(":", 1) except ValueError: - self.parsing_errors.append(f"{self.path}: No type in return description") + self.error("No type in return 
description") else: annotation = type_.lstrip() text = text.lstrip() if annotation is empty and not text: - self.parsing_errors.append(f"{self.path}: Empty return section at line {start_index}") + self.error(f"Empty return section at line {start_index}") return None, i return Section(Section.Type.RETURN, AnnotatedObject(annotation, text)), i - - -def parse( - path: str, - docstring: str, - signature: Optional[inspect.Signature] = None, - return_type: Optional[Any] = empty, - admonitions: bool = True, -): - parser = DocstringParser(path, docstring, signature, return_type) - return parser.parse(admonitions), parser.parsing_errors diff --git a/src/pytkdocs/serializer.py b/src/pytkdocs/serializer.py index dfff54f..c0e3477 100644 --- a/src/pytkdocs/serializer.py +++ b/src/pytkdocs/serializer.py @@ -9,8 +9,8 @@ import re from typing import Any, Optional, Pattern -from .objects import Object, Source -from .parsers.docstrings import AnnotatedObject, Parameter, Section +from pytkdocs.objects import Object, Source +from pytkdocs.parsers.docstrings.base import AnnotatedObject, Parameter, Section try: from typing import GenericMeta # python 3.6 diff --git a/tests/test_cli.py b/tests/test_cli.py index 212c6be..2bbf2ff 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -10,17 +10,17 @@ def test_read_whole_stdin(monkeypatch): "sys.stdin", io.StringIO( """ - { - "objects": [ - { - "path": "pytkdocs.cli.main" - }, - { - "path": "pytkdocs.cli.get_parser" - } - ] - } - """ + { + "objects": [ + { + "path": "pytkdocs.cli.main" + }, + { + "path": "pytkdocs.cli.get_parser" + } + ] + } + """ ), ) @@ -39,4 +39,4 @@ def test_read_stdin_line_by_line(monkeypatch): def test_load_complete_tree(monkeypatch): monkeypatch.setattr("sys.stdin", io.StringIO('{"objects": [{"path": "pytkdocs"}]}')) - cli.main() + cli.main(["--line-by-line"]) diff --git a/tests/test_parsers/test_docstrings/__init__.py b/tests/test_parsers/test_docstrings/__init__.py new file mode 100644 index 0000000..e69de29 
diff --git a/tests/test_parsers/test_docstrings.py b/tests/test_parsers/test_docstrings/test_google.py similarity index 98% rename from tests/test_parsers/test_docstrings.py rename to tests/test_parsers/test_docstrings/test_google.py index 840ec91..2266137 100644 --- a/tests/test_parsers/test_docstrings.py +++ b/tests/test_parsers/test_docstrings/test_google.py @@ -4,11 +4,11 @@ from textwrap import dedent from pytkdocs.loader import Loader -from pytkdocs.parsers.docstrings import parse as _parse +from pytkdocs.parsers.docstrings.google import Google def parse(docstring, signature=None, return_type=inspect.Signature.empty, admonitions=True): - return _parse("o", dedent(docstring).strip(), signature, return_type, admonitions) + return Google(replace_admonitions=admonitions).parse(dedent(docstring).strip(), "o", signature, return_type) def test_simple_docstring():