mkdocstrings · pawamoy · Feb 27, 2021 · Feb 27, 2021
diff --git a/duties.py b/duties.py
@@ -15,7 +15,7 @@
 from jinja2.sandbox import SandboxedEnvironment
 from pip._internal.commands.show import search_packages_info  # noqa: WPS436 (no other way?)
 
-PY_SRC_PATHS = (Path(_) for _ in ("src", "tests", "duties.py"))
+PY_SRC_PATHS = (Path(_) for _ in ("src", "tests", "scripts", "duties.py"))
 PY_SRC_LIST = tuple(str(_) for _ in PY_SRC_PATHS)
 PY_SRC = " ".join(PY_SRC_LIST)
 TESTING = os.environ.get("TESTING", "0") in {"1", "true"}

diff --git a/pyproject.toml b/pyproject.toml
@@ -20,6 +20,7 @@ include = [
 
 [tool.poetry.dependencies]
 python = "^3.6"
+astunparse = {version = "^1.6.3", python = "<3.9"}
 cached-property = {version = "^1.5.2", python = "<3.8"}
 dataclasses = {version = ">=0.7,<0.9", python = "3.6"}
 docstring_parser = {version = "^0.7.3", optional = true}

diff --git a/scripts/get_annotations.py b/scripts/get_annotations.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+"""Scan Python files to retrieve real-world type annotations."""
+
+import ast
+import glob
+import re
+import sys
+from multiprocessing import Pool, cpu_count
+from pathlib import Path
+from typing import List
+
+# Prefer the standard library's `ast.unparse` when the interpreter provides it;
+# otherwise fall back to the third-party `astunparse` package
+# (declared in pyproject.toml for Python < 3.9).
+try:
+    from ast import unparse  # type: ignore
+except ImportError:
+    from astunparse import unparse as _unparse
+
+    # Wrap astunparse: drop trailing newlines and strip every parenthesis from its output
+    # (presumably to normalize it with what `ast.unparse` produces -- TODO confirm).
+    unparse = lambda node: _unparse(node).rstrip("\n").replace("(", "").replace(")", "")
+
+# Matches each run of word characters; `scan_file` replaces every match with "a".
+regex = re.compile(r"\w+")
+
+
+def scan_file(filepath: str) -> set:
+    """
+    Scan a Python file and return a set of annotations.
+
+    Since parsing `Optional[typing.List]` and `Optional[typing.Dict]` is the same,
+    we're not interested in keeping the actual names.
+    Therefore we replace every word with "a".
+    It has two benefits:
+
+    - we can get rid of syntactically equivalent annotations (duplicates)
+    - the resulting annotations take fewer bytes
+
+    The scan is best-effort: files that cannot be read or parsed,
+    and annotations that cannot be unparsed, are silently skipped.
+
+    Arguments:
+        filepath: The path to the Python file to scan.
+
+    Returns:
+        A set of annotations.
+    """
+    annotations: set = set()
+    try:
+        # Parse the raw bytes so that PEP 263 encoding declarations are honored,
+        # instead of decoding the file with the platform's default encoding.
+        code = ast.parse(Path(filepath).read_bytes())
+    except Exception:
+        # Catch `Exception` rather than using a bare `except`, so that
+        # KeyboardInterrupt and SystemExit still propagate out of pool workers.
+        return annotations
+    for node in ast.walk(code):
+        annotation = getattr(node, "annotation", None)
+        if annotation is None:
+            # Unannotated arguments carry `annotation=None`: nothing to unparse.
+            continue
+        try:
+            unparsed = unparse(annotation)
+        except Exception:
+            continue
+        annotations.add(regex.sub("a", unparsed))
+    return annotations
+
+
+def main(directories: List[str]) -> int:
+    """
+    Scan Python files in a list of directories.
+
+    First, all the files are stored in a list,
+    then the scanning is done in parallel with a multiprocessing pool.
+    The normalized annotations are printed sorted, one per line, each prefixed with "a: ".
+
+    Arguments:
+        directories: A list of directories to scan.
+
+    Returns:
+        An exit code: 1 when no directory is given, 0 otherwise.
+    """
+    if not directories:
+        return 1
+    all_files: List[str] = []
+    for directory in directories:
+        all_files.extend(glob.glob(directory.rstrip("/") + "/**/*.py", recursive=True))
+    # Leave one core free, but never request fewer than one worker:
+    # `Pool(0)` raises `ValueError` on single-core machines.
+    with Pool(max(1, cpu_count() - 1)) as pool:
+        sets = pool.map(scan_file, all_files)
+    annotations: set = set().union(*sets)
+    print("a: " + "\na: ".join(sorted(annotations)))
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
diff --git a/src/pytkdocs/parsers/attributes.py b/src/pytkdocs/parsers/attributes.py
@@ -4,34 +4,14 @@
 import inspect
 from functools import lru_cache
 from textwrap import dedent
-from typing import Union, get_type_hints
-
-RECURSIVE_NODES = (ast.If, ast.IfExp, ast.Try, ast.With)
+from typing import get_type_hints
 
+try:
+    from ast import unparse  # type: ignore
+except ImportError:
+    from astunparse import unparse
 
-def node_to_annotation(node) -> Union[str, object]:
-    if isinstance(node, ast.AnnAssign):
-        if isinstance(node.annotation, ast.Name):
-            return node.annotation.id
-        elif isinstance(node.annotation, (ast.Constant, ast.Str)):
-            return node.annotation.s
-        elif isinstance(node.annotation, ast.Subscript):
-            value_id = node.annotation.value.id  # type: ignore
-            if hasattr(node.annotation.slice, "value"):
-                value = node.annotation.slice.value  # type: ignore
-            else:
-                value = node.annotation.slice
-            return f"{value_id}[{node_to_annotation(value)}]"
-        else:
-            return inspect.Signature.empty
-    elif isinstance(node, ast.Subscript):
-        return f"{node.value.id}[{node_to_annotation(node.slice.value)}]"  # type: ignore
-    elif isinstance(node, ast.Tuple):
-        annotations = [node_to_annotation(n) for n in node.elts]
-        return ", ".join(a for a in annotations if a is not inspect.Signature.empty)  # type: ignore
-    elif isinstance(node, ast.Name):
-        return node.id
-    return inspect.Signature.empty
+RECURSIVE_NODES = (ast.If, ast.IfExp, ast.Try, ast.With)
 
 
 def get_nodes(obj):
@@ -144,6 +124,11 @@ def pick_target(target):
     return isinstance(target, ast.Attribute) and isinstance(target.value, ast.Name) and target.value.id == "self"
 
 
+def unparse_annotation(node):
+    """
+    Turn an annotation AST node back into source text.
+
+    Arguments:
+        node: The AST node of the annotation.
+
+    Returns:
+        The unparsed code, without trailing newlines and with every parenthesis removed.
+    """
+    source = unparse(node).rstrip("\n")
+    # Delete both "(" and ")" in a single pass.
+    return source.translate(str.maketrans("", "", "()"))
+
+
 @lru_cache()
 def get_instance_attributes(func):
     nodes = get_nodes(func)
@@ -157,7 +142,7 @@ def get_instance_attributes(func):
         if isinstance(assignment, ast.AnnAssign):
             if pick_target(assignment.target):
                 names = [assignment.target.attr]
-                annotation = node_to_annotation(assignment)
+                annotation = unparse_annotation(assignment.annotation)
         else:
             names = [target.attr for target in assignment.targets if pick_target(target)]
 

diff --git a/tests/fixtures/parsing/annotations.py b/tests/fixtures/parsing/annotations.py