Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Add substitution definition code and tests #385

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
5 changes: 5 additions & 0 deletions examples/numpy.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ exclude = [
'numpy:tensordot',
# See https://github.com/jupyter/papyri/issues/361'
'numpy.ma.core:MaskedArray.resize',
# Contains a table that messes up the substitution ref
'numpy:isscalar'
]

execute_exclude_patterns = [
Expand Down Expand Up @@ -88,3 +90,6 @@ docspage = 'https://numpy.org/doc/1.26/'
[global.expected_errors]
VisitCitationReferenceNotImplementedError = ["numpy.fft"]

[global.substitution_definitions]
version = '1.2.3'

9 changes: 8 additions & 1 deletion examples/scipy.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,14 @@ exclude = [
"scipy.stats._continuous_distns:crystalball_gen._pdf",

# assert len(tsc) in (0, 1), (tsc, data)
"scipy.io.matlab._mio5"
"scipy.io.matlab._mio5",

# non-quoted ||u|| leads to a substitution ref
"scipy.linalg._decomp_update:qr_insert",
"scipy.stats._ksstats:kolmogn",
"scipy.optimize._trustregion:BaseQuadraticSubproblem.get_boundaries_intersections",
"scipy.sparse.linalg._expm_multiply:LazyOperatorNormInfo.d"

]
exclude_jedi = [
"scipy.linalg._sketches.clarkson_woodruff_transform",
Expand Down
13 changes: 13 additions & 0 deletions papyri/common_ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,19 @@ def __hash__(self):
)


class UnserializableNode(Node):
_dont_serialise = True

def cbor(self, encoder):
    """Refuse CBOR encoding; this node must never be serialised.

    Parameters
    ----------
    encoder
        Unused; accepted only to keep the serialisation signature.

    Raises
    ------
    AssertionError
        Always. The offending node is attached as the exception argument.
    """
    # Explicit raise instead of ``assert False``: assert statements are
    # removed under ``python -O``, which would make this guard a silent no-op.
    raise AssertionError(self)
Comment on lines +87 to +88
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is currently failing on a table because of the |<table cell>| syntax in https://numpy.org/doc/stable/reference/generated/numpy.isscalar.html

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think we can just add the part that fails on a table to the ignore list. I don't think tree-sitter-rst plans to parse tables, and I'm wondering whether a `.. table::` directive could be a good workaround; we could add it to Sphinx as well, so that projects work with both papyri and Sphinx at the same time.


def to_json(self) -> bytes:
    """Refuse JSON encoding; this node must never be serialised.

    Raises
    ------
    AssertionError
        Always. The offending node is attached as the exception argument.
    """
    # Explicit raise instead of ``assert False``: assert statements are
    # removed under ``python -O``, which would make this guard a silent no-op.
    raise AssertionError(self)

def to_dict(self):
    """Refuse conversion to a dict; this node must never be serialised.

    Raises
    ------
    AssertionError
        Always. The offending node is attached as the exception argument.
    """
    # Explicit raise instead of ``assert False``: assert statements are
    # removed under ``python -O``, which would make this guard a silent no-op.
    raise AssertionError(self)


TAG_MAP: Dict[Any, int] = {}
REV_TAG_MAP: Dict[int, Any] = {}

Expand Down
13 changes: 8 additions & 5 deletions papyri/crosslink.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def find_all_refs(

# TODO
# here we can't compute just the dictionary and use frozenset(....values())
# as we may have multiple version of lisbraries; this is something that will
# as we may have multiple version of libraries; this is something that will
# need to be fixed in the long run
known_refs = []
ref_map = {}
Expand Down Expand Up @@ -160,7 +160,7 @@ def process(
"Receives",
# "Notes",
# "Signature",
#'Extended Summary',
# "Extended Summary",
#'References'
#'See Also'
#'Examples'
Expand All @@ -181,7 +181,7 @@ def flat(l):
local_refs = frozenset(flat(_local_refs))

visitor = PostDVR(
self.qa, known_refs, local_refs, aliases, version=version, config={}
self.qa, known_refs, local_refs, {}, aliases, version=version, config={}
)
for section in ["Extended Summary", "Summary", "Notes"] + sections_:
if section not in self.content:
Expand Down Expand Up @@ -313,7 +313,8 @@ def _ingest_examples(
f"TBD (examples, {path}), supposed to be QA",
known_refs,
set(),
aliases,
substitution_defs={},
aliases=aliases,
version=version,
config={},
)
Expand Down Expand Up @@ -443,7 +444,8 @@ def ingest(self, path: Path, check: bool) -> None:
)

except Exception as e:
raise RuntimeError(f"error writing to {path}") from e
e.add_note(f"error writing to {path} {key}")
raise

def relink(self) -> None:
gstore = self.gstore
Expand Down Expand Up @@ -506,6 +508,7 @@ def relink(self) -> None:
f"TBD, supposed to be QA relink {key}",
known_refs,
set(),
{},
aliases,
version="?",
)
Expand Down
13 changes: 13 additions & 0 deletions papyri/directives.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,5 +68,18 @@ def versionchanged_handler(argument, options, content):
return admonition_helper("versionchanged", argument, options, content)


def unicode_handler(argument, options, content):
    """Turn the argument of a ``unicode::`` directive into a text node.

    Decoding follows the rules of the docutils ``unicode`` directive:

    - anything after a ``.. `` comment marker is dropped;
    - the remainder is split on whitespace and each token is decoded;
    - an all-digit token is a decimal code point;
    - a token prefixed by ``0x``, ``x``, ``\\x``, ``U+``, ``U``, or ``\\u``
      (case-insensitive), or written as ``&#x...;``, is a hexadecimal code
      point;
    - any other token is kept as literal text;
    - the decoded pieces are concatenated without separators.

    Parameters
    ----------
    argument : str
        Raw directive argument, e.g. ``"U+2665"``.
    options, content
        Unused here.

    Returns
    -------
    list
        A single-element list holding one ``MText`` with the decoded text.

    Raises
    ------
    ValueError
        If the argument holds no code at all, or a code point is outside the
        valid Unicode range.
    """
    # Local import keeps this handler self-contained.
    import re

    hex_code = re.compile(
        r"(?:0x|x|\\x|U\+?|\\u)([0-9a-f]+)$|&#x([0-9a-f]+);$", re.IGNORECASE
    )
    comment_marker = re.compile(r"( |\n|^)\.\. ")

    def decode(code):
        """Decode one whitespace-free token into its text."""
        try:
            if code.isdigit():
                return chr(int(code))
            found = hex_code.match(code)
            if found is None:
                # Not a character code: keep the text verbatim.
                return code
            return chr(int(found.group(1) or found.group(2), 16))
        except OverflowError as err:
            # chr() raises OverflowError for huge values; report it uniformly.
            raise ValueError(f"unicode code too large: {code!r}") from err

    codes = comment_marker.split(argument)[0].split()
    if not codes:
        raise ValueError(f"unicode directive needs at least one code: {argument!r}")
    return [MText("".join(decode(code) for code in codes))]


def replace_hander(argument, options, content):
    """Handle a ``replace::`` directive by wrapping its argument in ``MText``.

    The argument is used verbatim; no inline markup inside it is parsed.
    ``options`` and ``content`` are ignored.
    """
    # TODO: the replacement text should probably be parsed and recursed into,
    # which may slightly change this API. One option is to return an
    # unprocessed directive here and let the tree-sitter visitor recurse
    # through generic_visit.
    replacement = MText(argument)
    return [replacement]


def deprecated_handler(argument, options, content):
    """Handle a ``deprecated`` directive by delegating to ``admonition_helper``."""
    kind = "deprecated"
    return admonition_helper(kind, argument, options, content)
15 changes: 13 additions & 2 deletions papyri/examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,20 @@
Substitutions
~~~~~~~~~~~~~

In this paragraph: |SubstitutionRef| Should be replaced...
In this paragraph: |ALIAS| Should be replaced...

.. |ALIAS| replace:: -- SUBSTITUTION works--

Substitutions should support a |WIDE|, |VARIETY|, |OF|, |REPLACEMENT|

.. |WIDE| replace:: modules links like: :mod:`papyri`

.. |VARIETY| replace:: text formatting with _italics_ and **bold**

.. |OF| image:: inline images are likely to not work,

.. |REPLACEMENT| unicode:: U+2665

.. |SubstitutionDef| replace:: ASUBSTITUTIONDEF



Expand Down
76 changes: 70 additions & 6 deletions papyri/gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
RefInfo,
Section,
SeeAlsoItem,
SubstitutionDef,
parse_rst_section,
)
from .toc import make_tree
Expand All @@ -98,6 +99,8 @@
# delayed import
if True:
from .myst_ast import MText
from .myst_ast import ReplaceNode
from .myst_ast import MParagraph


class ErrorCollector:
Expand Down Expand Up @@ -432,6 +435,7 @@ class Config:
fail_unseen_error: bool = False
execute_doctests: bool = True
directives: Dict[str, str] = dataclasses.field(default_factory=lambda: {})
substitution_definitions: Dict[str, str] = dataclasses.field(default_factory=dict)

def replace(self, **kwargs):
return dataclasses.replace(self, **kwargs)
Expand Down Expand Up @@ -1388,8 +1392,15 @@ def debugprint(*args):
docstring=example_code,
)
if config.execute_doctests:
doctest_runner.run(doctests, out=debugprint, clear_globs=False)
doctest_runner.globs.update(doctests.globs)
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"FigureCanvasAgg is non-interactive.*",
UserWarning,
)

doctest_runner.run(doctests, out=debugprint, clear_globs=False)
doctest_runner.globs.update(doctests.globs)
example_section_data.extend(
doctest_runner.get_example_section_data()
)
Expand All @@ -1403,7 +1414,7 @@ def debugprint(*args):
# TODO fix this if plt.close not called and still a lingering figure.
fig_managers = _pylab_helpers.Gcf.get_all_fig_managers()
if len(fig_managers) != 0:
print_(f"Unclosed figures in {qa}!!")
self.log.debug("Unclosed figures in %s!!", qa)
plt.close("all")

return processed_example_data(example_section_data), doctest_runner.figs
Expand Down Expand Up @@ -1448,6 +1459,18 @@ def collect_narrative_docs(self):
trees = {}
title_map = {}
blbs = {}
global_substitutions = {}
for k, v in self.config.substitution_definitions.items():
res = ts.parse(v.encode(), qa="global_substitution")
# HERE are some assumptions as we are parsing a "full document" with tree sitter
# this is going to give a single Section with a single paragraph.
sec: Section
[sec] = res
assert isinstance(sec, Section)
[par] = sec.children
assert isinstance(par, MParagraph)
par: MParagraph
global_substitutions["|" + k + "|"] = [ReplaceNode(k, v, par.children)]
with self.progress() as p2:
task = p2.add_task("Parsing narrative", total=len(files))

Expand All @@ -1469,10 +1492,16 @@ def collect_narrative_docs(self):
blob = DocBlob.new()
key = ":".join(parts)[:-4]
try:
from copy import copy

G = copy(global_substitutions)
G.update({})
ref_set: frozenset[RefInfo] = frozenset({})
dv = DVR(
key,
set(),
ref_set,
local_refs=set(),
substitution_defs=G,
aliases={},
version=self._meta["version"],
config=self.config.directives,
Expand Down Expand Up @@ -1503,7 +1532,11 @@ def collect_narrative_docs(self):

blbs[key] = blob
for k, b in blbs.items():
self.docs[k] = b.to_json()
try:
self.docs[k] = b.to_json()
except Exception as e:
e.add_note(f"serializing {k}")
raise

self._doctree = {"tree": make_tree(trees), "titles": title_map}

Expand All @@ -1526,7 +1559,11 @@ def write_api(self, where: Path):
"""
(where / "module").mkdir(exist_ok=True)
for k, v in self.data.items():
(where / "module" / (k + ".json")).write_bytes(v.to_json())
try:
(where / "module" / (k + ".json")).write_bytes(v.to_json())
except Exception as e:
e.add_note(f"serializing {k}")
raise

def partial_write(self, where):
self.write_api(where)
Expand Down Expand Up @@ -1931,6 +1968,7 @@ def collect_examples(self, folder: Path, config):
example.name,
frozenset(),
local_refs=frozenset(),
substitution_defs={},
aliases={},
version=self.version,
config=self.config.directives,
Expand Down Expand Up @@ -2255,20 +2293,46 @@ def collect_api_docs(self, root: str, limit_to: List[str]) -> None:
if new_ref:
_local_refs = _local_refs + new_ref

# substitution_defs: Dict[str, Union(MImage, ReplaceNode)] = {}
substitution_defs = {}
sections = []
for section in doc_blob.sections:
sections.append(doc_blob.content.get(section, []))
# arbitrary is usually raw RST that typically is a module docstring that can't be
# parsed by numpydoc
sections.extend(arbitrary)
for section in sections:
for child in section:
if isinstance(child, SubstitutionDef):
if child.value in substitution_defs:
self.log.warn(
"The same substitution definition was found twice: %s",
child.value,
)
substitution_defs[child.value] = child.children

if substitution_defs:
self.log.debug(
"Found substitution def for %s : %s", qa, substitution_defs
)

# def flat(l) -> List[str]:
# return [y for x in l for y in x]
for lr1 in _local_refs:
assert isinstance(lr1, str)
# lr: FrozenSet[str] = frozenset(flat(_local_refs))
lr: FrozenSet[str] = frozenset(_local_refs)

dv = DVR(
qa,
known_refs,
local_refs=lr,
substitution_defs=substitution_defs,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

aliases={},
version=self.version,
config=self.config.directives,
)

doc_blob.arbitrary = [dv.visit(s) for s in arbitrary]
doc_blob.example_section_data = dv.visit(doc_blob.example_section_data)
doc_blob._content = {k: dv.visit(v) for (k, v) in doc_blob._content.items()}
Expand Down
22 changes: 10 additions & 12 deletions papyri/myst_ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from typing import List, Union, Optional, Dict

from .common_ast import Node, register
from .common_ast import Node, register, UnserializableNode

from . import take2
from . import signature
Expand Down Expand Up @@ -104,7 +104,7 @@ def __init__(self, value):
@register(4045)
class MParagraph(Node):
type = "paragraph"
children: List[Union["PhrasingContent", "take2.MUnimpl"]]
children: List[Union["PhrasingContent", "take2.MUnimpl", "MImage"]]
# position: Any
# data: Any

Expand Down Expand Up @@ -152,16 +152,6 @@ def from_unprocessed(cls, up):
return cls(up.name, up.args, up.options, up.value, up.children)


class UnserializableNode(Node):
    """Base class for nodes that must never reach a serialiser.

    Every serialisation entry point defined here raises ``AssertionError``
    carrying the offending node, so a leaked instance is easy to identify.
    """

    # Marker attribute flagging instances as non-serialisable.
    _dont_serialise = True

    def cbor(self, encoder):
        """Refuse CBOR encoding; always raises ``AssertionError``."""
        # Explicit raise instead of ``assert False``: assert statements are
        # removed under ``python -O``, which would make this guard a no-op.
        raise AssertionError(self)

    def to_json(self) -> bytes:
        """Refuse JSON encoding; always raises ``AssertionError``."""
        raise AssertionError(self)


class UnprocessedDirective(UnserializableNode):
"""
Placeholder for yet unprocessed directives, after they are parsed by tree-sitter,
Expand Down Expand Up @@ -268,6 +258,14 @@ class MRoot(Node):
]


class ReplaceNode(Node):
    """Node carrying the expansion of a substitution definition.

    Built in gen.py as ``ReplaceNode(k, v, par.children)`` for each globally
    configured substitution, where ``k`` is the substitution name, ``v`` its
    raw replacement source, and ``par.children`` the parsed paragraph content.
    Presumably positional arguments map to the fields below in declaration
    order -- TODO confirm against ``Node.__init__``.

    NOTE(review): unlike ``MParagraph`` this class has no ``@register(...)``
    tag -- confirm whether one is needed before it can be serialised.
    """

    # We may want to return links too.
    type = "replace"
    # Substitution name (without the surrounding ``|``), per the gen.py call.
    value: str
    # Raw, unparsed replacement text, per the gen.py call.
    text: str
    # Parsed nodes to insert wherever the substitution is referenced.
    children: List[Node]


StaticPhrasingContent = Union[
MText,
MInlineCode,
Expand Down
3 changes: 3 additions & 0 deletions papyri/myst_serialiser.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@


def serialize(instance, annotation):
assert not getattr(
instance, "_dont_serialise", None
), f"Should not have to serialize {instance}"
try:
if annotation in base_types:
# print("BASE", instance)
Expand Down
Loading
Loading