mwouts · mwouts · Mar 16, 2020 · Mar 16, 2020 · Mar 16, 2020 · Mar 16, 2020
diff --git a/.travis.yml b/.travis.yml
@@ -39,7 +39,7 @@ install:
       pip install -r requirements.txt;
     fi
   # install is required for testing the pre-commit mode
-  - pip install . || true
+  - pip install .[myst] || true
   # install black if available (Python 3.6 and above), and autopep8 for testing the pipe mode
   - pip install black || true
   - pip install autopep8 || true

diff --git a/jupytext/formats.py b/jupytext/formats.py
@@ -19,6 +19,12 @@
 from .languages import _SCRIPT_EXTENSIONS, _COMMENT_CHARS, same_language
 from .pandoc import pandoc_version, is_pandoc_available
 from .magics import is_magic
+from .myst import (
+    MYST_FORMAT_NAME,
+    is_myst_available,
+    myst_version,
+    myst_extensions,
+)
 
 
 class JupytextFormatError(ValueError):
@@ -162,6 +168,15 @@ def __init__(self,
         cell_exporter_class=None,
         current_version_number=pandoc_version()))
 
+# Register the MyST notebook format only when myst_parser can be imported:
+# one format description is added per extension in myst_extensions().
+# No cell reader/exporter class is given for this format.
+if is_myst_available():
+    JUPYTEXT_FORMATS.extend([NotebookFormatDescription(
+        format_name=MYST_FORMAT_NAME,
+        extension=ext,
+        header_prefix='',
+        cell_reader_class=None,
+        cell_exporter_class=None,
+        current_version_number=myst_version()) for ext in myst_extensions()])
+
 NOTEBOOK_EXTENSIONS = list(dict.fromkeys(['.ipynb'] + [fmt.extension for fmt in JUPYTEXT_FORMATS]))
 EXTENSION_PREFIXES = ['.lgt', '.spx', '.pct', '.hyd', '.nb']
 

diff --git a/jupytext/jupytext.py b/jupytext/jupytext.py
@@ -19,6 +19,7 @@
 from .languages import default_language_from_metadata_and_ext, set_main_and_cell_language
 from .pep8 import pep8_lines_between_cells
 from .pandoc import md_to_notebook, notebook_to_md
+from .myst import myst_extensions, myst_to_notebook, notebook_to_myst
 
 
 class TextNotebookConverter(NotebookReader, NotebookWriter):
@@ -54,6 +55,9 @@ def reads(self, s, **_):
         if self.fmt.get('format_name') == 'pandoc':
             return md_to_notebook(s)
 
+        if self.ext in myst_extensions():
+            return myst_to_notebook(s)
+
         lines = s.splitlines()
 
         cells = []
@@ -123,6 +127,26 @@ def writes(self, nb, metadata=None, **kwargs):
                 metadata=metadata,
                 cells=cells))
 
+        if self.ext in myst_extensions():
+            pygments_lexer = metadata.get("language_info", {}).get("pygments_lexer", None)
+            metadata = insert_jupytext_info_and_filter_metadata(metadata, self.ext, self.implementation)
+
+            cells = []
+            for cell in nb.cells:
+                cell_metadata = filter_metadata(cell.metadata,
+                                                self.fmt.get('cell_metadata_filter'),
+                                                _IGNORE_CELL_METADATA)
+                if cell.cell_type == 'code':
+                    cells.append(new_code_cell(source=cell.source, metadata=cell_metadata))
+                else:
+                    cells.append(NotebookNode(source=cell.source, metadata=cell_metadata, cell_type=cell.cell_type))
+            return notebook_to_myst(NotebookNode(
+                nbformat=nb.nbformat,
+                nbformat_minor=nb.nbformat_minor,
+                metadata=metadata,
+                cells=cells),
+                default_lexer=pygments_lexer)
+
         # Copy the notebook, in order to be sure we do not modify the original notebook
         nb = NotebookNode(
             nbformat=nb.nbformat,

diff --git a/jupytext/myst.py b/jupytext/myst.py
@@ -0,0 +1,259 @@
+"""
+This module contains round-trip conversion between
+myst formatted text documents and notebooks.
+"""
+import json
+import logging
+
+import nbformat as nbf
+import yaml
+
+# Format name under which the MyST notebook format is registered
+MYST_FORMAT_NAME = "mystnb"
+# Default directive names of the fenced blocks that hold code and raw cells
+CODE_DIRECTIVE = "nb-code"
+RAW_DIRECTIVE = "nb-raw"
+
+# Logger used by myst_to_notebook when the caller does not provide one
+LOGGER = logging.getLogger(__name__)
+
+
+def is_myst_available():
+    """Return True when the ``myst_parser`` package can be imported, else False."""
+    try:
+        import myst_parser  # noqa
+    except ImportError:
+        available = False
+    else:
+        available = True
+    return available
+
+
+def myst_version():
+    """Return the ``__version__`` of the installed ``myst_parser`` package.
+
+    The import is done inside the function, so an ImportError is only
+    raised when the function is called without ``myst_parser`` installed.
+    """
+    from myst_parser import __version__ as installed_version
+
+    return installed_version
+
+
+def myst_extensions():
+    """Return a new list with the file extensions of the myst notebook format."""
+    extensions = [".mystnb"]
+    return extensions
+
+
+def from_nbnode(value):
+    """Recursively convert NotebookNode objects to plain dicts.
+
+    Mappings (NotebookNode is a dict subclass, so it is covered by the
+    ``dict`` check) and lists are rebuilt all the way down, so that the
+    result only contains plain ``dict`` and ``list`` containers. This
+    matters because ``yaml.safe_dump`` refuses dict subclasses, including
+    those nested inside a list or inside a plain dict.
+
+    :param value: any object, typically notebook or cell metadata
+    :return: a new dict or list for dict and list inputs (the input is never
+        modified), and ``value`` itself for any other object
+    """
+    if isinstance(value, dict):
+        return {k: from_nbnode(v) for k, v in value.items()}
+    if isinstance(value, list):
+        return [from_nbnode(v) for v in value]
+    return value
+
+
+class MockDirective:
+    """Minimal class passed as ``directive_class`` to ``parse_directive_text``.
+
+    NOTE(review): the attribute names look like those of a docutils
+    directive class -- confirm against myst_parser.parse_directives.
+    """
+
+    has_content = True
+    required_arguments = 0
+    optional_arguments = 1
+    option_spec = {"options": True}
+
+
+def _fmt_md(text):
+    """Return ``text`` without trailing whitespace and without leading newlines.
+
+    Only newline characters are removed at the start, so that leading
+    spaces on the first non-empty line are kept.
+    """
+    # str.lstrip does in one pass what a character-by-character loop
+    # would do with one string copy per leading newline
+    return text.rstrip().lstrip("\n")
+
+
+def _append_markdown_cell(notebook, source_lines, metadata):
+    """Append a markdown cell made of ``source_lines``, unless its text is blank."""
+    source = _fmt_md("".join(source_lines))
+    if source:
+        notebook.cells.append(
+            nbf.v4.new_markdown_cell(
+                source=source, metadata=nbf.from_dict(metadata),
+            )
+        )
+
+
+def _read_block_break_metadata(token, logger):
+    """Return the metadata dict decoded from the content of a block break.
+
+    An empty dict is returned when the block break has no content, and also,
+    after logging a warning, when the content is not valid JSON or does not
+    decode to a dict.
+    """
+    if not token.content:
+        return {}
+    try:
+        metadata = json.loads(token.content.strip())
+    except Exception:
+        # Best effort on purpose: a malformed metadata line must not
+        # prevent the rest of the document from being read
+        logger.warning(
+            "markdown cell metadata could not be read: {}".format(token.position)
+        )
+        return {}
+    if not isinstance(metadata, dict):
+        logger.warning(
+            "markdown cell metadata is not a dict: {}".format(token.position)
+        )
+        return {}
+    return metadata
+
+
+def myst_to_notebook(
+    text, code_directive=CODE_DIRECTIVE, raw_directive=RAW_DIRECTIVE, logger=None,
+):
+    """Convert text written in the myst format to a notebook.
+
+    :param text: the file text, either a string or a mistletoe ``SourceLines``
+    :param code_directive: the name of the directive that holds code cells
+    :param raw_directive: the name of the directive that holds raw cells
+    :param logger: the logger that receives the warnings about unreadable
+        markdown cell metadata (the module-level ``LOGGER`` when not given)
+    :return: the notebook created with ``nbformat.v4.new_notebook``
+
+    The ``nbformat`` and ``nbformat_minor`` keys of the front matter become
+    the notebook format version, the other keys the notebook metadata.
+    Text found between block breaks and code/raw directives becomes markdown
+    cells; text that is blank once stripped does not create a cell.
+
+    NOTE: we assume here that all of these directives are at the top-level,
+    i.e. not nested in other directives.
+    """
+    from mistletoe.base_elements import SourceLines
+    from mistletoe.parse_context import (
+        ParseContext,
+        get_parse_context,
+        set_parse_context,
+    )
+    from mistletoe.block_tokens import Document, CodeFence
+
+    from myst_parser.block_tokens import BlockBreak
+    from myst_parser.parse_directives import parse_directive_text
+    from myst_parser.docutils_renderer import DocutilsRenderer
+
+    # The fence language is compared to the directive name wrapped in braces
+    code_directive = "{{{0}}}".format(code_directive)
+    raw_directive = "{{{0}}}".format(raw_directive)
+    logger = logger or LOGGER
+
+    original_context = get_parse_context()
+    parse_context = ParseContext(
+        find_blocks=DocutilsRenderer.default_block_tokens,
+        find_spans=DocutilsRenderer.default_span_tokens,
+    )
+
+    if isinstance(text, SourceLines):
+        lines = text
+    else:
+        lines = SourceLines(text, standardize_ends=True)
+
+    try:
+        set_parse_context(parse_context)
+        doc = Document.read(lines, front_matter=True)
+
+        metadata_nb = doc.front_matter.get_data() if doc.front_matter else {}
+        nbformat = metadata_nb.pop("nbformat", None)
+        nbformat_minor = metadata_nb.pop("nbformat_minor", None)
+        kwargs = {"metadata": nbf.from_dict(metadata_nb)}
+        if nbformat is not None:
+            kwargs["nbformat"] = nbformat
+        if nbformat_minor is not None:
+            kwargs["nbformat_minor"] = nbformat_minor
+
+        notebook = nbf.v4.new_notebook(**kwargs)
+
+        # Index of the first line that has not yet been assigned to a cell
+        current_line = 0 if not doc.front_matter else doc.front_matter.position.line_end
+        # Metadata of the markdown cell that is currently being collected
+        md_metadata = {}
+
+        for item in doc.walk(["CodeFence", "BlockBreak"]):
+            token = item.node
+
+            if isinstance(token, BlockBreak):
+                # A block break closes the current markdown cell, and its
+                # content is the metadata of the markdown cell that follows
+                _append_markdown_cell(
+                    notebook,
+                    lines.lines[current_line:token.position.line_start - 1],
+                    md_metadata,
+                )
+                md_metadata = _read_block_break_metadata(token, logger)
+                current_line = token.position.line_start
+
+            if isinstance(token, CodeFence) and token.language in [
+                code_directive,
+                raw_directive,
+            ]:
+                # Note: we ignore anything after the directive on the first line
+                # this is reserved for the optional lexer name
+                # TODO: could log warning about if token.arguments != lexer name
+
+                _, options, body_lines = parse_directive_text(
+                    directive_class=MockDirective,
+                    argument_str="",
+                    content=token.children[0].content,
+                    validate_options=False,
+                )
+
+                _append_markdown_cell(
+                    notebook,
+                    lines.lines[current_line:token.position.line_start - 1],
+                    md_metadata,
+                )
+                current_line = token.position.line_end
+                md_metadata = {}
+
+                # Two independent tests: both cells are created when the
+                # caller gives the same name to the two directives
+                if token.language == code_directive:
+                    notebook.cells.append(
+                        nbf.v4.new_code_cell(
+                            source="\n".join(body_lines),
+                            metadata=nbf.from_dict(options),
+                        )
+                    )
+                if token.language == raw_directive:
+                    notebook.cells.append(
+                        nbf.v4.new_raw_cell(
+                            source="\n".join(body_lines),
+                            metadata=nbf.from_dict(options),
+                        )
+                    )
+
+        # add the final markdown cell (if present); trailing blank lines
+        # alone do not create an empty cell
+        _append_markdown_cell(notebook, lines.lines[current_line:], md_metadata)
+
+    finally:
+        # The parse context is global: always restore the previous one
+        set_parse_context(original_context)
+
+    return notebook
+
+
+def notebook_to_myst(
+    nb, code_directive=CODE_DIRECTIVE, raw_directive=RAW_DIRECTIVE, default_lexer=None
+):
+    """Convert a notebook to its text representation in the myst format.
+
+    The text starts with a YAML front matter made of the notebook metadata
+    plus the ``nbformat`` and ``nbformat_minor`` keys. Markdown cells are
+    written as plain text, preceded by a ``+++`` line when they have metadata
+    or when they follow another markdown cell. Code and raw cells are written
+    as fenced directives, with their metadata (if any) as a YAML block.
+
+    :param nb: the notebook; its ``metadata``, ``nbformat``,
+        ``nbformat_minor`` and ``cells`` attributes are read
+    :param code_directive: the name of the directive used for code cells
+    :param raw_directive: the name of the directive used for raw cells
+    :param default_lexer: the lexer name written after the code directive
+        when the notebook metadata has no ``language_info.pygments_lexer``
+    :return: the text of the notebook, ending with exactly one newline
+    :raises NotImplementedError: for a cell that is not a markdown, code
+        or raw cell
+    """
+    # Work on a copy: when ``nb.metadata`` is a plain dict, ``from_nbnode``
+    # returns that very object, and the caller's metadata must not receive
+    # the two format keys added below
+    nb_metadata = dict(from_nbnode(nb.metadata))
+    nb_metadata["nbformat"] = nb.nbformat
+    nb_metadata["nbformat_minor"] = nb.nbformat_minor
+
+    # we add the pygments lexer as a directive argument, for use by syntax highlighters
+    pygments_lexer = nb_metadata.get("language_info", {}).get("pygments_lexer", None)
+    if pygments_lexer is None:
+        pygments_lexer = default_lexer
+
+    # The text is collected in a list and joined once at the end
+    parts = ["---\n", yaml.safe_dump(nb_metadata), "---\n"]
+
+    last_cell_md = False
+    for i, cell in enumerate(nb.cells):
+
+        if cell.cell_type == "markdown":
+            metadata = from_nbnode(cell.metadata)
+            if metadata:
+                parts.append("\n+++ {}\n".format(json.dumps(metadata)))
+            elif last_cell_md:
+                # Two consecutive markdown cells need an explicit separator
+                parts.append("\n+++\n")
+            parts.append("\n" + cell.source)
+            if not cell.source.endswith("\n"):
+                parts.append("\n")
+            last_cell_md = True
+
+        elif cell.cell_type in ["code", "raw"]:
+            is_code = cell.cell_type == "code"
+            parts.append(
+                "\n```{{{}}}".format(code_directive if is_code else raw_directive)
+            )
+            if pygments_lexer and is_code:
+                parts.append(" {}".format(pygments_lexer))
+            parts.append("\n")
+            metadata = from_nbnode(cell.metadata)
+            if metadata:
+                parts.extend(["---\n", yaml.safe_dump(metadata), "---\n"])
+            elif cell.source.startswith(("---", ":")):
+                # Without metadata, a source that starts like an options block
+                # is separated from the directive line by an empty line
+                parts.append("\n")
+            parts.append(cell.source)
+            if not cell.source.endswith("\n"):
+                parts.append("\n")
+            parts.append("```\n")
+            last_cell_md = False
+
+        else:
+            raise NotImplementedError("cell {}, type: {}".format(i, cell.cell_type))
+
+    return "".join(parts).rstrip() + "\n"
diff --git a/setup.py b/setup.py
@@ -36,6 +36,7 @@
     entry_points={'console_scripts': ['jupytext = jupytext.cli:jupytext']},
     tests_require=['pytest'],
     install_requires=['nbformat>=4.0.0', 'pyyaml', 'mock;python_version<"3"'],
+    extras_require={"myst": ["myst-parser~=0.7.1; python_version >= '3.6'"]},
     license='MIT',
     classifiers=['Development Status :: 5 - Production/Stable',
                  'License :: OSI Approved :: MIT License',

diff --git a/tests/notebooks/mirror/ipynb_to_myst/Line_breaks_in_LateX_305.mystnb b/tests/notebooks/mirror/ipynb_to_myst/Line_breaks_in_LateX_305.mystnb
@@ -0,0 +1,42 @@
+---
+kernelspec:
+  display_name: Python 3
+  language: python
+  name: python3
+nbformat: 4
+nbformat_minor: 2
+---
+
+This cell uses no particular cell marker
+
+$$
+\begin{align}
+\dot{x} & = \sigma(y-x)\\
+\dot{y} & = \rho x - y - xz \\
+\dot{z} & = -\beta z + xy
+\end{align}
+$$
+
++++
+
+This cell uses no particular cell marker, and a single slash in the $\LaTeX$ equation
+
+$$
+\begin{align}
+\dot{x} & = \sigma(y-x) \
+\dot{y} & = \rho x - y - xz \
+\dot{z} & = -\beta z + xy
+\end{align}
+$$
+
++++
+
+This cell uses the triple quote cell markers introduced at https://github.com/mwouts/jupytext/issues/305
+
+$$
+\begin{align}
+\dot{x} & = \sigma(y-x)\\
+\dot{y} & = \rho x - y - xz \\
+\dot{z} & = -\beta z + xy
+\end{align}
+$$