Skip to content

Commit

Permalink
OCaml support (#848)
Browse files Browse the repository at this point in the history
* Add support for ml (#827, WIP)
TODO: use block comment instead of line comment

* Add block comments and ocaml support (#827)

* Remove the version information from the mirror notebooks

* Remove the comment suffix in uncomment/when parsing the header

* More OCaml-specific tests

* Update CHANGELOG.md

Co-authored-by: Quentin Fortier <[email protected]>
  • Loading branch information
mwouts and fortierq authored Sep 8, 2021
1 parent e97819f commit b553f32
Show file tree
Hide file tree
Showing 17 changed files with 366 additions and 49 deletions.
5 changes: 3 additions & 2 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@ Jupytext ChangeLog
-----------------------

**Added**
- Jupytext can be configured through the `pyproject.toml` file. Thanks to Robin Brown for this contribution! (#828)
- Jupytext can be configured through the `pyproject.toml` file. Thanks to Robin Brown for this contribution! ([#828](https://github.com/mwouts/jupytext/issues/828))
- Jupytext now supports OCaml files with `.ml` extension. Thanks to Quentin Fortier for getting this started ([#832](https://github.com/mwouts/jupytext/issues/832))

**Fixed**
- Added more tests to make sure that notebooks can be trusted. In practice, notebooks could not be trusted in JupyterLab<3.0.13 because of the absence of cell ids (#826)
- Added more tests to make sure that notebooks can be trusted. In practice, notebooks could not be trusted in JupyterLab<3.0.13 because of the absence of cell ids ([#826](https://github.com/mwouts/jupytext/issues/826))


1.11.5 (2021-08-31)
Expand Down
1 change: 1 addition & 0 deletions docs/languages.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Jupytext works with notebooks in any of the following languages:
- Javascript
- Julia
- Matlab
- OCaml
- Octave
- PowerShell
- Python
Expand Down
73 changes: 54 additions & 19 deletions jupytext/cell_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,31 @@
_PY_INDENTED = re.compile(r"^\s")


def uncomment(lines, prefix="#", suffix=""):
    """Remove the comment prefix (and suffix, if any) from each line.

    For every line, the prefix followed by one space is removed when the
    line starts with it; otherwise the bare prefix is removed when the line
    starts with it; otherwise the line is left untouched. The suffix is
    handled symmetrically at the end of the line (one space followed by the
    suffix first, then the bare suffix).

    :param lines: list of text lines
    :param prefix: comment opening marker, e.g. "#" or "(*". An empty
        prefix disables prefix removal.
    :param suffix: comment closing marker for languages that only have
        block comments, e.g. "*)". An empty suffix disables suffix removal.
    :return: the list of uncommented lines (the input list itself when
        both prefix and suffix are empty)
    """
    if prefix:
        prefix_and_space = prefix + " "
        length_prefix = len(prefix)
        length_prefix_and_space = len(prefix_and_space)
        lines = [
            line[length_prefix_and_space:]
            if line.startswith(prefix_and_space)
            else (line[length_prefix:] if line.startswith(prefix) else line)
            for line in lines
        ]

    if suffix:
        space_and_suffix = " " + suffix
        length_suffix = len(suffix)
        length_space_and_suffix = len(space_and_suffix)
        # The suffix is non-empty here, so the negative slice bounds below
        # are never -0 (which would wrongly empty the line).
        lines = [
            line[:-length_space_and_suffix]
            if line.endswith(space_and_suffix)
            else (line[:-length_suffix] if line.endswith(suffix) else line)
            for line in lines
        ]

    return lines


def paragraph_is_fully_commented(lines, comment, main_language):
Expand Down Expand Up @@ -539,14 +551,17 @@ def uncomment_code_and_magics(self, lines):
lines, self.ext, self.language or self.default_language
)

return uncomment(lines, self.markdown_prefix or self.comment)
return uncomment(
lines, self.markdown_prefix or self.comment, self.comment_suffix
)


class RScriptCellReader(ScriptCellReader):
"""Read notebook cells from R scripts written according
to the knitr-spin syntax"""

comment = "#'"
comment_suffix = ""
markdown_prefix = "#'"
default_language = "R"
start_code_re = re.compile(r"^#\+(.*)\s*$")
Expand Down Expand Up @@ -620,6 +635,7 @@ def __init__(self, fmt=None, default_language=None):
script = _SCRIPT_EXTENSIONS[self.ext]
self.default_language = default_language or script["language"]
self.comment = script["comment"]
self.comment_suffix = script.get("comment_suffix", "")
self.ignore_end_marker = True
self.explicit_end_marker_required = False
if (
Expand All @@ -632,16 +648,29 @@ def __init__(self, fmt=None, default_language=None):
",", 1
)
self.start_code_re = re.compile(
"^" + self.comment + r"\s*" + self.cell_marker_start + r"(.*)$"
"^"
+ re.escape(self.comment)
+ r"\s*"
+ self.cell_marker_start
+ r"(.*)$"
)
self.end_code_re = re.compile(
"^" + self.comment + r"\s*" + self.cell_marker_end + r"\s*$"
"^" + re.escape(self.comment) + r"\s*" + self.cell_marker_end + r"\s*$"
)
else:
self.start_code_re = re.compile("^" + self.comment + r"\s*\+(.*)$")
self.start_code_re = re.compile(
"^" + re.escape(self.comment) + r"\s*\+(.*)$"
)

def metadata_and_language_from_option_line(self, line):
if self.start_code_re.match(line):
# Remove the OCAML suffix
if self.comment_suffix:
if line.endswith(" " + self.comment_suffix):
line = line[: -len(" " + self.comment_suffix)]
elif line.endswith(self.comment_suffix):
line = line[: -len(self.comment_suffix)]

# We want to parse inner most regions as cells.
# Thus, if we find another region start before the end for this region,
# we will have ignore the metadata that we found here, and move on to the next cell.
Expand Down Expand Up @@ -705,7 +734,7 @@ def find_cell_end(self, lines):
elif not self.cell_marker_end:
end_of_cell = self.metadata.get("endofcell", "-")
self.end_code_re = re.compile(
"^" + self.comment + " " + end_of_cell + r"\s*$"
"^" + re.escape(self.comment) + " " + end_of_cell + r"\s*$"
)

return self.find_region_end(lines)
Expand Down Expand Up @@ -774,16 +803,22 @@ def __init__(self, fmt, default_language=None):
script = _SCRIPT_EXTENSIONS[self.ext]
self.default_language = default_language or script["language"]
self.comment = script["comment"]
self.start_code_re = re.compile(r"^\s*{}\s*%%(%*)\s(.*)$".format(self.comment))
self.comment_suffix = script.get("comment_suffix", "")
self.start_code_re = re.compile(
r"^\s*{}\s*%%(%*)\s(.*)$".format(re.escape(self.comment))
)
self.alternative_start_code_re = re.compile(
r"^\s*{}\s*(%%|<codecell>|In\[[0-9 ]*\]:?)\s*$".format(self.comment)
r"^\s*{}\s*(%%|<codecell>|In\[[0-9 ]*\]:?)\s*$".format(
re.escape(self.comment)
)
)
self.explicit_soc = True

def metadata_and_language_from_option_line(self, line):
"""Parse code options on the given line. When a start of a code cell
is found, self.metadata is set to a dictionary."""
if self.start_code_re.match(line):
line = uncomment([line], self.comment, self.comment_suffix)[0]
self.language, self.metadata = self.options_to_metadata(
line[line.find("%%") + 2 :]
)
Expand Down
19 changes: 14 additions & 5 deletions jupytext/cell_to_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ def __init__(self, cell, default_language, fmt=None):
self.language = self.language or cell.metadata.get("language", default_language)
self.default_language = default_language
self.comment = _SCRIPT_EXTENSIONS.get(self.ext, {}).get("comment", "#")
self.comment_suffix = _SCRIPT_EXTENSIONS.get(self.ext, {}).get(
"comment_suffix", ""
)
self.comment_magics = self.fmt.get(
"comment_magics", self.default_comment_magics
)
Expand Down Expand Up @@ -168,7 +171,7 @@ def markdown_to_text(self, source):
explicitly_code=self.cell_type == "code",
)

return comment_lines(source, self.comment)
return comment_lines(source, self.comment, self.comment_suffix)

def code_to_text(self):
"""Return the text representation of this cell as a code cell"""
Expand Down Expand Up @@ -278,7 +281,9 @@ def endofcell_marker(source, comment):
we add an end-of-cell marker"""
endofcell = "-"
while True:
endofcell_re = re.compile(r"^{}( )".format(comment) + endofcell + r"\s*$")
endofcell_re = re.compile(
r"^{}( )".format(re.escape(comment)) + endofcell + r"\s*$"
)
if list(filter(endofcell_re.match, source)):
endofcell = endofcell + "-"
else:
Expand Down Expand Up @@ -492,9 +497,13 @@ def cell_to_text(self):
indent = left_space.groups()[0]

if options.startswith("%") or not options:
lines = [indent + self.comment + " %%" + options]
lines = comment_lines(
["%%" + options], indent + self.comment, self.comment_suffix
)
else:
lines = [indent + self.comment + " %% " + options]
lines = comment_lines(
["%% " + options], indent + self.comment, self.comment_suffix
)

if self.is_code() and active:
source = copy(self.source)
Expand Down Expand Up @@ -555,4 +564,4 @@ def cell_to_text(self):
cell_marker
if cell_marker.startswith("#" * 20)
else self.default_cell_marker
] + comment_lines(self.source, self.comment)
] + comment_lines(self.source, self.comment, self.comment_suffix)
8 changes: 7 additions & 1 deletion jupytext/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,13 @@ def __init__(
cell_reader_class,
cell_exporter_class,
current_version_number,
header_suffix="",
min_readable_version_number=None,
):
self.format_name = format_name
self.extension = extension
self.header_prefix = header_prefix
self.header_suffix = header_suffix
self.cell_reader_class = cell_reader_class
self.cell_exporter_class = cell_exporter_class
self.current_version_number = current_version_number
Expand Down Expand Up @@ -115,6 +117,7 @@ def __init__(
format_name="light",
extension=ext,
header_prefix=_SCRIPT_EXTENSIONS[ext]["comment"],
header_suffix=_SCRIPT_EXTENSIONS[ext].get("comment_suffix", ""),
cell_reader_class=LightScriptCellReader,
cell_exporter_class=LightScriptCellExporter,
# Version 1.5 on 2019-10-19 - jupytext v1.3.0 - Cell metadata represented as key=value by default
Expand All @@ -136,6 +139,7 @@ def __init__(
format_name="nomarker",
extension=ext,
header_prefix=_SCRIPT_EXTENSIONS[ext]["comment"],
header_suffix=_SCRIPT_EXTENSIONS[ext].get("comment_suffix", ""),
cell_reader_class=LightScriptCellReader,
cell_exporter_class=BareScriptCellExporter,
current_version_number="1.0",
Expand All @@ -148,6 +152,7 @@ def __init__(
format_name="percent",
extension=ext,
header_prefix=_SCRIPT_EXTENSIONS[ext]["comment"],
header_suffix=_SCRIPT_EXTENSIONS[ext].get("comment_suffix", ""),
cell_reader_class=DoublePercentScriptCellReader,
cell_exporter_class=DoublePercentCellExporter,
# Version 1.3 on 2019-09-21 - jupytext v1.3.0: Markdown cells can be quoted using triple quotes #305
Expand All @@ -166,6 +171,7 @@ def __init__(
format_name="hydrogen",
extension=ext,
header_prefix=_SCRIPT_EXTENSIONS[ext]["comment"],
header_suffix=_SCRIPT_EXTENSIONS[ext].get("comment_suffix", ""),
cell_reader_class=HydrogenCellReader,
cell_exporter_class=HydrogenCellExporter,
# Version 1.2 on 2018-12-14 - jupytext v0.9.0: same as percent - only magics are not commented by default
Expand Down Expand Up @@ -297,7 +303,7 @@ def guess_format(text, ext):
# Is this a Hydrogen-like script?
# Or a Sphinx-gallery script?
if ext in _SCRIPT_EXTENSIONS:
comment = _SCRIPT_EXTENSIONS[ext]["comment"]
comment = re.escape(_SCRIPT_EXTENSIONS[ext]["comment"])
language = _SCRIPT_EXTENSIONS[ext]["language"]
twenty_hash_re = re.compile(r"^#( |)#{19,}\s*$")
double_percent_re = re.compile(r"^{}( %%|%%)$".format(comment))
Expand Down
33 changes: 23 additions & 10 deletions jupytext/header.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,18 @@ def insert_or_test_version_number():
return INSERT_AND_CHECK_VERSION_NUMBER


def uncomment_line(line, prefix, suffix=""):
    """Remove the comment prefix (and space) and suffix (and space) from line.

    :param line: a single text line
    :param prefix: comment opening marker, e.g. "#" or "(*"; when empty the
        start of the line is left untouched
    :param suffix: comment closing marker, e.g. "*)"; when empty the end of
        the line is left untouched
    :return: the line without its comment markers
    """
    if prefix:
        if line.startswith(prefix + " "):
            line = line[len(prefix) + 1 :]
        elif line.startswith(prefix):
            line = line[len(prefix) :]
    if suffix:
        # Commented lines are written as "<prefix> <text> <suffix>", so the
        # space to remove sits *before* the suffix.
        if line.endswith(" " + suffix):
            line = line[: -(1 + len(suffix))]
        elif line.endswith(suffix + " "):
            # Kept for backward compatibility: a suffix followed by one
            # trailing space is still removed.
            line = line[: -(1 + len(suffix))]
        elif line.endswith(suffix):
            line = line[: -len(suffix)]
    return line


Expand Down Expand Up @@ -133,7 +137,10 @@ def metadata_and_cell_to_header(notebook, metadata, text_format, fmt):
):
header = ["<!--", ""] + header + ["", "-->"]

return comment_lines(header, text_format.header_prefix), lines_to_next_cell
return (
comment_lines(header, text_format.header_prefix, text_format.header_suffix),
lines_to_next_cell,
)


def recursive_update(target, update):
Expand Down Expand Up @@ -173,7 +180,7 @@ def header_to_metadata_and_cell(
comment = "#" if header_prefix == "#'" else header_prefix

encoding_re = re.compile(
r"^[ \t\f]*{}.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)".format(comment)
r"^[ \t\f]*{}.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)".format(re.escape(comment))
)

for i, line in enumerate(lines):
Expand Down Expand Up @@ -203,7 +210,13 @@ def header_to_metadata_and_cell(
if not started and not line.strip():
continue

line = uncomment_line(line, header_prefix)
# OCAML
if header_prefix == "(*":
header_suffix = "*)"
else:
header_suffix = ""

line = uncomment_line(line, header_prefix, header_suffix)
if _HEADER_RE.match(line):
if not started:
started = True
Expand Down
14 changes: 12 additions & 2 deletions jupytext/languages.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@
".java": {"language": "java", "comment": "//"},
".groovy": {"language": "groovy", "comment": "//"},
".sage": {"language": "sage", "comment": "#"},
".ml": {
"language": "ocaml",
"comment": "(*",
"comment_suffix": "*)",
}, # OCaml only has block comments
}

_COMMENT_CHARS = [
Expand Down Expand Up @@ -192,8 +197,13 @@ def cell_language(source, default_language, custom_cell_magics):
return None, None


def comment_lines(lines, prefix, suffix=""):
    """Return commented lines.

    Each non-empty line becomes "<prefix> <line>" (plus " <suffix>" when a
    suffix is given). Empty lines become the bare prefix, or
    "<prefix> <suffix>" when a suffix is given.

    :param lines: list of text lines
    :param prefix: comment opening marker; when empty, the lines are
        returned unchanged
    :param suffix: comment closing marker for languages that only have
        block comments, e.g. "*)"
    :return: the list of commented lines
    """
    if not prefix:
        return lines
    if not suffix:
        return [prefix + " " + line if line else prefix for line in lines]
    return [
        prefix + " " + line + " " + suffix if line else prefix + " " + suffix
        for line in lines
    ]
Loading

0 comments on commit b553f32

Please sign in to comment.