From 89e28ea66f50d4281cb9f624e31566aed9d5aab1 Mon Sep 17 00:00:00 2001 From: tungol Date: Mon, 20 Nov 2023 20:44:33 -0800 Subject: [PATCH] Permit standalone form feed characters at the module level (#4021) Co-authored-by: Stephen Morton Co-authored-by: Jelle Zijlstra --- CHANGES.md | 2 +- .../reference/reference_functions.rst | 4 +- docs/the_black_code_style/future_style.md | 11 + src/black/comments.py | 39 ++- src/black/linegen.py | 25 +- src/black/lines.py | 14 +- src/black/mode.py | 1 + src/black/nodes.py | 7 + src/black/output.py | 23 +- src/blib2to3/pgen2/driver.py | 2 + tests/data/cases/preview_form_feeds.py | 225 ++++++++++++++++++ 11 files changed, 318 insertions(+), 35 deletions(-) create mode 100644 tests/data/cases/preview_form_feeds.py diff --git a/CHANGES.md b/CHANGES.md index 8d0f10a2f3a..4c3fbf1afc8 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -12,7 +12,7 @@ ### Preview style - +- Standalone form feed characters at the module level are no longer removed (#4021) - Additional cases of immediately nested tuples, lists, and dictionaries are now indented less (#4012) diff --git a/docs/contributing/reference/reference_functions.rst b/docs/contributing/reference/reference_functions.rst index dd92e37a7d4..ebadf6975a7 100644 --- a/docs/contributing/reference/reference_functions.rst +++ b/docs/contributing/reference/reference_functions.rst @@ -149,7 +149,7 @@ Utilities .. autofunction:: black.numerics.normalize_numeric_literal -.. autofunction:: black.linegen.normalize_prefix +.. autofunction:: black.comments.normalize_trailing_prefix .. autofunction:: black.strings.normalize_string_prefix @@ -168,3 +168,5 @@ Utilities .. autofunction:: black.strings.sub_twice .. autofunction:: black.nodes.whitespace + +.. autofunction:: black.nodes.make_simple_prefix diff --git a/docs/the_black_code_style/future_style.md b/docs/the_black_code_style/future_style.md index 428bd87ab50..f55ea5f60a9 100644 --- a/docs/the_black_code_style/future_style.md +++ b/docs/the_black_code_style/future_style.md @@ -296,3 +296,14 @@ s = ( # Top comment # Bottom comment ) ``` + +======= + +### Form feed characters + +_Black_ will now retain form feed characters on otherwise empty lines at the module +level. Only one form feed is retained for a group of consecutive empty lines. Where +there are two empty lines in a row, the form feed will be placed on the second line. + +_Black_ already retained form feed literals inside a comment or inside a string. This +remains the case. diff --git a/src/black/comments.py b/src/black/comments.py index 862fc7607cc..8a0e925fdc0 100644 --- a/src/black/comments.py +++ b/src/black/comments.py @@ -10,6 +10,7 @@ WHITESPACE, container_of, first_leaf_of, + make_simple_prefix, preceding_leaf, syms, ) @@ -44,6 +45,7 @@ class ProtoComment: value: str # content of the comment newlines: int # how many newlines before the comment consumed: int # how many characters of the original leaf's prefix did we consume + form_feed: bool # is there a form feed before the comment def generate_comments(leaf: LN) -> Iterator[Leaf]: @@ -65,8 +67,12 @@ def generate_comments(leaf: LN) -> Iterator[Leaf]: Inline comments are emitted as regular token.COMMENT leaves. Standalone are emitted with a fake STANDALONE_COMMENT token identifier. """ + total_consumed = 0 for pc in list_comments(leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER): - yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines) + total_consumed = pc.consumed + prefix = make_simple_prefix(pc.newlines, pc.form_feed) + yield Leaf(pc.type, pc.value, prefix=prefix) + normalize_trailing_prefix(leaf, total_consumed) @lru_cache(maxsize=4096) @@ -79,11 +85,14 @@ def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]: consumed = 0 nlines = 0 ignored_lines = 0 - for index, line in enumerate(re.split("\r?\n", prefix)): - consumed += len(line) + 1 # adding the length of the split '\n' - line = line.lstrip() + form_feed = False + for index, full_line in enumerate(re.split("\r?\n", prefix)): + consumed += len(full_line) + 1 # adding the length of the split '\n' + line = full_line.lstrip() if not line: nlines += 1 + if "\f" in full_line: + form_feed = True if not line.startswith("#"): # Escaped newlines outside of a comment are not really newlines at # all. We treat a single-line comment following an escaped newline @@ -99,13 +108,33 @@ def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]: comment = make_comment(line) result.append( ProtoComment( - type=comment_type, value=comment, newlines=nlines, consumed=consumed + type=comment_type, + value=comment, + newlines=nlines, + consumed=consumed, + form_feed=form_feed, ) ) + form_feed = False nlines = 0 return result +def normalize_trailing_prefix(leaf: LN, total_consumed: int) -> None: + """Normalize the prefix that's left over after generating comments. + + Note: don't use backslashes for formatting or you'll lose your voting rights. + """ + remainder = leaf.prefix[total_consumed:] + if "\\" not in remainder: + nl_count = remainder.count("\n") + form_feed = "\f" in remainder and remainder.endswith("\n") + leaf.prefix = make_simple_prefix(nl_count, form_feed) + return + + leaf.prefix = "" + + def make_comment(content: str) -> str: """Return a consistently formatted comment from the given `content` string. diff --git a/src/black/linegen.py b/src/black/linegen.py index 8a2cd4710b9..7fbbe290d7e 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -149,7 +149,8 @@ def visit_default(self, node: LN) -> Iterator[Line]: self.current_line.append(comment) yield from self.line() - normalize_prefix(node, inside_brackets=any_open_brackets) + if any_open_brackets: + node.prefix = "" if self.mode.string_normalization and node.type == token.STRING: node.value = normalize_string_prefix(node.value) node.value = normalize_string_quotes(node.value) @@ -1035,8 +1036,6 @@ def bracket_split_build_line( result.inside_brackets = True result.depth += 1 if leaves: - # Since body is a new indent level, remove spurious leading whitespace. - normalize_prefix(leaves[0], inside_brackets=True) # Ensure a trailing comma for imports and standalone function arguments, but # be careful not to add one after any comments or within type annotations. no_commas = ( @@ -1106,7 +1105,7 @@ def split_wrapper( line: Line, features: Collection[Feature], mode: Mode ) -> Iterator[Line]: for split_line in split_func(line, features, mode): - normalize_prefix(split_line.leaves[0], inside_brackets=True) + split_line.leaves[0].prefix = "" yield split_line return split_wrapper @@ -1250,24 +1249,6 @@ def append_to_line(leaf: Leaf) -> Iterator[Line]: yield current_line -def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None: - """Leave existing extra newlines if not `inside_brackets`. Remove everything - else. - - Note: don't use backslashes for formatting or you'll lose your voting rights. - """ - if not inside_brackets: - spl = leaf.prefix.split("#") - if "\\" not in spl[0]: - nl_count = spl[-1].count("\n") - if len(spl) > 1: - nl_count -= 1 - leaf.prefix = "\n" * nl_count - return - - leaf.prefix = "" - - def normalize_invisible_parens( # noqa: C901 node: Node, parens_after: Set[str], *, mode: Mode, features: Collection[Feature] ) -> None: diff --git a/src/black/lines.py b/src/black/lines.py index 3ade0a5f4a5..ec6145ff848 100644 --- a/src/black/lines.py +++ b/src/black/lines.py @@ -31,6 +31,7 @@ is_type_comment, is_type_ignore_comment, is_with_or_async_with_stmt, + make_simple_prefix, replace_child, syms, whitespace, @@ -520,12 +521,12 @@ class LinesBlock: before: int = 0 content_lines: List[str] = field(default_factory=list) after: int = 0 + form_feed: bool = False def all_lines(self) -> List[str]: empty_line = str(Line(mode=self.mode)) - return ( - [empty_line * self.before] + self.content_lines + [empty_line * self.after] - ) + prefix = make_simple_prefix(self.before, self.form_feed, empty_line) + return [prefix] + self.content_lines + [empty_line * self.after] @dataclass @@ -550,6 +551,12 @@ def maybe_empty_lines(self, current_line: Line) -> LinesBlock: This is for separating `def`, `async def` and `class` with extra empty lines (two on module-level). """ + form_feed = ( + Preview.allow_form_feeds in self.mode + and current_line.depth == 0 + and bool(current_line.leaves) + and "\f\n" in current_line.leaves[0].prefix + ) before, after = self._maybe_empty_lines(current_line) previous_after = self.previous_block.after if self.previous_block else 0 before = ( @@ -575,6 +582,7 @@ def maybe_empty_lines(self, current_line: Line) -> LinesBlock: original_line=current_line, before=before, after=after, + form_feed=form_feed, ) # Maintain the semantic_leading_comment state. diff --git a/src/black/mode.py b/src/black/mode.py index 1aa5cbecc86..04038f49627 100644 --- a/src/black/mode.py +++ b/src/black/mode.py @@ -194,6 +194,7 @@ class Preview(Enum): allow_empty_first_line_before_new_block_or_comment = auto() single_line_format_skip_with_multiple_comments = auto() long_case_block_line_splitting = auto() + allow_form_feeds = auto() class Deprecated(UserWarning): diff --git a/src/black/nodes.py b/src/black/nodes.py index 9251b0defb0..de53f8e36a3 100644 --- a/src/black/nodes.py +++ b/src/black/nodes.py @@ -407,6 +407,13 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool, mode: Mode) -> str: # no return SPACE +def make_simple_prefix(nl_count: int, form_feed: bool, empty_line: str = "\n") -> str: + """Generate a normalized prefix string.""" + if form_feed: + return (empty_line * (nl_count - 1)) + "\f" + empty_line + return empty_line * nl_count + + def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]: """Return the first leaf that precedes `node`, if any.""" while node: diff --git a/src/black/output.py b/src/black/output.py index f4c17f28ea4..7c7dd0fe14e 100644 --- a/src/black/output.py +++ b/src/black/output.py @@ -4,8 +4,9 @@ """ import json +import re import tempfile -from typing import Any, Optional +from typing import Any, List, Optional from click import echo, style from mypy_extensions import mypyc_attr @@ -55,12 +56,28 @@ def ipynb_diff(a: str, b: str, a_name: str, b_name: str) -> str: return "".join(diff_lines) +_line_pattern = re.compile(r"(.*?(?:\r\n|\n|\r|$))") + + +def _splitlines_no_ff(source: str) -> List[str]: + """Split a string into lines ignoring form feed and other chars. + + This mimics how the Python parser splits source code. + + A simplified version of the function with the same name in Lib/ast.py + """ + result = [match[0] for match in _line_pattern.finditer(source)] + if result[-1] == "": + result.pop(-1) + return result + + def diff(a: str, b: str, a_name: str, b_name: str) -> str: """Return a unified diff string between strings `a` and `b`.""" import difflib - a_lines = a.splitlines(keepends=True) - b_lines = b.splitlines(keepends=True) + a_lines = _splitlines_no_ff(a) + b_lines = _splitlines_no_ff(b) diff_lines = [] for line in difflib.unified_diff( a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5 diff --git a/src/blib2to3/pgen2/driver.py b/src/blib2to3/pgen2/driver.py index e629843f8b9..be3984437a8 100644 --- a/src/blib2to3/pgen2/driver.py +++ b/src/blib2to3/pgen2/driver.py @@ -222,6 +222,8 @@ def _partially_consume_prefix(self, prefix: str, column: int) -> Tuple[str, str] elif char == "\n": # unexpected empty line current_column = 0 + elif char == "\f": + current_column = 0 else: # indent is finished wait_for_nl = True diff --git a/tests/data/cases/preview_form_feeds.py b/tests/data/cases/preview_form_feeds.py new file mode 100644 index 00000000000..2d8653a1f04 --- /dev/null +++ b/tests/data/cases/preview_form_feeds.py @@ -0,0 +1,225 @@ +# flags: --preview + + +# Warning! This file contains form feeds (ASCII 0x0C, often represented by \f or ^L). +# These may be invisible in your editor: ensure you can see them before making changes here. + +# There's one at the start that'll get stripped + +# Comment and statement processing is different enough that we'll test variations of both +# contexts here + +# + + +# + + +# + + + +# + + + +# + + + +# + + +# + + + +# + +# + +# + + \ +# +pass + +pass + + +pass + + +pass + + + +pass + + + +pass + + + +pass + + +pass + + + +pass + +pass + +pass + + +# form feed after a dedent +def foo(): + pass + +pass + + +# form feeds are prohibited inside blocks, or on a line with nonwhitespace + def bar( a = 1 ,b : bool = False ) : + + + pass + + +class Baz: + + def __init__(self): + pass + + + def something(self): + pass + + + +# +pass +pass # +a = 1 + # + pass + a = 1 + +a = [ + +] + +# as internal whitespace of a comment is allowed but why +"form feed literal in a string is okay " + +# form feeds at the very end get removed. + + + +# output + +# Warning! This file contains form feeds (ASCII 0x0C, often represented by \f or ^L). +# These may be invisible in your editor: ensure you can see them before making changes here. + +# There's one at the start that'll get stripped + +# Comment and statement processing is different enough that we'll test variations of both +# contexts here + +# + + +# + + +# + + +# + + +# + + +# + + +# + + +# + +# + +# + +# +pass + +pass + + +pass + + +pass + + +pass + + +pass + + +pass + + +pass + + +pass + +pass + +pass + + +# form feed after a dedent +def foo(): + pass + + +pass + + +# form feeds are prohibited inside blocks, or on a line with nonwhitespace +def bar(a=1, b: bool = False): + pass + + +class Baz: + def __init__(self): + pass + + def something(self): + pass + + +# +pass +pass # +a = 1 +# +pass +a = 1 + +a = [] + +# as internal whitespace of a comment is allowed but why +"form feed literal in a string is okay " + +# form feeds at the very end get removed.