From b80de52592f7b4c9854a454e2b7a0121fe2862e4 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Fri, 22 Nov 2024 13:43:53 +0100 Subject: [PATCH] Consider quotes inside format-specs when choosing the quotes for an f-string (#14493) --- .../fixtures/ruff/expression/fstring_py312.py | 28 +++ .../join_implicit_concatenated_string.py | 7 + .../src/string/implicit.rs | 68 +++--- .../src/string/normalize.rs | 197 +++++++++++++----- .../format@expression__fstring_py312.py.snap | 91 +++++++- ..._join_implicit_concatenated_string.py.snap | 14 ++ 6 files changed, 325 insertions(+), 80 deletions(-) diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring_py312.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring_py312.py index bd1d755f2d5fb..9c0f4ff9c0b91 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring_py312.py +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring_py312.py @@ -15,3 +15,31 @@ f'{b"""other " """}' f'{f"""other " """}' + +# Regression tests for https://github.com/astral-sh/ruff/issues/13935 +f'{1: hy "user"}' +f'{1:hy "user"}' +f'{1: abcd "{1}" }' +f'{1: abcd "{'aa'}" }' +f'{1=: "abcd {'aa'}}' +f'{x:a{z:hy "user"}} \'\'\'' + +# Changing the outer quotes is fine because the format-spec is in a nested expression. +f'{f'{z=:hy "user"}'} \'\'\'' + + +# We have to be careful about changing the quotes if the f-string has a debug expression because it is inserted verbatim. +f'{1=: "abcd \'\'}' # Don't change the outer quotes, or it results in a syntax error +f'{1=: abcd \'\'}' # Changing the quotes here is fine because the inner quotes aren't the opposite quotes +f'{1=: abcd \"\"}' # Changing the quotes here is fine because the inner quotes are escaped +# Don't change the quotes in the following cases: +f'{x=:hy "user"} \'\'\'' +f'{x=:a{y:hy "user"}} \'\'\'' +f'{x=:a{y:{z:hy "user"}}} \'\'\'' +f'{x:a{y=:{z:hy "user"}}} \'\'\'' + +# This is fine because the debug expression and format spec are in a nested expression + +f"""{1=: "this" is fine}""" +f'''{1=: "this" is fine}''' # Change quotes to double quotes because they're preferred +f'{1=: {'ab"cd"'}}' # It's okay if the quotes are in an expression part. diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/join_implicit_concatenated_string.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/join_implicit_concatenated_string.py index 6dccb377d898c..a2548aa5524b3 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/join_implicit_concatenated_string.py +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/join_implicit_concatenated_string.py @@ -319,3 +319,10 @@ def implicit_with_comment(): assert False, +"Implicit concatenated string" "uses {} layout on {} format".format( "Multiline", "first" ) + + +# Regression tests for https://github.com/astral-sh/ruff/issues/13935 + +"a" f'{1=: "abcd \'\'}' +f'{1=: "abcd \'\'}' "a" +f'{1=: "abcd \'\'}' f"{1=: 'abcd \"\"}" diff --git a/crates/ruff_python_formatter/src/string/implicit.rs b/crates/ruff_python_formatter/src/string/implicit.rs index b103dc545a70d..10489e1e0a5a3 100644 --- a/crates/ruff_python_formatter/src/string/implicit.rs +++ b/crates/ruff_python_formatter/src/string/implicit.rs @@ -20,7 +20,7 @@ use crate::preview::{ }; use crate::string::docstring::needs_chaperone_space; use crate::string::normalize::{ - is_fstring_with_quoted_debug_expression, + is_fstring_with_quoted_debug_expression, is_fstring_with_quoted_format_spec_and_debug, is_fstring_with_triple_quoted_literal_expression_containing_quotes, QuoteMetadata, }; use crate::string::{normalize_string, StringLikeExtensions, StringNormalizer, StringQuotes}; @@ -206,15 +206,8 @@ impl<'a> FormatImplicitConcatenatedStringFlat<'a> { return None; } - // Avoid invalid syntax for pre Python 312: - // * When joining parts that have debug expressions with quotes: `f"{10 + len('bar')=}" f'{10 + len("bar")=}' - // * When joining parts that contain triple quoted strings with quotes: `f"{'''test ' '''}" f'{"""other " """}'` - if !context.options().target_version().supports_pep_701() { - if is_fstring_with_quoted_debug_expression(fstring, context) - || is_fstring_with_triple_quoted_literal_expression_containing_quotes( - fstring, context, - ) - { + if context.options().target_version().supports_pep_701() { + if is_fstring_with_quoted_format_spec_and_debug(fstring, context) { if preserve_quotes_requirement .is_some_and(|quote| quote != part.flags().quote_style()) { @@ -223,6 +216,21 @@ impl<'a> FormatImplicitConcatenatedStringFlat<'a> { preserve_quotes_requirement = Some(part.flags().quote_style()); } } + // Avoid invalid syntax for pre Python 312: + // * When joining parts that have debug expressions with quotes: `f"{10 + len('bar')=}" f'{10 + len("bar")=}' + // * When joining parts that contain triple quoted strings with quotes: `f"{'''test ' '''}" f'{"""other " """}'` + else if is_fstring_with_quoted_debug_expression(fstring, context) + || is_fstring_with_triple_quoted_literal_expression_containing_quotes( + fstring, context, + ) + { + if preserve_quotes_requirement + .is_some_and(|quote| quote != part.flags().quote_style()) + { + return None; + } + preserve_quotes_requirement = Some(part.flags().quote_style()); + } } } @@ -238,26 +246,30 @@ impl<'a> FormatImplicitConcatenatedStringFlat<'a> { StringLike::FString(_) => AnyStringPrefix::Format(FStringPrefix::Regular), }; - // Only determining the preferred quote for the first string is sufficient - // because we don't support joining triple quoted strings with non triple quoted strings. - let quote = if let Ok(preferred_quote) = - Quote::try_from(normalizer.preferred_quote_style(first_part)) - { - for part in string.parts() { - let part_quote_metadata = - QuoteMetadata::from_part(part, context, preferred_quote); - - if let Some(merged) = merged_quotes.as_mut() { - *merged = part_quote_metadata.merge(merged)?; - } else { - merged_quotes = Some(part_quote_metadata); + let quote = if let Some(quote) = preserve_quotes_requirement { + quote + } else { + // Only determining the preferred quote for the first string is sufficient + // because we don't support joining triple quoted strings with non triple quoted strings. + if let Ok(preferred_quote) = + Quote::try_from(normalizer.preferred_quote_style(first_part)) + { + for part in string.parts() { + let part_quote_metadata = + QuoteMetadata::from_part(part, context, preferred_quote); + + if let Some(merged) = merged_quotes.as_mut() { + *merged = part_quote_metadata.merge(merged)?; + } else { + merged_quotes = Some(part_quote_metadata); + } } - } - merged_quotes?.choose(preferred_quote) - } else { - // Use the quotes of the first part if the quotes should be preserved. - first_part.flags().quote_style() + merged_quotes?.choose(preferred_quote) + } else { + // Use the quotes of the first part if the quotes should be preserved. + first_part.flags().quote_style() + } }; Some(AnyStringFlags::new(prefix, quote, false)) diff --git a/crates/ruff_python_formatter/src/string/normalize.rs b/crates/ruff_python_formatter/src/string/normalize.rs index d23e359958d5c..05223792549b9 100644 --- a/crates/ruff_python_formatter/src/string/normalize.rs +++ b/crates/ruff_python_formatter/src/string/normalize.rs @@ -5,7 +5,8 @@ use std::iter::FusedIterator; use ruff_formatter::FormatContext; use ruff_python_ast::visitor::source_order::SourceOrderVisitor; use ruff_python_ast::{ - str::Quote, AnyStringFlags, BytesLiteral, FString, StringFlags, StringLikePart, StringLiteral, + str::Quote, AnyStringFlags, BytesLiteral, FString, FStringElement, FStringElements, + FStringFlags, StringFlags, StringLikePart, StringLiteral, }; use ruff_text_size::{Ranged, TextRange, TextSlice}; @@ -78,6 +79,10 @@ impl<'a, 'src> StringNormalizer<'a, 'src> { return QuoteStyle::Preserve; } } + } else if let StringLikePart::FString(fstring) = string { + if is_fstring_with_quoted_format_spec_and_debug(fstring, self.context) { + return QuoteStyle::Preserve; + } } // For f-strings prefer alternating the quotes unless The outer string is triple quoted and the inner isn't. @@ -262,37 +267,14 @@ impl QuoteMetadata { } StringLikePart::FString(fstring) => { if is_f_string_formatting_enabled(context) { - // For f-strings, only consider the quotes inside string-literals but ignore - // quotes inside expressions. This allows both the outer and the nested literals - // to make the optimal local-choice to reduce the total number of quotes necessary. - // This doesn't require any pre 312 special handling because an expression - // can never contain the outer quote character, not even escaped: - // ```python - // f"{'escaping a quote like this \" is a syntax error pre 312'}" - // ``` - let mut literals = fstring.elements.literals(); - - let Some(first) = literals.next() else { - return QuoteMetadata::from_str("", part.flags(), preferred_quote); - }; - - let mut metadata = QuoteMetadata::from_str( - context.source().slice(first), - fstring.flags.into(), - preferred_quote, - ); + let metadata = QuoteMetadata::from_str("", part.flags(), preferred_quote); - for literal in literals { - metadata = metadata - .merge(&QuoteMetadata::from_str( - context.source().slice(literal), - fstring.flags.into(), - preferred_quote, - )) - .expect("Merge to succeed because all parts have the same flags"); - } - - metadata + metadata.merge_fstring_elements( + &fstring.elements, + fstring.flags, + context, + preferred_quote, + ) } else { let text = &context.source()[part.content_range()]; @@ -398,6 +380,52 @@ impl QuoteMetadata { source_style: self.source_style, }) } + + /// For f-strings, only consider the quotes inside string-literals but ignore + /// quotes inside expressions (except inside the format spec). This allows both the outer and the nested literals + /// to make the optimal local-choice to reduce the total number of quotes necessary. + /// This doesn't require any pre 312 special handling because an expression + /// can never contain the outer quote character, not even escaped: + /// ```python + /// f"{'escaping a quote like this \" is a syntax error pre 312'}" + /// ``` + fn merge_fstring_elements( + self, + elements: &FStringElements, + flags: FStringFlags, + context: &PyFormatContext, + preferred_quote: Quote, + ) -> Self { + let mut merged = self; + + for element in elements { + match element { + FStringElement::Literal(literal) => { + merged = merged + .merge(&QuoteMetadata::from_str( + context.source().slice(literal), + flags.into(), + preferred_quote, + )) + .expect("Merge to succeed because all parts have the same flags"); + } + FStringElement::Expression(expression) => { + if let Some(spec) = expression.format_spec.as_deref() { + if expression.debug_text.is_none() { + merged = merged.merge_fstring_elements( + &spec.elements, + flags, + context, + preferred_quote, + ); + } + } + } + } + } + + merged + } } #[derive(Copy, Clone, Debug)] @@ -891,33 +919,66 @@ pub(super) fn is_fstring_with_quoted_debug_expression( fstring: &FString, context: &PyFormatContext, ) -> bool { - if fstring.elements.expressions().any(|expression| { + fstring.elements.expressions().any(|expression| { if expression.debug_text.is_some() { let content = context.source().slice(expression); - match fstring.flags.quote_style() { - Quote::Single => { - if fstring.flags.is_triple_quoted() { - content.contains(r#"""""#) - } else { - content.contains('"') - } - } - Quote::Double => { - if fstring.flags.is_triple_quoted() { - content.contains("'''") - } else { - content.contains('\'') - } - } - } + contains_opposite_quote(content, fstring.flags.into()) } else { false } - }) { - return true; + }) +} + +/// Returns `true` if `string` has any f-string expression element (direct or nested) with a debug expression and a format spec +/// that contains the opposite quote. It's important to preserve the quote style for those f-strings +/// because changing the quote style would result in invalid syntax. +/// +/// ```python +/// f'{1=: "abcd \'\'}' +/// f'{x=:a{y:"abcd"}}' +/// f'{x=:a{y:{z:"abcd"}}}' +/// ``` +pub(super) fn is_fstring_with_quoted_format_spec_and_debug( + fstring: &FString, + context: &PyFormatContext, +) -> bool { + fn has_format_spec_with_opposite_quote( + elements: &FStringElements, + flags: FStringFlags, + context: &PyFormatContext, + in_debug: bool, + ) -> bool { + elements.iter().any(|element| match element { + FStringElement::Literal(literal) => { + let content = context.source().slice(literal); + + in_debug && contains_opposite_quote(content, flags.into()) + } + FStringElement::Expression(expression) => { + expression.format_spec.as_deref().is_some_and(|spec| { + has_format_spec_with_opposite_quote( + &spec.elements, + flags, + context, + in_debug || expression.debug_text.is_some(), + ) + }) + } + }) } - false + fstring.elements.expressions().any(|expression| { + if let Some(spec) = expression.format_spec.as_deref() { + return has_format_spec_with_opposite_quote( + &spec.elements, + fstring.flags, + context, + expression.debug_text.is_some(), + ); + } + + false + }) } /// Tests if the `fstring` contains any triple quoted string, byte, or f-string literal that @@ -997,6 +1058,40 @@ pub(super) fn is_fstring_with_triple_quoted_literal_expression_containing_quotes visitor.found } +fn contains_opposite_quote(content: &str, flags: AnyStringFlags) -> bool { + if flags.is_triple_quoted() { + match flags.quote_style() { + Quote::Single => content.contains(r#"""""#), + Quote::Double => content.contains("'''"), + } + } else { + let mut rest = content; + + while let Some(index) = rest.find(flags.quote_style().opposite().as_char()) { + // Quotes in raw strings can't be escaped + if flags.is_raw_string() { + return true; + } + + // Only if the quote isn't escaped + if rest[..index] + .chars() + .rev() + .take_while(|c| *c == '\\') + .count() + % 2 + == 0 + { + return true; + } + + rest = &rest[index + flags.quote_style().opposite().as_char().len_utf8()..]; + } + + false + } +} + #[cfg(test)] mod tests { use std::borrow::Cow; diff --git a/crates/ruff_python_formatter/tests/snapshots/format@expression__fstring_py312.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@expression__fstring_py312.py.snap index c48270787171a..77725792d7f21 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@expression__fstring_py312.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@expression__fstring_py312.py.snap @@ -22,6 +22,34 @@ f'{"""other " """ + "more"}' f'{b"""other " """}' f'{f"""other " """}' + +# Regression tests for https://github.com/astral-sh/ruff/issues/13935 +f'{1: hy "user"}' +f'{1:hy "user"}' +f'{1: abcd "{1}" }' +f'{1: abcd "{'aa'}" }' +f'{1=: "abcd {'aa'}}' +f'{x:a{z:hy "user"}} \'\'\'' + +# Changing the outer quotes is fine because the format-spec is in a nested expression. +f'{f'{z=:hy "user"}'} \'\'\'' + + +# We have to be careful about changing the quotes if the f-string has a debug expression because it is inserted verbatim. +f'{1=: "abcd \'\'}' # Don't change the outer quotes, or it results in a syntax error +f'{1=: abcd \'\'}' # Changing the quotes here is fine because the inner quotes aren't the opposite quotes +f'{1=: abcd \"\"}' # Changing the quotes here is fine because the inner quotes are escaped +# Don't change the quotes in the following cases: +f'{x=:hy "user"} \'\'\'' +f'{x=:a{y:hy "user"}} \'\'\'' +f'{x=:a{y:{z:hy "user"}}} \'\'\'' +f'{x:a{y=:{z:hy "user"}}} \'\'\'' + +# This is fine because the debug expression and format spec are in a nested expression + +f"""{1=: "this" is fine}""" +f'''{1=: "this" is fine}''' # Change quotes to double quotes because they're preferred +f'{1=: {'ab"cd"'}}' # It's okay if the quotes are in an expression part. ``` ## Outputs @@ -57,6 +85,35 @@ f'{"""other " """}' f'{"""other " """ + "more"}' f'{b"""other " """}' f'{f"""other " """}' + + +# Regression tests for https://github.com/astral-sh/ruff/issues/13935 +f'{1: hy "user"}' +f'{1:hy "user"}' +f'{1: abcd "{1}" }' +f'{1: abcd "{'aa'}" }' +f'{1=: "abcd {'aa'}}' +f'{x:a{z:hy "user"}} \'\'\'' + +# Changing the outer quotes is fine because the format-spec is in a nested expression. +f'{f'{z=:hy "user"}'} \'\'\'' + + +# We have to be careful about changing the quotes if the f-string has a debug expression because it is inserted verbatim. +f'{1=: "abcd \'\'}' # Don't change the outer quotes, or it results in a syntax error +f'{1=: abcd \'\'}' # Changing the quotes here is fine because the inner quotes aren't the opposite quotes +f'{1=: abcd \"\"}' # Changing the quotes here is fine because the inner quotes are escaped +# Don't change the quotes in the following cases: +f'{x=:hy "user"} \'\'\'' +f'{x=:a{y:hy "user"}} \'\'\'' +f'{x=:a{y:{z:hy "user"}}} \'\'\'' +f'{x:a{y=:{z:hy "user"}}} \'\'\'' + +# This is fine because the debug expression and format spec are in a nested expression + +f"""{1=: "this" is fine}""" +f"""{1=: "this" is fine}""" # Change quotes to double quotes because they're preferred +f'{1=: {'ab"cd"'}}' # It's okay if the quotes are in an expression part. ``` @@ -64,7 +121,7 @@ f'{f"""other " """}' ```diff --- Stable +++ Preview -@@ -6,11 +6,11 @@ +@@ -6,32 +6,32 @@ f"{'a'}" # 312+, it's okay to change the outer quotes even when there's a debug expression using the same quotes @@ -82,4 +139,36 @@ f'{f"""other " """}' +f"{'''other " ''' + 'more'}" +f"{b'''other " '''}" +f"{f'''other " '''}" + + + # Regression tests for https://github.com/astral-sh/ruff/issues/13935 + f'{1: hy "user"}' + f'{1:hy "user"}' + f'{1: abcd "{1}" }' +-f'{1: abcd "{'aa'}" }' ++f'{1: abcd "{"aa"}" }' + f'{1=: "abcd {'aa'}}' +-f'{x:a{z:hy "user"}} \'\'\'' ++f"{x:a{z:hy \"user\"}} '''" + + # Changing the outer quotes is fine because the format-spec is in a nested expression. +-f'{f'{z=:hy "user"}'} \'\'\'' ++f"{f'{z=:hy "user"}'} '''" + + + # We have to be careful about changing the quotes if the f-string has a debug expression because it is inserted verbatim. + f'{1=: "abcd \'\'}' # Don't change the outer quotes, or it results in a syntax error +-f'{1=: abcd \'\'}' # Changing the quotes here is fine because the inner quotes aren't the opposite quotes +-f'{1=: abcd \"\"}' # Changing the quotes here is fine because the inner quotes are escaped ++f"{1=: abcd \'\'}" # Changing the quotes here is fine because the inner quotes aren't the opposite quotes ++f"{1=: abcd \"\"}" # Changing the quotes here is fine because the inner quotes are escaped + # Don't change the quotes in the following cases: + f'{x=:hy "user"} \'\'\'' + f'{x=:a{y:hy "user"}} \'\'\'' +@@ -42,4 +42,4 @@ + + f"""{1=: "this" is fine}""" + f"""{1=: "this" is fine}""" # Change quotes to double quotes because they're preferred +-f'{1=: {'ab"cd"'}}' # It's okay if the quotes are in an expression part. ++f"{1=: {'ab"cd"'}}" # It's okay if the quotes are in an expression part. ``` diff --git a/crates/ruff_python_formatter/tests/snapshots/format@expression__join_implicit_concatenated_string.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@expression__join_implicit_concatenated_string.py.snap index 45dabeaccc2f4..33a2c90cb4229 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@expression__join_implicit_concatenated_string.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@expression__join_implicit_concatenated_string.py.snap @@ -326,6 +326,13 @@ assert False, "Implicit concatenated stringuses {} layout on {} format"[ assert False, +"Implicit concatenated string" "uses {} layout on {} format".format( "Multiline", "first" ) + + +# Regression tests for https://github.com/astral-sh/ruff/issues/13935 + +"a" f'{1=: "abcd \'\'}' +f'{1=: "abcd \'\'}' "a" +f'{1=: "abcd \'\'}' f"{1=: 'abcd \"\"}" ``` ## Outputs @@ -733,4 +740,11 @@ assert False, "Implicit concatenated stringuses {} layout on {} format"[ assert False, +"Implicit concatenated stringuses {} layout on {} format".format( "Multiline", "first" ) + + +# Regression tests for https://github.com/astral-sh/ruff/issues/13935 + +f'a{1=: "abcd \'\'}' +f'{1=: "abcd \'\'}a' +f'{1=: "abcd \'\'}' f"{1=: 'abcd \"\"}" ```