From 5a371a0ec597562fa33a7feecfc6c29f2499e4a5 Mon Sep 17 00:00:00 2001 From: David Bar-On Date: Fri, 13 Nov 2020 19:55:42 +0200 Subject: [PATCH 1/3] Issue #4471 - not wrap strings at backslash --- src/formatting/string.rs | 22 ++++++++++++++++++++-- tests/source/string-lit.rs | 9 +++++++++ tests/target/string-lit.rs | 23 +++++++++++++++++++++++ 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/src/formatting/string.rs b/src/formatting/string.rs index 08960959ebf..e06b4b9a515 100644 --- a/src/formatting/string.rs +++ b/src/formatting/string.rs @@ -268,6 +268,24 @@ fn break_string(max_width: usize, trim_end: bool, line_end: &str, input: &[&str] } }; + // Don't allow break at `\` as it is used to concat string lines. + let break_not_at_backslash = |index /* grapheme at index is included */| { + if input[0..=index].iter().last().unwrap().ne(&"\\") { + break_at(index) + } else { + let backslash_count = match input[0..index] + .iter() + .rposition(|grapheme| grapheme.ne(&"\\")) + { + // Break at the reverse-first non `\` + Some(non_backslash_index) => index - non_backslash_index, + // Only `\` to the right - break after even number to not break after escape `\` + None => index % 2, + }; + break_at(index - backslash_count % 2) + } + }; + // find a first index where the unicode width of input[0..x] become > max_width let max_width_index_in_input = { let mut cur_width = 0; @@ -320,7 +338,7 @@ fn break_string(max_width: usize, trim_end: bool, line_end: &str, input: &[&str] .rposition(|grapheme| is_punctuation(grapheme)) { // Found a punctuation and what is on its left side is big enough. - Some(index) if index >= MIN_STRING => break_at(index), + Some(index) if index >= MIN_STRING => break_not_at_backslash(index), // Either no boundary character was found to the left of `input[max_chars]`, or the line // got too small. We try searching for a boundary character to the right. _ => match input[max_width_index_in_input..] @@ -328,7 +346,7 @@ fn break_string(max_width: usize, trim_end: bool, line_end: &str, input: &[&str] .position(|grapheme| is_whitespace(grapheme) || is_punctuation(grapheme)) { // A boundary was found after the line limit - Some(index) => break_at(max_width_index_in_input + index), + Some(index) => break_not_at_backslash(max_width_index_in_input + index), // No boundary to the right, the input cannot be broken None => SnippetState::EndOfInput(input.concat()), }, diff --git a/tests/source/string-lit.rs b/tests/source/string-lit.rs index 7719e76ffe7..39cb0be98d9 100644 --- a/tests/source/string-lit.rs +++ b/tests/source/string-lit.rs @@ -59,3 +59,12 @@ fn issue_1282() { // #1987 #[link_args = "-s NO_FILESYSTEM=1 -s NO_EXIT_RUNTIME=1 -s EXPORTED_RUNTIME_METHODS=[\"_malloc\"] -s NO_DYNAMIC_EXECUTION=1 -s ELIMINATE_DUPLICATE_FUNCTIONS=1 -s EVAL_CTORS=1"] extern "C" {} + +// #4471 - strings including `\` shaouldnot wrap at the `\` +const ASCII_ESCAPE: &str = "id\u{1f}1\u{1f}/Users/nixon/dev/rs/gitstatusd\u{1f}1c9be4fe5460a30e70de9cbf99c3ec7064296b28\u{1f}master\u{1f}\u{1f}\u{1f}\u{1f}\u{1f}7\u{1f}0\u{1f}1\u{1f}0\u{1f}1\u{1f}0\u{1f}0\u{1f}0\u{1f}\u{1f}0\u{1f}0\u{1f}0\u{1f}\u{1f}\u{1f}0\u{1f}0\u{1f}0\u{1f}0"; +const ASCII_ESCAPE: &str = "id\u{1f}1\u{1f}/Users/nixon/dev/rs/gitstatusd\u{1f}1c9be4fe5460a30e70de9cbf99c3ec7064296b28\u{1f}master\u{1f}\u{1f}\u{1f}\u{1f}\u{1f}7\u{1f}0"; +const ASCII_ESCAPE: &str = "id\u{1f}1\u{1f}/Users/nixon/dev/rs/gitstatusd\u{1f}1c9be4fe5460a30e70de9cbf99c3ec70642,96b28\u{1f}master\u{1f}\u{1f}\u{1f}\u{1f}\u{1f}7\u{1f}0"; +const ASCII_ESCAPE: &str = "id\u{1f}1\u{1f}/Users/nixon/dev/rs/gitstatusd\u{1f}1c9be4fe5460a30e70de9cbf99c3ec70642 96b28\u{1f}master\u{1f}\u{1f}\u{1f}\u{1f}\u{1f}7\u{1f}0"; +const ASCII_ESCAPE: &str = "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\"; +const ASCII: &str = "xxxxxxxxxxxxxxxxxxxxxxxxxx\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\"; +const ASCII: &str = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\\\nyyyyyyyyyyyy"; diff --git a/tests/target/string-lit.rs b/tests/target/string-lit.rs index 2d33061074d..af01d4326ae 100644 --- a/tests/target/string-lit.rs +++ b/tests/target/string-lit.rs @@ -61,3 +61,26 @@ fn issue_1282() { #[link_args = "-s NO_FILESYSTEM=1 -s NO_EXIT_RUNTIME=1 -s EXPORTED_RUNTIME_METHODS=[\"_malloc\"] \ -s NO_DYNAMIC_EXECUTION=1 -s ELIMINATE_DUPLICATE_FUNCTIONS=1 -s EVAL_CTORS=1"] extern "C" {} + +// #4471 - strings including `\` shaouldnot wrap at the `\` +const ASCII_ESCAPE: &str = + "id\u{1f}1\u{1f}/Users/nixon/dev/rs/gitstatusd\u{1f}1c9be4fe5460a30e70de9cbf99c3ec7064296b28\ + \u{1f}master\u{1f}\u{1f}\u{1f}\u{1f}\u{1f}7\u{1f}0\u{1f}1\u{1f}0\u{1f}1\u{1f}0\u{1f}0\u{1f}0\ + \u{1f}\u{1f}0\u{1f}0\u{1f}0\u{1f}\u{1f}\u{1f}0\u{1f}0\u{1f}0\u{1f}0"; +const ASCII_ESCAPE: &str = "id\u{1f}1\u{1f}/Users/nixon/dev/rs/gitstatusd\ + \u{1f}1c9be4fe5460a30e70de9cbf99c3ec7064296b28\u{1f}master\u{1f}\ + \u{1f}\u{1f}\u{1f}\u{1f}7\u{1f}0"; +const ASCII_ESCAPE: &str = "id\u{1f}1\u{1f}/Users/nixon/dev/rs/gitstatusd\ + \u{1f}1c9be4fe5460a30e70de9cbf99c3ec70642,96b28\u{1f}master\u{1f}\ + \u{1f}\u{1f}\u{1f}\u{1f}7\u{1f}0"; +const ASCII_ESCAPE: &str = "id\u{1f}1\u{1f}/Users/nixon/dev/rs/gitstatusd\ + \u{1f}1c9be4fe5460a30e70de9cbf99c3ec70642 \ + 96b28\u{1f}master\u{1f}\u{1f}\u{1f}\u{1f}\u{1f}7\u{1f}0"; +const ASCII_ESCAPE: &str = "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ + \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ + \\"; +const ASCII: &str = "xxxxxxxxxxxxxxxxxxxxxxxxxx\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ + \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\"; +const ASCII: &str = + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\\\ + \nyyyyyyyyyyyy"; From e8eb088cf4334f31f710aea1f4def5d7411212b6 Mon Sep 17 00:00:00 2001 From: David Bar-On Date: Tue, 8 Dec 2020 14:41:17 +0200 Subject: [PATCH 2/3] Changes according to comments to the fix PR #4524 --- src/formatting/string.rs | 42 ++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/formatting/string.rs b/src/formatting/string.rs index e06b4b9a515..201675f8669 100644 --- a/src/formatting/string.rs +++ b/src/formatting/string.rs @@ -225,7 +225,25 @@ fn not_whitespace_except_line_feed(g: &str) -> bool { /// character is either a punctuation or a whitespace. /// FIXME(issue#3281): We must follow UAX#14 algorithm instead of this. fn break_string(max_width: usize, trim_end: bool, line_end: &str, input: &[&str]) -> SnippetState { - let break_at = |index /* grapheme at index is included */| { + let break_at = |index_in /* grapheme at index is included */| { + // Ensure break is not after an escape '\' as it will "escape" the `\` that is added + // for concatenating the two parts of the broken line. + let index = if (input[index_in] as &str).ne("\\") { + index_in + } else { + let index_offset = match input[0..index_in] + .iter() + .rposition(|grapheme| grapheme.ne(&"\\")) + { + // There is a non-`\` to the left + Some(non_backslash_index) => (index_in - non_backslash_index) % 2, + // Only `\` to the left + None => index_in % 2, + }; + // Make sure break is after even number (including zero) of `\` + index_in - index_offset + }; + // Take in any whitespaces to the left/right of `input[index]` while // preserving line feeds let index_minus_ws = input[0..=index] @@ -268,24 +286,6 @@ fn break_string(max_width: usize, trim_end: bool, line_end: &str, input: &[&str] } }; - // Don't allow break at `\` as it is used to concat string lines. - let break_not_at_backslash = |index /* grapheme at index is included */| { - if input[0..=index].iter().last().unwrap().ne(&"\\") { - break_at(index) - } else { - let backslash_count = match input[0..index] - .iter() - .rposition(|grapheme| grapheme.ne(&"\\")) - { - // Break at the reverse-first non `\` - Some(non_backslash_index) => index - non_backslash_index, - // Only `\` to the right - break after even number to not break after escape `\` - None => index % 2, - }; - break_at(index - backslash_count % 2) - } - }; - // find a first index where the unicode width of input[0..x] become > max_width let max_width_index_in_input = { let mut cur_width = 0; @@ -338,7 +338,7 @@ fn break_string(max_width: usize, trim_end: bool, line_end: &str, input: &[&str] .rposition(|grapheme| is_punctuation(grapheme)) { // Found a punctuation and what is on its left side is big enough. - Some(index) if index >= MIN_STRING => break_not_at_backslash(index), + Some(index) if index >= MIN_STRING => break_at(index), // Either no boundary character was found to the left of `input[max_chars]`, or the line // got too small. We try searching for a boundary character to the right. _ => match input[max_width_index_in_input..] @@ -346,7 +346,7 @@ fn break_string(max_width: usize, trim_end: bool, line_end: &str, input: &[&str] .position(|grapheme| is_whitespace(grapheme) || is_punctuation(grapheme)) { // A boundary was found after the line limit - Some(index) => break_not_at_backslash(max_width_index_in_input + index), + Some(index) => break_at(max_width_index_in_input + index), // No boundary to the right, the input cannot be broken None => SnippetState::EndOfInput(input.concat()), }, From c6e1673119e4734056aa455a186937eab437eee8 Mon Sep 17 00:00:00 2001 From: David Bar-On Date: Wed, 9 Dec 2020 16:43:54 +0200 Subject: [PATCH 3/3] Some more modifications to improve the code per comments received --- src/formatting/string.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/formatting/string.rs b/src/formatting/string.rs index 201675f8669..4844352f1bc 100644 --- a/src/formatting/string.rs +++ b/src/formatting/string.rs @@ -225,23 +225,23 @@ fn not_whitespace_except_line_feed(g: &str) -> bool { /// character is either a punctuation or a whitespace. /// FIXME(issue#3281): We must follow UAX#14 algorithm instead of this. fn break_string(max_width: usize, trim_end: bool, line_end: &str, input: &[&str]) -> SnippetState { - let break_at = |index_in /* grapheme at index is included */| { + let break_at = |index /* grapheme at index is included */| { // Ensure break is not after an escape '\' as it will "escape" the `\` that is added // for concatenating the two parts of the broken line. - let index = if (input[index_in] as &str).ne("\\") { - index_in + let index = if input[index] != "\\" { + index } else { - let index_offset = match input[0..index_in] + let index_offset = match input[0..index] .iter() .rposition(|grapheme| grapheme.ne(&"\\")) { // There is a non-`\` to the left - Some(non_backslash_index) => (index_in - non_backslash_index) % 2, + Some(non_backslash_index) => (index - non_backslash_index) % 2, // Only `\` to the left - None => index_in % 2, + None => index % 2, }; // Make sure break is after even number (including zero) of `\` - index_in - index_offset + index - index_offset }; // Take in any whitespaces to the left/right of `input[index]` while