From a047064d6b0ddf82443e5e478d4dae3d0db6f379 Mon Sep 17 00:00:00 2001 From: Michael Goulet Date: Mon, 10 Apr 2023 06:52:18 +0000 Subject: [PATCH] Revert "Don't recover lifetimes/labels containing emojis as character literals" Reverts PR #108031 Fixes (doesnt close until beta backported) #109746 This reverts commit e3f9db5fc319c6d8eee5d47d216ea6a426070c41. This reverts commit 98b82aedba3f3f581e89df54352914b27f42c6f7. This reverts commit 380fa264132ad481e73cbbf0f3a0feefd99a1d78. --- compiler/rustc_errors/src/lib.rs | 2 - compiler/rustc_lexer/src/lib.rs | 43 +++------- compiler/rustc_lexer/src/tests.rs | 2 +- compiler/rustc_parse/src/lexer/mod.rs | 9 +- .../lexer/issue-108019-bad-emoji-recovery.rs | 45 ---------- .../issue-108019-bad-emoji-recovery.stderr | 86 ------------------- tests/ui/parser/numeric-lifetime.rs | 4 +- tests/ui/parser/numeric-lifetime.stderr | 4 +- 8 files changed, 17 insertions(+), 178 deletions(-) delete mode 100644 tests/ui/lexer/issue-108019-bad-emoji-recovery.rs delete mode 100644 tests/ui/lexer/issue-108019-bad-emoji-recovery.stderr diff --git a/compiler/rustc_errors/src/lib.rs b/compiler/rustc_errors/src/lib.rs index 9866a9bffe0e1..17944044aae1f 100644 --- a/compiler/rustc_errors/src/lib.rs +++ b/compiler/rustc_errors/src/lib.rs @@ -473,8 +473,6 @@ pub enum StashKey { /// When an invalid lifetime e.g. `'2` should be reinterpreted /// as a char literal in the parser LifetimeIsChar, - /// When an invalid lifetime e.g. `'🐱` contains emoji. - LifetimeContainsEmoji, /// Maybe there was a typo where a comma was forgotten before /// FRU syntax MaybeFruTypo, diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 322ec31fb2cff..b3f4b5cd5e5a0 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -95,7 +95,7 @@ pub enum TokenKind { Literal { kind: LiteralKind, suffix_start: u32 }, /// "'a" - Lifetime { starts_with_number: bool, contains_emoji: bool }, + Lifetime { starts_with_number: bool }, // One-char tokens: /// ";" @@ -632,13 +632,7 @@ impl Cursor<'_> { // If the first symbol is valid for identifier, it can be a lifetime. // Also check if it's a number for a better error reporting (so '0 will // be reported as invalid lifetime and not as unterminated char literal). - // We also have to account for potential `'🐱` emojis to avoid reporting - // it as an unterminated char literal. - is_id_start(self.first()) - || self.first().is_digit(10) - // FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode - // 5.0, but Unicode is already newer than this. - || unic_emoji_char::is_emoji(self.first()) + is_id_start(self.first()) || self.first().is_digit(10) }; if !can_be_a_lifetime { @@ -651,33 +645,16 @@ impl Cursor<'_> { return Literal { kind, suffix_start }; } - // Either a lifetime or a character literal. + // Either a lifetime or a character literal with + // length greater than 1. let starts_with_number = self.first().is_digit(10); - let mut contains_emoji = false; - // FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode - // 5.0, but Unicode is already newer than this. - if unic_emoji_char::is_emoji(self.first()) { - contains_emoji = true; - } else { - // Skip the literal contents. - // First symbol can be a number (which isn't a valid identifier start), - // so skip it without any checks. - self.bump(); - } - self.eat_while(|c| { - if is_id_continue(c) { - true - // FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode - // 5.0, but Unicode is already newer than this. - } else if unic_emoji_char::is_emoji(c) { - contains_emoji = true; - true - } else { - false - } - }); + // Skip the literal contents. + // First symbol can be a number (which isn't a valid identifier start), + // so skip it without any checks. + self.bump(); + self.eat_while(is_id_continue); // Check if after skipping literal contents we've met a closing // single quote (which means that user attempted to create a @@ -687,7 +664,7 @@ impl Cursor<'_> { let kind = Char { terminated: true }; Literal { kind, suffix_start: self.pos_within_token() } } else { - Lifetime { starts_with_number, contains_emoji } + Lifetime { starts_with_number } } } diff --git a/compiler/rustc_lexer/src/tests.rs b/compiler/rustc_lexer/src/tests.rs index 670d64fb983f5..e4c1787f2ccef 100644 --- a/compiler/rustc_lexer/src/tests.rs +++ b/compiler/rustc_lexer/src/tests.rs @@ -235,7 +235,7 @@ fn lifetime() { check_lexing( "'abc", expect![[r#" - Token { kind: Lifetime { starts_with_number: false, contains_emoji: false }, len: 4 } + Token { kind: Lifetime { starts_with_number: false }, len: 4 } "#]], ); } diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 4a7da11a09784..399a86c9297a3 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -223,21 +223,16 @@ impl<'a> StringReader<'a> { }; token::Literal(token::Lit { kind, symbol, suffix }) } - rustc_lexer::TokenKind::Lifetime { starts_with_number, contains_emoji } => { + rustc_lexer::TokenKind::Lifetime { starts_with_number } => { // Include the leading `'` in the real identifier, for macro // expansion purposes. See #12512 for the gory details of why // this is necessary. let lifetime_name = self.str_from(start); if starts_with_number { let span = self.mk_sp(start, self.pos); - let mut diag = self.sess.struct_err("lifetimes or labels cannot start with a number"); + let mut diag = self.sess.struct_err("lifetimes cannot start with a number"); diag.set_span(span); diag.stash(span, StashKey::LifetimeIsChar); - } else if contains_emoji { - let span = self.mk_sp(start, self.pos); - let mut diag = self.sess.struct_err("lifetimes or labels cannot contain emojis"); - diag.set_span(span); - diag.stash(span, StashKey::LifetimeContainsEmoji); } let ident = Symbol::intern(lifetime_name); token::Lifetime(ident) diff --git a/tests/ui/lexer/issue-108019-bad-emoji-recovery.rs b/tests/ui/lexer/issue-108019-bad-emoji-recovery.rs deleted file mode 100644 index f0f8622456010..0000000000000 --- a/tests/ui/lexer/issue-108019-bad-emoji-recovery.rs +++ /dev/null @@ -1,45 +0,0 @@ -#![allow(unused_labels)] - -// FIXME(#108019): outdated Unicode table -// fn foo() { -// '🥺 loop { -// break -// } -// } - -fn bar() { - '🐱 loop { - //~^ ERROR labeled expression must be followed by `:` - //~| ERROR lifetimes or labels cannot contain emojis - break - } -} - -fn qux() { - 'a🐱 loop { - //~^ ERROR labeled expression must be followed by `:` - //~| ERROR lifetimes or labels cannot contain emojis - break - } -} - -fn quux() { - '1🐱 loop { - //~^ ERROR labeled expression must be followed by `:` - //~| ERROR lifetimes or labels cannot start with a number - break - } -} - -fn x<'🐱>() -> &'🐱 () { - //~^ ERROR lifetimes or labels cannot contain emojis - //~| ERROR lifetimes or labels cannot contain emojis - &() -} - -fn y() { - 'a🐱: loop {} - //~^ ERROR lifetimes or labels cannot contain emojis -} - -fn main() {} diff --git a/tests/ui/lexer/issue-108019-bad-emoji-recovery.stderr b/tests/ui/lexer/issue-108019-bad-emoji-recovery.stderr deleted file mode 100644 index be77ffdea349f..0000000000000 --- a/tests/ui/lexer/issue-108019-bad-emoji-recovery.stderr +++ /dev/null @@ -1,86 +0,0 @@ -error: labeled expression must be followed by `:` - --> $DIR/issue-108019-bad-emoji-recovery.rs:11:5 - | -LL | '🐱 loop { - | ^--- help: add `:` after the label - | | - | _____the label - | | -LL | | -LL | | -LL | | break -LL | | } - | |_____^ - | - = note: labels are used before loops and blocks, allowing e.g., `break 'label` to them - -error: labeled expression must be followed by `:` - --> $DIR/issue-108019-bad-emoji-recovery.rs:19:5 - | -LL | 'a🐱 loop { - | ^---- help: add `:` after the label - | | - | _____the label - | | -LL | | -LL | | -LL | | break -LL | | } - | |_____^ - | - = note: labels are used before loops and blocks, allowing e.g., `break 'label` to them - -error: labeled expression must be followed by `:` - --> $DIR/issue-108019-bad-emoji-recovery.rs:27:5 - | -LL | '1🐱 loop { - | ^---- help: add `:` after the label - | | - | _____the label - | | -LL | | -LL | | -LL | | break -LL | | } - | |_____^ - | - = note: labels are used before loops and blocks, allowing e.g., `break 'label` to them - -error: lifetimes or labels cannot contain emojis - --> $DIR/issue-108019-bad-emoji-recovery.rs:11:5 - | -LL | '🐱 loop { - | ^^^ - -error: lifetimes or labels cannot contain emojis - --> $DIR/issue-108019-bad-emoji-recovery.rs:19:5 - | -LL | 'a🐱 loop { - | ^^^^ - -error: lifetimes or labels cannot start with a number - --> $DIR/issue-108019-bad-emoji-recovery.rs:27:5 - | -LL | '1🐱 loop { - | ^^^^ - -error: lifetimes or labels cannot contain emojis - --> $DIR/issue-108019-bad-emoji-recovery.rs:34:6 - | -LL | fn x<'🐱>() -> &'🐱 () { - | ^^^ - -error: lifetimes or labels cannot contain emojis - --> $DIR/issue-108019-bad-emoji-recovery.rs:34:16 - | -LL | fn x<'🐱>() -> &'🐱 () { - | ^^^ - -error: lifetimes or labels cannot contain emojis - --> $DIR/issue-108019-bad-emoji-recovery.rs:41:5 - | -LL | 'a🐱: loop {} - | ^^^^ - -error: aborting due to 9 previous errors - diff --git a/tests/ui/parser/numeric-lifetime.rs b/tests/ui/parser/numeric-lifetime.rs index a082a8a44df2a..2d82354c62cca 100644 --- a/tests/ui/parser/numeric-lifetime.rs +++ b/tests/ui/parser/numeric-lifetime.rs @@ -1,6 +1,6 @@ struct S<'1> { s: &'1 usize } -//~^ ERROR lifetimes or labels cannot start with a number -//~| ERROR lifetimes or labels cannot start with a number +//~^ ERROR lifetimes cannot start with a number +//~| ERROR lifetimes cannot start with a number fn main() { // verify that the parse error doesn't stop type checking let x: usize = ""; diff --git a/tests/ui/parser/numeric-lifetime.stderr b/tests/ui/parser/numeric-lifetime.stderr index 66e35dca92319..7c1bcb7263171 100644 --- a/tests/ui/parser/numeric-lifetime.stderr +++ b/tests/ui/parser/numeric-lifetime.stderr @@ -6,13 +6,13 @@ LL | let x: usize = ""; | | | expected due to this -error: lifetimes or labels cannot start with a number +error: lifetimes cannot start with a number --> $DIR/numeric-lifetime.rs:1:10 | LL | struct S<'1> { s: &'1 usize } | ^^ -error: lifetimes or labels cannot start with a number +error: lifetimes cannot start with a number --> $DIR/numeric-lifetime.rs:1:20 | LL | struct S<'1> { s: &'1 usize }