From 76ff85fa51374f28a06f57cd163d3a03b9065995 Mon Sep 17 00:00:00 2001
From: overlookmotel <theoverlookmotel@gmail.com>
Date: Sun, 13 Oct 2024 12:49:59 +0100
Subject: [PATCH] perf(codegen): check last char with byte methods

---
 crates/oxc_codegen/src/code_buffer.rs | 62 +++++++++++++++++++++++++--
 crates/oxc_codegen/src/comment.rs     |  2 +-
 crates/oxc_codegen/src/gen.rs         |  6 +--
 crates/oxc_codegen/src/lib.rs         | 36 ++++++++++++----
 4 files changed, 89 insertions(+), 17 deletions(-)
diff --git a/crates/oxc_codegen/src/code_buffer.rs b/crates/oxc_codegen/src/code_buffer.rs
index ada3e00f203b62..5ed6f1d6223087 100644
--- a/crates/oxc_codegen/src/code_buffer.rs
+++ b/crates/oxc_codegen/src/code_buffer.rs
@@ -131,11 +131,49 @@ impl CodeBuffer {
     /// ```
     #[inline]
     #[must_use = "Peeking is pointless if the peeked char isn't used"]
-    pub fn peek_nth_back(&self, n: usize) -> Option<char> {
+    pub fn peek_nth_char_back(&self, n: usize) -> Option<char> {
         // SAFETY: All methods of `CodeBuffer` ensure `buf` is valid UTF-8
         unsafe { std::str::from_utf8_unchecked(&self.buf) }.chars().nth_back(n)
     }
 
+    /// Peek the `n`th byte from the end of the buffer.
+    ///
+    /// When `n` is zero, the last byte is returned.
+    /// Returns [`None`] if `n` exceeds the length of the buffer.
+    ///
+    /// # Example
+    /// ```
+    /// use oxc_codegen::CodeBuffer;
+    /// let mut code = CodeBuffer::new();
+    /// code.print_str("foo");
+    ///
+    /// assert_eq!(code.peek_nth_back(0), Some('o'));
+    /// assert_eq!(code.peek_nth_back(2), Some('f'));
+    /// assert_eq!(code.peek_nth_back(3), None);
+    /// ```
+    #[inline]
+    #[must_use = "Peeking is pointless if the peeked char isn't used"]
+    pub fn peek_nth_byte_back(&self, n: usize) -> Option<u8> {
+        let len = self.len();
+        if n < len {
+            Some(self.buf[len - 1 - n])
+        } else {
+            None
+        }
+    }
+
+    /// Peek the last byte from the end of the buffer.
+    #[inline]
+    pub fn last_byte(&self) -> Option<u8> {
+        self.buf.last().copied()
+    }
+
+    /// Peek the last char from the end of the buffer.
+    #[inline]
+    pub fn last_char(&self) -> Option<char> {
+        self.peek_nth_char_back(0)
+    }
+
     /// Push a single ASCII byte into the buffer.
     ///
     /// # Panics
@@ -436,8 +474,24 @@ mod test {
         let mut code = CodeBuffer::new();
         code.print_str("foo");
 
-        assert_eq!(code.peek_nth_back(0), Some('o'));
-        assert_eq!(code.peek_nth_back(2), Some('f'));
-        assert_eq!(code.peek_nth_back(3), None);
+        assert_eq!(code.peek_nth_char_back(0), Some('o'));
+        assert_eq!(code.peek_nth_char_back(2), Some('f'));
+        assert_eq!(code.peek_nth_char_back(3), None);
+    }
+
+    #[test]
+    fn last_byte() {
+        let mut code = CodeBuffer::new();
+        assert_eq!(code.last_byte(), None);
+        code.print_str("bar");
+        assert_eq!(code.last_byte(), Some(b'r'));
+    }
+
+    #[test]
+    fn last_char() {
+        let mut code = CodeBuffer::new();
+        assert_eq!(code.last_char(), None);
+        code.print_str("bar");
+        assert_eq!(code.last_char(), Some('r'));
     }
 }
diff --git a/crates/oxc_codegen/src/comment.rs b/crates/oxc_codegen/src/comment.rs
index 064077f0fe1a50..3db8da578844a9 100644
--- a/crates/oxc_codegen/src/comment.rs
+++ b/crates/oxc_codegen/src/comment.rs
@@ -87,7 +87,7 @@ impl<'a> Codegen<'a> {
 
         if comments.first().is_some_and(|c| c.preceded_by_newline) {
             // Skip printing newline if this comment is already on a newline.
-            if self.peek_nth_back(0).is_some_and(|c| c != '\n' && c != '\t') {
+            if self.last_byte().is_some_and(|b| b != b'\n' && b != b'\t') {
                 self.print_hard_newline();
                 self.print_indent();
             }
diff --git a/crates/oxc_codegen/src/gen.rs b/crates/oxc_codegen/src/gen.rs
index c1341c30104646..4ca8e752713f1f 100644
--- a/crates/oxc_codegen/src/gen.rs
+++ b/crates/oxc_codegen/src/gen.rs
@@ -1194,11 +1194,11 @@ impl<'a> Gen for BigIntLiteral<'a> {
 impl<'a> Gen for RegExpLiteral<'a> {
     fn gen(&self, p: &mut Codegen, _ctx: Context) {
         p.add_source_mapping(self.span.start);
-        let last = p.peek_nth_back(0);
+        let last = p.last_byte();
         let pattern_text = self.regex.pattern.source_text(p.source_text);
         // Avoid forming a single-line comment or "</script" sequence
-        if Some('/') == last
-            || (Some('<') == last && pattern_text.cow_to_lowercase().starts_with("script"))
+        if last == Some(b'/')
+            || (last == Some(b'<') && pattern_text.cow_to_lowercase().starts_with("script"))
         {
             p.print_hard_space();
         }
diff --git a/crates/oxc_codegen/src/lib.rs b/crates/oxc_codegen/src/lib.rs
index 47662efeb8e9ea..5296ed613ae8a0 100644
--- a/crates/oxc_codegen/src/lib.rs
+++ b/crates/oxc_codegen/src/lib.rs
@@ -19,7 +19,7 @@ use oxc_ast::ast::{
 use oxc_mangler::Mangler;
 use oxc_span::{GetSpan, Span};
 use oxc_syntax::{
-    identifier::is_identifier_part,
+    identifier::{is_identifier_part, is_identifier_part_ascii},
     operator::{BinaryOperator, UnaryOperator, UpdateOperator},
     precedence::Precedence,
 };
@@ -289,17 +289,31 @@ impl<'a> Codegen<'a> {
 
     #[inline]
     fn print_space_before_identifier(&mut self) {
-        if self
-            .peek_nth_back(0)
-            .is_some_and(|ch| is_identifier_part(ch) || self.prev_reg_exp_end == self.code.len())
-        {
-            self.print_hard_space();
+        let Some(byte) = self.last_byte() else { return };
+
+        if self.prev_reg_exp_end != self.code.len() {
+            let is_identifier = if byte.is_ascii() {
+                // Fast path for ASCII (very common case)
+                is_identifier_part_ascii(byte as char)
+            } else {
+                is_identifier_part(self.last_char().unwrap())
+            };
+            if !is_identifier {
+                return;
+            }
         }
+
+        self.print_hard_space();
+    }
+
+    #[inline]
+    fn last_byte(&self) -> Option<u8> {
+        self.code.last_byte()
     }
 
     #[inline]
-    fn peek_nth_back(&self, n: usize) -> Option<char> {
-        self.code.peek_nth_back(n)
+    fn last_char(&self) -> Option<char> {
+        self.code.last_char()
     }
 
     #[inline]
@@ -530,7 +544,11 @@ impl<'a> Codegen<'a> {
             || ((prev == bin_op_sub || prev == un_op_neg)
                 && (next == bin_op_sub || next == un_op_neg || next == un_op_pre_dec))
             || (prev == un_op_post_dec && next == bin_op_gt)
-            || (prev == un_op_not && next == un_op_pre_dec && self.peek_nth_back(1) == Some('<'))
+            || (prev == un_op_not
+                && next == un_op_pre_dec
+                // `prev == UnaryOperator::LogicalNot` which means last byte is ASCII,
+                // and therefore previous character is 1 byte from end of buffer
+                && self.code.peek_nth_byte_back(1) == Some(b'<'))
         {
             self.print_hard_space();
         }