From 76ff85fa51374f28a06f57cd163d3a03b9065995 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Sun, 13 Oct 2024 12:49:59 +0100 Subject: [PATCH] perf(codegen): check last char with byte methods --- crates/oxc_codegen/src/code_buffer.rs | 62 +++++++++++++++++++++++++-- crates/oxc_codegen/src/comment.rs | 2 +- crates/oxc_codegen/src/gen.rs | 6 +-- crates/oxc_codegen/src/lib.rs | 36 ++++++++++++---- 4 files changed, 89 insertions(+), 17 deletions(-) diff --git a/crates/oxc_codegen/src/code_buffer.rs b/crates/oxc_codegen/src/code_buffer.rs index ada3e00f203b62..5ed6f1d6223087 100644 --- a/crates/oxc_codegen/src/code_buffer.rs +++ b/crates/oxc_codegen/src/code_buffer.rs @@ -131,11 +131,49 @@ impl CodeBuffer { /// ``` #[inline] #[must_use = "Peeking is pointless if the peeked char isn't used"] - pub fn peek_nth_back(&self, n: usize) -> Option { + pub fn peek_nth_char_back(&self, n: usize) -> Option { // SAFETY: All methods of `CodeBuffer` ensure `buf` is valid UTF-8 unsafe { std::str::from_utf8_unchecked(&self.buf) }.chars().nth_back(n) } + /// Peek the `n`th byte from the end of the buffer. + /// + /// When `n` is zero, the last byte is returned. + /// Returns [`None`] if `n` is greater than or equal to the length of the buffer. + /// + /// # Example + /// ``` + /// use oxc_codegen::CodeBuffer; + /// let mut code = CodeBuffer::new(); + /// code.print_str("foo"); + /// + /// assert_eq!(code.peek_nth_byte_back(0), Some(b'o')); + /// assert_eq!(code.peek_nth_byte_back(2), Some(b'f')); + /// assert_eq!(code.peek_nth_byte_back(3), None); + /// ``` + #[inline] + #[must_use = "Peeking is pointless if the peeked char isn't used"] + pub fn peek_nth_byte_back(&self, n: usize) -> Option { + let len = self.len(); + if n < len { + Some(self.buf[len - 1 - n]) + } else { + None + } + } + + /// Peek the last byte from the end of the buffer. + #[inline] + pub fn last_byte(&self) -> Option { + self.buf.last().copied() + } + + /// Peek the last char from the end of the buffer. 
+ #[inline] + pub fn last_char(&self) -> Option { + self.peek_nth_char_back(0) + } + /// Push a single ASCII byte into the buffer. /// /// # Panics @@ -436,8 +474,24 @@ mod test { let mut code = CodeBuffer::new(); code.print_str("foo"); - assert_eq!(code.peek_nth_back(0), Some('o')); - assert_eq!(code.peek_nth_back(2), Some('f')); - assert_eq!(code.peek_nth_back(3), None); + assert_eq!(code.peek_nth_char_back(0), Some('o')); + assert_eq!(code.peek_nth_char_back(2), Some('f')); + assert_eq!(code.peek_nth_char_back(3), None); + } + + #[test] + fn last_byte() { + let mut code = CodeBuffer::new(); + assert_eq!(code.last_byte(), None); + code.print_str("bar"); + assert_eq!(code.last_byte(), Some(b'r')); + } + + #[test] + fn last_char() { + let mut code = CodeBuffer::new(); + assert_eq!(code.last_char(), None); + code.print_str("bar"); + assert_eq!(code.last_char(), Some('r')); } } diff --git a/crates/oxc_codegen/src/comment.rs b/crates/oxc_codegen/src/comment.rs index 064077f0fe1a50..3db8da578844a9 100644 --- a/crates/oxc_codegen/src/comment.rs +++ b/crates/oxc_codegen/src/comment.rs @@ -87,7 +87,7 @@ impl<'a> Codegen<'a> { if comments.first().is_some_and(|c| c.preceded_by_newline) { // Skip printing newline if this comment is already on a newline. 
- if self.peek_nth_back(0).is_some_and(|c| c != '\n' && c != '\t') { + if self.last_byte().is_some_and(|b| b != b'\n' && b != b'\t') { self.print_hard_newline(); self.print_indent(); } diff --git a/crates/oxc_codegen/src/gen.rs b/crates/oxc_codegen/src/gen.rs index c1341c30104646..4ca8e752713f1f 100644 --- a/crates/oxc_codegen/src/gen.rs +++ b/crates/oxc_codegen/src/gen.rs @@ -1194,11 +1194,11 @@ impl<'a> Gen for BigIntLiteral<'a> { impl<'a> Gen for RegExpLiteral<'a> { fn gen(&self, p: &mut Codegen, _ctx: Context) { p.add_source_mapping(self.span.start); - let last = p.peek_nth_back(0); + let last = p.last_byte(); let pattern_text = self.regex.pattern.source_text(p.source_text); // Avoid forming a single-line comment or " Codegen<'a> { #[inline] fn print_space_before_identifier(&mut self) { - if self - .peek_nth_back(0) - .is_some_and(|ch| is_identifier_part(ch) || self.prev_reg_exp_end == self.code.len()) - { - self.print_hard_space(); + let Some(byte) = self.last_byte() else { return }; + + if self.prev_reg_exp_end != self.code.len() { + let is_identifier = if byte.is_ascii() { + // Fast path for ASCII (very common case) + is_identifier_part_ascii(byte as char) + } else { + is_identifier_part(self.last_char().unwrap()) + }; + if !is_identifier { + return; + } } + + self.print_hard_space(); + } + + #[inline] + fn last_byte(&self) -> Option { + self.code.last_byte() } #[inline] - fn peek_nth_back(&self, n: usize) -> Option { - self.code.peek_nth_back(n) + fn last_char(&self) -> Option { + self.code.last_char() } #[inline] @@ -530,7 +544,11 @@ impl<'a> Codegen<'a> { || ((prev == bin_op_sub || prev == un_op_neg) && (next == bin_op_sub || next == un_op_neg || next == un_op_pre_dec)) || (prev == un_op_post_dec && next == bin_op_gt) - || (prev == un_op_not && next == un_op_pre_dec && self.peek_nth_back(1) == Some('<')) + || (prev == un_op_not + && next == un_op_pre_dec + // `prev == UnaryOperator::LogicalNot` which means last byte is ASCII, + // and therefore previous 
character is 1 byte from end of buffer + && self.code.peek_nth_byte_back(1) == Some(b'<')) { self.print_hard_space(); }