Skip to content

Commit

Permalink
feat(minifier): fold String::charAt / String::charCodeAt more precisely (#9082)
Browse files Browse the repository at this point in the history
  • Loading branch information
sapphi-red committed Feb 13, 2025
1 parent 237ffba commit 125d610
Show file tree
Hide file tree
Showing 7 changed files with 163 additions and 74 deletions.
24 changes: 17 additions & 7 deletions crates/oxc_ecmascript/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ mod string_to_number;
mod to_big_int;
mod to_boolean;
mod to_int_32;
mod to_integer_or_infinity;
mod to_number;
mod to_string;

Expand All @@ -28,12 +29,21 @@ pub mod constant_evaluation;
pub mod side_effects;

pub use self::{
array_join::ArrayJoin, bound_names::BoundNames,
array_join::ArrayJoin,
bound_names::BoundNames,
is_simple_parameter_list::IsSimpleParameterList,
private_bound_identifiers::PrivateBoundIdentifiers, prop_name::PropName,
string_char_at::StringCharAt, string_char_code_at::StringCharCodeAt,
string_index_of::StringIndexOf, string_last_index_of::StringLastIndexOf,
string_substring::StringSubstring, string_to_big_int::StringToBigInt,
string_to_number::StringToNumber, to_big_int::ToBigInt, to_boolean::ToBoolean,
to_int_32::ToInt32, to_number::ToNumber, to_string::ToJsString,
private_bound_identifiers::PrivateBoundIdentifiers,
prop_name::PropName,
string_char_at::{StringCharAt, StringCharAtResult},
string_char_code_at::StringCharCodeAt,
string_index_of::StringIndexOf,
string_last_index_of::StringLastIndexOf,
string_substring::StringSubstring,
string_to_big_int::StringToBigInt,
string_to_number::StringToNumber,
to_big_int::ToBigInt,
to_boolean::ToBoolean,
to_int_32::ToInt32,
to_number::ToNumber,
to_string::ToJsString,
};
58 changes: 37 additions & 21 deletions crates/oxc_ecmascript/src/string_char_at.rs
Original file line number Diff line number Diff line change
@@ -1,40 +1,56 @@
use crate::ToInt32;
use num_traits::ToPrimitive;

use crate::to_integer_or_infinity::ToIntegerOrInfinityResult;

pub trait StringCharAt {
/// `String.prototype.charAt ( pos )`
/// <https://tc39.es/ecma262/#sec-string.prototype.charat>
fn char_at(&self, index: Option<f64>) -> Option<char>;
fn char_at(&self, pos: Option<f64>) -> StringCharAtResult;
}

impl StringCharAt for &str {
#[expect(clippy::cast_sign_loss)]
fn char_at(&self, index: Option<f64>) -> Option<char> {
let index = index.unwrap_or(0.0);
if index.fract() != 0.0 || index.is_nan() || index.is_infinite() {
return None;
}
let index = index.to_int_32() as isize;
if index < 0 {
None
} else {
self.encode_utf16().nth(index as usize).and_then(|n| char::from_u32(u32::from(n)))
}
fn char_at(&self, pos: Option<f64>) -> StringCharAtResult {
use crate::to_integer_or_infinity::ToIntegerOrInfinity;

let position = pos.unwrap_or(0.0).to_integer_or_infinity_as_i64();
let position = match position {
ToIntegerOrInfinityResult::Value(v) if v >= 0 => v.to_usize().unwrap(),
_ => return StringCharAtResult::OutOfRange,
};

self.encode_utf16().nth(position).map_or(StringCharAtResult::OutOfRange, |n| {
char::from_u32(u32::from(n))
.map_or(StringCharAtResult::InvalidChar(n), StringCharAtResult::Value)
})
}
}

/// Outcome of [`StringCharAt::char_at`].
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum StringCharAtResult {
/// The UTF-16 code unit at the requested position, converted to a `char`.
Value(char),
/// The UTF-16 code unit at the requested position when `char::from_u32` rejects it
/// (a surrogate code unit); the raw code unit is carried instead.
InvalidChar(u16),
/// The position is negative, treated as infinite, or at/after the end of the
/// string's UTF-16 code units.
OutOfRange,
}

#[cfg(test)]
mod test {
use crate::string_char_at::StringCharAtResult;

use super::StringCharAt;

#[test]
fn test_evaluate_string_char_at() {
let s = "test";
assert_eq!(s.char_at(Some(0.0)), Some('t'));
assert_eq!(s.char_at(Some(1.0)), Some('e'));
assert_eq!(s.char_at(Some(2.0)), Some('s'));
assert_eq!(s.char_at(Some(0.5)), None);
assert_eq!(s.char_at(Some(-1.0)), None);
assert_eq!(s.char_at(Some(-1.1)), None);
assert_eq!(s.char_at(Some(-1_073_741_825.0)), None);
assert_eq!(s.char_at(Some(0.0)), StringCharAtResult::Value('t'));
assert_eq!(s.char_at(Some(1.0)), StringCharAtResult::Value('e'));
assert_eq!(s.char_at(Some(2.0)), StringCharAtResult::Value('s'));
assert_eq!(s.char_at(Some(4.0)), StringCharAtResult::OutOfRange);
assert_eq!(s.char_at(Some(0.5)), StringCharAtResult::Value('t'));
assert_eq!(s.char_at(None), StringCharAtResult::Value('t'));
assert_eq!(s.char_at(Some(f64::INFINITY)), StringCharAtResult::OutOfRange);
assert_eq!(s.char_at(Some(f64::NEG_INFINITY)), StringCharAtResult::OutOfRange);
assert_eq!(s.char_at(Some(-1.0)), StringCharAtResult::OutOfRange);
assert_eq!(s.char_at(Some(-1.1)), StringCharAtResult::OutOfRange);
assert_eq!(s.char_at(Some(-1_073_741_825.0)), StringCharAtResult::OutOfRange);
}
}
10 changes: 7 additions & 3 deletions crates/oxc_ecmascript/src/string_char_code_at.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::StringCharAt;
use crate::{string_char_at::StringCharAtResult, StringCharAt};

pub trait StringCharCodeAt {
/// `String.prototype.charCodeAt ( pos )`
Expand All @@ -8,7 +8,11 @@ pub trait StringCharCodeAt {

impl StringCharCodeAt for &str {
fn char_code_at(&self, index: Option<f64>) -> Option<u32> {
self.char_at(index).map(|c| c as u32)
match self.char_at(index) {
StringCharAtResult::Value(c) => Some(c as u32),
StringCharAtResult::InvalidChar(v) => Some(u32::from(v)),
StringCharAtResult::OutOfRange => None,
}
}
}

Expand All @@ -28,7 +32,7 @@ mod test {
assert_eq!(s.char_code_at(Some(-1.0)), None);
assert_eq!(s.char_code_at(None), Some(97));
assert_eq!(s.char_code_at(Some(0.0)), Some(97));
assert_eq!(s.char_code_at(Some(f64::NAN)), None);
assert_eq!(s.char_code_at(Some(f64::NAN)), Some(97));
assert_eq!(s.char_code_at(Some(f64::INFINITY)), None);
}
}
63 changes: 63 additions & 0 deletions crates/oxc_ecmascript/src/to_integer_or_infinity.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
use num_traits::ToPrimitive;

/// ECMAScript `ToIntegerOrInfinity` abstract operation.
pub trait ToIntegerOrInfinity {
    /// `ToIntegerOrInfinity`
    /// <https://tc39.es/ecma262/multipage/abstract-operations.html#sec-tointegerorinfinity>
    fn to_integer_or_infinity(&self) -> f64;

    /// Runs [`Self::to_integer_or_infinity`] and narrows the result to an `i64`.
    ///
    /// A result that `to_i64` cannot represent (either infinity, or a finite value
    /// outside the `i64` range) is reported as [`ToIntegerOrInfinityResult::Infinity`]
    /// when it is non-negative and as [`ToIntegerOrInfinityResult::NegativeInfinity`]
    /// otherwise.
    fn to_integer_or_infinity_as_i64(&self) -> ToIntegerOrInfinityResult {
        let integer = self.to_integer_or_infinity();
        match integer.to_i64() {
            Some(value) => ToIntegerOrInfinityResult::Value(value),
            // `to_i64` yields `None` for both infinities and for out-of-range finite
            // values, so the sign alone decides which infinity to report.
            None if integer >= 0.0 => ToIntegerOrInfinityResult::Infinity,
            None => ToIntegerOrInfinityResult::NegativeInfinity,
        }
    }
}

impl ToIntegerOrInfinity for f64 {
    /// Applies the ECMAScript `ToIntegerOrInfinity` steps to a number.
    ///
    /// - `NaN`, `+0` and `-0` become `+0.0`.
    /// - `+Infinity` and `-Infinity` are returned unchanged.
    /// - Every other value is truncated toward zero.
    ///
    /// The result never carries a negative zero: the spec result is a mathematical
    /// value, so inputs in `(-1, 0)`, which `trunc` maps to `-0.0`, are normalised
    /// to `+0.0` just like an input of `-0.0` itself.
    fn to_integer_or_infinity(&self) -> f64 {
        if self.is_nan() {
            return 0.0;
        }
        if self.is_infinite() {
            return *self;
        }
        let truncated = self.trunc();
        // `-0.0 == 0.0`, so this covers zero inputs and fractions that truncate to zero.
        if truncated == 0.0 { 0.0 } else { truncated }
    }
}

/// Result of [`ToIntegerOrInfinity::to_integer_or_infinity_as_i64`].
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum ToIntegerOrInfinityResult {
    /// The integer result is `+Infinity` or a non-negative value that does not fit in `i64`.
    Infinity,
    /// The integer result is `-Infinity` or a negative value that does not fit in `i64`.
    NegativeInfinity,
    /// The integer result, representable as an `i64`.
    Value(i64),
}

#[cfg(test)]
mod test {
    use super::*;

    /// Checks `f64::to_integer_or_infinity` on NaN, zero, infinities, and positive and
    /// negative integral / fractional inputs.
    #[expect(clippy::float_cmp)]
    #[test]
    fn test_to_integer_or_infinity() {
        assert_eq!(f64::NAN.to_integer_or_infinity(), 0.0);
        assert_eq!(0.0.to_integer_or_infinity(), 0.0);
        assert_eq!(f64::INFINITY.to_integer_or_infinity(), f64::INFINITY);
        assert_eq!(f64::NEG_INFINITY.to_integer_or_infinity(), f64::NEG_INFINITY);
        assert_eq!(1.0.to_integer_or_infinity(), 1.0);
        // Negative receivers are parenthesised: a method call binds tighter than unary
        // minus, so `-1.0.f()` would evaluate `-(1.0.f())` and never pass a negative input.
        assert_eq!((-1.0_f64).to_integer_or_infinity(), -1.0);
        assert_eq!(1.1.to_integer_or_infinity(), 1.0);
        assert_eq!((-1.1_f64).to_integer_or_infinity(), -1.0);
    }
}
6 changes: 3 additions & 3 deletions crates/oxc_linter/src/rules/unicorn/prefer_string_raw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use oxc_ast::{
AstKind,
};
use oxc_diagnostics::OxcDiagnostic;
use oxc_ecmascript::StringCharAt;
use oxc_ecmascript::{StringCharAt, StringCharAtResult};
use oxc_macros::declare_oxc_lint;
use oxc_span::Span;
use oxc_syntax::keyword::RESERVED_KEYWORDS;
Expand Down Expand Up @@ -152,15 +152,15 @@ impl Rule for PreferStringRaw {
let raw = ctx.source_range(string_literal.span);

let last_char_index = raw.len() - 2;
if raw.char_at(Some(last_char_index as f64)) == Some('\\') {
if raw.char_at(Some(last_char_index as f64)) == StringCharAtResult::Value('\\') {
return;
}

if !raw.contains(r"\\") || raw.contains('`') || raw.contains("${") {
return;
}

let Some(quote) = raw.char_at(Some(0.0)) else {
let StringCharAtResult::Value(quote) = raw.char_at(Some(0.0)) else {
return;
};

Expand Down
56 changes: 26 additions & 30 deletions crates/oxc_minifier/src/peephole/replace_known_methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ use oxc_allocator::IntoIn;
use oxc_ast::ast::*;
use oxc_ecmascript::{
constant_evaluation::{ConstantEvaluation, ValueType},
StringCharAt, StringCharCodeAt, StringIndexOf, StringLastIndexOf, StringSubstring, ToInt32,
StringCharAt, StringCharAtResult, StringCharCodeAt, StringIndexOf, StringLastIndexOf,
StringSubstring, ToInt32,
};
use oxc_span::SPAN;
use oxc_syntax::es_target::ESTarget;
Expand Down Expand Up @@ -174,21 +175,18 @@ impl<'a> PeepholeOptimizations {
return None;
}
let Expression::StringLiteral(s) = object else { return None };
let char_at_index: Option<f64> = match args.first() {
Some(Argument::NumericLiteral(numeric_lit)) => Some(numeric_lit.value),
Some(Argument::UnaryExpression(unary_expr))
if unary_expr.operator == UnaryOperator::UnaryNegation =>
{
let Expression::NumericLiteral(numeric_lit) = &unary_expr.argument else {
return None;
};
Some(-(numeric_lit.value))
let char_at_index = match args.first() {
Some(Argument::SpreadElement(_)) => return None,
Some(arg @ match_expression!(Argument)) => {
Some(ctx.get_side_free_number_value(arg.to_expression())?)
}
None => None,
_ => return None,
};
let result =
&s.value.as_str().char_at(char_at_index).map_or(String::new(), |v| v.to_string());
let result = match s.value.as_str().char_at(char_at_index) {
StringCharAtResult::Value(c) => &c.to_string(),
StringCharAtResult::InvalidChar(_) => return None,
StringCharAtResult::OutOfRange => "",
};
Some(ctx.ast.expression_string_literal(span, result, None))
}

Expand All @@ -201,17 +199,13 @@ impl<'a> PeepholeOptimizations {
) -> Option<Expression<'a>> {
let Expression::StringLiteral(s) = object else { return None };
let char_at_index = match args.first() {
None => Some(0.0),
Some(Argument::SpreadElement(_)) => None,
Some(e) => ctx.get_side_free_number_value(e.to_expression()),
}?;
let value = if (0.0..65536.0).contains(&char_at_index) {
s.value.as_str().char_code_at(Some(char_at_index))? as f64
} else if char_at_index.is_nan() || char_at_index.is_infinite() {
return None;
} else {
f64::NAN
Some(Argument::SpreadElement(_)) => return None,
Some(arg @ match_expression!(Argument)) => {
Some(ctx.get_side_free_number_value(arg.to_expression())?)
}
None => None,
};
let value = s.value.as_str().char_code_at(char_at_index).map_or(f64::NAN, |n| n as f64);
Some(ctx.ast.expression_numeric_literal(span, value, None, NumberBase::Decimal))
}

Expand Down Expand Up @@ -1121,12 +1115,13 @@ mod test {
test("x = 'abcde'.charAt(5)", "x = ''");
test("x = 'abcde'.charAt(-1)", "x = ''");
test("x = 'abcde'.charAt()", "x = 'a'");
test_same("x = 'abcde'.charAt(...foo)");
test_same("x = 'abcde'.charAt(0, ++z)");
test_same("x = 'abcde'.charAt(y)");
test_same("x = 'abcde'.charAt(null)"); // or x = 'a'
test_same("x = 'abcde'.charAt(!0)"); // or x = 'b'
// test("x = '\\ud834\udd1e'.charAt(0)", "x = '\\ud834'");
// test("x = '\\ud834\udd1e'.charAt(1)", "x = '\\udd1e'");
test("x = 'abcde'.charAt(null)", "x = 'a'");
test("x = 'abcde'.charAt(!0)", "x = 'b'");
test_same("x = '\\ud834\\udd1e'.charAt(0)"); // or x = '\\ud834'
test_same("x = '\\ud834\\udd1e'.charAt(1)"); // or x = '\\udd1e'

// Template strings
test("x = `abcdef`.charAt(0)", "x = 'a'");
Expand All @@ -1141,15 +1136,16 @@ mod test {
test("x = 'abcde'.charCodeAt(2)", "x = 99");
test("x = 'abcde'.charCodeAt(3)", "x = 100");
test("x = 'abcde'.charCodeAt(4)", "x = 101");
test_same("x = 'abcde'.charCodeAt(5)");
test("x = 'abcde'.charCodeAt(5)", "x = NaN");
test("x = 'abcde'.charCodeAt(-1)", "x = NaN");
test_same("x = 'abcde'.charCodeAt(...foo)");
test_same("x = 'abcde'.charCodeAt(y)");
test("x = 'abcde'.charCodeAt()", "x = 97");
test("x = 'abcde'.charCodeAt(0, ++z)", "x = 97");
test("x = 'abcde'.charCodeAt(null)", "x = 97");
test("x = 'abcde'.charCodeAt(true)", "x = 98");
// test("x = '\\ud834\udd1e'.charCodeAt(0)", "x = 55348");
// test("x = '\\ud834\udd1e'.charCodeAt(1)", "x = 56606");
test("x = '\\ud834\\udd1e'.charCodeAt(0)", "x = 55348");
test("x = '\\ud834\\udd1e'.charCodeAt(1)", "x = 56606");
test("x = `abcdef`.charCodeAt(0)", "x = 97");
test_same("x = `abcdef ${abc}`.charCodeAt(0)");
}
Expand Down
20 changes: 10 additions & 10 deletions crates/oxc_minifier/tests/peephole/esbuild.rs
Original file line number Diff line number Diff line change
Expand Up @@ -442,16 +442,16 @@ fn js_parser_test() {
test("a = 'xy'.charCodeAt(0)", "a = 120;");
test("a = 'xy'.charCodeAt(1)", "a = 121;");
test("a = 'xy'.charCodeAt(-1)", "a = NaN;");
// test("a = 'xy'.charCodeAt(2)", "a = NaN;");
// test("a = '🧀'.charCodeAt()", "a = 55358;");
// test("a = '🧀'.charCodeAt(0)", "a = 55358;");
// test("a = '🧀'.charCodeAt(1)", "a = 56768;");
// test("a = '🧀'.charCodeAt(-1)", "a = NaN;");
// test("a = '🧀'.charCodeAt(2)", "a = NaN;");
test("a = 'xy'.charCodeAt(NaN)", "a = 'xy'.charCodeAt(NaN);");
test("a = 'xy'.charCodeAt(-Infinity)", "a = 'xy'.charCodeAt(-Infinity);");
test("a = 'xy'.charCodeAt(Infinity)", "a = 'xy'.charCodeAt(Infinity);");
test("a = 'xy'.charCodeAt(0.5)", "a = 'xy'.charCodeAt(0.5);");
test("a = 'xy'.charCodeAt(2)", "a = NaN;");
test("a = '🧀'.charCodeAt()", "a = 55358;");
test("a = '🧀'.charCodeAt(0)", "a = 55358;");
test("a = '🧀'.charCodeAt(1)", "a = 56768;");
test("a = '🧀'.charCodeAt(-1)", "a = NaN;");
test("a = '🧀'.charCodeAt(2)", "a = NaN;");
test("a = 'xy'.charCodeAt(NaN)", "a = 120;");
test("a = 'xy'.charCodeAt(-Infinity)", "a = NaN;");
test("a = 'xy'.charCodeAt(Infinity)", "a = NaN;");
test("a = 'xy'.charCodeAt(0.5)", "a = 120;");
test("a = 'xy'.charCodeAt(1e99)", "a = NaN;");
test("a = 'xy'.charCodeAt('1')", "a = 121;");
test("a = 'xy'.charCodeAt(1, 2)", "a = 121;");
Expand Down

0 comments on commit 125d610

Please sign in to comment.