From d901772daa149c409cd0979af8913daf02a28f3e Mon Sep 17 00:00:00 2001 From: Boshen <1430279+Boshen@users.noreply.github.com> Date: Fri, 20 Sep 2024 14:42:28 +0000 Subject: [PATCH] feat(codegen): implement minify number from terser (#5929) --- crates/oxc_codegen/src/gen.rs | 112 +++++++----------- .../oxc_codegen/tests/integration/esbuild.rs | 92 +++++++------- tasks/minsize/minsize.snap | 22 ++-- 3 files changed, 100 insertions(+), 126 deletions(-) diff --git a/crates/oxc_codegen/src/gen.rs b/crates/oxc_codegen/src/gen.rs index 7654ad4e4fd9b..1655b749810eb 100644 --- a/crates/oxc_codegen/src/gen.rs +++ b/crates/oxc_codegen/src/gen.rs @@ -1110,16 +1110,17 @@ impl<'a> Gen for NumericLiteral<'a> { #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)] fn gen(&self, p: &mut Codegen, _ctx: Context) { p.add_source_mapping(self.span.start); - if self.value != f64::INFINITY && (p.options.minify || self.raw.is_empty()) { + if !p.options.minify && !self.raw.is_empty() { + p.print_str(self.raw); + need_space_before_dot(self.raw, p); + } else if self.value != f64::INFINITY { p.print_space_before_identifier(); let abs_value = self.value.abs(); - if self.value.is_sign_negative() { p.print_space_before_operator(Operator::Unary(UnaryOperator::UnaryNegation)); p.print_str("-"); } - - let result = print_non_negative_float(abs_value, p); + let result = get_minified_number(abs_value); let bytes = result.as_str(); p.print_str(bytes); need_space_before_dot(bytes, p); @@ -1133,78 +1134,51 @@ impl<'a> Gen for NumericLiteral<'a> { } } -// TODO: refactor this with less allocations -// -#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] -fn print_non_negative_float(value: f64, p: &Codegen) -> String { +// https://github.com/terser/terser/blob/c5315c3fd6321d6b2e076af35a70ef532f498505/lib/output.js#L2418 +#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss, clippy::cast_possible_wrap)] +fn get_minified_number(num: f64) -> String { use 
oxc_syntax::number::ToJsString; - if value < 1000.0 && value.fract() == 0.0 { - return value.to_js_string(); - } - let mut result = format!("{value:e}"); - let chars = result.as_bytes(); - let len = chars.len(); - let dot = chars.iter().position(|&c| c == b'.'); - let u8_to_string = |num: &[u8]| { - // SAFETY: criteria of `from_utf8_unchecked`.are met. - unsafe { String::from_utf8_unchecked(num.to_vec()) } - }; - - if dot == Some(1) && chars[0] == b'0' { - // Strip off the leading zero when minifying - // "0.5" => ".5" - let stripped_result = &chars[1..]; - // after stripping the leading zero, the after dot position will be start from 1 - let after_dot = 1; - - // Try using an exponent - // "0.001" => "1e-3" - if stripped_result[after_dot] == b'0' { - let mut i = after_dot + 1; - while stripped_result[i] == b'0' { - i += 1; - } - let remaining = &stripped_result[i..]; - let exponent = format!("-{}", remaining.len() - after_dot + i); - - // Only switch if it's actually shorter - if stripped_result.len() > remaining.len() + 1 + exponent.len() { - result = format!("{}e{}", u8_to_string(remaining), exponent); - } else { - result = u8_to_string(stripped_result); - } - } else { - result = u8_to_string(stripped_result); - } - } else if chars[len - 1] == b'0' { - // Simplify numbers ending with "0" by trying to use an exponent - // "1000" => "1e3" - let mut i = len - 1; - while i > 0 && chars[i - 1] == b'0' { - i -= 1; - } - let remaining = &chars[0..i]; - let exponent = format!("{}", chars.len() - i); + if num < 1000.0 && num.fract() == 0.0 { + return num.to_js_string(); + } - // Only switch if it's actually shorter - if chars.len() > remaining.len() + 1 + exponent.len() { - result = format!("{}e{}", u8_to_string(remaining), exponent); - } else { - result = u8_to_string(chars); - } + let mut s = num.to_js_string(); + + if s.starts_with("0.") { + s = s[1..].to_string(); } - if p.options.minify && value.fract() == 0.0 { - let value = value as u64; - if 
(1_000_000_000_000..=0xFFFF_FFFF_FFFF_F800).contains(&value) { - let hex = format!("{value:#x}"); - if hex.len() < result.len() { - result = hex; - } + s = s.cow_replacen("e+", "e", 1).to_string(); + + let mut candidates = vec![s.clone()]; + + if num.fract() == 0.0 { + candidates.push(format!("0x{:x}", num as u128)); + } + + if s.starts_with(".0") { + // create `1e-2` + if let Some((i, _)) = s[1..].bytes().enumerate().find(|(_, c)| *c != b'0') { + let len = i + 1; // `+1` to include the dot. + let digits = &s[len..]; + candidates.push(format!("{digits}e-{}", digits.len() + len - 1)); + } + } else if s.ends_with('0') { + // create 1e2 + if let Some((len, _)) = s.bytes().rev().enumerate().find(|(_, c)| *c != b'0') { + candidates.push(format!("{}e{len}", &s[0..s.len() - len])); } + } else if let Some((integer, point, exponent)) = + s.split_once('.').and_then(|(a, b)| b.split_once('e').map(|e| (a, e.0, e.1))) + { + // `1.2e101` -> ("1", "2", "101") + candidates.push(format!( + "{integer}{point}e{}", + exponent.parse::<isize>().unwrap() - point.len() as isize + )); } - result + candidates.into_iter().min_by_key(String::len).unwrap() } impl<'a> Gen for BigIntLiteral<'a> { diff --git a/crates/oxc_codegen/tests/integration/esbuild.rs b/crates/oxc_codegen/tests/integration/esbuild.rs index 0b52ce294304b..22657df5704d3 100644 --- a/crates/oxc_codegen/tests/integration/esbuild.rs +++ b/crates/oxc_codegen/tests/integration/esbuild.rs @@ -2,18 +2,18 @@ use crate::tester::{test, test_minify}; +// NOTE: These values are aligned with terser, not esbuild. 
#[test] -#[ignore] fn test_number() { // Check "1eN" test("x = 1e-100", "x = 1e-100;\n"); test("x = 1e-4", "x = 1e-4;\n"); test("x = 1e-3", "x = 1e-3;\n"); - // test("x = 1e-2", "x = 0.01;\n"); - // test("x = 1e-1", "x = 0.1;\n"); - // test("x = 1e0", "x = 1;\n"); - // test("x = 1e1", "x = 10;\n"); - // test("x = 1e2", "x = 100;\n"); + test("x = 1e-2", "x = 1e-2;\n"); + test("x = 1e-1", "x = 1e-1;\n"); + test("x = 1e0", "x = 1e0;\n"); + test("x = 1e1", "x = 1e1;\n"); + test("x = 1e2", "x = 1e2;\n"); test("x = 1e3", "x = 1e3;\n"); test("x = 1e4", "x = 1e4;\n"); test("x = 1e100", "x = 1e100;\n"); @@ -28,22 +28,22 @@ fn test_number() { test_minify("x = 1e2", "x=100;"); test_minify("x = 1e3", "x=1e3;"); test_minify("x = 1e4", "x=1e4;"); - // test_minify("x = 1e100", "x=1e100;"); + test_minify("x = 1e100", "x=1e100;"); // Check "12eN" test("x = 12e-100", "x = 12e-100;\n"); test("x = 12e-5", "x = 12e-5;\n"); test("x = 12e-4", "x = 12e-4;\n"); - // test("x = 12e-3", "x = 0.012;\n"); - // test("x = 12e-2", "x = 0.12;\n"); - // test("x = 12e-1", "x = 1.2;\n"); - // test("x = 12e0", "x = 12;\n"); - // test("x = 12e1", "x = 120;\n"); - // test("x = 12e2", "x = 1200;\n"); + test("x = 12e-3", "x = 12e-3;\n"); + test("x = 12e-2", "x = 12e-2;\n"); + test("x = 12e-1", "x = 12e-1;\n"); + test("x = 12e0", "x = 12e0;\n"); + test("x = 12e1", "x = 12e1;\n"); + test("x = 12e2", "x = 12e2;\n"); test("x = 12e3", "x = 12e3;\n"); test("x = 12e4", "x = 12e4;\n"); test("x = 12e100", "x = 12e100;\n"); - // test_minify("x = 12e-100", "x=12e-100;"); + test_minify("x = 12e-100", "x=1.2e-99;"); test_minify("x = 12e-6", "x=12e-6;"); test_minify("x = 12e-5", "x=12e-5;"); test_minify("x = 12e-4", "x=.0012;"); @@ -55,7 +55,7 @@ fn test_number() { test_minify("x = 12e2", "x=1200;"); test_minify("x = 12e3", "x=12e3;"); test_minify("x = 12e4", "x=12e4;"); - // test_minify("x = 12e100", "x=12e100;"); + test_minify("x = 12e100", "x=12e100;"); // Check cases for "A.BeX" => "ABeY" simplification test("x = 
123456789", "x = 123456789;\n"); @@ -66,11 +66,11 @@ fn test_number() { test("x = 10000123456789", "x = 10000123456789;\n"); test("x = 100000123456789", "x = 100000123456789;\n"); test("x = 1000000123456789", "x = 1000000123456789;\n"); - // test("x = 10000000123456789", "x = 10000000123456788;\n"); - // test("x = 100000000123456789", "x = 100000000123456780;\n"); - // test("x = 1000000000123456789", "x = 1000000000123456800;\n"); - // test("x = 10000000000123456789", "x = 10000000000123458e3;\n"); - // test("x = 100000000000123456789", "x = 10000000000012345e4;\n"); + test("x = 10000000123456789", "x = 10000000123456789;\n"); + test("x = 100000000123456789", "x = 100000000123456789;\n"); + test("x = 1000000000123456789", "x = 1000000000123456789;\n"); + test("x = 10000000000123456789", "x = 10000000000123456789;\n"); + test("x = 100000000000123456789", "x = 100000000000123456789;\n"); // Check numbers around the ends of various integer ranges. These were // crashing in the WebAssembly build due to a bug in the Go runtime. 
@@ -92,36 +92,36 @@ fn test_number() { test_minify("x = -0x1_0000_0001", "x=-4294967297;"); // int64 - test_minify("x = 0x7fff_ffff_ffff_fdff", "x=9223372036854775e3;"); - test_minify("x = 0x8000_0000_0000_0000", "x=9223372036854776e3;"); - test_minify("x = 0x8000_0000_0000_3000", "x=9223372036854788e3;"); - test_minify("x = -0x7fff_ffff_ffff_fdff", "x=-9223372036854775e3;"); - test_minify("x = -0x8000_0000_0000_0000", "x=-9223372036854776e3;"); - test_minify("x = -0x8000_0000_0000_3000", "x=-9223372036854788e3;"); + test_minify("x = 0x7fff_ffff_ffff_fdff", "x=0x7ffffffffffffc00;"); + test_minify("x = 0x8000_0000_0000_0000", "x=0x8000000000000000;"); + test_minify("x = 0x8000_0000_0000_3000", "x=0x8000000000003000;"); + test_minify("x = -0x7fff_ffff_ffff_fdff", "x=-0x7ffffffffffffc00;"); + test_minify("x = -0x8000_0000_0000_0000", "x=-0x8000000000000000;"); + test_minify("x = -0x8000_0000_0000_3000", "x=-0x8000000000003000;"); // uint64 - test_minify("x = 0xffff_ffff_ffff_fbff", "x=1844674407370955e4;"); - test_minify("x = 0x1_0000_0000_0000_0000", "x=18446744073709552e3;"); - test_minify("x = 0x1_0000_0000_0000_1000", "x=18446744073709556e3;"); - test_minify("x = -0xffff_ffff_ffff_fbff", "x=-1844674407370955e4;"); - test_minify("x = -0x1_0000_0000_0000_0000", "x=-18446744073709552e3;"); - test_minify("x = -0x1_0000_0000_0000_1000", "x=-18446744073709556e3;"); + test_minify("x = 0xffff_ffff_ffff_fbff", "x=0xfffffffffffff800;"); + test_minify("x = 0x1_0000_0000_0000_0000", "x=0x10000000000000000;"); + test_minify("x = 0x1_0000_0000_0000_1000", "x=0x10000000000001000;"); + test_minify("x = -0xffff_ffff_ffff_fbff", "x=-0xfffffffffffff800;"); + test_minify("x = -0x1_0000_0000_0000_0000", "x=-0x10000000000000000;"); + test_minify("x = -0x1_0000_0000_0000_1000", "x=-0x10000000000001000;"); // Check the hex vs. 
decimal decision boundary when minifying - // test("x = 999999999999", "x = 999999999999;\n"); - // test("x = 1000000000001", "x = 1000000000001;\n"); - // test("x = 0x0FFF_FFFF_FFFF_FF80", "x = 1152921504606846800;\n"); - // test("x = 0x1000_0000_0000_0000", "x = 1152921504606847e3;\n"); - // test("x = 0xFFFF_FFFF_FFFF_F000", "x = 18446744073709548e3;\n"); - // test("x = 0xFFFF_FFFF_FFFF_F800", "x = 1844674407370955e4;\n"); - // test("x = 0xFFFF_FFFF_FFFF_FFFF", "x = 18446744073709552e3;\n"); - // test_minify("x = 999999999999", "x=999999999999;"); - // test_minify("x = 1000000000001", "x=0xe8d4a51001;"); - // test_minify("x = 0x0FFF_FFFF_FFFF_FF80", "x=0xfffffffffffff80;"); - // test_minify("x = 0x1000_0000_0000_0000", "x=1152921504606847e3;"); - // test_minify("x = 0xFFFF_FFFF_FFFF_F000", "x=0xfffffffffffff000;"); - // test_minify("x = 0xFFFF_FFFF_FFFF_F800", "x=1844674407370955e4;"); - // test_minify("x = 0xFFFF_FFFF_FFFF_FFFF", "x=18446744073709552e3;"); + test("x = 999999999999", "x = 999999999999;\n"); + test("x = 1000000000001", "x = 1000000000001;\n"); + test("x = 0x0FFF_FFFF_FFFF_FF80", "x = 0x0FFF_FFFF_FFFF_FF80;\n"); + test("x = 0x1000_0000_0000_0000", "x = 0x1000_0000_0000_0000;\n"); + test("x = 0xFFFF_FFFF_FFFF_F000", "x = 0xFFFF_FFFF_FFFF_F000;\n"); + test("x = 0xFFFF_FFFF_FFFF_F800", "x = 0xFFFF_FFFF_FFFF_F800;\n"); + test("x = 0xFFFF_FFFF_FFFF_FFFF", "x = 0xFFFF_FFFF_FFFF_FFFF;\n"); + test_minify("x = 999999999999", "x=999999999999;"); + test_minify("x = 1000000000001", "x=0xe8d4a51001;"); + test_minify("x = 0x0FFF_FFFF_FFFF_FF80", "x=0xfffffffffffff80;"); + test_minify("x = 0x1000_0000_0000_0000", "x=0x1000000000000000;"); + test_minify("x = 0xFFFF_FFFF_FFFF_F000", "x=0xfffffffffffff000;"); + test_minify("x = 0xFFFF_FFFF_FFFF_F800", "x=0xfffffffffffff800;"); + test_minify("x = 0xFFFF_FFFF_FFFF_FFFF", "x=0x10000000000000000;"); // Check printing a space in between a number and a subsequent "." 
test_minify("x = 0.0001 .y", "x=1e-4.y;"); diff --git a/tasks/minsize/minsize.snap b/tasks/minsize/minsize.snap index 5bf6aed77c500..78a551bfcef68 100644 --- a/tasks/minsize/minsize.snap +++ b/tasks/minsize/minsize.snap @@ -1,26 +1,26 @@ Original | Minified | esbuild | Gzip | esbuild -72.14 kB | 24.37 kB | 23.70 kB | 8.73 kB | 8.54 kB | react.development.js +72.14 kB | 24.32 kB | 23.70 kB | 8.71 kB | 8.54 kB | react.development.js -173.90 kB | 61.83 kB | 59.82 kB | 19.59 kB | 19.33 kB | moment.js +173.90 kB | 61.79 kB | 59.82 kB | 19.57 kB | 19.33 kB | moment.js -287.63 kB | 92.93 kB | 90.07 kB | 32.35 kB | 31.95 kB | jquery.js +287.63 kB | 92.91 kB | 90.07 kB | 32.33 kB | 31.95 kB | jquery.js 342.15 kB | 122.97 kB | 118.14 kB | 45.05 kB | 44.37 kB | vue.js -544.10 kB | 73.57 kB | 72.48 kB | 26.14 kB | 26.20 kB | lodash.js +544.10 kB | 73.57 kB | 72.48 kB | 26.15 kB | 26.20 kB | lodash.js -555.77 kB | 275.81 kB | 270.13 kB | 91.66 kB | 90.80 kB | d3.js +555.77 kB | 274.81 kB | 270.13 kB | 91.40 kB | 90.80 kB | d3.js -1.01 MB | 471.78 kB | 458.89 kB | 127.61 kB | 126.71 kB | bundle.min.js +1.01 MB | 471.70 kB | 458.89 kB | 127.56 kB | 126.71 kB | bundle.min.js -1.25 MB | 676.14 kB | 646.76 kB | 167.18 kB | 163.73 kB | three.js +1.25 MB | 673.75 kB | 646.76 kB | 166.76 kB | 163.73 kB | three.js -2.14 MB | 751.46 kB | 724.14 kB | 182.74 kB | 181.07 kB | victory.js +2.14 MB | 743.40 kB | 724.14 kB | 181.95 kB | 181.07 kB | victory.js -3.20 MB | 1.03 MB | 1.01 MB | 332.60 kB | 331.56 kB | echarts.js +3.20 MB | 1.03 MB | 1.01 MB | 332.45 kB | 331.56 kB | echarts.js -6.69 MB | 2.42 MB | 2.31 MB | 503.22 kB | 488.28 kB | antd.js +6.69 MB | 2.42 MB | 2.31 MB | 503.16 kB | 488.28 kB | antd.js -10.95 MB | 3.60 MB | 3.49 MB | 915.21 kB | 915.50 kB | typescript.js +10.95 MB | 3.57 MB | 3.49 MB | 912.37 kB | 915.50 kB | typescript.js