Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(regular_expression)!: Migrate to new regexp parser API #6741

Merged
merged 1 commit into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 41 additions & 54 deletions crates/oxc_linter/src/rules/eslint/no_control_regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ use oxc_macros::declare_oxc_lint;
use oxc_regular_expression::{
ast::{CapturingGroup, Character, Pattern},
visit::{walk, Visit},
Parser, ParserOptions,
ConstructorParser, Options,
};
use oxc_span::{GetSpan, Span};
use oxc_span::Span;

use crate::{ast_util::extract_regex_flags, context::LintContext, rule::Rule, AstNode};
use crate::{context::LintContext, rule::Rule, AstNode};

fn no_control_regex_diagnostic(count: usize, regex: &str, span: Span) -> OxcDiagnostic {
debug_assert!(count > 0);
Expand Down Expand Up @@ -82,75 +82,63 @@ impl Rule for NoControlRegex {
}

// new RegExp()
AstKind::NewExpression(expr) => {
// constructor is RegExp,
if expr.callee.is_specific_id("RegExp")
// which is provided at least 1 parameter,
&& expr.arguments.len() > 0
{
// where the first one is a string literal
// note: improvements required for strings used via identifier
// references
if let Argument::StringLiteral(pattern) = &expr.arguments[0] {
// get pattern from arguments. Missing or non-string arguments
// will be runtime errors, but are not covered by this rule.
parse_and_check_regex(
context,
&pattern.value,
&expr.arguments,
pattern.span,
);
AstKind::NewExpression(expr) if expr.callee.is_specific_id("RegExp") => {
// note: improvements required for strings used via identifier references
// Missing or non-string arguments will be runtime errors, but are not covered by this rule.
match (&expr.arguments.first(), &expr.arguments.get(1)) {
(
Some(Argument::StringLiteral(pattern)),
Some(Argument::StringLiteral(flags)),
) => {
parse_and_check_regex(context, pattern.span, Some(flags.span));
}
(Some(Argument::StringLiteral(pattern)), _) => {
parse_and_check_regex(context, pattern.span, None);
}
_ => {}
}
}

// RegExp()
AstKind::CallExpression(expr) => {
// constructor is RegExp,
if expr.callee.is_specific_id("RegExp")
// which is provided at least 1 parameter,
&& expr.arguments.len() > 0
{
// where the first one is a string literal
// note: improvements required for strings used via identifier
// references
if let Argument::StringLiteral(pattern) = &expr.arguments[0] {
// get pattern from arguments. Missing or non-string arguments
// will be runtime errors, but are not covered by this rule.
parse_and_check_regex(
context,
&pattern.value,
&expr.arguments,
pattern.span,
);
AstKind::CallExpression(expr) if expr.callee.is_specific_id("RegExp") => {
// note: improvements required for strings used via identifier references
// Missing or non-string arguments will be runtime errors, but are not covered by this rule.
match (&expr.arguments.first(), &expr.arguments.get(1)) {
(
Some(Argument::StringLiteral(pattern)),
Some(Argument::StringLiteral(flags)),
) => {
parse_and_check_regex(context, pattern.span, Some(flags.span));
}
(Some(Argument::StringLiteral(pattern)), _) => {
parse_and_check_regex(context, pattern.span, None);
}
_ => {}
}
}

_ => {}
};
}
}

fn parse_and_check_regex<'a>(
ctx: &LintContext<'a>,
source_text: &'a str,
arguments: &oxc_allocator::Vec<'a, Argument<'a>>,
expr_span: Span,
) {
fn parse_and_check_regex(ctx: &LintContext, pattern_span: Span, flags_span: Option<Span>) {
let allocator = Allocator::default();
let flags = extract_regex_flags(arguments);
let flags_text = flags.map_or(String::new(), |f| f.to_string());
let parser = Parser::new(

let flags_text = flags_span.map(|span| span.source_text(ctx.source_text()));
let parser = ConstructorParser::new(
&allocator,
source_text,
ParserOptions::default()
.with_span_offset(arguments.first().map_or(0, |arg| arg.span().start))
.with_flags(&flags_text),
pattern_span.source_text(ctx.source_text()),
flags_text,
Options {
pattern_span_offset: pattern_span.start,
flags_span_offset: flags_span.map_or(0, |span| span.start),
},
);
let Ok(pattern) = parser.parse() else {
return;
};
check_pattern(ctx, &pattern, expr_span);
check_pattern(ctx, &pattern, pattern_span);
}

fn check_pattern(context: &LintContext, pattern: &Pattern, span: Span) {
Expand Down Expand Up @@ -279,7 +267,6 @@ mod tests {
vec![
r"let r = /\u{0}/u",
r"let r = new RegExp('\\u{0}', 'u');",
r"let r = new RegExp('\\u{0}', `u`);",
Boshen marked this conversation as resolved.
Show resolved Hide resolved
r"let r = /\u{c}/u",
r"let r = /\u{1F}/u",
r"let r = new RegExp('\\u{1F}', 'u');", // flags are known & contain u
Expand Down
58 changes: 34 additions & 24 deletions crates/oxc_linter/src/rules/eslint/no_invalid_regexp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use oxc_allocator::Allocator;
use oxc_ast::{ast::Argument, AstKind};
use oxc_diagnostics::OxcDiagnostic;
use oxc_macros::declare_oxc_lint;
use oxc_regular_expression::{Parser, ParserOptions};
use oxc_regular_expression::{ConstructorParser, Options};
use oxc_span::Span;
use rustc_hash::FxHashSet;
use serde::Deserialize;
Expand Down Expand Up @@ -86,13 +86,20 @@ impl Rule for NoInvalidRegexp {
return;
}

let (mut u_flag_found, mut v_flag_found) = (false, false);
// Validate flags first if exists
if let Some((flags_span_start, flags_text)) = flags_arg {
let (mut u_flag_found, mut v_flag_found) = (false, false);
// `oxc_regular_expression` crate has a ability to validate flags.
// But, it does not accept any `allow_constructor_flags` option.
// And if we omit user defined flags here, `Span` may be incorrect on error reporting.
if let Some(flags_span) = flags_arg {
// Strip quotes
let flags_text =
flags_span.source_text(ctx.source_text()).trim_matches('\'').trim_matches('"');

let mut unique_flags = FxHashSet::default();
for (idx, ch) in flags_text.char_indices() {
#[allow(clippy::cast_possible_truncation)]
let start = flags_span_start + idx as u32;
let start = flags_span.start + 1 + idx as u32;

// Invalid combination: u+v
if ch == 'u' {
Expand Down Expand Up @@ -128,40 +135,43 @@ impl Rule for NoInvalidRegexp {
// Pattern check is skipped when 1st argument is NOT a `StringLiteral`
// e.g. `new RegExp(var)`, `RegExp("str" + var)`
let allocator = Allocator::default();
if let Some((pattern_span_start, pattern_text)) = pattern_arg {
let options = ParserOptions::default()
.with_span_offset(pattern_span_start)
.with_flags(flags_arg.map_or("", |(_, flags_text)| flags_text));
if let Some(pattern_span) = pattern_arg {
let pattern_text = pattern_span.source_text(ctx.source_text());

let flags_text = match (u_flag_found, v_flag_found) {
(true, false) => Some("'u'"),
(_, true) => Some("'v'"),
(false, false) => None,
};

match Parser::new(&allocator, pattern_text, options).parse() {
match ConstructorParser::new(
&allocator,
pattern_text,
flags_text,
Options { pattern_span_offset: pattern_span.start, flags_span_offset: 0 },
)
.parse()
{
Ok(_) => {}
Err(diagnostic) => ctx.diagnostic(diagnostic),
}
}
}
}

/// Returns: (span_start, text)
/// span_start + 1 for opening string bracket.
type ParsedArgument<'a> = (u32, &'a str);
fn parse_arguments_to_check<'a>(
arg1: Option<&Argument<'a>>,
arg2: Option<&Argument<'a>>,
) -> (Option<ParsedArgument<'a>>, Option<ParsedArgument<'a>>) {
) -> (Option<Span>, Option<Span>) {
match (arg1, arg2) {
// ("pattern", "flags")
(Some(Argument::StringLiteral(pattern)), Some(Argument::StringLiteral(flags))) => (
Some((pattern.span.start + 1, pattern.value.as_str())),
Some((flags.span.start + 1, flags.value.as_str())),
),
// (pattern, "flags")
(Some(_arg), Some(Argument::StringLiteral(flags))) => {
(None, Some((flags.span.start + 1, flags.value.as_str())))
(Some(Argument::StringLiteral(pattern)), Some(Argument::StringLiteral(flags))) => {
(Some(pattern.span), Some(flags.span))
}
// (pattern, "flags")
(Some(_arg), Some(Argument::StringLiteral(flags))) => (None, Some(flags.span)),
// ("pattern")
(Some(Argument::StringLiteral(pattern)), None) => {
(Some((pattern.span.start + 1, pattern.value.as_str())), None)
}
(Some(Argument::StringLiteral(pattern)), None) => (Some(pattern.span), None),
// (pattern), ()
_ => (None, None),
}
Expand All @@ -172,7 +182,7 @@ fn test() {
use crate::tester::Tester;

let pass = vec![
("[RegExp(''), /a/uv]", None),
("RegExp('')", None),
("RegExp()", None),
("RegExp('.', 'g')", None),
("new RegExp('.')", None),
Expand Down
15 changes: 8 additions & 7 deletions crates/oxc_linter/src/rules/eslint/no_regex_spaces.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use oxc_macros::declare_oxc_lint;
use oxc_regular_expression::{
ast::{Character, Pattern},
visit::{RegExpAstKind, Visit},
Parser, ParserOptions,
ConstructorParser, Options,
};
use oxc_span::Span;

Expand Down Expand Up @@ -63,13 +63,13 @@ impl Rule for NoRegexSpaces {
}

AstKind::CallExpression(expr) if Self::is_regexp_call_expression(expr) => {
if let Some(span) = Self::find_expr_to_report(&expr.arguments) {
if let Some(span) = Self::find_expr_to_report(&expr.arguments, ctx) {
ctx.diagnostic(no_regex_spaces_diagnostic(span)); // RegExp('a b')
}
}

AstKind::NewExpression(expr) if Self::is_regexp_new_expression(expr) => {
if let Some(span) = Self::find_expr_to_report(&expr.arguments) {
if let Some(span) = Self::find_expr_to_report(&expr.arguments, ctx) {
ctx.diagnostic(no_regex_spaces_diagnostic(span)); // new RegExp('a b')
}
}
Expand All @@ -90,7 +90,7 @@ impl NoRegexSpaces {
find_consecutive_spaces(pattern)
}

fn find_expr_to_report(args: &Vec<'_, Argument<'_>>) -> Option<Span> {
fn find_expr_to_report(args: &Vec<'_, Argument<'_>>, ctx: &LintContext) -> Option<Span> {
if let Some(expr) = args.get(1).and_then(Argument::as_expression) {
if !expr.is_string_literal() {
return None; // skip on indeterminate flag, e.g. RegExp('a b', flags)
Expand All @@ -105,10 +105,11 @@ impl NoRegexSpaces {
}

let alloc = Allocator::default();
let parser = Parser::new(
let parser = ConstructorParser::new(
&alloc,
pattern.value.as_str(),
ParserOptions::default().with_span_offset(pattern.span.start + 1),
pattern.span.source_text(ctx.source_text()),
None,
Options { pattern_span_offset: pattern.span.start, ..Options::default() },
);
let parsed_pattern = parser.parse().ok()?;

Expand Down
32 changes: 16 additions & 16 deletions crates/oxc_linter/src/snapshots/no_invalid_regexp.snap
Original file line number Diff line number Diff line change
Expand Up @@ -104,51 +104,51 @@ source: crates/oxc_linter/src/tester.rs
╰────

⚠ eslint(no-invalid-regexp): Invalid regular expression: Could not parse the entire pattern
╭─[no_invalid_regexp.tsx:1:14]
╭─[no_invalid_regexp.tsx:1:15]
1 │ new RegExp('\\a', 'u');
· ▲
·
╰────

⚠ eslint(no-invalid-regexp): Invalid regular expression: Could not parse the entire pattern
╭─[no_invalid_regexp.tsx:1:14]
╭─[no_invalid_regexp.tsx:1:15]
1 │ new RegExp('\\a', 'u');
· ▲
·
╰────

⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier
╭─[no_invalid_regexp.tsx:1:14]
╭─[no_invalid_regexp.tsx:1:15]
1 │ RegExp('\\u{0}*');
· ─
·
╰────

⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier
╭─[no_invalid_regexp.tsx:1:18]
╭─[no_invalid_regexp.tsx:1:19]
1 │ new RegExp('\\u{0}*');
· ─
·
╰────

⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier
╭─[no_invalid_regexp.tsx:1:18]
╭─[no_invalid_regexp.tsx:1:19]
1 │ new RegExp('\\u{0}*', '');
· ─
·
╰────

⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier
╭─[no_invalid_regexp.tsx:1:18]
╭─[no_invalid_regexp.tsx:1:19]
1 │ new RegExp('\\u{0}*', 'a');
· ─
·
╰────

⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier
╭─[no_invalid_regexp.tsx:1:14]
╭─[no_invalid_regexp.tsx:1:15]
1 │ RegExp('\\u{0}*');
· ─
·
╰────

⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid extended atom escape
╭─[no_invalid_regexp.tsx:1:13]
1 │ new RegExp('\\');
· ─
· ─
╰────

⚠ eslint(no-invalid-regexp): Invalid regular expression: Unknown flag
Expand Down Expand Up @@ -196,7 +196,7 @@ source: crates/oxc_linter/src/tester.rs
⚠ eslint(no-invalid-regexp): Invalid regular expression: Unterminated character class
╭─[no_invalid_regexp.tsx:1:13]
1 │ new RegExp('[[]\\u{0}*' /* valid only with `u` flag */, 'v')
· ────────
· ────────
╰────

⚠ eslint(no-invalid-regexp): Invalid regular expression: Duplicated capturing group names
Expand Down
8 changes: 4 additions & 4 deletions crates/oxc_linter/src/snapshots/no_regex_spaces.snap
Original file line number Diff line number Diff line change
Expand Up @@ -128,16 +128,16 @@ source: crates/oxc_linter/src/tester.rs
help: Use a quantifier: ` {2}`

⚠ eslint(no-regex-spaces): Multiple consecutive spaces are hard to count.
╭─[no_regex_spaces.tsx:1:25]
╭─[no_regex_spaces.tsx:1:26]
1 │ var foo = new RegExp('\\d ')
· ──
· ──
╰────
help: Use a quantifier: ` {2}`

⚠ eslint(no-regex-spaces): Multiple consecutive spaces are hard to count.
╭─[no_regex_spaces.tsx:1:25]
╭─[no_regex_spaces.tsx:1:26]
1 │ var foo = RegExp('\\u0041 ')
· ───
· ───
╰────
help: Use a quantifier: ` {3}`

Expand Down
Loading
Loading