Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(regular_expression): Improve AST docs with refactoring may_contain_strings #5665

Merged
merged 1 commit into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions crates/oxc_ast/src/generated/assert_layouts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1506,8 +1506,8 @@ const _: () = {
assert!(align_of::<CharacterClass>() == 8usize);
assert!(offset_of!(CharacterClass, span) == 0usize);
assert!(offset_of!(CharacterClass, negative) == 8usize);
assert!(offset_of!(CharacterClass, kind) == 9usize);
assert!(offset_of!(CharacterClass, strings) == 10usize);
assert!(offset_of!(CharacterClass, strings) == 9usize);
assert!(offset_of!(CharacterClass, kind) == 10usize);
assert!(offset_of!(CharacterClass, body) == 16usize);

assert!(size_of::<CharacterClassContentsKind>() == 1usize);
Expand Down Expand Up @@ -3061,8 +3061,8 @@ const _: () = {
assert!(align_of::<CharacterClass>() == 4usize);
assert!(offset_of!(CharacterClass, span) == 0usize);
assert!(offset_of!(CharacterClass, negative) == 8usize);
assert!(offset_of!(CharacterClass, kind) == 9usize);
assert!(offset_of!(CharacterClass, strings) == 10usize);
assert!(offset_of!(CharacterClass, strings) == 9usize);
assert!(offset_of!(CharacterClass, kind) == 10usize);
assert!(offset_of!(CharacterClass, body) == 12usize);

assert!(size_of::<CharacterClassContentsKind>() == 1usize);
Expand Down
12 changes: 9 additions & 3 deletions crates/oxc_regular_expression/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ pub enum CharacterClassEscapeKind {
pub struct UnicodePropertyEscape<'a> {
pub span: Span,
pub negative: bool,
/// `true` if `UnicodeSetsMode` and `name` matched unicode property of strings.
/// `true` if in `UnicodeSetsMode` and `name` matches a Unicode property of strings.
pub strings: bool,
pub name: Atom<'a>,
pub value: Option<Atom<'a>>,
Expand All @@ -237,8 +237,11 @@ pub struct Dot {
pub struct CharacterClass<'a> {
pub span: Span,
pub negative: bool,
pub kind: CharacterClassContentsKind,
/// `true` if:
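/// - `body` contains a [`UnicodePropertyEscape`], nested [`CharacterClass`] or [`ClassStringDisjunction`] whose `strings` is `true`
/// - and those operands satisfy the rule for `kind`: any operand for a union, every operand for an intersection, the first operand for a subtraction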
pub strings: bool,
pub kind: CharacterClassContentsKind,
pub body: Vec<'a, CharacterClassContents<'a>>,
}

Expand Down Expand Up @@ -288,7 +291,7 @@ pub struct CharacterClassRange {
#[cfg_attr(feature = "serialize", derive(Serialize, Tsify))]
pub struct ClassStringDisjunction<'a> {
pub span: Span,
/// `true` if body is empty or contain [`ClassString`] which `strings` is `true`
/// `true` if body is empty or contains a [`ClassString`] whose `strings` is `true`.
pub strings: bool,
pub body: Vec<'a, ClassString<'a>>,
}
Expand All @@ -313,6 +316,7 @@ pub struct ClassString<'a> {
#[cfg_attr(feature = "serialize", derive(Serialize, Tsify))]
pub struct CapturingGroup<'a> {
pub span: Span,
/// Group name to be referenced by [`NamedReference`].
pub name: Option<Atom<'a>>,
pub body: Disjunction<'a>,
}
Expand All @@ -330,6 +334,8 @@ pub struct IgnoreGroup<'a> {
pub body: Disjunction<'a>,
}

/// Pattern modifiers in [`IgnoreGroup`].
/// e.g. `(?i:...)`, `(?-s:...)`
#[ast]
#[derive(Debug)]
#[generate_derive(CloneIn, ContentEq, ContentHash)]
Expand Down
64 changes: 30 additions & 34 deletions crates/oxc_regular_expression/src/body_parser/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -727,7 +727,7 @@ impl<'a> PatternParser<'a> {
let (kind, body) = self.parse_class_contents()?;

if self.reader.eat(']') {
let strings = body.iter().any(PatternParser::may_contain_strings_in_class_contents);
let strings = PatternParser::may_contain_strings_in_class_contents(&kind, &body);

// [SS:EE] CharacterClass :: [^ ClassContents ]
// It is a Syntax Error if MayContainStrings of the ClassContents is true.
Expand Down Expand Up @@ -1259,30 +1259,7 @@ impl<'a> PatternParser<'a> {
let (kind, body) = self.parse_class_contents()?;

if self.reader.eat(']') {
let strings = match kind {
// MayContainStrings is true
// - if ClassContents is ClassUnion
// - && ClassUnion has ClassOperands
// - && at least 1 ClassOperand has MayContainStrings: true
ast::CharacterClassContentsKind::Union => {
body.iter().any(PatternParser::may_contain_strings_in_class_contents)
}
// MayContainStrings is true
// - if ClassContents is ClassIntersection
// - && ClassIntersection has ClassOperands
// - && all ClassOperands have MayContainStrings: true
ast::CharacterClassContentsKind::Intersection => {
body.iter().all(PatternParser::may_contain_strings_in_class_contents)
}
// MayContainStrings is true
// - if ClassContents is ClassSubtraction
// - && ClassSubtraction has ClassOperands
// - && the first ClassOperand has MayContainStrings: true
ast::CharacterClassContentsKind::Subtraction => body
.iter()
.next()
.map_or(false, PatternParser::may_contain_strings_in_class_contents),
};
let strings = PatternParser::may_contain_strings_in_class_contents(&kind, &body);

// [SS:EE] NestedClass :: [^ ClassContents ]
// It is a Syntax Error if MayContainStrings of the ClassContents is true.
Expand Down Expand Up @@ -2163,27 +2140,46 @@ impl<'a> PatternParser<'a> {

// ---

fn may_contain_strings_in_class_contents(item: &ast::CharacterClassContents) -> bool {
match item {
fn may_contain_strings_in_class_contents(
kind: &ast::CharacterClassContentsKind,
body: &Vec<'a, ast::CharacterClassContents<'a>>,
) -> bool {
let may_contain_strings = |item: &ast::CharacterClassContents<'a>| match item {
// MayContainStrings is true
// - if ClassContents contains UnicodePropertyValueExpression
// - && UnicodePropertyValueExpression is LoneUnicodePropertyNameOrValue
// - && it is a binary property of strings (can be true only with `UnicodeSetsMode`)
ast::CharacterClassContents::UnicodePropertyEscape(unicode_property_escape) => {
unicode_property_escape.strings
}
ast::CharacterClassContents::UnicodePropertyEscape(item) => item.strings,
// MayContainStrings is true
// - if ClassStringDisjunction is [empty]
// - || if ClassStringDisjunction contains ClassString
// - && ClassString is [empty]
// - || ClassString contains 2 or more ClassSetCharacters
ast::CharacterClassContents::ClassStringDisjunction(class_string_disjunction) => {
class_string_disjunction.strings
}
ast::CharacterClassContents::ClassStringDisjunction(item) => item.strings,
// MayContainStrings is true
// - if NestedClass has MayContainStrings: true
ast::CharacterClassContents::NestedCharacterClass(nested_class) => nested_class.strings,
ast::CharacterClassContents::NestedCharacterClass(item) => item.strings,
_ => false,
};

match kind {
// MayContainStrings is true
// - if ClassContents is ClassUnion
// - && ClassUnion has ClassOperands
// - && at least 1 ClassOperand has MayContainStrings: true
ast::CharacterClassContentsKind::Union => body.iter().any(may_contain_strings),
// MayContainStrings is true
// - if ClassContents is ClassIntersection
// - && ClassIntersection has ClassOperands
// - && all ClassOperands have MayContainStrings: true
ast::CharacterClassContentsKind::Intersection => body.iter().all(may_contain_strings),
// MayContainStrings is true
// - if ClassContents is ClassSubtraction
// - && ClassSubtraction has ClassOperands
// - && the first ClassOperand has MayContainStrings: true
ast::CharacterClassContentsKind::Subtraction => {
body.iter().next().map_or(false, may_contain_strings)
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,8 @@ impl<'old_alloc, 'new_alloc> CloneIn<'new_alloc> for CharacterClass<'old_alloc>
CharacterClass {
span: CloneIn::clone_in(&self.span, allocator),
negative: CloneIn::clone_in(&self.negative, allocator),
kind: CloneIn::clone_in(&self.kind, allocator),
strings: CloneIn::clone_in(&self.strings, allocator),
kind: CloneIn::clone_in(&self.kind, allocator),
body: CloneIn::clone_in(&self.body, allocator),
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,8 @@ impl ContentEq for Dot {
impl<'a> ContentEq for CharacterClass<'a> {
fn content_eq(&self, other: &Self) -> bool {
ContentEq::content_eq(&self.negative, &other.negative)
&& ContentEq::content_eq(&self.kind, &other.kind)
&& ContentEq::content_eq(&self.strings, &other.strings)
&& ContentEq::content_eq(&self.kind, &other.kind)
&& ContentEq::content_eq(&self.body, &other.body)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,8 @@ impl ContentHash for Dot {
impl<'a> ContentHash for CharacterClass<'a> {
fn content_hash<H: Hasher>(&self, state: &mut H) {
ContentHash::content_hash(&self.negative, state);
ContentHash::content_hash(&self.kind, state);
ContentHash::content_hash(&self.strings, state);
ContentHash::content_hash(&self.kind, state);
ContentHash::content_hash(&self.body, state);
}
}
Expand Down