Skip to content

Commit

Permalink
refactor(regular_expression): Improve AST docs with refactoring may_c…
Browse files Browse the repository at this point in the history
…ontain_strings (#5665)

Follow up #5661
  • Loading branch information
leaysgur committed Sep 10, 2024
1 parent c6bbf94 commit 2da42ef
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 44 deletions.
8 changes: 4 additions & 4 deletions crates/oxc_ast/src/generated/assert_layouts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1506,8 +1506,8 @@ const _: () = {
assert!(align_of::<CharacterClass>() == 8usize);
assert!(offset_of!(CharacterClass, span) == 0usize);
assert!(offset_of!(CharacterClass, negative) == 8usize);
assert!(offset_of!(CharacterClass, kind) == 9usize);
assert!(offset_of!(CharacterClass, strings) == 10usize);
assert!(offset_of!(CharacterClass, strings) == 9usize);
assert!(offset_of!(CharacterClass, kind) == 10usize);
assert!(offset_of!(CharacterClass, body) == 16usize);

assert!(size_of::<CharacterClassContentsKind>() == 1usize);
Expand Down Expand Up @@ -3061,8 +3061,8 @@ const _: () = {
assert!(align_of::<CharacterClass>() == 4usize);
assert!(offset_of!(CharacterClass, span) == 0usize);
assert!(offset_of!(CharacterClass, negative) == 8usize);
assert!(offset_of!(CharacterClass, kind) == 9usize);
assert!(offset_of!(CharacterClass, strings) == 10usize);
assert!(offset_of!(CharacterClass, strings) == 9usize);
assert!(offset_of!(CharacterClass, kind) == 10usize);
assert!(offset_of!(CharacterClass, body) == 12usize);

assert!(size_of::<CharacterClassContentsKind>() == 1usize);
Expand Down
12 changes: 9 additions & 3 deletions crates/oxc_regular_expression/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ pub enum CharacterClassEscapeKind {
pub struct UnicodePropertyEscape<'a> {
pub span: Span,
pub negative: bool,
/// `true` if `UnicodeSetsMode` and `name` matched unicode property of strings.
/// `true` if in `UnicodeSetsMode` and `name` matches a Unicode property of strings.
pub strings: bool,
pub name: Atom<'a>,
pub value: Option<Atom<'a>>,
Expand All @@ -237,8 +237,11 @@ pub struct Dot {
pub struct CharacterClass<'a> {
pub span: Span,
pub negative: bool,
pub kind: CharacterClassContentsKind,
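/// `true` if:
/// - `body` contains a [`UnicodePropertyEscape`], a nested [`CharacterClass`] or a [`ClassStringDisjunction`] whose `strings` is `true`
/// - and those operands satisfy the rule for `kind` (any operand for a union, all operands for an intersection, the first operand for a subtraction)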
pub strings: bool,
pub kind: CharacterClassContentsKind,
pub body: Vec<'a, CharacterClassContents<'a>>,
}

Expand Down Expand Up @@ -288,7 +291,7 @@ pub struct CharacterClassRange {
#[cfg_attr(feature = "serialize", derive(Serialize, Tsify))]
pub struct ClassStringDisjunction<'a> {
pub span: Span,
/// `true` if body is empty or contain [`ClassString`] which `strings` is `true`
/// `true` if body is empty or contains a [`ClassString`] whose `strings` is `true`.
pub strings: bool,
pub body: Vec<'a, ClassString<'a>>,
}
Expand All @@ -313,6 +316,7 @@ pub struct ClassString<'a> {
#[cfg_attr(feature = "serialize", derive(Serialize, Tsify))]
pub struct CapturingGroup<'a> {
pub span: Span,
/// Group name to be referenced by [`NamedReference`].
pub name: Option<Atom<'a>>,
pub body: Disjunction<'a>,
}
Expand All @@ -330,6 +334,8 @@ pub struct IgnoreGroup<'a> {
pub body: Disjunction<'a>,
}

/// Pattern modifiers in [`IgnoreGroup`].
/// e.g. `(?i:...)`, `(?-s:...)`
#[ast]
#[derive(Debug)]
#[generate_derive(CloneIn, ContentEq, ContentHash)]
Expand Down
64 changes: 30 additions & 34 deletions crates/oxc_regular_expression/src/body_parser/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -727,7 +727,7 @@ impl<'a> PatternParser<'a> {
let (kind, body) = self.parse_class_contents()?;

if self.reader.eat(']') {
let strings = body.iter().any(PatternParser::may_contain_strings_in_class_contents);
let strings = PatternParser::may_contain_strings_in_class_contents(&kind, &body);

// [SS:EE] CharacterClass :: [^ ClassContents ]
// It is a Syntax Error if MayContainStrings of the ClassContents is true.
Expand Down Expand Up @@ -1259,30 +1259,7 @@ impl<'a> PatternParser<'a> {
let (kind, body) = self.parse_class_contents()?;

if self.reader.eat(']') {
let strings = match kind {
// MayContainStrings is true
// - if ClassContents is ClassUnion
// - && ClassUnion has ClassOperands
// - && at least 1 ClassOperand has MayContainStrings: true
ast::CharacterClassContentsKind::Union => {
body.iter().any(PatternParser::may_contain_strings_in_class_contents)
}
// MayContainStrings is true
// - if ClassContents is ClassIntersection
// - && ClassIntersection has ClassOperands
// - && all ClassOperands have MayContainStrings: true
ast::CharacterClassContentsKind::Intersection => {
body.iter().all(PatternParser::may_contain_strings_in_class_contents)
}
// MayContainStrings is true
// - if ClassContents is ClassSubtraction
// - && ClassSubtraction has ClassOperands
// - && the first ClassOperand has MayContainStrings: true
ast::CharacterClassContentsKind::Subtraction => body
.iter()
.next()
.map_or(false, PatternParser::may_contain_strings_in_class_contents),
};
let strings = PatternParser::may_contain_strings_in_class_contents(&kind, &body);

// [SS:EE] NestedClass :: [^ ClassContents ]
// It is a Syntax Error if MayContainStrings of the ClassContents is true.
Expand Down Expand Up @@ -2163,27 +2140,46 @@ impl<'a> PatternParser<'a> {

// ---

fn may_contain_strings_in_class_contents(item: &ast::CharacterClassContents) -> bool {
match item {
fn may_contain_strings_in_class_contents(
kind: &ast::CharacterClassContentsKind,
body: &Vec<'a, ast::CharacterClassContents<'a>>,
) -> bool {
let may_contain_strings = |item: &ast::CharacterClassContents<'a>| match item {
// MayContainStrings is true
// - if ClassContents contains UnicodePropertyValueExpression
// - && UnicodePropertyValueExpression is LoneUnicodePropertyNameOrValue
// - && it is a binary property of strings (can be true only with `UnicodeSetsMode`)
ast::CharacterClassContents::UnicodePropertyEscape(unicode_property_escape) => {
unicode_property_escape.strings
}
ast::CharacterClassContents::UnicodePropertyEscape(item) => item.strings,
// MayContainStrings is true
// - if ClassStringDisjunction is [empty]
// - || if ClassStringDisjunction contains ClassString
// - && ClassString is [empty]
// - || ClassString contains 2 or more ClassSetCharacters
ast::CharacterClassContents::ClassStringDisjunction(class_string_disjunction) => {
class_string_disjunction.strings
}
ast::CharacterClassContents::ClassStringDisjunction(item) => item.strings,
// MayContainStrings is true
// - if NestedClass has MayContainStrings: true
ast::CharacterClassContents::NestedCharacterClass(nested_class) => nested_class.strings,
ast::CharacterClassContents::NestedCharacterClass(item) => item.strings,
_ => false,
};

match kind {
// MayContainStrings is true
// - if ClassContents is ClassUnion
// - && ClassUnion has ClassOperands
// - && at least 1 ClassOperand has MayContainStrings: true
ast::CharacterClassContentsKind::Union => body.iter().any(may_contain_strings),
// MayContainStrings is true
// - if ClassContents is ClassIntersection
// - && ClassIntersection has ClassOperands
// - && all ClassOperands have MayContainStrings: true
ast::CharacterClassContentsKind::Intersection => body.iter().all(may_contain_strings),
// MayContainStrings is true
// - if ClassContents is ClassSubtraction
// - && ClassSubtraction has ClassOperands
// - && the first ClassOperand has MayContainStrings: true
ast::CharacterClassContentsKind::Subtraction => {
body.iter().next().map_or(false, may_contain_strings)
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,8 @@ impl<'old_alloc, 'new_alloc> CloneIn<'new_alloc> for CharacterClass<'old_alloc>
CharacterClass {
span: CloneIn::clone_in(&self.span, allocator),
negative: CloneIn::clone_in(&self.negative, allocator),
kind: CloneIn::clone_in(&self.kind, allocator),
strings: CloneIn::clone_in(&self.strings, allocator),
kind: CloneIn::clone_in(&self.kind, allocator),
body: CloneIn::clone_in(&self.body, allocator),
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,8 @@ impl ContentEq for Dot {
impl<'a> ContentEq for CharacterClass<'a> {
fn content_eq(&self, other: &Self) -> bool {
ContentEq::content_eq(&self.negative, &other.negative)
&& ContentEq::content_eq(&self.kind, &other.kind)
&& ContentEq::content_eq(&self.strings, &other.strings)
&& ContentEq::content_eq(&self.kind, &other.kind)
&& ContentEq::content_eq(&self.body, &other.body)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,8 @@ impl ContentHash for Dot {
impl<'a> ContentHash for CharacterClass<'a> {
fn content_hash<H: Hasher>(&self, state: &mut H) {
ContentHash::content_hash(&self.negative, state);
ContentHash::content_hash(&self.kind, state);
ContentHash::content_hash(&self.strings, state);
ContentHash::content_hash(&self.kind, state);
ContentHash::content_hash(&self.body, state);
}
}
Expand Down

0 comments on commit 2da42ef

Please sign in to comment.