diff --git a/regex-syntax/src/ast/mod.rs b/regex-syntax/src/ast/mod.rs index 7329fabbe..f36f27791 100644 --- a/regex-syntax/src/ast/mod.rs +++ b/regex-syntax/src/ast/mod.rs @@ -1162,7 +1162,7 @@ impl Group { /// Returns true if and only if this group is capturing. pub fn is_capturing(&self) -> bool { match self.kind { - GroupKind::CaptureIndex(_) | GroupKind::CaptureName(_) => true, + GroupKind::CaptureIndex(_) | GroupKind::CaptureName { .. } => true, GroupKind::NonCapturing(_) => false, } } @@ -1173,7 +1173,7 @@ impl Group { pub fn capture_index(&self) -> Option<u32> { match self.kind { GroupKind::CaptureIndex(i) => Some(i), - GroupKind::CaptureName(ref x) => Some(x.index), + GroupKind::CaptureName { ref name, .. } => Some(name.index), GroupKind::NonCapturing(_) => None, } } @@ -1184,8 +1184,13 @@ impl Group { pub enum GroupKind { /// `(a)` CaptureIndex(u32), - /// `(?P<name>a)` - CaptureName(CaptureName), + /// `(?<name>a)` or `(?P<name>a)` + CaptureName { + /// True if the `?P<` syntax is used and false if the `?<` syntax is used. + starts_with_p: bool, + /// The capture name. 
+ name: CaptureName, + }, /// `(?:a)` and `(?i:a)` NonCapturing(Flags), } diff --git a/regex-syntax/src/ast/parse.rs b/regex-syntax/src/ast/parse.rs index 48a0507e2..1d6d4d046 100644 --- a/regex-syntax/src/ast/parse.rs +++ b/regex-syntax/src/ast/parse.rs @@ -1202,12 +1202,16 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { )); } let inner_span = self.span(); - if self.bump_if("?P<") { + let mut starts_with_p = true; + if self.bump_if("?P<") || { + starts_with_p = false; + self.bump_if("?<") + } { let capture_index = self.next_capture_index(open_span)?; - let cap = self.parse_capture_name(capture_index)?; + let name = self.parse_capture_name(capture_index)?; Ok(Either::Right(ast::Group { span: open_span, - kind: ast::GroupKind::CaptureName(cap), + kind: ast::GroupKind::CaptureName { starts_with_p, name }, ast: Box::new(Ast::Empty(self.span())), })) } else if self.bump_if("?") { @@ -2800,11 +2804,14 @@ bar flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), Ast::Group(ast::Group { span: span_range(pat, 4..pat.len()), - kind: ast::GroupKind::CaptureName(ast::CaptureName { - span: span_range(pat, 9..12), - name: s("foo"), - index: 1, - }), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: span_range(pat, 9..12), + name: s("foo"), + index: 1, + } + }, ast: Box::new(lit_with('a', span_range(pat, 14..15))), }), ] @@ -3819,15 +3826,33 @@ bar #[test] fn parse_capture_name() { + assert_eq!( + parser("(?<a>z)").parse(), + Ok(Ast::Group(ast::Group { + span: span(0..7), + kind: ast::GroupKind::CaptureName { + starts_with_p: false, + name: ast::CaptureName { + span: span(3..4), + name: s("a"), + index: 1, + } + }, + ast: Box::new(lit('z', 5)), + })) + ); assert_eq!( parser("(?P<a>z)").parse(), Ok(Ast::Group(ast::Group { span: span(0..8), - kind: ast::GroupKind::CaptureName(ast::CaptureName { - span: span(4..5), - name: s("a"), - index: 1, - }), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: 
span(4..5), + name: s("a"), + index: 1, + } + }, ast: Box::new(lit('z', 6)), })) ); @@ -3835,11 +3860,14 @@ bar parser("(?P<abc>z)").parse(), Ok(Ast::Group(ast::Group { span: span(0..10), - kind: ast::GroupKind::CaptureName(ast::CaptureName { - span: span(4..7), - name: s("abc"), - index: 1, - }), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: span(4..7), + name: s("abc"), + index: 1, + } + }, ast: Box::new(lit('z', 8)), })) ); @@ -3848,11 +3876,14 @@ bar parser("(?P<a_1>z)").parse(), Ok(Ast::Group(ast::Group { span: span(0..10), - kind: ast::GroupKind::CaptureName(ast::CaptureName { - span: span(4..7), - name: s("a_1"), - index: 1, - }), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: span(4..7), + name: s("a_1"), + index: 1, + } + }, ast: Box::new(lit('z', 8)), })) ); @@ -3861,11 +3892,14 @@ bar parser("(?P<a.1>z)").parse(), Ok(Ast::Group(ast::Group { span: span(0..10), - kind: ast::GroupKind::CaptureName(ast::CaptureName { - span: span(4..7), - name: s("a.1"), - index: 1, - }), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: span(4..7), + name: s("a.1"), + index: 1, + } + }, ast: Box::new(lit('z', 8)), })) ); @@ -3874,11 +3908,14 @@ bar parser("(?P<a[1]>z)").parse(), Ok(Ast::Group(ast::Group { span: span(0..11), - kind: ast::GroupKind::CaptureName(ast::CaptureName { - span: span(4..8), - name: s("a[1]"), - index: 1, - }), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: span(4..8), + name: s("a[1]"), + index: 1, + } + }, ast: Box::new(lit('z', 9)), })) ); diff --git a/regex-syntax/src/ast/print.rs b/regex-syntax/src/ast/print.rs index e6c000d57..0922ea0e3 100644 --- a/regex-syntax/src/ast/print.rs +++ b/regex-syntax/src/ast/print.rs @@ -160,9 +160,10 @@ impl<W: fmt::Write> Writer<W> { use crate::ast::GroupKind::*; match ast.kind { CaptureIndex(_) => self.wtr.write_str("("), - CaptureName(ref x) => { - 
self.wtr.write_str("(?P<")?; - self.wtr.write_str(&x.name)?; + CaptureName { ref name, starts_with_p } => { + let start = if starts_with_p { "(?P<" } else { "(?<" }; + self.wtr.write_str(start)?; + self.wtr.write_str(&name.name)?; self.wtr.write_str(">")?; Ok(()) } @@ -505,6 +506,7 @@ mod tests { fn print_group() { roundtrip("(?i:a)"); roundtrip("(?P<foo>a)"); + roundtrip("(?<foo>a)"); roundtrip("(a)"); } diff --git a/regex-syntax/src/hir/translate.rs b/regex-syntax/src/hir/translate.rs index a787b79c4..b5bb41767 100644 --- a/regex-syntax/src/hir/translate.rs +++ b/regex-syntax/src/hir/translate.rs @@ -905,8 +905,8 @@ impl<'t, 'p> TranslatorI<'t, 'p> { fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir { let (index, name) = match group.kind { ast::GroupKind::CaptureIndex(index) => (index, None), - ast::GroupKind::CaptureName(ref cap) => { - (cap.index, Some(cap.name.clone().into_boxed_str())) + ast::GroupKind::CaptureName { ref name, .. } => { + (name.index, Some(name.name.clone().into_boxed_str())) } // The HIR doesn't need to use non-capturing groups, since the way // in which the data type is defined handles this automatically. diff --git a/src/lib.rs b/src/lib.rs index 6b95739c5..1de347861 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -361,6 +361,7 @@ regex matches `abc` at positions `0`, `1`, `2` and `3`.
 (exp)          numbered capture group (indexed by opening parenthesis)
 (?P<name>exp)  named (also numbered) capture group (allowed chars: [_0-9a-zA-Z.\[\]])
+(?<name>exp)   named (also numbered) capture group (allowed chars: [_0-9a-zA-Z.\[\]])
 (?:exp)        non-capturing group
 (?flags)       set flags within current group
 (?flags:exp)   set flags for exp (non-capturing)