Skip to content

Commit

Permalink
syntax: polish and doc updates
Browse files Browse the repository at this point in the history
This updates docs in a number of places, including adding examples.

We also make it so that zero-width matches never affect the 'utf8' property.
In practice, this means '(?-u:\B)' is now considered to match valid
UTF-8, which is consistent with the fact that 'a*' (which can also produce
an empty match) is considered to match valid UTF-8 too.

We also refresh the 'Look' and 'LookSet' APIs.
  • Loading branch information
BurntSushi committed Mar 19, 2023
1 parent c350a41 commit b454212
Show file tree
Hide file tree
Showing 10 changed files with 746 additions and 316 deletions.
9 changes: 4 additions & 5 deletions regex-syntax/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,12 @@ concrete syntax that produced the `Hir`.
This example shows how to parse a pattern string into its HIR:

```rust
use regex_syntax::Parser;
use regex_syntax::hir::{self, Hir};
use regex_syntax::{hir::Hir, parse};

let hir = Parser::new().parse("a|b").unwrap();
let hir = parse("a|b").unwrap();
assert_eq!(hir, Hir::alternation(vec![
Hir::literal(hir::Literal::Unicode('a')),
Hir::literal(hir::Literal::Unicode('b')),
Hir::literal("a".as_bytes()),
Hir::literal("b".as_bytes()),
]));
```

Expand Down
17 changes: 6 additions & 11 deletions regex-syntax/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use core::{cmp, fmt, result};

use alloc::{
format,
string::{String, ToString},
Expand All @@ -9,9 +7,6 @@ use alloc::{

use crate::{ast, hir};

/// A type alias for dealing with errors returned by this crate.
pub type Result<T> = result::Result<T, Error>;

/// This error type encompasses any error that can be returned by this crate.
///
/// This error type is marked as `non_exhaustive`. This means that adding a
Expand Down Expand Up @@ -42,8 +37,8 @@ impl From<hir::Error> for Error {
#[cfg(feature = "std")]
impl std::error::Error for Error {}

impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
impl core::fmt::Display for Error {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match *self {
Error::Parse(ref x) => x.fmt(f),
Error::Translate(ref x) => x.fmt(f),
Expand Down Expand Up @@ -91,8 +86,8 @@ impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> {
}
}

impl<'e, E: fmt::Display> fmt::Display for Formatter<'e, E> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
impl<'e, E: core::fmt::Display> core::fmt::Display for Formatter<'e, E> {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
let spans = Spans::from_formatter(self);
if self.pattern.contains('\n') {
let divider = repeat_char('~', 79);
Expand Down Expand Up @@ -158,7 +153,7 @@ struct Spans<'p> {

impl<'p> Spans<'p> {
/// Build a sequence of spans from a formatter.
fn from_formatter<'e, E: fmt::Display>(
fn from_formatter<'e, E: core::fmt::Display>(
fmter: &'p Formatter<'e, E>,
) -> Spans<'p> {
let mut line_count = fmter.pattern.lines().count();
Expand Down Expand Up @@ -238,7 +233,7 @@ impl<'p> Spans<'p> {
pos += 1;
}
let note_len = span.end.column.saturating_sub(span.start.column);
for _ in 0..cmp::max(1, note_len) {
for _ in 0..core::cmp::max(1, note_len) {
notes.push('^');
pos += 1;
}
Expand Down
26 changes: 12 additions & 14 deletions regex-syntax/src/hir/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,9 @@ use crate::hir::{self, Hir};
/// This shows how to extract prefixes:
///
/// ```
/// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, Parser};
///
/// let hir = Parser::new().parse(r"(a|b|c)(x|y|z)[A-Z]+foo")?;
/// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse};
///
/// let hir = parse(r"(a|b|c)(x|y|z)[A-Z]+foo")?;
/// let got = Extractor::new().extract(&hir);
/// // All literals returned are "inexact" because none of them reach the
/// // match state.
Expand All @@ -129,11 +128,10 @@ use crate::hir::{self, Hir};
/// ```
/// use regex_syntax::{
/// hir::literal::{Extractor, ExtractKind, Literal, Seq},
/// Parser,
/// parse,
/// };
///
/// let hir = Parser::new().parse(r"foo|[A-Z]+bar")?;
///
/// let hir = parse(r"foo|[A-Z]+bar")?;
/// let got = Extractor::new().kind(ExtractKind::Suffix).extract(&hir);
/// // Since 'foo' gets to a match state, it is considered exact. But 'bar'
/// // does not because of the '[A-Z]+', and thus is marked inexact.
Expand Down Expand Up @@ -237,9 +235,9 @@ impl Extractor {
/// for character classes being turned into literal sequences.
///
/// ```
/// use regex_syntax::{hir::literal::{Extractor, Seq}, Parser};
/// use regex_syntax::{hir::literal::{Extractor, Seq}, parse};
///
/// let hir = Parser::new().parse(r"[0-9]")?;
/// let hir = parse(r"[0-9]")?;
///
/// let got = Extractor::new().extract(&hir);
/// let expected = Seq::new([
Expand Down Expand Up @@ -274,9 +272,9 @@ impl Extractor {
/// This shows how to decrease the limit and compares it with the default.
///
/// ```
/// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, Parser};
/// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse};
///
/// let hir = Parser::new().parse(r"(abc){8}")?;
/// let hir = parse(r"(abc){8}")?;
///
/// let got = Extractor::new().extract(&hir);
/// let expected = Seq::new(["abcabcabcabcabcabcabcabc"]);
Expand Down Expand Up @@ -311,9 +309,9 @@ impl Extractor {
/// This shows how to decrease the limit and compares it with the default.
///
/// ```
/// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, Parser};
/// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse};
///
/// let hir = Parser::new().parse(r"(abc){2}{2}{2}")?;
/// let hir = parse(r"(abc){2}{2}{2}")?;
///
/// let got = Extractor::new().extract(&hir);
/// let expected = Seq::new(["abcabcabcabcabcabcabcabc"]);
Expand Down Expand Up @@ -353,9 +351,9 @@ impl Extractor {
/// sequence returned.
///
/// ```
/// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, Parser};
/// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse};
///
/// let hir = Parser::new().parse(r"[ab]{2}{2}")?;
/// let hir = parse(r"[ab]{2}{2}")?;
///
/// let got = Extractor::new().extract(&hir);
/// let expected = Seq::new([
Expand Down
Loading

0 comments on commit b454212

Please sign in to comment.