diff --git a/Cargo.toml b/Cargo.toml index 46a57daeb..1c68090dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,8 @@ finite automata and guarantees linear time matching on all inputs. categories = ["text-processing"] autotests = false exclude = ["/scripts/*", "/.github/*"] -edition = "2018" +edition = "2021" +resolver = "2" [workspace] members = [ diff --git a/regex-syntax/Cargo.toml b/regex-syntax/Cargo.toml index da19f3889..695ded66a 100644 --- a/regex-syntax/Cargo.toml +++ b/regex-syntax/Cargo.toml @@ -13,7 +13,8 @@ edition = "2021" # Features are documented in the "Crate features" section of the crate docs: # https://docs.rs/regex-syntax/*/#crate-features [features] -default = ["unicode"] +default = ["std", "unicode"] +std = [] unicode = [ "unicode-age", diff --git a/regex-syntax/src/ast/mod.rs b/regex-syntax/src/ast/mod.rs index 1f0fbd2e2..9e717f4a8 100644 --- a/regex-syntax/src/ast/mod.rs +++ b/regex-syntax/src/ast/mod.rs @@ -2,8 +2,9 @@ Defines an abstract syntax for regular expressions. */ -use std::cmp::Ordering; -use std::fmt; +use core::cmp::Ordering; + +use alloc::{boxed::Box, string::String, vec, vec::Vec}; pub use crate::ast::visitor::{visit, Visitor}; @@ -174,23 +175,24 @@ pub enum ErrorKind { UnsupportedLookAround, } +#[cfg(feature = "std")] impl std::error::Error for Error {} -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { crate::error::Formatter::from(self).fmt(f) } } -impl fmt::Display for ErrorKind { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for ErrorKind { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { use self::ErrorKind::*; match *self { CaptureLimitExceeded => write!( f, "exceeded the maximum number of \ capturing groups ({})", - ::std::u32::MAX + u32::MAX ), ClassEscapeInvalid => { write!(f, "invalid escape sequence found in character class") @@ -283,8 +285,8 @@ pub struct Span { pub end: Position, } -impl fmt::Debug for Span { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Debug for Span { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!(f, "Span({:?}, {:?})", self.start, self.end) } } @@ -316,8 +318,8 @@ pub struct Position { pub column: usize, } -impl fmt::Debug for Position { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Debug for Position { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!( f, "Position(o: {:?}, l: {:?}, c: {:?})", @@ -497,8 +499,8 @@ impl Ast { /// /// This implementation uses constant stack space and heap space proportional /// to the size of the `Ast`. -impl fmt::Display for Ast { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for Ast { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { use crate::ast::print::Printer; Printer::new().print(self, f) } @@ -1315,7 +1317,7 @@ pub enum Flag { /// space but heap space proportional to the depth of the `Ast`. impl Drop for Ast { fn drop(&mut self) { - use std::mem; + use core::mem; match *self { Ast::Empty(_) @@ -1365,7 +1367,7 @@ impl Drop for Ast { /// stack space but heap space proportional to the depth of the `ClassSet`. impl Drop for ClassSet { fn drop(&mut self) { - use std::mem; + use core::mem; match *self { ClassSet::Item(ref item) => match *item { diff --git a/regex-syntax/src/ast/parse.rs b/regex-syntax/src/ast/parse.rs index 6e9c9aca0..f730ee659 100644 --- a/regex-syntax/src/ast/parse.rs +++ b/regex-syntax/src/ast/parse.rs @@ -2,17 +2,26 @@ This module provides a regular expression parser. */ -use std::borrow::Borrow; -use std::cell::{Cell, RefCell}; -use std::mem; -use std::result; - -use crate::ast::{self, Ast, Position, Span}; -use crate::either::Either; - -use crate::is_meta_character; - -type Result = result::Result; +use core::{ + borrow::Borrow, + cell::{Cell, RefCell}, + mem, +}; + +use alloc::{ + boxed::Box, + string::{String, ToString}, + vec, + vec::Vec, +}; + +use crate::{ + ast::{self, Ast, Position, Span}, + either::Either, + is_meta_character, +}; + +type Result = core::result::Result; /// A primitive is an expression with no sub-expressions. This includes /// literals, assertions and non-set character classes. This representation @@ -1533,9 +1542,6 @@ impl<'s, P: Borrow> ParserI<'s, P> { /// Assuming the preconditions are met, this routine can never fail. #[inline(never)] fn parse_octal(&self) -> ast::Literal { - use std::char; - use std::u32; - assert!(self.parser().octal); assert!('0' <= self.char() && self.char() <= '7'); let start = self.pos(); @@ -1600,9 +1606,6 @@ impl<'s, P: Borrow> ParserI<'s, P> { &self, kind: ast::HexLiteralKind, ) -> Result { - use std::char; - use std::u32; - let mut scratch = self.parser().scratch.borrow_mut(); scratch.clear(); @@ -1646,9 +1649,6 @@ impl<'s, P: Borrow> ParserI<'s, P> { &self, kind: ast::HexLiteralKind, ) -> Result { - use std::char; - use std::u32; - let mut scratch = self.parser().scratch.borrow_mut(); scratch.clear(); @@ -2146,7 +2146,7 @@ impl<'p, 's, P: Borrow> NestLimiter<'p, 's, P> { let new = self.depth.checked_add(1).ok_or_else(|| { self.p.error( span.clone(), - ast::ErrorKind::NestLimitExceeded(::std::u32::MAX), + ast::ErrorKind::NestLimitExceeded(u32::MAX), ) })?; let limit = self.p.parser().nest_limit; @@ -2297,11 +2297,14 @@ fn specialize_err( #[cfg(test)] mod tests { - use std::ops::Range; + use core::ops::Range; + + use alloc::format; - use super::{Parser, ParserBuilder, ParserI, Primitive}; use crate::ast::{self, Ast, Position, Span}; + use super::*; + // Our own assert_eq, which has slightly better formatting (but honestly // still kind of crappy). macro_rules! assert_eq { @@ -4272,7 +4275,7 @@ bar Ok(Primitive::Literal(ast::Literal { span: span(0..pat.len()), kind: ast::LiteralKind::Octal, - c: ::std::char::from_u32(i).unwrap(), + c: char::from_u32(i).unwrap(), })) ); } @@ -4347,7 +4350,7 @@ bar Ok(Primitive::Literal(ast::Literal { span: span(0..pat.len()), kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X), - c: ::std::char::from_u32(i).unwrap(), + c: char::from_u32(i).unwrap(), })) ); } @@ -4378,7 +4381,7 @@ bar #[test] fn parse_hex_four() { for i in 0..65536 { - let c = match ::std::char::from_u32(i) { + let c = match char::from_u32(i) { None => continue, Some(c) => c, }; @@ -4442,7 +4445,7 @@ bar #[test] fn parse_hex_eight() { for i in 0..65536 { - let c = match ::std::char::from_u32(i) { + let c = match char::from_u32(i) { None => continue, Some(c) => c, }; diff --git a/regex-syntax/src/ast/print.rs b/regex-syntax/src/ast/print.rs index f6b2462c0..e6c000d57 100644 --- a/regex-syntax/src/ast/print.rs +++ b/regex-syntax/src/ast/print.rs @@ -2,10 +2,13 @@ This module provides a regular expression printer for `Ast`. */ -use std::fmt; +use core::fmt; -use crate::ast::visitor::{self, Visitor}; -use crate::ast::{self, Ast}; +use crate::ast::{ + self, + visitor::{self, Visitor}, + Ast, +}; /// A builder for constructing a printer. /// @@ -395,9 +398,12 @@ impl Writer { #[cfg(test)] mod tests { - use super::Printer; + use alloc::string::String; + use crate::ast::parse::ParserBuilder; + use super::*; + fn roundtrip(given: &str) { roundtrip_with(|b| b, given); } diff --git a/regex-syntax/src/ast/visitor.rs b/regex-syntax/src/ast/visitor.rs index 78ee487cf..03f8bf963 100644 --- a/regex-syntax/src/ast/visitor.rs +++ b/regex-syntax/src/ast/visitor.rs @@ -1,4 +1,4 @@ -use std::fmt; +use alloc::{vec, vec::Vec}; use crate::ast::{self, Ast}; @@ -475,8 +475,8 @@ impl<'a> ClassInduct<'a> { } } -impl<'a> fmt::Debug for ClassFrame<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl<'a> core::fmt::Debug for ClassFrame<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let x = match *self { ClassFrame::Union { .. } => "Union", ClassFrame::Binary { .. } => "Binary", @@ -487,8 +487,8 @@ impl<'a> fmt::Debug for ClassFrame<'a> { } } -impl<'a> fmt::Debug for ClassInduct<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl<'a> core::fmt::Debug for ClassInduct<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let x = match *self { ClassInduct::Item(it) => match *it { ast::ClassSetItem::Empty(_) => "Item(Empty)", diff --git a/regex-syntax/src/error.rs b/regex-syntax/src/error.rs index 6e7fa7466..a10230a87 100644 --- a/regex-syntax/src/error.rs +++ b/regex-syntax/src/error.rs @@ -1,9 +1,13 @@ -use std::cmp; -use std::fmt; -use std::result; +use core::{cmp, fmt, result}; -use crate::ast; -use crate::hir; +use alloc::{ + format, + string::{String, ToString}, + vec, + vec::Vec, +}; + +use crate::{ast, hir}; /// A type alias for dealing with errors returned by this crate. pub type Result = result::Result; @@ -35,6 +39,7 @@ impl From for Error { } } +#[cfg(feature = "std")] impl std::error::Error for Error {} impl fmt::Display for Error { @@ -266,11 +271,13 @@ impl<'p> Spans<'p> { } fn repeat_char(c: char, count: usize) -> String { - ::std::iter::repeat(c).take(count).collect() + core::iter::repeat(c).take(count).collect() } #[cfg(test)] mod tests { + use alloc::string::ToString; + use crate::ast::parse::Parser; fn assert_panic_message(pattern: &str, expected_msg: &str) { diff --git a/regex-syntax/src/hir/interval.rs b/regex-syntax/src/hir/interval.rs index d6e83f7b2..fbe772ea4 100644 --- a/regex-syntax/src/hir/interval.rs +++ b/regex-syntax/src/hir/interval.rs @@ -1,8 +1,6 @@ -use std::char; -use std::cmp; -use std::fmt::Debug; -use std::slice; -use std::u8; +use core::{char, cmp, fmt::Debug, slice}; + +use alloc::vec::Vec; use crate::unicode; diff --git a/regex-syntax/src/hir/literal/mod.rs b/regex-syntax/src/hir/literal/mod.rs index 58b8871ed..d49cffd92 100644 --- a/regex-syntax/src/hir/literal/mod.rs +++ b/regex-syntax/src/hir/literal/mod.rs @@ -2,11 +2,15 @@ Provides routines for extracting literal prefixes and suffixes from an `Hir`. */ -use std::cmp; -use std::fmt; -use std::iter; -use std::mem; -use std::ops; +use core::{cmp, iter, mem, ops}; + +use alloc::{ + boxed::Box, + format, + string::{String, ToString}, + vec, + vec::Vec, +}; use crate::hir::{self, Hir, HirKind}; @@ -408,7 +412,7 @@ impl Literals { } if self.lits.is_empty() { let i = cmp::min(self.limit_size, bytes.len()); - self.lits.push(Literal::new(bytes[..i].to_owned())); + self.lits.push(Literal::new(bytes[..i].to_vec())); self.lits[0].cut = i < bytes.len(); return !self.lits[0].is_cut(); } @@ -465,8 +469,6 @@ impl Literals { cls: &hir::ClassUnicode, reverse: bool, ) -> bool { - use std::char; - if self.class_exceeds_limits(cls_char_count(cls)) { return false; } @@ -837,8 +839,8 @@ fn alternate_literals( } } -impl fmt::Debug for Literals { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Debug for Literals { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.debug_struct("Literals") .field("lits", &self.lits) .field("limit_size", &self.limit_size) @@ -881,8 +883,8 @@ impl PartialOrd for Literal { } } -impl fmt::Debug for Literal { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Debug for Literal { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { if self.is_cut() { write!(f, "Cut({})", escape_unicode(&self.v)) } else { @@ -923,7 +925,7 @@ fn position(needle: &[u8], mut haystack: &[u8]) -> Option { } fn escape_unicode(bytes: &[u8]) -> String { - let show = match ::std::str::from_utf8(bytes) { + let show = match core::str::from_utf8(bytes) { Ok(v) => v.to_string(), Err(_) => escape_bytes(bytes), }; @@ -955,7 +957,7 @@ fn escape_bytes(bytes: &[u8]) -> String { } fn escape_byte(byte: u8) -> String { - use std::ascii::escape_default; + use core::ascii::escape_default; let escaped: Vec = escape_default(byte).collect(); String::from_utf8_lossy(&escaped).into_owned() @@ -971,11 +973,15 @@ fn cls_byte_count(cls: &hir::ClassBytes) -> usize { #[cfg(test)] mod tests { - use std::fmt; + use alloc::{ + string::{String, ToString}, + vec, + vec::Vec, + }; + + use crate::{hir::Hir, ParserBuilder}; - use super::{escape_bytes, Literal, Literals}; - use crate::hir::Hir; - use crate::ParserBuilder; + use super::*; // To make test failures easier to read. #[derive(Debug, Eq, PartialEq)] @@ -1013,8 +1019,8 @@ mod tests { } } - impl fmt::Debug for ULiteral { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + impl core::fmt::Debug for ULiteral { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { if self.is_cut() { write!(f, "Cut({})", self.v) } else { @@ -1037,11 +1043,11 @@ mod tests { #[allow(non_snake_case)] fn C(s: &'static str) -> ULiteral { - ULiteral { v: s.to_owned(), cut: true } + ULiteral { v: s.to_string(), cut: true } } #[allow(non_snake_case)] fn M(s: &'static str) -> ULiteral { - ULiteral { v: s.to_owned(), cut: false } + ULiteral { v: s.to_string(), cut: false } } fn prefixes(lits: &mut Literals, expr: &Hir) { @@ -1626,7 +1632,7 @@ mod tests { let given: Vec = $given .into_iter() .map(|s: &str| Literal { - v: s.to_owned().into_bytes(), + v: s.to_string().into_bytes(), cut: false, }) .collect(); @@ -1661,7 +1667,7 @@ mod tests { let given: Vec = $given .into_iter() .map(|s: &str| Literal { - v: s.to_owned().into_bytes(), + v: s.to_string().into_bytes(), cut: false, }) .collect(); diff --git a/regex-syntax/src/hir/mod.rs b/regex-syntax/src/hir/mod.rs index 23fef78f9..88f1141d8 100644 --- a/regex-syntax/src/hir/mod.rs +++ b/regex-syntax/src/hir/mod.rs @@ -1,18 +1,27 @@ /*! Defines a high-level intermediate representation for regular expressions. */ -use std::char; -use std::cmp; -use std::fmt; -use std::result; -use std::u8; -use crate::ast::Span; -use crate::hir::interval::{Interval, IntervalSet, IntervalSetIter}; -use crate::unicode; +use core::{char, cmp}; -pub use crate::hir::visitor::{visit, Visitor}; -pub use crate::unicode::CaseFoldError; +use alloc::{ + boxed::Box, + format, + string::{String, ToString}, + vec, + vec::Vec, +}; + +use crate::{ + ast::Span, + hir::interval::{Interval, IntervalSet, IntervalSetIter}, + unicode, +}; + +pub use crate::{ + hir::visitor::{visit, Visitor}, + unicode::CaseFoldError, +}; mod interval; pub mod literal; @@ -80,16 +89,17 @@ pub enum ErrorKind { UnicodeCaseUnavailable, } +#[cfg(feature = "std")] impl std::error::Error for Error {} -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { crate::error::Formatter::from(self).fmt(f) } } -impl fmt::Display for ErrorKind { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for ErrorKind { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { use self::ErrorKind::*; let msg = match *self { @@ -197,8 +207,7 @@ impl Hir { /// Consumes ownership of this HIR expression and returns its underlying /// `HirKind`. pub fn into_kind(mut self) -> HirKind { - use std::mem; - mem::replace(&mut self.kind, HirKind::Empty) + core::mem::replace(&mut self.kind, HirKind::Empty) } /// Returns an empty HIR expression. @@ -704,8 +713,8 @@ impl HirKind { /// /// This implementation uses constant stack space and heap space proportional /// to the size of the `Hir`. -impl fmt::Display for Hir { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for Hir { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { use crate::hir::print::Printer; Printer::new().print(self, f) } @@ -800,7 +809,7 @@ impl Class { /// Unicode oriented. pub fn try_case_fold_simple( &mut self, - ) -> result::Result<(), CaseFoldError> { + ) -> core::result::Result<(), CaseFoldError> { match *self { Class::Unicode(ref mut x) => x.try_case_fold_simple()?, Class::Bytes(ref mut x) => x.case_fold_simple(), @@ -909,7 +918,7 @@ impl ClassUnicode { /// `unicode-case` feature is not enabled. pub fn try_case_fold_simple( &mut self, - ) -> result::Result<(), CaseFoldError> { + ) -> core::result::Result<(), CaseFoldError> { self.set.case_fold_simple() } @@ -981,8 +990,8 @@ pub struct ClassUnicodeRange { end: char, } -impl fmt::Debug for ClassUnicodeRange { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Debug for ClassUnicodeRange { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let start = if !self.start.is_whitespace() && !self.start.is_control() { self.start.to_string() @@ -1285,8 +1294,8 @@ impl ClassBytesRange { } } -impl fmt::Debug for ClassBytesRange { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Debug for ClassBytesRange { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let mut debug = f.debug_struct("ClassBytesRange"); if self.start <= 0x7F { let ch = char::try_from(self.start).unwrap(); @@ -1459,7 +1468,7 @@ pub enum RepetitionRange { /// space but heap space proportional to the depth of the total `Hir`. impl Drop for Hir { fn drop(&mut self) { - use std::mem; + use core::mem; match *self.kind() { HirKind::Empty diff --git a/regex-syntax/src/hir/print.rs b/regex-syntax/src/hir/print.rs index 433f9bf11..63d014b1b 100644 --- a/regex-syntax/src/hir/print.rs +++ b/regex-syntax/src/hir/print.rs @@ -2,11 +2,16 @@ This module provides a regular expression printer for `Hir`. */ -use std::fmt; +use core::fmt; -use crate::hir::visitor::{self, Visitor}; -use crate::hir::{self, Hir, HirKind}; -use crate::is_meta_character; +use crate::{ + hir::{ + self, + visitor::{self, Visitor}, + Hir, HirKind, + }, + is_meta_character, +}; /// A builder for constructing a printer. /// @@ -235,9 +240,12 @@ impl Writer { #[cfg(test)] mod tests { - use super::Printer; + use alloc::string::String; + use crate::ParserBuilder; + use super::*; + fn roundtrip(given: &str, expected: &str) { roundtrip_with(|b| b, given, expected); } diff --git a/regex-syntax/src/hir/translate.rs b/regex-syntax/src/hir/translate.rs index 988384ede..b4338bc94 100644 --- a/regex-syntax/src/hir/translate.rs +++ b/regex-syntax/src/hir/translate.rs @@ -2,14 +2,17 @@ Defines a translator that converts an `Ast` to an `Hir`. */ -use std::cell::{Cell, RefCell}; -use std::result; +use core::cell::{Cell, RefCell}; -use crate::ast::{self, Ast, Span, Visitor}; -use crate::hir::{self, Error, ErrorKind, Hir}; -use crate::unicode::{self, ClassQuery}; +use alloc::{boxed::Box, string::ToString, vec, vec::Vec}; -type Result = result::Result; +use crate::{ + ast::{self, Ast, Span, Visitor}, + hir::{self, Error, ErrorKind, Hir}, + unicode::{self, ClassQuery}, +}; + +type Result = core::result::Result; /// A builder for constructing an AST->HIR translator. #[derive(Clone, Debug)] @@ -1119,12 +1122,13 @@ fn ascii_class_as_chars( #[cfg(test)] mod tests { - use crate::ast::parse::ParserBuilder; - use crate::ast::{self, Ast, Position, Span}; - use crate::hir::{self, Hir, HirKind}; - use crate::unicode::{self, ClassQuery}; + use crate::{ + ast::{self, parse::ParserBuilder, Ast, Position, Span}, + hir::{self, Hir, HirKind}, + unicode::{self, ClassQuery}, + }; - use super::{ascii_class, ascii_class_as_chars, TranslatorBuilder}; + use super::*; // We create these errors to compare with real hir::Errors in the tests. // We define equality between TestError and hir::Error to disregard the diff --git a/regex-syntax/src/hir/visitor.rs b/regex-syntax/src/hir/visitor.rs index 4f5a70909..97771d92f 100644 --- a/regex-syntax/src/hir/visitor.rs +++ b/regex-syntax/src/hir/visitor.rs @@ -1,3 +1,5 @@ +use alloc::{vec, vec::Vec}; + use crate::hir::{self, Hir, HirKind}; /// A trait for visiting the high-level IR (HIR) in depth first order. diff --git a/regex-syntax/src/lib.rs b/regex-syntax/src/lib.rs index 1dfb38af3..287b3417c 100644 --- a/regex-syntax/src/lib.rs +++ b/regex-syntax/src/lib.rs @@ -116,6 +116,11 @@ match semantics of a regular expression. The following features are available: +* **std** - + Enables support for the standard library. This feature is enabled by default. + When disabled, only `core` and `alloc` are used. Otherwise, enabling `std` + generally just enables `std::error::Error` trait impls for the various error + types. * **unicode** - Enables all Unicode features. This feature is enabled by default, and will always cover all Unicode features, even if more are added in the future. @@ -154,13 +159,23 @@ The following features are available: `\p{sb=ATerm}`. */ +#![forbid(unsafe_code)] #![deny(missing_docs)] #![warn(missing_debug_implementations)] -#![forbid(unsafe_code)] +#![no_std] + +#[cfg(any(test, feature = "std"))] +extern crate std; -pub use crate::error::{Error, Result}; -pub use crate::parser::{Parser, ParserBuilder}; -pub use crate::unicode::UnicodeWordError; +extern crate alloc; + +pub use crate::{ + error::{Error, Result}, + parser::{Parser, ParserBuilder}, + unicode::UnicodeWordError, +}; + +use alloc::string::String; pub mod ast; mod either; @@ -248,7 +263,7 @@ pub fn is_word_character(c: char) -> bool { /// returns an error. pub fn try_is_word_character( c: char, -) -> std::result::Result { +) -> core::result::Result { unicode::is_word_character(c) } @@ -265,6 +280,8 @@ pub fn is_word_byte(c: u8) -> bool { #[cfg(test)] mod tests { + use alloc::string::ToString; + use super::*; #[test] diff --git a/regex-syntax/src/parser.rs b/regex-syntax/src/parser.rs index ded95b280..93df72279 100644 --- a/regex-syntax/src/parser.rs +++ b/regex-syntax/src/parser.rs @@ -1,7 +1,4 @@ -use crate::ast; -use crate::hir; - -use crate::Result; +use crate::{ast, hir, Result}; /// A builder for a regular expression parser. /// diff --git a/regex-syntax/src/unicode.rs b/regex-syntax/src/unicode.rs index 4e130d254..22d088a3c 100644 --- a/regex-syntax/src/unicode.rs +++ b/regex-syntax/src/unicode.rs @@ -1,11 +1,12 @@ -use std::error; -use std::fmt; -use std::result; +use alloc::{ + string::{String, ToString}, + vec::Vec, +}; use crate::hir; /// A type alias for errors specific to Unicode handling of classes. -pub type Result = result::Result; +pub type Result = core::result::Result; /// An inclusive range of codepoints from a generated file (hence the static /// lifetime). @@ -25,7 +26,7 @@ pub enum Error { } /// A type alias for errors specific to Unicode case folding. -pub type FoldResult = result::Result; +pub type FoldResult = core::result::Result; /// An error that occurs when Unicode-aware simple case folding fails. /// @@ -35,10 +36,11 @@ pub type FoldResult = result::Result; #[derive(Debug)] pub struct CaseFoldError(()); -impl error::Error for CaseFoldError {} +#[cfg(feature = "std")] +impl std::error::Error for CaseFoldError {} -impl fmt::Display for CaseFoldError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for CaseFoldError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!( f, "Unicode-aware case folding is not available \ @@ -55,10 +57,11 @@ impl fmt::Display for CaseFoldError { #[derive(Debug)] pub struct UnicodeWordError(()); -impl error::Error for UnicodeWordError {} +#[cfg(feature = "std")] +impl std::error::Error for UnicodeWordError {} -impl fmt::Display for UnicodeWordError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for UnicodeWordError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!( f, "Unicode-aware \\w class is not available \ @@ -80,21 +83,24 @@ impl fmt::Display for UnicodeWordError { /// This returns an error if the Unicode case folding tables are not available. pub fn simple_fold( c: char, -) -> FoldResult, Option>> { +) -> FoldResult, Option>> +{ #[cfg(not(feature = "unicode-case"))] fn imp( _: char, - ) -> FoldResult, Option>> - { - use std::option::IntoIter; - Err::, _>, _>(CaseFoldError(())) + ) -> FoldResult< + core::result::Result, Option>, + > { + use core::option::IntoIter; + Err::, _>, _>(CaseFoldError(())) } #[cfg(feature = "unicode-case")] fn imp( c: char, - ) -> FoldResult, Option>> - { + ) -> FoldResult< + core::result::Result, Option>, + > { use crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE; Ok(CASE_FOLDING_SIMPLE @@ -130,8 +136,9 @@ pub fn contains_simple_case_mapping( #[cfg(feature = "unicode-case")] fn imp(start: char, end: char) -> FoldResult { + use core::cmp::Ordering; + use crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE; - use std::cmp::Ordering; assert!(start <= end); Ok(CASE_FOLDING_SIMPLE @@ -402,17 +409,17 @@ pub fn hir_class(ranges: &[(char, char)]) -> hir::ClassUnicode { /// Returns true only if the given codepoint is in the `\w` character class. /// /// If the `unicode-perl` feature is not enabled, then this returns an error. -pub fn is_word_character(c: char) -> result::Result { +pub fn is_word_character( + c: char, +) -> core::result::Result { #[cfg(not(feature = "unicode-perl"))] - fn imp(_: char) -> result::Result { + fn imp(_: char) -> core::result::Result { Err(UnicodeWordError(())) } #[cfg(feature = "unicode-perl")] - fn imp(c: char) -> result::Result { - use crate::is_word_byte; - use crate::unicode_tables::perl_word::PERL_WORD; - use std::cmp::Ordering; + fn imp(c: char) -> core::result::Result { + use crate::{is_word_byte, unicode_tables::perl_word::PERL_WORD}; // MSRV(1.59): Use 'u8::try_from(c)' instead. if u8::try_from(u32::from(c)).map_or(false, is_word_byte) { @@ -420,6 +427,8 @@ pub fn is_word_character(c: char) -> result::Result { } Ok(PERL_WORD .binary_search_by(|&(start, end)| { + use core::cmp::Ordering; + if start <= c && c <= end { Ordering::Equal } else if start > c { @@ -578,7 +587,7 @@ fn property_set( fn ages(canonical_age: &str) -> Result> { #[cfg(not(feature = "unicode-age"))] fn imp(_: &str) -> Result> { - use std::option::IntoIter; + use core::option::IntoIter; Err::, _>(Error::PropertyNotFound) } @@ -879,10 +888,7 @@ fn symbolic_name_normalize_bytes(slice: &mut [u8]) -> &mut [u8] { #[cfg(test)] mod tests { - use super::{ - contains_simple_case_mapping, simple_fold, symbolic_name_normalize, - symbolic_name_normalize_bytes, - }; + use super::*; #[cfg(feature = "unicode-case")] fn simple_fold_ok(c: char) -> impl Iterator { @@ -906,23 +912,23 @@ mod tests { #[cfg(feature = "unicode-case")] fn simple_fold_k() { let xs: Vec = simple_fold_ok('k').collect(); - assert_eq!(xs, vec!['K', 'K']); + assert_eq!(xs, alloc::vec!['K', 'K']); let xs: Vec = simple_fold_ok('K').collect(); - assert_eq!(xs, vec!['k', 'K']); + assert_eq!(xs, alloc::vec!['k', 'K']); let xs: Vec = simple_fold_ok('K').collect(); - assert_eq!(xs, vec!['K', 'k']); + assert_eq!(xs, alloc::vec!['K', 'k']); } #[test] #[cfg(feature = "unicode-case")] fn simple_fold_a() { let xs: Vec = simple_fold_ok('a').collect(); - assert_eq!(xs, vec!['A']); + assert_eq!(xs, alloc::vec!['A']); let xs: Vec = simple_fold_ok('A').collect(); - assert_eq!(xs, vec!['a']); + assert_eq!(xs, alloc::vec!['a']); } #[test] diff --git a/regex-syntax/src/utf8.rs b/regex-syntax/src/utf8.rs index b00cd7dba..a75a8afa8 100644 --- a/regex-syntax/src/utf8.rs +++ b/regex-syntax/src/utf8.rs @@ -80,12 +80,9 @@ I also got the idea from which uses it for executing automata on their term index. */ -#![deny(missing_docs)] +use core::{char, fmt, iter::FusedIterator, slice}; -use std::char; -use std::fmt; -use std::iter::FusedIterator; -use std::slice; +use alloc::{vec, vec::Vec}; const MAX_UTF8_BYTES: usize = 4; @@ -457,7 +454,9 @@ fn max_scalar_value(nbytes: usize) -> u32 { #[cfg(test)] mod tests { - use std::char; + use core::char; + + use alloc::{vec, vec::Vec}; use crate::utf8::{Utf8Range, Utf8Sequences}; diff --git a/regex-syntax/test b/regex-syntax/test index 4b1b9fb1a..d03db94b4 100755 --- a/regex-syntax/test +++ b/regex-syntax/test @@ -7,6 +7,7 @@ echo "===== DEFAULT FEATURES ===" cargo test features=( + std unicode unicode-age unicode-bool