From 5241919f48c3bf54fbe8d4e9d50b4a55038da150 Mon Sep 17 00:00:00 2001
From: kennytm
Date: Mon, 29 Oct 2018 20:24:15 +0800
Subject: [PATCH] syntax: fix [[:blank:]] character class

Ensure `[[:blank:]]` only matches `[ \t]`. It appears that there was a
transcription error when `regex-syntax` was rewritten such that
`[[:blank:]]` ended up matching more than it was supposed to.

Fixes #533
---
 regex-syntax/src/hir/translate.rs |  2 +-
 tests/regression.rs               | 17 +++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/regex-syntax/src/hir/translate.rs b/regex-syntax/src/hir/translate.rs
index 8fea00b164..ba0ac0a20a 100644
--- a/regex-syntax/src/hir/translate.rs
+++ b/regex-syntax/src/hir/translate.rs
@@ -1040,7 +1040,7 @@ fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
             X
         }
         Blank => {
-            const X: T = &[(' ', '\t')];
+            const X: T = &[('\t', '\t'), (' ', ' ')];
             X
         }
         Cntrl => {
diff --git a/tests/regression.rs b/tests/regression.rs
index a09333e2c3..bda63f0bcd 100644
--- a/tests/regression.rs
+++ b/tests/regression.rs
@@ -92,3 +92,20 @@ ismatch!(
     r"typename type\-parameter\-\d+\-\d+::.+",
     "test",
     false);
+
+// See: https://github.com/rust-lang/regex/issues/533
+ismatch!(
+    blank_matches_nothing_between_space_and_tab,
+    r"[[:blank:]]",
+    "\u{a}\u{b}\u{c}\u{d}\u{e}\u{f}\
+     \u{10}\u{11}\u{12}\u{13}\u{14}\u{15}\u{16}\u{17}\
+     \u{18}\u{19}\u{1a}\u{1b}\u{1c}\u{1d}\u{1e}\u{1f}",
+    false);
+
+ismatch!(
+    inverted_blank_matches_everything_between_space_and_tab,
+    r"^[[:^blank:]]+$",
+    "\u{a}\u{b}\u{c}\u{d}\u{e}\u{f}\
+     \u{10}\u{11}\u{12}\u{13}\u{14}\u{15}\u{16}\u{17}\
+     \u{18}\u{19}\u{1a}\u{1b}\u{1c}\u{1d}\u{1e}\u{1f}",
+    true);