diff --git a/CHANGELOG.md b/CHANGELOG.md index dc1b296d9f..82aa089bba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,13 @@ +1.4.1 (2020-10-13) +================== +This is a small bug fix release that makes `\p{cf}` work. Previously, it would +report "property not found" even though `cf` is a valid abbreviation for the +`Format` general category. + +* [BUG #719](https://github.com/rust-lang/regex/issues/719): + Fixes bug that prevented `\p{cf}` from working. + + 1.4.0 (2020-10-11) ================== This releases has a few minor documentation fixes as well as some very minor diff --git a/regex-syntax/src/unicode.rs b/regex-syntax/src/unicode.rs index 7e414396f5..a78362b2fb 100644 --- a/regex-syntax/src/unicode.rs +++ b/regex-syntax/src/unicode.rs @@ -237,8 +237,16 @@ impl<'a> ClassQuery<'a> { fn canonical_binary(&self, name: &str) -> Result<CanonicalClassQuery> { let norm = symbolic_name_normalize(name); - if let Some(canon) = canonical_prop(&norm)? { - return Ok(CanonicalClassQuery::Binary(canon)); + // This is a special case where 'cf' refers to the 'Format' general + // category, but where the 'cf' abbreviation is also an abbreviation + // for the 'Case_Folding' property. But we want to treat it as + // a general category. (Currently, we don't even support the + // 'Case_Folding' property. But if we do in the future, users will be + // required to spell it out.) + if norm != "cf" { + if let Some(canon) = canonical_prop(&norm)? { + return Ok(CanonicalClassQuery::Binary(canon)); + } } if let Some(canon) = canonical_gencat(&norm)? 
{ return Ok(CanonicalClassQuery::GeneralCategory(canon)); diff --git a/tests/unicode.rs b/tests/unicode.rs index 52522f41c6..9f1cd0c01f 100644 --- a/tests/unicode.rs +++ b/tests/unicode.rs @@ -74,6 +74,9 @@ mat!( Some((0, 3)) ); mat!(uni_class_gencat_format, r"\p{Format}", "\u{E007F}", Some((0, 4))); +// See: https://github.com/rust-lang/regex/issues/719 +mat!(uni_class_gencat_format_abbrev1, r"\p{cf}", "\u{E007F}", Some((0, 4))); +mat!(uni_class_gencat_format_abbrev2, r"\p{gc=cf}", "\u{E007F}", Some((0, 4))); mat!( uni_class_gencat_initial_punctuation, r"\p{Initial_Punctuation}",