Skip to content

Commit

Permalink
Ditch std::ascii
Browse files Browse the repository at this point in the history
When std::ascii moves into core and/or collections, we can drop some of this code.

See rust-lang/rust#16801.
  • Loading branch information
kmcallister committed Aug 28, 2014
1 parent c30deff commit 6c7f7d4
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 6 deletions.
3 changes: 1 addition & 2 deletions src/tokenizer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use self::char_ref::{CharRef, CharRefTokenizer};

use self::buffer_queue::{BufferQueue, SetResult, FromSet, NotFromSet};

use util::str::{lower_ascii, lower_ascii_letter, empty_str};
use util::str::{lower_ascii, lower_ascii_letter, empty_str, AsciiExt};
use util::atom::Atom;
use util::smallcharset::SmallCharSet;

Expand All @@ -39,7 +39,6 @@ use collections::vec::Vec;
use collections::string::String;
use collections::str::{MaybeOwned, Slice, Owned};
use std::collections::hashmap::HashMap;
use std::ascii::StrAsciiExt;

pub mod states;
mod interface;
Expand Down
3 changes: 1 addition & 2 deletions src/tree_builder/actions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use tokenizer::states::{RawData, RawKind};

use util::atom::Atom;
use util::namespace::{Namespace, HTML};
use util::str::to_escaped_string;
use util::str::{to_escaped_string, AsciiExt};

use core::mem::replace;
use core::iter::{Rev, Enumerate};
Expand All @@ -34,7 +34,6 @@ use collections::MutableSeq;
use collections::vec::Vec;
use collections::string::String;
use collections::str::Slice;
use std::ascii::StrAsciiExt;

pub struct ActiveFormattingIter<'a, Handle> {
iter: Rev<Enumerate<slice::Items<'a, FormatEntry<Handle>>>>,
Expand Down
2 changes: 1 addition & 1 deletion src/tree_builder/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ use core::prelude::*;

use tokenizer::Doctype;
use tree_builder::interface::{QuirksMode, Quirks, LimitedQuirks, NoQuirks};
use util::str::AsciiExt;

use collections::string::String;
use std::ascii::StrAsciiExt;

// These should all be lowercase, for ASCII-case-insensitive matching.
static quirky_public_prefixes: &'static [&'static str] = &[
Expand Down
126 changes: 125 additions & 1 deletion src/util/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,139 @@ use core::prelude::*;
use core::str::CharEq;
use core::fmt::Show;
use collections::str::StrAllocating;
use collections::vec::Vec;
use collections::string;
use collections::string::String;
use std::ascii::AsciiCast;

/// Format `x` with its `Show` implementation and return the result of
/// calling `escape_default` on that text.
pub fn to_escaped_string<T: Show>(x: &T) -> String {
    // FIXME: don't allocate twice
    // FIXME: use std::to_str after Rust upgrade
    let rendered = format!("{}", x);
    rendered.escape_default()
}

// FIXME: The ASCII stuff is largely copied from std::ascii
// (see rust-lang/rust#16801).

/// Byte-indexed lookup table for ASCII lowercasing.
///
/// The entries at indices 0x41..0x5A (`A` through `Z`) hold the matching
/// lowercase letters `a` through `z`; every other entry, including all
/// bytes from 0x80 up, is equal to its own index.
pub static ASCII_LOWER_MAP: [u8, ..256] = [
// 0x00 - 0x1f: control characters, unchanged.
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
// 0x20 - 0x40: punctuation and digits, unchanged.
b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
b'@',

// 0x41 - 0x5a: uppercase letters, mapped to their lowercase forms.
b'a', b'b', b'c', b'd', b'e', b'f', b'g',
b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
b'x', b'y', b'z',

// 0x5b - 0x7f: punctuation and lowercase letters, unchanged.
b'[', b'\\', b']', b'^', b'_',
b'`', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
b'x', b'y', b'z', b'{', b'|', b'}', b'~', 0x7f,
// 0x80 - 0xff: non-ASCII bytes, unchanged.
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
];

/// A single character stored as one byte.
#[deriving(Clone, PartialEq, PartialOrd, Ord, Eq, Hash)]
pub struct Ascii {
// Raw byte value. The field is private; the code visible here only builds
// an `Ascii` from a `char` below 0x80 or from an `ASCII_LOWER_MAP` lookup.
chr: u8,
}

impl Ascii {
    /// Convert the stored byte into a `char`.
    pub fn to_char(self) -> char {
        let Ascii { chr: byte } = self;
        byte as char
    }

    /// True when the byte is one of `A`-`Z` or `a`-`z`.
    #[inline]
    pub fn is_alphabetic(&self) -> bool {
        let byte = self.chr;
        let is_upper = b'A' <= byte && byte <= b'Z';
        let is_lower = b'a' <= byte && byte <= b'z';
        is_upper || is_lower
    }

    /// True when the byte is one of `0`-`9`.
    #[inline]
    pub fn is_digit(&self) -> bool {
        let byte = self.chr;
        b'0' <= byte && byte <= b'9'
    }

    /// True when the byte is an ASCII letter or an ASCII digit.
    #[inline]
    pub fn is_alphanumeric(&self) -> bool {
        self.is_digit() || self.is_alphabetic()
    }

    /// Return the lowercase counterpart, looked up in `ASCII_LOWER_MAP`.
    #[inline]
    pub fn to_lowercase(self) -> Ascii {
        let lowered = ASCII_LOWER_MAP[self.chr as uint];
        Ascii { chr: lowered }
    }
}

/// Fallible conversion of a value into an `Ascii`.
pub trait AsciiCast {
/// Return the value as an `Ascii` wrapped in `Some`, or `None` when no
/// conversion is possible (the `char` implementation in this file returns
/// `None` for code points of 0x80 and above).
fn to_ascii_opt(&self) -> Option<Ascii>;
}

impl AsciiCast for char {
    /// Wrap the character in an `Ascii` when its code point is below 0x80;
    /// otherwise return `None`.
    fn to_ascii_opt(&self) -> Option<Ascii> {
        let code_point = *self as uint;
        if code_point >= 0x80 {
            return None;
        }
        Some(Ascii { chr: code_point as u8 })
    }
}

/// ASCII-only case operations.
///
/// `T` is the owned type produced by `to_ascii_lower`; the implementations
/// in this file use `Vec<u8>` for `&[u8]` and `String` for `&str`.
pub trait AsciiExt<T> {
/// Return an owned copy of `self` with ASCII uppercase letters replaced by
/// their lowercase forms.
fn to_ascii_lower(&self) -> T;
/// Report whether `self` and `other` are equal when ASCII letter case is
/// ignored.
fn eq_ignore_ascii_case(&self, other: Self) -> bool;
}

impl<'a> AsciiExt<Vec<u8>> for &'a [u8] {
    /// Build a new vector in which every byte has been passed through
    /// `ASCII_LOWER_MAP`.
    #[inline]
    fn to_ascii_lower(&self) -> Vec<u8> {
        self.iter().map(|src| ASCII_LOWER_MAP[*src as uint]).collect()
    }

    /// Compare two byte slices, treating bytes as equal when their
    /// `ASCII_LOWER_MAP` entries match. Slices of different length are
    /// never equal.
    #[inline]
    fn eq_ignore_ascii_case(&self, other: &[u8]) -> bool {
        if self.len() != other.len() {
            return false;
        }
        for (lhs, rhs) in self.iter().zip(other.iter()) {
            if ASCII_LOWER_MAP[*lhs as uint] != ASCII_LOWER_MAP[*rhs as uint] {
                return false;
            }
        }
        true
    }
}

impl<'a> AsciiExt<String> for &'a str {
    /// Lowercase the ASCII letters of the string, leaving all other bytes
    /// as they are.
    #[inline]
    fn to_ascii_lower(&self) -> String {
        let lowered = self.as_bytes().to_ascii_lower();
        // The byte-level lowercasing only rewrites `A`-`Z` to `a`-`z`; bytes
        // from 0x80 up are left alone, so the UTF-8 invariant is preserved.
        unsafe { string::raw::from_utf8(lowered) }
    }

    /// Compare two strings byte-wise, ignoring ASCII letter case.
    #[inline]
    fn eq_ignore_ascii_case(&self, other: &str) -> bool {
        let lhs = self.as_bytes();
        let rhs = other.as_bytes();
        lhs.eq_ignore_ascii_case(rhs)
    }
}

/// If `c` is an ASCII letter, return the corresponding lowercase
/// letter, otherwise None.
pub fn lower_ascii_letter(c: char) -> Option<char> {
Expand Down

0 comments on commit 6c7f7d4

Please sign in to comment.