From ffb670ffcd69ed8e7cd13a7f06375ede752349e2 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 29 Jul 2013 01:12:41 -0700 Subject: [PATCH] Add initial support for a new formatting syntax The new macro is available under the name ifmt! (only an intermediate name) --- src/libstd/either.rs | 2 +- src/libstd/fmt/mod.rs | 368 ++++++++ src/libstd/fmt/parse.rs | 896 ++++++++++++++++++++ src/libstd/fmt/rt.rs | 62 ++ src/libstd/rt/io/mem.rs | 2 +- src/libstd/std.rs | 3 + src/libsyntax/ext/base.rs | 2 + src/libsyntax/ext/expand.rs | 4 +- src/libsyntax/ext/ifmt.rs | 720 ++++++++++++++++ src/libsyntax/syntax.rs | 1 + src/test/compile-fail/ifmt-bad-arg.rs | 74 ++ src/test/compile-fail/ifmt-bad-plural.rs | 14 + src/test/compile-fail/ifmt-bad-select.rs | 14 + src/test/compile-fail/ifmt-unimpl.rs | 14 + src/test/compile-fail/ifmt-unknown-trait.rs | 14 + src/test/run-pass/ifmt.rs | 71 ++ 16 files changed, 2258 insertions(+), 3 deletions(-) create mode 100644 src/libstd/fmt/mod.rs create mode 100644 src/libstd/fmt/parse.rs create mode 100644 src/libstd/fmt/rt.rs create mode 100644 src/libsyntax/ext/ifmt.rs create mode 100644 src/test/compile-fail/ifmt-bad-arg.rs create mode 100644 src/test/compile-fail/ifmt-bad-plural.rs create mode 100644 src/test/compile-fail/ifmt-bad-select.rs create mode 100644 src/test/compile-fail/ifmt-unimpl.rs create mode 100644 src/test/compile-fail/ifmt-unknown-trait.rs create mode 100644 src/test/run-pass/ifmt.rs diff --git a/src/libstd/either.rs b/src/libstd/either.rs index cfaef550c6fa7..bb74d9b3ec484 100644 --- a/src/libstd/either.rs +++ b/src/libstd/either.rs @@ -24,7 +24,7 @@ use vec; use vec::{OwnedVector, ImmutableVector}; /// `Either` is a type that represents one of two alternatives -#[deriving(Clone, Eq)] +#[deriving(Clone, Eq, IterBytes)] pub enum Either { Left(L), Right(R) diff --git a/src/libstd/fmt/mod.rs b/src/libstd/fmt/mod.rs new file mode 100644 index 0000000000000..2b8807b229172 --- /dev/null +++ b/src/libstd/fmt/mod.rs @@ -0,0 
+1,368 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use prelude::*; + +use cast; +use int; +use rt::io::Decorator; +use rt::io::mem::MemWriter; +use rt::io; +use str; +use sys; +use uint; +use util; +use vec; + +pub mod parse; +pub mod rt; + +/// A struct to represent both where to emit formatting strings to and how they +/// should be formatted. A mutable version of this is passed to all formatting +/// traits. +pub struct Formatter<'self> { + /// Flags for formatting (packed version of rt::Flag) + flags: uint, + /// Character used as 'fill' whenever there is alignment + fill: char, + /// Boolean indication of whether the output should be left-aligned + alignleft: bool, + /// Optionally specified integer width that the output should be + width: Option, + /// Optionally specified precision for numeric types + precision: Option, + + /// Output buffer. + buf: &'self mut io::Writer, + + priv curarg: vec::VecIterator<'self, Argument<'self>>, + priv args: &'self [Argument<'self>], +} + +/// This struct represents the generic "argument" which is taken by the Xprintf +/// family of functions. It contains a function to format the given value. At +/// compile time it is ensured that the function and the value have the correct +/// types, and then this struct is used to canonicalize arguments to one type. 
+pub struct Argument<'self> { + priv formatter: extern "Rust" fn(&util::Void, &mut Formatter), + priv value: &'self util::Void, +} + +#[allow(missing_doc)] +pub trait Bool { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait Char { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait Signed { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait Unsigned { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait Octal { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait Binary { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait LowerHex { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait UpperHex { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait String { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait Poly { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait Pointer { fn fmt(&Self, &mut Formatter); } + +/// The sprintf function takes a precompiled format string and a list of +/// arguments, to return the resulting formatted string. +/// +/// This is currently an unsafe function because the types of all arguments +/// aren't verified by immediate callers of this function. This currently does +/// not validate that the correct types of arguments are specified for each +/// format specifier, nor that each argument itself contains the right function +/// for formatting the right type value. Because of this, the function is marked +/// as `unsafe` if this is being called manually. +/// +/// Thankfully the rust compiler provides the macro `ifmt!` which will perform +/// all of this validation at compile-time and provides a safe interface for +/// invoking this function. +/// +/// # Arguments +/// +/// * fmt - the precompiled format string to emit. +/// * args - the list of arguments to the format string.
These are only the +/// positional arguments (not named) +/// +/// Note that this function assumes that there are enough arguments for the +/// format string. +pub unsafe fn sprintf(fmt: &[rt::Piece], args: &[Argument]) -> ~str { + let output = MemWriter::new(); + { + let mut formatter = Formatter { + flags: 0, + width: None, + precision: None, + // FIXME(#8248): shouldn't need a transmute + buf: cast::transmute(&output as &io::Writer), + alignleft: false, + fill: ' ', + args: args, + curarg: args.iter(), + }; + for piece in fmt.iter() { + formatter.run(piece, None); + } + } + return str::from_bytes_owned(output.inner()); +} + +impl<'self> Formatter<'self> { + fn run(&mut self, piece: &rt::Piece, cur: Option<&str>) { + let setcount = |slot: &mut Option, cnt: &parse::Count| { + match *cnt { + parse::CountIs(n) => { *slot = Some(n); } + parse::CountImplied => { *slot = None; } + parse::CountIsParam(i) => { + let v = self.args[i].value; + unsafe { *slot = Some(*(v as *util::Void as *uint)); } + } + parse::CountIsNextParam => { + let v = self.curarg.next().unwrap().value; + unsafe { *slot = Some(*(v as *util::Void as *uint)); } + } + } + }; + + match *piece { + rt::String(s) => { self.buf.write(s.as_bytes()); } + rt::CurrentArgument(()) => { self.buf.write(cur.unwrap().as_bytes()); } + rt::Argument(ref arg) => { + // Fill in the format parameters into the formatter + self.fill = arg.format.fill; + self.alignleft = arg.format.alignleft; + self.flags = arg.format.flags; + setcount(&mut self.width, &arg.format.width); + setcount(&mut self.precision, &arg.format.precision); + + // Extract the correct argument + let value = match arg.position { + rt::ArgumentNext => { *self.curarg.next().unwrap() } + rt::ArgumentIs(i) => self.args[i], + }; + + // Then actually do some printing + match arg.method { + None => { (value.formatter)(value.value, self); } + Some(ref method) => { self.execute(*method, value); } + } + } + } + } + + fn execute(&mut self, method: &rt::Method, arg: 
Argument) { + match *method { + // Pluralization is selection upon a numeric value specified as the + // parameter. + rt::Plural(offset, ref selectors, ref default) => { + // This is validated at compile-time to be a pointer to a + // '&uint' value. + let value: &uint = unsafe { cast::transmute(arg.value) }; + let value = *value; + + // First, attempt to match against explicit values without the + // offsetted value + for s in selectors.iter() { + match s.selector { + Right(val) if value == val => { + return self.runplural(value, s.result); + } + _ => {} + } + } + + // Next, offset the value and attempt to match against the + // keyword selectors. + let value = value - match offset { Some(i) => i, None => 0 }; + for s in selectors.iter() { + let run = match s.selector { + Left(parse::Zero) => value == 0, + Left(parse::One) => value == 1, + Left(parse::Two) => value == 2, + + // XXX: Few/Many should have a user-specified boundary + // One possible option would be in the function + // pointer of the 'arg: Argument' struct. + Left(parse::Few) => value < 8, + Left(parse::Many) => value >= 8, + + Right(*) => false + }; + if run { + return self.runplural(value, s.result); + } + } + + self.runplural(value, *default); + } + + // Select is just a matching against the string specified. 
+ rt::Select(ref selectors, ref default) => { + // This is validated at compile-time to be a pointer to a + // string slice, + let value: & &str = unsafe { cast::transmute(arg.value) }; + let value = *value; + + for s in selectors.iter() { + if s.selector == value { + for piece in s.result.iter() { + self.run(piece, Some(value)); + } + return; + } + } + for piece in default.iter() { + self.run(piece, Some(value)); + } + } + } + } + + fn runplural(&mut self, value: uint, pieces: &[rt::Piece]) { + do uint::to_str_bytes(value, 10) |buf| { + let valuestr = str::from_bytes_slice(buf); + for piece in pieces.iter() { + self.run(piece, Some(valuestr)); + } + } + } +} + +/// This is a function which calls are emitted to by the compiler itself to +/// create the Argument structures that are passed into the `sprintf` function. +#[doc(hidden)] +pub fn argument<'a, T>(f: extern "Rust" fn(&T, &mut Formatter), + t: &'a T) -> Argument<'a> { + unsafe { + Argument { + formatter: cast::transmute(f), + value: cast::transmute(t) + } + } +} + +/// When the compiler determines that the type of an argument *must* be a string +/// (such as for select), then it invokes this method. +#[doc(hidden)] +pub fn argumentstr<'a>(s: &'a &str) -> Argument<'a> { + argument(String::fmt, s) +} + +/// When the compiler determines that the type of an argument *must* be a uint +/// (such as for plural), then it invokes this method. 
+#[doc(hidden)] +pub fn argumentuint<'a>(s: &'a uint) -> Argument<'a> { + argument(Unsigned::fmt, s) +} + +// Implementations of the core formatting traits + +impl Bool for bool { + fn fmt(b: &bool, f: &mut Formatter) { + String::fmt(&(if *b {"true"} else {"false"}), f); + } +} + +impl<'self> String for &'self str { + fn fmt(s: & &'self str, f: &mut Formatter) { + // XXX: formatting args + f.buf.write(s.as_bytes()) + } +} + +impl Char for char { + fn fmt(c: &char, f: &mut Formatter) { + // XXX: formatting args + // XXX: shouldn't require an allocation + let mut s = ~""; + s.push_char(*c); + f.buf.write(s.as_bytes()); + } +} + +impl Signed for int { + fn fmt(c: &int, f: &mut Formatter) { + // XXX: formatting args + do int::to_str_bytes(*c, 10) |buf| { + f.buf.write(buf); + } + } +} + +impl Unsigned for uint { + fn fmt(c: &uint, f: &mut Formatter) { + // XXX: formatting args + do uint::to_str_bytes(*c, 10) |buf| { + f.buf.write(buf); + } + } +} + +impl Octal for uint { + fn fmt(c: &uint, f: &mut Formatter) { + // XXX: formatting args + do uint::to_str_bytes(*c, 8) |buf| { + f.buf.write(buf); + } + } +} + +impl LowerHex for uint { + fn fmt(c: &uint, f: &mut Formatter) { + // XXX: formatting args + do uint::to_str_bytes(*c, 16) |buf| { + f.buf.write(buf); + } + } +} + +impl UpperHex for uint { + fn fmt(c: &uint, f: &mut Formatter) { + // XXX: formatting args + do uint::to_str_bytes(*c, 16) |buf| { + let mut local = [0u8, ..16]; + for (l, &b) in local.mut_iter().zip(buf.iter()) { + *l = match b as char { + 'a' .. 'f' => (b - 'a' as u8) + 'A' as u8, + _ => b, + }; + } + f.buf.write(local.slice_to(buf.len())); + } + } +} + +impl Poly for T { + fn fmt(t: &T, f: &mut Formatter) { + // XXX: formatting args + let s = sys::log_str(t); + f.buf.write(s.as_bytes()); + } +} + +// n.b. use 'const' to get an implementation for both '*mut' and '*' at the same +// time. 
+impl Pointer for *const T { + fn fmt(t: &*const T, f: &mut Formatter) { + // XXX: formatting args + f.buf.write("0x".as_bytes()); + LowerHex::fmt(&(*t as uint), f); + } +} + +// If you expected tests to be here, look instead at the run-pass/ifmt.rs test, +// it's a lot easier than creating all of the rt::Piece structures here. diff --git a/src/libstd/fmt/parse.rs b/src/libstd/fmt/parse.rs new file mode 100644 index 0000000000000..673ea1d3fa8b5 --- /dev/null +++ b/src/libstd/fmt/parse.rs @@ -0,0 +1,896 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use prelude::*; + +use char; +use str; +use iterator; + +condition! { pub parse_error: ~str -> (); } + +/// A piece is a portion of the format string which represents the next part to +/// emit. These are emitted as a stream by the `Parser` class. +#[deriving(Eq)] +pub enum Piece<'self> { + /// A literal string which should directly be emitted + String(&'self str), + /// A back-reference to whatever the current argument is. This is used + /// inside of a method call to refer back to the original argument. + CurrentArgument, + /// This describes that formatting should process the next argument (as + /// specified inside) for emission. + Argument(Argument<'self>), +} + +/// Representation of an argument specification. +#[deriving(Eq)] +pub struct Argument<'self> { + /// Where to find this argument + position: Position<'self>, + /// How to format the argument + format: FormatSpec<'self>, + /// If not `None`, what method to invoke on the argument + method: Option<~Method<'self>> +} + +/// Specification for the formatting of an argument in the format string. 
+#[deriving(Eq)] +pub struct FormatSpec<'self> { + /// Optionally specified character to fill alignment with + fill: Option, + /// Optionally specified alignment + align: Option, + /// Packed version of various flags provided + flags: uint, + /// The integer precision to use + precision: Count, + /// The string width requested for the resulting format + width: Count, + /// The descriptor string representing the name of the format desired for + /// this argument, this can be empty or any number of characters, although + /// it is required to be one word. + ty: &'self str +} + +/// Enum describing where an argument for a format can be located. +#[deriving(Eq)] +pub enum Position<'self> { + ArgumentNext, ArgumentIs(uint), ArgumentNamed(&'self str) +} + +/// Enum of alignments which are supported. +#[deriving(Eq)] +pub enum Alignment { AlignLeft, AlignRight } + +/// Various flags which can be applied to format strings, the meaning of these +/// flags is defined by the formatters themselves. +#[deriving(Eq)] +pub enum Flag { + FlagSignPlus, + FlagSignMinus, + FlagAlternate, +} + +/// A count is used for the precision and width parameters of an integer, and +/// can reference either an argument or a literal integer. +#[deriving(Eq)] +pub enum Count { + CountIs(uint), + CountIsParam(uint), + CountIsNextParam, + CountImplied, +} + +/// Enum describing all of the possible methods which the formatting language +/// currently supports. +#[deriving(Eq)] +pub enum Method<'self> { + /// A plural method selects on an integer over a list of either integer or + /// keyword-defined clauses. The meaning of the keywords is defined by the + /// current locale. + /// + /// An offset is optionally present at the beginning which is used to match + /// against keywords, but it is not matched against the literal integers. + /// + /// The final element of this enum is the default "other" case which is + /// always required to be specified.
+ Plural(Option, ~[PluralArm<'self>], ~[Piece<'self>]), + + /// A select method selects over a string. Each arm is a different string + /// which can be selected for. + /// + /// As with `Plural`, a default "other" case is required as well. + Select(~[SelectArm<'self>], ~[Piece<'self>]), +} + +/// Structure representing one "arm" of the `plural` function. +#[deriving(Eq)] +pub struct PluralArm<'self> { + /// A selector can either be specified by a keyword or with an integer + /// literal. + selector: Either, + /// Array of pieces which are the format of this arm + result: ~[Piece<'self>], +} + +/// Enum of the 5 CLDR plural keywords. There is one more, "other", but that is +/// specially placed in the `Plural` variant of `Method` +/// +/// http://www.icu-project.org/apiref/icu4c/classicu_1_1PluralRules.html +#[deriving(Eq, IterBytes)] +pub enum PluralKeyword { + Zero, One, Two, Few, Many +} + +/// Structure representing one "arm" of the `select` function. +#[deriving(Eq)] +pub struct SelectArm<'self> { + /// String selector which guards this arm + selector: &'self str, + /// Array of pieces which are the format of this arm + result: ~[Piece<'self>], +} + +/// The parser structure for interpreting the input format string. This is +/// modelled as an iterator over `Piece` structures to form a stream of tokens +/// being output. +/// +/// This is a recursive-descent parser for the sake of simplicity, and if +/// necessary there's probably lots of room for improvement performance-wise. 
+pub struct Parser<'self> { + priv input: &'self str, + priv cur: str::CharOffsetIterator<'self>, +} + +impl<'self> iterator::Iterator> for Parser<'self> { + fn next(&mut self) -> Option> { + match self.cur.clone().next() { + Some((_, '#')) => { self.cur.next(); Some(CurrentArgument) } + Some((_, '{')) => { + self.cur.next(); + let ret = Some(Argument(self.argument())); + if !self.consume('}') { + self.err(~"unterminated format string"); + } + ret + } + Some((pos, '\\')) => { + self.cur.next(); + self.escape(); // ensure it's a valid escape sequence + Some(String(self.string(pos + 1))) // skip the '\' character + } + Some((_, '}')) | None => { None } + Some((pos, _)) => { + Some(String(self.string(pos))) + } + } + } +} + +impl<'self> Parser<'self> { + /// Creates a new parser for the given format string + pub fn new<'a>(s: &'a str) -> Parser<'a> { + Parser { + input: s, + cur: s.char_offset_iter(), + } + } + + /// Notifies of an error. The message doesn't actually need to be of type + /// ~str, but I think it does when this eventually uses conditions so it + /// might as well start using it now. + fn err(&self, msg: ~str) { + parse_error::cond.raise(msg); + } + + /// Optionally consumes the specified character. If the character is not at + /// the current position, then the current iterator isn't moved and false is + /// returned, otherwise the character is consumed and true is returned. 
+ fn consume(&mut self, c: char) -> bool { + match self.cur.clone().next() { + Some((_, maybe)) if c == maybe => { + self.cur.next(); + true + } + Some(*) | None => false, + } + } + + /// Attempts to consume any amount of whitespace followed by a character + fn wsconsume(&mut self, c: char) -> bool { + self.ws(); self.consume(c) + } + + /// Consumes all whitespace characters until the first non-whitespace + /// character + fn ws(&mut self) { + loop { + match self.cur.clone().next() { + Some((_, c)) if char::is_whitespace(c) => { self.cur.next(); } + Some(*) | None => { return } + } + } + } + + /// Consumes an escape sequence, failing if there is not a valid character + /// to be escaped. + fn escape(&mut self) -> char { + match self.cur.next() { + Some((_, c @ '#')) | Some((_, c @ '{')) | + Some((_, c @ '\\')) | Some((_, c @ '}')) => { c } + Some((_, c)) => { + self.err(fmt!("invalid escape character `%c`", c)); + c + } + None => { + self.err(~"expected an escape sequence, but format string was \ + terminated"); + ' ' + } + } + } + + /// Parses all of a string which is to be considered a "raw literal" in a + /// format string. This is everything outside of the braces. + fn string(&mut self, start: uint) -> &'self str { + loop { + // we may not consume the character, so clone the iterator + match self.cur.clone().next() { + Some((pos, '\\')) | Some((pos, '#')) | + Some((pos, '}')) | Some((pos, '{')) => { + return self.input.slice(start, pos); + } + Some(*) => { self.cur.next(); } + None => { + self.cur.next(); + return self.input.slice(start, self.input.len()); + } + } + } + } + + /// Parses an Argument structure, or what's contained within braces inside + /// the format string + fn argument(&mut self) -> Argument<'self> { + Argument { + position: self.position(), + format: self.format(), + method: self.method(), + } + } + + /// Parses a positional argument for a format. This could either be an + /// integer index of an argument, a named argument, or a blank string. 
+ fn position(&mut self) -> Position<'self> { + match self.integer() { + Some(i) => { ArgumentIs(i) } + None => { + match self.cur.clone().next() { + Some((_, c)) if char::is_alphabetic(c) => { + ArgumentNamed(self.word()) + } + _ => ArgumentNext + } + } + } + } + + /// Parses a format specifier at the current position, returning all of the + /// relevant information in the FormatSpec struct. + fn format(&mut self) -> FormatSpec<'self> { + let mut spec = FormatSpec { + fill: None, + align: None, + flags: 0, + precision: CountImplied, + width: CountImplied, + ty: self.input.slice(0, 0), + }; + if !self.consume(':') { return spec } + + // fill character + match self.cur.clone().next() { + Some((_, c)) => { + match self.cur.clone().skip(1).next() { + Some((_, '>')) | Some((_, '<')) => { + spec.fill = Some(c); + self.cur.next(); + } + Some(*) | None => {} + } + } + None => {} + } + // Alignment + if self.consume('<') { + spec.align = Some(AlignLeft); + } else if self.consume('>') { + spec.align = Some(AlignRight); + } + // Sign flags + if self.consume('+') { + spec.flags |= 1 << (FlagSignPlus as uint); + } else if self.consume('-') { + spec.flags |= 1 << (FlagSignMinus as uint); + } + // Alternate marker + if self.consume('#') { + spec.flags |= 1 << (FlagAlternate as uint); + } + // Width and precision + spec.width = self.count(); + if self.consume('.') { + if self.consume('*') { + spec.precision = CountIsNextParam; + } else { + spec.precision = self.count(); + } + } + // Finally the actual format specifier + spec.ty = self.word(); + return spec; + } + + /// Parses a method to be applied to the previously specified argument and + /// its format. 
The two current supported methods are 'plural' and 'select' + fn method(&mut self) -> Option<~Method<'self>> { + if !self.wsconsume(',') { + return None; + } + self.ws(); + match self.word() { + "select" => { + if !self.wsconsume(',') { + self.err(~"`select` must be followed by `,`"); + } + Some(self.select()) + } + "plural" => { + if !self.wsconsume(',') { + self.err(~"`plural` must be followed by `,`"); + } + Some(self.plural()) + } + "" => { + self.err(~"expected method after comma"); + return None; + } + method => { + self.err(fmt!("unknown method: `%s`", method)); + return None; + } + } + } + + /// Parses a 'select' statement (after the initial 'select' word) + fn select(&mut self) -> ~Method<'self> { + let mut other = None; + let mut arms = ~[]; + // Consume arms one at a time + loop { + self.ws(); + let selector = self.word(); + if selector == "" { + self.err(~"cannot have an empty selector"); + break + } + if !self.wsconsume('{') { + self.err(~"selector must be followed by `{`"); + } + let pieces = self.collect(); + if !self.wsconsume('}') { + self.err(~"selector case must be terminated by `}`"); + } + if selector == "other" { + if !other.is_none() { + self.err(~"multiple `other` statements in `select"); + } + other = Some(pieces); + } else { + arms.push(SelectArm { selector: selector, result: pieces }); + } + self.ws(); + match self.cur.clone().next() { + Some((_, '}')) => { break } + Some(*) | None => {} + } + } + // The "other" selector must be present + let other = match other { + Some(arm) => { arm } + None => { + self.err(~"`select` statement must provide an `other` case"); + ~[] + } + }; + ~Select(arms, other) + } + + /// Parses a 'plural' statement (after the initial 'plural' word) + fn plural(&mut self) -> ~Method<'self> { + let mut offset = None; + let mut other = None; + let mut arms = ~[]; + + // First, attempt to parse the 'offset:' field. 
We know the set of + // selector words which can appear in plural arms, and the only ones + // which start with 'o' are "other" and "offset", hence look two + // characters deep to see if we can consume the word "offset" + self.ws(); + let mut it = self.cur.clone(); + match it.next() { + Some((_, 'o')) => { + match it.next() { + Some((_, 'f')) => { + let word = self.word(); + if word != "offset" { + self.err(fmt!("expected `offset`, found `%s`", + word)); + } else { + if !self.consume(':') { + self.err(~"`offset` must be followed by `:`"); + } + match self.integer() { + Some(i) => { offset = Some(i); } + None => { + self.err(~"offset must be an integer"); + } + } + } + } + Some(*) | None => {} + } + } + Some(*) | None => {} + } + + // Next, generate all the arms + loop { + let mut isother = false; + let selector = if self.wsconsume('=') { + match self.integer() { + Some(i) => Right(i), + None => { + self.err(~"plural `=` selectors must be followed by an \ + integer"); + Right(0) + } + } + } else { + let word = self.word(); + match word { + "other" => { isother = true; Left(Zero) } + "zero" => Left(Zero), + "one" => Left(One), + "two" => Left(Two), + "few" => Left(Few), + "many" => Left(Many), + word => { + self.err(fmt!("unexpected plural selector `%s`", word)); + if word == "" { + break + } else { + Left(Zero) + } + } + } + }; + if !self.wsconsume('{') { + self.err(~"selector must be followed by `{`"); + } + let pieces = self.collect(); + if !self.wsconsume('}') { + self.err(~"selector case must be terminated by `}`"); + } + if isother { + if !other.is_none() { + self.err(~"multiple `other` statements in `select"); + } + other = Some(pieces); + } else { + arms.push(PluralArm { selector: selector, result: pieces }); + } + self.ws(); + match self.cur.clone().next() { + Some((_, '}')) => { break } + Some(*) | None => {} + } + } + + let other = match other { + Some(arm) => { arm } + None => { + self.err(~"`plural` statement must provide an `other` case"); + ~[] + } + 
}; + ~Plural(offset, arms, other) + } + + /// Parses a Count parameter at the current position. This does not check + /// for 'CountIsNextParam' because that is only used in precision, not + /// width. + fn count(&mut self) -> Count { + match self.integer() { + Some(i) => { + if self.consume('$') { + CountIsParam(i) + } else { + CountIs(i) + } + } + None => { CountImplied } + } + } + + /// Parses a word starting at the current position. A word is considered to + /// be an alphabetic character followed by any number of alphanumeric + /// characters. + fn word(&mut self) -> &'self str { + let start = match self.cur.clone().next() { + Some((pos, c)) if char::is_alphabetic(c) => { + self.cur.next(); + pos + } + Some(*) | None => { return self.input.slice(0, 0); } + }; + let mut end; + loop { + match self.cur.clone().next() { + Some((_, c)) if char::is_alphanumeric(c) => { + self.cur.next(); + } + Some((pos, _)) => { end = pos; break } + None => { end = self.input.len(); break } + } + } + self.input.slice(start, end) + } + + /// Optionally parses an integer at the current position. This doesn't deal + /// with overflow at all, it's just accumulating digits. 
+ fn integer(&mut self) -> Option { + let mut cur = 0; + let mut found = false; + loop { + match self.cur.clone().next() { + Some((_, c)) => { + match char::to_digit(c, 10) { + Some(i) => { + cur = cur * 10 + i; + found = true; + self.cur.next(); + } + None => { break } + } + } + None => { break } + } + } + if found { + return Some(cur); + } else { + return None; + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use prelude::*; + use realstd::fmt::{String}; + + fn same(fmt: &'static str, p: ~[Piece<'static>]) { + let mut parser = Parser::new(fmt); + assert_eq!(p, parser.collect()); + } + + fn fmtdflt() -> FormatSpec<'static> { + return FormatSpec { + fill: None, + align: None, + flags: 0, + precision: CountImplied, + width: CountImplied, + ty: "", + } + } + + fn musterr(s: &str) { + Parser::new(s).next(); + } + + #[test] + fn simple() { + same("asdf", ~[String("asdf")]); + same("a\\{b", ~[String("a"), String("{b")]); + same("a\\#b", ~[String("a"), String("#b")]); + same("a\\}b", ~[String("a"), String("}b")]); + same("a\\}", ~[String("a"), String("}")]); + same("\\}", ~[String("}")]); + } + + #[test] #[should_fail] fn invalid01() { musterr("{") } + #[test] #[should_fail] fn invalid02() { musterr("\\") } + #[test] #[should_fail] fn invalid03() { musterr("\\a") } + #[test] #[should_fail] fn invalid04() { musterr("{3a}") } + #[test] #[should_fail] fn invalid05() { musterr("{:|}") } + #[test] #[should_fail] fn invalid06() { musterr("{:>>>}") } + + #[test] + fn format_nothing() { + same("{}", ~[Argument(Argument { + position: ArgumentNext, + format: fmtdflt(), + method: None, + })]); + } + #[test] + fn format_position() { + same("{3}", ~[Argument(Argument { + position: ArgumentIs(3), + format: fmtdflt(), + method: None, + })]); + } + #[test] + fn format_position_nothing_else() { + same("{3:}", ~[Argument(Argument { + position: ArgumentIs(3), + format: fmtdflt(), + method: None, + })]); + } + #[test] + fn format_type() { + same("{3:a}", ~[Argument(Argument { + 
position: ArgumentIs(3), + format: FormatSpec { + fill: None, + align: None, + flags: 0, + precision: CountImplied, + width: CountImplied, + ty: "a", + }, + method: None, + })]); + } + #[test] + fn format_align_fill() { + same("{3:>}", ~[Argument(Argument { + position: ArgumentIs(3), + format: FormatSpec { + fill: None, + align: Some(AlignRight), + flags: 0, + precision: CountImplied, + width: CountImplied, + ty: "", + }, + method: None, + })]); + same("{3:0<}", ~[Argument(Argument { + position: ArgumentIs(3), + format: FormatSpec { + fill: Some('0'), + align: Some(AlignLeft), + flags: 0, + precision: CountImplied, + width: CountImplied, + ty: "", + }, + method: None, + })]); + same("{3:* or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! This is an internal module used by the ifmt! runtime. These structures are +//! emitted to static arrays to precompile format strings ahead of time. +//! +//! These definitions are similar to their `ct` equivalents, but differ in that +//! 
these can be statically allocated and are slightly optimized for the runtime + +#[allow(missing_doc)]; +#[doc(hidden)]; + +use either::Either; +use fmt::parse; +use option::Option; + +pub enum Piece<'self> { + String(&'self str), + // FIXME(#8259): this shouldn't require the unit-value here + CurrentArgument(()), + Argument(Argument<'self>), +} + +pub struct Argument<'self> { + position: Position, + format: FormatSpec, + method: Option<&'self Method<'self>> +} + +pub struct FormatSpec { + fill: char, + alignleft: bool, + flags: uint, + precision: parse::Count, + width: parse::Count, +} + +pub enum Position { + ArgumentNext, ArgumentIs(uint) +} + +pub enum Method<'self> { + Plural(Option, &'self [PluralArm<'self>], &'self [Piece<'self>]), + Select(&'self [SelectArm<'self>], &'self [Piece<'self>]), +} + +pub struct PluralArm<'self> { + selector: Either, + result: &'self [Piece<'self>], +} + +pub struct SelectArm<'self> { + selector: &'self str, + result: &'self [Piece<'self>], +} diff --git a/src/libstd/rt/io/mem.rs b/src/libstd/rt/io/mem.rs index c93945a6a9aa9..277897e5d2e27 100644 --- a/src/libstd/rt/io/mem.rs +++ b/src/libstd/rt/io/mem.rs @@ -26,7 +26,7 @@ pub struct MemWriter { } impl MemWriter { - pub fn new() -> MemWriter { MemWriter { buf: ~[] } } + pub fn new() -> MemWriter { MemWriter { buf: vec::with_capacity(128) } } } impl Writer for MemWriter { diff --git a/src/libstd/std.rs b/src/libstd/std.rs index 568709c89da73..7000b56069df6 100644 --- a/src/libstd/std.rs +++ b/src/libstd/std.rs @@ -177,6 +177,7 @@ pub mod rand; pub mod run; pub mod sys; pub mod cast; +pub mod fmt; pub mod repr; pub mod cleanup; pub mod reflect; @@ -216,4 +217,6 @@ mod std { pub use unstable; pub use str; pub use os; + pub use fmt; + pub use to_bytes; } diff --git a/src/libsyntax/ext/base.rs b/src/libsyntax/ext/base.rs index 6ed5ca3e402a8..99f74543e7978 100644 --- a/src/libsyntax/ext/base.rs +++ b/src/libsyntax/ext/base.rs @@ -139,6 +139,8 @@ pub fn syntax_expander_table() -> 
SyntaxEnv { ext::tt::macro_rules::add_new_extension)); syntax_expanders.insert(intern(&"fmt"), builtin_normal_tt(ext::fmt::expand_syntax_ext)); + syntax_expanders.insert(intern(&"ifmt"), + builtin_normal_tt(ext::ifmt::expand_syntax_ext)); syntax_expanders.insert( intern(&"auto_encode"), @SE(ItemDecorator(ext::auto_encode::expand_auto_encode))); diff --git a/src/libsyntax/ext/expand.rs b/src/libsyntax/ext/expand.rs index c7020b990bf00..a928680e09392 100644 --- a/src/libsyntax/ext/expand.rs +++ b/src/libsyntax/ext/expand.rs @@ -1014,7 +1014,9 @@ pub fn expand_crate(parse_sess: @mut parse::ParseSess, .. *afp}; let f = make_fold(f_pre); - @f.fold_crate(c) + let ret = @f.fold_crate(c); + parse_sess.span_diagnostic.handler().abort_if_errors(); + return ret; } // given a function from idents to idents, produce diff --git a/src/libsyntax/ext/ifmt.rs b/src/libsyntax/ext/ifmt.rs new file mode 100644 index 0000000000000..5cf5fdba632f4 --- /dev/null +++ b/src/libsyntax/ext/ifmt.rs @@ -0,0 +1,720 @@ +// Copyright 2012 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use ast; +use codemap::{span, respan}; +use ext::base::*; +use ext::base; +use ext::build::AstBuilder; +use rsparse = parse; +use parse::token; + +use std::fmt::parse; +use std::hashmap::{HashMap, HashSet}; +use std::vec; + +#[deriving(Eq)] +enum ArgumentType { + Unknown, + Known(@str), + Unsigned, + String, +} + +struct Context { + ecx: @ExtCtxt, + fmtsp: span, + + // Parsed argument expressions and the types that we've found so far for + // them. 
+ args: ~[@ast::expr], + arg_types: ~[Option], + // Parsed named expressions and the types that we've found for them so far + names: HashMap<@str, @ast::expr>, + name_types: HashMap<@str, ArgumentType>, + + // Collection of the compiled `rt::Piece` structures + pieces: ~[@ast::expr], + name_positions: HashMap<@str, uint>, + method_statics: ~[@ast::item], + + // Updated as arguments are consumed or methods are entered + nest_level: uint, + next_arg: uint, +} + +impl Context { + /// Parses the arguments from the given list of tokens, returning None if + /// there's a parse error so we can continue parsing other fmt! expressions. + fn parse_args(&mut self, sp: span, + tts: &[ast::token_tree]) -> Option<@ast::expr> { + let p = rsparse::new_parser_from_tts(self.ecx.parse_sess(), + self.ecx.cfg(), + tts.to_owned()); + if *p.token == token::EOF { + self.ecx.span_err(sp, "ifmt! expects at least one argument"); + return None; + } + let fmtstr = p.parse_expr(); + let mut named = false; + while *p.token != token::EOF { + if !p.eat(&token::COMMA) { + self.ecx.span_err(sp, "expected token: `,`"); + return None; + } + if named || (token::is_ident(p.token) && + p.look_ahead(1, |t| *t == token::EQ)) { + named = true; + let ident = match *p.token { + token::IDENT(i, _) => { + p.bump(); + i + } + _ if named => { + self.ecx.span_err(*p.span, + "expected ident, positional arguments \ + cannot follow named arguments"); + return None; + } + _ => { + self.ecx.span_err(*p.span, + fmt!("expected ident for named \ + argument, but found `%s`", + p.this_token_to_str())); + return None; + } + }; + let name = self.ecx.str_of(ident); + p.expect(&token::EQ); + let e = p.parse_expr(); + match self.names.find(&name) { + None => {} + Some(prev) => { + self.ecx.span_err(e.span, fmt!("duplicate argument \ + named `%s`", name)); + self.ecx.parse_sess.span_diagnostic.span_note( + prev.span, "previously here"); + loop + } + } + self.names.insert(name, e); + } else { + self.args.push(p.parse_expr()); + 
self.arg_types.push(None); + } + } + return Some(fmtstr); + } + + /// Verifies one piece of a parse string. All errors are not emitted as + /// fatal so we can continue giving errors about this and possibly other + /// format strings. + fn verify_piece(&mut self, p: &parse::Piece) { + match *p { + parse::String(*) => {} + parse::CurrentArgument => { + if self.nest_level == 0 { + self.ecx.span_err(self.fmtsp, + "`#` reference used with nothing to \ + reference back to"); + } + } + parse::Argument(ref arg) => { + // argument first (it's first in the format string) + let pos = match arg.position { + parse::ArgumentNext => { + let i = self.next_arg; + if self.check_positional_ok() { + self.next_arg += 1; + } + Left(i) + } + parse::ArgumentIs(i) => Left(i), + parse::ArgumentNamed(s) => Right(s.to_managed()), + }; + let ty = if arg.format.ty == "" { + Unknown + } else { Known(arg.format.ty.to_managed()) }; + self.verify_arg_type(pos, ty); + + // width/precision next + self.verify_count(arg.format.width); + self.verify_count(arg.format.precision); + + // and finally the method being applied + match arg.method { + None => {} + Some(ref method) => { self.verify_method(pos, *method); } + } + } + } + } + + fn verify_pieces(&mut self, pieces: &[parse::Piece]) { + for piece in pieces.iter() { + self.verify_piece(piece); + } + } + + fn verify_count(&mut self, c: parse::Count) { + match c { + parse::CountImplied | parse::CountIs(*) => {} + parse::CountIsParam(i) => { + self.verify_arg_type(Left(i), Unsigned); + } + parse::CountIsNextParam => { + if self.check_positional_ok() { + self.verify_arg_type(Left(self.next_arg), Unsigned); + self.next_arg += 1; + } + } + } + } + + fn check_positional_ok(&mut self) -> bool { + if self.nest_level != 0 { + self.ecx.span_err(self.fmtsp, "cannot use implicit positional \ + arguments nested inside methods"); + false + } else { + true + } + } + + fn verify_method(&mut self, pos: Either, m: &parse::Method) { + self.nest_level += 1; + match *m { + 
parse::Plural(_, ref arms, ref default) => { + let mut seen_cases = HashSet::new(); + self.verify_arg_type(pos, Unsigned); + for arm in arms.iter() { + if !seen_cases.insert(arm.selector) { + match arm.selector { + Left(name) => { + self.ecx.span_err(self.fmtsp, + fmt!("duplicate selector \ + `%?`", name)); + } + Right(idx) => { + self.ecx.span_err(self.fmtsp, + fmt!("duplicate selector \ + `=%u`", idx)); + } + } + } + self.verify_pieces(arm.result); + } + self.verify_pieces(*default); + } + parse::Select(ref arms, ref default) => { + self.verify_arg_type(pos, String); + let mut seen_cases = HashSet::new(); + for arm in arms.iter() { + if !seen_cases.insert(arm.selector) { + self.ecx.span_err(self.fmtsp, + fmt!("duplicate selector `%s`", + arm.selector)); + } else if arm.selector == "" { + self.ecx.span_err(self.fmtsp, + "empty selector in `select`"); + } + self.verify_pieces(arm.result); + } + self.verify_pieces(*default); + } + } + self.nest_level -= 1; + } + + fn verify_arg_type(&mut self, arg: Either, ty: ArgumentType) { + match arg { + Left(arg) => { + if arg < 0 || self.args.len() <= arg { + let msg = fmt!("invalid reference to argument `%u` (there \ + are %u arguments)", arg, self.args.len()); + self.ecx.span_err(self.fmtsp, msg); + return; + } + self.verify_same(self.args[arg].span, ty, self.arg_types[arg]); + if ty != Unknown || self.arg_types[arg].is_none() { + self.arg_types[arg] = Some(ty); + } + } + + Right(name) => { + let span = match self.names.find(&name) { + Some(e) => e.span, + None => { + let msg = fmt!("There is no argument named `%s`", name); + self.ecx.span_err(self.fmtsp, msg); + return; + } + }; + self.verify_same(span, ty, + self.name_types.find(&name).map(|&x| *x)); + if ty != Unknown || !self.name_types.contains_key(&name) { + self.name_types.insert(name, ty); + } + // Assign this named argument a slot in the arguments array if + // it hasn't already been assigned a slot. 
+ if !self.name_positions.contains_key(&name) { + let slot = self.name_positions.len(); + self.name_positions.insert(name, slot); + } + } + } + } + + /// When we're keeping track of the types that are declared for certain + /// arguments, we assume that `None` means we haven't seen this argument + /// yet, `Some(Unknown)` means that we've seen the argument, but no format was + /// specified, and `Some(Known(x))` means that the argument was declared to + /// have type `x`. + /// + /// Obviously `Some(Known(x)) != Some(Known(y))`, but we consider it true + /// that: `Some(Unknown) == Some(Known(x))` + fn verify_same(&self, sp: span, ty: ArgumentType, + before: Option) { + if ty == Unknown { return } + let cur = match before { + Some(Unknown) | None => return, + Some(t) => t, + }; + if ty == cur { return } + match (cur, ty) { + (Known(cur), Known(ty)) => { + self.ecx.span_err(sp, + fmt!("argument redeclared with type `%s` when \ + it was previously `%s`", ty, cur)); + } + (Known(cur), _) => { + self.ecx.span_err(sp, + fmt!("argument used to format with `%s` was \ + attempted to not be used for formatting", + cur)); + } + (_, Known(ty)) => { + self.ecx.span_err(sp, + fmt!("argument previously used as a format \ + argument attempted to be used as `%s`", + ty)); + } + (_, _) => { + self.ecx.span_err(sp, "argument declared with multiple formats"); + } + } + } + + /// Translate a `parse::Piece` to a static `rt::Piece` + fn trans_piece(&mut self, piece: &parse::Piece) -> @ast::expr { + let sp = self.fmtsp; + let rtpath = |s: &str| { + ~[self.ecx.ident_of("std"), self.ecx.ident_of("fmt"), + self.ecx.ident_of("rt"), self.ecx.ident_of(s)] + }; + let ctpath = |s: &str| { + ~[self.ecx.ident_of("std"), self.ecx.ident_of("fmt"), + self.ecx.ident_of("parse"), self.ecx.ident_of(s)] + }; + let none = || { + let p = self.ecx.path(sp, ~[self.ecx.ident_of("None")]); + self.ecx.expr_path(p) + }; + let some = |e: @ast::expr| { + self.ecx.expr_call_ident(sp, self.ecx.ident_of("Some"), ~[e]) + }; +
let trans_count = |c: parse::Count| { + match c { + parse::CountIs(i) => { + self.ecx.expr_call_global(sp, ctpath("CountIs"), + ~[self.ecx.expr_uint(sp, i)]) + } + parse::CountIsParam(i) => { + self.ecx.expr_call_global(sp, ctpath("CountIsParam"), + ~[self.ecx.expr_uint(sp, i)]) + } + parse::CountImplied => { + let path = self.ecx.path_global(sp, ctpath("CountImplied")); + self.ecx.expr_path(path) + } + parse::CountIsNextParam => { + let path = self.ecx.path_global(sp, ctpath("CountIsNextParam")); + self.ecx.expr_path(path) + } + } + }; + let trans_method = |method: &parse::Method| { + let method = match *method { + parse::Select(ref arms, ref default) => { + let arms = arms.iter().transform(|arm| { + let p = self.ecx.path_global(sp, rtpath("SelectArm")); + let result = arm.result.iter().transform(|p| { + self.trans_piece(p) + }).collect(); + let s = arm.selector.to_managed(); + let selector = self.ecx.expr_str(sp, s); + self.ecx.expr_struct(sp, p, ~[ + self.ecx.field_imm(sp, + self.ecx.ident_of("selector"), + selector), + self.ecx.field_imm(sp, self.ecx.ident_of("result"), + self.ecx.expr_vec_slice(sp, result)), + ]) + }).collect(); + let default = default.iter().transform(|p| { + self.trans_piece(p) + }).collect(); + self.ecx.expr_call_global(sp, rtpath("Select"), ~[ + self.ecx.expr_vec_slice(sp, arms), + self.ecx.expr_vec_slice(sp, default), + ]) + } + parse::Plural(offset, ref arms, ref default) => { + let offset = match offset { + Some(i) => { some(self.ecx.expr_uint(sp, i)) } + None => { none() } + }; + let arms = arms.iter().transform(|arm| { + let p = self.ecx.path_global(sp, rtpath("PluralArm")); + let result = arm.result.iter().transform(|p| { + self.trans_piece(p) + }).collect(); + let (lr, selarg) = match arm.selector { + Left(t) => { + let p = ctpath(fmt!("%?", t)); + let p = self.ecx.path_global(sp, p); + (self.ecx.ident_of("Left"), + self.ecx.expr_path(p)) + } + Right(i) => { + (self.ecx.ident_of("Right"), + self.ecx.expr_uint(sp, i)) + } + }; + let 
selector = self.ecx.expr_call_ident(sp, + lr, ~[selarg]); + self.ecx.expr_struct(sp, p, ~[ + self.ecx.field_imm(sp, + self.ecx.ident_of("selector"), + selector), + self.ecx.field_imm(sp, self.ecx.ident_of("result"), + self.ecx.expr_vec_slice(sp, result)), + ]) + }).collect(); + let default = default.iter().transform(|p| { + self.trans_piece(p) + }).collect(); + self.ecx.expr_call_global(sp, rtpath("Plural"), ~[ + offset, + self.ecx.expr_vec_slice(sp, arms), + self.ecx.expr_vec_slice(sp, default), + ]) + } + }; + let life = self.ecx.lifetime(sp, self.ecx.ident_of("static")); + let ty = self.ecx.ty_path(self.ecx.path_all( + sp, + true, + rtpath("Method"), + Some(life), + ~[] + ), None); + let st = ast::item_static(ty, ast::m_imm, method); + let static_name = self.ecx.ident_of(fmt!("__static_method_%u", + self.method_statics.len())); + let item = self.ecx.item(sp, static_name, ~[], st); + self.method_statics.push(item); + self.ecx.expr_ident(sp, static_name) + }; + + match *piece { + parse::String(s) => { + self.ecx.expr_call_global(sp, rtpath("String"), + ~[self.ecx.expr_str(sp, s.to_managed())]) + } + parse::CurrentArgument => { + let nil = self.ecx.expr_lit(sp, ast::lit_nil); + self.ecx.expr_call_global(sp, rtpath("CurrentArgument"), ~[nil]) + } + parse::Argument(ref arg) => { + // Translate the position + let pos = match arg.position { + // These two have a direct mapping + parse::ArgumentNext => { + let path = self.ecx.path_global(sp, + rtpath("ArgumentNext")); + self.ecx.expr_path(path) + } + parse::ArgumentIs(i) => { + self.ecx.expr_call_global(sp, rtpath("ArgumentIs"), + ~[self.ecx.expr_uint(sp, i)]) + } + // Named arguments are converted to positional arguments at + // the end of the list of arguments + parse::ArgumentNamed(n) => { + let n = n.to_managed(); + let i = match self.name_positions.find_copy(&n) { + Some(i) => i, + None => 0, // error already emitted elsewhere + }; + let i = i + self.args.len(); + self.ecx.expr_call_global(sp, rtpath("ArgumentIs"), 
+ ~[self.ecx.expr_uint(sp, i)]) + } + }; + + // Translate the format + let fill = match arg.format.fill { Some(c) => c, None => ' ' }; + let fill = self.ecx.expr_lit(sp, ast::lit_int(fill as i64, + ast::ty_char)); + let align = match arg.format.align { + None | Some(parse::AlignLeft) => { + self.ecx.expr_bool(sp, true) + } + Some(parse::AlignRight) => { + self.ecx.expr_bool(sp, false) + } + }; + let flags = self.ecx.expr_uint(sp, arg.format.flags); + let prec = trans_count(arg.format.precision); + let width = trans_count(arg.format.width); + let path = self.ecx.path_global(sp, rtpath("FormatSpec")); + let fmt = self.ecx.expr_struct(sp, path, ~[ + self.ecx.field_imm(sp, self.ecx.ident_of("fill"), fill), + self.ecx.field_imm(sp, self.ecx.ident_of("alignleft"), align), + self.ecx.field_imm(sp, self.ecx.ident_of("flags"), flags), + self.ecx.field_imm(sp, self.ecx.ident_of("precision"), prec), + self.ecx.field_imm(sp, self.ecx.ident_of("width"), width), + ]); + + // Translate the method (if any) + let method = match arg.method { + None => { none() } + Some(ref m) => { + let m = trans_method(*m); + some(self.ecx.expr_addr_of(sp, m)) + } + }; + let path = self.ecx.path_global(sp, rtpath("Argument")); + let s = self.ecx.expr_struct(sp, path, ~[ + self.ecx.field_imm(sp, self.ecx.ident_of("position"), pos), + self.ecx.field_imm(sp, self.ecx.ident_of("format"), fmt), + self.ecx.field_imm(sp, self.ecx.ident_of("method"), method), + ]); + self.ecx.expr_call_global(sp, rtpath("Argument"), ~[s]) + } + } + } + + /// Actually builds the expression which the ifmt! 
block will be expanded + /// to + fn to_expr(&self) -> @ast::expr { + let mut lets = ~[]; + let mut locals = ~[]; + let mut names = vec::from_fn(self.name_positions.len(), |_| None); + + // First, declare all of our methods that are statics + for &method in self.method_statics.iter() { + let decl = respan(self.fmtsp, ast::decl_item(method)); + lets.push(@respan(self.fmtsp, + ast::stmt_decl(@decl, self.ecx.next_id()))); + } + + // Next, build up the static array which will become our precompiled + // format "string" + let fmt = self.ecx.expr_vec(self.fmtsp, self.pieces.clone()); + let ty = ast::ty_fixed_length_vec( + self.ecx.ty_mt( + self.ecx.ty_path(self.ecx.path_all( + self.fmtsp, + true, ~[ + self.ecx.ident_of("std"), + self.ecx.ident_of("fmt"), + self.ecx.ident_of("rt"), + self.ecx.ident_of("Piece"), + ], + Some(self.ecx.lifetime(self.fmtsp, self.ecx.ident_of("static"))), + ~[] + ), None), + ast::m_imm + ), + self.ecx.expr_uint(self.fmtsp, self.pieces.len()) + ); + let ty = self.ecx.ty(self.fmtsp, ty); + let st = ast::item_static(ty, ast::m_imm, fmt); + let static_name = self.ecx.ident_of("__static_fmtstr"); + let item = self.ecx.item(self.fmtsp, static_name, ~[], st); + let decl = respan(self.fmtsp, ast::decl_item(item)); + lets.push(@respan(self.fmtsp, ast::stmt_decl(@decl, self.ecx.next_id()))); + + // Right now there is a bug such that for the expression: + // foo(bar(&1)) + // the lifetime of `1` doesn't outlast the call to `bar`, so it's not + // valid for the call to `foo`. To work around this all arguments to the + // fmt! string are shoved into locals.
+ for (i, &e) in self.args.iter().enumerate() { + if self.arg_types[i].is_none() { loop } // error already generated + + let name = self.ecx.ident_of(fmt!("__arg%u", i)); + lets.push(self.ecx.stmt_let(e.span, false, name, e)); + locals.push(self.format_arg(e.span, Left(i), name)); + } + for (&name, &e) in self.names.iter() { + if !self.name_types.contains_key(&name) { loop } + + let lname = self.ecx.ident_of(fmt!("__arg%s", name)); + lets.push(self.ecx.stmt_let(e.span, false, lname, e)); + names[*self.name_positions.get(&name)] = + Some(self.format_arg(e.span, Right(name), lname)); + } + + let args = names.consume_iter().transform(|a| a.unwrap()); + let mut args = locals.consume_iter().chain_(args); + + // Next, build up the actual call to the sprintf function. + let result = self.ecx.expr_call_global(self.fmtsp, ~[ + self.ecx.ident_of("std"), + self.ecx.ident_of("fmt"), + self.ecx.ident_of("sprintf"), + ], ~[ + self.ecx.expr_ident(self.fmtsp, static_name), + self.ecx.expr_vec(self.fmtsp, args.collect()), + ]); + + // sprintf is unsafe, but we just went through a lot of work to + // validate that our call is safe, so inject the unsafe block for the + // user. + let result = self.ecx.expr_block(ast::Block { + view_items: ~[], + stmts: ~[], + expr: Some(result), + id: self.ecx.next_id(), + rules: ast::UnsafeBlock, + span: self.fmtsp, + }); + + self.ecx.expr_block(self.ecx.block(self.fmtsp, lets, Some(result))) + } + + fn format_arg(&self, sp: span, arg: Either, + ident: ast::ident) -> @ast::expr { + let mut ty = match arg { + Left(i) => self.arg_types[i].unwrap(), + Right(s) => *self.name_types.get(&s) + }; + // Default types to '?' if nothing else is specified. + if ty == Unknown { + ty = Known(@"?"); + } + let argptr = self.ecx.expr_addr_of(sp, self.ecx.expr_ident(sp, ident)); + match ty { + Known(tyname) => { + let fmt_trait = match tyname.as_slice() { + "?"
=> "Poly", + "d" | "i" => "Signed", + "u" => "Unsigned", + "b" => "Bool", + "c" => "Char", + "o" => "Octal", + "x" => "LowerHex", + "X" => "UpperHex", + "s" => "String", + "p" => "Pointer", + _ => { + self.ecx.span_err(sp, fmt!("unknown format trait \ + `%s`", tyname)); + "Dummy" + } + }; + let format_fn = self.ecx.path_global(sp, ~[ + self.ecx.ident_of("std"), + self.ecx.ident_of("fmt"), + self.ecx.ident_of(fmt_trait), + self.ecx.ident_of("fmt"), + ]); + self.ecx.expr_call_global(sp, ~[ + self.ecx.ident_of("std"), + self.ecx.ident_of("fmt"), + self.ecx.ident_of("argument"), + ], ~[self.ecx.expr_path(format_fn), argptr]) + } + String => { + self.ecx.expr_call_global(sp, ~[ + self.ecx.ident_of("std"), + self.ecx.ident_of("fmt"), + self.ecx.ident_of("argumentstr"), + ], ~[argptr]) + } + Unsigned => { + self.ecx.expr_call_global(sp, ~[ + self.ecx.ident_of("std"), + self.ecx.ident_of("fmt"), + self.ecx.ident_of("argumentuint"), + ], ~[argptr]) + } + Unknown => { fail!() } + } + } +} + +pub fn expand_syntax_ext(ecx: @ExtCtxt, sp: span, + tts: &[ast::token_tree]) -> base::MacResult { + let mut cx = Context { + ecx: ecx, + args: ~[], + arg_types: ~[], + names: HashMap::new(), + name_positions: HashMap::new(), + name_types: HashMap::new(), + nest_level: 0, + next_arg: 0, + pieces: ~[], + method_statics: ~[], + fmtsp: sp, + }; + let efmt = match cx.parse_args(sp, tts) { + Some(e) => e, + None => { return MRExpr(ecx.expr_uint(sp, 2)); } + }; + cx.fmtsp = efmt.span; + let fmt = expr_to_str(ecx, efmt, + ~"first argument to ifmt! must be a string literal."); + + let mut err = false; + do parse::parse_error::cond.trap(|m| { + if !err { + err = true; + ecx.span_err(efmt.span, m); + } + }).inside { + for piece in parse::Parser::new(fmt) { + if !err { + cx.verify_piece(&piece); + let piece = cx.trans_piece(&piece); + cx.pieces.push(piece); + } + } + } + if err { return MRExpr(efmt) } + + // Make sure that all arguments were used and all arguments have types. 
+ for (i, ty) in cx.arg_types.iter().enumerate() { + if ty.is_none() { + ecx.span_err(cx.args[i].span, "argument never used"); + } + } + for (name, e) in cx.names.iter() { + if !cx.name_types.contains_key(name) { + ecx.span_err(e.span, "named argument never used"); + } + } + + MRExpr(cx.to_expr()) +} diff --git a/src/libsyntax/syntax.rs b/src/libsyntax/syntax.rs index e0f5aa848a29b..a5feb0483d894 100644 --- a/src/libsyntax/syntax.rs +++ b/src/libsyntax/syntax.rs @@ -73,6 +73,7 @@ pub mod ext { pub mod cfg; pub mod fmt; + pub mod ifmt; pub mod env; pub mod bytes; pub mod concat_idents; diff --git a/src/test/compile-fail/ifmt-bad-arg.rs b/src/test/compile-fail/ifmt-bad-arg.rs new file mode 100644 index 0000000000000..875ad0d2b62a5 --- /dev/null +++ b/src/test/compile-fail/ifmt-bad-arg.rs @@ -0,0 +1,74 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +fn main() { + // bad arguments to the ifmt! 
call + + ifmt!(); //~ ERROR: expects at least one + ifmt!("{}"); //~ ERROR: invalid reference to argument + + ifmt!("{1}", 1); //~ ERROR: invalid reference to argument `1` + //~^ ERROR: argument never used + ifmt!("{foo}"); //~ ERROR: no argument named `foo` + + ifmt!("{}", 1, 2); //~ ERROR: argument never used + ifmt!("{1}", 1, 2); //~ ERROR: argument never used + ifmt!("{}", 1, foo=2); //~ ERROR: named argument never used + ifmt!("{foo}", 1, foo=2); //~ ERROR: argument never used + ifmt!("", foo=2); //~ ERROR: named argument never used + + ifmt!("{0:d} {0:s}", 1); //~ ERROR: redeclared with type `s` + ifmt!("{foo:d} {foo:s}", foo=1); //~ ERROR: redeclared with type `s` + + ifmt!("{foo}", foo=1, foo=2); //~ ERROR: duplicate argument + ifmt!("#"); //~ ERROR: `#` reference used + ifmt!("", foo=1, 2); //~ ERROR: positional arguments cannot follow + ifmt!("" 1); //~ ERROR: expected token: `,` + ifmt!("", 1 1); //~ ERROR: expected token: `,` + + ifmt!("{0, select, a{} a{} other{}}", "a"); //~ ERROR: duplicate selector + ifmt!("{0, plural, =1{} =1{} other{}}", 1u); //~ ERROR: duplicate selector + ifmt!("{0, plural, one{} one{} other{}}", 1u); //~ ERROR: duplicate selector + + // bad syntax of the format string + + ifmt!("{"); //~ ERROR: unterminated format string + ifmt!("\\ "); //~ ERROR: invalid escape + ifmt!("\\"); //~ ERROR: expected an escape + + ifmt!("{0, }", 1); //~ ERROR: expected method + ifmt!("{0, foo}", 1); //~ ERROR: unknown method + ifmt!("{0, select}", "a"); //~ ERROR: must be followed by + ifmt!("{0, plural}", 1); //~ ERROR: must be followed by + + ifmt!("{0, select, a{{}", 1); //~ ERROR: must be terminated + ifmt!("{0, select, {} other{}}", "a"); //~ ERROR: empty selector + ifmt!("{0, select, other{} other{}}", "a"); //~ ERROR: multiple `other` + ifmt!("{0, plural, offset: other{}}", "a"); //~ ERROR: must be an integer + ifmt!("{0, plural, offset 1 other{}}", "a"); //~ ERROR: be followed by `:` + ifmt!("{0, plural, =a{} other{}}", "a"); //~ ERROR: 
followed by an integer + ifmt!("{0, plural, a{} other{}}", "a"); //~ ERROR: unexpected plural + ifmt!("{0, select, a{}}", "a"); //~ ERROR: must provide an `other` + ifmt!("{0, plural, =1{}}", "a"); //~ ERROR: must provide an `other` + + ifmt!("{0, plural, other{{0:s}}}", "a"); //~ ERROR: previously used as + ifmt!("{:s} {0, plural, other{}}", "a"); //~ ERROR: argument used to + ifmt!("{0, select, other{}} \ + {0, plural, other{}}", "a"); + //~^ ERROR: declared with multiple formats + + // It should be illegal to use implicit placement arguments nested inside of + // format strings because otherwise the "internal pointer of which argument + // is next" would be invalidated if different cases had different numbers of + // arguments. + ifmt!("{0, select, other{{}}}", "a"); //~ ERROR: cannot use implicit + ifmt!("{0, plural, other{{}}}", 1); //~ ERROR: cannot use implicit + ifmt!("{0, plural, other{{1:.*d}}}", 1, 2); //~ ERROR: cannot use implicit +} diff --git a/src/test/compile-fail/ifmt-bad-plural.rs b/src/test/compile-fail/ifmt-bad-plural.rs new file mode 100644 index 0000000000000..76a697b174f54 --- /dev/null +++ b/src/test/compile-fail/ifmt-bad-plural.rs @@ -0,0 +1,14 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +fn main() { + ifmt!("{0, plural, other{}}", "a"); + //~^ ERROR: expected uint but found +} diff --git a/src/test/compile-fail/ifmt-bad-select.rs b/src/test/compile-fail/ifmt-bad-select.rs new file mode 100644 index 0000000000000..abe3b6ed65a6d --- /dev/null +++ b/src/test/compile-fail/ifmt-bad-select.rs @@ -0,0 +1,14 @@ +// Copyright 2013 The Rust Project Developers. 
See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +fn main() { + ifmt!("{0, select, other{}}", 2); + //~^ ERROR: expected &str but found integral +} diff --git a/src/test/compile-fail/ifmt-unimpl.rs b/src/test/compile-fail/ifmt-unimpl.rs new file mode 100644 index 0000000000000..427f5ea562c7e --- /dev/null +++ b/src/test/compile-fail/ifmt-unimpl.rs @@ -0,0 +1,14 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +fn main() { + ifmt!("{:d}", "3"); + //~^ ERROR: failed to find an implementation of trait std::fmt::Signed +} diff --git a/src/test/compile-fail/ifmt-unknown-trait.rs b/src/test/compile-fail/ifmt-unknown-trait.rs new file mode 100644 index 0000000000000..85556f9501acb --- /dev/null +++ b/src/test/compile-fail/ifmt-unknown-trait.rs @@ -0,0 +1,14 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +fn main() { + ifmt!("{:notimplemented}", "3"); + //~^ ERROR: unknown format trait `notimplemented` +} diff --git a/src/test/run-pass/ifmt.rs b/src/test/run-pass/ifmt.rs new file mode 100644 index 0000000000000..562642453fd27 --- /dev/null +++ b/src/test/run-pass/ifmt.rs @@ -0,0 +1,71 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use std::fmt; + +struct A; +struct B; + +#[fmt="foo"] +impl fmt::Signed for A { + fn fmt(_: &A, f: &mut fmt::Formatter) { f.buf.write("aloha".as_bytes()); } +} +impl fmt::Signed for B { + fn fmt(_: &B, f: &mut fmt::Formatter) { f.buf.write("adios".as_bytes()); } +} + +pub fn main() { + fn t(a: ~str, b: &str) { assert_eq!(a, b.to_owned()); } + + // Make sure there's a poly formatter that takes anything + t(ifmt!("{}", 1), "1"); + t(ifmt!("{}", A), "{}"); + t(ifmt!("{}", ()), "()"); + t(ifmt!("{}", @(~1, "foo")), "@(~1, \"foo\")"); + + // Various edge cases without formats + t(ifmt!(""), ""); + t(ifmt!("hello"), "hello"); + t(ifmt!("hello \\{"), "hello {"); + + // At least exercise all the formats + t(ifmt!("{:b}", true), "true"); + t(ifmt!("{:c}", '☃'), "☃"); + t(ifmt!("{:d}", 10), "10"); + t(ifmt!("{:i}", 10), "10"); + t(ifmt!("{:u}", 10u), "10"); + t(ifmt!("{:o}", 10u), "12"); + t(ifmt!("{:x}", 10u), "a"); + t(ifmt!("{:X}", 10u), "A"); + t(ifmt!("{:s}", "foo"), "foo"); + t(ifmt!("{:p}", 0x1234 as *int), "0x1234"); + t(ifmt!("{:p}", 0x1234 as *mut int), "0x1234"); + t(ifmt!("{:d}", A), "aloha"); + t(ifmt!("{:d}", B), "adios"); + t(ifmt!("foo {:s} ☃☃☃☃☃☃", "bar"), "foo bar ☃☃☃☃☃☃"); + t(ifmt!("{1} {0}", 0, 1), "1 0"); + t(ifmt!("{foo} {bar}", foo=0, bar=1), "0 1"); + t(ifmt!("{foo} {1} {bar} {0}", 0, 1, foo=2, bar=3), "2 1 3 
0"); + t(ifmt!("{} {0:s}", "a"), "a a"); + t(ifmt!("{} {0}", "a"), "\"a\" \"a\""); + + // Methods should probably work + t(ifmt!("{0, plural, =1{a#} =2{b#} zero{c#} other{d#}}", 0u), "c0"); + t(ifmt!("{0, plural, =1{a#} =2{b#} zero{c#} other{d#}}", 1u), "a1"); + t(ifmt!("{0, plural, =1{a#} =2{b#} zero{c#} other{d#}}", 2u), "b2"); + t(ifmt!("{0, plural, =1{a#} =2{b#} zero{c#} other{d#}}", 3u), "d3"); + t(ifmt!("{0, select, a{a#} b{b#} c{c#} other{d#}}", "a"), "aa"); + t(ifmt!("{0, select, a{a#} b{b#} c{c#} other{d#}}", "b"), "bb"); + t(ifmt!("{0, select, a{a#} b{b#} c{c#} other{d#}}", "c"), "cc"); + t(ifmt!("{0, select, a{a#} b{b#} c{c#} other{d#}}", "d"), "dd"); + t(ifmt!("{1, select, a{#{0:s}} other{#{1}}}", "b", "a"), "ab"); + t(ifmt!("{1, select, a{#{0}} other{#{1}}}", "c", "b"), "bb"); +} +