diff --git a/error_handling.cpp b/error_handling.cpp index 69116baf08..56d0aa0f71 100644 --- a/error_handling.cpp +++ b/error_handling.cpp @@ -13,7 +13,7 @@ namespace Sass { void error(string msg, ParserState pstate, Backtrace* bt) { - if (!pstate.path.empty() && Prelexer::string_constant(pstate.path.c_str())) + if (!pstate.path.empty() && Prelexer::quoted_string(pstate.path.c_str())) pstate.path = pstate.path.substr(1, pstate.path.size() - 1); Backtrace top(bt, pstate, ""); diff --git a/inspect.cpp b/inspect.cpp index c157a930ea..d58dd3ce74 100644 --- a/inspect.cpp +++ b/inspect.cpp @@ -1,6 +1,7 @@ #include "inspect.hpp" #include "ast.hpp" #include "context.hpp" +#include "utf8/checked.h" #include #include #include @@ -768,17 +769,60 @@ namespace Sass { if (s.length() == 1) { if (s[0] == '"' || s[0] == '\'') return ""; } - char q; - if (*s.begin() == '"' && *s.rbegin() == '"') q = '"'; - else if (*s.begin() == '\'' && *s.rbegin() == '\'') q = '\''; + // char q; + if (*s.begin() == '"' && *s.rbegin() == '"') {} // q = '"'; + else if (*s.begin() == '\'' && *s.rbegin() == '\'') {} // q = '\''; else return s; string t; t.reserve(s.length()-2); + for (size_t i = 1, L = s.length()-1; i < L; ++i) { - // if we see a quote, we need to remove the preceding backslash from t - if (s[i-1] == '\\' && s[i] == q) t.erase(t.length()-1); - t.push_back(s[i]); + + // implement the same strange ruby sass behavior + // an escape sequence can also mean a unicode char + if (s[i] == '\\') { + + // skip it + ++ i; + + // escape length + size_t len = 0; + + // parse as many sequence chars as possible + // ToDo: Check if ruby aborts after possible max + while (s[i + len] && isxdigit(s[i + len])) ++ len; + + // hex string? 
+ if (len == 0) { + + // add next char + t.push_back(s[i]); + + } else { + + // convert the extracted hex string to code point value + // ToDo: Maybe we could do this without creating a substring + uint32_t cp = strtol(s.substr (i, len).c_str(), nullptr, 16); + + // use a very simple approach to convert via utf8 lib + // maybe there is a more elegant way; maybe we should + // convert the whole output from string to a stream!? + // allocate memory for utf8 char and convert to utf8 + unsigned char u[5] = {0,0,0,0,0}; utf8::append(cp, u); + for(size_t m = 0; u[m] && m < 5; m++) t.push_back(u[m]); + + // skip some more chars? + if (len > 1) i += len - 1; + + } + // EO if hex + + } else { + // add single char + t.push_back(s[i]); + } } + return t; } diff --git a/parser.cpp b/parser.cpp index c03abc8048..5dd72f4613 100644 --- a/parser.cpp +++ b/parser.cpp @@ -108,7 +108,7 @@ namespace Sass { } // ignore the @charset directive for now else if (lex< exactly< charset_kwd > >()) { - lex< string_constant >(); + lex< quoted_string >(); lex< one_plus< exactly<';'> > >(); } else if (peek< at_keyword >()) { @@ -164,7 +164,7 @@ namespace Sass { Import* imp = new (ctx.mem) Import(pstate); bool first = true; do { - if (lex< string_constant >()) { + if (lex< quoted_string >()) { string import_path(lexed); // struct Sass_Options opt = sass_context_get_options(ctx) @@ -514,10 +514,10 @@ namespace Sass { return seq; } } - if (sawsomething && lex< sequence< negate< functional >, alternatives< identifier_fragment, universal, string_constant, dimension, percentage, number > > >()) { + if (sawsomething && lex< sequence< negate< functional >, alternatives< identifier_fragment, universal, quoted_string, dimension, percentage, number > > >()) { // saw an ampersand, then allow type selectors with arbitrary number of hyphens at the beginning (*seq) << new (ctx.mem) Type_Selector(pstate, lexed); - } else if (lex< sequence< negate< functional >, alternatives< type_selector, universal, 
string_constant, dimension, percentage, number > > >()) { + } else if (lex< sequence< negate< functional >, alternatives< type_selector, universal, quoted_string, dimension, percentage, number > > >()) { // if you see a type selector (*seq) << new (ctx.mem) Type_Selector(pstate, lexed); sawsomething = true; @@ -546,7 +546,7 @@ namespace Sass { if (lex< id_name >() || lex< class_name >()) { return new (ctx.mem) Selector_Qualifier(pstate, lexed); } - else if (lex< string_constant >() || lex< number >()) { + else if (lex< quoted_string >() || lex< number >()) { return new (ctx.mem) Type_Selector(pstate, lexed); } else if (peek< pseudo_not >()) { @@ -619,7 +619,7 @@ namespace Sass { lex< identifier >(); expr = new (ctx.mem) String_Constant(p, lexed); } - else if (lex< string_constant >()) { + else if (lex< quoted_string >()) { expr = new (ctx.mem) String_Constant(p, lexed); } else if (peek< exactly<')'> >()) { @@ -661,7 +661,7 @@ namespace Sass { if (lex< identifier >()) { value = new (ctx.mem) String_Constant(p, lexed, true); } - else if (lex< string_constant >()) { + else if (lex< quoted_string >()) { value = parse_interpolated_chunk(lexed); } else { @@ -795,7 +795,7 @@ namespace Sass { } // ignore the @charset directive for now else if (lex< exactly< charset_kwd > >()) { - lex< string_constant >(); + lex< quoted_string >(); lex< one_plus< exactly<';'> > >(); } else if (peek< at_keyword >()) { @@ -1209,7 +1209,7 @@ namespace Sass { if (lex< number >()) { return new (ctx.mem) Textual(pstate, Textual::NUMBER, lexed); } - if (peek< string_constant >()) + if (peek< quoted_string >()) { return parse_string(); } if (lex< variable >()) @@ -1278,7 +1278,7 @@ namespace Sass { String* Parser::parse_string() { - lex< string_constant >(); + lex< quoted_string >(); Token str(lexed); return parse_interpolated_chunk(str); // const char* i = str.begin; @@ -1408,7 +1408,7 @@ namespace Sass { else if (lex< hex >()) { (*schema) << new (ctx.mem) Textual(pstate, Textual::HEX, lexed); } - 
else if (lex< string_constant >()) { + else if (lex< quoted_string >()) { (*schema) << new (ctx.mem) String_Constant(pstate, lexed); if (!num_items) schema->quote_mark(*lexed.begin); } @@ -1875,7 +1875,7 @@ namespace Sass { (q = peek< sequence< pseudo_prefix, identifier > >(p)) || (q = peek< percentage >(p)) || (q = peek< dimension >(p)) || - (q = peek< string_constant >(p)) || + (q = peek< quoted_string >(p)) || (q = peek< exactly<'*'> >(p)) || (q = peek< exactly<'('> >(p)) || (q = peek< exactly<')'> >(p)) || @@ -1933,7 +1933,7 @@ namespace Sass { (q = peek< sequence< pseudo_prefix, identifier > >(p)) || (q = peek< percentage >(p)) || (q = peek< dimension >(p)) || - (q = peek< string_constant >(p)) || + (q = peek< quoted_string >(p)) || (q = peek< exactly<'*'> >(p)) || (q = peek< exactly<'('> >(p)) || (q = peek< exactly<')'> >(p)) || diff --git a/prelexer.cpp b/prelexer.cpp index d63180e256..d2d96bf2c3 100644 --- a/prelexer.cpp +++ b/prelexer.cpp @@ -84,54 +84,6 @@ namespace Sass { >(src); } - // Match double- and single-quoted strings. - const char* double_quoted_string(const char* src) { - src = exactly<'"'>(src); - if (!src) return 0; - const char* p; - while (1) { - if (!*src) return 0; - if((p = escape(src))) { - src = p; - continue; - } - else if((p = exactly<'"'>(src))) { - return p; - } - else { - ++src; - } - } - return 0; - } - const char* single_quoted_string(const char* src) { - src = exactly<'\''>(src); - if (!src) return 0; - const char* p; - while (1) { - if (!*src) return 0; - if((p = escape(src))) { - src = p; - continue; - } - else if((p = exactly<'\''>(src))) { - return p; - } - else { - ++src; - } - } - return 0; - } - const char* string_constant(const char* src) { - return alternatives(src); - } - // Match interpolants. - - - const char* interpolant(const char* src) { - return delimited_by(src); - } // Whitespace handling. 
const char* optional_spaces(const char* src) { return optional(src); } @@ -188,11 +140,64 @@ namespace Sass { zero_plus< alternatives< identifier, number, exactly<'-'> > > > >, negate< exactly<'%'> > >(src); } + + // interpolants can be recursive/nested + const char* interpolant(const char* src) { + return smartdel_by(src); + } + + // $re_squote = /'(?:$re_itplnt|\\.|[^'])*'/ + const char* single_quoted_string(const char* src) { + // match a single quoted string, while skipping interpolants + return sequence < + exactly <'\''>, + zero_plus < + alternatives < + // skip all escaped chars first + sequence < exactly < '\\' >, any_char >, + // skip interpolants + interpolant, + // skip non delimiters + any_char_except < '\'' > + > + >, + exactly <'\''> + >(src); + } + + // $re_dquote = /"(?:$re_itp|\\.|[^"])*"/ + const char* double_quoted_string(const char* src) { + // match a double quoted string, while skipping interpolants + return sequence < + exactly <'"'>, + zero_plus < + alternatives < + // skip all escaped chars first + sequence < exactly < '\\' >, any_char >, + // skip interpolants + interpolant, + // skip non delimiters + any_char_except < '"' > + > + >, + exactly <'"'> + >(src); + } + + // $re_quoted = /(?:$re_squote|$re_dquote)/ + const char* quoted_string(const char* src) { + // match a quoted string, while skipping interpolants + return alternatives< + single_quoted_string, + double_quoted_string + >(src); + } + const char* value_schema(const char* src) { // follows this pattern: ([xyz]*i[xyz]*)+ - return one_plus< sequence< zero_plus< alternatives< identifier, percentage, dimension, hex, number, string_constant > >, + return one_plus< sequence< zero_plus< alternatives< identifier, percentage, dimension, hex, number, quoted_string > >, interpolant, - zero_plus< alternatives< identifier, percentage, dimension, hex, number, string_constant, exactly<'%'> > > > >(src); + zero_plus< alternatives< identifier, percentage, dimension, hex, number, quoted_string, 
exactly<'%'> > > > >(src); } const char* filename_schema(const char* src) { return one_plus< sequence< zero_plus< alternatives< identifier, number, exactly<'.'>, exactly<'/'> > >, @@ -439,7 +444,7 @@ namespace Sass { const char* uri(const char* src) { return sequence< exactly, optional, - string_constant, + quoted_string, optional, exactly<')'> >(src); } @@ -577,7 +582,7 @@ namespace Sass { spaces_and_comments, exactly<'='>, spaces_and_comments, - alternatives< variable, identifier_schema, identifier, string_constant, number, hexa >, + alternatives< variable, identifier_schema, identifier, quoted_string, number, hexa >, zero_plus< sequence< spaces_and_comments, exactly<','>, @@ -587,13 +592,12 @@ namespace Sass { spaces_and_comments, exactly<'='>, spaces_and_comments, - alternatives< variable, identifier_schema, identifier, string_constant, number, hexa > + alternatives< variable, identifier_schema, identifier, quoted_string, number, hexa > > > > > >, spaces_and_comments, - exactly<')'>, - spaces_and_comments + exactly<')'> > > >(src); } @@ -605,12 +609,18 @@ namespace Sass { } // const char* ie_args(const char* src) { - // return sequence< alternatives< ie_keyword_arg, value_schema, string_constant, interpolant, number, identifier, delimited_by< '(', ')', true> >, - // zero_plus< sequence< spaces_and_comments, exactly<','>, spaces_and_comments, alternatives< ie_keyword_arg, value_schema, string_constant, interpolant, number, identifier, delimited_by<'(', ')', true> > > > >(src); + // return sequence< alternatives< ie_keyword_arg, value_schema, quoted_string, interpolant, number, identifier, delimited_by< '(', ')', true> >, + // zero_plus< sequence< spaces_and_comments, exactly<','>, spaces_and_comments, alternatives< ie_keyword_arg, value_schema, quoted_string, interpolant, number, identifier, delimited_by<'(', ')', true> > > > >(src); // } const char* ie_keyword_arg(const char* src) { - return sequence< alternatives< variable, identifier_schema, identifier >, 
spaces_and_comments, exactly<'='>, spaces_and_comments, alternatives< variable, identifier_schema, identifier, string_constant, number, hexa > >(src); + return sequence< + alternatives< variable, identifier_schema, identifier >, + spaces_and_comments, + exactly<'='>, + spaces_and_comments, + alternatives< variable, identifier_schema, identifier, quoted_string, number, hexa > + >(src); } // Path matching functions. @@ -677,7 +687,7 @@ namespace Sass { const char* static_string(const char* src) { const char* pos = src; - const char * s = string_constant(pos); + const char * s = quoted_string(pos); Token t(pos, s, Position(0, 0)); const unsigned int p = count_interval< interpolant >(t.begin, t.end); return (p == 0) ? t.end : 0; @@ -686,6 +696,7 @@ namespace Sass { const char* static_component(const char* src) { return alternatives< identifier, static_string, + percentage, hex, number, sequence< exactly<'!'>, exactly > @@ -696,8 +707,11 @@ namespace Sass { return sequence< static_component, zero_plus < sequence< alternatives< - sequence< optional_spaces, exactly<'/'>, optional_spaces >, - sequence< optional_spaces, exactly<','>, optional_spaces >, + sequence< optional_spaces, alternatives< + exactly < '/' >, + exactly < ',' >, + exactly < ' ' > + >, optional_spaces >, spaces >, static_component diff --git a/prelexer.hpp b/prelexer.hpp index 3c08003ae6..c21b3c9723 100644 --- a/prelexer.hpp +++ b/prelexer.hpp @@ -19,7 +19,12 @@ namespace Sass { template const char* exactly(const char* src) { const char* pre = prefix; - while (*pre && *src == *pre) ++src, ++pre; + if (*src == 0) return 0; + // there is a small chance that the search prefix + // is longer than the rest of the string to look at + while (*pre && *src == *pre) { + ++src, ++pre; + } return *pre ? 0 : src; } @@ -83,6 +88,58 @@ namespace Sass { } } + // Match a sequence of characters delimited by the supplied chars. 
+ template + const char* smartdel_by(const char* src) { + + size_t level = 0; + bool in_squote = false; + bool in_dquote = false; + // bool in_braces = false; + + src = exactly(src); + + if (!src) return 0; + + while (1) { + + // end of string? + if (!*src) return 0; + + // has escaped sequence? + if (!esc && *src == '\\') { + ++ src; // skip this (and next) + } + else if (*src == '"') { + in_dquote = ! in_dquote; + } + else if (*src == '\'') { + in_squote = ! in_squote; + } + else if (in_dquote || in_squote) { + // take everything literally + } + + // find another opener inside? + else if (exactly(src)) { + ++ level; // increase counter + } + + // look for the closer (maybe final, maybe not) + else if (const char* stop = exactly(src)) { + // only close one level? + if (level > 0) -- level; + // return position at end of stop + // delimiter may be multiple chars + else return stop; + } + + // next + ++ src; + + } + } + // Match a sequence of characters delimited by the supplied strings. template const char* delimited_by(const char* src) { @@ -97,6 +154,58 @@ namespace Sass { } } + // Match a sequence of characters delimited by the supplied strings. + template + const char* smartdel_by(const char* src) { + + size_t level = 0; + bool in_squote = false; + bool in_dquote = false; + // bool in_braces = false; + + src = exactly(src); + + if (!src) return 0; + + while (1) { + + // end of string? + if (!*src) return 0; + + // has escaped sequence? + if (!esc && *src == '\\') { + ++ src; // skip this (and next) + } + else if (*src == '"') { + in_dquote = ! in_dquote; + } + else if (*src == '\'') { + in_squote = ! in_squote; + } + else if (in_dquote || in_squote) { + // take everything literally + } + + // find another opener inside? + else if (exactly(src)) { + ++ level; // increase counter + } + + // look for the closer (maybe final, maybe not) + else if (const char* stop = exactly(src)) { + // only close one level? 
+ if (level > 0) -- level; + // return position at end of stop + // delimiter may be multiple chars + else return stop; + } + + // next + ++ src; + + } + } + // Match any single character. const char* any_char(const char* src); // Match any single character except the supplied one. @@ -319,7 +428,7 @@ namespace Sass { // Match double- and single-quoted strings. const char* double_quoted_string(const char* src); const char* single_quoted_string(const char* src); - const char* string_constant(const char* src); + const char* quoted_string(const char* src); // Match interpolants. const char* interpolant(const char* src);