Skip to content

Commit

Permalink
Refactor builtins to handle CESU-8 encoded strings.
Browse files Browse the repository at this point in the history
JerryScript-DCO-1.0-Signed-off-by: Zsolt Borbély [email protected]
JerryScript-DCO-1.0-Signed-off-by: Dániel Bátyai [email protected]
  • Loading branch information
dbatyai committed Oct 15, 2015
1 parent dcd610b commit 579b1ed
Show file tree
Hide file tree
Showing 17 changed files with 517 additions and 696 deletions.
24 changes: 11 additions & 13 deletions jerry-core/ecma/base/ecma-helpers-conversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -354,42 +354,40 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
return ECMA_NUMBER_ZERO;
}

lit_utf8_iterator_t iter = lit_utf8_iterator_create (str_p, str_size);
lit_utf8_byte_t *str_curr_p = (lit_utf8_byte_t *) str_p;
const lit_utf8_byte_t *str_end_p = str_p + str_size;
ecma_char_t code_unit;

while (!lit_utf8_iterator_is_eos (&iter))
while (str_curr_p < str_end_p)
{
code_unit = lit_utf8_iterator_peek_next (&iter);
code_unit = lit_utf8_peek_next (str_curr_p);
if (lit_char_is_white_space (code_unit) || lit_char_is_line_terminator (code_unit))
{
lit_utf8_iterator_incr (&iter);
lit_utf8_incr (&str_curr_p);
}
else
{
break;
}
}

JERRY_ASSERT (!iter.buf_pos.is_non_bmp_middle);
const lit_utf8_byte_t *begin_p = iter.buf_p + iter.buf_pos.offset;
const lit_utf8_byte_t *begin_p = str_curr_p;
str_curr_p = (lit_utf8_byte_t *) str_end_p;

iter = lit_utf8_iterator_create (iter.buf_p + iter.buf_pos.offset, str_size - iter.buf_pos.offset);
lit_utf8_iterator_seek_eos (&iter);
while (!lit_utf8_iterator_is_bos (&iter))
while (str_curr_p > str_p)
{
code_unit = lit_utf8_iterator_peek_prev (&iter);
code_unit = lit_utf8_peek_prev (str_curr_p);
if (lit_char_is_white_space (code_unit) || lit_char_is_line_terminator (code_unit))
{
lit_utf8_iterator_decr (&iter);
lit_utf8_decr (&str_curr_p);
}
else
{
break;
}
}

JERRY_ASSERT (!iter.buf_pos.is_non_bmp_middle);
const lit_utf8_byte_t *end_p = iter.buf_p + iter.buf_pos.offset - 1;
const lit_utf8_byte_t *end_p = str_curr_p - 1;

if (begin_p > end_p)
{
Expand Down
102 changes: 50 additions & 52 deletions jerry-core/ecma/builtin-objects/ecma-builtin-date.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,27 +48,22 @@
* @return NaN if cannot read from string, ToNumber() otherwise
*/
/*
 * NOTE: this span is a rendered diff hunk. It interleaves the pre-commit lines
 * (based on lit_utf8_iterator_t) with the post-commit lines (based on a raw
 * lit_utf8_byte_t cursor plus an end pointer) without +/- markers, so it is not
 * compilable as shown. Lines that mention `iter` or `copy_size` belong to the
 * old version; lines that mention `str_p` / `str_end_p` belong to the new one.
 *
 * Both versions try to consume num_of_chars characters, return NaN as soon as
 * the input is exhausted or a character is not a decimal digit, and otherwise
 * hand the consumed byte range to ecma_utf8_string_to_number.
 */
static ecma_number_t
/* old signature: the read position lives inside the iterator */
ecma_date_parse_date_chars (lit_utf8_iterator_t *iter, /**< iterator of the utf8 string */
/* new signature: the read position is the caller's cursor, passed by address, with an explicit end bound */
ecma_date_parse_date_chars (lit_utf8_byte_t **str_p, /**< pointer to the cesu8 string */
const lit_utf8_byte_t *str_end_p, /**< pointer to the end of the string */
uint32_t num_of_chars) /**< number of characters to read and convert */
{
JERRY_ASSERT (num_of_chars > 0);

/* old: byte count of the consumed digits, accumulated character by character */
lit_utf8_size_t copy_size = 0;
/* old: start of the digit run, derived from the iterator's buffer and offset */
const lit_utf8_byte_t *str_start_p = iter->buf_p + iter->buf_pos.offset;
/* new: start of the digit run is simply the caller's current cursor */
const lit_utf8_byte_t *str_start_p = *str_p;

while (num_of_chars--)
{
/* old: peek at the next character; the iterator is advanced only after the check passes */
if (lit_utf8_iterator_is_eos (iter)
|| !lit_char_is_decimal_digit (lit_utf8_iterator_peek_next (iter)))
/* new: bounds check, then read the next character through the cursor.
 * NOTE(review): lit_utf8_read_next presumably advances *str_p even when the
 * character turns out not to be a digit, unlike the old peek - confirm
 * against its definition. */
if (*str_p >= str_end_p || !lit_char_is_decimal_digit (lit_utf8_read_next (str_p)))
{
return ecma_number_make_nan ();
}

/* old: add the size of the current character to copy_size, then step the iterator */
copy_size += lit_get_unicode_char_size_by_utf8_first_byte (*(iter->buf_p + iter->buf_pos.offset));
lit_utf8_iterator_incr (iter);
}

/* old: convert using the accumulated byte count */
return ecma_utf8_string_to_number (str_start_p, copy_size);
/* new: the byte count is the distance the cursor has moved since str_start_p */
return ecma_utf8_string_to_number (str_start_p, (lit_utf8_size_t) (*str_p - str_start_p));
} /* ecma_date_parse_date_chars */

/**
Expand Down Expand Up @@ -211,10 +206,11 @@ ecma_builtin_date_parse (ecma_value_t this_arg __attr_unused___, /**< this argum
ssize_t sz = ecma_string_to_utf8_string (date_str_p, date_start_p, (ssize_t) date_str_size);
JERRY_ASSERT (sz >= 0);

lit_utf8_iterator_t iter = lit_utf8_iterator_create (date_start_p, date_str_size);
lit_utf8_byte_t *date_str_curr_p = date_start_p;
const lit_utf8_byte_t *date_str_end_p = date_start_p + date_str_size;

/* 1. read year */
ecma_number_t year = ecma_date_parse_date_chars (&iter, 4);
ecma_number_t year = ecma_date_parse_date_chars (&date_str_curr_p, date_str_end_p, 4);

if (!ecma_number_is_nan (year)
&& year >= 0)
Expand All @@ -224,12 +220,12 @@ ecma_builtin_date_parse (ecma_value_t this_arg __attr_unused___, /**< this argum
ecma_number_t time = ECMA_NUMBER_ZERO;

/* 2. read month if any */
if (!lit_utf8_iterator_is_eos (&iter)
&& lit_utf8_iterator_peek_next (&iter) == '-')
if (date_str_curr_p < date_str_end_p
&& *date_str_curr_p == '-')
{
/* eat up '-' */
lit_utf8_iterator_incr (&iter);
month = ecma_date_parse_date_chars (&iter, 2);
date_str_curr_p++;
month = ecma_date_parse_date_chars (&date_str_curr_p, date_str_end_p, 2);

if (month > 12 || month < 1)
{
Expand All @@ -238,12 +234,12 @@ ecma_builtin_date_parse (ecma_value_t this_arg __attr_unused___, /**< this argum
}

/* 3. read day if any */
if (!lit_utf8_iterator_is_eos (&iter)
&& lit_utf8_iterator_peek_next (&iter) == '-')
if (date_str_curr_p < date_str_end_p
&& *date_str_curr_p == '-')
{
/* eat up '-' */
lit_utf8_iterator_incr (&iter);
day = ecma_date_parse_date_chars (&iter, 2);
date_str_curr_p++;
day = ecma_date_parse_date_chars (&date_str_curr_p, date_str_end_p, 2);

if (day < 1 || day > 31)
{
Expand All @@ -252,24 +248,24 @@ ecma_builtin_date_parse (ecma_value_t this_arg __attr_unused___, /**< this argum
}

/* 4. read time if any */
if (!lit_utf8_iterator_is_eos (&iter)
&& lit_utf8_iterator_peek_next (&iter) == 'T')
if (date_str_curr_p < date_str_end_p
&& *date_str_curr_p == 'T')
{
/* eat up 'T' */
date_str_curr_p++;

ecma_number_t hours = ECMA_NUMBER_ZERO;
ecma_number_t minutes = ECMA_NUMBER_ZERO;
ecma_number_t seconds = ECMA_NUMBER_ZERO;
ecma_number_t milliseconds = ECMA_NUMBER_ZERO;

ecma_length_t num_of_visited_chars = lit_utf8_iterator_get_index (&iter);
ecma_length_t date_str_len = lit_utf8_string_length (iter.buf_p, iter.buf_size) - 1;
ecma_length_t remaining_length = lit_utf8_string_length (date_str_curr_p,
(lit_utf8_size_t) (date_str_end_p - date_str_curr_p));

if ((date_str_len - num_of_visited_chars) >= 5)
if (remaining_length >= 5)
{
/* eat up 'T' */
lit_utf8_iterator_incr (&iter);

/* 4.1 read hours and minutes */
hours = ecma_date_parse_date_chars (&iter, 2);
hours = ecma_date_parse_date_chars (&date_str_curr_p, date_str_end_p, 2);

if (hours < 0 || hours > 24)
{
Expand All @@ -281,33 +277,35 @@ ecma_builtin_date_parse (ecma_value_t this_arg __attr_unused___, /**< this argum
}

/* eat up ':' */
lit_utf8_iterator_incr (&iter);
date_str_curr_p++;

minutes = ecma_date_parse_date_chars (&iter, 2);
minutes = ecma_date_parse_date_chars (&date_str_curr_p, date_str_end_p, 2);

if (minutes < 0 || minutes > 59)
{
minutes = ecma_number_make_nan ();
}

/* 4.2 read seconds if any */
if (!lit_utf8_iterator_is_eos (&iter) && lit_utf8_iterator_peek_next (&iter) == ':')
if (date_str_curr_p < date_str_end_p
&& *date_str_curr_p == ':')
{
/* eat up ':' */
lit_utf8_iterator_incr (&iter);
seconds = ecma_date_parse_date_chars (&iter, 2);
date_str_curr_p++;
seconds = ecma_date_parse_date_chars (&date_str_curr_p, date_str_end_p, 2);

if (seconds < 0 || seconds > 59)
{
seconds = ecma_number_make_nan ();
}

/* 4.3 read milliseconds if any */
if (!lit_utf8_iterator_is_eos (&iter) && lit_utf8_iterator_peek_next (&iter) == '.')
if (date_str_curr_p < date_str_end_p
&& *date_str_curr_p == '.')
{
/* eat up '.' */
lit_utf8_iterator_incr (&iter);
milliseconds = ecma_date_parse_date_chars (&iter, 3);
date_str_curr_p++;
milliseconds = ecma_date_parse_date_chars (&date_str_curr_p, date_str_end_p, 3);

if (milliseconds < 0)
{
Expand All @@ -324,34 +322,34 @@ ecma_builtin_date_parse (ecma_value_t this_arg __attr_unused___, /**< this argum
}

/* 4.4 read timezone if any */
if (!lit_utf8_iterator_is_eos (&iter)
&& lit_utf8_iterator_peek_next (&iter) == 'Z'
if (date_str_curr_p < date_str_end_p
&& *date_str_curr_p == 'Z'
&& !ecma_number_is_nan (time))
{
lit_utf8_iterator_incr (&iter);
date_str_curr_p++;
time = ecma_date_make_time (hours, minutes, seconds, milliseconds);
}
else if (!lit_utf8_iterator_is_eos (&iter)
&& (lit_utf8_iterator_peek_next (&iter) == '+'
|| lit_utf8_iterator_peek_next (&iter) == '-'))
else if (date_str_curr_p < date_str_end_p
&& (*date_str_curr_p == '+' || *date_str_curr_p == '-'))
{
ecma_length_t num_of_visited_chars = lit_utf8_iterator_get_index (&iter);
ecma_length_t date_str_len = lit_utf8_string_length (iter.buf_p, iter.buf_size) - 1;
ecma_length_t remaining_length;
remaining_length = lit_utf8_string_length (date_str_curr_p,
(lit_utf8_size_t) (date_str_end_p - date_str_curr_p)) - 1;

if ((date_str_len - num_of_visited_chars) == 5)
if (remaining_length == 5)
{
bool is_negative = false;

if (lit_utf8_iterator_peek_next (&iter) == '-')
if (*date_str_curr_p == '-')
{
is_negative = true;
}

/* eat up '+/-' */
lit_utf8_iterator_incr (&iter);
date_str_curr_p++;

/* read hours and minutes */
hours = ecma_date_parse_date_chars (&iter, 2);
hours = ecma_date_parse_date_chars (&date_str_curr_p, date_str_end_p, 2);

if (hours < 0 || hours > 24)
{
Expand All @@ -363,9 +361,9 @@ ecma_builtin_date_parse (ecma_value_t this_arg __attr_unused___, /**< this argum
}

/* eat up ':' */
lit_utf8_iterator_incr (&iter);
date_str_curr_p++;

minutes = ecma_date_parse_date_chars (&iter, 2);
minutes = ecma_date_parse_date_chars (&date_str_curr_p, date_str_end_p, 2);

if (minutes < 0 || minutes > 59)
{
Expand All @@ -384,7 +382,7 @@ ecma_builtin_date_parse (ecma_value_t this_arg __attr_unused___, /**< this argum
}
}

if (lit_utf8_iterator_is_eos (&iter))
if (date_str_curr_p >= date_str_end_p)
{
ecma_number_t date = ecma_date_make_day (year, month - 1, day);
*date_num_p = ecma_date_make_date (date, time);
Expand Down
Loading

0 comments on commit 579b1ed

Please sign in to comment.