Skip to content

Commit

Permalink
Use code unit instead of code point
Browse files Browse the repository at this point in the history
JerryScript-DCO-1.0-Signed-off-by: László Langó [email protected]
  • Loading branch information
LaszloLango committed Mar 11, 2016
1 parent a9c77b4 commit 5aa4211
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 52 deletions.
30 changes: 15 additions & 15 deletions jerry-core/ecma/builtin-objects/ecma-builtin-global.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,25 +96,25 @@ ecma_builtin_global_object_print (ecma_value_t this_arg __attr_unused___, /**< t

while (utf8_str_curr_p < utf8_str_end_p)
{
ecma_char_t code_point = lit_utf8_read_next (&utf8_str_curr_p);
ecma_char_t code_unit = lit_utf8_read_next (&utf8_str_curr_p);

if (code_point == LIT_CHAR_NULL)
if (code_unit == LIT_CHAR_NULL)
{
printf ("\\u0000");
}
else if (code_point <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
else if (code_unit <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
{
printf ("%c", (char) code_point);
printf ("%c", (char) code_unit);
}
else
{
JERRY_STATIC_ASSERT (sizeof (code_point) == 2,
JERRY_STATIC_ASSERT (sizeof (code_unit) == 2,
size_of_code_point_must_be_equal_to_2_bytes);

uint32_t byte_high = (uint32_t) JRT_EXTRACT_BIT_FIELD (ecma_char_t, code_point,
uint32_t byte_high = (uint32_t) JRT_EXTRACT_BIT_FIELD (ecma_char_t, code_unit,
JERRY_BITSINBYTE,
JERRY_BITSINBYTE);
uint32_t byte_low = (uint32_t) JRT_EXTRACT_BIT_FIELD (ecma_char_t, code_point,
uint32_t byte_low = (uint32_t) JRT_EXTRACT_BIT_FIELD (ecma_char_t, code_unit,
0,
JERRY_BITSINBYTE);

Expand Down Expand Up @@ -801,9 +801,9 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
continue;
}

lit_code_point_t decoded_byte;
ecma_char_t decoded_byte;

if (!lit_read_code_point_from_hex (input_char_p + 1, 2, &decoded_byte))
if (!lit_read_code_unit_from_hex (input_char_p + 1, 2, &decoded_byte))
{
ret_value = ecma_raise_uri_error (ECMA_ERR_MSG (""));
break;
Expand Down Expand Up @@ -857,9 +857,9 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
continue;
}

lit_code_point_t decoded_byte;
ecma_char_t decoded_byte;

if (!lit_read_code_point_from_hex (input_char_p + 1, 2, &decoded_byte))
if (!lit_read_code_unit_from_hex (input_char_p + 1, 2, &decoded_byte))
{
ret_value = ecma_raise_uri_error (ECMA_ERR_MSG (""));
break;
Expand Down Expand Up @@ -916,16 +916,16 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
}
else
{
lit_code_point_t cp;
ecma_char_t chr;

if (!lit_read_code_point_from_hex (input_char_p + 1, 2, &cp)
|| ((cp & LIT_UTF8_EXTRA_BYTE_MASK) != LIT_UTF8_EXTRA_BYTE_MARKER))
if (!lit_read_code_unit_from_hex (input_char_p + 1, 2, &chr)
|| ((chr & LIT_UTF8_EXTRA_BYTE_MASK) != LIT_UTF8_EXTRA_BYTE_MARKER))
{
is_valid = false;
break;
}

octets[i] = (lit_utf8_byte_t) cp;
octets[i] = (lit_utf8_byte_t) chr;
input_char_p += URI_ENCODED_BYTE_SIZE;
}
}
Expand Down
6 changes: 3 additions & 3 deletions jerry-core/ecma/builtin-objects/ecma-builtin-json.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,15 +178,15 @@ ecma_builtin_json_parse_string (ecma_json_token_t *token_p) /**< token argument
}
case LIT_CHAR_LOWERCASE_U:
{
lit_code_point_t code_point;
ecma_char_t code_unit;

if (!(lit_read_code_point_from_hex (current_p + 1, 4, &code_point)))
if (!(lit_read_code_unit_from_hex (current_p + 1, 4, &code_unit)))
{
return;
}

current_p += 5;
write_p += lit_code_point_to_cesu8 (code_point, write_p);
write_p += lit_code_unit_to_utf8 (code_unit, write_p);
continue;
}
default:
Expand Down
20 changes: 10 additions & 10 deletions jerry-core/lit/lit-char-helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -289,32 +289,32 @@ lit_char_hex_to_int (ecma_char_t c) /**< code unit, corresponding to
* @return true if decoding was successful, false otherwise
*/
bool
lit_read_code_point_from_hex (lit_utf8_byte_t *buf_p, /**< buffer with characters */
lit_utf8_size_t number_of_characters, /**< number of characters to be read */
lit_code_point_t *out_code_point_p) /**< [out] decoded result */
lit_read_code_unit_from_hex (lit_utf8_byte_t *buf_p, /**< buffer with characters */
lit_utf8_size_t number_of_characters, /**< number of characters to be read */
ecma_char_ptr_t out_unit_point_p) /**< [out] decoded result */
{
lit_code_point_t code_point = 0;
ecma_char_t code_unit = LIT_BYTE_NULL;

JERRY_ASSERT (number_of_characters >= 2 && number_of_characters <= 4);

for (lit_utf8_size_t i = 0; i < number_of_characters; i++)
{
code_point <<= 4;
code_unit = (ecma_char_t) (code_unit << 4u);

if (*buf_p >= LIT_CHAR_ASCII_DIGITS_BEGIN
&& *buf_p <= LIT_CHAR_ASCII_DIGITS_END)
{
code_point |= (uint32_t) (*buf_p - LIT_CHAR_ASCII_DIGITS_BEGIN);
code_unit |= (ecma_char_t) (*buf_p - LIT_CHAR_ASCII_DIGITS_BEGIN);
}
else if (*buf_p >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN
&& *buf_p <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END)
{
code_point |= (uint32_t) (*buf_p - (LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN - 10));
code_unit |= (ecma_char_t) (*buf_p - (LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN - 10));
}
else if (*buf_p >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN
&& *buf_p <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_END)
{
code_point |= (uint32_t) (*buf_p - (LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN - 10));
code_unit |= (ecma_char_t) (*buf_p - (LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN - 10));
}
else
{
Expand All @@ -324,9 +324,9 @@ lit_read_code_point_from_hex (lit_utf8_byte_t *buf_p, /**< buffer with character
buf_p++;
}

*out_code_point_p = code_point;
*out_unit_point_p = code_unit;
return true;
} /* lit_read_code_point_from_hex */
} /* lit_read_code_unit_from_hex */

/**
* Check if specified character is a word character (part of IsWordChar abstract operation)
Expand Down
2 changes: 1 addition & 1 deletion jerry-core/lit/lit-char-helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ extern bool lit_char_is_hex_digit (ecma_char_t);
extern uint32_t lit_char_hex_to_int (ecma_char_t);

/* read a hex encoded code unit from a zero terminated buffer */
bool lit_read_code_point_from_hex (lit_utf8_byte_t *, lit_utf8_size_t, lit_code_point_t *);
bool lit_read_code_unit_from_hex (lit_utf8_byte_t *, lit_utf8_size_t, ecma_char_ptr_t);

/**
* Null character
Expand Down
8 changes: 4 additions & 4 deletions jerry-core/parser/regexp/re-compiler.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@
*/
static void
re_append_char_class (void *re_ctx_p, /**< RegExp compiler context */
uint32_t start, /**< character class range from */
uint32_t end) /**< character class range to */
ecma_char_t start, /**< character class range from */
ecma_char_t end) /**< character class range to */
{
re_compiler_ctx_t *ctx_p = (re_compiler_ctx_t *) re_ctx_p;
re_append_char (ctx_p->bytecode_ctx_p, (ecma_char_t) start);
re_append_char (ctx_p->bytecode_ctx_p, (ecma_char_t) end);
re_append_char (ctx_p->bytecode_ctx_p, start);
re_append_char (ctx_p->bytecode_ctx_p, end);
ctx_p->parser_ctx_p->num_of_classes++;
} /* re_append_char_class */

Expand Down
33 changes: 16 additions & 17 deletions jerry-core/parser/regexp/re-parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
{
re_token_type_t token_type = ((re_compiler_ctx_t *) re_ctx_p)->current_token.type;
out_token_p->qmax = out_token_p->qmin = 1;
uint32_t start = RE_CHAR_UNDEF;
ecma_char_t start = RE_CHAR_UNDEF;
bool is_range = false;
parser_ctx_p->num_of_classes = 0;

Expand All @@ -332,7 +332,7 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string"));
}

uint32_t ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
ecma_char_t ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);

if (ch == LIT_CHAR_RIGHT_SQUARE)
{
Expand Down Expand Up @@ -412,27 +412,27 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
}
else if (ch == LIT_CHAR_LOWERCASE_X)
{
lit_code_point_t code_point;
ecma_char_t code_unit;

if (!lit_read_code_point_from_hex (parser_ctx_p->input_curr_p, 2, &code_point))
if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 2, &code_unit))
{
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '\\x'"));
}

parser_ctx_p->input_curr_p += 2;
append_char_class (re_ctx_p, code_point, code_point);
append_char_class (re_ctx_p, code_unit, code_unit);
}
else if (ch == LIT_CHAR_LOWERCASE_U)
{
lit_code_point_t code_point;
ecma_char_t code_unit;

if (!lit_read_code_point_from_hex (parser_ctx_p->input_curr_p, 4, &code_point))
if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 4, &code_unit))
{
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '\\u'"));
}

parser_ctx_p->input_curr_p += 4;
append_char_class (re_ctx_p, code_point, code_point);
append_char_class (re_ctx_p, code_unit, code_unit);
}
else if (ch == LIT_CHAR_LOWERCASE_D)
{
Expand Down Expand Up @@ -499,12 +499,11 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
append_char_class (re_ctx_p, LIT_CHAR_LOWERCASE_Z + 1, LIT_UTF16_CODE_UNIT_MAX);
ch = RE_CHAR_UNDEF;
}
else if (ch <= LIT_UTF16_CODE_UNIT_MAX
&& lit_char_is_octal_digit ((ecma_char_t) ch)
else if (lit_char_is_octal_digit ((ecma_char_t) ch)
&& ch != LIT_CHAR_0)
{
parser_ctx_p->input_curr_p--;
ch = re_parse_octal (parser_ctx_p);
ch = (ecma_char_t) re_parse_octal (parser_ctx_p);
}
} /* ch == LIT_CHAR_BACKSLASH */

Expand Down Expand Up @@ -667,28 +666,28 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
else if (ch == LIT_CHAR_LOWERCASE_X
&& re_hex_lookup (parser_ctx_p, 2))
{
lit_code_point_t code_point;
ecma_char_t code_unit;

if (!lit_read_code_point_from_hex (parser_ctx_p->input_curr_p, 2, &code_point))
if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 2, &code_unit))
{
return ecma_raise_syntax_error (ECMA_ERR_MSG ("decode error"));
}

parser_ctx_p->input_curr_p += 2;
out_token_p->value = code_point;
out_token_p->value = code_unit;
}
else if (ch == LIT_CHAR_LOWERCASE_U
&& re_hex_lookup (parser_ctx_p, 4))
{
lit_code_point_t code_point;
ecma_char_t code_unit;

if (!lit_read_code_point_from_hex (parser_ctx_p->input_curr_p, 4, &code_point))
if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 4, &code_unit))
{
return ecma_raise_syntax_error (ECMA_ERR_MSG ("decode error"));
}

parser_ctx_p->input_curr_p += 4;
out_token_p->value = code_point;
out_token_p->value = code_unit;
}
else if (ch == LIT_CHAR_LOWERCASE_D)
{
Expand Down
4 changes: 2 additions & 2 deletions jerry-core/parser/regexp/re-parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ typedef enum
/**
* Undefined character (sentinel value; note that 0xFFFF is itself representable as a 16-bit code unit)
*/
#define RE_CHAR_UNDEF 0xFFFFFFFF
#define RE_CHAR_UNDEF 0xFFFF

/**
* RegExp token type
Expand All @@ -104,7 +104,7 @@ typedef struct
uint32_t num_of_classes; /**< number of character classes */
} re_parser_ctx_t;

typedef void (*re_char_class_callback) (void *re_ctx_p, uint32_t start, uint32_t end);
typedef void (*re_char_class_callback) (void *re_ctx_p, ecma_char_t start, ecma_char_t end);

ecma_value_t
re_parse_char_class (re_parser_ctx_t *, re_char_class_callback, void *, re_token_t *);
Expand Down

0 comments on commit 5aa4211

Please sign in to comment.