diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-global.c b/jerry-core/ecma/builtin-objects/ecma-builtin-global.c index 620081ae07..9e749fade1 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-global.c +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-global.c @@ -1,4 +1,4 @@ -/* Copyright 2014-2015 Samsung Electronics Co., Ltd. +/* Copyright 2014-2016 Samsung Electronics Co., Ltd. * Copyright 2015-2016 University of Szeged. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -96,25 +96,25 @@ ecma_builtin_global_object_print (ecma_value_t this_arg __attr_unused___, /**< t while (utf8_str_curr_p < utf8_str_end_p) { - ecma_char_t code_point = lit_utf8_read_next (&utf8_str_curr_p); + ecma_char_t code_unit = lit_utf8_read_next (&utf8_str_curr_p); - if (code_point == LIT_CHAR_NULL) + if (code_unit == LIT_CHAR_NULL) { printf ("\\u0000"); } - else if (code_point <= LIT_UTF8_1_BYTE_CODE_POINT_MAX) + else if (code_unit <= LIT_UTF8_1_BYTE_CODE_POINT_MAX) { - printf ("%c", (char) code_point); + printf ("%c", (char) code_unit); } else { - JERRY_STATIC_ASSERT (sizeof (code_point) == 2, + JERRY_STATIC_ASSERT (sizeof (code_unit) == 2, size_of_code_point_must_be_equal_to_2_bytes); - uint32_t byte_high = (uint32_t) JRT_EXTRACT_BIT_FIELD (ecma_char_t, code_point, + uint32_t byte_high = (uint32_t) JRT_EXTRACT_BIT_FIELD (ecma_char_t, code_unit, JERRY_BITSINBYTE, JERRY_BITSINBYTE); - uint32_t byte_low = (uint32_t) JRT_EXTRACT_BIT_FIELD (ecma_char_t, code_point, + uint32_t byte_low = (uint32_t) JRT_EXTRACT_BIT_FIELD (ecma_char_t, code_unit, 0, JERRY_BITSINBYTE); @@ -801,9 +801,9 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___, continue; } - lit_code_point_t decoded_byte; + ecma_char_t decoded_byte; - if (!lit_read_code_point_from_hex (input_char_p + 1, 2, &decoded_byte)) + if (!lit_read_code_unit_from_hex (input_char_p + 1, 2, &decoded_byte)) { ret_value = ecma_raise_uri_error (ECMA_ERR_MSG ("")); break; @@ -857,9 +857,9 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___, continue; } - lit_code_point_t decoded_byte; + ecma_char_t decoded_byte; - if (!lit_read_code_point_from_hex (input_char_p + 1, 2, &decoded_byte)) + if (!lit_read_code_unit_from_hex (input_char_p + 1, 2, &decoded_byte)) { ret_value = ecma_raise_uri_error (ECMA_ERR_MSG ("")); break; @@ -916,16 +916,16 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___, } else { - lit_code_point_t cp; + ecma_char_t chr; - if (!lit_read_code_point_from_hex (input_char_p + 1, 2, &cp) - || ((cp & LIT_UTF8_EXTRA_BYTE_MASK) != LIT_UTF8_EXTRA_BYTE_MARKER)) + if (!lit_read_code_unit_from_hex (input_char_p + 1, 2, &chr) + || ((chr & LIT_UTF8_EXTRA_BYTE_MASK) != LIT_UTF8_EXTRA_BYTE_MARKER)) { is_valid = false; break; } - octets[i] = (lit_utf8_byte_t) cp; + octets[i] = (lit_utf8_byte_t) chr; input_char_p += URI_ENCODED_BYTE_SIZE; } } diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-json.c b/jerry-core/ecma/builtin-objects/ecma-builtin-json.c index a42957c215..de39c760d6 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-json.c +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-json.c @@ -178,15 +178,15 @@ ecma_builtin_json_parse_string (ecma_json_token_t *token_p) /**< token argument } case LIT_CHAR_LOWERCASE_U: { - lit_code_point_t code_point; + ecma_char_t code_unit; - if (!(lit_read_code_point_from_hex (current_p + 1, 4, &code_point))) + if (!(lit_read_code_unit_from_hex (current_p + 1, 4, &code_unit))) { return; } current_p += 5; - write_p += lit_code_point_to_cesu8 (code_point, write_p); + write_p += lit_code_unit_to_utf8 (code_unit, write_p); continue; } default: diff --git a/jerry-core/lit/lit-char-helpers.c b/jerry-core/lit/lit-char-helpers.c index e884bcb64f..6e3c45ed2e 100644 --- a/jerry-core/lit/lit-char-helpers.c +++ b/jerry-core/lit/lit-char-helpers.c @@ -1,4 +1,5 @@ -/* Copyright 2015 Samsung Electronics Co., Ltd. +/* Copyright 2015-2016 Samsung Electronics Co., Ltd. + * Copyright 2016 University of Szeged. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -283,38 +284,38 @@ lit_char_hex_to_int (ecma_char_t c) /**< code unit, corresponding to /** * Parse the next number_of_characters hexadecimal character, - * and construct a code point from them. The buffer must + * and construct a code unit from them. The buffer must * be zero terminated. * * @return true if decoding was successful, false otherwise */ bool -lit_read_code_point_from_hex (lit_utf8_byte_t *buf_p, /**< buffer with characters */ - lit_utf8_size_t number_of_characters, /**< number of characters to be read */ - lit_code_point_t *out_code_point_p) /**< [out] decoded result */ +lit_read_code_unit_from_hex (lit_utf8_byte_t *buf_p, /**< buffer with characters */ + lit_utf8_size_t number_of_characters, /**< number of characters to be read */ + ecma_char_ptr_t out_code_unit_p) /**< [out] decoded result */ { - lit_code_point_t code_point = 0; + ecma_char_t code_unit = LIT_CHAR_NULL; JERRY_ASSERT (number_of_characters >= 2 && number_of_characters <= 4); for (lit_utf8_size_t i = 0; i < number_of_characters; i++) { - code_point <<= 4; + code_unit = (ecma_char_t) (code_unit << 4u); if (*buf_p >= LIT_CHAR_ASCII_DIGITS_BEGIN && *buf_p <= LIT_CHAR_ASCII_DIGITS_END) { - code_point |= (uint32_t) (*buf_p - LIT_CHAR_ASCII_DIGITS_BEGIN); + code_unit |= (ecma_char_t) (*buf_p - LIT_CHAR_ASCII_DIGITS_BEGIN); } else if (*buf_p >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN && *buf_p <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END) { - code_point |= (uint32_t) (*buf_p - (LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN - 10)); + code_unit |= (ecma_char_t) (*buf_p - (LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN - 10)); } else if (*buf_p >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN && *buf_p <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_END) { - code_point |= (uint32_t) (*buf_p - (LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN - 10)); + code_unit |= (ecma_char_t) (*buf_p - (LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN - 10)); } else { @@ -324,9 +325,9 @@ lit_read_code_point_from_hex (lit_utf8_byte_t *buf_p, /**< buffer with character buf_p++; } - *out_code_point_p = code_point; + *out_code_unit_p = code_unit; return true; -} /* lit_read_code_point_from_hex */ +} /* lit_read_code_unit_from_hex */ /** * Check if specified character is a word character (part of IsWordChar abstract operation) diff --git a/jerry-core/lit/lit-char-helpers.h b/jerry-core/lit/lit-char-helpers.h index c96616c6d7..7792935533 100644 --- a/jerry-core/lit/lit-char-helpers.h +++ b/jerry-core/lit/lit-char-helpers.h @@ -1,4 +1,5 @@ /* Copyright 2015-2016 Samsung Electronics Co., Ltd. + * Copyright 2016 University of Szeged. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +19,8 @@ #include "lit-globals.h" +#define LIT_CHAR_UNDEF ((ecma_char_t) 0xFFFF) /* undefined character */ + /* * Format control characters (ECMA-262 v5, Table 1) */ @@ -213,7 +216,7 @@ extern bool lit_char_is_hex_digit (ecma_char_t); extern uint32_t lit_char_hex_to_int (ecma_char_t); /* read a hex encoded code point from a zero terminated buffer */ -bool lit_read_code_point_from_hex (lit_utf8_byte_t *, lit_utf8_size_t, lit_code_point_t *); +bool lit_read_code_unit_from_hex (lit_utf8_byte_t *, lit_utf8_size_t, ecma_char_ptr_t); /** * Null character diff --git a/jerry-core/parser/regexp/re-compiler.c b/jerry-core/parser/regexp/re-compiler.c index 6e5db4d28c..293244d2c6 100644 --- a/jerry-core/parser/regexp/re-compiler.c +++ b/jerry-core/parser/regexp/re-compiler.c @@ -41,12 +41,12 @@ */ static void re_append_char_class (void *re_ctx_p, /**< RegExp compiler context */ - uint32_t start, /**< character class range from */ - uint32_t end) /**< character class range to */ + ecma_char_t start, /**< character class range from */ + ecma_char_t end) /**< character class range to */ { re_compiler_ctx_t *ctx_p = (re_compiler_ctx_t *) re_ctx_p; - re_append_char (ctx_p->bytecode_ctx_p, (ecma_char_t) start); - re_append_char (ctx_p->bytecode_ctx_p, (ecma_char_t) end); + re_append_char (ctx_p->bytecode_ctx_p, start); + re_append_char (ctx_p->bytecode_ctx_p, end); ctx_p->parser_ctx_p->num_of_classes++; } /* re_append_char_class */ diff --git a/jerry-core/parser/regexp/re-parser.c b/jerry-core/parser/regexp/re-parser.c index ff368ca939..533ae0ac15 100644 --- a/jerry-core/parser/regexp/re-parser.c +++ b/jerry-core/parser/regexp/re-parser.c @@ -315,7 +315,7 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */ { re_token_type_t token_type = ((re_compiler_ctx_t *) re_ctx_p)->current_token.type; out_token_p->qmax = out_token_p->qmin = 1; - uint32_t start = RE_CHAR_UNDEF; + ecma_char_t start = LIT_CHAR_UNDEF; bool is_range = false; parser_ctx_p->num_of_classes = 0; @@ -332,11 +332,11 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */ return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string")); } - uint32_t ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p); + ecma_char_t ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p); if (ch == LIT_CHAR_RIGHT_SQUARE) { - if (start != RE_CHAR_UNDEF) + if (start != LIT_CHAR_UNDEF) { append_char_class (re_ctx_p, start, start); } @@ -349,7 +349,7 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */ return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '-'")); } - if (start != RE_CHAR_UNDEF + if (start != LIT_CHAR_UNDEF && !is_range && *parser_ctx_p->input_curr_p != LIT_CHAR_RIGHT_SQUARE) { @@ -412,40 +412,40 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */ } else if (ch == LIT_CHAR_LOWERCASE_X) { - lit_code_point_t code_point; + ecma_char_t code_unit; - if (!lit_read_code_point_from_hex (parser_ctx_p->input_curr_p, 2, &code_point)) + if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 2, &code_unit)) { return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '\\x'")); } parser_ctx_p->input_curr_p += 2; - append_char_class (re_ctx_p, code_point, code_point); + append_char_class (re_ctx_p, code_unit, code_unit); } else if (ch == LIT_CHAR_LOWERCASE_U) { - lit_code_point_t code_point; + ecma_char_t code_unit; - if (!lit_read_code_point_from_hex (parser_ctx_p->input_curr_p, 4, &code_point)) + if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 4, &code_unit)) { return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '\\u'")); } parser_ctx_p->input_curr_p += 4; - append_char_class (re_ctx_p, code_point, code_point); + append_char_class (re_ctx_p, code_unit, code_unit); } else if (ch == LIT_CHAR_LOWERCASE_D) { /* See ECMA-262 v5, 15.10.2.12 */ append_char_class (re_ctx_p, LIT_CHAR_ASCII_DIGITS_BEGIN, LIT_CHAR_ASCII_DIGITS_END); - ch = RE_CHAR_UNDEF; + ch = LIT_CHAR_UNDEF; } else if (ch == LIT_CHAR_UPPERCASE_D) { /* See ECMA-262 v5, 15.10.2.12 */ append_char_class (re_ctx_p, LIT_CHAR_NULL, LIT_CHAR_ASCII_DIGITS_BEGIN - 1); append_char_class (re_ctx_p, LIT_CHAR_ASCII_DIGITS_END + 1, LIT_UTF16_CODE_UNIT_MAX); - ch = RE_CHAR_UNDEF; + ch = LIT_CHAR_UNDEF; } else if (ch == LIT_CHAR_LOWERCASE_S) { @@ -461,7 +461,7 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */ append_char_class (re_ctx_p, 0x205FUL, 0x205FUL); /* Medium Mathematical Space */ append_char_class (re_ctx_p, 0x3000UL, 0x3000UL); /* Ideographic Space */ append_char_class (re_ctx_p, LIT_CHAR_BOM, LIT_CHAR_BOM); - ch = RE_CHAR_UNDEF; + ch = LIT_CHAR_UNDEF; } else if (ch == LIT_CHAR_UPPERCASE_S) { @@ -478,7 +478,7 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */ append_char_class (re_ctx_p, 0x2060UL, 0x2FFFUL); append_char_class (re_ctx_p, 0x3001UL, LIT_CHAR_BOM - 1); append_char_class (re_ctx_p, LIT_CHAR_BOM + 1, LIT_UTF16_CODE_UNIT_MAX); - ch = RE_CHAR_UNDEF; + ch = LIT_CHAR_UNDEF; } else if (ch == LIT_CHAR_LOWERCASE_W) { @@ -487,7 +487,7 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */ append_char_class (re_ctx_p, LIT_CHAR_UPPERCASE_A, LIT_CHAR_UPPERCASE_Z); append_char_class (re_ctx_p, LIT_CHAR_UNDERSCORE, LIT_CHAR_UNDERSCORE); append_char_class (re_ctx_p, LIT_CHAR_LOWERCASE_A, LIT_CHAR_LOWERCASE_Z); - ch = RE_CHAR_UNDEF; + ch = LIT_CHAR_UNDEF; } else if (ch == LIT_CHAR_UPPERCASE_W) { @@ -497,20 +497,19 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */ append_char_class (re_ctx_p, LIT_CHAR_UPPERCASE_Z + 1, LIT_CHAR_UNDERSCORE - 1); append_char_class (re_ctx_p, LIT_CHAR_UNDERSCORE + 1, LIT_CHAR_LOWERCASE_A - 1); append_char_class (re_ctx_p, LIT_CHAR_LOWERCASE_Z + 1, LIT_UTF16_CODE_UNIT_MAX); - ch = RE_CHAR_UNDEF; + ch = LIT_CHAR_UNDEF; } - else if (ch <= LIT_UTF16_CODE_UNIT_MAX - && lit_char_is_octal_digit ((ecma_char_t) ch) + else if (lit_char_is_octal_digit ((ecma_char_t) ch) && ch != LIT_CHAR_0) { parser_ctx_p->input_curr_p--; - ch = re_parse_octal (parser_ctx_p); + ch = (ecma_char_t) re_parse_octal (parser_ctx_p); } } /* ch == LIT_CHAR_BACKSLASH */ - if (ch == RE_CHAR_UNDEF) + if (ch == LIT_CHAR_UNDEF) { - if (start != RE_CHAR_UNDEF) + if (start != LIT_CHAR_UNDEF) { if (is_range) { @@ -519,13 +518,13 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */ else { append_char_class (re_ctx_p, start, start); - start = RE_CHAR_UNDEF; + start = LIT_CHAR_UNDEF; } } } else { - if (start != RE_CHAR_UNDEF) + if (start != LIT_CHAR_UNDEF) { if (is_range) { @@ -536,7 +535,7 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */ else { append_char_class (re_ctx_p, start, ch); - start = RE_CHAR_UNDEF; + start = LIT_CHAR_UNDEF; is_range = false; } } @@ -667,28 +666,28 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context * else if (ch == LIT_CHAR_LOWERCASE_X && re_hex_lookup (parser_ctx_p, 2)) { - lit_code_point_t code_point; + ecma_char_t code_unit; - if (!lit_read_code_point_from_hex (parser_ctx_p->input_curr_p, 2, &code_point)) + if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 2, &code_unit)) { return ecma_raise_syntax_error (ECMA_ERR_MSG ("decode error")); } parser_ctx_p->input_curr_p += 2; - out_token_p->value = code_point; + out_token_p->value = code_unit; } else if (ch == LIT_CHAR_LOWERCASE_U && re_hex_lookup (parser_ctx_p, 4)) { - lit_code_point_t code_point; + ecma_char_t code_unit; - if (!lit_read_code_point_from_hex (parser_ctx_p->input_curr_p, 4, &code_point)) + if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 4, &code_unit)) { return ecma_raise_syntax_error (ECMA_ERR_MSG ("decode error")); } parser_ctx_p->input_curr_p += 4; - out_token_p->value = code_point; + out_token_p->value = code_unit; } else if (ch == LIT_CHAR_LOWERCASE_D) { diff --git a/jerry-core/parser/regexp/re-parser.h b/jerry-core/parser/regexp/re-parser.h index d4aedb64fb..d011f1b733 100644 --- a/jerry-core/parser/regexp/re-parser.h +++ b/jerry-core/parser/regexp/re-parser.h @@ -75,11 +75,6 @@ typedef enum */ #define RE_MAX_RE_DECESC_DIGITS 9 -/** - * Undefined character (out of the range of the codeunit) - */ -#define RE_CHAR_UNDEF 0xFFFFFFFF - /** * RegExp token type */ @@ -104,7 +99,7 @@ typedef struct uint32_t num_of_classes; /**< number of character classes */ } re_parser_ctx_t; -typedef void (*re_char_class_callback) (void *re_ctx_p, uint32_t start, uint32_t end); +typedef void (*re_char_class_callback) (void *re_ctx_p, ecma_char_t start, ecma_char_t end); ecma_value_t re_parse_char_class (re_parser_ctx_t *, re_char_class_callback, void *, re_token_t *);