diff --git a/jerry-core/CMakeLists.txt b/jerry-core/CMakeLists.txt index b33115d3b2..2a67fc1402 100644 --- a/jerry-core/CMakeLists.txt +++ b/jerry-core/CMakeLists.txt @@ -102,6 +102,7 @@ project (JerryCore CXX C ASM) ${CMAKE_SOURCE_DIR}/jerry-core/ecma/operations ${CMAKE_SOURCE_DIR}/jerry-core/parser/js ${CMAKE_SOURCE_DIR}/jerry-core/parser/js/collections + ${CMAKE_SOURCE_DIR}/jerry-core/parser/regexp ${CMAKE_SOURCE_DIR}/jerry-core/jrt) # Third-party @@ -120,6 +121,7 @@ project (JerryCore CXX C ASM) file(GLOB SOURCE_CORE_ECMA_OPERATIONS ecma/operations/*.cpp) file(GLOB SOURCE_CORE_PARSER_JS parser/js/*.cpp) file(GLOB SOURCE_CORE_PARSER_JS_COLLECTIONS parser/js/collections/*.cpp) + file(GLOB SOURCE_CORE_PARSER_REGEXP parser/regexp/*.cpp) file(GLOB SOURCE_CORE_JRT jrt/*.cpp) set(SOURCE_CORE @@ -134,6 +136,7 @@ project (JerryCore CXX C ASM) ${SOURCE_CORE_ECMA_OPERATIONS} ${SOURCE_CORE_PARSER_JS} ${SOURCE_CORE_PARSER_JS_COLLECTIONS} + ${SOURCE_CORE_PARSER_REGEXP} ${SOURCE_CORE_JRT}) # Per-option configuration diff --git a/jerry-core/ecma/base/ecma-gc.cpp b/jerry-core/ecma/base/ecma-gc.cpp index 00fcf9b861..27ea2ec4b0 100644 --- a/jerry-core/ecma/base/ecma-gc.cpp +++ b/jerry-core/ecma/base/ecma-gc.cpp @@ -330,6 +330,7 @@ ecma_gc_mark (ecma_object_t *object_p) /**< object to mark from */ case ECMA_INTERNAL_PROPERTY_EXTENSION_ID: /* an integer */ case ECMA_INTERNAL_PROPERTY_NON_INSTANTIATED_BUILT_IN_MASK_0_31: /* an integer (bit-mask) */ case ECMA_INTERNAL_PROPERTY_NON_INSTANTIATED_BUILT_IN_MASK_32_63: /* an integer (bit-mask) */ + case ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE: { break; } diff --git a/jerry-core/ecma/base/ecma-globals.h b/jerry-core/ecma/base/ecma-globals.h index 49d6988676..072ddeb1fb 100644 --- a/jerry-core/ecma/base/ecma-globals.h +++ b/jerry-core/ecma/base/ecma-globals.h @@ -255,6 +255,11 @@ typedef enum */ ECMA_INTERNAL_PROPERTY_NON_INSTANTIATED_BUILT_IN_MASK_32_63, + /** + * RegExp bytecode array + */ + ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE, + /** * 
Number of internal properties' types
    */
diff --git a/jerry-core/ecma/base/ecma-helpers-char.cpp b/jerry-core/ecma/base/ecma-helpers-char.cpp
index f1f2c1fec1..1f0196efa5 100644
--- a/jerry-core/ecma/base/ecma-helpers-char.cpp
+++ b/jerry-core/ecma/base/ecma-helpers-char.cpp
@@ -62,6 +62,67 @@ ecma_char_is_line_terminator (ecma_char_t c) /**< character value */
           || ecma_char_is_new_line (c));
 } /* ecma_char_is_line_terminator */
 
+/**
+ * Check if specified character is a word character (part of IsWordChar abstract operation)
+ *
+ * See also: ECMA-262 v5, 15.10.2.6 (IsWordChar)
+ *
+ * @return true - if the character is a word character
+ *         false - otherwise.
+ */
+bool
+ecma_char_is_word_char (ecma_char_t c) /**< character value */
+{
+  if ((c >= 'a' && c <= 'z')
+      || (c >= 'A' && c <= 'Z')
+      || (c >= '0' && c <= '9')
+      || c == '_')
+  {
+    return true;
+  }
+
+  return false;
+} /* ecma_char_is_word_char */
+
+/**
+ * Convert a hex character to an unsigned integer
+ *
+ * Note:
+ *      the character must be a hex digit ([0-9A-Fa-f]); any other value reaches JERRY_UNREACHABLE.
+ *
+ * @return digit value, corresponding to the hex char
+ */
+uint32_t
+ecma_char_hex_to_int (ecma_char_t hex) /**< [0-9A-Fa-f] character value */
+{
+  switch (hex)
+  {
+    case '0': return 0x0;
+    case '1': return 0x1;
+    case '2': return 0x2;
+    case '3': return 0x3;
+    case '4': return 0x4;
+    case '5': return 0x5;
+    case '6': return 0x6;
+    case '7': return 0x7;
+    case '8': return 0x8;
+    case '9': return 0x9;
+    case 'a':
+    case 'A': return 0xA;
+    case 'b':
+    case 'B': return 0xB;
+    case 'c':
+    case 'C': return 0xC;
+    case 'd':
+    case 'D': return 0xD;
+    case 'e':
+    case 'E': return 0xE;
+    case 'f':
+    case 'F': return 0xF;
+    default: JERRY_UNREACHABLE ();
+  }
+} /* ecma_char_hex_to_int */
+
 /**
  * @}
  * @}
diff --git a/jerry-core/ecma/base/ecma-helpers-string.cpp b/jerry-core/ecma/base/ecma-helpers-string.cpp
index c3590f716e..da00f74f48 100644
--- a/jerry-core/ecma/base/ecma-helpers-string.cpp
+++ b/jerry-core/ecma/base/ecma-helpers-string.cpp
@@ -455,6 +455,67 @@ ecma_init_ecma_string_from_magic_string_ex_id (ecma_string_t *string_p, /**< des
   string_p->u.magic_string_ex_id = magic_string_ex_id;
 } /* ecma_init_ecma_string_from_magic_string_ex_id */
 
+/**
+ * Allocate new ecma-string and fill it with specified number of characters from specified buffer
+ *
+ * Note:
+ *      the buffer must be zero-terminated; 'length' must be greater than zero and must not
+ *      exceed the zero-terminated length of the buffer (both conditions are asserted).
+ *
+ * @return pointer to ecma-string descriptor
+ */
+ecma_string_t*
+ecma_new_ecma_string (const ecma_char_t *string_p, /**< input string */
+                      const ecma_length_t length) /**< number of characters */
+{
+  JERRY_ASSERT (string_p != NULL);
+  JERRY_ASSERT (length > 0 && length <= ecma_zt_string_length (string_p));
+
+  if (length != ecma_zt_string_length (string_p))
+  {
+    /*
+     * Only a prefix of the buffer is requested: build a zero-terminated copy of the prefix,
+     * because the magic string lookups below take zero-terminated strings.
+     *
+     * The copy is sized in bytes (sizeof (ecma_char_t) per character, plus the terminator),
+     * matching the number of bytes written by the memcpy and the terminator store.
+     */
+    /* FIXME: update this when 'ecma_is_charset_magic' interface is added */
+    const size_t zt_str_size = sizeof (ecma_char_t) * ((size_t) length + 1);
+    ecma_char_t *zt_str_p = (ecma_char_t *) mem_heap_alloc_block (zt_str_size, MEM_HEAP_ALLOC_SHORT_TERM);
+    memcpy (zt_str_p, string_p, length * sizeof (ecma_char_t));
+    zt_str_p[length] = 0;
+
+    ecma_magic_string_id_t magic_string_id;
+    if (ecma_is_zt_string_magic (zt_str_p, &magic_string_id))
+    {
+      mem_heap_free_block (zt_str_p);
+      return ecma_get_magic_string (magic_string_id);
+    }
+
+    ecma_magic_string_ex_id_t magic_string_ex_id;
+    if (ecma_is_zt_ex_string_magic (zt_str_p, &magic_string_ex_id))
+    {
+      mem_heap_free_block (zt_str_p);
+      return ecma_get_magic_string_ex (magic_string_ex_id);
+    }
+
+    mem_heap_free_block (zt_str_p);
+  }
+
+  ecma_string_t *string_desc_p = ecma_alloc_string ();
+  string_desc_p->refs = 1;
+  string_desc_p->is_stack_var = false;
+  string_desc_p->container = ECMA_STRING_CONTAINER_HEAP_CHUNKS;
+  string_desc_p->hash = ecma_chars_buffer_calc_hash_last_chars (string_p, length);
+  string_desc_p->u.common_field = 0;
+
+  ecma_collection_header_t *collection_p = ecma_new_chars_collection (string_p, length);
+  ECMA_SET_NON_NULL_POINTER (string_desc_p->u.collection_cp, collection_p);
+
+  return string_desc_p;
+} /* ecma_new_ecma_string */
+
 /**
  * Allocate new ecma-string and fill it with characters from specified buffer
  *
@@ -485,19 +546,7 @@ ecma_new_ecma_string
(const ecma_char_t *string_p) /**< zero-terminated string * length++; } - JERRY_ASSERT (length > 0); - - ecma_string_t* string_desc_p = ecma_alloc_string (); - string_desc_p->refs = 1; - string_desc_p->is_stack_var = false; - string_desc_p->container = ECMA_STRING_CONTAINER_HEAP_CHUNKS; - string_desc_p->hash = ecma_chars_buffer_calc_hash_last_chars (string_p, length); - - string_desc_p->u.common_field = 0; - ecma_collection_header_t *collection_p = ecma_new_chars_collection (string_p, length); - ECMA_SET_NON_NULL_POINTER (string_desc_p->u.collection_cp, collection_p); - - return string_desc_p; + return ecma_new_ecma_string (string_p, length); } /* ecma_new_ecma_string */ /** diff --git a/jerry-core/ecma/base/ecma-helpers.cpp b/jerry-core/ecma/base/ecma-helpers.cpp index 2db1512ddf..e61877136a 100644 --- a/jerry-core/ecma/base/ecma-helpers.cpp +++ b/jerry-core/ecma/base/ecma-helpers.cpp @@ -809,6 +809,11 @@ ecma_free_internal_property (ecma_property_t *property_p) /**< the property */ { JERRY_UNREACHABLE (); } + case ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE: + { + void *bytecode_p = ECMA_GET_NON_NULL_POINTER (void, property_value); + mem_heap_free_block (bytecode_p); + } } ecma_dealloc_property (property_p); diff --git a/jerry-core/ecma/base/ecma-helpers.h b/jerry-core/ecma/base/ecma-helpers.h index 987e6345f6..df4c4384d3 100644 --- a/jerry-core/ecma/base/ecma-helpers.h +++ b/jerry-core/ecma/base/ecma-helpers.h @@ -51,7 +51,7 @@ */ #define ECMA_SET_POINTER(field, non_compressed_pointer) MEM_CP_SET_POINTER (field, non_compressed_pointer) -/* ecma-helpers-value.c */ +/* ecma-helpers-value.cpp */ extern bool ecma_is_value_empty (ecma_value_t value); extern bool ecma_is_value_undefined (ecma_value_t value); extern bool ecma_is_value_null (ecma_value_t value); @@ -109,7 +109,8 @@ extern bool ecma_is_completion_value_normal_true (ecma_completion_value_t value) extern bool ecma_is_completion_value_normal_false (ecma_completion_value_t value); extern bool 
ecma_is_completion_value_empty (ecma_completion_value_t value); -/* ecma-helpers-string.c */ +/* ecma-helpers-string.cpp */ +extern ecma_string_t* ecma_new_ecma_string (const ecma_char_t *string_p, const ecma_length_t length); extern ecma_string_t* ecma_new_ecma_string (const ecma_char_t *string_p); extern ecma_string_t* ecma_new_ecma_string_from_uint32 (uint32_t uint_number); extern ecma_string_t* ecma_new_ecma_string_from_number (ecma_number_t number); @@ -161,7 +162,7 @@ extern bool ecma_is_zt_ex_string_magic (const ecma_char_t *zt_string_p, ecma_mag extern ecma_string_hash_t ecma_string_hash (const ecma_string_t *string_p); extern ecma_string_hash_t ecma_chars_buffer_calc_hash_last_chars (const ecma_char_t *chars, ecma_length_t length); -/* ecma-helpers-number.c */ +/* ecma-helpers-number.cpp */ extern const ecma_number_t ecma_number_relative_eps; extern ecma_number_t ecma_number_make_nan (void); @@ -199,7 +200,7 @@ extern void ecma_number_to_decimal (ecma_number_t num, int32_t *out_digits_num_p, int32_t *out_decimal_exp_p); -/* ecma-helpers-values-collection.c */ +/* ecma-helpers-values-collection.cpp */ extern ecma_collection_header_t *ecma_new_values_collection (const ecma_value_t values_buffer[], ecma_length_t values_number, @@ -227,7 +228,7 @@ ecma_collection_iterator_init (ecma_collection_iterator_t *iterator_p, extern bool ecma_collection_iterator_next (ecma_collection_iterator_t *iterator_p); -/* ecma-helpers.c */ +/* ecma-helpers.cpp */ extern ecma_object_t* ecma_create_object (ecma_object_t *prototype_object_p, bool is_extensible, ecma_object_type_t type); @@ -308,7 +309,7 @@ extern ecma_property_descriptor_t ecma_make_empty_property_descriptor (void); extern void ecma_free_property_descriptor (ecma_property_descriptor_t *prop_desc_p); extern ecma_property_descriptor_t ecma_get_property_descriptor_from_property (ecma_property_t *prop_p); -/* ecma-helpers-external-pointers.c */ +/* ecma-helpers-external-pointers.cpp */ extern bool 
ecma_create_external_pointer_property (ecma_object_t *obj_p, ecma_internal_property_id_t id, @@ -320,7 +321,7 @@ ecma_get_external_pointer_value (ecma_object_t *obj_p, extern void ecma_free_external_pointer_in_property (ecma_property_t *prop_p); -/* ecma-helpers-conversion.c */ +/* ecma-helpers-conversion.cpp */ extern ecma_number_t ecma_zt_string_to_number (const ecma_char_t *str_p); extern ssize_t ecma_uint32_to_string (uint32_t value, ecma_char_t *out_buffer_p, ssize_t buffer_size); extern uint32_t ecma_number_to_uint32 (ecma_number_t value); @@ -333,6 +334,8 @@ extern ecma_length_t ecma_number_to_zt_string (ecma_number_t num, ecma_char_t *b extern bool ecma_char_is_new_line (ecma_char_t c); extern bool ecma_char_is_carriage_return (ecma_char_t c); extern bool ecma_char_is_line_terminator (ecma_char_t c); +extern bool ecma_char_is_word_char (ecma_char_t c); +extern uint32_t ecma_char_hex_to_int (ecma_char_t hex); #endif /* !JERRY_ECMA_HELPERS_H */ diff --git a/jerry-core/ecma/base/ecma-magic-strings.inc.h b/jerry-core/ecma/base/ecma-magic-strings.inc.h index 911d2421ed..b6ddaa4cff 100644 --- a/jerry-core/ecma/base/ecma-magic-strings.inc.h +++ b/jerry-core/ecma/base/ecma-magic-strings.inc.h @@ -32,6 +32,13 @@ ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_STRING, "string") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_OBJECT, "object") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_FUNCTION, "function") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_LENGTH, "length") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_SOURCE, "source") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_GLOBAL, "global") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_IGNORECASE_UL, "ignoreCase") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_MULTILINE, "multiline") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_INDEX, "index") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_INPUT, "input") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_LASTINDEX_UL, "lastIndex") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_NAN, "NaN") ECMA_MAGIC_STRING_DEF 
(ECMA_MAGIC_STRING_INFINITY_UL, "Infinity") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_UNDEFINED_UL, "Undefined") @@ -44,7 +51,8 @@ ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_STRING_UL, "String") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_BOOLEAN_UL, "Boolean") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_NUMBER_UL, "Number") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_DATE_UL, "Date") -ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_REG_EXP_UL, "RegExp") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_REGEXP_UL, "RegExp") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_REGEXP_SOURCE_UL, "Source") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_ERROR_UL, "Error") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_EVAL_ERROR_UL, "EvalError") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_RANGE_ERROR_UL, "RangeError") @@ -205,6 +213,11 @@ ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_EXEC, "exec") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_TEST, "test") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_NAME, "name") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_MESSAGE, "message") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_G_CHAR, "g") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_I_CHAR, "i") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_M_CHAR, "m") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_SLASH_CHAR, "/") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_EMPTY_NON_CAPTURE_GROUP, "(?:)") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_LEFT_SQUARE_CHAR, "[") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_RIGHT_SQUARE_CHAR, "]") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_COLON_CHAR, ":") diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-global.inc.h b/jerry-core/ecma/builtin-objects/ecma-builtin-global.inc.h index a300c76f84..43e1749842 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-global.inc.h +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-global.inc.h @@ -133,12 +133,14 @@ OBJECT_VALUE (ECMA_MAGIC_STRING_DATE_UL, ECMA_PROPERTY_CONFIGURABLE) #endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_DATE_BUILTIN */ +#ifndef 
CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
 // ECMA-262 v5, 15.1.4.8
-CP_UNIMPLEMENTED_VALUE (ECMA_MAGIC_STRING_REG_EXP_UL,
-                        ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP),
-                        ECMA_PROPERTY_WRITABLE,
-                        ECMA_PROPERTY_NOT_ENUMERABLE,
-                        ECMA_PROPERTY_CONFIGURABLE)
+OBJECT_VALUE (ECMA_MAGIC_STRING_REGEXP_UL,
+              ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP),
+              ECMA_PROPERTY_WRITABLE,
+              ECMA_PROPERTY_NOT_ENUMERABLE,
+              ECMA_PROPERTY_CONFIGURABLE)
+#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
 
 #ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ERROR_BUILTINS
 // ECMA-262 v5, 15.1.4.9
diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.cpp b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.cpp
new file mode 100644
index 0000000000..c197d41010
--- /dev/null
+++ b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.cpp
@@ -0,0 +1,249 @@
+/* Copyright 2015 Samsung Electronics Co., Ltd.
+ * Copyright 2015 University of Szeged.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ecma-builtins.h"
+#include "ecma-conversion.h"
+#include "ecma-exceptions.h"
+#include "ecma-globals.h"
+#include "ecma-helpers.h"
+#include "ecma-objects.h"
+#include "ecma-try-catch-macro.h"
+
+#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
+#include "ecma-regexp-object.h"
+#include "re-compiler.h"
+
+#define ECMA_BUILTINS_INTERNAL
+#include "ecma-builtins-internal.h"
+
+#define BUILTIN_INC_HEADER_NAME "ecma-builtin-regexp-prototype.inc.h"
+#define BUILTIN_UNDERSCORED_ID regexp_prototype
+#include "ecma-builtin-internal-routines-template.inc.h"
+
+/** \addtogroup ecma ECMA
+ * @{
+ *
+ * \addtogroup ecmabuiltins
+ * @{
+ *
+ * \addtogroup regexp ECMA RegExp.prototype object built-in
+ * @{
+ */
+
+/**
+ * The RegExp.prototype object's 'exec' routine
+ *
+ * See also:
+ *          ECMA-262 v5, 15.10.6.2
+ *
+ * Note:
+ *      a TypeError is raised if the this argument is not an object of class RegExp.
+ *
+ * @return completion value
+ *         Returned value must be freed with ecma_free_completion_value.
+ */
+static ecma_completion_value_t
+ecma_builtin_regexp_prototype_exec (ecma_value_t this_arg, /**< this argument */
+                                    ecma_value_t arg) /**< routine's argument */
+{
+  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
+
+  /* The class name can only be queried on an object, so non-object this values are rejected first */
+  if (!ecma_is_value_object (this_arg)
+      || ecma_object_get_class_name (ecma_get_object_from_value (this_arg)) != ECMA_MAGIC_STRING_REGEXP_UL)
+  {
+    ret_value = ecma_raise_type_error ((const ecma_char_t *) "Incomplete RegExp type");
+  }
+  else
+  {
+    ECMA_TRY_CATCH (obj_this, ecma_op_to_object (this_arg), ret_value);
+
+    ecma_object_t *obj_p = ecma_get_object_from_value (obj_this);
+    ecma_property_t *bytecode_prop_p = ecma_get_internal_property (obj_p, ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE);
+    re_bytecode_t *bytecode_p = ECMA_GET_POINTER (re_bytecode_t, bytecode_prop_p->u.internal_property.value);
+
+    ECMA_TRY_CATCH (input_str_value,
+                    ecma_op_to_string (arg),
+                    ret_value);
+
+    ecma_string_t *input_str_p = ecma_get_string_from_value (input_str_value);
+
+    /* Copy the input ecma-string into a zero-terminated character buffer for ecma_regexp_exec_helper */
+    int32_t input_str_len = ecma_string_get_length (input_str_p);
+
+    MEM_DEFINE_LOCAL_ARRAY (input_zt_str_p, input_str_len + 1, ecma_char_t);
+
+    ssize_t zt_str_size = (ssize_t) sizeof (ecma_char_t) * (input_str_len + 1);
+    ecma_string_to_zt_string (input_str_p, input_zt_str_p, zt_str_size);
+
+    ret_value = ecma_regexp_exec_helper (obj_p, bytecode_p, input_zt_str_p);
+
+    MEM_FINALIZE_LOCAL_ARRAY (input_zt_str_p);
+
+    ECMA_FINALIZE (input_str_value);
+
+    ECMA_FINALIZE (obj_this);
+  }
+
+  return ret_value;
+} /* ecma_builtin_regexp_prototype_exec */
+
+/**
+ * The RegExp.prototype object's 'test' routine
+ *
+ * See also:
+ *          ECMA-262 v5, 15.10.6.3
+ *
+ * @return completion value
+ *         Returned value must be freed with ecma_free_completion_value.
+ */
+static ecma_completion_value_t
+ecma_builtin_regexp_prototype_test (ecma_value_t this_arg, /**< this argument */
+                                    ecma_value_t arg) /**< routine's argument */
+{
+  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
+
+  ECMA_TRY_CATCH (match_value,
+                  ecma_builtin_regexp_prototype_exec (this_arg, arg),
+                  ret_value);
+
+  /*
+   * NOTE(review): this treats an undefined result of 'exec' as "no match"; ECMA-262 v5, 15.10.6.2
+   * specifies null for a failed match - confirm what ecma_regexp_exec_helper returns in that case.
+   */
+  if (ecma_is_value_undefined (match_value))
+  {
+    ret_value = ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE);
+  }
+  else
+  {
+    ret_value = ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_TRUE);
+  }
+
+  ECMA_FINALIZE (match_value);
+
+  return ret_value;
+} /* ecma_builtin_regexp_prototype_test */
+
+/**
+ * Append a flag character to the string representation of a RegExp object,
+ * if the corresponding flag property of the object is true
+ *
+ * Note:
+ *      if the flag is set, the reference to the input string is released
+ *      and a new string is returned instead.
+ *
+ * @return the input string, if the flag is not set,
+ *         a new string with the flag character appended - otherwise.
+ */
+static ecma_string_t*
+ecma_builtin_regexp_prototype_append_flag (ecma_object_t *obj_p, /**< RegExp object */
+                                           ecma_string_t *output_str_p, /**< string built so far */
+                                           ecma_magic_string_id_t flag_prop_name_id, /**< flag property name */
+                                           ecma_magic_string_id_t flag_char_id) /**< flag character */
+{
+  ecma_string_t *flag_prop_name_p = ecma_get_magic_string (flag_prop_name_id);
+  ecma_property_t *flag_prop_p = ecma_op_object_get_property (obj_p, flag_prop_name_p);
+  ecma_deref_ecma_string (flag_prop_name_p);
+
+  if (!ecma_is_value_true (flag_prop_p->u.named_data_property.value))
+  {
+    return output_str_p;
+  }
+
+  ecma_string_t *flag_str_p = ecma_get_magic_string (flag_char_id);
+  ecma_string_t *concat_p = ecma_concat_ecma_strings (output_str_p, flag_str_p);
+  ecma_deref_ecma_string (output_str_p);
+  ecma_deref_ecma_string (flag_str_p);
+
+  return concat_p;
+} /* ecma_builtin_regexp_prototype_append_flag */
+
+/**
+ * The RegExp.prototype object's 'toString' routine
+ *
+ * See also:
+ *          ECMA-262 v5, 15.10.6.4
+ *
+ * Note:
+ *      a TypeError is raised if the this argument is not an object of class RegExp.
+ *
+ * @return completion value
+ *         Returned value must be freed with ecma_free_completion_value.
+ */
+static ecma_completion_value_t
+ecma_builtin_regexp_prototype_to_string (ecma_value_t this_arg) /**< this argument */
+{
+  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
+
+  /* The class name can only be queried on an object, so non-object this values are rejected first */
+  if (!ecma_is_value_object (this_arg)
+      || ecma_object_get_class_name (ecma_get_object_from_value (this_arg)) != ECMA_MAGIC_STRING_REGEXP_UL)
+  {
+    ret_value = ecma_raise_type_error ((const ecma_char_t *) "Incomplete RegExp type");
+  }
+  else
+  {
+    ECMA_TRY_CATCH (obj_this,
+                    ecma_op_to_object (this_arg),
+                    ret_value);
+
+    ecma_object_t *obj_p = ecma_get_object_from_value (obj_this);
+
+    /* Get RegExp source from the source property */
+    ecma_string_t *magic_string_p = ecma_get_magic_string (ECMA_MAGIC_STRING_SOURCE);
+    ecma_property_t *source_prop_p = ecma_op_object_get_property (obj_p, magic_string_p);
+    ecma_deref_ecma_string (magic_string_p);
+
+    /* The string is borrowed from the property value: no reference is taken here, so none is released */
+    ecma_string_t *source_str_p = ecma_get_string_from_value (source_prop_p->u.named_data_property.value);
+
+    /* Build the "/source/" part */
+    ecma_string_t *src_sep_str_p = ecma_get_magic_string (ECMA_MAGIC_STRING_SLASH_CHAR);
+    ecma_string_t *prefix_str_p = ecma_concat_ecma_strings (src_sep_str_p, source_str_p);
+    ecma_string_t *output_str_p = ecma_concat_ecma_strings (prefix_str_p, src_sep_str_p);
+    ecma_deref_ecma_string (prefix_str_p);
+    ecma_deref_ecma_string (src_sep_str_p);
+
+    /* Append the flag characters in 'g', 'i', 'm' order */
+    output_str_p = ecma_builtin_regexp_prototype_append_flag (obj_p,
+                                                              output_str_p,
+                                                              ECMA_MAGIC_STRING_GLOBAL,
+                                                              ECMA_MAGIC_STRING_G_CHAR);
+    output_str_p = ecma_builtin_regexp_prototype_append_flag (obj_p,
+                                                              output_str_p,
+                                                              ECMA_MAGIC_STRING_IGNORECASE_UL,
+                                                              ECMA_MAGIC_STRING_I_CHAR);
+    output_str_p = ecma_builtin_regexp_prototype_append_flag (obj_p,
+                                                              output_str_p,
+                                                              ECMA_MAGIC_STRING_MULTILINE,
+                                                              ECMA_MAGIC_STRING_M_CHAR);
+
+    ret_value = ecma_make_normal_completion_value (ecma_make_string_value (output_str_p));
+
+    ECMA_FINALIZE (obj_this);
+  }
+
+  return ret_value;
+} /* ecma_builtin_regexp_prototype_to_string */
+
+/**
+ * @}
+ * @}
+ * @}
+ */
+
+#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h
new file mode 100644
index 0000000000..232591597d
--- /dev/null
+++ b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h
@@ -0,0 +1,52 @@
+/* Copyright 2015 Samsung Electronics Co., Ltd.
+ * Copyright 2015 University of Szeged.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * RegExp.prototype built-in description
+ */
+
+#ifndef OBJECT_ID
+# define OBJECT_ID(builtin_object_id)
+#endif /* !OBJECT_ID */
+
+#ifndef OBJECT_VALUE
+# define OBJECT_VALUE(name, obj_getter, prop_writable, prop_enumerable, prop_configurable)
+#endif /* !OBJECT_VALUE */
+
+#ifndef ROUTINE
+# define ROUTINE(name, c_function_name, args_number, length_prop_value)
+#endif /* !ROUTINE */
+
+/* Object identifier */
+OBJECT_ID (ECMA_BUILTIN_ID_REGEXP_PROTOTYPE)
+
+OBJECT_VALUE (ECMA_MAGIC_STRING_CONSTRUCTOR,
+              ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP),
+              ECMA_PROPERTY_WRITABLE,
+              ECMA_PROPERTY_NOT_ENUMERABLE,
+              ECMA_PROPERTY_CONFIGURABLE)
+
+ROUTINE (ECMA_MAGIC_STRING_EXEC, ecma_builtin_regexp_prototype_exec, 1, 1)
+ROUTINE (ECMA_MAGIC_STRING_TEST, ecma_builtin_regexp_prototype_test, 1, 1)
+ROUTINE (ECMA_MAGIC_STRING_TO_STRING_UL, ecma_builtin_regexp_prototype_to_string, 0, 0)
+
+#undef OBJECT_ID
+#undef SIMPLE_VALUE
+#undef NUMBER_VALUE
+#undef STRING_VALUE
+#undef OBJECT_VALUE
+#undef CP_UNIMPLEMENTED_VALUE
+#undef ROUTINE
diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp.cpp b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp.cpp
new file mode 100644
index 0000000000..35b7e75a1a
--- /dev/null
+++ b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp.cpp
@@ -0,0 +1,162 @@
+/* Copyright 2015 Samsung Electronics Co., Ltd.
+ * Copyright 2015 University of Szeged.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ecma-alloc.h"
+#include "ecma-builtins.h"
+#include "ecma-conversion.h"
+#include "ecma-exceptions.h"
+#include "ecma-helpers.h"
+#include "ecma-objects.h"
+#include "ecma-regexp-object.h"
+#include "ecma-try-catch-macro.h"
+
+#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
+
+#define ECMA_BUILTINS_INTERNAL
+#include "ecma-builtins-internal.h"
+
+#define BUILTIN_INC_HEADER_NAME "ecma-builtin-regexp.inc.h"
+#define BUILTIN_UNDERSCORED_ID regexp
+#include "ecma-builtin-internal-routines-template.inc.h"
+
+/** \addtogroup ecma ECMA
+ * @{
+ *
+ * \addtogroup ecmabuiltins
+ * @{
+ *
+ * \addtogroup regexp ECMA RegExp object built-in
+ * @{
+ */
+
+/**
+ * Handle calling [[Call]] of built-in RegExp object
+ *
+ * @return completion-value
+ */
+ecma_completion_value_t
+ecma_builtin_regexp_dispatch_call (const ecma_value_t *arguments_list_p, /**< arguments list */
+                                   ecma_length_t arguments_list_len) /**< number of arguments */
+{
+  return ecma_builtin_regexp_dispatch_construct (arguments_list_p, arguments_list_len);
+} /* ecma_builtin_regexp_dispatch_call */
+
+/**
+ * Handle calling [[Construct]] of built-in RegExp object
+ *
+ * See also:
+ *          ECMA-262 v5, 15.10.4.1
+ *
+ * Note:
+ *      an undefined (or missing) pattern is treated as the empty pattern "(?:)",
+ *      and undefined (or missing) flags are treated as no flags.
+ *
+ * @return completion-value
+ */
+ecma_completion_value_t
+ecma_builtin_regexp_dispatch_construct (const ecma_value_t *arguments_list_p, /**< arguments list */
+                                        ecma_length_t arguments_list_len) /**< number of arguments */
+{
+  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
+  ecma_value_t pattern_value = ecma_make_simple_value (ECMA_SIMPLE_VALUE_UNDEFINED);
+  ecma_value_t flags_value = ecma_make_simple_value (ECMA_SIMPLE_VALUE_UNDEFINED);
+
+  if (arguments_list_len > 0)
+  {
+    /* pattern string or RegExp object */
+    pattern_value = arguments_list_p[0];
+
+    if (arguments_list_len > 1)
+    {
+      flags_value = arguments_list_p[1];
+    }
+  }
+
+  if (ecma_is_value_object (pattern_value)
+      && ecma_object_get_class_name (ecma_get_object_from_value (pattern_value)) == ECMA_MAGIC_STRING_REGEXP_UL)
+  {
+    /*
+     * NOTE(review): the RegExp object itself is returned here, also for [[Construct]];
+     * ECMA-262 v5, 15.10.4.1 describes creating a new object in that case - confirm.
+     */
+    if (ecma_is_value_undefined (flags_value))
+    {
+      ret_value = ecma_make_normal_completion_value (ecma_copy_value (pattern_value, true));
+    }
+    else
+    {
+      ret_value = ecma_raise_type_error ((const ecma_char_t *) "Invalid argument of RegExp call.");
+    }
+  }
+  else
+  {
+    ecma_string_t *pattern_string_p = NULL;
+    ecma_string_t *flags_string_p = NULL;
+
+    if (ecma_is_value_undefined (pattern_value))
+    {
+      pattern_string_p = ecma_get_magic_string (ECMA_MAGIC_STRING_EMPTY_NON_CAPTURE_GROUP);
+    }
+    else
+    {
+      ECMA_TRY_CATCH (regexp_str_value,
+                      ecma_op_to_string (pattern_value),
+                      ret_value);
+
+      pattern_string_p = ecma_copy_or_ref_ecma_string (ecma_get_string_from_value (regexp_str_value));
+
+      ECMA_FINALIZE (regexp_str_value);
+    }
+
+    /* The flags are converted only if the pattern conversion did not throw */
+    if (ecma_is_completion_value_empty (ret_value)
+        && !ecma_is_value_undefined (flags_value))
+    {
+      ECMA_TRY_CATCH (flags_str_value,
+                      ecma_op_to_string (flags_value),
+                      ret_value);
+
+      flags_string_p = ecma_copy_or_ref_ecma_string (ecma_get_string_from_value (flags_str_value));
+
+      ECMA_FINALIZE (flags_str_value);
+    }
+
+    if (ecma_is_completion_value_empty (ret_value))
+    {
+      ret_value = ecma_op_create_regexp_object (pattern_string_p, flags_string_p);
+    }
+
+    if (pattern_string_p != NULL)
+    {
+      ecma_deref_ecma_string (pattern_string_p);
+    }
+
+    if (flags_string_p != NULL)
+    {
+      ecma_deref_ecma_string (flags_string_p);
+    }
+  }
+
+  return ret_value;
+} /* ecma_builtin_regexp_dispatch_construct */
+
+/**
+ * @}
+ * @}
+ * @}
+ */
+
+#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp.inc.h
b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp.inc.h new file mode 100644 index 0000000000..1170cb009f --- /dev/null +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp.inc.h @@ -0,0 +1,97 @@ +/* Copyright 2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * RegExp built-in description + */ + +#ifndef OBJECT_ID +# define OBJECT_ID(builtin_object_id) +#endif /* !OBJECT_ID */ + +#ifndef OBJECT_VALUE +# define OBJECT_VALUE(name, obj_getter, prop_writable, prop_enumerable, prop_configurable) +#endif /* !OBJECT_VALUE */ + +#ifndef NUMBER_VALUE +# define NUMBER_VALUE(name, number_value, prop_writable, prop_enumerable, prop_configurable) +#endif /* !NUMBER_VALUE */ + +#ifndef SIMPLE_VALUE +# define SIMPLE_VALUE(name, simple_value, prop_writable, prop_enumerable, prop_configurable) +#endif /* !SIMPLE_VALUE */ + +#ifndef STRING_VALUE +# define STRING_VALUE(name, magic_string_id, prop_writable, prop_enumerable, prop_configurable) +#endif /* !STRING_VALUE */ + +/* Object identifier */ +OBJECT_ID (ECMA_BUILTIN_ID_REGEXP) + +// ECMA-262 v5, 15.10.5.1 +OBJECT_VALUE (ECMA_MAGIC_STRING_PROTOTYPE, + ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP_PROTOTYPE), + ECMA_PROPERTY_NOT_WRITABLE, + ECMA_PROPERTY_NOT_ENUMERABLE, + ECMA_PROPERTY_NOT_CONFIGURABLE) + +// ECMA-262 v5, 15.10.7.1 +STRING_VALUE (ECMA_MAGIC_STRING_SOURCE, + ECMA_MAGIC_STRING_REGEXP_SOURCE_UL, + 
ECMA_PROPERTY_NOT_WRITABLE, + ECMA_PROPERTY_NOT_ENUMERABLE, + ECMA_PROPERTY_NOT_CONFIGURABLE) + +// ECMA-262 v5, 15.10.7.2 +SIMPLE_VALUE (ECMA_MAGIC_STRING_GLOBAL, + ECMA_SIMPLE_VALUE_FALSE, + ECMA_PROPERTY_NOT_WRITABLE, + ECMA_PROPERTY_NOT_ENUMERABLE, + ECMA_PROPERTY_NOT_CONFIGURABLE) + +// ECMA-262 v5, 15.10.7.3 +SIMPLE_VALUE (ECMA_MAGIC_STRING_IGNORECASE_UL, + ECMA_SIMPLE_VALUE_FALSE, + ECMA_PROPERTY_NOT_WRITABLE, + ECMA_PROPERTY_NOT_ENUMERABLE, + ECMA_PROPERTY_NOT_CONFIGURABLE) +// ECMA-262 v5, 15.10.7.4 +SIMPLE_VALUE (ECMA_MAGIC_STRING_MULTILINE, + ECMA_SIMPLE_VALUE_FALSE, + ECMA_PROPERTY_NOT_WRITABLE, + ECMA_PROPERTY_NOT_ENUMERABLE, + ECMA_PROPERTY_NOT_CONFIGURABLE) + +// ECMA-262 v5, 15.10.7.5 +NUMBER_VALUE (ECMA_MAGIC_STRING_LASTINDEX_UL, + 0, + ECMA_PROPERTY_NOT_WRITABLE, + ECMA_PROPERTY_NOT_ENUMERABLE, + ECMA_PROPERTY_NOT_CONFIGURABLE) + +NUMBER_VALUE (ECMA_MAGIC_STRING_LENGTH, + 2, + ECMA_PROPERTY_NOT_WRITABLE, + ECMA_PROPERTY_NOT_ENUMERABLE, + ECMA_PROPERTY_NOT_CONFIGURABLE) + +#undef OBJECT_ID +#undef SIMPLE_VALUE +#undef NUMBER_VALUE +#undef STRING_VALUE +#undef OBJECT_VALUE +#undef CP_UNIMPLEMENTED_VALUE +#undef ROUTINE diff --git a/jerry-core/ecma/builtin-objects/ecma-builtins.inc.h b/jerry-core/ecma/builtin-objects/ecma-builtins.inc.h index 285588f750..a2a7abca61 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtins.inc.h +++ b/jerry-core/ecma/builtin-objects/ecma-builtins.inc.h @@ -148,6 +148,24 @@ BUILTIN (ECMA_BUILTIN_ID_DATE, date) #endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_DATE_BUILTIN*/ +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN +/* The RegExp.prototype object (15.10.6) */ +BUILTIN (ECMA_BUILTIN_ID_REGEXP_PROTOTYPE, + ECMA_OBJECT_TYPE_GENERAL, + ECMA_BUILTIN_ID_OBJECT_PROTOTYPE, + true, + true, + regexp_prototype) + +/* The RegExp object (15.10) */ +BUILTIN (ECMA_BUILTIN_ID_REGEXP, + ECMA_OBJECT_TYPE_FUNCTION, + ECMA_BUILTIN_ID_FUNCTION_PROTOTYPE, + true, + true, + regexp) +#endif /* 
!CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ + #ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ERROR_BUILTINS /* The Error.prototype object (15.11.4) */ BUILTIN (ECMA_BUILTIN_ID_ERROR_PROTOTYPE, diff --git a/jerry-core/ecma/operations/ecma-exceptions.cpp b/jerry-core/ecma/operations/ecma-exceptions.cpp index 4fbe2ab6e0..5658037049 100644 --- a/jerry-core/ecma/operations/ecma-exceptions.cpp +++ b/jerry-core/ecma/operations/ecma-exceptions.cpp @@ -130,6 +130,118 @@ ecma_new_standard_error_with_message (ecma_standard_error_t error_type, /**< nat return new_error_obj_p; } /* ecma_new_standard_error_with_message */ +/** + * Raise a standard ecma-error with the given type and message. + * + * @return throw completion value carrying the newly created error object + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +ecma_raise_standard_error (ecma_standard_error_t error_type, /**< error type */ + const ecma_char_t *msg_p) /**< error message */ +{ + ecma_string_t *error_msg_p = ecma_new_ecma_string (msg_p); + ecma_object_t *error_obj_p = ecma_new_standard_error_with_message (error_type, error_msg_p); + ecma_deref_ecma_string (error_msg_p); /* this function no longer uses the message string */ + return ecma_make_throw_obj_completion_value (error_obj_p); +} /* ecma_raise_standard_error */ + +/** + * Raise a common error with the given message. + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +ecma_raise_common_error (const ecma_char_t *msg_p) /**< error message */ +{ + return ecma_raise_standard_error (ECMA_ERROR_COMMON, msg_p); +} /* ecma_raise_common_error */ + +/** + * Raise an EvalError with the given message. 
+ * + * See also: ECMA-262 v5, 15.11.6.1 + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +ecma_raise_eval_error (const ecma_char_t *msg_p) /**< error message */ +{ + return ecma_raise_standard_error (ECMA_ERROR_EVAL, msg_p); +} /* ecma_raise_eval_error */ + +/** + * Raise a RangeError with the given message. + * + * See also: ECMA-262 v5, 15.11.6.2 + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +ecma_raise_range_error (const ecma_char_t *msg_p) /**< error message */ +{ + return ecma_raise_standard_error (ECMA_ERROR_RANGE, msg_p); +} /* ecma_raise_range_error */ + +/** + * Raise a ReferenceError with the given message. + * + * See also: ECMA-262 v5, 15.11.6.3 + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +ecma_raise_reference_error (const ecma_char_t *msg_p) /**< error message */ +{ + return ecma_raise_standard_error (ECMA_ERROR_REFERENCE, msg_p); +} /* ecma_raise_reference_error */ + +/** + * Raise a SyntaxError with the given message. + * + * See also: ECMA-262 v5, 15.11.6.4 + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +ecma_raise_syntax_error (const ecma_char_t *msg_p) /**< error message */ +{ + return ecma_raise_standard_error (ECMA_ERROR_SYNTAX, msg_p); +} /* ecma_raise_syntax_error */ + +/** + * Raise a TypeError with the given message. + * + * See also: ECMA-262 v5, 15.11.6.5 + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +ecma_raise_type_error (const ecma_char_t *msg_p) /**< error message */ +{ + return ecma_raise_standard_error (ECMA_ERROR_TYPE, msg_p); +} /* ecma_raise_type_error */ + +/** + * Raise a URIError with the given message. 
+ * + * See also: ECMA-262 v5, 15.11.6.6 + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +ecma_raise_uri_error (const ecma_char_t *msg_p) /**< error message */ +{ + return ecma_raise_standard_error (ECMA_ERROR_URI, msg_p); +} /* ecma_raise_uri_error */ + /** * @} * @} diff --git a/jerry-core/ecma/operations/ecma-exceptions.h b/jerry-core/ecma/operations/ecma-exceptions.h index b29ed14c73..e93eaed838 100644 --- a/jerry-core/ecma/operations/ecma-exceptions.h +++ b/jerry-core/ecma/operations/ecma-exceptions.h @@ -45,8 +45,17 @@ typedef enum } ecma_standard_error_t; extern ecma_object_t *ecma_new_standard_error (ecma_standard_error_t error_type); -extern ecma_object_t* ecma_new_standard_error_with_message (ecma_standard_error_t error_type, +extern ecma_object_t *ecma_new_standard_error_with_message (ecma_standard_error_t error_type, ecma_string_t *message_string_p); +extern ecma_completion_value_t ecma_raise_standard_error (ecma_standard_error_t error_type, + const ecma_char_t *msg_p); +extern ecma_completion_value_t ecma_raise_common_error (const ecma_char_t *msg_p); +extern ecma_completion_value_t ecma_raise_eval_error (const ecma_char_t *msg_p); +extern ecma_completion_value_t ecma_raise_range_error (const ecma_char_t *msg_p); +extern ecma_completion_value_t ecma_raise_reference_error (const ecma_char_t *msg_p); +extern ecma_completion_value_t ecma_raise_syntax_error (const ecma_char_t *msg_p); +extern ecma_completion_value_t ecma_raise_type_error (const ecma_char_t *msg_p); +extern ecma_completion_value_t ecma_raise_uri_error (const ecma_char_t *msg_p); /** * @} diff --git a/jerry-core/ecma/operations/ecma-objects.cpp b/jerry-core/ecma/operations/ecma-objects.cpp index f6dd435dc2..a8af6c535b 100644 --- a/jerry-core/ecma/operations/ecma-objects.cpp +++ b/jerry-core/ecma/operations/ecma-objects.cpp @@ -681,6 +681,12 @@ ecma_object_get_class_name (ecma_object_t *obj_p) /**< object */ return 
ECMA_MAGIC_STRING_ERROR_UL; } #endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ERROR_BUILTINS */ +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN + case ECMA_BUILTIN_ID_REGEXP_PROTOTYPE: + { + return ECMA_MAGIC_STRING_REGEXP_UL; + } +#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ default: { JERRY_ASSERT (ecma_builtin_is (obj_p, ECMA_BUILTIN_ID_GLOBAL)); diff --git a/jerry-core/ecma/operations/ecma-regexp-object.cpp b/jerry-core/ecma/operations/ecma-regexp-object.cpp new file mode 100644 index 0000000000..0d43b046e1 --- /dev/null +++ b/jerry-core/ecma/operations/ecma-regexp-object.cpp @@ -0,0 +1,1329 @@ +/* Copyright 2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ecma-alloc.h" +#include "ecma-array-object.h" +#include "ecma-exceptions.h" +#include "ecma-gc.h" +#include "ecma-globals.h" +#include "ecma-objects.h" +#include "ecma-regexp-object.h" +#include "ecma-try-catch-macro.h" +#include "jrt-libc-includes.h" +#include "re-compiler.h" + +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN + +#define ECMA_BUILTINS_INTERNAL +#include "ecma-builtins-internal.h" + +/** \addtogroup ecma ECMA + * @{ + * + * \addtogroup ecmaregexpobject ECMA RegExp object related routines + * @{ + */ + +/* + * RegExp results are stored in an array of string pointers. 
If N is the number + * of groups then the length of the array is 2*N, because every group has a start + * and end. We have to handle those pointers. + * + * [0] RE global start + * [1] RE global end + * [2] 1st group start + * [3] 1st group end + * ... + * [n] n/2 th group start + * [n+1] n/2 th group end + */ +#define RE_GLOBAL_START_IDX 0 +#define RE_GLOBAL_END_IDX 1 + +/* RegExp flags */ +#define RE_FLAG_GLOBAL (1 << 0) /* ECMA-262 v5, 15.10.7.2 */ +#define RE_FLAG_IGNORE_CASE (1 << 1) /* ECMA-262 v5, 15.10.7.3 */ +#define RE_FLAG_MULTILINE (1 << 2) /* ECMA-262 v5, 15.10.7.4 */ + +/** + * Parse RegExp flags (global, ignoreCase, multiline) + * Recognized flag bits are OR-ed into *flags_p, so the caller has to clear it before the call. + * See also: ECMA-262 v5, 15.10.4.1 + * + * @return empty completion value on success, SyntaxError throw completion on an unknown or repeated flag + * Returned value must be freed with ecma_free_completion_value + */ +static ecma_completion_value_t +re_parse_regexp_flags (ecma_string_t *flags_str_p, /**< Input string with flags */ + uint8_t *flags_p) /**< Output: parsed flag bits */ +{ + ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + + int32_t flags_str_len = ecma_string_get_length (flags_str_p); + MEM_DEFINE_LOCAL_ARRAY (flags_start_p, flags_str_len + 1, ecma_char_t); + ssize_t zt_str_size = (ssize_t) sizeof (ecma_char_t) * (flags_str_len + 1); + ecma_string_to_zt_string (flags_str_p, flags_start_p, zt_str_size); + + ecma_char_t *flags_char_p = flags_start_p; + for (int32_t ch_cnt = 0; + ch_cnt < flags_str_len /* bound by the character count: zt_str_size is a size in bytes */ + && ecma_is_completion_value_empty (ret_value); ch_cnt++) + { + switch (*flags_char_p) + { + case 'g': + { + if (*flags_p & RE_FLAG_GLOBAL) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Invalid RegExp flags."); + } + *flags_p |= RE_FLAG_GLOBAL; + break; + } + case 'i': + { + if (*flags_p & RE_FLAG_IGNORE_CASE) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Invalid RegExp flags."); + } + *flags_p |= RE_FLAG_IGNORE_CASE; + break; + } + case 'm': + { + if (*flags_p & RE_FLAG_MULTILINE) + { + ret_value = 
ecma_raise_syntax_error ((const ecma_char_t *) "Invalid RegExp flags."); + } + *flags_p |= RE_FLAG_MULTILINE; + break; + } + default: + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Invalid RegExp flags."); + break; + } + } + flags_char_p++; + } + + MEM_FINALIZE_LOCAL_ARRAY (flags_start_p); + + return ret_value; +} /* re_parse_regexp_flags */ + +/** + * RegExp object creation operation. + * + * See also: ECMA-262 v5, 15.10.4.1 + * + * @return normal completion value with the new RegExp object, or the throw completion of the failed flag parsing / compilation + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +ecma_op_create_regexp_object (ecma_string_t *pattern_p, /**< input pattern */ + ecma_string_t *flags_str_p) /**< flags string, may be NULL (no flags) */ +{ + JERRY_ASSERT (pattern_p != NULL); + ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + + uint8_t flags = 0; + if (flags_str_p != NULL) + { + ECMA_TRY_CATCH (empty, re_parse_regexp_flags (flags_str_p, &flags), ret_value); + ECMA_FINALIZE (empty); + + if (!ecma_is_completion_value_empty (ret_value)) + { + return ret_value; + } + } + + ecma_object_t *re_prototype_obj_p = ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP_PROTOTYPE); + + ecma_object_t *obj_p = ecma_create_object (re_prototype_obj_p, true, ECMA_OBJECT_TYPE_GENERAL); + ecma_deref_object (re_prototype_obj_p); + + /* Set the internal [[Class]] property */ + ecma_property_t *class_prop_p = ecma_create_internal_property (obj_p, ECMA_INTERNAL_PROPERTY_CLASS); + class_prop_p->u.internal_property.value = ECMA_MAGIC_STRING_REGEXP_UL; + + /* Set source property. 
ECMA-262 v5, 15.10.7.1 */ + ecma_string_t *magic_string_p = ecma_get_magic_string (ECMA_MAGIC_STRING_SOURCE); + ecma_property_t *source_prop_p = ecma_create_named_data_property (obj_p, + magic_string_p, + false, false, false); + ecma_deref_ecma_string (magic_string_p); + ecma_set_named_data_property_value (source_prop_p, + ecma_make_string_value (ecma_copy_or_ref_ecma_string (pattern_p))); + + ecma_simple_value_t prop_value; + + /* Set global property. ECMA-262 v5, 15.10.7.2 */ + magic_string_p = ecma_get_magic_string (ECMA_MAGIC_STRING_GLOBAL); + ecma_property_t *global_prop_p = ecma_create_named_data_property (obj_p, + magic_string_p, + false, false, false); + ecma_deref_ecma_string (magic_string_p); + prop_value = flags & RE_FLAG_GLOBAL ? ECMA_SIMPLE_VALUE_TRUE : ECMA_SIMPLE_VALUE_FALSE; + ecma_set_named_data_property_value (global_prop_p, ecma_make_simple_value (prop_value)); + + /* Set ignoreCase property. ECMA-262 v5, 15.10.7.3 */ + magic_string_p = ecma_get_magic_string (ECMA_MAGIC_STRING_IGNORECASE_UL); + ecma_property_t *ignorecase_prop_p = ecma_create_named_data_property (obj_p, + magic_string_p, + false, false, false); + ecma_deref_ecma_string (magic_string_p); + prop_value = flags & RE_FLAG_IGNORE_CASE ? ECMA_SIMPLE_VALUE_TRUE : ECMA_SIMPLE_VALUE_FALSE; + ecma_set_named_data_property_value (ignorecase_prop_p, ecma_make_simple_value (prop_value)); + + + /* Set multiline property. ECMA-262 v5, 15.10.7.4 */ + magic_string_p = ecma_get_magic_string (ECMA_MAGIC_STRING_MULTILINE); + ecma_property_t *multiline_prop_p = ecma_create_named_data_property (obj_p, + magic_string_p, + false, false, false); + ecma_deref_ecma_string (magic_string_p); + prop_value = flags & RE_FLAG_MULTILINE ? ECMA_SIMPLE_VALUE_TRUE : ECMA_SIMPLE_VALUE_FALSE; + ecma_set_named_data_property_value (multiline_prop_p, ecma_make_simple_value (prop_value)); + + /* Set lastIndex property. 
ECMA-262 v5, 15.10.7.5 */ + magic_string_p = ecma_get_magic_string (ECMA_MAGIC_STRING_LASTINDEX_UL); + ecma_property_t *lastindex_prop_p = ecma_create_named_data_property (obj_p, + magic_string_p, + true, false, false); + ecma_deref_ecma_string (magic_string_p); + + ecma_number_t *lastindex_num_p = ecma_alloc_number (); + *lastindex_num_p = ECMA_NUMBER_ZERO; + ecma_named_data_property_assign_value (obj_p, lastindex_prop_p, ecma_make_number_value (lastindex_num_p)); + ecma_dealloc_number (lastindex_num_p); + + /* Set bytecode internal property. */ + ecma_property_t *bytecode = ecma_create_internal_property (obj_p, ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE); + + /* Compile bytecode. */ + ECMA_TRY_CATCH (empty, re_compile_bytecode (bytecode, pattern_p, flags), ret_value); + ret_value = ecma_make_normal_completion_value (ecma_make_object_value (obj_p)); + ECMA_FINALIZE (empty); + + if (ecma_is_completion_value_throw (ret_value)) + { + ecma_deref_object (obj_p); /* the object is not handed to the caller on failure */ + } + + return ret_value; +} /* ecma_op_create_regexp_object */ + +/** + * Backtrack a unicode character; currently returns the pointer moved back by one ecma_char_t + */ +static const ecma_char_t * +utf8_backtrack (const ecma_char_t *str_p) /**< pointer into the input string */ +{ + /* FIXME: change to string iterator with unicode support, once it is implemented */ + return --str_p; +} /* utf8_backtrack */ + +/** + * Helper to get an input character and increase string pointer. + */ +static ecma_char_t +get_input_char (const ecma_char_t** char_p) /**< in-out: input position, advanced by one character */ +{ + /* FIXME: change to string iterator with unicode support, once it is implemented */ + const ecma_char_t ch = **char_p; + (*char_p)++; + return ch; +} /* get_input_char */ + +/** + * Helper to get current input character, won't increase string pointer. + */ +static ecma_char_t +lookup_input_char (const ecma_char_t *str_p) /**< input position to read from */ +{ + /* FIXME: change to string iterator with unicode support, once it is implemented */ + return *str_p; +} /* lookup_input_char */ + +/** + * Helper to get previous input character, won't decrease string pointer. 
+ */ +static ecma_char_t +lookup_prev_char (const ecma_char_t *str_p) /**< input position; the character before it is read */ +{ + /* FIXME: change to string iterator with unicode support, once it is implemented */ + return *(--str_p); /* only the local copy of the pointer is decremented */ +} /* lookup_prev_char */ + +/** + * Recursive function for RegExp matching. Tests for a regular expression + * match and returns a MatchResult value. + * + * See also: + * ECMA-262 v5, 15.10.2.1 + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +static ecma_completion_value_t +re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */ + re_bytecode_t *bc_p, /**< pointer to the current RegExp bytecode */ + const ecma_char_t *str_p, /**< pointer to the current input character */ + const ecma_char_t **res_p) /**< pointer to the matching substring */ +{ + ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + re_opcode_t op; + + if (re_ctx_p->recursion_depth >= RE_EXECUTE_RECURSION_LIMIT) + { + ret_value = ecma_raise_range_error ((const ecma_char_t *) "RegExp executor recursion limit is exceeded."); + return ret_value; + } + re_ctx_p->recursion_depth++; + + while ((op = re_get_opcode (&bc_p))) + { + if (re_ctx_p->match_limit >= RE_EXECUTE_MATCH_LIMIT) + { + ret_value = ecma_raise_range_error ((const ecma_char_t *) "RegExp executor steps limit is exceeded."); + return ret_value; + } + re_ctx_p->match_limit++; + + switch (op) + { + case RE_OP_MATCH: + { + JERRY_DDLOG ("Execute RE_OP_MATCH: match\n"); + *res_p = str_p; + re_ctx_p->recursion_depth--; + ret_value = ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_TRUE); + return ret_value; /* match */ + } + case RE_OP_CHAR: + { + uint32_t ch1 = re_get_value (&bc_p); + uint32_t ch2 = get_input_char (&str_p); + JERRY_DDLOG ("Character matching %d to %d: ", ch1, ch2); + + if (ch2 == '\0' || ch1 != ch2) + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + 
JERRY_DDLOG ("match\n"); + break; /* tail merge */ + } + case RE_OP_PERIOD: + { + uint32_t ch1 = get_input_char (&str_p); + JERRY_DDLOG ("Period matching '.' to %d: ", ch1); + if (ch1 == '\n' || ch1 == '\0') + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + JERRY_DDLOG ("match\n"); + break; /* tail merge */ + } + case RE_OP_ASSERT_START: + { + JERRY_DDLOG ("Execute RE_OP_ASSERT_START: "); + + if (str_p <= re_ctx_p->input_start_p) + { + JERRY_DDLOG ("match\n"); + break; + } + + if (!(re_ctx_p->flags & RE_FLAG_MULTILINE)) + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + + if (ecma_char_is_line_terminator (lookup_prev_char (str_p))) + { + JERRY_DDLOG ("match\n"); + break; + } + + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + case RE_OP_ASSERT_END: + { + JERRY_DDLOG ("Execute RE_OP_ASSERT_END: "); + + if (str_p >= re_ctx_p->input_end_p) + { + JERRY_DDLOG ("match\n"); + break; /* tail merge */ + } + + if (!(re_ctx_p->flags & RE_FLAG_MULTILINE)) + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + + if (ecma_char_is_line_terminator (lookup_input_char (str_p))) + { + JERRY_DDLOG ("match\n"); + break; /* tail merge */ + } + + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + case RE_OP_ASSERT_WORD_BOUNDARY: + case RE_OP_ASSERT_NOT_WORD_BOUNDARY: + { + bool is_wordchar_left, is_wordchar_right; + + if (str_p <= re_ctx_p->input_start_p) + { + is_wordchar_left = false; /* not a wordchar */ + } + else + { + is_wordchar_left = ecma_char_is_word_char (lookup_prev_char (str_p)); + } + + if (str_p >= 
re_ctx_p->input_end_p) + { + is_wordchar_right = false; /* not a wordchar */ + } + else + { + is_wordchar_right = ecma_char_is_word_char (lookup_input_char (str_p)); + } + + if (op == RE_OP_ASSERT_WORD_BOUNDARY) + { + JERRY_DDLOG ("Execute RE_OP_ASSERT_WORD_BOUNDARY at %c: ", *str_p); + if (is_wordchar_left == is_wordchar_right) + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + } + else + { + JERRY_ASSERT (op == RE_OP_ASSERT_NOT_WORD_BOUNDARY); + JERRY_DDLOG ("Execute RE_OP_ASSERT_NOT_WORD_BOUNDARY at %c: ", *str_p); + + if (is_wordchar_left != is_wordchar_right) + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + } + + JERRY_DDLOG ("match\n"); + break; /* tail merge */ + } + case RE_OP_LOOKAHEAD_POS: + case RE_OP_LOOKAHEAD_NEG: + { + ecma_completion_value_t match_value = ecma_make_empty_completion_value (); + const ecma_char_t *sub_str_p = NULL; + + MEM_DEFINE_LOCAL_ARRAY (saved_bck_p, re_ctx_p->num_of_captures, ecma_char_t *); + size_t size = (size_t) (re_ctx_p->num_of_captures) * sizeof (const ecma_char_t *); + memcpy (saved_bck_p, re_ctx_p->saved_p, size); + + do + { + uint32_t offset = re_get_value (&bc_p); + if (!sub_str_p) + { + match_value = re_match_regexp (re_ctx_p, bc_p, str_p, &sub_str_p); + if (ecma_is_completion_value_throw (match_value)) + { + break; + } + } + bc_p += offset; + } + while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE); + + if (!ecma_is_completion_value_throw (match_value)) + { + JERRY_DDLOG ("Execute RE_OP_LOOKAHEAD_POS/NEG: "); + ecma_free_completion_value (match_value); + if ((op == RE_OP_LOOKAHEAD_POS && sub_str_p) + || (op == RE_OP_LOOKAHEAD_NEG && !sub_str_p)) + { + JERRY_DDLOG ("match\n"); + match_value = re_match_regexp (re_ctx_p, bc_p, str_p, &sub_str_p); + } + else + { + JERRY_DDLOG ("fail\n"); + match_value = 
ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + } + + if (!ecma_is_completion_value_throw (match_value)) + { + re_ctx_p->recursion_depth--; + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + } + else + { + JERRY_ASSERT (ecma_is_value_boolean (match_value)); + /* restore saved */ + memcpy (re_ctx_p->saved_p, saved_bck_p, size); + } + } + + MEM_FINALIZE_LOCAL_ARRAY (saved_bck_p); + return match_value; + } + case RE_OP_CHAR_CLASS: + case RE_OP_INV_CHAR_CLASS: + { + uint32_t curr_ch, num_of_ranges; + bool is_match; + + JERRY_DDLOG ("Execute RE_OP_CHAR_CLASS/RE_OP_INV_CHAR_CLASS, "); + + if (str_p >= re_ctx_p->input_end_p) + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + + curr_ch = get_input_char (&str_p); + + num_of_ranges = re_get_value (&bc_p); + is_match = false; + while (num_of_ranges) + { + uint32_t ch1, ch2; + ch1 = (uint32_t) re_get_value (&bc_p); + ch2 = (uint32_t) re_get_value (&bc_p); + JERRY_DDLOG ("num_of_ranges=%d, ch1=%d, ch2=%d, curr_ch=%d; ", + num_of_ranges, ch1, ch2, curr_ch); + + if (curr_ch >= ch1 && curr_ch <= ch2) + { + /* We must read all the ranges from bytecode. 
*/ + is_match = true; + } + num_of_ranges--; + } + + if (op == RE_OP_CHAR_CLASS) + { + if (!is_match) + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + } + else + { + JERRY_ASSERT (op == RE_OP_INV_CHAR_CLASS); + if (is_match) + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + } + JERRY_DDLOG ("match\n"); + break; /* tail merge */ + } + case RE_OP_BACKREFERENCE: + { + uint32_t backref_idx; + const ecma_char_t *sub_str_p; + + backref_idx = re_get_value (&bc_p); + JERRY_DDLOG ("Execute RE_OP_BACKREFERENCE (idx: %d): ", backref_idx); + backref_idx *= 2; /* backref n -> saved indices [n*2, n*2+1] */ + JERRY_ASSERT (backref_idx >= 2 && backref_idx + 1 < re_ctx_p->num_of_captures); + + if (!re_ctx_p->saved_p[backref_idx] || !re_ctx_p->saved_p[backref_idx + 1]) + { + JERRY_DDLOG ("match\n"); + break; /* capture is 'undefined', always matches! 
*/ + } + + sub_str_p = re_ctx_p->saved_p[backref_idx]; + while (sub_str_p < re_ctx_p->saved_p[backref_idx + 1]) + { + uint32_t ch1, ch2; + + if (str_p >= re_ctx_p->input_end_p) + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + + ch1 = get_input_char (&sub_str_p); + ch2 = get_input_char (&str_p); + + if (ch1 != ch2) + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + } + JERRY_DDLOG ("match\n"); + break; /* tail merge */ + } + case RE_OP_SAVE_AT_START: + { + const ecma_char_t *old_start_p; + re_bytecode_t *old_bc_p; + + JERRY_DDLOG ("Execute RE_OP_SAVE_AT_START\n"); + old_start_p = re_ctx_p->saved_p[RE_GLOBAL_START_IDX]; + re_ctx_p->saved_p[RE_GLOBAL_START_IDX] = str_p; + do + { + uint32_t offset = re_get_value (&bc_p); + const ecma_char_t *sub_str_p; + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_p, &sub_str_p); + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + re_ctx_p->recursion_depth--; + return match_value; /* match */ + } + else if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + bc_p += offset; + old_bc_p = bc_p; + } + while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE); + bc_p = old_bc_p; + + re_ctx_p->saved_p[RE_GLOBAL_START_IDX] = old_start_p; + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + case RE_OP_SAVE_AND_MATCH: + { + JERRY_DDLOG ("End of pattern is reached: match\n"); + re_ctx_p->saved_p[RE_GLOBAL_END_IDX] = str_p; + *res_p = str_p; + re_ctx_p->recursion_depth--; + return ret_value = ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_TRUE); /* match */ + } + case RE_OP_ALTERNATIVE: + { + /* + * Alternatives should be jump over, when alternative opcode appears. 
+ */ + uint32_t offset = re_get_value (&bc_p); + JERRY_DDLOG ("Execute RE_OP_ALTERNATIVE"); + bc_p += offset; + while (*bc_p == RE_OP_ALTERNATIVE) + { + JERRY_DDLOG (", jump: %d"); + bc_p++; + offset = re_get_value (&bc_p); + bc_p += offset; + } + JERRY_DDLOG ("\n"); + break; /* tail merge */ + } + case RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START: + case RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START: + { + /* + * On non-greedy iterations we have to execute the bytecode + * after the group first, if zero iteration is allowed. + */ + uint32_t start_idx, iter_idx, offset; + const ecma_char_t *old_start_p; + const ecma_char_t *sub_str_p; + re_bytecode_t *old_bc_p; + + old_bc_p = bc_p; /* save the bytecode start position of the group start */ + start_idx = re_get_value (&bc_p); + offset = re_get_value (&bc_p); + + if (RE_IS_CAPTURE_GROUP (op)) + { + JERRY_ASSERT (start_idx <= re_ctx_p->num_of_captures / 2); + iter_idx = start_idx - 1; + start_idx *= 2; + + old_start_p = re_ctx_p->saved_p[start_idx]; + re_ctx_p->saved_p[start_idx] = str_p; + } + else + { + JERRY_ASSERT (start_idx < re_ctx_p->num_of_non_captures); + iter_idx = start_idx + (re_ctx_p->num_of_captures / 2) - 1; + start_idx += re_ctx_p->num_of_captures; + } + re_ctx_p->num_of_iterations[iter_idx] = 0; + + /* Jump all over to the end of the END opcode. 
*/ + bc_p += offset; + + /* Try to match after the close paren if zero is allowed */ + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_p, &sub_str_p); + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + re_ctx_p->recursion_depth--; + return match_value; /* match */ + } + else if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + if (RE_IS_CAPTURE_GROUP (op)) + { + re_ctx_p->saved_p[start_idx] = old_start_p; + } + + bc_p = old_bc_p; + /* FALLTHRU */ + } + case RE_OP_CAPTURE_GROUP_START: + case RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START: + case RE_OP_NON_CAPTURE_GROUP_START: + case RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START: + { + uint32_t start_idx, iter_idx, old_iteration_cnt, offset; + const ecma_char_t *old_start_p; + const ecma_char_t *sub_str_p; + re_bytecode_t *old_bc_p; + re_bytecode_t *end_bc_p = NULL; + + start_idx = re_get_value (&bc_p); + if (op != RE_OP_CAPTURE_GROUP_START + && op != RE_OP_NON_CAPTURE_GROUP_START) + { + offset = re_get_value (&bc_p); + end_bc_p = bc_p + offset; + } + + if (RE_IS_CAPTURE_GROUP (op)) + { + JERRY_ASSERT (start_idx <= re_ctx_p->num_of_captures / 2); + iter_idx = start_idx - 1; + start_idx *= 2; + } + else + { + JERRY_ASSERT (start_idx < re_ctx_p->num_of_non_captures); + iter_idx = start_idx + (re_ctx_p->num_of_captures / 2) - 1; + start_idx += re_ctx_p->num_of_captures; + } + old_start_p = re_ctx_p->saved_p[start_idx]; + old_iteration_cnt = re_ctx_p->num_of_iterations[iter_idx]; + re_ctx_p->saved_p[start_idx] = str_p; + re_ctx_p->num_of_iterations[iter_idx] = 0; + + do + { + offset = re_get_value (&bc_p); + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_p, &sub_str_p); + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + re_ctx_p->recursion_depth--; + return match_value; /* match */ + } + else if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + bc_p += offset; + old_bc_p = bc_p; + } + 
while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE); + bc_p = old_bc_p; + re_ctx_p->num_of_iterations[iter_idx] = old_iteration_cnt; + + /* Try to match after the close paren if zero is allowed. */ + if (op == RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START + || op == RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START) + { + JERRY_ASSERT (end_bc_p); + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, end_bc_p, str_p, &sub_str_p); + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + re_ctx_p->recursion_depth--; + return match_value; /* match */ + } + else if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + } + + re_ctx_p->saved_p[start_idx] = old_start_p; + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + case RE_OP_CAPTURE_NON_GREEDY_GROUP_END: + case RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END: + { + uint32_t end_idx, iter_idx, min, max; + const ecma_char_t *old_end_p; + re_bytecode_t *old_bc_p; + + /* + * On non-greedy iterations we have to execute the bytecode + * after the group first. Try to iterate only if it fails. 
+ */ + old_bc_p = bc_p; /* save the bytecode start position of the group end */ + end_idx = re_get_value (&bc_p); + min = re_get_value (&bc_p); + max = re_get_value (&bc_p); + re_get_value (&bc_p); /* start offset */ + + if (RE_IS_CAPTURE_GROUP (op)) + { + JERRY_ASSERT (end_idx <= re_ctx_p->num_of_captures / 2); + iter_idx = end_idx - 1; + end_idx = (end_idx * 2) + 1; + } + else + { + JERRY_ASSERT (end_idx <= re_ctx_p->num_of_non_captures); + iter_idx = end_idx + (re_ctx_p->num_of_captures / 2) - 1; + end_idx += re_ctx_p->num_of_captures; + } + + re_ctx_p->num_of_iterations[iter_idx]++; + if (re_ctx_p->num_of_iterations[iter_idx] >= min + && re_ctx_p->num_of_iterations[iter_idx] <= max) + { + old_end_p = re_ctx_p->saved_p[end_idx]; + re_ctx_p->saved_p[end_idx] = str_p; + + const ecma_char_t *sub_str_p; + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_p, &sub_str_p); + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + re_ctx_p->recursion_depth--; + return match_value; /* match */ + } + else if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + + re_ctx_p->saved_p[end_idx] = old_end_p; + } + re_ctx_p->num_of_iterations[iter_idx]--; + bc_p = old_bc_p; + + /* If non-greedy fails and try to iterate... 
*/ + /* FALLTHRU */ + } + case RE_OP_CAPTURE_GREEDY_GROUP_END: + case RE_OP_NON_CAPTURE_GREEDY_GROUP_END: + { + uint32_t start_idx, end_idx, iter_idx, min, max, offset; + const ecma_char_t *old_start_p; + const ecma_char_t *old_end_p; + const ecma_char_t *sub_str_p; + re_bytecode_t *old_bc_p; + + end_idx = re_get_value (&bc_p); + min = re_get_value (&bc_p); + max = re_get_value (&bc_p); + offset = re_get_value (&bc_p); + + if (RE_IS_CAPTURE_GROUP (op)) + { + JERRY_ASSERT (end_idx <= re_ctx_p->num_of_captures / 2); + iter_idx = end_idx - 1; + start_idx = end_idx * 2; + end_idx = start_idx + 1; + } + else + { + JERRY_ASSERT (end_idx <= re_ctx_p->num_of_non_captures); + iter_idx = end_idx + (re_ctx_p->num_of_captures / 2) - 1; + end_idx += re_ctx_p->num_of_captures; + start_idx = end_idx; + } + + /* Check the empty iteration if the minimum number of iterations is reached. */ + if (re_ctx_p->num_of_iterations[iter_idx] >= min + && str_p == re_ctx_p->saved_p[start_idx]) + { + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + re_ctx_p->num_of_iterations[iter_idx]++; + + old_bc_p = bc_p; /* Save the bytecode end position of the END opcodes for matching after it. */ + old_end_p = re_ctx_p->saved_p[end_idx]; + re_ctx_p->saved_p[end_idx] = str_p; + + if (re_ctx_p->num_of_iterations[iter_idx] < max) + { + bc_p -= offset; + offset = re_get_value (&bc_p); + + old_start_p = re_ctx_p->saved_p[start_idx]; + re_ctx_p->saved_p[start_idx] = str_p; + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_p, &sub_str_p); + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + re_ctx_p->recursion_depth--; + return match_value; /* match */ + } + else if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + + re_ctx_p->saved_p[start_idx] = old_start_p; + + /* Try to match alternatives if any. 
*/ + bc_p += offset; + while (*bc_p == RE_OP_ALTERNATIVE) + { + bc_p++; /* RE_OP_ALTERNATIVE */ + offset = re_get_value (&bc_p); + + old_start_p = re_ctx_p->saved_p[start_idx]; + re_ctx_p->saved_p[start_idx] = str_p; + + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_p, &sub_str_p); + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + re_ctx_p->recursion_depth--; + return match_value; /* match */ + } + else if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + + re_ctx_p->saved_p[start_idx] = old_start_p; + bc_p += offset; + } + } + + if (re_ctx_p->num_of_iterations[iter_idx] >= min + && re_ctx_p->num_of_iterations[iter_idx] <= max) + { + /* Try to match the rest of the bytecode. */ + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, old_bc_p, str_p, &sub_str_p); + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + re_ctx_p->recursion_depth--; + return match_value; /* match */ + } + else if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + } + + /* restore if fails */ + re_ctx_p->saved_p[end_idx] = old_end_p; + re_ctx_p->num_of_iterations[iter_idx]--; + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + case RE_OP_NON_GREEDY_ITERATOR: + { + uint32_t min, max, offset, num_of_iter; + const ecma_char_t *sub_str_p; + + min = re_get_value (&bc_p); + max = re_get_value (&bc_p); + + offset = re_get_value (&bc_p); + JERRY_DDLOG ("Non-greedy iterator, min=%lu, max=%lu, offset=%ld\n", + (unsigned long) min, (unsigned long) max, (long) offset); + + num_of_iter = 0; + while (num_of_iter <= max) + { + if (num_of_iter >= min) + { + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p + offset, str_p, &sub_str_p); + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + re_ctx_p->recursion_depth--; + return match_value; /* match */ + } + else if 
(ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + } + + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_p, &sub_str_p); + if (!ecma_is_value_true (match_value)) + { + break; + } + else if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + str_p = sub_str_p; + num_of_iter++; + } + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + case RE_OP_GREEDY_ITERATOR: + { + uint32_t min, max, offset, num_of_iter; + const ecma_char_t *sub_str_p; + + min = re_get_value (&bc_p); + max = re_get_value (&bc_p); + + offset = re_get_value (&bc_p); + JERRY_DDLOG ("Greedy iterator, min=%lu, max=%lu, offset=%ld\n", + (unsigned long) min, (unsigned long) max, (long) offset); + + num_of_iter = 0; + while (num_of_iter < max) + { + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_p, &sub_str_p); + if (!ecma_is_value_true (match_value)) + { + break; + } + else if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + str_p = sub_str_p; + num_of_iter++; + } + + while (num_of_iter >= min) + { + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p + offset, str_p, &sub_str_p); + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + re_ctx_p->recursion_depth--; + return match_value; /* match */ + } + else if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + if (num_of_iter == min) + { + break; + } + + str_p = utf8_backtrack (str_p); + num_of_iter--; + } + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + default: + { + JERRY_DDLOG ("UNKNOWN opcode (%d)!\n", (uint32_t) op); + re_ctx_p->recursion_depth--; + return ecma_make_throw_obj_completion_value (ecma_new_standard_error (ECMA_ERROR_COMMON)); + } + } + } + + JERRY_UNREACHABLE (); + return ecma_make_simple_completion_value 
(ECMA_SIMPLE_VALUE_FALSE); /* fail */
+} /* regexp_match */
+
+/**
+ * Define the necessary properties for the result array (index, input, length).
+ *
+ * 'index' and 'input' are defined as writable, enumerable and configurable data
+ * properties; for 'length' only the value is supplied, so its attributes are
+ * left as they already are on the array object.
+ */
+static void
+re_set_result_array_properties (ecma_object_t *array_obj_p, /**< result array */
+                                re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
+                                int32_t index) /**< index of matching */
+{
+  /* Set index property of the result array */
+  ecma_string_t *result_prop_str_p = ecma_get_magic_string (ECMA_MAGIC_STRING_INDEX);
+  {
+    ecma_property_descriptor_t array_item_prop_desc = ecma_make_empty_property_descriptor ();
+
+    array_item_prop_desc.is_value_defined = true;
+
+    ecma_number_t *num_p = ecma_alloc_number ();
+    *num_p = (ecma_number_t) index;
+    array_item_prop_desc.value = ecma_make_number_value (num_p);
+
+    array_item_prop_desc.is_writable_defined = true;
+    array_item_prop_desc.is_writable = true;
+
+    array_item_prop_desc.is_enumerable_defined = true;
+    array_item_prop_desc.is_enumerable = true;
+
+    array_item_prop_desc.is_configurable_defined = true;
+    array_item_prop_desc.is_configurable = true;
+
+    ecma_op_object_define_own_property (array_obj_p,
+                                        result_prop_str_p,
+                                        &array_item_prop_desc,
+                                        true);
+
+    ecma_dealloc_number (num_p);
+  }
+  ecma_deref_ecma_string (result_prop_str_p);
+
+  /* Set input property of the result array */
+  result_prop_str_p = ecma_get_magic_string (ECMA_MAGIC_STRING_INPUT);
+  {
+    ecma_property_descriptor_t array_item_prop_desc = ecma_make_empty_property_descriptor ();
+
+    array_item_prop_desc.is_value_defined = true;
+    ecma_string_t *input_str_p = ecma_new_ecma_string (re_ctx_p->input_start_p);
+    array_item_prop_desc.value = ecma_make_string_value (input_str_p);
+
+    array_item_prop_desc.is_writable_defined = true;
+    array_item_prop_desc.is_writable = true;
+
+    array_item_prop_desc.is_enumerable_defined = true;
+    array_item_prop_desc.is_enumerable = true;
+
+    array_item_prop_desc.is_configurable_defined = true;
+    array_item_prop_desc.is_configurable = true;
+
+    ecma_op_object_define_own_property (array_obj_p,
+                                        result_prop_str_p,
+                                        &array_item_prop_desc,
+                                        true);
+
+    ecma_deref_ecma_string (input_str_p);
+  }
+  ecma_deref_ecma_string (result_prop_str_p);
+
+  /* Set length property of the result array */
+  result_prop_str_p = ecma_get_magic_string (ECMA_MAGIC_STRING_LENGTH);
+  {
+
+    ecma_property_descriptor_t array_item_prop_desc = ecma_make_empty_property_descriptor ();
+    array_item_prop_desc.is_value_defined = true;
+
+    /* Two saved pointers (start, end) are kept per capture, hence the division. */
+    ecma_number_t *num_p = ecma_alloc_number ();
+    *num_p = (ecma_number_t) (re_ctx_p->num_of_captures / 2);
+    array_item_prop_desc.value = ecma_make_number_value (num_p);
+
+    array_item_prop_desc.is_writable_defined = false;
+    array_item_prop_desc.is_enumerable_defined = false;
+    array_item_prop_desc.is_configurable_defined = false;
+
+    ecma_op_object_define_own_property (array_obj_p,
+                                        result_prop_str_p,
+                                        &array_item_prop_desc,
+                                        true);
+
+    ecma_dealloc_number (num_p);
+  }
+  ecma_deref_ecma_string (result_prop_str_p);
+} /* re_set_result_array_properties */
+
+/**
+ * RegExp helper function to start the recursive matching algorithm
+ * and create the result Array object
+ *
+ * The bytecode header (flags, number of captures, number of non-capturing groups)
+ * is read first, then a match is attempted at every position of the input,
+ * starting from 'lastIndex' when the global flag is set.
+ *
+ * For global RegExps 'lastIndex' is updated when the search finishes without
+ * an exception: to the end of the match on success, to 0 otherwise
+ * (see also: ECMA-262 v5, 15.10.6.2).
+ *
+ * @return completion value
+ *         - result Array object, if a match was found
+ *         - 'undefined' simple value, if there was no match
+ *         - the throw completion raised by the matcher, otherwise
+ *         Returned value must be freed with ecma_free_completion_value
+ */
+ecma_completion_value_t
+ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
+                         re_bytecode_t *bc_p, /**< start of the RegExp bytecode */
+                         const ecma_char_t *str_p) /**< start of the input string */
+{
+  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
+  int32_t input_length = ecma_zt_string_length (str_p);
+  re_matcher_ctx_t re_ctx;
+  re_ctx.input_start_p = str_p;
+  re_ctx.input_end_p = str_p + strlen ((char *) str_p);
+  re_ctx.match_limit = 0;
+  re_ctx.recursion_depth = 0;
+
+  /* 1. Read bytecode header and init regexp matcher context. */
+  re_ctx.flags = (uint8_t) re_get_value (&bc_p);
+  JERRY_DDLOG ("Exec with flags [global: %d, ignoreCase: %d, multiline: %d]\n",
+               re_ctx.flags & RE_FLAG_GLOBAL,
+               re_ctx.flags & RE_FLAG_IGNORE_CASE,
+               re_ctx.flags & RE_FLAG_MULTILINE);
+
+  re_ctx.num_of_captures = re_get_value (&bc_p);
+  JERRY_ASSERT (re_ctx.num_of_captures % 2 == 0);
+  re_ctx.num_of_non_captures = re_get_value (&bc_p);
+
+  MEM_DEFINE_LOCAL_ARRAY (saved_p, re_ctx.num_of_captures + re_ctx.num_of_non_captures, const ecma_char_t*);
+  for (uint32_t i = 0; i < re_ctx.num_of_captures + re_ctx.num_of_non_captures; i++)
+  {
+    saved_p[i] = NULL;
+  }
+  re_ctx.saved_p = saved_p;
+
+  uint32_t num_of_iter_length = (re_ctx.num_of_captures / 2) + (re_ctx.num_of_non_captures - 1);
+  MEM_DEFINE_LOCAL_ARRAY (num_of_iter_p, num_of_iter_length, uint32_t);
+  for (uint32_t i = 0; i < num_of_iter_length; i++)
+  {
+    num_of_iter_p[i] = 0u;
+  }
+
+  bool is_match = false;
+  re_ctx.num_of_iterations = num_of_iter_p;
+  int32_t index = 0;
+
+  if (re_ctx.flags & RE_FLAG_GLOBAL)
+  {
+    ecma_string_t *magic_str_p = ecma_get_magic_string (ECMA_MAGIC_STRING_LASTINDEX_UL);
+    ecma_property_t *lastindex_prop_p = ecma_op_object_get_property (obj_p, magic_str_p);
+    JERRY_ASSERT (lastindex_prop_p != NULL);
+    ecma_number_t *lastindex_num_p = ecma_get_number_from_value (lastindex_prop_p->u.named_data_property.value);
+    index = ecma_number_to_int32 (*lastindex_num_p);
+    JERRY_ASSERT (str_p != NULL);
+
+    /* Advance only inside the input. An out-of-range lastIndex keeps str_p at the
+     * start, so the range check in the loop below is reached and resets lastIndex
+     * instead of forming an out-of-bounds pointer. */
+    if (index >= 0 && index <= input_length)
+    {
+      str_p += index;
+    }
+    ecma_deref_ecma_string (magic_str_p);
+  }
+
+  /* 2. Try to match */
+  const ecma_char_t *sub_str_p = NULL; /* end of the match; stays NULL when nothing matched */
+  while (str_p && str_p <= re_ctx.input_end_p && ecma_is_completion_value_empty (ret_value))
+  {
+    if (index < 0 || index > input_length)
+    {
+      ecma_string_t *magic_str_p = ecma_get_magic_string (ECMA_MAGIC_STRING_LASTINDEX_UL);
+      ecma_number_t *lastindex_num_p = ecma_alloc_number ();
+      *lastindex_num_p = ECMA_NUMBER_ZERO;
+      ecma_op_object_put (obj_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true);
+      ecma_dealloc_number (lastindex_num_p);
+      ecma_deref_ecma_string (magic_str_p);
+
+      is_match = false;
+      break;
+    }
+    else
+    {
+      sub_str_p = NULL;
+      ECMA_TRY_CATCH (match_value, re_match_regexp (&re_ctx, bc_p, str_p, &sub_str_p), ret_value);
+      if (ecma_is_value_true (match_value))
+      {
+        is_match = true;
+        break;
+      }
+      str_p++;
+      index++;
+      ECMA_FINALIZE (match_value);
+    }
+  }
+
+  /* Update lastIndex only if the search finished without an exception:
+   * end of the match on success, 0 on failure. */
+  if ((re_ctx.flags & RE_FLAG_GLOBAL) && ecma_is_completion_value_empty (ret_value))
+  {
+    ecma_string_t *magic_str_p = ecma_get_magic_string (ECMA_MAGIC_STRING_LASTINDEX_UL);
+    ecma_number_t *lastindex_num_p = ecma_alloc_number ();
+    if (is_match)
+    {
+      JERRY_ASSERT (sub_str_p != NULL);
+      *lastindex_num_p = ((ecma_number_t) (sub_str_p - re_ctx.input_start_p));
+    }
+    else
+    {
+      *lastindex_num_p = ECMA_NUMBER_ZERO;
+    }
+    ecma_op_object_put (obj_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true);
+    ecma_dealloc_number (lastindex_num_p);
+    ecma_deref_ecma_string (magic_str_p);
+  }
+
+  /* 3. Fill the result array or return with 'undefined' */
+  if (ecma_is_completion_value_empty (ret_value))
+  {
+    if (is_match)
+    {
+      ecma_completion_value_t result_array = ecma_op_create_array_object (0, 0, false);
+      ecma_object_t *result_array_obj_p = ecma_get_object_from_completion_value (result_array);
+
+      re_set_result_array_properties (result_array_obj_p, &re_ctx, index);
+
+      /* saved_p[i] / saved_p[i + 1] hold the start / end of capture number i / 2 */
+      for (uint32_t i = 0; i < re_ctx.num_of_captures; i += 2)
+      {
+        ecma_string_t *index_str_p = ecma_new_ecma_string_from_uint32 (i / 2);
+
+        if (re_ctx.saved_p[i] && re_ctx.saved_p[i + 1] && re_ctx.saved_p[i + 1] >= re_ctx.saved_p[i])
+        {
+          ecma_length_t capture_str_len = static_cast<ecma_length_t> (re_ctx.saved_p[i + 1] - re_ctx.saved_p[i]);
+          ecma_string_t *capture_str_p;
+
+          if (capture_str_len > 0)
+          {
+            capture_str_p = ecma_new_ecma_string (re_ctx.saved_p[i], capture_str_len);
+          }
+          else
+          {
+            capture_str_p = ecma_get_magic_string (ECMA_MAGIC_STRING__EMPTY);
+          }
+          ecma_op_object_put (result_array_obj_p, index_str_p, ecma_make_string_value (capture_str_p), true);
+          ecma_deref_ecma_string (capture_str_p);
+        }
+        else
+        {
+          /* The capture did not take part in the match. */
+          ecma_op_object_put (result_array_obj_p,
+                              index_str_p,
+                              ecma_make_simple_value (ECMA_SIMPLE_VALUE_UNDEFINED),
+                              true);
+        }
+        ecma_deref_ecma_string (index_str_p);
+      }
+      ret_value = result_array;
+    }
+    else
+    {
+      ret_value = ecma_make_normal_completion_value (ecma_make_simple_value (ECMA_SIMPLE_VALUE_UNDEFINED));
+    }
+  }
+  MEM_FINALIZE_LOCAL_ARRAY (num_of_iter_p);
+  MEM_FINALIZE_LOCAL_ARRAY (saved_p);
+
+  return ret_value;
+} /* ecma_regexp_exec_helper */
+
+/**
+ * @}
+ * @}
+ */
+
+#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
diff --git a/jerry-core/ecma/operations/ecma-regexp-object.h b/jerry-core/ecma/operations/ecma-regexp-object.h
new file mode 100644
index 0000000000..d9fc30062a
--- /dev/null
+++ b/jerry-core/ecma/operations/ecma-regexp-object.h
@@ -0,0 +1,66 @@
+/* Copyright 2015 Samsung Electronics Co., Ltd.
+ * Copyright 2015 University of Szeged.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ECMA_REGEXP_OBJECT_H +#define ECMA_REGEXP_OBJECT_H + +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN + +#include "ecma-globals.h" +#include "re-compiler.h" + +/** \addtogroup ecma ECMA + * @{ + * + * \addtogroup ecmaregexpobject ECMA RegExp object related routines + * @{ + */ + +#define RE_EXECUTE_RECURSION_LIMIT 1000 /* Limit of RegExp executor recursion depth */ +#define RE_EXECUTE_MATCH_LIMIT 10000 /* Limit of RegExp execetur matching steps */ + +/** + * RegExp executor context + * + * FIXME: + * Add comments with description of the structure members + */ +typedef struct +{ + const ecma_char_t **saved_p; + const ecma_char_t *input_start_p; + const ecma_char_t *input_end_p; + uint32_t match_limit; + uint32_t recursion_depth; + uint32_t num_of_captures; + uint32_t num_of_non_captures; + uint32_t *num_of_iterations; + uint8_t flags; +} re_matcher_ctx_t; + +extern ecma_completion_value_t +ecma_op_create_regexp_object (ecma_string_t *pattern_p, ecma_string_t *flags_str_p); + +extern ecma_completion_value_t +ecma_regexp_exec_helper (ecma_object_t *obj_p, re_bytecode_t *bc_p, const ecma_char_t *str_p); + +/** + * @} + * @} + */ + +#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ +#endif /* !ECMA_REGEXP_OBJECT_H */ diff --git a/jerry-core/parser/js/lexer.cpp b/jerry-core/parser/js/lexer.cpp index a5d375bdd1..c1f1d20ce7 100644 --- a/jerry-core/parser/js/lexer.cpp +++ 
b/jerry-core/parser/js/lexer.cpp @@ -1,4 +1,5 @@ /* Copyright 2014-2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,9 +15,14 @@ */ #include "ecma-helpers.h" +#include "ecma-exceptions.h" #include "jrt-libc-includes.h" #include "jsp-mm.h" #include "lexer.h" +#include "mem-allocator.h" +#include "opcodes.h" +#include "parser.h" +#include "stack.h" #include "syntax-errors.h" static token saved_token, prev_token, sent_token, empty_token; @@ -341,37 +347,6 @@ consume_char (void) } \ while (0) -static uint32_t -hex_to_int (char hex) -{ - switch (hex) - { - case '0': return 0x0; - case '1': return 0x1; - case '2': return 0x2; - case '3': return 0x3; - case '4': return 0x4; - case '5': return 0x5; - case '6': return 0x6; - case '7': return 0x7; - case '8': return 0x8; - case '9': return 0x9; - case 'a': - case 'A': return 0xA; - case 'b': - case 'B': return 0xB; - case 'c': - case 'C': return 0xC; - case 'd': - case 'D': return 0xD; - case 'e': - case 'E': return 0xE; - case 'f': - case 'F': return 0xF; - default: JERRY_UNREACHABLE (); - } -} - /** * Try to decode specified character as SingleEscapeCharacter (ECMA-262, v5, 7.8.4) * @@ -545,7 +520,7 @@ convert_string_to_token_transform_escape_seq (token_type tok_type, /**< type of JERRY_ASSERT ((char_code & 0xF000u) == 0); char_code = (uint16_t) (char_code << 4u); - char_code = (uint16_t) (char_code + hex_to_int (nc)); + char_code = (uint16_t) (char_code + ecma_char_hex_to_int ((ecma_char_t) nc)); } } @@ -761,11 +736,11 @@ parse_number (void) { if (!is_overflow) { - res = (res << 4) + hex_to_int (token_start[i]); + res = (res << 4) + ecma_char_hex_to_int ((ecma_char_t) token_start[i]); } else { - fp_res = fp_res * 16 + (ecma_number_t) hex_to_int (token_start[i]); + fp_res = fp_res * 16 + (ecma_number_t) ecma_char_hex_to_int ((ecma_char_t) token_start[i]); } 
if (res > 255) @@ -879,11 +854,11 @@ parse_number (void) { if (!is_overflow) { - res = res * 8 + hex_to_int (token_start[i]); + res = res * 8 + ecma_char_hex_to_int ((ecma_char_t) token_start[i]); } else { - fp_res = fp_res * 8 + (ecma_number_t) hex_to_int (token_start[i]); + fp_res = fp_res * 8 + (ecma_number_t) ecma_char_hex_to_int ((ecma_char_t) token_start[i]); } if (res > 255) { @@ -899,11 +874,11 @@ parse_number (void) { if (!is_overflow) { - res = res * 10 + hex_to_int (token_start[i]); + res = res * 10 + ecma_char_hex_to_int ((ecma_char_t) token_start[i]); } else { - fp_res = fp_res * 10 + (ecma_number_t) hex_to_int (token_start[i]); + fp_res = fp_res * 10 + (ecma_number_t) ecma_char_hex_to_int ((ecma_char_t) token_start[i]); } if (res > 255) { @@ -991,6 +966,76 @@ parse_string (void) return ret; } /* parse_string */ +/** + * Parse string literal (ECMA-262 v5, 7.8.5) + */ +static token +parse_regexp (void) +{ + token result; + bool is_char_class = false; + + /* Eat up '/' */ + JERRY_ASSERT ((ecma_char_t) LA (0) == '/'); + consume_char (); + new_token (); + + while (true) + { + ecma_char_t c = (ecma_char_t) LA (0); + + if (c == '\0') + { + PARSE_ERROR ("Unclosed string", token_start - buffer_start); + } + else if (c == '\n') + { + PARSE_ERROR ("RegExp literal shall not contain newline character", token_start - buffer_start); + } + else if (c == '\\') + { + consume_char (); + } + else if (c == '[') + { + is_char_class = true; + } + else if (c == ']') + { + is_char_class = false; + } + else if (c == '/' && !is_char_class) + { + /* Eat up '/' */ + consume_char (); + break; + } + + consume_char (); + } + + /* Try to parse RegExp flags */ + while (true) + { + ecma_char_t c = (ecma_char_t) LA (0); + + if (c == '\0' + || !ecma_char_is_word_char (c) + || ecma_char_is_line_terminator (c)) + { + break; + } + consume_char (); + } + + result = convert_string_to_token (TOK_REGEXP, + (const ecma_char_t*) token_start, + static_cast (buffer - token_start)); + + token_start 
= NULL; + return result; +} /* parse_regexp */ + static void grobble_whitespaces (void) { @@ -1114,10 +1159,27 @@ lexer_next_token_private (void) } } - if (c == '/' && LA (1) == '/') + + if (c == '/') { - replace_comment_by_newline (); - return lexer_next_token_private (); + if (LA (1) == '/') + { + replace_comment_by_newline (); + return lexer_next_token_private (); + } + else if (!(sent_token.type == TOK_NAME + || sent_token.type == TOK_NULL + || sent_token.type == TOK_BOOL + || sent_token.type == TOK_CLOSE_BRACE + || sent_token.type == TOK_CLOSE_SQUARE + || sent_token.type == TOK_CLOSE_PAREN + || sent_token.type == TOK_SMALL_INT + || sent_token.type == TOK_NUMBER + || sent_token.type == TOK_STRING + || sent_token.type == TOK_REGEXP)) + { + return parse_regexp (); + } } switch (c) @@ -1233,7 +1295,6 @@ lexer_next_token (void) prev_token = sent_token; sent_token = lexer_next_token_private (); - if (sent_token.type == TOK_NEWLINE) { dump_current_line (); diff --git a/jerry-core/parser/js/lexer.h b/jerry-core/parser/js/lexer.h index f67b6a9914..3d09d0df83 100644 --- a/jerry-core/parser/js/lexer.h +++ b/jerry-core/parser/js/lexer.h @@ -99,7 +99,7 @@ typedef enum __attr_packed___ TOK_OPEN_PAREN, // ( TOK_CLOSE_PAREN, //) TOK_OPEN_SQUARE, // [ - TOK_CLOSE_SQUARE, // [ + TOK_CLOSE_SQUARE, // ] TOK_DOT, // . 
TOK_SEMICOLON, // ; @@ -152,6 +152,7 @@ typedef enum __attr_packed___ TOK_DIV, // / TOK_DIV_EQ, // /= TOK_EMPTY, + TOK_REGEXP, // RegularExpressionLiteral (/.../gim) } token_type; typedef size_t locus; @@ -170,6 +171,9 @@ typedef struct #define TOKEN_EMPTY_INITIALIZER {0, TOK_EMPTY, 0} void lexer_init (const char *, size_t, bool); +void lexer_init_source (const char *, size_t); + +void lexer_free (void); token lexer_next_token (void); void lexer_save_token (token); diff --git a/jerry-core/parser/js/opcodes-dumper.cpp b/jerry-core/parser/js/opcodes-dumper.cpp index 16ce7bfece..abc733033b 100644 --- a/jerry-core/parser/js/opcodes-dumper.cpp +++ b/jerry-core/parser/js/opcodes-dumper.cpp @@ -843,6 +843,34 @@ dump_number_assignment_res (lit_cpointer_t lit_id) return op; } +void +dump_regexp_assignment (operand op, lit_cpointer_t lit_id) +{ + switch (op.type) + { + case OPERAND_LITERAL: + { + const opcode_t opcode = getop_assignment (LITERAL_TO_REWRITE, OPCODE_ARG_TYPE_REGEXP, LITERAL_TO_REWRITE); + serializer_dump_op_meta (create_op_meta_101 (opcode, op.data.lit_id, lit_id)); + break; + } + case OPERAND_TMP: + { + const opcode_t opcode = getop_assignment (op.data.uid, OPCODE_ARG_TYPE_REGEXP, LITERAL_TO_REWRITE); + serializer_dump_op_meta (create_op_meta_001 (opcode, lit_id)); + break; + } + } +} + +operand +dump_regexp_assignment_res (lit_cpointer_t lit_id) +{ + operand op = tmp_operand (); + dump_regexp_assignment (op, lit_id); + return op; +} + void dump_smallint_assignment (operand op, idx_t uid) { diff --git a/jerry-core/parser/js/opcodes-dumper.h b/jerry-core/parser/js/opcodes-dumper.h index 94719a6e73..72ed8573cd 100644 --- a/jerry-core/parser/js/opcodes-dumper.h +++ b/jerry-core/parser/js/opcodes-dumper.h @@ -69,6 +69,8 @@ void dump_string_assignment (operand, lit_cpointer_t); operand dump_string_assignment_res (lit_cpointer_t); void dump_number_assignment (operand, lit_cpointer_t); operand dump_number_assignment_res (lit_cpointer_t); +void dump_regexp_assignment 
(operand, lit_cpointer_t); +operand dump_regexp_assignment_res (lit_cpointer_t); void dump_smallint_assignment (operand, idx_t); operand dump_smallint_assignment_res (idx_t); void dump_undefined_assignment (operand); diff --git a/jerry-core/parser/js/parser.cpp b/jerry-core/parser/js/parser.cpp index e6014f0c1d..a2696256cf 100644 --- a/jerry-core/parser/js/parser.cpp +++ b/jerry-core/parser/js/parser.cpp @@ -1,4 +1,5 @@ /* Copyright 2014-2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +23,7 @@ #include "opcodes-dumper.h" #include "opcodes-native-call.h" #include "parser.h" +#include "re-parser.h" #include "scopes-tree.h" #include "serializer.h" #include "stack.h" @@ -745,6 +747,7 @@ parse_object_literal (void) | 'false' | number_literal | string_literal + | regexp_literal ; */ static operand parse_literal (void) @@ -753,6 +756,7 @@ parse_literal (void) { case TOK_NUMBER: return dump_number_assignment_res (token_data_as_lit_cp ()); case TOK_STRING: return dump_string_assignment_res (token_data_as_lit_cp ()); + case TOK_REGEXP: return dump_regexp_assignment_res (token_data_as_lit_cp ()); case TOK_NULL: return dump_null_assignment_res (); case TOK_BOOL: return dump_boolean_assignment_res ((bool) token_data ()); case TOK_SMALL_INT: return dump_smallint_assignment_res ((idx_t) token_data ()); @@ -786,6 +790,7 @@ parse_primary_expression (void) case TOK_BOOL: case TOK_SMALL_INT: case TOK_NUMBER: + case TOK_REGEXP: case TOK_STRING: return parse_literal (); case TOK_NAME: return literal_operand (token_data_as_lit_cp ()); case TOK_OPEN_SQUARE: return parse_array_literal (); diff --git a/jerry-core/parser/js/scopes-tree.cpp b/jerry-core/parser/js/scopes-tree.cpp index ce083e868f..7a00d93992 100644 --- a/jerry-core/parser/js/scopes-tree.cpp +++ b/jerry-core/parser/js/scopes-tree.cpp @@ -291,6 +291,7 @@ 
generate_opcode (scopes_tree tree, opcode_counter_t opc_index, lit_id_hash_table } case OPCODE_ARG_TYPE_NUMBER: case OPCODE_ARG_TYPE_NUMBER_NEGATE: + case OPCODE_ARG_TYPE_REGEXP: case OPCODE_ARG_TYPE_STRING: case OPCODE_ARG_TYPE_VARIABLE: { @@ -430,6 +431,7 @@ count_new_literals_in_opcode (scopes_tree tree, opcode_counter_t opc_index) } case OPCODE_ARG_TYPE_NUMBER: case OPCODE_ARG_TYPE_NUMBER_NEGATE: + case OPCODE_ARG_TYPE_REGEXP: case OPCODE_ARG_TYPE_STRING: case OPCODE_ARG_TYPE_VARIABLE: { diff --git a/jerry-core/parser/regexp/re-compiler.cpp b/jerry-core/parser/regexp/re-compiler.cpp new file mode 100644 index 0000000000..f9f5145bc1 --- /dev/null +++ b/jerry-core/parser/regexp/re-compiler.cpp @@ -0,0 +1,888 @@ +/* Copyright 2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "ecma-exceptions.h"
+#include "ecma-helpers.h"
+#include "ecma-try-catch-macro.h"
+#include "jrt-libc-includes.h"
+#include "mem-heap.h"
+#include "re-compiler.h"
+
+#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
+
+/**
+ * FIXME:
+ *       Add comments to macro definitions in the component
+ */
+
+#define REGEXP_BYTECODE_BLOCK_SIZE 256UL
+#define BYTECODE_LEN(bc_ctx_p) ((uint32_t) (bc_ctx_p->current_p - bc_ctx_p->block_start_p))
+
+void
+regexp_dump_bytecode (re_bytecode_ctx_t *bc_ctx);
+
+/**
+ * FIXME:
+ *       Add missing 're' prefixes to the component's external and internal interfaces
+ */
+
+/**
+ * Realloc the bytecode container
+ *
+ * The container grows by REGEXP_BYTECODE_BLOCK_SIZE bytes. On the first call all
+ * pointers of the context are NULL and a fresh block is allocated; on later calls
+ * the bytes written so far are copied into the new block and the old one is freed.
+ *
+ * @return the new current position in the bytecode container
+ */
+static re_bytecode_t*
+realloc_regexp_bytecode_block (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */
+{
+  JERRY_ASSERT (bc_ctx_p->block_end_p - bc_ctx_p->block_start_p >= 0);
+  size_t old_size = static_cast<size_t> (bc_ctx_p->block_end_p - bc_ctx_p->block_start_p);
+
+  /* Either every pointer is NULL (first allocation) or none of them is (growing).
+   * Requiring all of them to be NULL would reject every second allocation. */
+  JERRY_ASSERT ((!bc_ctx_p->current_p && !bc_ctx_p->block_end_p && !bc_ctx_p->block_start_p)
+                || (bc_ctx_p->current_p && bc_ctx_p->block_end_p && bc_ctx_p->block_start_p));
+
+  size_t new_block_size = old_size + REGEXP_BYTECODE_BLOCK_SIZE;
+  JERRY_ASSERT (bc_ctx_p->current_p - bc_ctx_p->block_start_p >= 0);
+  size_t current_ptr_offset = static_cast<size_t> (bc_ctx_p->current_p - bc_ctx_p->block_start_p);
+
+  re_bytecode_t *new_block_start_p = (re_bytecode_t *) mem_heap_alloc_block (new_block_size,
+                                                                             MEM_HEAP_ALLOC_SHORT_TERM);
+  if (bc_ctx_p->current_p)
+  {
+    memcpy (new_block_start_p, bc_ctx_p->block_start_p, static_cast<size_t> (current_ptr_offset));
+    mem_heap_free_block (bc_ctx_p->block_start_p);
+  }
+  bc_ctx_p->block_start_p = new_block_start_p;
+  bc_ctx_p->block_end_p = new_block_start_p + new_block_size;
+  bc_ctx_p->current_p = new_block_start_p + current_ptr_offset;
+
+  return bc_ctx_p->current_p;
+} /* realloc_regexp_bytecode_block */
+
+/**
+ * Append a new bytecode to the end of the bytecode container
+ */
+static void
+bytecode_list_append (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
+                      re_bytecode_t *bytecode_p, /**< input bytecode */
+                      size_t length) /**< length of input */
+{
+  /* The container grows by one block at a time, so a single item must fit into a block. */
+  JERRY_ASSERT (length <= REGEXP_BYTECODE_BLOCK_SIZE);
+
+  re_bytecode_t *current_p = bc_ctx_p->current_p;
+  if (current_p + length > bc_ctx_p->block_end_p)
+  {
+    current_p = realloc_regexp_bytecode_block (bc_ctx_p);
+  }
+
+  memcpy (current_p, bytecode_p, length);
+  bc_ctx_p->current_p += length;
+} /* bytecode_list_append */
+
+/**
+ * Insert a new bytecode to the bytecode container
+ *
+ * The bytes from 'offset' to the current end of the container are shifted
+ * towards the end by 'length' bytes and the input is copied into the gap.
+ */
+static void
+bytecode_list_insert (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
+                      size_t offset, /**< distance from the start of the container */
+                      re_bytecode_t *bytecode_p, /**< input bytecode */
+                      size_t length) /**< length of input */
+{
+  JERRY_ASSERT (length <= REGEXP_BYTECODE_BLOCK_SIZE);
+
+  re_bytecode_t *current_p = bc_ctx_p->current_p;
+  if (current_p + length > bc_ctx_p->block_end_p)
+  {
+    realloc_regexp_bytecode_block (bc_ctx_p);
+  }
+
+  /* An offset behind the written bytecode would make the unsigned subtraction below wrap. */
+  JERRY_ASSERT (offset <= BYTECODE_LEN (bc_ctx_p));
+
+  re_bytecode_t *src_p = bc_ctx_p->block_start_p + offset;
+  size_t tail_length = (size_t) (BYTECODE_LEN (bc_ctx_p) - offset);
+  if (tail_length > 0)
+  {
+    /* Source and destination overlap, so the tail is moved through a temporary block. */
+    re_bytecode_t *dest_p = src_p + length;
+    re_bytecode_t *tmp_block_start_p = (re_bytecode_t *) mem_heap_alloc_block (tail_length,
+                                                                               MEM_HEAP_ALLOC_SHORT_TERM);
+    memcpy (tmp_block_start_p, src_p, tail_length);
+    memcpy (dest_p, tmp_block_start_p, tail_length);
+    mem_heap_free_block (tmp_block_start_p);
+  }
+  memcpy (src_p, bytecode_p, length);
+
+  bc_ctx_p->current_p += length;
+} /* bytecode_list_insert */
+
+/**
+ * Append a RegExp opcode
+ */
+static void
+append_opcode (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
+               re_opcode_t opcode) /**< input opcode */
+{
+  /* Convert by value first: copying the leading bytes of the opcode object itself
+   * would depend on the size and byte order of re_opcode_t. */
+  re_bytecode_t bytecode = (re_bytecode_t) opcode;
+  bytecode_list_append (bc_ctx_p, &bytecode, sizeof (re_bytecode_t));
+} /* append_opcode */
+
+/**
+ * Append a parameter of a RegExp opcode
+ */
+static void
+append_u32 (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
+            uint32_t value) /**< input value */
+{
+  bytecode_list_append (bc_ctx_p, (re_bytecode_t*) &value, sizeof (uint32_t));
+} /* append_u32 */
+
+/**
+ * Append a jump offset parameter of a RegExp opcode
+ *
+ * The size of the offset field itself is added to the stored value.
+ */
+static void
+append_jump_offset (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
+                    uint32_t value) /**< input value */
+{
+  value += (uint32_t) (sizeof (uint32_t));
+  append_u32 (bc_ctx_p, value);
+} /* append_jump_offset */
+
+/**
+ * Insert a RegExp opcode
+ */
+static void
+insert_opcode (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
+               uint32_t offset, /**< distance from the start of the container */
+               re_opcode_t opcode) /**< input opcode */
+{
+  /* See append_opcode: the opcode is converted by value before it is copied. */
+  re_bytecode_t bytecode = (re_bytecode_t) opcode;
+  bytecode_list_insert (bc_ctx_p, offset, &bytecode, sizeof (re_bytecode_t));
+} /* insert_opcode */
+
+/**
+ * Insert a parameter of a RegExp opcode
+ */
+static void
+insert_u32 (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
+            uint32_t offset, /**< distance from the start of the container */
+            uint32_t value) /**< input value */
+{
+  bytecode_list_insert (bc_ctx_p, offset, (re_bytecode_t*) &value, sizeof (uint32_t));
+} /* insert_u32 */
+
+/**
+ * Get a RegExp opcode
+ *
+ * The bytecode pointer is advanced behind the opcode.
+ *
+ * @return the opcode at the current position
+ */
+re_opcode_t
+re_get_opcode (re_bytecode_t **bc_p) /**< pointer to bytecode start */
+{
+  re_bytecode_t bytecode = **bc_p;
+  (*bc_p) += sizeof (re_bytecode_t);
+  return (re_opcode_t) bytecode;
+} /* re_get_opcode */
+
+/**
+ * Get a parameter of a RegExp opcode
+ *
+ * The bytecode pointer is advanced behind the value.
+ *
+ * @return the 32 bit value at the current position
+ */
+uint32_t
+re_get_value (re_bytecode_t **bc_p) /**< pointer to bytecode start */
+{
+  /* Values are written with memcpy right after opcode-sized fields, so they are not
+   * guaranteed to be 4 byte aligned; read them back the same way instead of
+   * dereferencing a uint32_t pointer. */
+  uint32_t value;
+  memcpy (&value, *bc_p, sizeof (uint32_t));
+  (*bc_p) += sizeof (uint32_t);
+  return value;
+} /* re_get_value */
+
+/**
+ * Callback function of character class generation
+ */
+static void
+append_char_class (void* re_ctx_p, /**< RegExp compiler context */
+                   uint32_t start, /**< character class range from */
+                   uint32_t end) /**< character class range to */
+{
+  /* FIXME: Handle ignore case flag and add unicode support.
*/ + re_compiler_ctx_t *ctx_p = (re_compiler_ctx_t*) re_ctx_p; + append_u32 (ctx_p->bytecode_ctx_p, start); + append_u32 (ctx_p->bytecode_ctx_p, end); + ctx_p->parser_ctx_p->num_of_classes++; +} /* append_char_class */ + +/** + * Insert simple atom iterator + */ +static void +insert_simple_iterator (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ + uint32_t new_atom_start_offset) /**< atom start offset */ +{ + uint32_t atom_code_length; + uint32_t offset; + uint32_t qmin, qmax; + + qmin = re_ctx_p->current_token.qmin; + qmax = re_ctx_p->current_token.qmax; + JERRY_ASSERT (qmin <= qmax); + + /* FIXME: optimize bytecode length. Store 0 rather than INF */ + + append_opcode (re_ctx_p->bytecode_ctx_p, RE_OP_MATCH); /* complete 'sub atom' */ + uint32_t bytecode_length = BYTECODE_LEN (re_ctx_p->bytecode_ctx_p); + atom_code_length = (uint32_t) (bytecode_length - new_atom_start_offset); + + offset = new_atom_start_offset; + insert_u32 (re_ctx_p->bytecode_ctx_p, offset, atom_code_length); + insert_u32 (re_ctx_p->bytecode_ctx_p, offset, qmax); + insert_u32 (re_ctx_p->bytecode_ctx_p, offset, qmin); + if (re_ctx_p->current_token.greedy) + { + insert_opcode (re_ctx_p->bytecode_ctx_p, offset, RE_OP_GREEDY_ITERATOR); + } + else + { + insert_opcode (re_ctx_p->bytecode_ctx_p, offset, RE_OP_NON_GREEDY_ITERATOR); + } +} /* insert_simple_iterator */ + +/** + * Get the type of a group start + */ +static re_opcode_t +get_start_opcode_type (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ + bool is_capturable) /**< is capturabel group */ +{ + if (is_capturable) + { + if (re_ctx_p->current_token.qmin == 0) + { + if (re_ctx_p->current_token.greedy) + { + return RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START; + } + + return RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START; + } + + return RE_OP_CAPTURE_GROUP_START; + } + + if (re_ctx_p->current_token.qmin == 0) + { + if (re_ctx_p->current_token.greedy) + { + return RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START; + } + + return 
RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START; + } + + return RE_OP_NON_CAPTURE_GROUP_START; + + JERRY_UNREACHABLE (); + return 0; +} /* get_start_opcode_type */ + +/** + * Get the type of a group end + */ +static re_opcode_t +get_end_opcode_type (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ + bool is_capturable) /**< is capturabel group */ +{ + if (is_capturable) + { + if (re_ctx_p->current_token.greedy) + { + return RE_OP_CAPTURE_GREEDY_GROUP_END; + } + + return RE_OP_CAPTURE_NON_GREEDY_GROUP_END; + } + + if (re_ctx_p->current_token.greedy) + { + return RE_OP_NON_CAPTURE_GREEDY_GROUP_END; + } + + return RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END; + + JERRY_UNREACHABLE (); + return 0; +} /* get_end_opcode_type */ + +/** + * Enclose the given bytecode to a group + */ +static void +insert_into_group (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ + uint32_t group_start_offset, /**< offset of group start */ + uint32_t idx, /**< index of group */ + bool is_capturable) /**< is capturabel group */ +{ + uint32_t qmin, qmax; + re_opcode_t start_opcode = get_start_opcode_type (re_ctx_p, is_capturable); + re_opcode_t end_opcode = get_end_opcode_type (re_ctx_p, is_capturable); + uint32_t start_head_offset_len; + + qmin = re_ctx_p->current_token.qmin; + qmax = re_ctx_p->current_token.qmax; + JERRY_ASSERT (qmin <= qmax); + + start_head_offset_len = BYTECODE_LEN (re_ctx_p->bytecode_ctx_p); + insert_u32 (re_ctx_p->bytecode_ctx_p, group_start_offset, idx); + insert_opcode (re_ctx_p->bytecode_ctx_p, group_start_offset, start_opcode); + start_head_offset_len = BYTECODE_LEN (re_ctx_p->bytecode_ctx_p) - start_head_offset_len; + append_opcode (re_ctx_p->bytecode_ctx_p, end_opcode); + append_u32 (re_ctx_p->bytecode_ctx_p, idx); + append_u32 (re_ctx_p->bytecode_ctx_p, qmin); + append_u32 (re_ctx_p->bytecode_ctx_p, qmax); + + group_start_offset += start_head_offset_len; + append_jump_offset (re_ctx_p->bytecode_ctx_p, + BYTECODE_LEN (re_ctx_p->bytecode_ctx_p) - 
group_start_offset); + + if (start_opcode != RE_OP_CAPTURE_GROUP_START && start_opcode != RE_OP_NON_CAPTURE_GROUP_START) + { + insert_u32 (re_ctx_p->bytecode_ctx_p, + group_start_offset, + BYTECODE_LEN (re_ctx_p->bytecode_ctx_p) - group_start_offset); + } +} /* insert_into_group */ + +/** + * Enclose the given bytecode to a group and inster jump value + */ +static void +insert_into_group_with_jump (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ + uint32_t group_start_offset, /**< offset of group start */ + uint32_t idx, /**< index of group */ + bool is_capturable) /**< is capturabel group */ +{ + insert_u32 (re_ctx_p->bytecode_ctx_p, + group_start_offset, + BYTECODE_LEN (re_ctx_p->bytecode_ctx_p) - group_start_offset); + insert_into_group (re_ctx_p, group_start_offset, idx, is_capturable); +} /* insert_into_group_with_jump */ + +/** + * Parse alternatives + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +static ecma_completion_value_t +parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ + bool expect_eof) /**< expect end of file */ +{ + uint32_t idx; + re_bytecode_ctx_t *bc_ctx_p = re_ctx_p->bytecode_ctx_p; + ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + + uint32_t alterantive_offset = BYTECODE_LEN (re_ctx_p->bytecode_ctx_p); + + if (re_ctx_p->recursion_depth >= RE_COMPILE_RECURSION_LIMIT) + { + ret_value = ecma_raise_range_error ((const ecma_char_t *) "RegExp compiler recursion limit is exceeded."); + return ret_value; + } + re_ctx_p->recursion_depth++; + + while (true) + { + ECMA_TRY_CATCH (empty, + re_parse_next_token (re_ctx_p->parser_ctx_p, + &(re_ctx_p->current_token)), + ret_value); + ECMA_FINALIZE (empty); + if (!ecma_is_completion_value_empty (ret_value)) + { + return ret_value; /* error */ + } + uint32_t new_atom_start_offset = BYTECODE_LEN (re_ctx_p->bytecode_ctx_p); + + switch (re_ctx_p->current_token.type) + { + case 
RE_TOK_START_CAPTURE_GROUP: + { + idx = re_ctx_p->num_of_captures++; + JERRY_DDLOG ("Compile a capture group start (idx: %d)\n", idx); + + ret_value = parse_alternative (re_ctx_p, false); + if (ecma_is_completion_value_empty (ret_value)) + { + insert_into_group (re_ctx_p, new_atom_start_offset, idx, true); + } + else + { + return ret_value; /* error */ + } + break; + } + case RE_TOK_START_NON_CAPTURE_GROUP: + { + idx = re_ctx_p->num_of_non_captures++; + JERRY_DDLOG ("Compile a non-capture group start (idx: %d)\n", idx); + + ret_value = parse_alternative (re_ctx_p, false); + if (ecma_is_completion_value_empty (ret_value)) + { + insert_into_group (re_ctx_p, new_atom_start_offset, idx, false); + } + else + { + return ret_value; /* error */ + } + break; + } + case RE_TOK_CHAR: + { + JERRY_DDLOG ("Compile character token: %c, qmin: %d, qmax: %d\n", + re_ctx_p->current_token.value, re_ctx_p->current_token.qmin, re_ctx_p->current_token.qmax); + + append_opcode (bc_ctx_p, RE_OP_CHAR); + append_u32 (bc_ctx_p, re_ctx_p->current_token.value); + + if ((re_ctx_p->current_token.qmin != 1) || (re_ctx_p->current_token.qmax != 1)) + { + insert_simple_iterator (re_ctx_p, new_atom_start_offset); + } + break; + } + case RE_TOK_PERIOD: + { + JERRY_DDLOG ("Compile a period\n"); + append_opcode (bc_ctx_p, RE_OP_PERIOD); + + if ((re_ctx_p->current_token.qmin != 1) || (re_ctx_p->current_token.qmax != 1)) + { + insert_simple_iterator (re_ctx_p, new_atom_start_offset); + } + break; + } + case RE_TOK_ALTERNATIVE: + { + JERRY_DDLOG ("Compile an alternative\n"); + insert_u32 (bc_ctx_p, alterantive_offset, BYTECODE_LEN (bc_ctx_p) - alterantive_offset); + append_opcode (bc_ctx_p, RE_OP_ALTERNATIVE); + alterantive_offset = BYTECODE_LEN (re_ctx_p->bytecode_ctx_p); + break; + } + case RE_TOK_ASSERT_START: + { + JERRY_DDLOG ("Compile a start assertion\n"); + append_opcode (bc_ctx_p, RE_OP_ASSERT_START); + break; + } + case RE_TOK_ASSERT_END: + { + JERRY_DDLOG ("Compile an end assertion\n"); + 
append_opcode (bc_ctx_p, RE_OP_ASSERT_END); + break; + } + case RE_TOK_ASSERT_WORD_BOUNDARY: + { + JERRY_DDLOG ("Compile a word boundary assertion\n"); + append_opcode (bc_ctx_p, RE_OP_ASSERT_WORD_BOUNDARY); + break; + } + case RE_TOK_ASSERT_NOT_WORD_BOUNDARY: + { + JERRY_DDLOG ("Compile a not word boundary assertion\n"); + append_opcode (bc_ctx_p, RE_OP_ASSERT_NOT_WORD_BOUNDARY); + break; + } + case RE_TOK_ASSERT_START_POS_LOOKAHEAD: + { + JERRY_DDLOG ("Compile a positive lookahead assertion\n"); + idx = re_ctx_p->num_of_non_captures++; + append_opcode (bc_ctx_p, RE_OP_LOOKAHEAD_POS); + + ret_value = parse_alternative (re_ctx_p, false); + if (ecma_is_completion_value_empty (ret_value)) + { + append_opcode (bc_ctx_p, RE_OP_MATCH); + + insert_into_group_with_jump (re_ctx_p, new_atom_start_offset, idx, false); + } + else + { + return ret_value; /* error */ + } + break; + } + case RE_TOK_ASSERT_START_NEG_LOOKAHEAD: + { + JERRY_DDLOG ("Compile a negative lookahead assertion\n"); + idx = re_ctx_p->num_of_non_captures++; + append_opcode (bc_ctx_p, RE_OP_LOOKAHEAD_NEG); + + ret_value = parse_alternative (re_ctx_p, false); + if (ecma_is_completion_value_empty (ret_value)) + { + append_opcode (bc_ctx_p, RE_OP_MATCH); + + insert_into_group_with_jump (re_ctx_p, new_atom_start_offset, idx, false); + } + else + { + return ret_value; /* error */ + } + break; + } + case RE_TOK_BACKREFERENCE: + { + uint32_t backref = (uint32_t) re_ctx_p->current_token.value; + idx = re_ctx_p->num_of_non_captures++; + if (backref > re_ctx_p->highest_backref) + { + re_ctx_p->highest_backref = backref; + } + JERRY_DDLOG ("Compile a backreference: %d\n", backref); + append_opcode (bc_ctx_p, RE_OP_BACKREFERENCE); + append_u32 (bc_ctx_p, backref); + + insert_into_group_with_jump (re_ctx_p, new_atom_start_offset, idx, false); + break; + } + case RE_TOK_START_CHAR_CLASS: + case RE_TOK_START_INV_CHAR_CLASS: + { + JERRY_DDLOG ("Compile a character class\n"); + append_opcode (bc_ctx_p, + 
re_ctx_p->current_token.type == RE_TOK_START_CHAR_CLASS + ? RE_OP_CHAR_CLASS + : RE_OP_INV_CHAR_CLASS); + uint32_t offset = BYTECODE_LEN (re_ctx_p->bytecode_ctx_p); + + ECMA_TRY_CATCH (empty, + re_parse_char_class (re_ctx_p->parser_ctx_p, + append_char_class, + re_ctx_p, + &(re_ctx_p->current_token)), + ret_value); + insert_u32 (bc_ctx_p, offset, re_ctx_p->parser_ctx_p->num_of_classes); + + if ((re_ctx_p->current_token.qmin != 1) || (re_ctx_p->current_token.qmax != 1)) + { + insert_simple_iterator (re_ctx_p, new_atom_start_offset); + } + ECMA_FINALIZE (empty); + break; + } + case RE_TOK_END_GROUP: + { + JERRY_DDLOG ("Compile a group end\n"); + + if (expect_eof) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Unexpected end of paren."); + } + else + { + insert_u32 (bc_ctx_p, alterantive_offset, BYTECODE_LEN (bc_ctx_p) - alterantive_offset); + re_ctx_p->recursion_depth--; + } + + return ret_value; + } + case RE_TOK_EOF: + { + if (!expect_eof) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Unexpected end of pattern."); + } + else + { + insert_u32 (bc_ctx_p, alterantive_offset, BYTECODE_LEN (bc_ctx_p) - alterantive_offset); + re_ctx_p->recursion_depth--; + } + + return ret_value; + } + default: + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Unexpected RegExp token."); + return ret_value; + } + } + } + + JERRY_UNREACHABLE (); + return ret_value; +} /* parse_alternative */ + +/** + * Compilation of RegExp bytecode + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +re_compile_bytecode (ecma_property_t *bytecode_p, /**< bytecode */ + ecma_string_t *pattern_str_p, /**< pattern */ + uint8_t flags) /**< flags */ +{ + ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + re_compiler_ctx_t re_ctx; + re_ctx.flags = flags; + re_ctx.highest_backref = 0; + re_ctx.num_of_non_captures = 0; + re_ctx.recursion_depth = 0; + + 
re_bytecode_ctx_t bc_ctx; + bc_ctx.block_start_p = NULL; + bc_ctx.block_end_p = NULL; + bc_ctx.current_p = NULL; + + re_ctx.bytecode_ctx_p = &bc_ctx; + + int32_t pattern_str_len = ecma_string_get_length (pattern_str_p); + MEM_DEFINE_LOCAL_ARRAY (pattern_start_p, pattern_str_len + 1, ecma_char_t); + ssize_t zt_str_size = (ssize_t) sizeof (ecma_char_t) * (pattern_str_len + 1); + ecma_string_to_zt_string (pattern_str_p, pattern_start_p, zt_str_size); + + re_parser_ctx_t parser_ctx; + parser_ctx.pattern_start_p = pattern_start_p; + parser_ctx.current_char_p = pattern_start_p; + parser_ctx.num_of_groups = -1; + re_ctx.parser_ctx_p = &parser_ctx; + + /* 1. Parse RegExp pattern */ + re_ctx.num_of_captures = 1; + append_opcode (&bc_ctx, RE_OP_SAVE_AT_START); + + ECMA_TRY_CATCH (empty, parse_alternative (&re_ctx, true), ret_value); + + /* 2. Check for invalid backreference */ + if (re_ctx.highest_backref >= re_ctx.num_of_captures) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Invalid backreference.\n"); + } + else + { + append_opcode (&bc_ctx, RE_OP_SAVE_AND_MATCH); + append_opcode (&bc_ctx, RE_OP_EOF); + + /* 3. Insert extra informations for bytecode header */ + insert_u32 (&bc_ctx, 0, (uint32_t) re_ctx.num_of_non_captures); + insert_u32 (&bc_ctx, 0, (uint32_t) re_ctx.num_of_captures * 2); + insert_u32 (&bc_ctx, 0, (uint32_t) re_ctx.flags); + } + ECMA_FINALIZE (empty); + + /* The RegExp bytecode contains at least a RE_OP_SAVE_AT_START opdoce, so it cannot be NULL. 
*/ + JERRY_ASSERT (bc_ctx.block_start_p != NULL); + ECMA_SET_POINTER (bytecode_p->u.internal_property.value, bc_ctx.block_start_p); + + MEM_FINALIZE_LOCAL_ARRAY (pattern_start_p); + +#ifdef JERRY_ENABLE_LOG + regexp_dump_bytecode (&bc_ctx); +#endif + + return ret_value; +} /* re_compile_bytecode */ + +#ifdef JERRY_ENABLE_LOG +/** + * RegExp bytecode dumper + */ +void +regexp_dump_bytecode (re_bytecode_ctx_t *bc_ctx_p) +{ + re_bytecode_t *bytecode_p = bc_ctx_p->block_start_p; + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d | ", re_get_value (&bytecode_p)); + + re_opcode_t op; + while ((op = re_get_opcode (&bytecode_p))) + { + switch (op) + { + case RE_OP_MATCH: + { + JERRY_DLOG ("MATCH, "); + break; + } + case RE_OP_CHAR: + { + JERRY_DLOG ("CHAR "); + JERRY_DLOG ("%c, ", (char) re_get_value (&bytecode_p)); + break; + } + case RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START: + { + JERRY_DLOG ("N"); + /* FALLTHRU */ + } + case RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START: + { + JERRY_DLOG ("GZ_START "); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_CAPTURE_GROUP_START: + { + JERRY_DLOG ("START "); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_CAPTURE_NON_GREEDY_GROUP_END: + { + JERRY_DLOG ("N"); + /* FALLTHRU */ + } + case RE_OP_CAPTURE_GREEDY_GROUP_END: + { + JERRY_DLOG ("G_END "); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START: + { + JERRY_DLOG ("N"); + /* FALLTHRU */ + } + case RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START: + { + JERRY_DLOG ("GZ_NC_START "); + JERRY_DLOG ("%d ", 
re_get_value (&bytecode_p)); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_NON_CAPTURE_GROUP_START: + { + JERRY_DLOG ("NC_START "); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END: + { + JERRY_DLOG ("N"); + /* FALLTHRU */ + } + case RE_OP_NON_CAPTURE_GREEDY_GROUP_END: + { + JERRY_DLOG ("G_NC_END "); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_SAVE_AT_START: + { + JERRY_DLOG ("RE_START "); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_SAVE_AND_MATCH: + { + JERRY_DLOG ("RE_END, "); + break; + } + case RE_OP_GREEDY_ITERATOR: + { + JERRY_DLOG ("GREEDY_ITERATOR "); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_NON_GREEDY_ITERATOR: + { + JERRY_DLOG ("NON_GREEDY_ITERATOR "); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_PERIOD: + { + JERRY_DLOG ("PERIOD "); + break; + } + case RE_OP_ALTERNATIVE: + { + JERRY_DLOG ("ALTERNATIVE "); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_ASSERT_START: + { + JERRY_DLOG ("ASSERT_START "); + break; + } + case RE_OP_ASSERT_END: + { + JERRY_DLOG ("ASSERT_END "); + break; + } + case RE_OP_ASSERT_WORD_BOUNDARY: + { + JERRY_DLOG ("ASSERT_WORD_BOUNDARY "); + break; + } + case RE_OP_ASSERT_NOT_WORD_BOUNDARY: + { + JERRY_DLOG ("ASSERT_NOT_WORD_BOUNDARY "); + break; + } + case RE_OP_LOOKAHEAD_POS: + { + JERRY_DLOG ("LOOKAHEAD_POS "); + JERRY_DLOG ("%d, ", 
re_get_value (&bytecode_p)); + break; + } + case RE_OP_LOOKAHEAD_NEG: + { + JERRY_DLOG ("LOOKAHEAD_NEG "); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_BACKREFERENCE: + { + JERRY_DLOG ("BACKREFERENCE "); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_INV_CHAR_CLASS: + { + JERRY_DLOG ("INV_"); + /* FALLTHRU */ + } + case RE_OP_CHAR_CLASS: + { + JERRY_DLOG ("CHAR_CLASS "); + uint32_t num_of_class = re_get_value (&bytecode_p); + JERRY_DLOG ("%d", num_of_class); + while (num_of_class) + { + JERRY_DLOG (" %d", re_get_value (&bytecode_p)); + JERRY_DLOG ("-%d", re_get_value (&bytecode_p)); + num_of_class--; + } + JERRY_DLOG (", "); + break; + } + default: + { + JERRY_DLOG ("UNKNOWN(%d), ", (uint32_t) op); + break; + } + } + } + JERRY_DLOG ("EOF\n"); +} /* regexp_dump_bytecode */ +#endif /* JERRY_ENABLE_LOG */ + +#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ diff --git a/jerry-core/parser/regexp/re-compiler.h b/jerry-core/parser/regexp/re-compiler.h new file mode 100644 index 0000000000..73e4eedabe --- /dev/null +++ b/jerry-core/parser/regexp/re-compiler.h @@ -0,0 +1,108 @@ +/* Copyright 2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef RE_COMPILER_H +#define RE_COMPILER_H + +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN + +#include "ecma-globals.h" +#include "re-parser.h" + +/* RegExp opcodes + * Group opcode order is important, because RE_IS_CAPTURE_GROUP is based on it. + * Change it carfully. Capture opcodes should be at first. + */ +#define RE_OP_EOF 0 + +#define RE_OP_CAPTURE_GROUP_START 1 +#define RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START 2 +#define RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START 3 +#define RE_OP_CAPTURE_GREEDY_GROUP_END 4 +#define RE_OP_CAPTURE_NON_GREEDY_GROUP_END 5 +#define RE_OP_NON_CAPTURE_GROUP_START 6 +#define RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START 7 +#define RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START 8 +#define RE_OP_NON_CAPTURE_GREEDY_GROUP_END 9 +#define RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END 10 + +#define RE_OP_MATCH 11 +#define RE_OP_CHAR 12 +#define RE_OP_SAVE_AT_START 13 +#define RE_OP_SAVE_AND_MATCH 14 +#define RE_OP_PERIOD 15 +#define RE_OP_ALTERNATIVE 16 +#define RE_OP_GREEDY_ITERATOR 17 +#define RE_OP_NON_GREEDY_ITERATOR 18 +#define RE_OP_ASSERT_START 19 +#define RE_OP_ASSERT_END 20 +#define RE_OP_ASSERT_WORD_BOUNDARY 21 +#define RE_OP_ASSERT_NOT_WORD_BOUNDARY 22 +#define RE_OP_LOOKAHEAD_POS 23 +#define RE_OP_LOOKAHEAD_NEG 24 +#define RE_OP_BACKREFERENCE 25 +#define RE_OP_CHAR_CLASS 26 +#define RE_OP_INV_CHAR_CLASS 27 + +#define RE_COMPILE_RECURSION_LIMIT 100 + +#define RE_IS_CAPTURE_GROUP(x) (((x) < RE_OP_NON_CAPTURE_GROUP_START) ? 
1 : 0) + +typedef uint8_t re_opcode_t; /* type of RegExp opcodes */ +typedef uint8_t re_bytecode_t; /* type of standard bytecode elements (ex.: opcode parameters) */ + +/** + * Context of RegExp bytecode container + * + * FIXME: + * Add comments with description of the structure members + */ +typedef struct +{ + re_bytecode_t *block_start_p; + re_bytecode_t *block_end_p; + re_bytecode_t *current_p; +} re_bytecode_ctx_t; + +/** + * Context of RegExp compiler + * + * FIXME: + * Add comments with description of the structure members + */ +typedef struct +{ + uint8_t flags; + uint32_t recursion_depth; + uint32_t num_of_captures; + uint32_t num_of_non_captures; + uint32_t highest_backref; + re_bytecode_ctx_t *bytecode_ctx_p; + re_token_t current_token; + re_parser_ctx_t *parser_ctx_p; +} re_compiler_ctx_t; + +ecma_completion_value_t +re_compile_bytecode (ecma_property_t *bytecode_p, ecma_string_t *pattern_str_p, uint8_t flags); + +re_opcode_t +re_get_opcode (re_bytecode_t **bc_p); + +uint32_t +re_get_value (re_bytecode_t **bc_p); + +#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ +#endif /* RE_COMPILER_H */ diff --git a/jerry-core/parser/regexp/re-parser.cpp b/jerry-core/parser/regexp/re-parser.cpp new file mode 100644 index 0000000000..51ed3a8c9d --- /dev/null +++ b/jerry-core/parser/regexp/re-parser.cpp @@ -0,0 +1,808 @@ +/* Copyright 2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/*
 * Look up the character 'lookup' positions ahead of str_p; yields '\0' when the
 * position is not inside the zero terminated string.
 *
 * The arguments are parenthesised, so expressions like '*pattern_p' or
 * 'offset + 1' can be passed safely.
 *
 * FIXME: change it, when unicode support would be implemented
 */
#define RE_LOOKUP(str_p, lookup) (ecma_zt_string_length (str_p) > (lookup) ? (str_p)[(lookup)] : '\0')

/*
 * Advance the str_p pointer by 'advance' characters.
 *
 * FIXME: change it, when unicode support would be implemented
 */
#define RE_ADVANCE(str_p, advance) do { (str_p) += (advance); } while (0)
re_token_p->qmin = 1; + re_token_p->qmax = RE_ITERATOR_INFINITE; + if (ch1 == '?') + { + *advance_p = 2; + re_token_p->greedy = false; + } + else + { + *advance_p = 1; + re_token_p->greedy = true; + } + break; + } + case '{': + { + uint32_t qmin = 0; + uint32_t qmax = RE_ITERATOR_INFINITE; + uint32_t digits = 0; + while (true) + { + (*advance_p)++; + ch1 = RE_LOOKUP (pattern_p, lookup + *advance_p); + + if (isdigit (ch1)) + { + if (digits >= ECMA_NUMBER_MAX_DIGITS) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: too many digits."); + return ret_value; + } + digits++; + qmin = qmin * 10 + ecma_char_hex_to_int (ch1); + } + else if (ch1 == ',') + { + if (qmax != RE_ITERATOR_INFINITE) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: double comma."); + return ret_value; + } + if ((RE_LOOKUP (pattern_p, lookup + *advance_p + 1)) == '}') + { + if (digits == 0) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: missing digits."); + return ret_value; + } + + re_token_p->qmin = qmin; + re_token_p->qmax = RE_ITERATOR_INFINITE; + *advance_p += 2; + break; + } + qmax = qmin; + qmin = 0; + digits = 0; + } + else if (ch1 == '}') + { + if (digits == 0) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: missing digits."); + return ret_value; + } + + if (qmax != RE_ITERATOR_INFINITE) + { + re_token_p->qmin = qmax; + re_token_p->qmax = qmin; + } + else + { + re_token_p->qmin = qmin; + re_token_p->qmax = qmin; + } + + *advance_p += 1; + break; + } + else + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: unknown char."); + return ret_value; + } + } + + if ((RE_LOOKUP (pattern_p, lookup + *advance_p)) == '?') + { + re_token_p->greedy = false; + *advance_p += 1; + } + else + { + re_token_p->greedy = true; + } + break; + + JERRY_UNREACHABLE (); + break; + } + default: + { + 
re_token_p->qmin = 1; + re_token_p->qmax = 1; + re_token_p->greedy = true; + break; + } + } + + JERRY_ASSERT (ecma_is_completion_value_empty (ret_value)); + + if (re_token_p->qmin > re_token_p->qmax) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: qmin > qmax."); + } + + return ret_value; +} /* parse_re_iterator */ + +/** + * Count the number of groups in pattern + */ +static void +re_count_num_of_groups (re_parser_ctx_t *parser_ctx_p) /**< RegExp parser context */ +{ + ecma_char_t *pattern_p = parser_ctx_p->pattern_start_p; + ecma_char_t ch1; + int char_class_in = 0; + parser_ctx_p->num_of_groups = 0; + + ch1 = get_ecma_char (&pattern_p); + while (ch1 != '\0') + { + ecma_char_t ch0 = ch1; + ch1 = get_ecma_char (&pattern_p); + switch (ch0) + { + case '\\': + { + ch1 = get_ecma_char (&pattern_p); + break; + } + case '[': + { + char_class_in++; + break; + } + case ']': + { + if (!char_class_in) + { + char_class_in--; + } + break; + } + case '(': + { + if (ch1 != '?' 
&& !char_class_in) + { + parser_ctx_p->num_of_groups++; + } + break; + } + } + } +} /* re_count_num_of_groups */ + +/** + * Read the input pattern and parse the range of character class + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */ + re_char_class_callback append_char_class, /**< callback function, + * which adds the char-ranges + * to the bytecode */ + void* re_ctx_p, /**< regexp compiler context */ + re_token_t *out_token_p) /**< output token */ +{ + ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + ecma_char_t **pattern_p = &(parser_ctx_p->current_char_p); + + out_token_p->qmax = out_token_p->qmin = 1; + ecma_char_t start = RE_CHAR_UNDEF; + bool is_range = false; + parser_ctx_p->num_of_classes = 0; + + do + { + ecma_char_t ch = get_ecma_char (pattern_p); + if (ch == ']') + { + if (start != RE_CHAR_UNDEF) + { + append_char_class (re_ctx_p, start, start); + } + break; + } + else if (ch == '-') + { + if (start != RE_CHAR_UNDEF && !is_range && RE_LOOKUP (*pattern_p, 0) != ']') + { + is_range = true; + continue; + } + } + else if (ch == '\\') + { + ch = get_ecma_char (pattern_p); + + if (ch == 'b') + { + ch = RE_CONTROL_CHAR_BEL; + } + else if (ch == 'f') + { + ch = RE_CONTROL_CHAR_FF; + } + else if (ch == 'n') + { + ch = RE_CONTROL_CHAR_EOL; + } + else if (ch == 't') + { + ch = RE_CONTROL_CHAR_TAB; + } + else if (ch == 'r') + { + ch = RE_CONTROL_CHAR_CR; + } + else if (ch == 'v') + { + ch = RE_CONTROL_CHAR_VT; + } + else if (ch == 'c') + { + ch = get_ecma_char (pattern_p); + if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) + { + ch = (ch % 32); + } + else + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid regexp control escape"); + return ret_value; + } + } + else if (ch == 'x') + { + /* FIXME: get unicode char from hex-digits */ + /* ch = ...; */ + } 
+ else if (ch == 'u') + { + /* FIXME: get unicode char from digits */ + /* ch = ...; */ + } + else if (ch == 'd') + { + /* append digits from '0' to '9'. */ + append_char_class (re_ctx_p, 0x0030UL, 0x0039UL); + ch = RE_CHAR_UNDEF; + } + else if (ch == 'D') + { + append_char_class (re_ctx_p, 0x0000UL, 0x002FUL); + append_char_class (re_ctx_p, 0x003AUL, 0xFFFFUL); + ch = RE_CHAR_UNDEF; + } + else if (ch == 's') + { + append_char_class (re_ctx_p, 0x0009UL, 0x000DUL); + append_char_class (re_ctx_p, 0x0020UL, 0x0020UL); + append_char_class (re_ctx_p, 0x00A0UL, 0x00A0UL); + append_char_class (re_ctx_p, 0x1680UL, 0x1680UL); + append_char_class (re_ctx_p, 0x180EUL, 0x180EUL); + append_char_class (re_ctx_p, 0x2000UL, 0x200AUL); + append_char_class (re_ctx_p, 0x2028UL, 0x2029UL); + append_char_class (re_ctx_p, 0x202FUL, 0x202FUL); + append_char_class (re_ctx_p, 0x205FUL, 0x205FUL); + append_char_class (re_ctx_p, 0x3000UL, 0x3000UL); + append_char_class (re_ctx_p, 0xFEFFUL, 0xFEFFUL); + ch = RE_CHAR_UNDEF; + } + else if (ch == 'S') + { + append_char_class (re_ctx_p, 0x0000UL, 0x0008UL); + append_char_class (re_ctx_p, 0x000EUL, 0x001FUL); + append_char_class (re_ctx_p, 0x0021UL, 0x009FUL); + append_char_class (re_ctx_p, 0x00A1UL, 0x167FUL); + append_char_class (re_ctx_p, 0x1681UL, 0x180DUL); + append_char_class (re_ctx_p, 0x180FUL, 0x1FFFUL); + append_char_class (re_ctx_p, 0x200BUL, 0x2027UL); + append_char_class (re_ctx_p, 0x202AUL, 0x202EUL); + append_char_class (re_ctx_p, 0x2030UL, 0x205EUL); + append_char_class (re_ctx_p, 0x2060UL, 0x2FFFUL); + append_char_class (re_ctx_p, 0x3001UL, 0xFEFEUL); + append_char_class (re_ctx_p, 0xFF00UL, 0xFFFFUL); + ch = RE_CHAR_UNDEF; + } + else if (ch == 'w') + { + append_char_class (re_ctx_p, 0x0030UL, 0x0039UL); + append_char_class (re_ctx_p, 0x0041UL, 0x005AUL); + append_char_class (re_ctx_p, 0x005FUL, 0x005FUL); + append_char_class (re_ctx_p, 0x0061UL, 0x007AUL); + ch = RE_CHAR_UNDEF; + } + else if (ch == 'W') + { + append_char_class 
(re_ctx_p, 0x0000UL, 0x002FUL); + append_char_class (re_ctx_p, 0x003AUL, 0x0040UL); + append_char_class (re_ctx_p, 0x005BUL, 0x005EUL); + append_char_class (re_ctx_p, 0x0060UL, 0x0060UL); + append_char_class (re_ctx_p, 0x007BUL, 0xFFFFUL); + ch = RE_CHAR_UNDEF; + } + else if (isdigit (ch)) + { + if (ch != '\0' || isdigit (RE_LOOKUP (*pattern_p, 1))) + { + /* FIXME: octal support */ + } + } + /* FIXME: depends on the unicode support + else if (!jerry_unicode_identifier (ch)) + { + JERRY_ERROR_MSG ("RegExp escape pattern error. (Char class)"); + } + */ + } + + if (ch == RE_CHAR_UNDEF) + { + if (start != RE_CHAR_UNDEF) + { + if (is_range) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid character class range"); + return ret_value; + } + else + { + append_char_class (re_ctx_p, start, start); + start = RE_CHAR_UNDEF; + } + } + } + else + { + if (start != RE_CHAR_UNDEF) + { + if (is_range) + { + if (start > ch) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid character class range"); + return ret_value; + } + else + { + append_char_class (re_ctx_p, start, ch); + start = RE_CHAR_UNDEF; + is_range = false; + } + } + else + { + append_char_class (re_ctx_p, start, start); + start = ch; + } + } + else + { + start = ch; + } + } + } + while (true); + + uint32_t advance = 0; + ECMA_TRY_CATCH (empty, + parse_re_iterator (parser_ctx_p->current_char_p, + out_token_p, + 0, + &advance), + ret_value); + RE_ADVANCE (parser_ctx_p->current_char_p, advance); + ECMA_FINALIZE (empty); + + return ret_value; +} /* re_parse_char_class */ +
+/**
+ * Read the input pattern and parse the next token for the RegExp compiler
+ *
+ * The token starting at parser_ctx_p->current_char_p is classified into one of
+ * the RE_TOK_* kinds and stored in out_token_p. For atoms that may be followed
+ * by a quantifier ('.', escapes, ')' and ordinary characters) parse_re_iterator
+ * is called on the text right after the atom, and the number of characters it
+ * reports is consumed together with the atom.
+ *
+ * When the returned completion value is empty the read position has been moved
+ * past the token; otherwise (syntax error) the read position is left untouched.
+ *
+ * Note: the character values of \xHH and \uHHHH escapes are not decoded yet,
+ *       only their length is consumed (see the FIXME comments below).
+ *
+ * @return completion value
+ *         Returned value must be freed with ecma_free_completion_value
+ */
+ecma_completion_value_t
+re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context */
+                     re_token_t *out_token_p) /**< output token */
+{
+  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
+  uint32_t advance = 0;
+  ecma_char_t ch0 = *(parser_ctx_p->current_char_p);
+
+  switch (ch0)
+  {
+    case '|':
+    {
+      advance = 1;
+      out_token_p->type = RE_TOK_ALTERNATIVE;
+      break;
+    }
+    case '^':
+    {
+      advance = 1;
+      out_token_p->type = RE_TOK_ASSERT_START;
+      break;
+    }
+    case '$':
+    {
+      advance = 1;
+      out_token_p->type = RE_TOK_ASSERT_END;
+      break;
+    }
+    case '.':
+    {
+      ECMA_TRY_CATCH (empty,
+                      parse_re_iterator (parser_ctx_p->current_char_p,
+                                         out_token_p,
+                                         1,
+                                         &advance),
+                      ret_value);
+      advance += 1;
+      out_token_p->type = RE_TOK_PERIOD;
+      ECMA_FINALIZE (empty);
+      break;
+    }
+    case '\\':
+    {
+      /* Default: a two character escape that stands for a single character. */
+      advance = 2;
+      out_token_p->type = RE_TOK_CHAR;
+      ecma_char_t ch1 = RE_LOOKUP (parser_ctx_p->current_char_p, 1);
+
+      if (ch1 == '\0')
+      {
+        /* A lone backslash at the end of the pattern: consuming two characters
+         * would move the read position past the terminating zero. */
+        ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid regexp escape at end of pattern");
+        break;
+      }
+      else if (ch1 == 'b')
+      {
+        out_token_p->type = RE_TOK_ASSERT_WORD_BOUNDARY;
+      }
+      else if (ch1 == 'B')
+      {
+        out_token_p->type = RE_TOK_ASSERT_NOT_WORD_BOUNDARY;
+      }
+      else if (ch1 == 'f')
+      {
+        out_token_p->value = RE_CONTROL_CHAR_FF;
+      }
+      else if (ch1 == 'n')
+      {
+        out_token_p->value = RE_CONTROL_CHAR_EOL;
+      }
+      else if (ch1 == 't')
+      {
+        out_token_p->value = RE_CONTROL_CHAR_TAB;
+      }
+      else if (ch1 == 'r')
+      {
+        out_token_p->value = RE_CONTROL_CHAR_CR;
+      }
+      else if (ch1 == 'v')
+      {
+        out_token_p->value = RE_CONTROL_CHAR_VT;
+      }
+      else if (ch1 == 'c')
+      {
+        /* \cX: control letter, the value is the letter's code modulo 32 */
+        ecma_char_t ch2 = RE_LOOKUP (parser_ctx_p->current_char_p, 2);
+        if ((ch2 >= 'A' && ch2 <= 'Z') || (ch2 >= 'a' && ch2 <= 'z'))
+        {
+          advance = 3;
+          out_token_p->type = RE_TOK_CHAR;
+          out_token_p->value = (ch2 % 32);
+        }
+        else
+        {
+          ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid regexp control escape");
+          break;
+        }
+      }
+      else if (ch1 == 'x'
+               && isxdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 2))
+               && isxdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 3)))
+      {
+        /* \xHH: backslash + 'x' + two hex digits */
+        advance = 4;
+        out_token_p->type = RE_TOK_CHAR;
+        /* FIXME: get unicode char from hex-digits */
+        /* result.value = ...; */
+      }
+      else if (ch1 == 'u'
+               && isxdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 2))
+               && isxdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 3))
+               && isxdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 4))
+               && isxdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 5)))
+      {
+        /* \uHHHH: backslash + 'u' + four hex digits, i.e. six characters
+         * (consuming only four left the last two digits in the pattern) */
+        advance = 6;
+        out_token_p->type = RE_TOK_CHAR;
+        /* FIXME: get unicode char from digits */
+        /* result.value = ...; */
+      }
+      else if (ch1 == 'd')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_DIGIT;
+      }
+      else if (ch1 == 'D')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_NOT_DIGIT;
+      }
+      else if (ch1 == 's')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_WHITE;
+      }
+      else if (ch1 == 'S')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_NOT_WHITE;
+      }
+      else if (ch1 == 'w')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_WORD_CHAR;
+      }
+      else if (ch1 == 'W')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_NOT_WORD_CHAR;
+      }
+      else if (isdigit (ch1))
+      {
+        if (ch1 == '0')
+        {
+          /* \0 is the NUL character, but only if no further digit follows */
+          if (isdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 2)))
+          {
+            ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp escape pattern error.");
+            break;
+          }
+
+          advance = 2;
+          out_token_p->value = RE_CONTROL_CHAR_NUL;
+        }
+        else
+        {
+          /* The group count is computed lazily, -1 means "not counted yet" */
+          if (parser_ctx_p->num_of_groups == -1)
+          {
+            re_count_num_of_groups (parser_ctx_p);
+          }
+
+          if (parser_ctx_p->num_of_groups)
+          {
+            uint32_t number = 0;
+            int index = 0;
+            advance = 0;
+
+            /* Collect the decimal digits; when the loop ends, advance is the
+             * offset of the first non-digit, i.e. the length of the escape. */
+            do
+            {
+              if (index >= RE_MAX_RE_DECESC_DIGITS)
+              {
+                ret_value = ecma_raise_syntax_error ((const ecma_char_t *)
+                                                     "RegExp escape pattern error: decimal escape too long.");
+                return ret_value;
+              }
+
+              advance++;
+              ecma_char_t digit = RE_LOOKUP (parser_ctx_p->current_char_p, advance);
+              if (!isdigit (digit))
+              {
+                break;
+              }
+              number = number * 10 + ecma_char_hex_to_int (digit);
+              index++;
+            }
+            while (true);
+
+            if ((int) number <= parser_ctx_p->num_of_groups)
+            {
+              out_token_p->type = RE_TOK_BACKREFERENCE;
+            }
+
+            out_token_p->value = number;
+          }
+          else
+          {
+            out_token_p->value = ch1;
+          }
+        }
+      }
+      else
+      {
+        /* Identity escape: the escaped character stands for itself */
+        out_token_p->value = ch1;
+      }
+
+      uint32_t iter_adv = 0;
+      ECMA_TRY_CATCH (empty,
+                      parse_re_iterator (parser_ctx_p->current_char_p,
+                                         out_token_p,
+                                         advance,
+                                         &iter_adv),
+                      ret_value);
+      advance += iter_adv;
+      ECMA_FINALIZE (empty);
+      break;
+    }
+    case '(':
+    {
+      if (RE_LOOKUP (parser_ctx_p->current_char_p, 1) == '?')
+      {
+        ecma_char_t ch2 = RE_LOOKUP (parser_ctx_p->current_char_p, 2);
+        if (ch2 == '=')
+        {
+          /* (?= */
+          advance = 3;
+          out_token_p->type = RE_TOK_ASSERT_START_POS_LOOKAHEAD;
+        }
+        else if (ch2 == '!')
+        {
+          /* (?! */
+          advance = 3;
+          out_token_p->type = RE_TOK_ASSERT_START_NEG_LOOKAHEAD;
+        }
+        else if (ch2 == ':')
+        {
+          /* (?: */
+          advance = 3;
+          out_token_p->type = RE_TOK_START_NON_CAPTURE_GROUP;
+        }
+        else
+        {
+          /* Unknown "(?x" group: without an error nothing would be consumed
+           * and the output token would keep its previous content. */
+          ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid regexp group");
+        }
+      }
+      else
+      {
+        /* ( */
+        advance = 1;
+        out_token_p->type = RE_TOK_START_CAPTURE_GROUP;
+      }
+      break;
+    }
+    case ')':
+    {
+      ECMA_TRY_CATCH (empty,
+                      parse_re_iterator (parser_ctx_p->current_char_p,
+                                         out_token_p,
+                                         1,
+                                         &advance),
+                      ret_value);
+      advance += 1;
+      out_token_p->type = RE_TOK_END_GROUP;
+      ECMA_FINALIZE (empty);
+      break;
+    }
+    case '[':
+    {
+      advance = 1;
+      out_token_p->type = RE_TOK_START_CHAR_CLASS;
+      if (RE_LOOKUP (parser_ctx_p->current_char_p, 1) == '^')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_START_INV_CHAR_CLASS;
+      }
+      break;
+    }
+    case ']':
+    case '}':
+    case '?':
+    case '*':
+    case '+':
+    case '{':
+    {
+      /* These characters are only valid after an atom or inside a character
+       * class. A pattern such as "a]" or "|*" brings them here as the first
+       * character of a token, so report an error instead of asserting. */
+      ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid regexp token");
+      break;
+    }
+    case '\0':
+    {
+      advance = 0;
+      out_token_p->type = RE_TOK_EOF;
+      break;
+    }
+    default:
+    {
+      ECMA_TRY_CATCH (empty,
+                      parse_re_iterator (parser_ctx_p->current_char_p,
+                                         out_token_p,
+                                         1,
+                                         &advance),
+                      ret_value);
+      advance += 1;
+      out_token_p->type = RE_TOK_CHAR;
+      out_token_p->value = ch0;
+      ECMA_FINALIZE (empty);
+      break;
+    }
+  }
+
+  /* Move the read position only if the token was parsed successfully */
+  if (ecma_is_completion_value_empty (ret_value))
+  {
+    RE_ADVANCE (parser_ctx_p->current_char_p, advance);
+  }
+
+  return ret_value;
+} /* re_parse_next_token */
+
+#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
diff --git a/jerry-core/parser/regexp/re-parser.h
b/jerry-core/parser/regexp/re-parser.h new file mode 100644 index 0000000000..160cbce7cd --- /dev/null +++ b/jerry-core/parser/regexp/re-parser.h @@ -0,0 +1,91 @@
+/* Copyright 2015 Samsung Electronics Co., Ltd.
+ * Copyright 2015 University of Szeged.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RE_PARSER_H
+#define RE_PARSER_H
+
+#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
+
+#include "opcodes-dumper.h"
+
+typedef uint8_t token_type_t; /**< kind of a RegExp token, holds one of the RE_TOK_* values below */
+
+#define RE_TOK_EOF                          0 /* EOF */
+#define RE_TOK_BACKREFERENCE                1 /* \[0..9] */
+#define RE_TOK_CHAR                         2 /* any character */
+#define RE_TOK_ALTERNATIVE                  3 /* | */
+#define RE_TOK_ASSERT_START                 4 /* ^ */
+#define RE_TOK_ASSERT_END                   5 /* $ */
+#define RE_TOK_PERIOD                       6 /* . */
+#define RE_TOK_START_CAPTURE_GROUP          7 /* ( */
+#define RE_TOK_START_NON_CAPTURE_GROUP      8 /* (?: */
+#define RE_TOK_END_GROUP                    9 /* ')' */
+#define RE_TOK_ASSERT_START_POS_LOOKAHEAD  10 /* (?= */
+#define RE_TOK_ASSERT_START_NEG_LOOKAHEAD  11 /* (?! */
+#define RE_TOK_ASSERT_WORD_BOUNDARY        12 /* \b */
+#define RE_TOK_ASSERT_NOT_WORD_BOUNDARY    13 /* \B */
+#define RE_TOK_DIGIT                       14 /* \d */
+#define RE_TOK_NOT_DIGIT                   15 /* \D */
+#define RE_TOK_WHITE                       16 /* \s */
+#define RE_TOK_NOT_WHITE                   17 /* \S */
+#define RE_TOK_WORD_CHAR                   18 /* \w */
+#define RE_TOK_NOT_WORD_CHAR               19 /* \W */
+#define RE_TOK_START_CHAR_CLASS            20 /* [ ] */
+#define RE_TOK_START_INV_CHAR_CLASS        21 /* [^ ] */
+
+#define RE_ITERATOR_INFINITE ((uint32_t)-1) /* presumably the qmax of an unbounded quantifier - TODO confirm in parse_re_iterator */
+#define RE_MAX_RE_DECESC_DIGITS 9 /* maximum number of digits accepted in a decimal escape (\NNN) */
+
+/* FIXME: depends on unicode support */
+#define RE_CHAR_UNDEF ((ecma_char_t)-1) /* marker meaning "no character", all bits of ecma_char_t set */
+
+#define RE_CONTROL_CHAR_NUL 0x0000 /* \0 */
+#define RE_CONTROL_CHAR_BEL 0x0008 /* \b: despite the name this is backspace (U+0008), BEL would be U+0007 */
+#define RE_CONTROL_CHAR_TAB 0x0009 /* \t */
+#define RE_CONTROL_CHAR_EOL 0x000a /* \n */
+#define RE_CONTROL_CHAR_VT  0x000b /* \v */
+#define RE_CONTROL_CHAR_FF  0x000c /* \f */
+#define RE_CONTROL_CHAR_CR  0x000d /* \r */
+
+typedef struct
+{
+  token_type_t type; /**< kind of the token (RE_TOK_*) */
+  uint32_t value; /**< character code for RE_TOK_CHAR, group number for RE_TOK_BACKREFERENCE */
+  uint32_t qmin; /**< presumably the minimum repeat count of the quantifier - TODO confirm in parse_re_iterator */
+  uint32_t qmax; /**< presumably the maximum repeat count of the quantifier - TODO confirm in parse_re_iterator */
+  bool greedy; /**< presumably false for lazy quantifiers such as *? - TODO confirm in parse_re_iterator */
+} re_token_t;
+
+typedef struct
+{
+  ecma_char_t *pattern_start_p; /**< presumably the first character of the pattern - TODO confirm */
+  ecma_char_t *current_char_p; /**< current read position, moved forward as tokens are consumed */
+  int num_of_groups; /**< number of groups in the pattern, -1 until re_count_num_of_groups has run */
+  uint32_t num_of_classes; /**< presumably the number of character classes - TODO confirm */
+} re_parser_ctx_t;
+
+typedef void (*re_char_class_callback) (void *re_ctx_p, uint32_t start, uint32_t end); /* receives one inclusive [start, end] range of a character class */
+
+ecma_completion_value_t
+re_parse_char_class (re_parser_ctx_t *parser_ctx_p,
+                     re_char_class_callback append_char_class,
+                     void *re_ctx_p, re_token_t *out_token_p); /* reports the ranges of a character class through append_char_class */
+
+ecma_completion_value_t
+re_parse_next_token (re_parser_ctx_t *parser_ctx_p, re_token_t *out_token_p); /* reads the next token at current_char_p into out_token_p */
+
+#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
+#endif /* RE_PARSER_H */
diff --git a/jerry-core/vm/opcodes-ecma-support.h b/jerry-core/vm/opcodes-ecma-support.h index 8fd39c9dab..9826c63d25 100644 --- a/jerry-core/vm/opcodes-ecma-support.h +++ b/jerry-core/vm/opcodes-ecma-support.h @@ -29,6 +29,7 @@ #include "ecma-objects.h" #include "ecma-objects-general.h" #include "ecma-reference.h" +#include "ecma-regexp-object.h" #include 
"ecma-try-catch-macro.h" #include "serializer.h" diff --git a/jerry-core/vm/opcodes.cpp b/jerry-core/vm/opcodes.cpp index 04e04948fa..d1e3a6fc59 100644 --- a/jerry-core/vm/opcodes.cpp +++ b/jerry-core/vm/opcodes.cpp @@ -1,4 +1,5 @@ /* Copyright 2014-2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -162,6 +163,65 @@ opfunc_assignment (opcode_t opdata, /**< operation data */ dst_var_idx, ecma_make_number_value (num_p)); } + else if (type_value_right == OPCODE_ARG_TYPE_REGEXP) + { +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN + lit_cpointer_t lit_cp = serializer_get_literal_cp_by_uid (src_val_descr, + int_data->opcodes_p, + int_data->pos); + ecma_string_t *string_p = ecma_new_ecma_string_from_lit_cp (lit_cp); + + int32_t re_str_len = ecma_string_get_length (string_p); + MEM_DEFINE_LOCAL_ARRAY (re_str_p, re_str_len + 1, ecma_char_t); + + ssize_t zt_str_size = (ssize_t) sizeof (ecma_char_t) * (re_str_len + 1); + ecma_string_to_zt_string (string_p, re_str_p, zt_str_size); + + ecma_char_t *ch_p = re_str_p; + ecma_char_t *last_slash_p = NULL; + while (*ch_p) + { + if (*ch_p == '/') + { + last_slash_p = ch_p; + } + ch_p++; + } + + JERRY_ASSERT (last_slash_p != NULL); + JERRY_ASSERT ((re_str_p < last_slash_p) && (last_slash_p < ch_p)); + JERRY_ASSERT ((last_slash_p - re_str_p) > 0); + ecma_string_t *pattern_p = ecma_new_ecma_string (re_str_p, (ecma_length_t) (last_slash_p - re_str_p)); + ecma_string_t *flags_p = NULL; + + if ((ch_p - last_slash_p) > 1) + { + flags_p = ecma_new_ecma_string (last_slash_p + 1, (ecma_length_t) ((ch_p - last_slash_p - 1))); + } + + ECMA_TRY_CATCH (regexp_obj_value, + ecma_op_create_regexp_object (pattern_p, flags_p), + ret_value); + + ret_value = set_variable_value (int_data, + int_data->pos, + dst_var_idx, + regexp_obj_value); + + ECMA_FINALIZE (regexp_obj_value); + + 
ecma_deref_ecma_string (pattern_p); + if (flags_p != NULL) + { + ecma_deref_ecma_string (flags_p); + } + + MEM_FINALIZE_LOCAL_ARRAY (re_str_p) + ecma_deref_ecma_string (string_p); +#else + JERRY_UNIMPLEMENTED ("Regular Expressions are not supported in compact profile!"); +#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ + } else { JERRY_ASSERT (type_value_right == OPCODE_ARG_TYPE_SMALLINT_NEGATE); diff --git a/jerry-core/vm/opcodes.h b/jerry-core/vm/opcodes.h index 1afea172c0..ff7d8594aa 100644 --- a/jerry-core/vm/opcodes.h +++ b/jerry-core/vm/opcodes.h @@ -50,7 +50,8 @@ typedef enum OPCODE_ARG_TYPE_NUMBER, /**< index of number literal */ OPCODE_ARG_TYPE_NUMBER_NEGATE, /**< index of number literal with negation */ OPCODE_ARG_TYPE_STRING, /**< index of string literal */ - OPCODE_ARG_TYPE_VARIABLE /**< index of variable name */ + OPCODE_ARG_TYPE_VARIABLE, /**< index of string literal with variable name */ + OPCODE_ARG_TYPE_REGEXP /**< index of string literal with regular expression */ } opcode_arg_type_operand; /** diff --git a/tests/jerry/regexp-alternatives.js b/tests/jerry/regexp-alternatives.js new file mode 100644 index 0000000000..dcb102ad86 --- /dev/null +++ b/tests/jerry/regexp-alternatives.js @@ -0,0 +1,61 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +var r; + +r = new RegExp ("a|b"); +assert (r.exec("a") == "a"); + +r = new RegExp ("a|b"); +assert (r.exec("b") == "b"); + +r = new RegExp ("a|b|c"); +assert (r.exec("b") == "b"); + +r = new RegExp ("a|b|c"); +assert (r.exec("c") == "c"); + +r = new RegExp ("a|b|c|d"); +assert (r.exec("") == undefined); + +r = new RegExp ("a|b|c|d"); +assert (r.exec("a") == "a"); + +r = new RegExp ("a|b|c|d"); +assert (r.exec("b") == "b"); + +r = new RegExp ("a|b|c|d"); +assert (r.exec("c") == "c"); + +r = new RegExp ("a|b|c|d"); +assert (r.exec("d") == "d"); + +r = new RegExp ("a|bb|c|d"); +assert (r.exec("e") == undefined); + +r = new RegExp ("a|bb|c|d"); +assert (r.exec("bb") == "bb"); + +r = new RegExp ("a|bb|c|d"); +assert (r.exec("bba") == "bb"); + +r = new RegExp ("a|bb|c|d"); +assert (r.exec("bbbb") == "bb"); + +r = new RegExp ("a|bb|c|d"); +assert (r.exec("a") == "a"); + +r = new RegExp ("a|bb|c|d"); +assert (r.exec("b") == undefined); diff --git a/tests/jerry/regexp-assertions.js b/tests/jerry/regexp-assertions.js new file mode 100644 index 0000000000..a8656b691d --- /dev/null +++ b/tests/jerry/regexp-assertions.js @@ -0,0 +1,152 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +var t; + +t = new RegExp ("^alma$").exec("alma"); +assert (t == "alma"); + +t = new RegExp ("^alma$").exec("almaa"); +assert (t == undefined); + +t = new RegExp ("^alma$").exec("aalma"); +assert (t == undefined); + +t = new RegExp ("^alma").exec("alma"); +assert (t == "alma"); + +t = new RegExp ("^alma").exec("almaa"); +assert (t == "alma"); + +t = new RegExp ("^alma").exec("aalma"); +assert (t == undefined); + +t = new RegExp ("alma$").exec("alma"); +assert (t == "alma"); + +t = new RegExp ("alma$").exec("almaa"); +assert (t == undefined); + +t = new RegExp ("alma$").exec("aalma"); +assert (t == "alma"); + +t = new RegExp ("\\bis\\b").exec("This island is beautiful"); +assert (t == "is"); + +t = new RegExp ("\\Bis\\B").exec("This island is beautiful"); +assert (t == undefined); + +t = new RegExp ("\\Bis").exec("This island is beautiful"); +assert (t == "is"); + +t = new RegExp ("is\\B").exec("This island is beautiful"); +assert (t == "is"); + +t = new RegExp ("\\Bis\\b").exec("This island is beautiful"); +assert (t == "is"); + +t = new RegExp ("\\bis\\B").exec("This island is beautiful"); +assert (t == "is"); + +t = new RegExp ("al(?=(ma))").exec("al"); +assert (t == undefined); + +t = new RegExp ("al(?!(ma))").exec("ala"); +assert (t[0] == "al"); +assert (t[1] == undefined); + +t = new RegExp ("al(?=(ma))").exec("alma"); +assert (t[0] == "al"); +assert (t[1] == "ma"); + +t = new RegExp ("al(?=(ma))").exec("almama"); +assert (t[0] == "al"); +assert (t[1] == "ma"); + +t = new RegExp ("(al)(?=(ma))ma").exec("al"); +assert (t == undefined); + +t = new RegExp ("(al)(?=(ma)ma)").exec("al"); +assert (t == undefined); + +t = new RegExp ("al(?=(ma))*ma").exec("alma"); +assert (t[0] == "alma"); +assert (t[1] == undefined); + +t = new RegExp ("al(?!(ma))*ma").exec("alma"); +assert (t[0] == "alma"); +assert (t[1] == undefined); + +t = new RegExp ("al(?=(ma))ma").exec("alma"); +assert (t[0] == "alma"); +assert (t[1] == "ma"); + +t = new RegExp 
("al(?!(ma))ma").exec("alma"); +assert (t == undefined); + +t = new RegExp ("(al)(?=(ma))ma").exec("almama"); +t = new RegExp ("(al)(?=(ma)ma)").exec("almama"); + +t = new RegExp ("al(?=(ma))ma").exec("almama"); +assert (t[0] == "alma"); +assert (t[1] == "ma"); + +t = new RegExp ("al(?=(ma)ma)").exec("almama"); +assert (t[0] == "al"); +assert (t[1] == "ma"); + +t = new RegExp ("al(?!(ma))ma").exec("almama"); +assert (t == undefined); + +t = new RegExp ("a(?=(a)(a))aab|aaac").exec("aaac"); +t = new RegExp ("a(?=(a)(a))aab|aaac").exec("aaab"); + +t = new RegExp ("(?!(a)b)|ab").exec("ab"); +assert (t[0] == "ab"); +assert (t[1] == undefined); + +t = new RegExp ("(?=(a)b)|ab").exec("ab"); +assert (t[0] == ""); +assert (t[1] == "a"); + +t = new RegExp ("(?=a|.)Dt").exec("Dt"); +assert (t == "Dt"); + +t = new RegExp ("(?=.|a)Dt").exec("Dt"); +assert (t == "Dt"); + +t = new RegExp ("(?=a|b)Dt").exec("Dt"); +assert (t == undefined); + +t = new RegExp ("(?=.|P)").exec("a"); +assert (t == ""); + +t = new RegExp ("(?=.)").exec("a"); +assert (t == ""); + +t = new RegExp ("(?!a|.)Dt").exec("Dt"); +assert (t == undefined); + +t = new RegExp ("(?!.|a)Dt").exec("Dt"); +assert (t == undefined); + +t = new RegExp ("(?!a|b)Dt").exec("Dt"); +assert (t == "Dt"); + +t = new RegExp ("(?!.|P)").exec("a"); +assert (t == ""); + +t = new RegExp ("(?!.)").exec("a"); +assert (t == ""); diff --git a/tests/jerry/regexp-backreference.js b/tests/jerry/regexp-backreference.js new file mode 100644 index 0000000000..d638ab5dad --- /dev/null +++ b/tests/jerry/regexp-backreference.js @@ -0,0 +1,27 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var r; + +r = new RegExp ("(a)b\\1").exec("aba"); +assert (r[0] == "aba"); +assert (r[1] == "a"); + +r = new RegExp ("(a)b\\1").exec("b"); +assert (r == undefined); + +r = new RegExp ("(a)*b\\1").exec("b"); +assert (r[0] == "b"); +assert (r[1] == undefined); diff --git a/tests/jerry/regexp-capture-groups.js b/tests/jerry/regexp-capture-groups.js new file mode 100644 index 0000000000..85bc8d21bb --- /dev/null +++ b/tests/jerry/regexp-capture-groups.js @@ -0,0 +1,199 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +var r; + +// Simple test cases +r = new RegExp ("()"); +assert (r.exec ("a") == ","); + +r = new RegExp ("(a)"); +assert (r.exec ("a") == "a,a"); + +r = new RegExp ("((a)b)c"); +assert (r.exec ("abc") == "abc,ab,a"); + +r = new RegExp ("(a)*"); +assert (r.exec ("b")[0] == ""); +assert (r.exec ("b")[1] == undefined); +assert (r.exec ("aaaa") == "aaaa,a"); + +r = new RegExp ("(a)+"); +assert (r.exec ("aaaa") == "aaaa,a"); + +r = new RegExp ("(a){4}"); +assert (r.exec ("aaaa") == "aaaa,a"); + +r = new RegExp ("(a){1,2}"); +assert (r.exec ("a") == "a,a"); +assert (r.exec ("aa") == "aa,a"); +assert (r.exec ("aaaa") == "aa,a"); + +r = new RegExp ("(a)?"); +assert (r.exec ("a") == "a,a"); +assert (r.exec ("b")[0] == ""); +assert (r.exec ("b")[1] == undefined); + +// Test greedy iterations +r = new RegExp ("(a){1,3}a"); +assert (r.exec("aa") == "aa,a"); + +r = new RegExp ("(a){1,3}a"); +assert (r.exec("aaa") == "aaa,a"); + +r = new RegExp ("(a){1,3}"); +assert (r.exec("a") == "a,a"); + +r = new RegExp ("(a){1,3}"); +assert (r.exec("aaa") == "aaa,a"); + +r = new RegExp ("(a){1,3}"); +assert (r.exec("aaaa") == "aaa,a"); + +r = new RegExp ("(a){1,5}"); +assert (r.exec("aaaa") == "aaaa,a"); + +r = new RegExp ("(a|b){1,2}"); +assert (r.exec("a") == "a,a"); + +r = new RegExp ("(a|b){1,3}a"); +assert (r.exec("aaa") == "aaa,a"); + +r = new RegExp ("(a|b){1,3}a"); +assert (r.exec("aba") == "aba,b"); + +r = new RegExp ("(a|b){1,3}a"); +assert (r.exec("b") == undefined); + +r = new RegExp ("(a|b){1,3}a"); +assert (r.exec("bbb") == undefined); + +r = new RegExp ("(a|b){1,3}"); +assert (r.exec("a") == "a,a"); + +r = new RegExp ("(a|b){1,3}"); +assert (r.exec("aa") == "aa,a"); + +r = new RegExp ("(a|b){1,3}"); +assert (r.exec("aaa") == "aaa,a"); + +r = new RegExp ("(a|b){1,3}"); +assert (r.exec("ab") == "ab,b"); + +r = new RegExp ("(a|b){1,3}"); +assert (r.exec("aba") == "aba,a"); + +r = new RegExp ("(a|b){1,3}"); +assert (r.exec("bab") == "bab,b"); + +r = new RegExp ("(a|b){1,3}"); 
+assert (r.exec("bbb") == "bbb,b"); + +r = new RegExp ("(a|b){1,4}a"); +assert (r.exec("bbb") == undefined); + +r = new RegExp ("(a|b){1,4}"); +assert (r.exec("ab") == "ab,b"); + +r = new RegExp ("(a|b){1,4}"); +assert (r.exec("aba") == "aba,a"); + +r = new RegExp ("(a|b){1,4}"); +assert (r.exec("bbb") == "bbb,b"); + +r = new RegExp ("(a|b){1,5}"); +assert (r.exec("aba") == "aba,a"); + +r = new RegExp ("(a|b){1,5}"); +assert (r.exec("abab") == "abab,b"); + +r = new RegExp ("(a|b){1,5}"); +assert (r.exec("bbb") == "bbb,b"); + +r = new RegExp ("(aba)*"); +assert (r.exec("aaaa") == ","); + +r = new RegExp ("(aba)+"); +assert (r.exec("aaaa") == undefined); + +r = new RegExp ("(a|bb|c|d)"); +assert (r.exec("a") == "a,a"); + +r = new RegExp ("(a|b)"); +assert (r.exec("a") == "a,a"); + +r = new RegExp ("(a|b)+"); +assert (r.exec("aba") == "aba,a"); + +r = new RegExp ("(a|b)"); +assert (r.exec("b") == "b,b"); + +r = new RegExp ("(a)"); +assert (r.exec("a") == "a,a"); + +r = new RegExp ("(a)*"); +assert (r.exec("a") == "a,a"); + +r = new RegExp ("(a)*"); +assert (r.exec("aaaa") == "aaaa,a"); + +r = new RegExp ("(a)+"); +assert (r.exec("aaaa") == "aaaa,a"); + +r = new RegExp ("(a|aa){0,3}b"); +assert (r.exec("aaaaaab") == "aaaaaab,aa"); + +r = new RegExp ("((a){2,3}){4}b"); +assert (r.exec("aaaaaaaab") == "aaaaaaaab,aa,a"); + +// Test non-greedy iterations +r = new RegExp ("(a)+?"); +assert (r.exec("aaaa") == "a,a"); + +r = new RegExp ("(a)*?aa"); +assert (r.exec("aaaa") == "aa,"); + +r = new RegExp ("(aaa|aa)*?aa"); +assert (r.exec("aaaa")[0] == "aa"); +assert (r.exec("aaaa")[1] == undefined); + +r = new RegExp ("(a)??aa"); +assert (r.exec("aaaa")[0] == "aa"); +assert (r.exec("aaaa")[1] == undefined); + +r = new RegExp ("(a)?aa"); +assert (r.exec("aaaa") == "aaa,a"); + +r = new RegExp ("(()*?)*?a"); +assert (r.exec("ba")[0] == "a"); +assert (r.exec("ba")[1] == undefined); +assert (r.exec("ba")[2] == undefined); + +r = new RegExp ("((bb?)*)*a"); +assert (r.exec("bbba") == 
"bbba,bbb,b"); + +r = new RegExp ("((bb?)*)*bbb\\Ba"); +assert (r.exec("bbba")[0] == "bbba"); +assert (r.exec("bbba")[1] == undefined); +assert (r.exec("bbba")[2] == undefined); + +r = new RegExp ("(a??){0,1}a"); +assert (r.exec("aa") == "aa,a"); + +r = new RegExp ("(a?){0,1}a"); +assert (r.exec("aa") == "aa,a"); + +r = new RegExp ("(a{0,1}?){0,1}a"); +assert (r.exec("aa") == "aa,a"); diff --git a/tests/jerry/regexp-character-class.js b/tests/jerry/regexp-character-class.js new file mode 100644 index 0000000000..aaa744deef --- /dev/null +++ b/tests/jerry/regexp-character-class.js @@ -0,0 +1,33 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var r; + +r = new RegExp ("[abc]*").exec("aaabbcccabcacbacabacbacab"); +assert (r == "aaabbcccabcacbacabacbacab"); + +r = new RegExp ("[abc]*").exec("aaabbcccabdcacb"); +assert (r == "aaabbcccab"); + +r = new RegExp ("[abc]*").exec("defghjklmnopqrstuvwxyz"); +assert (r == ""); + +r = new RegExp ("[a-z]*").exec("abcdefghjklmnopqrstuvwxyz"); +assert (r == "abcdefghjklmnopqrstuvwxyz"); + +r = new RegExp ("[A-Z]*").exec("abcdefghjklmnopqrstuvwxyz"); +assert (r == ""); + +// FIXME: Add more tescase when Unicode support is finished! 
diff --git a/tests/jerry/regexp-construct.js b/tests/jerry/regexp-construct.js new file mode 100644 index 0000000000..88faa9e771 --- /dev/null +++ b/tests/jerry/regexp-construct.js @@ -0,0 +1,88 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var r; + +r = new RegExp (); +assert (r.source == "(?:)"); +assert (r.global == false); +assert (r.ignoreCase == false); +assert (r.multiline == false); + +r = new RegExp ("a"); +assert (r.source == "a"); +assert (r.global == false); +assert (r.ignoreCase == false); +assert (r.multiline == false); + +r = new RegExp ("a","gim"); +assert (r.source == "a"); +assert (r.global == true); +assert (r.ignoreCase == true); +assert (r.multiline == true); + +r = RegExp ("a"); +assert (r.source == "a"); +assert (r.global == false); +assert (r.ignoreCase == false); +assert (r.multiline == false); + +r = RegExp ("a","gim"); +assert (r.source == "a"); +assert (r.global == true); +assert (r.ignoreCase == true); +assert (r.multiline == true); + +var r2; +try { + r2 = RegExp (r,"gim"); + assert(false); +} +catch ( e ) +{ + assert (e instanceof TypeError); +} + +r2 = RegExp (r); +assert (r2.source == "a"); +assert (r2.global == true); +assert (r2.ignoreCase == true); +assert (r2.multiline == true); + +r2 = RegExp (r, undefined); +assert (r2.source == "a"); +assert (r2.global == true); +assert (r2.ignoreCase == true); +assert (r2.multiline == 
true); + +r = /(?:)/; +assert (r.source == "(?:)"); +assert (r.global == false); +assert (r.ignoreCase == false); +assert (r.multiline == false); + +r = /a/; +assert (r.source == "a"); +assert (r.global == false); +assert (r.ignoreCase == false); +assert (r.multiline == false); + +r = /a/gim; +assert (r.source == "a"); +assert (r.global == true); +assert (r.ignoreCase == true); +assert (r.multiline == true); + +assert(Object.prototype.toString.call(RegExp.prototype) === '[object RegExp]'); diff --git a/tests/jerry/regexp-literal.js b/tests/jerry/regexp-literal.js new file mode 100644 index 0000000000..70124e9463 --- /dev/null +++ b/tests/jerry/regexp-literal.js @@ -0,0 +1,25 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var t; + +t = /\//.exec("/"); +assert (t == "/"); + +t = /[/]/.exec("/"); +assert ("a"+/x/+"b" == "a/x/b"); + +t = /\/\[[\]/]/.exec("/[/"); +assert (t == "/[/"); diff --git a/tests/jerry/regexp-non-capture-groups.js b/tests/jerry/regexp-non-capture-groups.js new file mode 100644 index 0000000000..55bbcc9ded --- /dev/null +++ b/tests/jerry/regexp-non-capture-groups.js @@ -0,0 +1,197 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var r; + +// Simple test cases +r = new RegExp ("(?:)"); +assert (r.exec ("a") == ""); + +r = new RegExp ("(?:a)"); +assert (r.exec ("a") == "a"); + +r = new RegExp ("(?:(?:a)b)c"); +assert (r.exec ("abc") == "abc"); + +r = new RegExp ("(?:a)*"); +assert (r.exec ("b") == ""); +assert (r.exec ("aaaa") == "aaaa"); + +r = new RegExp ("(?:a)+"); +assert (r.exec ("aaaa") == "aaaa"); + +r = new RegExp ("(?:a){4}"); +assert (r.exec ("aaaa") == "aaaa"); + +r = new RegExp ("(?:a){1,2}"); +assert (r.exec ("a") == "a"); +assert (r.exec ("aa") == "aa"); +assert (r.exec ("aaaa") == "aa"); + +r = new RegExp ("(?:a)?"); +assert (r.exec ("a") == "a"); +assert (r.exec ("b") == ""); + +// Test greedy iterations +r = new RegExp ("(?:a){1,3}a"); +assert (r.exec ("aa") == "aa"); + +r = new RegExp ("(?:a){1,3}a"); +assert (r.exec ("aaa") == "aaa"); + +r = new RegExp ("(?:a){1,3}"); +assert (r.exec ("a") == "a"); + +r = new RegExp ("(?:a){1,3}"); +assert (r.exec ("aaa") == "aaa"); + +r = new RegExp ("(?:a){1,3}"); +assert (r.exec ("aaaa") == "aaa"); + +r = new RegExp ("(?:a){1,5}"); +assert (r.exec ("aaaa") == "aaaa"); + +r = new RegExp ("(?:a|b){1,2}"); +assert (r.exec ("a") == "a"); + +r = new RegExp ("(?:a|b){1,3}a"); +assert (r.exec ("aaa") == "aaa"); + +r = new RegExp ("(?:a|b){1,3}a"); +assert (r.exec ("aba") == "aba"); + +r = new RegExp ("(?:a|b){1,3}a"); +assert (r.exec ("b") == undefined); + +r = new RegExp ("(?:a|b){1,3}a"); +assert (r.exec ("bbb") == undefined); + +r = new RegExp ("(?:a|b){1,3}"); +assert (r.exec ("a") == "a"); + +r = new RegExp 
("(?:a|b){1,3}"); +assert (r.exec ("aa") == "aa"); + +r = new RegExp ("(?:a|b){1,3}"); +assert (r.exec ("aaa") == "aaa"); + +r = new RegExp ("(?:a|b){1,3}"); +assert (r.exec ("ab") == "ab"); + +r = new RegExp ("(?:a|b){1,3}"); +assert (r.exec ("aba") == "aba"); + +r = new RegExp ("(?:a|b){1,3}"); +assert (r.exec ("bab") == "bab"); + +r = new RegExp ("(?:a|b){1,3}"); +assert (r.exec ("bbb") == "bbb"); + +r = new RegExp ("(?:a|b){1,4}a"); +assert (r.exec ("bbb") == undefined); + +r = new RegExp ("(?:a|b){1,4}"); +assert (r.exec ("ab") == "ab"); + +r = new RegExp ("(?:a|b){1,4}"); +assert (r.exec ("aba") == "aba"); + +r = new RegExp ("(?:a|b){1,4}"); +assert (r.exec ("bbb") == "bbb"); + +r = new RegExp ("(?:a|b){1,5}"); +assert (r.exec ("abab") == "abab"); + +r = new RegExp ("(?:aba)*"); +assert (r.exec ("aaaa") == ""); + +r = new RegExp ("(?:aba)+"); +assert (r.exec ("aaaa") == undefined); + +r = new RegExp ("(?:a|bb|c|d)"); +assert (r.exec ("a") == "a"); + +r = new RegExp ("(?:a|b|c|d)"); +assert (r.exec ("") == undefined); + +r = new RegExp ("(?:a|b|c|d)"); +assert (r.exec ("a") == "a"); + +r = new RegExp ("(?:a|b|c|d)"); +assert (r.exec ("b") == "b"); + +r = new RegExp ("(?:a|b|c|d)"); +assert (r.exec ("c") == "c"); + +r = new RegExp ("(?:a|b|c|d)"); +assert (r.exec ("d") == "d"); + +r = new RegExp ("(?:a|b)+"); +assert (r.exec ("aba") == "aba"); + +r = new RegExp ("(?:a|b)"); +assert (r.exec ("b") == "b"); + +r = new RegExp ("(?:a)"); +assert (r.exec ("a") == "a"); + +r = new RegExp ("(?:a)*"); +assert (r.exec ("a") == "a"); + +r = new RegExp ("(?:a)*"); +assert (r.exec ("aaaa") == "aaaa"); + +r = new RegExp ("(?:a)+"); +assert (r.exec ("aaaa") == "aaaa"); + +r = new RegExp ("(?:a)?aa"); +assert (r.exec ("aaaa") == "aaa"); + +r = new RegExp ("(?:a?){0,1}a"); +assert (r.exec ("aa") == "aa"); + +r = new RegExp ("(?:a|aa){0,3}b"); +assert (r.exec ("aaaaaab") == "aaaaaab"); + +r = new RegExp ("(?:(?:a){2,3}){4}b"); +assert (r.exec ("aaaaaaaab") == "aaaaaaaab"); + +// 
Test non-greedy iterations +r = new RegExp ("(?:a)+?"); +assert (r.exec ("aaaa") == "a"); + +r = new RegExp ("(?:a)*?aa"); +assert (r.exec ("aaaa") == "aa"); + +r = new RegExp ("(?:aaa|aa)*?aa"); +assert (r.exec ("aaaa") == "aa"); + +r = new RegExp ("(?:a)??aa"); +assert (r.exec ("aaaa") == "aa"); + +r = new RegExp ("(?:(?:)*?)*?a"); +assert (r.exec ("ba") == "a"); + +r = new RegExp ("(?:(?:bb?)*)*a"); +assert (r.exec ("bbba") == "bbba"); + +r = new RegExp ("(?:(?:bb?)*)*bbb\\Ba"); +assert (r.exec ("bbba") == "bbba"); + +r = new RegExp ("(?:a??){0,1}a"); +assert (r.exec ("aa") == "aa"); + +r = new RegExp ("(?:a{0,1}?){0,1}a"); +assert (r.exec ("aa") == "aa"); diff --git a/tests/jerry/regexp-routines.js b/tests/jerry/regexp-routines.js new file mode 100644 index 0000000000..df3653ca50 --- /dev/null +++ b/tests/jerry/regexp-routines.js @@ -0,0 +1,50 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+var r;
+// Checks exec, test and toString on a RegExp instance, and expects a TypeError when each is invoked with a plain object as this.
+r = new RegExp ("a");
+assert (r.exec ("a") == "a");
+assert (r.exec ("b") == undefined); // no match expected; loose == would accept null as well
+try {
+  r.exec.call({}, "a"); // this is a plain object here, not a RegExp
+  assert (false) // reached only if the call above did not throw
+}
+catch (e)
+{
+  assert (e instanceof TypeError);
+}
+
+assert (r.test ("a") == true);
+assert (r.test ("b") == false);
+try {
+  r.test.call({}, "a"); // same receiver check as for exec
+  assert (false) // reached only if the call above did not throw
+}
+catch (e)
+{
+  assert (e instanceof TypeError);
+}
+
+r = new RegExp ("a", "mig"); // flags are supplied in the order m, i, g
+assert (r.toString () == "/a/gim"); // the expected string lists the flags as g, i, m
+try {
+  r.toString.call({}, "a"); // same receiver check as for exec and test
+  assert (false) // reached only if the call above did not throw
+}
+catch (e)
+{
+  assert (e instanceof TypeError);
+}
diff --git a/tests/jerry/regexp-simple-atom-and-iterations.js b/tests/jerry/regexp-simple-atom-and-iterations.js new file mode 100644 index 0000000000..71d2aafe9b --- /dev/null +++ b/tests/jerry/regexp-simple-atom-and-iterations.js @@ -0,0 +1,55 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+var r;
+// Each group compiles one pattern, then checks the exec result for a few inputs using loose equality.
+r = new RegExp ("a"); // one literal character
+assert (r.exec ("a") == "a");
+assert (r.exec ("b") == undefined); // no match expected; loose == would accept null as well
+
+r = new RegExp ("abc"); // literal sequence
+assert (r.exec ("abc") == "abc");
+
+r = new RegExp ("a*"); // star quantifier
+assert (r.exec ("aaa") == "aaa");
+assert (r.exec ("b") == ""); // an empty match is expected here, not a failure
+
+r = new RegExp ("a+"); // plus quantifier
+assert (r.exec ("aaa") == "aaa");
+assert (r.exec ("b") == undefined); // unlike the star case, no match is expected
+
+r = new RegExp ("ab*"); // literal followed by a starred atom
+assert (r.exec ("a") == "a");
+assert (r.exec ("ab") == "ab");
+assert (r.exec ("abbbb") == "abbbb");
+assert (r.exec ("bbb") == undefined); // input has no a, so no match is expected
+
+r = new RegExp ("a?"); // optional atom
+assert (r.exec ("a") == "a");
+assert (r.exec ("b") == ""); // empty match expected
+
+r = new RegExp ("a{4}"); // exact repeat count
+assert (r.exec ("aaa") == undefined); // three is too few
+assert (r.exec ("aaaaa") == "aaaa"); // only four of the five are expected in the match
+assert (r.exec ("aaaa") == "aaaa");
+
+r = new RegExp ("a{2,6}"); // bounded repeat range
+assert (r.exec ("a") == undefined); // below the lower bound
+assert (r.exec ("aa") == "aa");
+assert (r.exec ("aaaaaa") == "aaaaaa");
+assert (r.exec ("aaaaaaa") == "aaaaaa"); // seven in the input, six expected in the match
+
+r = new RegExp (".*"); // starred dot
+assert (r.exec ("abcdefghijkl") == "abcdefghijkl"); // whole input expected