Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement ECMA262 RegExp builtin object and RegExp engine #169

Merged
merged 7 commits into from
Jun 26, 2015
3 changes: 3 additions & 0 deletions jerry-core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ project (JerryCore CXX C ASM)
${CMAKE_SOURCE_DIR}/jerry-core/ecma/operations
${CMAKE_SOURCE_DIR}/jerry-core/parser/js
${CMAKE_SOURCE_DIR}/jerry-core/parser/js/collections
${CMAKE_SOURCE_DIR}/jerry-core/parser/regexp
${CMAKE_SOURCE_DIR}/jerry-core/jrt)

# Third-party
Expand All @@ -120,6 +121,7 @@ project (JerryCore CXX C ASM)
file(GLOB SOURCE_CORE_ECMA_OPERATIONS ecma/operations/*.cpp)
file(GLOB SOURCE_CORE_PARSER_JS parser/js/*.cpp)
file(GLOB SOURCE_CORE_PARSER_JS_COLLECTIONS parser/js/collections/*.cpp)
file(GLOB SOURCE_CORE_PARSER_REGEXP parser/regexp/*.cpp)
file(GLOB SOURCE_CORE_JRT jrt/*.cpp)

set(SOURCE_CORE
Expand All @@ -134,6 +136,7 @@ project (JerryCore CXX C ASM)
${SOURCE_CORE_ECMA_OPERATIONS}
${SOURCE_CORE_PARSER_JS}
${SOURCE_CORE_PARSER_JS_COLLECTIONS}
${SOURCE_CORE_PARSER_REGEXP}
${SOURCE_CORE_JRT})

# Per-option configuration
Expand Down
1 change: 1 addition & 0 deletions jerry-core/ecma/base/ecma-gc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@ ecma_gc_mark (ecma_object_t *object_p) /**< object to mark from */
case ECMA_INTERNAL_PROPERTY_EXTENSION_ID: /* an integer */
case ECMA_INTERNAL_PROPERTY_NON_INSTANTIATED_BUILT_IN_MASK_0_31: /* an integer (bit-mask) */
case ECMA_INTERNAL_PROPERTY_NON_INSTANTIATED_BUILT_IN_MASK_32_63: /* an integer (bit-mask) */
case ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE:
{
break;
}
Expand Down
5 changes: 5 additions & 0 deletions jerry-core/ecma/base/ecma-globals.h
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,11 @@ typedef enum
*/
ECMA_INTERNAL_PROPERTY_NON_INSTANTIATED_BUILT_IN_MASK_32_63,

/**
* RegExp bytecode array
*/
ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE,

/**
* Number of internal properties' types
*/
Expand Down
58 changes: 58 additions & 0 deletions jerry-core/ecma/base/ecma-helpers-char.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,64 @@ ecma_char_is_line_terminator (ecma_char_t c) /**< character value */
|| ecma_char_is_new_line (c));
} /* ecma_char_is_line_terminator */

/**
 * Test whether a character belongs to the RegExp "word character" set:
 * ASCII letters, decimal digits and the underscore.
 *
 * See also: ECMA-262 v5, 15.10.2.6 (IsWordChar)
 *
 * @return true - if the character is a word character
 *         false - otherwise.
 */
bool
ecma_char_is_word_char (ecma_char_t c) /**< character value */
{
  const bool is_lower_letter = (c >= 'a' && c <= 'z');
  const bool is_upper_letter = (c >= 'A' && c <= 'Z');
  const bool is_decimal_digit = (c >= '0' && c <= '9');

  return (is_lower_letter || is_upper_letter || is_decimal_digit || c == '_');
} /* ecma_char_is_word_char */

/**
 * Convert a hex character to an unsigned integer
 *
 * Both lower-case and upper-case hex letters are accepted.
 * Passing any character outside [0-9A-Fa-f] reaches JERRY_UNREACHABLE ().
 *
 * @return digit value, corresponding to the hex char
 */
uint32_t
ecma_char_hex_to_int (ecma_char_t hex) /**< [0-9A-Fa-f] character value */
{
  if (hex >= '0' && hex <= '9')
  {
    return (uint32_t) (hex - '0');
  }

  if (hex >= 'a' && hex <= 'f')
  {
    return (uint32_t) (hex - 'a') + 0xA;
  }

  if (hex >= 'A' && hex <= 'F')
  {
    return (uint32_t) (hex - 'A') + 0xA;
  }

  JERRY_UNREACHABLE ();
} /* ecma_char_hex_to_int */

/**
* @}
* @}
Expand Down
62 changes: 49 additions & 13 deletions jerry-core/ecma/base/ecma-helpers-string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,54 @@ ecma_init_ecma_string_from_magic_string_ex_id (ecma_string_t *string_p, /**< des
string_p->u.magic_string_ex_id = magic_string_ex_id;
} /* ecma_init_ecma_string_from_magic_string_ex_id */

/**
 * Allocate new ecma-string and fill it with specified number of characters from specified buffer
 *
 * If only a prefix of the zero-terminated input is requested and that prefix equals
 * a magic string (or an external magic string), the corresponding magic ecma-string
 * is returned instead of a heap-chunk based copy.
 * If 'length' covers the whole zero-terminated input, no magic string lookup is done here.
 *
 * @return pointer to ecma-string descriptor
 */
ecma_string_t*
ecma_new_ecma_string (const ecma_char_t *string_p, /**< input string */
                      const ecma_length_t length) /**< number of characters */
{
  JERRY_ASSERT (string_p != NULL);
  JERRY_ASSERT (length > 0 && length <= ecma_zt_string_length (string_p));

  if (length != ecma_zt_string_length (string_p))
  {
    /*
     * The magic string checks below take a zero-terminated string,
     * so the requested prefix is copied to a temporary, terminated buffer.
     *
     * The buffer size is counted in bytes: (length + 1) characters of sizeof (ecma_char_t) each.
     * Allocating just (length + 1) bytes would be overrun by the memcpy and by the terminator
     * store whenever ecma_char_t is wider than one byte.
     */
    /* FIXME: update this when 'ecma_is_charset_magic' interface is added */
    const size_t zt_str_size = ((size_t) length + 1) * sizeof (ecma_char_t);
    ecma_char_t *zt_str_p = (ecma_char_t *) mem_heap_alloc_block (zt_str_size, MEM_HEAP_ALLOC_SHORT_TERM);
    memcpy (zt_str_p, string_p, length * sizeof (ecma_char_t));
    zt_str_p[length] = 0;

    ecma_magic_string_id_t magic_string_id;
    if (ecma_is_zt_string_magic (zt_str_p, &magic_string_id))
    {
      mem_heap_free_block (zt_str_p);
      return ecma_get_magic_string (magic_string_id);
    }

    ecma_magic_string_ex_id_t magic_string_ex_id;
    if (ecma_is_zt_ex_string_magic (zt_str_p, &magic_string_ex_id))
    {
      mem_heap_free_block (zt_str_p);
      return ecma_get_magic_string_ex (magic_string_ex_id);
    }

    /* Not a magic string: the temporary copy is no longer needed. */
    mem_heap_free_block (zt_str_p);
  }

  ecma_string_t *string_desc_p = ecma_alloc_string ();
  string_desc_p->refs = 1;
  string_desc_p->is_stack_var = false;
  string_desc_p->container = ECMA_STRING_CONTAINER_HEAP_CHUNKS;
  string_desc_p->hash = ecma_chars_buffer_calc_hash_last_chars (string_p, length);
  string_desc_p->u.common_field = 0;

  /* Characters are stored in a chars collection referenced through a compressed pointer. */
  ecma_collection_header_t *collection_p = ecma_new_chars_collection (string_p, length);

  ECMA_SET_NON_NULL_POINTER (string_desc_p->u.collection_cp, collection_p);

  return string_desc_p;
} /* ecma_new_ecma_string */

/**
* Allocate new ecma-string and fill it with characters from specified buffer
*
Expand Down Expand Up @@ -485,19 +533,7 @@ ecma_new_ecma_string (const ecma_char_t *string_p) /**< zero-terminated string *
length++;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove the above checks for equality to a magic string, as they are now performed in the function that is called from there.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That was my first thought too. :) I didn't want to do the string duplication when the whole string is a magic string. But I see the problem now. When we create an ecma_string_t from the whole zero-terminated string, these checks will be evaluated twice and there will be an unnecessary string duplication too. I should clean this up.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is fixed now. We don't copy and check again when the whole zero-terminated string is converted to ecma_string_t.

}

JERRY_ASSERT (length > 0);

ecma_string_t* string_desc_p = ecma_alloc_string ();
string_desc_p->refs = 1;
string_desc_p->is_stack_var = false;
string_desc_p->container = ECMA_STRING_CONTAINER_HEAP_CHUNKS;
string_desc_p->hash = ecma_chars_buffer_calc_hash_last_chars (string_p, length);

string_desc_p->u.common_field = 0;
ecma_collection_header_t *collection_p = ecma_new_chars_collection (string_p, length);
ECMA_SET_NON_NULL_POINTER (string_desc_p->u.collection_cp, collection_p);

return string_desc_p;
return ecma_new_ecma_string (string_p, length);
} /* ecma_new_ecma_string */

/**
Expand Down
5 changes: 5 additions & 0 deletions jerry-core/ecma/base/ecma-helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,11 @@ ecma_free_internal_property (ecma_property_t *property_p) /**< the property */
{
JERRY_UNREACHABLE ();
}
case ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE:
{
void *bytecode_p = ECMA_GET_NON_NULL_POINTER (void, property_value);
mem_heap_free_block (bytecode_p);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need this block?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We had to introduce a new type of internal property to store the bytecode of a RegExp. This property stores a compressed pointer to the start of the bytecode container. The compiled bytecode must exist for as long as the RegExp object exists. When the object is freed and ecma_free_internal_property is called, we have to free the allocated bytecode container to avoid a memory leak.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see.

}
}

ecma_dealloc_property (property_p);
Expand Down
17 changes: 10 additions & 7 deletions jerry-core/ecma/base/ecma-helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
*/
#define ECMA_SET_POINTER(field, non_compressed_pointer) MEM_CP_SET_POINTER (field, non_compressed_pointer)

/* ecma-helpers-value.c */
/* ecma-helpers-value.cpp */
extern bool ecma_is_value_empty (ecma_value_t value);
extern bool ecma_is_value_undefined (ecma_value_t value);
extern bool ecma_is_value_null (ecma_value_t value);
Expand Down Expand Up @@ -109,7 +109,8 @@ extern bool ecma_is_completion_value_normal_true (ecma_completion_value_t value)
extern bool ecma_is_completion_value_normal_false (ecma_completion_value_t value);
extern bool ecma_is_completion_value_empty (ecma_completion_value_t value);

/* ecma-helpers-string.c */
/* ecma-helpers-string.cpp */
extern ecma_string_t* ecma_new_ecma_string (const ecma_char_t *string_p, const ecma_length_t length);
extern ecma_string_t* ecma_new_ecma_string (const ecma_char_t *string_p);
extern ecma_string_t* ecma_new_ecma_string_from_uint32 (uint32_t uint_number);
extern ecma_string_t* ecma_new_ecma_string_from_number (ecma_number_t number);
Expand Down Expand Up @@ -161,7 +162,7 @@ extern bool ecma_is_zt_ex_string_magic (const ecma_char_t *zt_string_p, ecma_mag
extern ecma_string_hash_t ecma_string_hash (const ecma_string_t *string_p);
extern ecma_string_hash_t ecma_chars_buffer_calc_hash_last_chars (const ecma_char_t *chars, ecma_length_t length);

/* ecma-helpers-number.c */
/* ecma-helpers-number.cpp */
extern const ecma_number_t ecma_number_relative_eps;

extern ecma_number_t ecma_number_make_nan (void);
Expand Down Expand Up @@ -199,7 +200,7 @@ extern void ecma_number_to_decimal (ecma_number_t num,
int32_t *out_digits_num_p,
int32_t *out_decimal_exp_p);

/* ecma-helpers-values-collection.c */
/* ecma-helpers-values-collection.cpp */

extern ecma_collection_header_t *ecma_new_values_collection (const ecma_value_t values_buffer[],
ecma_length_t values_number,
Expand Down Expand Up @@ -227,7 +228,7 @@ ecma_collection_iterator_init (ecma_collection_iterator_t *iterator_p,
extern bool
ecma_collection_iterator_next (ecma_collection_iterator_t *iterator_p);

/* ecma-helpers.c */
/* ecma-helpers.cpp */
extern ecma_object_t* ecma_create_object (ecma_object_t *prototype_object_p,
bool is_extensible,
ecma_object_type_t type);
Expand Down Expand Up @@ -308,7 +309,7 @@ extern ecma_property_descriptor_t ecma_make_empty_property_descriptor (void);
extern void ecma_free_property_descriptor (ecma_property_descriptor_t *prop_desc_p);
extern ecma_property_descriptor_t ecma_get_property_descriptor_from_property (ecma_property_t *prop_p);

/* ecma-helpers-external-pointers.c */
/* ecma-helpers-external-pointers.cpp */
extern bool
ecma_create_external_pointer_property (ecma_object_t *obj_p,
ecma_internal_property_id_t id,
Expand All @@ -320,7 +321,7 @@ ecma_get_external_pointer_value (ecma_object_t *obj_p,
extern void
ecma_free_external_pointer_in_property (ecma_property_t *prop_p);

/* ecma-helpers-conversion.c */
/* ecma-helpers-conversion.cpp */
extern ecma_number_t ecma_zt_string_to_number (const ecma_char_t *str_p);
extern ssize_t ecma_uint32_to_string (uint32_t value, ecma_char_t *out_buffer_p, ssize_t buffer_size);
extern uint32_t ecma_number_to_uint32 (ecma_number_t value);
Expand All @@ -333,6 +334,8 @@ extern ecma_length_t ecma_number_to_zt_string (ecma_number_t num, ecma_char_t *b
extern bool ecma_char_is_new_line (ecma_char_t c);
extern bool ecma_char_is_carriage_return (ecma_char_t c);
extern bool ecma_char_is_line_terminator (ecma_char_t c);
extern bool ecma_char_is_word_char (ecma_char_t c);
extern uint32_t ecma_char_hex_to_int (ecma_char_t hex);

#endif /* !JERRY_ECMA_HELPERS_H */

Expand Down
15 changes: 14 additions & 1 deletion jerry-core/ecma/base/ecma-magic-strings.inc.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@ ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_STRING, "string")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_OBJECT, "object")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_FUNCTION, "function")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_LENGTH, "length")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_SOURCE, "source")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_GLOBAL, "global")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_IGNORECASE_UL, "ignoreCase")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_MULTILINE, "multiline")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_INDEX, "index")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_INPUT, "input")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_LASTINDEX_UL, "lastIndex")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_NAN, "NaN")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_INFINITY_UL, "Infinity")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_UNDEFINED_UL, "Undefined")
Expand All @@ -44,7 +51,8 @@ ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_STRING_UL, "String")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_BOOLEAN_UL, "Boolean")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_NUMBER_UL, "Number")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_DATE_UL, "Date")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_REG_EXP_UL, "RegExp")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_REGEXP_UL, "RegExp")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_REGEXP_SOURCE_UL, "Source")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_ERROR_UL, "Error")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_EVAL_ERROR_UL, "EvalError")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_RANGE_ERROR_UL, "RangeError")
Expand Down Expand Up @@ -205,6 +213,11 @@ ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_EXEC, "exec")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_TEST, "test")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_NAME, "name")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_MESSAGE, "message")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_G_CHAR, "g")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_I_CHAR, "i")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_M_CHAR, "m")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_SLASH_CHAR, "/")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_EMPTY_NON_CAPTURE_GROUP, "(?:)")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_LEFT_SQUARE_CHAR, "[")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_RIGHT_SQUARE_CHAR, "]")
ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_COLON_CHAR, ":")
Expand Down
12 changes: 7 additions & 5 deletions jerry-core/ecma/builtin-objects/ecma-builtin-global.inc.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,12 +133,14 @@ OBJECT_VALUE (ECMA_MAGIC_STRING_DATE_UL,
ECMA_PROPERTY_CONFIGURABLE)
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_DATE_BUILTIN */

#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
// ECMA-262 v5, 15.1.4.8
CP_UNIMPLEMENTED_VALUE (ECMA_MAGIC_STRING_REG_EXP_UL,
ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP),
ECMA_PROPERTY_WRITABLE,
ECMA_PROPERTY_NOT_ENUMERABLE,
ECMA_PROPERTY_CONFIGURABLE)
OBJECT_VALUE (ECMA_MAGIC_STRING_REGEXP_UL,
ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP),
ECMA_PROPERTY_WRITABLE,
ECMA_PROPERTY_NOT_ENUMERABLE,
ECMA_PROPERTY_CONFIGURABLE)
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */

#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ERROR_BUILTINS
// ECMA-262 v5, 15.1.4.9
Expand Down
Loading