From 62d400d89212b5a5c05e765512dca35ddd1a5ec5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20B=C3=A1tyai?= Date: Tue, 20 Oct 2015 17:35:33 +0200 Subject: [PATCH] Use heap allocated buffer instead of char collection in ecma_strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JerryScript-DCO-1.0-Signed-off-by: Dániel Bátyai dbatyai.u-szeged@partner.samsung.com --- jerry-core/ecma/base/ecma-globals.h | 17 +- jerry-core/ecma/base/ecma-helpers-string.cpp | 358 ++++--------------- 2 files changed, 75 insertions(+), 300 deletions(-) diff --git a/jerry-core/ecma/base/ecma-globals.h b/jerry-core/ecma/base/ecma-globals.h index 6eceae0b67..02a0d5fbea 100644 --- a/jerry-core/ecma/base/ecma-globals.h +++ b/jerry-core/ecma/base/ecma-globals.h @@ -778,8 +778,7 @@ typedef struct typedef enum { ECMA_STRING_CONTAINER_LIT_TABLE, /**< actual data is in literal table */ - ECMA_STRING_CONTAINER_HEAP_CHUNKS, /**< actual data is on the heap - in a ecma_collection_chunk_t chain */ + ECMA_STRING_CONTAINER_STRING_DATA, /**< actual data is on the heap */ ECMA_STRING_CONTAINER_HEAP_NUMBER, /**< actual data is on the heap as a ecma_number_t */ ECMA_STRING_CONTAINER_UINT32_IN_DESC, /**< actual data is UInt32-represeneted Number stored locally in the string's descriptor */ @@ -795,6 +794,18 @@ FIXME (Move to library that should define the type (literal.h /* ? 
*/)) typedef rcs_record_t *literal_t; typedef rcs_cpointer_t lit_cpointer_t; +#define ECMA_STRING_DATA_EXTRA_SIZE (2 * sizeof (uint16_t)) /**< bytes taken by the size and length fields */ + +/** + * ECMA string data descriptor: header of the heap block that stores a string's utf-8 bytes + */ +typedef struct ecma_string_data_t +{ + uint16_t size; /**< size of the string data in bytes (value returned by ecma_string_get_size) */ + uint16_t length; /**< length of the string (value returned by ecma_string_get_length) */ + lit_utf8_byte_t data; /**< first byte of the string data, the rest follows in the same heap block */ +} ecma_string_data_t; + /** * ECMA string-value descriptor */ @@ -822,7 +833,7 @@ typedef struct ecma_string_t lit_cpointer_t lit_cp; - /** Compressed pointer to an ecma_collection_header_t */ - mem_cpointer_t collection_cp : ECMA_POINTER_FIELD_WIDTH; + /** Compressed pointer to an ecma_string_data_t */ + mem_cpointer_t data_cp : ECMA_POINTER_FIELD_WIDTH; /** Compressed pointer to an ecma_number_t */ mem_cpointer_t number_cp : ECMA_POINTER_FIELD_WIDTH; diff --git a/jerry-core/ecma/base/ecma-helpers-string.cpp b/jerry-core/ecma/base/ecma-helpers-string.cpp index 053372695c..66b6e12a53 100644 --- a/jerry-core/ecma/base/ecma-helpers-string.cpp +++ b/jerry-core/ecma/base/ecma-helpers-string.cpp @@ -56,264 +56,6 @@ static void ecma_init_ecma_string_from_magic_string_ex_id (ecma_string_t *string_p, lit_magic_string_ex_id_t magic_string_ex_id, bool is_stack_var); -/** - * Allocate a collection of ecma-chars. 
- * - * @return pointer to the collection's header - */ -static ecma_collection_header_t* -ecma_new_chars_collection (const lit_utf8_byte_t chars_buffer[], /**< utf-8 chars */ - lit_utf8_size_t chars_size) /**< size of buffer with chars */ -{ - JERRY_ASSERT (chars_buffer != NULL); - JERRY_ASSERT (chars_size > 0); - - ecma_collection_header_t* collection_p = ecma_alloc_collection_header (); - - collection_p->unit_number = chars_size; - - mem_cpointer_t* next_chunk_cp_p = &collection_p->first_chunk_cp; - lit_utf8_byte_t *cur_char_buf_iter_p = NULL; - lit_utf8_byte_t *cur_char_buf_end_p = NULL; - - for (lit_utf8_size_t byte_index = 0; - byte_index < chars_size; - byte_index++) - { - if (cur_char_buf_iter_p == cur_char_buf_end_p) - { - ecma_collection_chunk_t *chunk_p = ecma_alloc_collection_chunk (); - ECMA_SET_NON_NULL_POINTER (*next_chunk_cp_p, chunk_p); - next_chunk_cp_p = &chunk_p->next_chunk_cp; - - cur_char_buf_iter_p = (lit_utf8_byte_t *) chunk_p->data; - cur_char_buf_end_p = cur_char_buf_iter_p + sizeof (chunk_p->data); - } - - JERRY_ASSERT (cur_char_buf_iter_p + 1 <= cur_char_buf_end_p); - - *cur_char_buf_iter_p++ = chars_buffer[byte_index]; - } - - *next_chunk_cp_p = ECMA_NULL_POINTER; - - return collection_p; -} /* ecma_new_chars_collection */ - -/** - * Get length of a collection of ecma-chars - * - * NOTE: - * While chars collection holds a string in utf-8 encoding, this function acts as if the string was encoded in - * UTF-16 and returns number of 16-bit characters (code units) required for string representation in this format. 
- * - * @return number of UTF-16 code units in a collecton - */ -static ecma_length_t -ecma_get_chars_collection_length (const ecma_collection_header_t *header_p) /**< collection's header */ -{ - JERRY_ASSERT (header_p != NULL); - - const ecma_length_t chars_number = header_p->unit_number; - - const lit_utf8_byte_t *cur_char_buf_iter_p = NULL; - const lit_utf8_byte_t *cur_char_buf_end_p = NULL; - - mem_cpointer_t next_chunk_cp = header_p->first_chunk_cp; - lit_utf8_size_t skip_bytes = 0; - ecma_length_t length = 0; - - ecma_length_t char_index; - for (char_index = 0; - char_index < chars_number; - char_index++) - { - if (cur_char_buf_iter_p == cur_char_buf_end_p) - { - const ecma_collection_chunk_t *chunk_p = ECMA_GET_NON_NULL_POINTER (ecma_collection_chunk_t, next_chunk_cp); - - cur_char_buf_iter_p = (lit_utf8_byte_t *) chunk_p->data; - cur_char_buf_end_p = cur_char_buf_iter_p + sizeof (chunk_p->data); - - next_chunk_cp = chunk_p->next_chunk_cp; - } - - JERRY_ASSERT (cur_char_buf_iter_p + 1 <= cur_char_buf_end_p); - - if (skip_bytes == 0) - { - skip_bytes = lit_get_unicode_char_size_by_utf8_first_byte (*cur_char_buf_iter_p); - length += (skip_bytes == 4) ? 2 : 1; - skip_bytes--; - } - else - { - skip_bytes--; - } - cur_char_buf_iter_p++; - } - - JERRY_ASSERT (char_index == chars_number); - - return length; -} /* ecma_get_chars_collection_length */ - -/** - * Compare two collection of ecma-chars. - * - * @return true - if collections are equal, - * false - otherwise. 
- */ -static bool -ecma_compare_chars_collection (const ecma_collection_header_t* header1_p, /**< first collection's header */ - const ecma_collection_header_t* header2_p) /**< second collection's header */ -{ - JERRY_ASSERT (header1_p != NULL && header2_p != NULL); - - if (header1_p->unit_number != header2_p->unit_number) - { - return false; - } - - const ecma_length_t chars_number = header1_p->unit_number; - - const lit_utf8_byte_t *cur_char_buf1_iter_p = NULL; - const lit_utf8_byte_t *cur_char_buf1_end_p = NULL; - const lit_utf8_byte_t *cur_char_buf2_iter_p = NULL; - const lit_utf8_byte_t *cur_char_buf2_end_p = NULL; - - mem_cpointer_t next_chunk1_cp = header1_p->first_chunk_cp; - mem_cpointer_t next_chunk2_cp = header2_p->first_chunk_cp; - - for (ecma_length_t char_index = 0; - char_index < chars_number; - char_index++) - { - if (cur_char_buf1_iter_p == cur_char_buf1_end_p) - { - JERRY_ASSERT (cur_char_buf2_iter_p == cur_char_buf2_end_p); - - const ecma_collection_chunk_t *chunk1_p = ECMA_GET_NON_NULL_POINTER (ecma_collection_chunk_t, next_chunk1_cp); - const ecma_collection_chunk_t *chunk2_p = ECMA_GET_NON_NULL_POINTER (ecma_collection_chunk_t, next_chunk2_cp); - - cur_char_buf1_iter_p = (lit_utf8_byte_t *) chunk1_p->data; - cur_char_buf1_end_p = cur_char_buf1_iter_p + sizeof (chunk1_p->data); - cur_char_buf2_iter_p = (lit_utf8_byte_t *) chunk2_p->data; - cur_char_buf2_end_p = cur_char_buf2_iter_p + sizeof (chunk2_p->data); - - next_chunk1_cp = chunk1_p->next_chunk_cp; - next_chunk2_cp = chunk2_p->next_chunk_cp; - } - - JERRY_ASSERT (cur_char_buf1_iter_p + 1 <= cur_char_buf1_end_p); - JERRY_ASSERT (cur_char_buf2_iter_p + 1 <= cur_char_buf2_end_p); - - if (*cur_char_buf1_iter_p++ != *cur_char_buf2_iter_p++) - { - return false; - } - } - - return true; -} /* ecma_compare_chars_collection */ - -/** - * Copy the collection of ecma-chars. 
- * - * @return pointer to collection copy - */ -static ecma_collection_header_t* -ecma_copy_chars_collection (const ecma_collection_header_t* collection_p) /**< collection's header */ -{ - JERRY_ASSERT (collection_p != NULL); - - ecma_collection_header_t *new_header_p = ecma_alloc_collection_header (); - *new_header_p = *collection_p; - - mem_cpointer_t* next_chunk_cp_p = &new_header_p->first_chunk_cp; - - ecma_collection_chunk_t *chunk_p = ECMA_GET_POINTER (ecma_collection_chunk_t, - collection_p->first_chunk_cp); - - while (chunk_p != NULL) - { - ecma_collection_chunk_t *new_chunk_p = ecma_alloc_collection_chunk (); - *new_chunk_p = *chunk_p; - - ECMA_SET_NON_NULL_POINTER (*next_chunk_cp_p, new_chunk_p); - next_chunk_cp_p = &new_chunk_p->next_chunk_cp; - - chunk_p = ECMA_GET_POINTER (ecma_collection_chunk_t, - chunk_p->next_chunk_cp); - } - - *next_chunk_cp_p = ECMA_NULL_POINTER; - - return new_header_p; -} /* ecma_copy_chars_collection */ - -/** - * Copy characters of the collection to buffer - */ -static void -ecma_copy_chars_collection_to_buffer (const ecma_collection_header_t *collection_p, /**< collection header */ - lit_utf8_byte_t chars_buffer[], /**< buffer for characters */ - lit_utf8_size_t buffer_size) /**< size of the buffer */ -{ - JERRY_ASSERT (collection_p != NULL); - - lit_utf8_byte_t *out_chars_buf_iter_p = chars_buffer; - - const lit_utf8_size_t chars_number = collection_p->unit_number; - - mem_cpointer_t next_chunk_cp = collection_p->first_chunk_cp; - const lit_utf8_byte_t *cur_char_buf_iter_p = NULL; - const lit_utf8_byte_t *cur_char_buf_end_p = NULL; - - for (lit_utf8_size_t char_index = 0; - char_index < chars_number; - char_index++) - { - if (cur_char_buf_iter_p == cur_char_buf_end_p) - { - const ecma_collection_chunk_t *chunk_p = ECMA_GET_NON_NULL_POINTER (ecma_collection_chunk_t, next_chunk_cp); - - cur_char_buf_iter_p = (lit_utf8_byte_t *) chunk_p->data; - cur_char_buf_end_p = cur_char_buf_iter_p + sizeof (chunk_p->data); - - 
next_chunk_cp = chunk_p->next_chunk_cp; - } - - JERRY_ASSERT (cur_char_buf_iter_p + 1 <= cur_char_buf_end_p); - - *out_chars_buf_iter_p++ = *cur_char_buf_iter_p++; - } - - JERRY_ASSERT (out_chars_buf_iter_p - chars_buffer <= (ssize_t) buffer_size); -} /* ecma_copy_chars_collection_to_buffer */ - -/** - * Free the collection of ecma-chars. - */ -static void -ecma_free_chars_collection (ecma_collection_header_t* collection_p) /**< collection's header */ -{ - JERRY_ASSERT (collection_p != NULL); - - ecma_collection_chunk_t *chunk_p = ECMA_GET_POINTER (ecma_collection_chunk_t, - collection_p->first_chunk_cp); - - while (chunk_p != NULL) - { - ecma_collection_chunk_t *next_chunk_p = ECMA_GET_POINTER (ecma_collection_chunk_t, - chunk_p->next_chunk_cp); - ecma_dealloc_collection_chunk (chunk_p); - - chunk_p = next_chunk_p; - } - - ecma_dealloc_collection_header (collection_p); -} /* ecma_free_chars_collection */ /** * Initialize ecma-string descriptor with string described by index in literal table @@ -433,12 +175,18 @@ ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, /**< utf-8 stri ecma_string_t* string_desc_p = ecma_alloc_string (); string_desc_p->refs = 1; string_desc_p->is_stack_var = false; - string_desc_p->container = ECMA_STRING_CONTAINER_HEAP_CHUNKS; + string_desc_p->container = ECMA_STRING_CONTAINER_STRING_DATA; string_desc_p->hash = lit_utf8_string_calc_hash (string_p, string_size); string_desc_p->u.common_field = 0; - ecma_collection_header_t *collection_p = ecma_new_chars_collection (string_p, string_size); - ECMA_SET_NON_NULL_POINTER (string_desc_p->u.collection_cp, collection_p); + ecma_string_data_t *data_p = (ecma_string_data_t *) mem_heap_alloc_block (string_size + ECMA_STRING_DATA_EXTRA_SIZE, + MEM_HEAP_ALLOC_SHORT_TERM); + JERRY_ASSERT (string_size == (uint16_t) string_size); + data_p->size = (uint16_t) string_size; + data_p->length = (uint16_t) lit_utf8_string_length (string_p, string_size); + memcpy (&data_p->data, string_p, 
string_size); + + ECMA_SET_NON_NULL_POINTER (string_desc_p->u.data_cp, data_p); return string_desc_p; } /* ecma_new_ecma_string_from_utf8 */ @@ -628,20 +376,28 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */ } lit_utf8_size_t buffer_size = str1_size + str2_size; + ssize_t bytes_copied1, bytes_copied2; - lit_utf8_byte_t *str_p = (lit_utf8_byte_t *) mem_heap_alloc_block (buffer_size, MEM_HEAP_ALLOC_SHORT_TERM); + ecma_string_t* str_concat_p = ecma_alloc_string (); + str_concat_p->refs = 1; + str_concat_p->is_stack_var = false; + str_concat_p->container = ECMA_STRING_CONTAINER_STRING_DATA; - ssize_t bytes_copied1, bytes_copied2; + str_concat_p->u.common_field = 0; + ecma_string_data_t *data_p = (ecma_string_data_t *) mem_heap_alloc_block (buffer_size + ECMA_STRING_DATA_EXTRA_SIZE, + MEM_HEAP_ALLOC_SHORT_TERM); + JERRY_ASSERT (buffer_size == (uint16_t) buffer_size); + data_p->size = (uint16_t) buffer_size; + data_p->length = (uint16_t) (ecma_string_get_length (string1_p) + ecma_string_get_length (string2_p)); - bytes_copied1 = ecma_string_to_utf8_string (string1_p, str_p, (ssize_t) str1_size); + bytes_copied1 = ecma_string_to_utf8_string (string1_p, &data_p->data, (ssize_t) str1_size); JERRY_ASSERT (bytes_copied1 > 0); - bytes_copied2 = ecma_string_to_utf8_string (string2_p, str_p + str1_size, (ssize_t) str2_size); + bytes_copied2 = ecma_string_to_utf8_string (string2_p, &data_p->data + str1_size, (ssize_t) str2_size); JERRY_ASSERT (bytes_copied2 > 0); - ecma_string_t *str_concat_p = ecma_new_ecma_string_from_utf8 (str_p, buffer_size); - - mem_heap_free_block ((void*) str_p); + str_concat_p->hash = lit_utf8_string_calc_hash (&data_p->data, buffer_size); + ECMA_SET_NON_NULL_POINTER (str_concat_p->u.data_cp, data_p); return str_concat_p; } /* ecma_concat_ecma_strings */ @@ -685,18 +441,21 @@ ecma_copy_ecma_string (ecma_string_t *string_desc_p) /**< string descriptor */ break; } - case ECMA_STRING_CONTAINER_HEAP_CHUNKS: + case 
ECMA_STRING_CONTAINER_STRING_DATA: { new_str_p = ecma_alloc_string (); *new_str_p = *string_desc_p; - const ecma_collection_header_t *chars_collection_p = ECMA_GET_NON_NULL_POINTER (ecma_collection_header_t, - string_desc_p->u.collection_cp); - JERRY_ASSERT (chars_collection_p != NULL); - ecma_collection_header_t *new_chars_collection_p = ecma_copy_chars_collection (chars_collection_p); + const ecma_string_data_t *data_p = ECMA_GET_NON_NULL_POINTER (ecma_string_data_t, + string_desc_p->u.data_cp); + JERRY_ASSERT (data_p != NULL); - ECMA_SET_NON_NULL_POINTER (new_str_p->u.collection_cp, new_chars_collection_p); + lit_utf8_byte_t *new_data_p; + new_data_p = (lit_utf8_byte_t *) mem_heap_alloc_block (data_p->size + ECMA_STRING_DATA_EXTRA_SIZE, + MEM_HEAP_ALLOC_SHORT_TERM); + memcpy (new_data_p, data_p, data_p->size + ECMA_STRING_DATA_EXTRA_SIZE); + ECMA_SET_NON_NULL_POINTER (new_str_p->u.data_cp, new_data_p); break; } @@ -782,12 +541,12 @@ ecma_deref_ecma_string (ecma_string_t *string_p) /**< ecma-string */ switch ((ecma_string_container_t)string_p->container) { - case ECMA_STRING_CONTAINER_HEAP_CHUNKS: + case ECMA_STRING_CONTAINER_STRING_DATA: { - ecma_collection_header_t *chars_collection_p = ECMA_GET_NON_NULL_POINTER (ecma_collection_header_t, - string_p->u.collection_cp); + ecma_string_data_t *data_p = ECMA_GET_NON_NULL_POINTER (ecma_string_data_t, + string_p->u.data_cp); - ecma_free_chars_collection (chars_collection_p); + mem_heap_free_block (data_p); break; } @@ -869,7 +628,7 @@ ecma_string_to_number (const ecma_string_t *str_p) /**< ecma-string */ } case ECMA_STRING_CONTAINER_LIT_TABLE: - case ECMA_STRING_CONTAINER_HEAP_CHUNKS: + case ECMA_STRING_CONTAINER_STRING_DATA: case ECMA_STRING_CONTAINER_MAGIC_STRING: case ECMA_STRING_CONTAINER_MAGIC_STRING_EX: { @@ -961,12 +720,12 @@ ecma_string_to_utf8_string (const ecma_string_t *string_desc_p, /**< ecma-string switch ((ecma_string_container_t)string_desc_p->container) { - case ECMA_STRING_CONTAINER_HEAP_CHUNKS: + 
case ECMA_STRING_CONTAINER_STRING_DATA: { - const ecma_collection_header_t *chars_collection_p = ECMA_GET_NON_NULL_POINTER (ecma_collection_header_t, - string_desc_p->u.collection_cp); + const ecma_string_data_t *data_p = ECMA_GET_NON_NULL_POINTER (ecma_string_data_t, + string_desc_p->u.data_cp); - ecma_copy_chars_collection_to_buffer (chars_collection_p, buffer_p, (lit_utf8_size_t) buffer_size); + memcpy (buffer_p, &data_p->data, (size_t) required_buffer_size); break; } @@ -1098,14 +857,19 @@ ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /* ecma-stri return (*num1_p == *num2_p); } - case ECMA_STRING_CONTAINER_HEAP_CHUNKS: + case ECMA_STRING_CONTAINER_STRING_DATA: { - const ecma_collection_header_t *chars_collection1_p = ECMA_GET_NON_NULL_POINTER (ecma_collection_header_t, - string1_p->u.collection_cp); - const ecma_collection_header_t *chars_collection2_p = ECMA_GET_NON_NULL_POINTER (ecma_collection_header_t, - string2_p->u.collection_cp); + const ecma_string_data_t *data1_p = ECMA_GET_NON_NULL_POINTER (ecma_string_data_t, + string1_p->u.data_cp); + const ecma_string_data_t *data2_p = ECMA_GET_NON_NULL_POINTER (ecma_string_data_t, + string2_p->u.data_cp); + + if (data1_p->length != data2_p->length) + { + return false; + } - return ecma_compare_chars_collection (chars_collection1_p, chars_collection2_p); + return (memcmp (&data1_p->data, &data2_p->data, strings_size) == 0); } case ECMA_STRING_CONTAINER_LIT_TABLE: { @@ -1363,12 +1127,12 @@ ecma_string_get_length (const ecma_string_t *string_p) /**< ecma-string */ } else { - JERRY_ASSERT (container == ECMA_STRING_CONTAINER_HEAP_CHUNKS); + JERRY_ASSERT (container == ECMA_STRING_CONTAINER_STRING_DATA); - const ecma_collection_header_t *collection_header_p = ECMA_GET_NON_NULL_POINTER (ecma_collection_header_t, - string_p->u.collection_cp); + const ecma_string_data_t *data_p = ECMA_GET_NON_NULL_POINTER (ecma_string_data_t, + string_p->u.data_cp); - return ecma_get_chars_collection_length 
(collection_header_p); + return data_p->length; } } /* ecma_string_get_length */ @@ -1439,12 +1203,12 @@ ecma_string_get_size (const ecma_string_t *string_p) /**< ecma-string */ } else { - JERRY_ASSERT (container == ECMA_STRING_CONTAINER_HEAP_CHUNKS); + JERRY_ASSERT (container == ECMA_STRING_CONTAINER_STRING_DATA); - const ecma_collection_header_t *collection_header_p = ECMA_GET_NON_NULL_POINTER (ecma_collection_header_t, - string_p->u.collection_cp); + const ecma_string_data_t *data_p = ECMA_GET_NON_NULL_POINTER (ecma_string_data_t, + string_p->u.data_cp); - return collection_header_p->unit_number; + return data_p->size; } } /* ecma_string_get_size */