Skip to content

Commit

Permalink
Use bit vector to store CESU-8 lookup table,
Browse files Browse the repository at this point in the history
to improve lit_get_unicode_char_size_by_utf8_first_byte performance.

JerryScript-DCO-1.0-Signed-off-by: Xin Hu [email protected]
  • Loading branch information
huxinx committed Dec 31, 2015
1 parent 50d124b commit 1e0356b
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 13 deletions.
47 changes: 34 additions & 13 deletions jerry-core/lit/lit-strings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,17 @@ lit_utf8_string_code_unit_at (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 stri
return code_unit;
} /* lit_utf8_string_code_unit_at */

/*
 * CESU-8 lookup table: number of bytes occupied by a character, indexed by the
 * high nibble (first_byte >> 4) of the character's first byte.
 *
 * A 0 entry marks a high nibble that lit_get_unicode_char_size_by_utf8_first_byte
 * excludes in its JERRY_ASSERT (only 0x0-0x7, 0xC, 0xD and 0xE are accepted).
 *
 * The table is compiled only when __LITTLE_ENDIAN is not defined; otherwise the
 * function uses the same sixteen values packed into a uint32_t constant.
 */
#ifndef __LITTLE_ENDIAN
const __attribute__ ((aligned (CESU_8_TABLE_MEM_ALIGNMENT))) lit_utf8_byte_t table[]
{
1, 1, 1, 1, 1, 1, 1, 1, /* 0x0 - 0x7: one byte */
0, 0, 0, 0, /* 0x8 - 0xB: not accepted as a first byte */
2, 2, /* 0xC - 0xD: two bytes */
3, 0 /* 0xE: three bytes; 0xF: not accepted as a first byte */
};
#endif

/**
* Get CESU-8 encoded size of character
*
Expand All @@ -765,19 +776,29 @@ lit_utf8_string_code_unit_at (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 stri
lit_utf8_size_t
lit_get_unicode_char_size_by_utf8_first_byte (const lit_utf8_byte_t first_byte) /**< first byte of the encoded character */
{
/*
 * NOTE(review): this listing looks like a rendered diff that lost its +/- markers.
 * The if / else-if / else chain below is presumably the implementation being
 * removed, and everything from the following JERRY_ASSERT onward its replacement -
 * confirm against the repository. Read literally, every branch of the chain
 * returns, so the code after it would be unreachable.
 */
if ((first_byte & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
{
return 1;
}
else if ((first_byte & LIT_UTF8_2_BYTE_MASK) == LIT_UTF8_2_BYTE_MARKER)
{
return 2;
}
else
{
/* Anything that is neither a 1-byte nor a 2-byte marker must be a 3-byte marker. */
JERRY_ASSERT ((first_byte & LIT_UTF8_3_BYTE_MASK) == LIT_UTF8_3_BYTE_MARKER);
return 3;
}
/* Only high nibbles 0x0-0x7, 0xC, 0xD and 0xE are accepted for a first byte. */
JERRY_ASSERT (((first_byte >> 4) <= 7 || (first_byte >> 4) == 12 ||
(first_byte >> 4) == 13 || (first_byte >> 4) == 14));

#ifdef __LITTLE_ENDIAN
/*
 * The 16-entry size table (indexed by the high nibble of first_byte) is packed
 * into a single uint32_t, two bits per entry, with entry 0 in the least
 * significant bits:
 *
 *   index: 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
 *   bits:  00 11 10 10 00 00 00 00 01 01 01 01 01 01 01 01   (0x3a005555)
 *   size:   0  3  2  2  0  0  0  0  1  1  1  1  1  1  1  1
 *
 * NOTE(review): the lookup below uses value shifts only, so its result does not
 * depend on the platform's byte order; the __LITTLE_ENDIAN guard therefore looks
 * unnecessary. Also verify that __LITTLE_ENDIAN is a reliable endianness test on
 * every supported toolchain (some C libraries define it as a constant on all
 * targets) - TODO confirm.
 */

const uint32_t cesu_8_store = 0x3a005555;
/* Bit offset of the two-bit entry: 2 * high nibble. */
int shift = (first_byte >> 4) << 1;

/* Move the selected entry to the low bits and mask off the rest. */
return (cesu_8_store >> shift) & 0x3;
#else
/* Byte-table variant: same values, one byte per entry. */
return table[first_byte >> 4];
#endif
} /* lit_get_unicode_char_size_by_utf8_first_byte */

/**
Expand Down
3 changes: 3 additions & 0 deletions jerry-core/lit/lit-strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ lit_string_hash_t lit_utf8_string_calc_hash (const lit_utf8_byte_t *, lit_utf8_s
lit_string_hash_t lit_utf8_string_hash_combine (lit_string_hash_t, const lit_utf8_byte_t *, lit_utf8_size_t);

/* code unit access */
/*
 * Alignment (in bytes) requested via the aligned attribute for the CESU-8 size
 * lookup table in lit-strings.cpp. It is defined only when __LITTLE_ENDIAN is not
 * defined, which is the same condition under which that table is compiled.
 */
#ifndef __LITTLE_ENDIAN
#define CESU_8_TABLE_MEM_ALIGNMENT 16
#endif
ecma_char_t lit_utf8_string_code_unit_at (const lit_utf8_byte_t *, lit_utf8_size_t, ecma_length_t);
/* Returns the number of bytes occupied by a CESU-8 encoded character, given its first byte. */
lit_utf8_size_t lit_get_unicode_char_size_by_utf8_first_byte (lit_utf8_byte_t);

Expand Down

0 comments on commit 1e0356b

Please sign in to comment.