diff --git a/eval.c b/eval.c index 4edf21c3..5e83fb28 100644 --- a/eval.c +++ b/eval.c @@ -1988,8 +1988,14 @@ void sexp_string_utf8_set (sexp ctx, sexp str, sexp index, sexp ch) { sexp_string_size(str) += new_len - old_len; } sexp_utf8_encode_char(p, new_len, c); - if (old_len != new_len) + if (old_len != new_len) { +#if SEXP_USE_STRING_INDEX_TABLE sexp_update_string_index_lookup(ctx, str); +#elif SEXP_USE_STRING_REF_CACHE + sexp_cached_char_idx(str) = 0; + sexp_cached_cursor(str) = sexp_make_string_cursor(0); +#endif + } } sexp sexp_string_utf8_index_set (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp i, sexp ch) { diff --git a/include/chibi/features.h b/include/chibi/features.h index a4c22954..68f1257b 100644 --- a/include/chibi/features.h +++ b/include/chibi/features.h @@ -252,6 +252,12 @@ /* */ /* #define SEXP_USE_STRING_INDEX_TABLE 1 */ +/* uncomment this to cache a string cursor for string-ref calls */ +/* The default is not to use a cache. The goal of caching is to */ +/* soften the performance impact of repeated O(n) string-ref */ +/* operations on the same string. */ +/* #define SEXP_USE_STRING_REF_CACHE 1 */ + /* uncomment this to disable automatic closing of ports */ /* If enabled, the underlying FILE* for file ports will be */ /* automatically closed when they're garbage collected. 
Doesn't */ diff --git a/include/chibi/sexp.h b/include/chibi/sexp.h index 86435616..9f7e5f1a 100644 --- a/include/chibi/sexp.h +++ b/include/chibi/sexp.h @@ -481,6 +481,9 @@ struct sexp_struct { sexp bytes; #if SEXP_USE_STRING_INDEX_TABLE sexp charlens; +#elif SEXP_USE_STRING_REF_CACHE + sexp_uint_t cached_char_idx; + sexp cached_cursor; #endif sexp_uint_t offset, length; #endif @@ -1198,6 +1201,10 @@ enum sexp_uniform_vector_type { #define sexp_string_offset(x) (sexp_field(x, string, SEXP_STRING, offset)) #define sexp_string_data(x) (sexp_bytes_data(sexp_string_bytes(x))+sexp_string_offset(x)) #endif +#if SEXP_USE_STRING_REF_CACHE +#define sexp_cached_char_idx(x) (sexp_field(x, string, SEXP_STRING, cached_char_idx)) +#define sexp_cached_cursor(x) (sexp_field(x, string, SEXP_STRING, cached_cursor)) +#endif #define sexp_string_maybe_null_data(x) (sexp_not(x) ? NULL : sexp_string_data(x)) #if SEXP_USE_PACKED_STRINGS diff --git a/sexp.c b/sexp.c index 5b355312..1fc881a0 100644 --- a/sexp.c +++ b/sexp.c @@ -500,6 +500,9 @@ static const char* sexp_initial_features[] = { #if SEXP_USE_STRING_INDEX_TABLE "string-index", #endif +#if SEXP_USE_STRING_REF_CACHE + "string-ref-cache", +#endif #if SEXP_USE_GREEN_THREADS "threads", #endif @@ -1254,8 +1257,12 @@ sexp sexp_string_index_to_cursor (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp_sint_t* chunklens; sexp_sint_t chunk; #endif + sexp cursor; sexp_sint_t i, j, limit; unsigned char *p; +#if SEXP_USE_STRING_REF_CACHE + unsigned char *q; +#endif sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str); sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, index); p = (unsigned char*)sexp_string_data(str); @@ -1272,12 +1279,37 @@ sexp sexp_string_index_to_cursor (sexp ctx, sexp self, sexp_sint_t n, sexp str, i -= (chunk+1) * SEXP_STRING_INDEX_TABLE_CHUNK_SIZE; } } +#elif SEXP_USE_STRING_REF_CACHE + if (i > (sexp_cached_char_idx(str) + ((sexp_string_length(str) - sexp_cached_char_idx(str)) >> 1))) { + j = sexp_string_size(str); + i 
= -(sexp_string_length(str) - i); + } else if (i > (sexp_cached_char_idx(str) >> 1)) { + j = sexp_unbox_string_cursor(sexp_cached_cursor(str)); + i -= sexp_cached_char_idx(str); + } +#endif + +#if SEXP_USE_STRING_REF_CACHE + if (i >= 0) { +#endif + for ( ; i>0 && j<limit; i--) + j += sexp_utf8_initial_byte_count(p[j]); +#if SEXP_USE_STRING_REF_CACHE + } else { + for (q=p+j; i<0 && q>=p; i++) + q = (unsigned char*)sexp_string_utf8_prev(q); + j = q - p; + } #endif - for ( ; i>0 && j<limit; i--) - j += sexp_utf8_initial_byte_count(p[j]); + if (i != 0) return sexp_user_exception(ctx, self, "string-index->cursor: index out of range", index); - return sexp_make_string_cursor(j); + cursor = sexp_make_string_cursor(j); +#if SEXP_USE_STRING_REF_CACHE + sexp_cached_char_idx(str) = sexp_unbox_fixnum(index); + sexp_cached_cursor(str) = cursor; +#endif + return cursor; } sexp sexp_string_cursor_to_index (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp offset) { @@ -1286,7 +1318,24 @@ sexp sexp_string_cursor_to_index (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp_assert_type(ctx, sexp_string_cursorp, SEXP_STRING_CURSOR, offset); if (off < 0 || off > (sexp_sint_t)sexp_string_size(str)) return sexp_user_exception(ctx, self, "string-cursor->index: offset out of range", offset); +#if SEXP_USE_STRING_REF_CACHE + sexp_uint_t cached_idx = sexp_cached_char_idx(str); + sexp_sint_t cached_off = sexp_unbox_string_cursor(sexp_cached_cursor(str)); + unsigned char* string_data = (unsigned char*)sexp_string_data(str); + sexp_sint_t idx_delta; + if (off >= cached_off) { + idx_delta = sexp_string_utf8_length(string_data+cached_off, off-cached_off); + } else { + idx_delta = 0 - sexp_string_utf8_length(string_data+off, cached_off-off); + } + + sexp_uint_t new_idx = cached_idx + idx_delta; + sexp_cached_char_idx(str) = new_idx; + sexp_cached_cursor(str) = offset; + return sexp_make_fixnum(new_idx); +#else return sexp_make_fixnum(sexp_string_utf8_length((unsigned char*)sexp_string_data(str), off)); +#endif } sexp sexp_string_cursor_offset (sexp ctx, sexp self, sexp_sint_t n, sexp cur) { @@ -1358,6 +1407,10 @@ sexp sexp_make_string_op (sexp ctx, sexp self, sexp_sint_t n, sexp len, sexp ch) sexp_string_bytes(s) = b; 
sexp_string_offset(s) = 0; sexp_string_size(s) = sexp_bytes_length(b); +#if SEXP_USE_STRING_REF_CACHE + sexp_cached_char_idx(s) = 0; + sexp_cached_cursor(s) = sexp_make_string_cursor(0); +#endif sexp_update_string_index_lookup(ctx, s); sexp_gc_release2(ctx); return s;