From c09897c449846b391da43c40a9d3a0d33869b0ef Mon Sep 17 00:00:00 2001 From: Daphne Preston-Kendal Date: Fri, 10 Dec 2021 21:24:05 +0100 Subject: [PATCH 1/4] Add a feature to cache the most recent string index->cursor result This is lighter-weight than building a full index->cursor table for the string, adding a constant two words to the memory required to store a string, as opposed to one word for every n characters. The cached cursor is used for any string-ref operation requesting an index after the most-recently-requested index, making potentially quadratic repeated string-ref procedures run in linear time. In theory, it could also use a heuristic to speed up moving backwards through the string when it thinks that moving the old cursor backwards would be faster than starting again at the start of the string. In practice, my logging of when the cached cursor is actually reused during the Chibi compilation and startup process shows that the most common case of moving backwards is going back to the start of the string anyway. Benchmarks to follow. 
--- eval.c | 8 +++++++- include/chibi/features.h | 6 ++++++ include/chibi/sexp.h | 7 +++++++ sexp.c | 20 +++++++++++++++++++- 4 files changed, 39 insertions(+), 2 deletions(-) diff --git a/eval.c b/eval.c index 4edf21c3..5e83fb28 100644 --- a/eval.c +++ b/eval.c @@ -1988,8 +1988,14 @@ void sexp_string_utf8_set (sexp ctx, sexp str, sexp index, sexp ch) { sexp_string_size(str) += new_len - old_len; } sexp_utf8_encode_char(p, new_len, c); - if (old_len != new_len) + if (old_len != new_len) { +#if SEXP_USE_STRING_INDEX_TABLE sexp_update_string_index_lookup(ctx, str); +#elif SEXP_USE_STRING_REF_CACHE + sexp_cached_char_idx(str) = 0; + sexp_cached_cursor(str) = sexp_make_string_cursor(0); +#endif + } } sexp sexp_string_utf8_index_set (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp i, sexp ch) { diff --git a/include/chibi/features.h b/include/chibi/features.h index a4c22954..68f1257b 100644 --- a/include/chibi/features.h +++ b/include/chibi/features.h @@ -252,6 +252,12 @@ /* */ /* #define SEXP_USE_STRING_INDEX_TABLE 1 */ +/* uncomment this to cache a string cursor for string-ref calls */ +/* The default is not to use a cache. The goal of caching is to */ +/* soften the performance impact of repeated O(n) string-ref */ +/* operations on the same string. */ +/* #define SEXP_USE_STRING_REF_CACHE 1 */ + /* uncomment this to disable automatic closing of ports */ /* If enabled, the underlying FILE* for file ports will be */ /* automatically closed when they're garbage collected. 
Doesn't */ diff --git a/include/chibi/sexp.h b/include/chibi/sexp.h index 86435616..9f7e5f1a 100644 --- a/include/chibi/sexp.h +++ b/include/chibi/sexp.h @@ -481,6 +481,9 @@ struct sexp_struct { sexp bytes; #if SEXP_USE_STRING_INDEX_TABLE sexp charlens; +#elif SEXP_USE_STRING_REF_CACHE + sexp_uint_t cached_char_idx; + sexp cached_cursor; #endif sexp_uint_t offset, length; #endif @@ -1198,6 +1201,10 @@ enum sexp_uniform_vector_type { #define sexp_string_offset(x) (sexp_field(x, string, SEXP_STRING, offset)) #define sexp_string_data(x) (sexp_bytes_data(sexp_string_bytes(x))+sexp_string_offset(x)) #endif +#if SEXP_USE_STRING_REF_CACHE +#define sexp_cached_char_idx(x) (sexp_field(x, string, SEXP_STRING, cached_char_idx)) +#define sexp_cached_cursor(x) (sexp_field(x, string, SEXP_STRING, cached_cursor)) +#endif #define sexp_string_maybe_null_data(x) (sexp_not(x) ? NULL : sexp_string_data(x)) #if SEXP_USE_PACKED_STRINGS diff --git a/sexp.c b/sexp.c index 5b355312..bb92aa0a 100644 --- a/sexp.c +++ b/sexp.c @@ -500,6 +500,9 @@ static const char* sexp_initial_features[] = { #if SEXP_USE_STRING_INDEX_TABLE "string-index", #endif +#if SEXP_USE_STRING_REF_CACHE + "string-ref-cache", +#endif #if SEXP_USE_GREEN_THREADS "threads", #endif @@ -1254,6 +1257,7 @@ sexp sexp_string_index_to_cursor (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp_sint_t* chunklens; sexp_sint_t chunk; #endif + sexp cursor; sexp_sint_t i, j, limit; unsigned char *p; sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str); @@ -1272,12 +1276,22 @@ sexp sexp_string_index_to_cursor (sexp ctx, sexp self, sexp_sint_t n, sexp str, i -= (chunk+1) * SEXP_STRING_INDEX_TABLE_CHUNK_SIZE; } } +#elif SEXP_USE_STRING_REF_CACHE + if (i >= sexp_cached_char_idx(str)) { + j = sexp_unbox_string_cursor(sexp_cached_cursor(str)); + i -= sexp_cached_char_idx(str); + } #endif for ( ; i>0 && j<limit; i--) j += sexp_utf8_initial_byte_count(p[j]); if (i != 0) return sexp_user_exception(ctx, self, "string-index->cursor: index out of range", index); - return sexp_make_string_cursor(j); + cursor = sexp_make_string_cursor(j); +#if
SEXP_USE_STRING_REF_CACHE + sexp_cached_char_idx(str) = sexp_unbox_fixnum(index); + sexp_cached_cursor(str) = cursor; +#endif + return cursor; } sexp sexp_string_cursor_to_index (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp offset) { @@ -1358,6 +1372,10 @@ sexp sexp_make_string_op (sexp ctx, sexp self, sexp_sint_t n, sexp len, sexp ch) sexp_string_bytes(s) = b; sexp_string_offset(s) = 0; sexp_string_size(s) = sexp_bytes_length(b); +#if SEXP_USE_STRING_REF_CACHE + sexp_cached_char_idx(s) = 0; + sexp_cached_cursor(s) = sexp_make_string_cursor(0); +#endif sexp_update_string_index_lookup(ctx, s); sexp_gc_release2(ctx); return s; From 4e0f10ad2109d50ba1b2f11bfb4b78c7ded36ec1 Mon Sep 17 00:00:00 2001 From: Daphne Preston-Kendal Date: Sat, 11 Dec 2021 10:08:17 +0100 Subject: [PATCH 2/4] Use the cursor cache to speed up string-cursor->index as well --- sexp.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sexp.c b/sexp.c index bb92aa0a..d4f016d3 100644 --- a/sexp.c +++ b/sexp.c @@ -1300,7 +1300,24 @@ sexp sexp_string_cursor_to_index (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp_assert_type(ctx, sexp_string_cursorp, SEXP_STRING_CURSOR, offset); if (off < 0 || off > (sexp_sint_t)sexp_string_size(str)) return sexp_user_exception(ctx, self, "string-cursor->index: offset out of range", offset); +#if SEXP_USE_STRING_REF_CACHE + sexp_uint_t cached_idx = sexp_cached_char_idx(str); + sexp_sint_t cached_off = sexp_unbox_string_cursor(sexp_cached_cursor(str)); + unsigned char* string_data = (unsigned char*)sexp_string_data(str); + sexp_sint_t idx_delta; + if (off >= cached_off) { + idx_delta = sexp_string_utf8_length(string_data+cached_off, off-cached_off); + } else { + idx_delta = 0 - sexp_string_utf8_length(string_data+off, cached_off-off); + } + + sexp_uint_t new_idx = cached_idx + idx_delta; + sexp_cached_char_idx(str) = new_idx; + sexp_cached_cursor(str) = offset; + return sexp_make_fixnum(new_idx); +#else return 
sexp_make_fixnum(sexp_string_utf8_length((unsigned char*)sexp_string_data(str), off)); +#endif } sexp sexp_string_cursor_offset (sexp ctx, sexp self, sexp_sint_t n, sexp cur) { From a746370431f5ff95d1766a14f217bd23e1d27897 Mon Sep 17 00:00:00 2001 From: Daphne Preston-Kendal Date: Sat, 11 Dec 2021 11:43:23 +0100 Subject: [PATCH 3/4] Make the string cursor able to run backwards as well as forwards --- sexp.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/sexp.c b/sexp.c index d4f016d3..2bf94e16 100644 --- a/sexp.c +++ b/sexp.c @@ -1260,6 +1260,9 @@ sexp sexp_string_index_to_cursor (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp cursor; sexp_sint_t i, j, limit; unsigned char *p; +#if SEXP_USE_STRING_REF_CACHE + unsigned char *q; +#endif sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str); sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, index); p = (unsigned char*)sexp_string_data(str); @@ -1277,13 +1280,28 @@ sexp sexp_string_index_to_cursor (sexp ctx, sexp self, sexp_sint_t n, sexp str, } } #elif SEXP_USE_STRING_REF_CACHE - if (i >= sexp_cached_char_idx(str)) { + if (i > (sexp_cached_char_idx(str) + ((sexp_string_length(str) - sexp_cached_char_idx(str)) >> 1))) { + j = sexp_string_size(str); + i = -(sexp_string_length(str) - i); + } else if (i > (sexp_cached_char_idx(str) >> 1)) { j = sexp_unbox_string_cursor(sexp_cached_cursor(str)); i -= sexp_cached_char_idx(str); } #endif - for ( ; i>0 && j= 0) { +#endif + for ( ; i>0 && j=p; i++) + q = (unsigned char*)sexp_string_utf8_prev(q); + j = q - p; + } +#endif + if (i != 0) return sexp_user_exception(ctx, self, "string-index->cursor: index out of range", index); cursor = sexp_make_string_cursor(j); From 476ae194a3924f2535d3bad9405b76574ed04a61 Mon Sep 17 00:00:00 2001 From: Daphne Preston-Kendal Date: Thu, 3 Mar 2022 18:59:04 +0100 Subject: [PATCH 4/4] Indentation fix [skip ci] --- sexp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sexp.c b/sexp.c 
index 2bf94e16..1fc881a0 100644 --- a/sexp.c +++ b/sexp.c @@ -1283,7 +1283,7 @@ sexp sexp_string_index_to_cursor (sexp ctx, sexp self, sexp_sint_t n, sexp str, if (i > (sexp_cached_char_idx(str) + ((sexp_string_length(str) - sexp_cached_char_idx(str)) >> 1))) { j = sexp_string_size(str); i = -(sexp_string_length(str) - i); - } else if (i > (sexp_cached_char_idx(str) >> 1)) { + } else if (i > (sexp_cached_char_idx(str) >> 1)) { j = sexp_unbox_string_cursor(sexp_cached_cursor(str)); i -= sexp_cached_char_idx(str); }