Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a feature to cache the most recent string index->cursor result #793

Merged
merged 6 commits into from
Oct 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion eval.c
Original file line number Diff line number Diff line change
Expand Up @@ -1988,8 +1988,14 @@ void sexp_string_utf8_set (sexp ctx, sexp str, sexp index, sexp ch) {
sexp_string_size(str) += new_len - old_len;
}
sexp_utf8_encode_char(p, new_len, c);
if (old_len != new_len)
if (old_len != new_len) {
#if SEXP_USE_STRING_INDEX_TABLE
sexp_update_string_index_lookup(ctx, str);
#elif SEXP_USE_STRING_REF_CACHE
sexp_cached_char_idx(str) = 0;
sexp_cached_cursor(str) = sexp_make_string_cursor(0);
#endif
}
}

sexp sexp_string_utf8_index_set (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp i, sexp ch) {
Expand Down
6 changes: 6 additions & 0 deletions include/chibi/features.h
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,12 @@
/* */
/* #define SEXP_USE_STRING_INDEX_TABLE 1 */

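/* uncomment this to cache the most recent string index->cursor */
/* result on each string, for use by string-ref calls */
/* The default is not to use a cache. The goal of caching is to */
/* soften the performance impact of repeated O(n) string-ref */
/* operations on the same string. This is an alternative to */
/* SEXP_USE_STRING_INDEX_TABLE: the cache fields are only added to */
/* strings when the index table is disabled, so do not enable both. */
/* #define SEXP_USE_STRING_REF_CACHE 1 */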

/* uncomment this to disable automatic closing of ports */
/* If enabled, the underlying FILE* for file ports will be */
/* automatically closed when they're garbage collected. Doesn't */
Expand Down
7 changes: 7 additions & 0 deletions include/chibi/sexp.h
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,9 @@ struct sexp_struct {
sexp bytes;
#if SEXP_USE_STRING_INDEX_TABLE
sexp charlens;
#elif SEXP_USE_STRING_REF_CACHE
sexp_uint_t cached_char_idx;
sexp cached_cursor;
#endif
sexp_uint_t offset, length;
#endif
Expand Down Expand Up @@ -1198,6 +1201,10 @@ enum sexp_uniform_vector_type {
#define sexp_string_offset(x) (sexp_field(x, string, SEXP_STRING, offset))
#define sexp_string_data(x) (sexp_bytes_data(sexp_string_bytes(x))+sexp_string_offset(x))
#endif
/* Accessors for the per-string string-ref cache: */
/*   sexp_cached_char_idx(x) - cached character index (a raw sexp_uint_t) */
/*   sexp_cached_cursor(x)   - the string cursor object cached for that index */
/* NOTE(review): the struct fields these macros read are declared under */
/* "#elif SEXP_USE_STRING_REF_CACHE" (i.e. only when */
/* SEXP_USE_STRING_INDEX_TABLE is off), while this guard is a plain #if - */
/* confirm the two options are never enabled together. */
#if SEXP_USE_STRING_REF_CACHE
#define sexp_cached_char_idx(x) (sexp_field(x, string, SEXP_STRING, cached_char_idx))
#define sexp_cached_cursor(x) (sexp_field(x, string, SEXP_STRING, cached_cursor))
#endif
#define sexp_string_maybe_null_data(x) (sexp_not(x) ? NULL : sexp_string_data(x))

#if SEXP_USE_PACKED_STRINGS
Expand Down
59 changes: 56 additions & 3 deletions sexp.c
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,9 @@ static const char* sexp_initial_features[] = {
#if SEXP_USE_STRING_INDEX_TABLE
"string-index",
#endif
#if SEXP_USE_STRING_REF_CACHE
"string-ref-cache",
#endif
#if SEXP_USE_GREEN_THREADS
"threads",
#endif
Expand Down Expand Up @@ -1254,8 +1257,12 @@ sexp sexp_string_index_to_cursor (sexp ctx, sexp self, sexp_sint_t n, sexp str,
sexp_sint_t* chunklens;
sexp_sint_t chunk;
#endif
sexp cursor;
sexp_sint_t i, j, limit;
unsigned char *p;
#if SEXP_USE_STRING_REF_CACHE
unsigned char *q;
#endif
sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);
sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, index);
p = (unsigned char*)sexp_string_data(str);
Expand All @@ -1272,12 +1279,37 @@ sexp sexp_string_index_to_cursor (sexp ctx, sexp self, sexp_sint_t n, sexp str,
i -= (chunk+1) * SEXP_STRING_INDEX_TABLE_CHUNK_SIZE;
}
}
#elif SEXP_USE_STRING_REF_CACHE
if (i > (sexp_cached_char_idx(str) + ((sexp_string_length(str) - sexp_cached_char_idx(str)) >> 1))) {
j = sexp_string_size(str);
i = -(sexp_string_length(str) - i);
} else if (i > (sexp_cached_char_idx(str) >> 1)) {
j = sexp_unbox_string_cursor(sexp_cached_cursor(str));
i -= sexp_cached_char_idx(str);
}
#endif

#if SEXP_USE_STRING_REF_CACHE
if (i >= 0) {
#endif
for ( ; i>0 && j<limit; i--)
j += sexp_utf8_initial_byte_count(p[j]);
#if SEXP_USE_STRING_REF_CACHE
} else {
for (q=p+j; i<0 && q>=p; i++)
q = (unsigned char*)sexp_string_utf8_prev(q);
j = q - p;
}
#endif
for ( ; i>0 && j<limit; i--)
j += sexp_utf8_initial_byte_count(p[j]);

if (i != 0)
return sexp_user_exception(ctx, self, "string-index->cursor: index out of range", index);
return sexp_make_string_cursor(j);
cursor = sexp_make_string_cursor(j);
#if SEXP_USE_STRING_REF_CACHE
sexp_cached_char_idx(str) = sexp_unbox_fixnum(index);
sexp_cached_cursor(str) = cursor;
#endif
return cursor;
}

sexp sexp_string_cursor_to_index (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp offset) {
Expand All @@ -1286,7 +1318,24 @@ sexp sexp_string_cursor_to_index (sexp ctx, sexp self, sexp_sint_t n, sexp str,
sexp_assert_type(ctx, sexp_string_cursorp, SEXP_STRING_CURSOR, offset);
if (off < 0 || off > (sexp_sint_t)sexp_string_size(str))
return sexp_user_exception(ctx, self, "string-cursor->index: offset out of range", offset);
#if SEXP_USE_STRING_REF_CACHE
sexp_uint_t cached_idx = sexp_cached_char_idx(str);
sexp_sint_t cached_off = sexp_unbox_string_cursor(sexp_cached_cursor(str));
unsigned char* string_data = (unsigned char*)sexp_string_data(str);
sexp_sint_t idx_delta;
if (off >= cached_off) {
idx_delta = sexp_string_utf8_length(string_data+cached_off, off-cached_off);
} else {
idx_delta = 0 - sexp_string_utf8_length(string_data+off, cached_off-off);
}

sexp_uint_t new_idx = cached_idx + idx_delta;
sexp_cached_char_idx(str) = new_idx;
sexp_cached_cursor(str) = offset;
return sexp_make_fixnum(new_idx);
#else
return sexp_make_fixnum(sexp_string_utf8_length((unsigned char*)sexp_string_data(str), off));
#endif
}

sexp sexp_string_cursor_offset (sexp ctx, sexp self, sexp_sint_t n, sexp cur) {
Expand Down Expand Up @@ -1358,6 +1407,10 @@ sexp sexp_make_string_op (sexp ctx, sexp self, sexp_sint_t n, sexp len, sexp ch)
sexp_string_bytes(s) = b;
sexp_string_offset(s) = 0;
sexp_string_size(s) = sexp_bytes_length(b);
#if SEXP_USE_STRING_REF_CACHE
sexp_cached_char_idx(s) = 0;
sexp_cached_cursor(s) = sexp_make_string_cursor(0);
#endif
sexp_update_string_index_lookup(ctx, s);
sexp_gc_release2(ctx);
return s;
Expand Down