From 09f887758c03f2e434430ec0251750fa32d98a7e Mon Sep 17 00:00:00 2001 From: Thomas Koutcher Date: Tue, 23 Feb 2021 23:13:25 +0100 Subject: [PATCH] Fix loop when wrapping line with ISO8859-1 character Fixes #1087 --- include/tig/string.h | 3 +++ src/pager.c | 7 ++++++- src/string.c | 38 ++++++++++++++++++++++++++++++++++---- 3 files changed, 43 insertions(+), 5 deletions(-) diff --git a/include/tig/string.h b/include/tig/string.h index 290d313db..2c01c7bbb 100644 --- a/include/tig/string.h +++ b/include/tig/string.h @@ -84,6 +84,7 @@ void string_copy_rev_from_commit_line(char *dst, const char *src); #define string_concat_path(dst, path1, path2) \ string_format(dst, !*path1 || path1[strlen(path1) - 1] == '/' ? "%s%s" : "%s/%s", path1, path2) +size_t string_expanded_length(const char *src, size_t srclen, size_t tabsize, size_t max_size); size_t string_expand(char *dst, size_t dstlen, const char *src, int srclen, int tabsize); char *string_trim_end(char *name); @@ -125,5 +126,7 @@ int utf8_width_of(const char *text, int max_bytes, int max_width); bool utf8_string_contains_uppercase(const char *text); +bool utf8_is_valid(const char *string); + #endif /* vim: set ts=8 sw=8 noexpandtab: */ diff --git a/src/pager.c b/src/pager.c index b570de262..69d08d69f 100644 --- a/src/pager.c +++ b/src/pager.c @@ -77,9 +77,14 @@ pager_wrap_line(struct view *view, const char *data, enum line_type type) int width; int trimmed; bool wrapped = !!first_line; - size_t linelen = utf8_length(&data, datalen, 0, &width, view->width, &trimmed, wrapped, opt_tab_size); + size_t linelen; struct line *line; + if (utf8_is_valid(data)) + linelen = utf8_length(&data, datalen, 0, &width, view->width, &trimmed, wrapped, opt_tab_size); + else + linelen = string_expanded_length(data, datalen, opt_tab_size, view->width - !!wrapped); + line = add_line_text_at_(view, view->lines, data, linelen, type, 1, wrapped); if (!line) break; diff --git a/src/string.c b/src/string.c index 2638d9254..27fce6220 100644 --- a/src/string.c +++ b/src/string.c @@ -88,6 +88,24 @@ string_copy_rev_from_commit_line(char *dst, const char *src) string_copy_rev(dst, src); } +size_t +string_expanded_length(const char *src, size_t srclen, size_t tabsize, size_t max_size) +{ + size_t size, pos; + + for (size = pos = 0; pos < srclen && size < max_size; pos++) { + if (src[pos] == '\t') { + size_t expanded = tabsize - (size % tabsize); + + size += expanded; + } else { + size++; + } + } + + return pos; +} + size_t string_expand(char *dst, size_t dstlen, const char *src, int srclen, int tabsize) { @@ -311,10 +329,6 @@ utf8_length(const char **start, int max_chars, size_t skip, int *width, size_t m * it is a single- or double-width character. */ unicode = utf8_to_unicode(string, bytes); - /* FIXME: Graceful handling of invalid Unicode character. */ - if (!unicode) - break; - ucwidth = unicode == '\t' ? tab_size - (*width % tab_size) : utf8proc_charwidth((utf8proc_int32_t) unicode); if (skip > 0) { @@ -385,4 +399,20 @@ utf8_string_contains_uppercase(const char *search) return utf8_string_contains(search, UTF8PROC_CATEGORY_LU); } +bool +utf8_is_valid(const char *string) +{ + for (;;) { + int32_t unicode; + ssize_t slen = utf8proc_iterate((const unsigned char *) string, -1, &unicode); + + if (slen <= 0 || !utf8proc_codepoint_valid(unicode)) + break; + if (unicode == 0) + return true; + string += slen; + } + return false; +} + /* vim: set ts=8 sw=8 noexpandtab: */