[F] Fix incorrect behaviour in case of wide east asian symbols
This commit is contained in:
93
lib/fort.c
93
lib/fort.c
@@ -480,6 +480,12 @@ utf8_nonnull utf8_weak void *utf8dup(const void *src);
|
||||
// excluding the null terminating byte.
|
||||
utf8_nonnull utf8_pure utf8_weak size_t utf8len(const void *str);
|
||||
|
||||
// Visible width of utf8string.
|
||||
utf8_nonnull utf8_pure utf8_weak size_t utf8width(const void *str);
|
||||
|
||||
// Visible width of codepoint.
|
||||
utf8_nonnull utf8_pure utf8_weak int utf8cwidth(utf8_int32_t c);
|
||||
|
||||
// Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
|
||||
// src2 respectively, case insensitive. Checking at most n bytes of each utf8
|
||||
// string.
|
||||
@@ -823,6 +829,83 @@ size_t utf8len(const void *str)
|
||||
return length;
|
||||
}
|
||||
|
||||
// See
|
||||
// https://unicode.org/Public/UNIDATA/EastAsianWidth.txt
|
||||
// http://www.unicode.org/reports/tr11/tr11-33.html
|
||||
int utf8cwidth(utf8_int32_t c)
|
||||
{
|
||||
// TODO: add non printable characters check
|
||||
if (c == 0)
|
||||
return 0;
|
||||
|
||||
if (c < 0x1100)
|
||||
return 1;
|
||||
|
||||
// Fullwidth
|
||||
if ((0x3000 == c) ||
|
||||
(0xFF01 <= c && c <= 0xFF60) ||
|
||||
(0xFFE0 <= c && c <= 0xFFE6)) {
|
||||
return 2;
|
||||
}
|
||||
|
||||
// Wide
|
||||
if ((0x1100 <= c && c <= 0x115F) ||
|
||||
(0x11A3 <= c && c <= 0x11A7) ||
|
||||
(0x11FA <= c && c <= 0x11FF) ||
|
||||
(0x2329 <= c && c <= 0x232A) ||
|
||||
(0x2E80 <= c && c <= 0x2E99) ||
|
||||
(0x2E9B <= c && c <= 0x2EF3) ||
|
||||
(0x2F00 <= c && c <= 0x2FD5) ||
|
||||
(0x2FF0 <= c && c <= 0x2FFB) ||
|
||||
(0x3001 <= c && c <= 0x303E) ||
|
||||
(0x3041 <= c && c <= 0x3096) ||
|
||||
(0x3099 <= c && c <= 0x30FF) ||
|
||||
(0x3105 <= c && c <= 0x312D) ||
|
||||
(0x3131 <= c && c <= 0x318E) ||
|
||||
(0x3190 <= c && c <= 0x31BA) ||
|
||||
(0x31C0 <= c && c <= 0x31E3) ||
|
||||
(0x31F0 <= c && c <= 0x321E) ||
|
||||
(0x3220 <= c && c <= 0x3247) ||
|
||||
(0x3250 <= c && c <= 0x32FE) ||
|
||||
(0x3300 <= c && c <= 0x4DBF) ||
|
||||
(0x4E00 <= c && c <= 0xA48C) ||
|
||||
(0xA490 <= c && c <= 0xA4C6) ||
|
||||
(0xA960 <= c && c <= 0xA97C) ||
|
||||
(0xAC00 <= c && c <= 0xD7A3) ||
|
||||
(0xD7B0 <= c && c <= 0xD7C6) ||
|
||||
(0xD7CB <= c && c <= 0xD7FB) ||
|
||||
(0xF900 <= c && c <= 0xFAFF) ||
|
||||
(0xFE10 <= c && c <= 0xFE19) ||
|
||||
(0xFE30 <= c && c <= 0xFE52) ||
|
||||
(0xFE54 <= c && c <= 0xFE66) ||
|
||||
(0xFE68 <= c && c <= 0xFE6B) ||
|
||||
(0x1B000 <= c && c <= 0x1B001) ||
|
||||
(0x1F200 <= c && c <= 0x1F202) ||
|
||||
(0x1F210 <= c && c <= 0x1F23A) ||
|
||||
(0x1F240 <= c && c <= 0x1F248) ||
|
||||
(0x1F250 <= c && c <= 0x1F251) ||
|
||||
(0x20000 <= c && c <= 0x2F73F) ||
|
||||
(0x2B740 <= c && c <= 0x2FFFD) ||
|
||||
(0x30000 <= c && c <= 0x3FFFD)) {
|
||||
return 2;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t utf8width(const void *str)
|
||||
{
|
||||
size_t length = 0;
|
||||
utf8_int32_t c = 0;
|
||||
|
||||
str = utf8codepoint(str, &c);
|
||||
while (c != 0) {
|
||||
length += utf8cwidth(c);
|
||||
str = utf8codepoint(str, &c);
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
int utf8ncasecmp(const void *src1, const void *src2, size_t n)
|
||||
{
|
||||
utf8_int32_t src1_cp, src2_cp, src1_orig_cp, src2_orig_cp;
|
||||
@@ -6217,8 +6300,8 @@ size_t string_buffer_raw_capacity(const f_string_buffer_t *buffer)
|
||||
}
|
||||
|
||||
#ifdef FT_HAVE_UTF8
|
||||
FT_INTERNAL
|
||||
size_t ut8_width(const void *beg, const void *end)
|
||||
static
|
||||
size_t utf8_width(const void *beg, const void *end)
|
||||
{
|
||||
size_t sz = (size_t)((const char *)end - (const char *)beg);
|
||||
char *tmp = (char *)F_MALLOC(sizeof(char) * (sz + 1));
|
||||
@@ -6227,7 +6310,7 @@ size_t ut8_width(const void *beg, const void *end)
|
||||
|
||||
memcpy(tmp, beg, sz);
|
||||
tmp[sz] = '\0';
|
||||
size_t result = utf8len(tmp);
|
||||
size_t result = utf8width(tmp);
|
||||
F_FREE(tmp);
|
||||
return result;
|
||||
}
|
||||
@@ -6277,7 +6360,7 @@ size_t buffer_text_visible_width(const f_string_buffer_t *buffer)
|
||||
if (beg == NULL || end == NULL)
|
||||
return max_length;
|
||||
|
||||
max_length = MAX(max_length, (size_t)ut8_width(beg, end));
|
||||
max_length = MAX(max_length, (size_t)utf8_width(beg, end));
|
||||
++n;
|
||||
}
|
||||
#endif /* FT_HAVE_WCHAR */
|
||||
@@ -6307,7 +6390,7 @@ buffer_substring(const f_string_buffer_t *buffer, size_t buffer_row, const void
|
||||
case UTF8_BUF:
|
||||
utf8_n_substring(buffer->str.u8str, '\n', buffer_row, begin, end);
|
||||
if ((*(const char **)begin) && (*(const char **)end))
|
||||
*str_it_width = ut8_width(*begin, *end);
|
||||
*str_it_width = utf8_width(*begin, *end);
|
||||
break;
|
||||
#endif /* FT_HAVE_UTF8 */
|
||||
default:
|
||||
|
@@ -46,8 +46,8 @@ SOFTWARE.
|
||||
|
||||
#define LIBFORT_MAJOR_VERSION 0
|
||||
#define LIBFORT_MINOR_VERSION 2
|
||||
#define LIBFORT_REVISION 1
|
||||
#define LIBFORT_VERSION_STR "0.2.1"
|
||||
#define LIBFORT_REVISION 2
|
||||
#define LIBFORT_VERSION_STR "0.2.2"
|
||||
|
||||
|
||||
/*****************************************************************************
|
||||
|
Reference in New Issue
Block a user