[F] Fix incorrect behaviour in case of wide east asian symbols

This commit is contained in:
seleznevae
2019-09-08 09:55:56 +03:00
parent 6082281d0e
commit d944b8c364
9 changed files with 252 additions and 25 deletions

View File

@@ -480,6 +480,12 @@ utf8_nonnull utf8_weak void *utf8dup(const void *src);
// excluding the null terminating byte.
utf8_nonnull utf8_pure utf8_weak size_t utf8len(const void *str);
// Visible width of a utf8 string: the sum of the visible widths of its
// codepoints, excluding the null terminating byte.
utf8_nonnull utf8_pure utf8_weak size_t utf8width(const void *str);
// Visible width of a single codepoint: 0 for the null codepoint, 2 for
// fullwidth/wide East Asian codepoints, 1 otherwise.
utf8_nonnull utf8_pure utf8_weak int utf8cwidth(utf8_int32_t c);
// Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
// src2 respectively, case insensitive. Checking at most n bytes of each utf8
// string.
@@ -823,6 +829,83 @@ size_t utf8len(const void *str)
return length;
}
// Visible width of a single codepoint.
//
// Returns 0 for the null codepoint, 2 for codepoints that fall inside one of
// the East Asian Fullwidth/Wide ranges listed in the table below, and 1 for
// everything else (including every codepoint below 0x1100).
//
// Range data taken from:
// https://unicode.org/Public/UNIDATA/EastAsianWidth.txt
// http://www.unicode.org/reports/tr11/tr11-33.html
int utf8cwidth(utf8_int32_t c)
{
    // Inclusive {first, last} pairs of codepoints occupying two cells.
    static const utf8_int32_t double_width[][2] = {
        // Fullwidth
        {0x3000, 0x3000},
        {0xFF01, 0xFF60},
        {0xFFE0, 0xFFE6},
        // Wide
        {0x1100, 0x115F},
        {0x11A3, 0x11A7},
        {0x11FA, 0x11FF},
        {0x2329, 0x232A},
        {0x2E80, 0x2E99},
        {0x2E9B, 0x2EF3},
        {0x2F00, 0x2FD5},
        {0x2FF0, 0x2FFB},
        {0x3001, 0x303E},
        {0x3041, 0x3096},
        {0x3099, 0x30FF},
        {0x3105, 0x312D},
        {0x3131, 0x318E},
        {0x3190, 0x31BA},
        {0x31C0, 0x31E3},
        {0x31F0, 0x321E},
        {0x3220, 0x3247},
        {0x3250, 0x32FE},
        {0x3300, 0x4DBF},
        {0x4E00, 0xA48C},
        {0xA490, 0xA4C6},
        {0xA960, 0xA97C},
        {0xAC00, 0xD7A3},
        {0xD7B0, 0xD7C6},
        {0xD7CB, 0xD7FB},
        {0xF900, 0xFAFF},
        {0xFE10, 0xFE19},
        {0xFE30, 0xFE52},
        {0xFE54, 0xFE66},
        {0xFE68, 0xFE6B},
        {0x1B000, 0x1B001},
        {0x1F200, 0x1F202},
        {0x1F210, 0x1F23A},
        {0x1F240, 0x1F248},
        {0x1F250, 0x1F251},
        // Two overlapping ranges (0x20000..0x2F73F and 0x2B740..0x2FFFD)
        // folded into their union; the covered set is unchanged.
        {0x20000, 0x2FFFD},
        {0x30000, 0x3FFFD}
    };
    const size_t range_count = sizeof(double_width) / sizeof(double_width[0]);
    size_t i;

    // TODO: add non printable characters check
    if (c == 0)
        return 0;

    // Nothing below the first table entry is double width.
    if (c < 0x1100)
        return 1;

    for (i = 0; i < range_count; ++i) {
        if (double_width[i][0] <= c && c <= double_width[i][1])
            return 2;
    }
    return 1;
}
// Visible width of a null-terminated utf8 string: the sum of utf8cwidth()
// over every codepoint that precedes the terminating null codepoint.
size_t utf8width(const void *str)
{
    size_t total = 0;
    utf8_int32_t cp = 0;

    // Decode one codepoint per iteration; stop at the null codepoint.
    for (str = utf8codepoint(str, &cp); cp != 0;
         str = utf8codepoint(str, &cp)) {
        total += utf8cwidth(cp);
    }
    return total;
}
int utf8ncasecmp(const void *src1, const void *src2, size_t n)
{
utf8_int32_t src1_cp, src2_cp, src1_orig_cp, src2_orig_cp;
@@ -6217,8 +6300,8 @@ size_t string_buffer_raw_capacity(const f_string_buffer_t *buffer)
}
#ifdef FT_HAVE_UTF8
FT_INTERNAL
size_t ut8_width(const void *beg, const void *end)
static
size_t utf8_width(const void *beg, const void *end)
{
size_t sz = (size_t)((const char *)end - (const char *)beg);
char *tmp = (char *)F_MALLOC(sizeof(char) * (sz + 1));
@@ -6227,7 +6310,7 @@ size_t ut8_width(const void *beg, const void *end)
memcpy(tmp, beg, sz);
tmp[sz] = '\0';
size_t result = utf8len(tmp);
size_t result = utf8width(tmp);
F_FREE(tmp);
return result;
}
@@ -6277,7 +6360,7 @@ size_t buffer_text_visible_width(const f_string_buffer_t *buffer)
if (beg == NULL || end == NULL)
return max_length;
max_length = MAX(max_length, (size_t)ut8_width(beg, end));
max_length = MAX(max_length, (size_t)utf8_width(beg, end));
++n;
}
#endif /* FT_HAVE_WCHAR */
@@ -6307,7 +6390,7 @@ buffer_substring(const f_string_buffer_t *buffer, size_t buffer_row, const void
case UTF8_BUF:
utf8_n_substring(buffer->str.u8str, '\n', buffer_row, begin, end);
if ((*(const char **)begin) && (*(const char **)end))
*str_it_width = ut8_width(*begin, *end);
*str_it_width = utf8_width(*begin, *end);
break;
#endif /* FT_HAVE_UTF8 */
default:

View File

@@ -46,8 +46,8 @@ SOFTWARE.
#define LIBFORT_MAJOR_VERSION 0
#define LIBFORT_MINOR_VERSION 2
#define LIBFORT_REVISION 1
#define LIBFORT_VERSION_STR "0.2.1"
#define LIBFORT_REVISION 2
#define LIBFORT_VERSION_STR "0.2.2"
/*****************************************************************************