From da5cbc04044f2afc536f4d9f7f7049e383687290 Mon Sep 17 00:00:00 2001 From: seleznevae Date: Thu, 3 Oct 2019 23:07:24 +0300 Subject: [PATCH] [A] Add function `ft_set_u8strwid_func` to set custom function to compute width of utf8 strings --- ChangeLog.md | 1 + examples/9-non_ascii_table.c | 35 +++++++++++++++++++++++++++ examples/9-non_ascii_table.cpp | 37 ++++++++++++++++++++++++++++- lib/fort.c | 27 +++++++++++++++++++++ lib/fort.h | 21 ++++++++++++++++ src/fort.h | 21 ++++++++++++++++ src/fort_impl.c | 6 +++++ src/string_buffer.c | 15 ++++++++++++ src/string_buffer.h | 6 +++++ tests/wb_tests/test_string_buffer.c | 25 +++++++++++++++++++ 10 files changed, 193 insertions(+), 1 deletion(-) diff --git a/ChangeLog.md b/ChangeLog.md index 59a6fdd..49dc437 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -3,6 +3,7 @@ ### API - Changes in C++ API (introduced classes `char_table` and `utf8-table` instead of `table`). +- Add function `ft_set_u8strwid_func` to set custom function to compute width of utf8 strings. ### Internal diff --git a/examples/9-non_ascii_table.c b/examples/9-non_ascii_table.c index 55a6885..02016fd 100644 --- a/examples/9-non_ascii_table.c +++ b/examples/9-non_ascii_table.c @@ -5,6 +5,24 @@ #include "fort.h" +#if defined(FT_HAVE_UTF8) +/* Custom function to compute visible width of utf8 strings */ +int u8strwid(const void *beg, const void *end, size_t *width) +{ +#define MIN(a, b) (((a) < (b)) ? 
(a) : (b)) + const char *emojis[] = {"😃", "😍"}; + const size_t sz = sizeof(emojis) / sizeof(emojis[0]); + const size_t raw_len = (const char *)end - (const char *)beg; + + for (size_t i = 0; i < sz; ++i) { + if (memcmp(beg, emojis[i], MIN(strlen(emojis[i]), raw_len)) == 0) { + *width = 2; /* On my terminal emojis have width of 2 chars */ + return 0; + } + } + return 1; +} +#endif int main(void) { @@ -28,6 +46,23 @@ int main(void) printf("%s\n", table_str); ft_destroy_table(table); } + + /* Example of providing custom function to compute utf8 string width */ + { + ft_set_u8strwid_func(&u8strwid); + + ft_table_t *table = ft_create_table(); + ft_set_border_style(table, FT_NICE_STYLE); + + ft_set_cell_prop(table, 0, FT_ANY_COLUMN, FT_CPROP_ROW_TYPE, FT_ROW_HEADER); + ft_u8write_ln(table, "SMILING", "Native"); + ft_u8write_ln(table, "SMILING FACE WITH OPEN MOUTH", "😃"); + ft_u8write_ln(table, "SMILING FACE WITH HEART-SHAPED EYES", "😍"); + + const char *table_str = (const char *)ft_to_u8string(table); + printf("%s\n", table_str); + ft_destroy_table(table); + } #endif /* Example of wchar table */ diff --git a/examples/9-non_ascii_table.cpp b/examples/9-non_ascii_table.cpp index cbdf636..5181baf 100644 --- a/examples/9-non_ascii_table.cpp +++ b/examples/9-non_ascii_table.cpp @@ -1,8 +1,27 @@ #include - +#include #include "fort.hpp" +#if defined(FT_HAVE_UTF8) +/* Custom function to compute visible width of utf8 strings */ +int u8strwid(const void *beg, const void *end, size_t *width) +{ + const char *emojis[] = {"😃", "😍"}; + const size_t sz = sizeof(emojis) / sizeof(emojis[0]); + const size_t raw_len = (const char *)end - (const char *)beg; + + for (size_t i = 0; i < sz; ++i) { + if (memcmp(beg, emojis[i], std::min(strlen(emojis[i]), raw_len)) == 0) { + *width = 2; /* On my terminal emojis have width of 2 chars */ + return 0; + } + } + return 1; +} +#endif + + int main(void) { @@ -24,6 +43,22 @@ int main(void) std::cout << table.to_string() << std::endl; } + + + /* Example 
of providing custom function to compute utf8 string width */ + { + ft_set_u8strwid_func(&u8strwid); + + fort::utf8_table table; + table.set_border_style(FT_NICE_STYLE); + + table << fort::header + << "Description" << "Native" << fort::endr + << "SMILING FACE WITH OPEN MOUTH" << "😃" << fort::endr + << "SMILING FACE WITH HEART-SHAPED EYES" << "😍" << fort::endr; + + std::cout << table.to_string() << std::endl; + } #endif return 0; diff --git a/lib/fort.c b/lib/fort.c index 74fe555..0553666 100644 --- a/lib/fort.c +++ b/lib/fort.c @@ -1821,6 +1821,12 @@ FT_INTERNAL int buffer_printf(f_string_buffer_t *buffer, size_t buffer_row, f_conv_context_t *cntx, size_t cod_width, const char *content_style_tag, const char *reset_content_style_tag); +#ifdef FT_HAVE_UTF8 +FT_INTERNAL +void buffer_set_u8strwid_func(int (*u8strwid)(const void *beg, const void *end, size_t *width)); +#endif /* FT_HAVE_UTF8 */ + + #endif /* STRING_BUFFER_H */ /******************************************************** @@ -3528,6 +3534,12 @@ const void *ft_to_u8string(const ft_table_t *table) { return (const void *)ft_to_string_impl(table, UTF8_BUF); } + +void ft_set_u8strwid_func(int (*u8strwid)(const void *beg, const void *end, size_t *width)) +{ + buffer_set_u8strwid_func(u8strwid); +} + #endif /* FT_HAVE_UTF8 */ /******************************************************** @@ -6300,9 +6312,24 @@ size_t string_buffer_raw_capacity(const f_string_buffer_t *buffer) } #ifdef FT_HAVE_UTF8 +/* User provided function to compute utf8 string visible width */ +static int (*_custom_u8strwid)(const void *beg, const void *end, size_t *width) = NULL; + +FT_INTERNAL +void buffer_set_u8strwid_func(int (*u8strwid)(const void *beg, const void *end, size_t *width)) +{ + _custom_u8strwid = u8strwid; +} + static size_t utf8_width(const void *beg, const void *end) { + if (_custom_u8strwid) { + size_t width = 0; + if (!_custom_u8strwid(beg, end, &width)) + return width; + } + size_t sz = (size_t)((const char *)end - (const char 
*)beg); char *tmp = (char *)F_MALLOC(sizeof(char) * (sz + 1)); // @todo: add check to tmp diff --git a/lib/fort.h b/lib/fort.h index ca3ac6d..3492645 100644 --- a/lib/fort.h +++ b/lib/fort.h @@ -915,6 +915,27 @@ int ft_u8printf_ln(ft_table_t *table, const char *fmt, ...) FT_PRINTF_ATTRIBUTE_ const void *ft_to_u8string(const ft_table_t *table); +/** + * Set custom function to compute visible width of utf8 string. + * + * libfort internally has very simple logic to compute visible width of utf8 + * strings. It considers that each codepoint will occupy one position on the + * terminal in case of monowidth font (some East Asian wide and fullwidth + * characters (see http://www.unicode.org/reports/tr11/tr11-33.html) will occupy + * 2 positions). This logic is very simple and covers a wide range of cases. But + * obviously there are a lot of cases when it is not sufficient. In such cases the + * user should use some external libraries and provide an appropriate function to + * libfort. + * + * @param u8strwid + * User provided function to evaluate width of utf8 string ( beg - start of + * utf8 string, end - end of utf8 string (not included), width - pointer to + * the result). If the function succeeds it should return 0, otherwise some non- + * zero value. If the function returns a nonzero value libfort falls back to the + * default internal algorithm. + */ +void ft_set_u8strwid_func(int (*u8strwid)(const void *beg, const void *end, size_t *width)); + #endif /* FT_HAVE_UTF8 */ diff --git a/src/fort.h b/src/fort.h index ca3ac6d..3492645 100644 --- a/src/fort.h +++ b/src/fort.h @@ -915,6 +915,27 @@ int ft_u8printf_ln(ft_table_t *table, const char *fmt, ...) FT_PRINTF_ATTRIBUTE_ const void *ft_to_u8string(const ft_table_t *table); +/** + * Set custom function to compute visible width of utf8 string. + * + * libfort internally has very simple logic to compute visible width of utf8 + * strings. 
It considers that each codepoint will occupy one position on the + * terminal in case of monowidth font (some East Asian wide and fullwidth + * characters (see http://www.unicode.org/reports/tr11/tr11-33.html) will occupy + * 2 positions). This logic is very simple and covers a wide range of cases. But + * obviously there are a lot of cases when it is not sufficient. In such cases the + * user should use some external libraries and provide an appropriate function to + * libfort. + * + * @param u8strwid + * User provided function to evaluate width of utf8 string ( beg - start of + * utf8 string, end - end of utf8 string (not included), width - pointer to + * the result). If the function succeeds it should return 0, otherwise some non- + * zero value. If the function returns a nonzero value libfort falls back to the + * default internal algorithm. + */ +void ft_set_u8strwid_func(int (*u8strwid)(const void *beg, const void *end, size_t *width)); + #endif /* FT_HAVE_UTF8 */ diff --git a/src/fort_impl.c b/src/fort_impl.c index f939b76..a3eca58 100644 --- a/src/fort_impl.c +++ b/src/fort_impl.c @@ -983,4 +983,10 @@ const void *ft_to_u8string(const ft_table_t *table) { return (const void *)ft_to_string_impl(table, UTF8_BUF); } + +void ft_set_u8strwid_func(int (*u8strwid)(const void *beg, const void *end, size_t *width)) +{ + buffer_set_u8strwid_func(u8strwid); +} + #endif /* FT_HAVE_UTF8 */ diff --git a/src/string_buffer.c b/src/string_buffer.c index 02cf4bd..dfafbcc 100644 --- a/src/string_buffer.c +++ b/src/string_buffer.c @@ -444,9 +444,24 @@ size_t string_buffer_raw_capacity(const f_string_buffer_t *buffer) } #ifdef FT_HAVE_UTF8 +/* User provided function to compute utf8 string visible width */ +static int (*_custom_u8strwid)(const void *beg, const void *end, size_t *width) = NULL; + +FT_INTERNAL +void buffer_set_u8strwid_func(int (*u8strwid)(const void *beg, const void *end, size_t *width)) +{ + _custom_u8strwid = u8strwid; +} + static size_t utf8_width(const void *beg, const void *end) { + 
if (_custom_u8strwid) { + size_t width = 0; + if (!_custom_u8strwid(beg, end, &width)) + return width; + } + size_t sz = (size_t)((const char *)end - (const char *)beg); char *tmp = (char *)F_MALLOC(sizeof(char) * (sz + 1)); // @todo: add check to tmp diff --git a/src/string_buffer.h b/src/string_buffer.h index 6280819..0e28b85 100644 --- a/src/string_buffer.h +++ b/src/string_buffer.h @@ -73,4 +73,10 @@ FT_INTERNAL int buffer_printf(f_string_buffer_t *buffer, size_t buffer_row, f_conv_context_t *cntx, size_t cod_width, const char *content_style_tag, const char *reset_content_style_tag); +#ifdef FT_HAVE_UTF8 +FT_INTERNAL +void buffer_set_u8strwid_func(int (*u8strwid)(const void *beg, const void *end, size_t *width)); +#endif /* FT_HAVE_UTF8 */ + + #endif /* STRING_BUFFER_H */ diff --git a/tests/wb_tests/test_string_buffer.c b/tests/wb_tests/test_string_buffer.c index dd1f933..ffed2a8 100644 --- a/tests/wb_tests/test_string_buffer.c +++ b/tests/wb_tests/test_string_buffer.c @@ -349,6 +349,20 @@ void test_str_n_substring(void) #endif } +#if defined(FT_HAVE_UTF8) +/* Custom function to compute visible width of utf8 strings */ +int u8strwid(const void *beg, const void *end, size_t *width) +{ + const char *custom_str = "custom_string"; + const size_t raw_len = (const char *)end - (const char *)beg; + if (memcmp(beg, custom_str, MIN(strlen(custom_str), raw_len)) == 0) { + *width = 25; + return 0; + } + return 1; +} +#endif + void test_buffer_text_visible_width(void) { f_string_buffer_t *buffer = create_string_buffer(200, CHAR_BUF); @@ -467,6 +481,17 @@ void test_buffer_text_visible_width(void) assert_true(buffer_text_visible_width(buffer) == 30); + /* Test custom width function for utf8 strings */ + ft_set_u8strwid_func(&u8strwid); + buffer->str.u8str = (void *)"custom_string"; + assert_true(buffer_text_visible_width(buffer) == 25); + buffer->str.u8str = (void *)"123456789012345678901234\ncustom_string"; + assert_true(buffer_text_visible_width(buffer) == 25); + 
buffer->str.u8str = (void *)"12345678901234567890123456\ncustom_string"; + assert_true(buffer_text_visible_width(buffer) == 26); + buffer->str.u8str = (void *)"common_string"; + assert_true(buffer_text_visible_width(buffer) == 13); + ft_set_u8strwid_func(NULL); #endif buffer->type = CHAR_BUF;