From c76bcd0feed005aaf9db28a76f4883f3ae98295b Mon Sep 17 00:00:00 2001 From: Gavin Smith Date: Mon, 23 Oct 2023 19:51:00 +0100 Subject: [PATCH 1/5] * tp/Texinfo/XS/xspara.c (get_utf8_codepoint): Wrapper for mbrtowc/btowc. [_WIN32]: Do not call btowc, as it was tested to be very slow on MinGW. Report from Eli Zaretskii. --- ChangeLog | 7 ++++++ tp/Texinfo/XS/xspara.c | 48 +++++++++++++++++++++++------------------- 2 files changed, 33 insertions(+), 22 deletions(-) diff --git a/ChangeLog b/ChangeLog index e619109f5b..c4379ec56b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2023-10-23 Gavin Smith + + * tp/Texinfo/XS/xspara.c (get_utf8_codepoint): + Wrapper for mbrtowc/btowc. + [_WIN32]: Do not call btowc, as it was tested to be very slow + on MinGW. Report from Eli Zaretskii. + 2023-10-18 Gavin Smith Texinfo 7.1 diff --git a/tp/Texinfo/XS/xspara.c b/tp/Texinfo/XS/xspara.c index 7c6895a7ff..e1cddcdc2a 100644 --- a/tp/Texinfo/XS/xspara.c +++ b/tp/Texinfo/XS/xspara.c @@ -684,6 +684,30 @@ xspara_end (void) /* characters triggering an end of sentence */ #define end_sentence_characters ".?!" +/* Wrapper for mbrtowc. Set *PWC and return length of codepoint in bytes. */ +size_t +get_utf8_codepoint (wchar_t *pwc, const char *mbs, size_t n) +{ +#ifdef _WIN32 + /* Use the above implementation of mbrtowc. Do not use btowc as + does not exist as standard on MS-Windows, and was tested to be + very slow on MinGW. */ + return mbrtowc (pwc, mbs, n, NULL); +#else + if (!PRINTABLE_ASCII(*mbs)) + { + return mbrtowc (pwc, mbs, n, NULL); + } + else + { + /* Functionally the same as mbrtowc but (tested) slightly quicker. */ + *pwc = btowc (*mbs); + return 1; + } +#endif +} + + /* Add WORD to paragraph in RESULT, not refilling WORD. If we go past the end of the line start a new one. TRANSPARENT means that the letters in WORD are ignored for the purpose of deciding whether a full stop ends a sentence @@ -730,18 +754,7 @@ xspara__add_next (TEXT *result, char *word, int word_len, int transparent) if (!strchr (end_sentence_characters after_punctuation_characters, *p)) { - if (!PRINTABLE_ASCII(*p)) - { - wchar_t wc = L'\0'; - mbrtowc (&wc, p, len, NULL); - state.last_letter = wc; - break; - } - else - { - state.last_letter = btowc (*p); - break; - } + get_utf8_codepoint (&state.last_letter, p, len); } } } @@ -1013,16 +1026,7 @@ xspara_add_text (char *text, int len) } /************** Not a white space character. *****************/ - if (!PRINTABLE_ASCII(*p)) - { - char_len = mbrtowc (&wc, p, len, NULL); - } - else - { - /* Functonally the same as mbrtowc but (tested) slightly quicker. */ - char_len = 1; - wc = btowc (*p); - } + char_len = get_utf8_codepoint (&wc, p, len); if ((long) char_len == 0) break; /* Null character. Shouldn't happen. */ -- 2.42.1