From 691b7215a0d2847b5224544598c8c687b7a53a02 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 27 Jun 2021 21:04:13 +0530
Subject: [PATCH] Unicode input kitten: Fix searching when a word has more than 1024 matches

Fixes #3773
---
 docs/changelog.rst                    |  2 ++
 kittens/unicode_input/unicode_names.c | 69 ++++++++++++++++++++-------
 2 files changed, 53 insertions(+), 18 deletions(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index 53943a7d5..74575dac4 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -44,6 +44,8 @@ To update |kitty|, :doc:`follow the instructions `.
 
 - Allow using superscripts in tab titles (:iss:`3763`)
 
+- Unicode input kitten: Fix searching when a word has more than 1024 matches (:iss:`3773`)
+
 0.21.1 [2021-06-14]
 ----------------------
 
diff --git a/kittens/unicode_input/unicode_names.c b/kittens/unicode_input/unicode_names.c
index 766cd0aea..7d364c103 100644
--- a/kittens/unicode_input/unicode_names.c
+++ b/kittens/unicode_input/unicode_names.c
@@ -19,27 +19,58 @@ all_words(PYNOARG) {
     return ans;
 }
 
-static inline void
-add_matches(const word_trie *wt, char_type *codepoints, size_t *pos, const size_t sz) {
+// Growable buffer of matched codepoints: codepoints holds capacity slots, of
+// which the first pos are filled. Zero-initialize before first use.
+typedef struct MatchingCodepoints {
+    char_type *codepoints;
+    size_t capacity, pos;
+} MatchingCodepoints;
+
+// Guarantee room for at least one more codepoint in ans, growing the buffer
+// if needed. On allocation failure a Python MemoryError is set, false is
+// returned and ans is left untouched, so the caller can still free() it.
+static bool
+ensure_space(MatchingCodepoints *ans) {
+    if (ans->capacity > ans->pos + 1) return true;
+    const size_t capacity = MAX(1024u, ans->pos + 256);
+    // Keep the old pointer until realloc() succeeds, otherwise it would leak
+    char_type *codepoints = realloc(ans->codepoints, sizeof(ans->codepoints[0]) * capacity);
+    if (!codepoints) { PyErr_NoMemory(); return false; }
+    ans->codepoints = codepoints;
+    ans->capacity = capacity;
+    return true;
+}
+
+// Append the codepoint of every mark in the match group of wt to ans.
+// Returns false, with a Python exception set, if memory runs out.
+static bool
+add_matches(const word_trie *wt, MatchingCodepoints *ans) {
     size_t num = mark_groups[wt->match_offset];
-    for (size_t i = wt->match_offset + 1; i < wt->match_offset + 1 + num && *pos < sz; i++, (*pos)++) {
-        codepoints[*pos] = mark_to_cp[mark_groups[i]];
+    for (size_t i = wt->match_offset + 1; i < wt->match_offset + 1 + num; i++) {
+        if (!ensure_space(ans)) return false;
+        ans->codepoints[ans->pos++] = mark_to_cp[mark_groups[i]];
     }
+    return true;
 }
 
-static void
-process_trie_node(const word_trie *wt, char_type *codepoints, size_t *pos, const size_t sz) {
-    if (wt->match_offset) add_matches(wt, codepoints, pos, sz);
+// Collect into ans the matches of wt and, recursively, of all its descendants.
+// Returns false, with a Python exception set, if memory runs out.
+static bool
+process_trie_node(const word_trie *wt, MatchingCodepoints *ans) {
+    if (wt->match_offset && !add_matches(wt, ans)) return false;
     size_t num_children = children_array[wt->children_offset];
-    if (!num_children) return;
+    if (!num_children) return true;
     for (size_t c = wt->children_offset + 1; c < wt->children_offset + 1 + num_children; c++) {
-        if (*pos > sz) return;
         uint32_t x = children_array[c];
-        process_trie_node(&all_trie_nodes[x >> 8], codepoints, pos, sz);
+        if (!process_trie_node(&all_trie_nodes[x >> 8], ans)) return false;
     }
+    return true;
 }
 
-static inline PyObject*
+// Return a new frozenset of the codepoints stored at or below the trie node
+// reached by following the bytes of word (empty if there is no such node),
+// or NULL with a Python exception set on failure.
+static PyObject*
 codepoints_for_word(const char *word, size_t len) {
     const word_trie *wt = all_trie_nodes;
     for (size_t i = 0; i < len; i++) {
@@ -57,14 +88,16 @@ codepoints_for_word(const char *word, size_t len) {
         }
         if (!found) return PyFrozenSet_New(NULL);
     }
-    static char_type codepoints[1024];
-    size_t cpos = 0;
-    process_trie_node(wt, codepoints, &cpos, arraysz(codepoints));
-    PyObject *ans = PyFrozenSet_New(NULL); if (ans == NULL) return NULL;
-    for (size_t i = 0; i < cpos; i++) {
-        PyObject *t = PyLong_FromUnsignedLong(codepoints[i]); if (t == NULL) { Py_DECREF(ans); return NULL; }
-        int ret = PySet_Add(ans, t); Py_DECREF(t); if (ret != 0) { Py_DECREF(ans); return NULL; }
+    MatchingCodepoints m = {0};
+    // On failure m still owns whatever was collected so far, so release it
+    if (!process_trie_node(wt, &m)) { free(m.codepoints); return NULL; }
+    PyObject *ans = PyFrozenSet_New(NULL);
+    if (ans == NULL) { free(m.codepoints); return NULL; }
+    for (size_t i = 0; i < m.pos; i++) {
+        PyObject *t = PyLong_FromUnsignedLong(m.codepoints[i]); if (t == NULL) { Py_DECREF(ans); free(m.codepoints); return NULL; }
+        int ret = PySet_Add(ans, t); Py_DECREF(t); if (ret != 0) { Py_DECREF(ans); free(m.codepoints); return NULL; }
     }
+    free(m.codepoints);
     return ans;
 }
 