kitty/kittens/unicode_input/unicode_names.c
2022-08-08 22:31:55 +05:30

115 lines
3.4 KiB
C

/*
* unicode_names.c
* Copyright (C) 2018 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#include "names.h"
static PyObject*
all_words(PYNOARG) {
PyObject *ans = PyTuple_New(arraysz(all_words_map));
if (!ans) return NULL;
for (size_t i = 0; i < arraysz(all_words_map); i++) {
PyObject *w = PyUnicode_FromString(all_words_map[i]);
if (w == NULL) { Py_DECREF(ans); return NULL; }
PyTuple_SET_ITEM(ans, i, w);
}
return ans;
}
static void
add_matches(const word_trie *wt, PyObject *ans) {
size_t num = mark_groups[wt->match_offset];
for (size_t i = wt->match_offset + 1; i < wt->match_offset + 1 + num; i++) {
PyObject *t = PyLong_FromUnsignedLong(mark_to_cp[mark_groups[i]]);
if (!t) return;
int ret = PySet_Add(ans, t);
Py_DECREF(t);
if (ret != 0) return;
}
}
static void
process_trie_node(const word_trie *wt, PyObject *ans) {
if (wt->match_offset) { add_matches(wt, ans); if (PyErr_Occurred()) return; }
size_t num_children = children_array[wt->children_offset];
if (!num_children) return;
for (size_t c = wt->children_offset + 1; c < wt->children_offset + 1 + num_children; c++) {
uint32_t x = children_array[c];
process_trie_node(&all_trie_nodes[x >> 8], ans);
if (PyErr_Occurred()) return;
}
}
static PyObject*
codepoints_for_word(const char *word, size_t len) {
const word_trie *wt = all_trie_nodes;
for (size_t i = 0; i < len; i++) {
unsigned char ch = word[i];
size_t num_children = children_array[wt->children_offset];
if (!num_children) return PyFrozenSet_New(NULL);
bool found = false;
for (size_t c = wt->children_offset + 1; c < wt->children_offset + 1 + num_children; c++) {
uint32_t x = children_array[c];
if ((x & 0xff) == ch) {
found = true;
wt = &all_trie_nodes[x >> 8];
break;
}
}
if (!found) return PyFrozenSet_New(NULL);
}
PyObject *ans = PyFrozenSet_New(NULL);
if (!ans) return NULL;
process_trie_node(wt, ans);
if (PyErr_Occurred()) return NULL;
return ans;
}
static PyObject*
cfw(PyObject *self UNUSED, PyObject *args) {
const char *word;
if (!PyArg_ParseTuple(args, "s", &word)) return NULL;
return codepoints_for_word(word, strlen(word));
}
static PyObject*
nfc(PyObject *self UNUSED, PyObject *args) {
unsigned int cp;
if (!PyArg_ParseTuple(args, "I", &cp)) return NULL;
const char *n = name_for_codepoint(cp);
if (n == NULL) Py_RETURN_NONE;
return PyUnicode_FromString(n);
}
static PyMethodDef module_methods[] = {
{"all_words", (PyCFunction)all_words, METH_NOARGS, ""},
{"codepoints_for_word", (PyCFunction)cfw, METH_VARARGS, ""},
{"name_for_codepoint", (PyCFunction)nfc, METH_VARARGS, ""},
{NULL, NULL, 0, NULL} /* Sentinel */
};
static int
exec_module(PyObject *m UNUSED) {
return 0;
}
IGNORE_PEDANTIC_WARNINGS
static PyModuleDef_Slot slots[] = { {Py_mod_exec, (void*)exec_module}, {0, NULL} };
END_IGNORE_PEDANTIC_WARNINGS
static struct PyModuleDef module = {
.m_base = PyModuleDef_HEAD_INIT,
.m_name = "unicode_names", /* name of module */
.m_doc = NULL,
.m_slots = slots,
.m_methods = module_methods
};
EXPORTED PyMODINIT_FUNC
PyInit_unicode_names(void) {
return PyModuleDef_Init(&module);
}