Fix character names for control characters not being read from the Unicode database

Also allow unicode_names.c to be compiled with Python 2 so I can re-use
it in calibre.
This commit is contained in:
Kovid Goyal 2018-05-01 10:13:22 +05:30
parent aa93c3fb66
commit f7001ea068
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
6 changed files with 15978 additions and 15966 deletions

View File

@ -55,7 +55,9 @@ def parse_ucd():
for line in get_data('ucd/UnicodeData.txt'):
parts = [x.strip() for x in line.split(';')]
codepoint = int(parts[0], 16)
name = parts[1]
name = parts[1] or parts[10]
if name == '<control>':
name = parts[10]
if name:
name_map[codepoint] = name
for word in name.lower().split():

File diff suppressed because one or more lines are too long

View File

@ -85,12 +85,13 @@ nfc(PyObject *self UNUSED, PyObject *args) {
}
static PyMethodDef module_methods[] = {
METHODB(all_words, METH_NOARGS),
{"all_words", (PyCFunction)all_words, METH_NOARGS, ""},
{"codepoints_for_word", (PyCFunction)cfw, METH_VARARGS, ""},
{"name_for_codepoint", (PyCFunction)nfc, METH_VARARGS, ""},
{NULL, NULL, 0, NULL} /* Sentinel */
};
#if PY_VERSION_HEX >= 0x03000000
static struct PyModuleDef module = {
.m_base = PyModuleDef_HEAD_INIT,
.m_name = "unicode_names", /* name of module */
@ -108,3 +109,13 @@ PyInit_unicode_names(void) {
if (m == NULL) return NULL;
return m;
}
#else
EXPORTED
initunicode_names(void) {
PyObject *m;
m = Py_InitModule3("unicode_names", module_methods,
""
);
if (m == NULL) return;
}
#endif

2
kitty/emoji.h generated
View File

@ -1,4 +1,4 @@
// unicode data, built from the unicode standard on: 2018-04-24
// unicode data, built from the unicode standard on: 2018-05-01
// see gen-wcwidth.py
#pragma once
#include "data-types.h"

View File

@ -1,4 +1,4 @@
// unicode data, built from the unicode standard on: 2018-04-24
// unicode data, built from the unicode standard on: 2018-05-01
// see gen-wcwidth.py
#include "data-types.h"

2
kitty/wcwidth-std.h generated
View File

@ -1,4 +1,4 @@
// unicode data, built from the unicode standard on: 2018-04-24
// unicode data, built from the unicode standard on: 2018-05-01
// see gen-wcwidth.py
#pragma once
#include "data-types.h"