Don't use the Python unicodedata module as we use libunistring

No sense in loading two huge Unicode datasets into memory
Kovid Goyal 2017-09-14 18:18:38 +05:30
parent 21accfe114
commit ed3427f349
GPG Key ID: 06BC317B515ACE7C
3 changed files with 11 additions and 2 deletions
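
For context, a minimal standalone sketch (not part of this commit) of the libunistring primitive the change builds on: uc_composition() from <uninorm.h> returns the canonically composed code point for a base character and a combining mark, or 0 when no precomposed form exists. Link with -lunistring.

/* sketch.c: illustrative only; build with: cc sketch.c -lunistring */
#include <stdio.h>
#include <uninorm.h>

int main(void) {
    /* U+0065 'e' + U+0301 COMBINING ACUTE ACCENT -> U+00E9 'é' */
    ucs4_t composed = uc_composition(0x0065, 0x0301);
    if (composed) printf("composed to U+%04X\n", (unsigned) composed);
    /* U+0078 'x' + U+0301 has no precomposed form, so 0 is returned */
    printf("x + acute -> %u\n", (unsigned) uc_composition(0x0078, 0x0301));
    return 0;
}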

View File

@@ -4,7 +4,6 @@
 import ctypes
 import sys
-import unicodedata
 from collections import namedtuple
 from functools import lru_cache
 from itertools import chain
@@ -252,7 +251,7 @@ def missing_glyph(width):
 def render_cell(text=' ', bold=False, italic=False):
     # TODO: Handle non-normalizable combining chars. Probably need to use
     # harfbuzz for that
-    text = unicodedata.normalize('NFC', text)[0]
+    text = text[0]
     width = wcwidth(text)
     try:
         bitmap_char = render_char(text, bold, italic, width)

View File

@@ -41,6 +41,8 @@ line_text_at(char_type ch, combining_type cc) {
         PyUnicode_WriteChar(ans, 0, ch);
     } else {
         Py_UCS4 cc1 = cc & CC_MASK, cc2 = cc >> 16;
+        Py_UCS4 normalized = normalize(ch, cc1, cc2);
+        if (normalized) { return line_text_at(normalized, 0); }
         Py_UCS4 maxc = (ch > cc1) ? MAX(ch, cc2) : MAX(cc1, cc2);
         ans = PyUnicode_New(cc2 ? 3 : 2, maxc);
         if (ans == NULL) return PyErr_NoMemory();
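
To make the new early return concrete outside of CPython, here is a hedged sketch of the decision those two added lines implement (compose_cell() and the output layout are illustrative, not kitty's code): when the base character and its combining marks compose canonically, the cell text is a single code point and line_text_at() recurses with cc == 0; otherwise it builds the 2- or 3-code-point string as before.

/* compose_cell.c: illustrative sketch; build with: cc compose_cell.c -lunistring */
#include <stdint.h>
#include <stdio.h>
#include <uninorm.h>

/* Writes the cell's code points to out and returns how many (1, 2 or 3). */
static int compose_cell(uint32_t ch, uint32_t cc1, uint32_t cc2, uint32_t out[3]) {
    uint32_t composed = uc_composition(ch, cc1);
    if (composed && cc2) composed = uc_composition(composed, cc2);
    if (composed) { out[0] = composed; return 1; }  /* the new fast path */
    out[0] = ch; out[1] = cc1;                      /* fallback: keep the marks separate */
    if (cc2) { out[2] = cc2; return 3; }
    return 2;
}

int main(void) {
    uint32_t out[3];
    int n = compose_cell(0x0065, 0x0301, 0, out);   /* e + combining acute */
    printf("%d code point(s), first is U+%04X\n", n, (unsigned) out[0]);
    return 0;
}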

View File

@@ -1,6 +1,7 @@
 #pragma once
 #include <unictype.h>
+#include <uninorm.h>

 static inline bool
 is_combining_char(uint32_t ch) {
@@ -22,3 +23,10 @@ static inline bool
 is_url_char(uint32_t ch) {
     return ch && !uc_is_general_category_withtable(ch, UC_CATEGORY_MASK_C | UC_CATEGORY_MASK_Z);
 }
+
+static inline uint32_t
+normalize(uint32_t ch, uint32_t cc1, uint32_t cc2) {
+    uint32_t ans = uc_composition(ch, cc1);
+    if (ans && cc2) ans = uc_composition(ans, cc2);
+    return ans;
+}
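
A hedged usage sketch for the new helper follows; the helper body is copied from the hunk above, while the test code points are my own assumptions about what composes. normalize() returns the fully composed code point, or 0 when any composition step has no canonical result. Build with -lunistring.

/* normalize_demo.c: illustrative only; build with: cc normalize_demo.c -lunistring */
#include <stdint.h>
#include <stdio.h>
#include <uninorm.h>

/* Same body as the helper added above. */
static inline uint32_t
normalize(uint32_t ch, uint32_t cc1, uint32_t cc2) {
    uint32_t ans = uc_composition(ch, cc1);
    if (ans && cc2) ans = uc_composition(ans, cc2);
    return ans;
}

int main(void) {
    /* e + COMBINING ACUTE ACCENT: expect U+00E9 (é) */
    printf("U+%04X\n", (unsigned) normalize(0x0065, 0x0301, 0));
    /* e + circumflex + acute: composes in two steps, expect U+1EBF (ế) */
    printf("U+%04X\n", (unsigned) normalize(0x0065, 0x0302, 0x0301));
    /* x + acute has no precomposed form: expect 0 */
    printf("%u\n", (unsigned) normalize(0x0078, 0x0301, 0));
    return 0;
}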