Don't use the Python unicodedata module as we use libunistring

No sense in loading two huge Unicode datasets into memory
Kovid Goyal 2017-09-14 18:18:38 +05:30
parent 21accfe114
commit ed3427f349
GPG Key ID: 06BC317B515ACE7C
3 changed files with 11 additions and 2 deletions
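
For context, a minimal standalone sketch (not part of this commit) of the libunistring primitive the change builds on: uc_composition() from <uninorm.h> returns the canonically composed code point for a base character and a combining mark, or 0 when no precomposed form exists. Link with -lunistring.

/* sketch.c: illustrative only; build with: cc sketch.c -lunistring */
#include <stdio.h>
#include <uninorm.h>

int main(void) {
    /* U+0065 'e' + U+0301 COMBINING ACUTE ACCENT -> U+00E9 'é' */
    ucs4_t composed = uc_composition(0x0065, 0x0301);
    if (composed) printf("composed to U+%04X\n", (unsigned) composed);
    /* U+0078 'x' + U+0301 has no precomposed form, so 0 is returned */
    printf("x + acute -> %u\n", (unsigned) uc_composition(0x0078, 0x0301));
    return 0;
}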

View File

@@ -4,7 +4,6 @@
 import ctypes
 import sys
-import unicodedata
 from collections import namedtuple
 from functools import lru_cache
 from itertools import chain
@@ -252,7 +251,7 @@ def missing_glyph(width):
 def render_cell(text=' ', bold=False, italic=False):
     # TODO: Handle non-normalizable combining chars. Probably need to use
     # harfbuzz for that
-    text = unicodedata.normalize('NFC', text)[0]
+    text = text[0]
     width = wcwidth(text)
     try:
         bitmap_char = render_char(text, bold, italic, width)

View File

@@ -41,6 +41,8 @@ line_text_at(char_type ch, combining_type cc) {
         PyUnicode_WriteChar(ans, 0, ch);
     } else {
         Py_UCS4 cc1 = cc & CC_MASK, cc2 = cc >> 16;
+        Py_UCS4 normalized = normalize(ch, cc1, cc2);
+        if (normalized) { return line_text_at(normalized, 0); }
         Py_UCS4 maxc = (ch > cc1) ? MAX(ch, cc2) : MAX(cc1, cc2);
         ans = PyUnicode_New(cc2 ? 3 : 2, maxc);
         if (ans == NULL) return PyErr_NoMemory();
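
To make the new early return concrete outside of CPython, here is a hedged sketch of the decision those two added lines implement (compose_cell() and the output layout are illustrative, not kitty's code): when the base character and its combining marks compose canonically, the cell text is a single code point and line_text_at() recurses with cc == 0; otherwise it builds the 2- or 3-code-point string as before.

/* compose_cell.c: illustrative sketch; build with: cc compose_cell.c -lunistring */
#include <stdint.h>
#include <stdio.h>
#include <uninorm.h>

/* Writes the cell's code points to out and returns how many (1, 2 or 3). */
static int compose_cell(uint32_t ch, uint32_t cc1, uint32_t cc2, uint32_t out[3]) {
    uint32_t composed = uc_composition(ch, cc1);
    if (composed && cc2) composed = uc_composition(composed, cc2);
    if (composed) { out[0] = composed; return 1; }  /* the new fast path */
    out[0] = ch; out[1] = cc1;                      /* fallback: keep the marks separate */
    if (cc2) { out[2] = cc2; return 3; }
    return 2;
}

int main(void) {
    uint32_t out[3];
    int n = compose_cell(0x0065, 0x0301, 0, out);   /* e + combining acute */
    printf("%d code point(s), first is U+%04X\n", n, (unsigned) out[0]);
    return 0;
}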

View File

@@ -1,6 +1,7 @@
 #pragma once
 #include <unictype.h>
+#include <uninorm.h>

 static inline bool
 is_combining_char(uint32_t ch) {
@@ -22,3 +23,10 @@ static inline bool
 is_url_char(uint32_t ch) {
     return ch && !uc_is_general_category_withtable(ch, UC_CATEGORY_MASK_C | UC_CATEGORY_MASK_Z);
 }
+
+static inline uint32_t
+normalize(uint32_t ch, uint32_t cc1, uint32_t cc2) {
+    uint32_t ans = uc_composition(ch, cc1);
+    if (ans && cc2) ans = uc_composition(ans, cc2);
+    return ans;
+}
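
A hedged usage sketch for the new helper follows; the helper body is copied from the hunk above, while the test code points are my own assumptions about what composes. normalize() returns the fully composed code point, or 0 when any composition step has no canonical result. Build with -lunistring.

/* normalize_demo.c: illustrative only; build with: cc normalize_demo.c -lunistring */
#include <stdint.h>
#include <stdio.h>
#include <uninorm.h>

/* Same body as the helper added above. */
static inline uint32_t
normalize(uint32_t ch, uint32_t cc1, uint32_t cc2) {
    uint32_t ans = uc_composition(ch, cc1);
    if (ans && cc2) ans = uc_composition(ans, cc2);
    return ans;
}

int main(void) {
    /* e + COMBINING ACUTE ACCENT: expect U+00E9 (é) */
    printf("U+%04X\n", (unsigned) normalize(0x0065, 0x0301, 0));
    /* e + circumflex + acute: composes in two steps, expect U+1EBF (ế) */
    printf("U+%04X\n", (unsigned) normalize(0x0065, 0x0302, 0x0301));
    /* x + acute has no precomposed form: expect 0 */
    printf("%u\n", (unsigned) normalize(0x0078, 0x0301, 0));
    return 0;
}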