Round-trip the zwj unicode character

Rendering of sequences containing the zero-width joiner (ZWJ, U+200D) is still not implemented, since it can cause the collapse of an unbounded number of characters into a single cell. However, kitty at least preserves the ZWJ by storing it as a combining character.
2018-08-04 18:29:45 +05:30 · 2018-08-04 18:29:45 +05:30 · 094ddd9333
commit 094ddd9333
parent e05d48a574
7 changed files with 170 additions and 142 deletions
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -45,6 +45,12 @@ Changelog
 - Implement support for emoji skin tone modifiers (:iss:`787`)
 - Round-trip the zwj unicode character. Rendering of sequences containing zwj
  is still not implemented, since it can cause the collapse of an unbounded
  number of characters into a single cell. However, kitty at least preserves
  the zwj by storing it as a combining character.
 0.11.3 [2018-07-10]
 ------------------------------
--- a/gen-wcwidth.py
+++ b/gen-wcwidth.py
@@ -42,7 +42,8 @@ def get_data(fname, folder='UCD'):
 class_maps = {}
 name_map = {}
 word_search_map = defaultdict(set)
-marks = set(emoji_skin_tone_modifiers)
+zwj = 0x200d
 marks = set(emoji_skin_tone_modifiers) | {zwj}
 not_assigned = set(range(0, sys.maxunicode))
@@ -196,12 +197,13 @@ def gen_emoji():
        p('\treturn false;\n}')
-def category_test(name, p, classes, comment, static=False, extra_chars=frozenset()):
+def category_test(name, p, classes, comment, static=False, extra_chars=frozenset(), exclude=frozenset()):
    static = 'static inline ' if static else ''
    chars = set()
    for c in classes:
        chars |= class_maps[c]
    chars |= extra_chars
    chars -= exclude
    p(f'{static}bool\n{name}(char_type code) {{')
    p(f'\t// {comment} ({len(chars)} codepoints)' + ' {{' '{')
    p('\tswitch(code) {')
@@ -257,9 +259,11 @@ def gen_ucd():
                {c for c in class_maps if c.startswith('M')},
                'M category (marks)',
                # See https://github.com/harfbuzz/harfbuzz/issues/169
-                extra_chars=emoji_skin_tone_modifiers
+                extra_chars=emoji_skin_tone_modifiers | {zwj}
        )
-        category_test('is_ignored_char', p, 'Cc Cf Cs'.split(), 'Control characters and non-characters', extra_chars=non_characters)
+        category_test(
            'is_ignored_char', p, 'Cc Cf Cs'.split(),
            'Control characters and non-characters', extra_chars=non_characters, exclude={zwj})
        category_test('is_word_char', p, {c for c in class_maps if c[0] in 'LN'}, 'L and N categories')
        category_test('is_CZ_category', p, cz, 'C and Z categories')
        category_test('is_P_category', p, {c for c in class_maps if c[0] == 'P'}, 'P category (punctuation)')
@@ -272,7 +276,7 @@ def gen_ucd():
        p('combining_type mark_for_codepoint(char_type c) {')
        rmap = codepoint_to_mark_map(p, mark_map)
        p('}\n')
-        if rmap[0xfe0e] != 1280:
+        if rmap[0xfe0e] != 1281:
            raise ValueError('The mark for 0xfe0e has changed, you have to update VS15 to {} and VS16 to {} in unicode-data.h'.format(
                rmap[0xfe0e], rmap[0xfe0f]
            ))
--- a/kitty/boss.py
+++ b/kitty/boss.py
@@ -505,6 +505,8 @@ class Boss:
        if w is not None:
            tm = self.os_window_map.get(w.os_window_id)
            if tm is not None:
                tm.update_tab_bar_data()
                tm.mark_tab_bar_dirty()
                t = tm.tab_for_id(w.tab_id)
                if t is not None:
                    t.relayout_borders()
--- a/kitty/unicode-data.c
+++ b/kitty/unicode-data.c
--- a/kitty/unicode-data.h
+++ b/kitty/unicode-data.h
@@ -1,7 +1,7 @@
 #pragma once
 #include "data-types.h"
-#define VS15 1280
+#define VS15 1281
-#define VS16 1281
+#define VS16 1282
 bool is_combining_char(char_type ch);
 bool is_ignored_char(char_type ch);
--- a/kitty/wcwidth-std.h
+++ b/kitty/wcwidth-std.h
@@ -8,7 +8,7 @@ START_ALLOW_CASE_RANGE
 static int
 wcwidth_std(int32_t code) {
 	switch(code) {
-		// Marks (2239 codepoints) {{{
+		// Marks (2240 codepoints) {{{
 		case 0x0:
 			return 0;
 		case 0x300 ... 0x36f:
@@ -307,6 +307,8 @@ wcwidth_std(int32_t code) {
 			return 0;
 		case 0x1dfb ... 0x1dff:
 			return 0;
 		case 0x200d:
 			return 0;
 		case 0x20d0 ... 0x20f0:
 			return 0;
 		case 0x2cef ... 0x2cf1:
@@ -565,7 +567,7 @@ wcwidth_std(int32_t code) {
 			return 0;
 		// }}}
-		// Non-printing characters (2264 codepoints) {{{
+		// Non-printing characters (2263 codepoints) {{{
 		case 0x1 ... 0x1f:
 			return -1;
 		case 0x7f ... 0x9f:
@@ -584,7 +586,9 @@ wcwidth_std(int32_t code) {
 			return -1;
 		case 0x180e:
 			return -1;
-		case 0x200b ... 0x200f:
+		case 0x200b ... 0x200c:
 			return -1;
 		case 0x200e ... 0x200f:
 			return -1;
 		case 0x202a ... 0x202e:
 			return -1;
--- a/kitty_tests/screen.py
+++ b/kitty_tests/screen.py
@@ -96,6 +96,13 @@ class TestScreen(BaseTest):
        self.ae(str(s.line(0)), q)
        self.ae(s.cursor.x, 2)
    def test_zwj(self):
        s = self.create_screen(cols=20)
        q = '\U0001f468\u200d\U0001f469\u200d\U0001f467\u200d\U0001f466'
        s.draw(q)
        self.ae(q, str(s.line(0)))
        self.ae(s.cursor.x, 8)
    def test_char_manipulation(self):
        s = self.create_screen()