Implement support for emoji skin tone modifiers

Fixes #787
2018-08-04 10:06:25 +05:30 · 2018-08-04 10:06:25 +05:30 · 000c1cf306
commit 000c1cf306
parent 28447d3389
8 changed files with 38 additions and 14 deletions
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -41,9 +41,9 @@ Changelog
 - The :opt:`focus_follows_mouse` option now also works across top-level kitty OS windows
  (:iss:`754`)

-
 - Fix detection of URLs in HTML source code (URLs inside quotes) (:iss:`785`)

+- Implement support for emoji skin tone modifiers (:iss:`787`)

 0.11.3 [2018-07-10]
 ------------------------------
--- a/gen-wcwidth.py
+++ b/gen-wcwidth.py
@@ -20,6 +20,7 @@ non_characters |= frozenset(range(0xffff, 0x10ffff + 1, 0x10000))
 non_characters |= frozenset(range(0xfdd0, 0xfdf0))
 if len(non_characters) != 66:
    raise SystemExit('non_characters table incorrect')
+emoji_skin_tone_modifiers = frozenset(range(0x1f3fb, 0x1F3FF + 1))


 def get_data(fname, folder='UCD'):
@@ -41,7 +42,7 @@ def get_data(fname, folder='UCD'):
 class_maps = {}
 name_map = {}
 word_search_map = defaultdict(set)
-marks = set()
+marks = set(emoji_skin_tone_modifiers)
 not_assigned = set(range(0, sys.maxunicode))


@@ -251,7 +252,13 @@ def gen_ucd():
    cz = {c for c in class_maps if c[0] in 'CZ'}
    with create_header('kitty/unicode-data.c') as p:
        p('#include "unicode-data.h"')
-        category_test('is_combining_char', p, {c for c in class_maps if c.startswith('M')}, 'M category (marks)')
+        category_test(
+                'is_combining_char', p,
+                {c for c in class_maps if c.startswith('M')},
+                'M category (marks)',
+                # See https://github.com/harfbuzz/harfbuzz/issues/169
+                extra_chars=emoji_skin_tone_modifiers
+        )
        category_test('is_ignored_char', p, 'Cc Cf Cs'.split(), 'Control characters and non-characters', extra_chars=non_characters)
        category_test('is_word_char', p, {c for c in class_maps if c[0] in 'LN'}, 'L and N categories')
        category_test('is_CZ_category', p, cz, 'C and Z categories')
--- a/kittens/unicode_input/names.h
+++ b/kittens/unicode_input/names.h
@@ -1,4 +1,4 @@
-// unicode data, built from the unicode standard on: 2018-06-14
+// unicode data, built from the unicode standard on: 2018-08-04
 // see gen-wcwidth.py
 #pragma once
 #include "data-types.h"
--- a/kitty/emoji.h
+++ b/kitty/emoji.h
@@ -1,4 +1,4 @@
-// unicode data, built from the unicode standard on: 2018-06-14
+// unicode data, built from the unicode standard on: 2018-08-04
 // see gen-wcwidth.py
 #pragma once
 #include "data-types.h"
--- a/kitty/line.c
+++ b/kitty/line.c
@@ -304,7 +304,10 @@ width(Line *self, PyObject *val) {
 void
 line_add_combining_char(Line *self, uint32_t ch, unsigned int x) {
    CPUCell *cell = self->cpu_cells + x;
-    if (!cell->ch) return;  // dont allow adding combining chars to a null cell
+    if (!cell->ch) {
+        if (x > 0 && (self->gpu_cells[x-1].attrs & WIDTH_MASK) == 2 && self->cpu_cells[x-1].ch) cell = self->cpu_cells + x - 1;
+        else return; // don't allow adding combining chars to a null cell
+    }
    for (unsigned i = 0; i < arraysz(cell->cc_idx); i++) {
        if (!cell->cc_idx[i]) { cell->cc_idx[i] = mark_for_codepoint(ch); return; }
    }
--- a/kitty/unicode-data.c
+++ b/kitty/unicode-data.c
--- a/kitty/wcwidth-std.h
+++ b/kitty/wcwidth-std.h
@@ -1,4 +1,4 @@
-// unicode data, built from the unicode standard on: 2018-06-14
+// unicode data, built from the unicode standard on: 2018-08-04
 // see gen-wcwidth.py
 #pragma once
 #include "data-types.h"
@@ -8,7 +8,7 @@ START_ALLOW_CASE_RANGE
 static int
 wcwidth_std(int32_t code) {
 	switch(code) {
-		// Marks (2234 codepoints) {{{
+		// Marks (2239 codepoints) {{{
 		case 0x0:
 			return 0;
 		case 0x300 ... 0x36f:
@@ -559,6 +559,8 @@ wcwidth_std(int32_t code) {
 			return 0;
 		case 0x1e944 ... 0x1e94a:
 			return 0;
+		case 0x1f3fb ... 0x1f3ff:
+			return 0;
 		case 0xe0100 ... 0xe01ef:
 			return 0;
 		// }}}
@@ -1209,7 +1211,7 @@ wcwidth_std(int32_t code) {
 			return -2;
 		// }}}

-		// East Asian double width (181796 codepoints) {{{
+		// East Asian double width (181791 codepoints) {{{
 		case 0x1100 ... 0x115f:
 			return 2;
 		case 0x231a ... 0x231b:
@@ -1386,7 +1388,9 @@ wcwidth_std(int32_t code) {
 			return 2;
 		case 0x1f3f4:
 			return 2;
-		case 0x1f3f8 ... 0x1f43e:
+		case 0x1f3f8 ... 0x1f3fa:
+			return 2;
+		case 0x1f400 ... 0x1f43e:
 			return 2;
 		case 0x1f440:
 			return 2;
--- a/kitty_tests/screen.py
+++ b/kitty_tests/screen.py
@@ -89,6 +89,13 @@ class TestScreen(BaseTest):
        self.ae(str(s.line(4)), 'a\u0306b1\u030623')
        self.ae((s.cursor.x, s.cursor.y), (2, 4))

+    def test_emoji_skin_tone_modifiers(self):
+        s = self.create_screen()
+        q = chr(0x1f469) + chr(0x1f3fd)
+        s.draw(q)
+        self.ae(str(s.line(0)), q)
+        self.ae(s.cursor.x, 2)
+
    def test_char_manipulation(self):
        s = self.create_screen()