Round-trip the zwj unicode character

Rendering of sequences containing the zwj (zero-width joiner, U+200D) is
still not implemented, since such a sequence can collapse an unbounded
number of characters into a single cell. However, kitty at least
preserves the zwj by storing it as a combining character.
This commit is contained in:
Kovid Goyal 2018-08-04 18:29:45 +05:30
parent e05d48a574
commit 094ddd9333
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
7 changed files with 170 additions and 142 deletions

View File

@ -45,6 +45,12 @@ Changelog
- Implement support for emoji skin tone modifiers (:iss:`787`)
- Round-trip the zwj (zero-width joiner, U+200D) Unicode character. Rendering
of sequences containing the zwj is still not implemented, since such a
sequence can collapse an unbounded number of characters into a single cell.
However, kitty at least preserves the zwj by storing it as a combining character.
0.11.3 [2018-07-10]
------------------------------

View File

@ -42,7 +42,8 @@ def get_data(fname, folder='UCD'):
class_maps = {}
name_map = {}
word_search_map = defaultdict(set)
marks = set(emoji_skin_tone_modifiers)
zwj = 0x200d
marks = set(emoji_skin_tone_modifiers) | {zwj}
not_assigned = set(range(0, sys.maxunicode))
@ -196,12 +197,13 @@ def gen_emoji():
p('\treturn false;\n}')
def category_test(name, p, classes, comment, static=False, extra_chars=frozenset()):
def category_test(name, p, classes, comment, static=False, extra_chars=frozenset(), exclude=frozenset()):
static = 'static inline ' if static else ''
chars = set()
for c in classes:
chars |= class_maps[c]
chars |= extra_chars
chars -= exclude
p(f'{static}bool\n{name}(char_type code) {{')
p(f'\t// {comment} ({len(chars)} codepoints)' + ' {{' '{')
p('\tswitch(code) {')
@ -257,9 +259,11 @@ def gen_ucd():
{c for c in class_maps if c.startswith('M')},
'M category (marks)',
# See https://github.com/harfbuzz/harfbuzz/issues/169
extra_chars=emoji_skin_tone_modifiers
extra_chars=emoji_skin_tone_modifiers | {zwj}
)
category_test('is_ignored_char', p, 'Cc Cf Cs'.split(), 'Control characters and non-characters', extra_chars=non_characters)
category_test(
'is_ignored_char', p, 'Cc Cf Cs'.split(),
'Control characters and non-characters', extra_chars=non_characters, exclude={zwj})
category_test('is_word_char', p, {c for c in class_maps if c[0] in 'LN'}, 'L and N categories')
category_test('is_CZ_category', p, cz, 'C and Z categories')
category_test('is_P_category', p, {c for c in class_maps if c[0] == 'P'}, 'P category (punctuation)')
@ -272,7 +276,7 @@ def gen_ucd():
p('combining_type mark_for_codepoint(char_type c) {')
rmap = codepoint_to_mark_map(p, mark_map)
p('}\n')
if rmap[0xfe0e] != 1280:
if rmap[0xfe0e] != 1281:
raise ValueError('The mark for 0xfe0e has changed, you have to update VS15 to {} and VS16 to {} in unicode-data.h'.format(
rmap[0xfe0e], rmap[0xfe0f]
))

View File

@ -505,6 +505,8 @@ class Boss:
if w is not None:
tm = self.os_window_map.get(w.os_window_id)
if tm is not None:
tm.update_tab_bar_data()
tm.mark_tab_bar_dirty()
t = tm.tab_for_id(w.tab_id)
if t is not None:
t.relayout_borders()

File diff suppressed because one or more lines are too long

View File

@ -1,7 +1,7 @@
#pragma once
#include "data-types.h"
#define VS15 1280
#define VS16 1281
#define VS15 1281
#define VS16 1282
bool is_combining_char(char_type ch);
bool is_ignored_char(char_type ch);

10
kitty/wcwidth-std.h generated
View File

@ -8,7 +8,7 @@ START_ALLOW_CASE_RANGE
static int
wcwidth_std(int32_t code) {
switch(code) {
// Marks (2239 codepoints) {{{
// Marks (2240 codepoints) {{{
case 0x0:
return 0;
case 0x300 ... 0x36f:
@ -307,6 +307,8 @@ wcwidth_std(int32_t code) {
return 0;
case 0x1dfb ... 0x1dff:
return 0;
case 0x200d:
return 0;
case 0x20d0 ... 0x20f0:
return 0;
case 0x2cef ... 0x2cf1:
@ -565,7 +567,7 @@ wcwidth_std(int32_t code) {
return 0;
// }}}
// Non-printing characters (2264 codepoints) {{{
// Non-printing characters (2263 codepoints) {{{
case 0x1 ... 0x1f:
return -1;
case 0x7f ... 0x9f:
@ -584,7 +586,9 @@ wcwidth_std(int32_t code) {
return -1;
case 0x180e:
return -1;
case 0x200b ... 0x200f:
case 0x200b ... 0x200c:
return -1;
case 0x200e ... 0x200f:
return -1;
case 0x202a ... 0x202e:
return -1;

View File

@ -96,6 +96,13 @@ class TestScreen(BaseTest):
self.ae(str(s.line(0)), q)
self.ae(s.cursor.x, 2)
def test_zwj(self):
s = self.create_screen(cols=20)
q = '\U0001f468\u200d\U0001f469\u200d\U0001f467\u200d\U0001f466'
s.draw(q)
self.ae(q, str(s.line(0)))
self.ae(s.cursor.x, 8)
def test_char_manipulation(self):
s = self.create_screen()