diff --git a/gen-wcwidth.py b/gen-wcwidth.py index 4a2f6aac9..ad96bf340 100755 --- a/gen-wcwidth.py +++ b/gen-wcwidth.py @@ -274,7 +274,8 @@ def category_test( comment: str, use_static: bool = False, extra_chars: Union[FrozenSet[int], Set[int]] = frozenset(), - exclude: Union[Set[int], FrozenSet[int]] = frozenset() + exclude: Union[Set[int], FrozenSet[int]] = frozenset(), + least_check_return: Optional[str] = None ) -> None: static = 'static inline ' if use_static else '' chars: Set[int] = set() @@ -284,6 +285,9 @@ def category_test( chars -= exclude p(f'{static}bool\n{name}(char_type code) {{') p(f'\t// {comment} ({len(chars)} codepoints)' + ' {{' '{') + if least_check_return is not None: + least = min(chars) + p(f'\tif (LIKELY(code < {least})) return {least_check_return};') p('\tswitch(code) {') for spec in get_ranges(list(chars)): write_case(spec, p) @@ -337,7 +341,8 @@ def gen_ucd() -> None: {c for c in class_maps if c.startswith('M')}, 'M category (marks)', # See https://github.com/harfbuzz/harfbuzz/issues/169 - extra_chars=emoji_skin_tone_modifiers | {zwj} + extra_chars=emoji_skin_tone_modifiers | {zwj}, + least_check_return='false' ) category_test( 'is_ignored_char', p, 'Cc Cf Cs'.split(), @@ -421,7 +426,7 @@ def gen_names() -> None: p('}; // }}}\n') # The trie - p('typedef struct {{ uint32_t children_offset; uint32_t match_offset; }} word_trie;\n') + p('typedef struct { uint32_t children_offset; uint32_t match_offset; } word_trie;\n') all_trie_nodes: List['TrieNode'] = [] # noqa class TrieNode: diff --git a/kittens/unicode_input/names.h b/kittens/unicode_input/names.h index cad24d9d2..7c5cf9455 100644 --- a/kittens/unicode_input/names.h +++ b/kittens/unicode_input/names.h @@ -1,4 +1,4 @@ -// unicode data, built from the unicode standard on: 2020-04-06 +// unicode data, built from the unicode standard on: 2020-08-06 // see gen-wcwidth.py #pragma once #include "data-types.h" diff --git a/kitty/emoji.h b/kitty/emoji.h index a3e6f1f97..ab1982908 100644 --- a/kitty/emoji.h +++ b/kitty/emoji.h @@ -1,4 +1,4 @@ -// unicode data, built from the unicode standard on: 2020-04-06 +// unicode data, built from the unicode standard on: 2020-08-06 // see gen-wcwidth.py #pragma once #include "data-types.h" diff --git a/kitty/unicode-data.c b/kitty/unicode-data.c index 3d1c28864..b3e13ee78 100644 --- a/kitty/unicode-data.c +++ b/kitty/unicode-data.c @@ -1,4 +1,4 @@ -// unicode data, built from the unicode standard on: 2020-04-06 +// unicode data, built from the unicode standard on: 2020-08-06 // see gen-wcwidth.py #include "data-types.h" @@ -8,6 +8,7 @@ START_ALLOW_CASE_RANGE bool is_combining_char(char_type code) { // M category (marks) (2301 codepoints) {{{ + if (LIKELY(code < 768)) return false; switch(code) { case 0x300 ... 0x36f: return true; diff --git a/kitty/wcwidth-std.h b/kitty/wcwidth-std.h index f319afcd5..43a74dc11 100644 --- a/kitty/wcwidth-std.h +++ b/kitty/wcwidth-std.h @@ -1,4 +1,4 @@ -// unicode data, built from the unicode standard on: 2020-04-06 +// unicode data, built from the unicode standard on: 2020-08-06 // see gen-wcwidth.py #pragma once #include "data-types.h"