Speed up is_combining_char() in the common case

2020-08-06 17:45:40 +05:30 · 2020-08-06 17:45:40 +05:30 · a835b56a51
commit a835b56a51
parent 5555a66638
5 changed files with 13 additions and 7 deletions
--- a/gen-wcwidth.py
+++ b/gen-wcwidth.py
@@ -274,7 +274,8 @@ def category_test(
    comment: str,
    use_static: bool = False,
    extra_chars: Union[FrozenSet[int], Set[int]] = frozenset(),
-    exclude: Union[Set[int], FrozenSet[int]] = frozenset()
+    exclude: Union[Set[int], FrozenSet[int]] = frozenset(),
+    least_check_return: Optional[str] = None
 ) -> None:
    static = 'static inline ' if use_static else ''
    chars: Set[int] = set()
@@ -284,6 +285,9 @@ def category_test(
    chars -= exclude
    p(f'{static}bool\n{name}(char_type code) {{')
    p(f'\t// {comment} ({len(chars)} codepoints)' + ' {{' '{')
+    if least_check_return is not None:
+        least = min(chars)
+        p(f'\tif (LIKELY(code < {least})) return {least_check_return};')
    p('\tswitch(code) {')
    for spec in get_ranges(list(chars)):
        write_case(spec, p)
@@ -337,7 +341,8 @@ def gen_ucd() -> None:
                {c for c in class_maps if c.startswith('M')},
                'M category (marks)',
                # See https://github.com/harfbuzz/harfbuzz/issues/169
-                extra_chars=emoji_skin_tone_modifiers | {zwj}
+                extra_chars=emoji_skin_tone_modifiers | {zwj},
+                least_check_return='false'
        )
        category_test(
            'is_ignored_char', p, 'Cc Cf Cs'.split(),
@@ -421,7 +426,7 @@ def gen_names() -> None:
        p('}; // }}}\n')

        # The trie
-        p('typedef struct {{ uint32_t children_offset; uint32_t match_offset; }} word_trie;\n')
+        p('typedef struct { uint32_t children_offset; uint32_t match_offset; } word_trie;\n')
        all_trie_nodes: List['TrieNode'] = []  # noqa

        class TrieNode:
--- a/kittens/unicode_input/names.h
+++ b/kittens/unicode_input/names.h
@@ -1,4 +1,4 @@
-// unicode data, built from the unicode standard on: 2020-04-06
+// unicode data, built from the unicode standard on: 2020-08-06
 // see gen-wcwidth.py
 #pragma once
 #include "data-types.h"
--- a/kitty/emoji.h
+++ b/kitty/emoji.h
@@ -1,4 +1,4 @@
-// unicode data, built from the unicode standard on: 2020-04-06
+// unicode data, built from the unicode standard on: 2020-08-06
 // see gen-wcwidth.py
 #pragma once
 #include "data-types.h"
--- a/kitty/unicode-data.c
+++ b/kitty/unicode-data.c
@@ -1,4 +1,4 @@
-// unicode data, built from the unicode standard on: 2020-04-06
+// unicode data, built from the unicode standard on: 2020-08-06
 // see gen-wcwidth.py
 #include "data-types.h"

@@ -8,6 +8,7 @@ START_ALLOW_CASE_RANGE
 bool
 is_combining_char(char_type code) {
 	// M category (marks) (2301 codepoints) {{{
+	if (LIKELY(code < 768)) return false;
 	switch(code) {
 		case 0x300 ... 0x36f:
 			return true;
--- a/kitty/wcwidth-std.h
+++ b/kitty/wcwidth-std.h
@@ -1,4 +1,4 @@
-// unicode data, built from the unicode standard on: 2020-04-06
+// unicode data, built from the unicode standard on: 2020-08-06
 // see gen-wcwidth.py
 #pragma once
 #include "data-types.h"