Speed up is_ignored_char in the common case

This commit is contained in:
Kovid Goyal 2020-08-06 18:05:33 +05:30
parent 8f9616c230
commit 628b92f20b
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 9 additions and 2 deletions

View File

@@ -275,7 +275,8 @@ def category_test(
use_static: bool = False, use_static: bool = False,
extra_chars: Union[FrozenSet[int], Set[int]] = frozenset(), extra_chars: Union[FrozenSet[int], Set[int]] = frozenset(),
exclude: Union[Set[int], FrozenSet[int]] = frozenset(), exclude: Union[Set[int], FrozenSet[int]] = frozenset(),
least_check_return: Optional[str] = None least_check_return: Optional[str] = None,
ascii_range: Optional[str] = None
) -> None: ) -> None:
static = 'static inline ' if use_static else '' static = 'static inline ' if use_static else ''
chars: Set[int] = set() chars: Set[int] = set()
@@ -288,6 +289,8 @@ def category_test(
if least_check_return is not None: if least_check_return is not None:
least = min(chars) least = min(chars)
p(f'\tif (LIKELY(code < {least})) return {least_check_return};') p(f'\tif (LIKELY(code < {least})) return {least_check_return};')
if ascii_range is not None:
p(f'\tif (LIKELY(0x20 <= code && code <= 0x7e)) return {ascii_range};')
p('\tswitch(code) {') p('\tswitch(code) {')
for spec in get_ranges(list(chars)): for spec in get_ranges(list(chars)):
write_case(spec, p) write_case(spec, p)
@@ -346,7 +349,10 @@ def gen_ucd() -> None:
) )
category_test( category_test(
'is_ignored_char', p, 'Cc Cf Cs'.split(), 'is_ignored_char', p, 'Cc Cf Cs'.split(),
'Control characters and non-characters', extra_chars=non_characters, exclude={zwj}) 'Control characters and non-characters',
extra_chars=non_characters, exclude={zwj},
ascii_range='false'
)
category_test('is_word_char', p, {c for c in class_maps if c[0] in 'LN'}, 'L and N categories') category_test('is_word_char', p, {c for c in class_maps if c[0] in 'LN'}, 'L and N categories')
category_test('is_CZ_category', p, cz, 'C and Z categories') category_test('is_CZ_category', p, cz, 'C and Z categories')
category_test('is_P_category', p, {c for c in class_maps if c[0] == 'P'}, 'P category (punctuation)') category_test('is_P_category', p, {c for c in class_maps if c[0] == 'P'}, 'P category (punctuation)')

1
kitty/unicode-data.c generated
View File

@@ -602,6 +602,7 @@ is_combining_char(char_type code) {
bool bool
is_ignored_char(char_type code) { is_ignored_char(char_type code) {
// Control characters and non-characters (2339 codepoints) {{{ // Control characters and non-characters (2339 codepoints) {{{
if (LIKELY(0x20 <= code && code <= 0x7e)) return false;
switch(code) { switch(code) {
case 0x0 ... 0x1f: case 0x0 ... 0x1f:
return true; return true;