Function to detect flag pairs

This commit is contained in:
Kovid Goyal 2020-04-06 21:16:14 +05:30
parent bf4e8c490c
commit 9bc2ab3245
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 57 additions and 1 deletions

View File

@ -10,7 +10,7 @@ from contextlib import contextmanager
from datetime import date from datetime import date
from functools import partial from functools import partial
from html.entities import html5 from html.entities import html5
from itertools import groupby from itertools import groupby, repeat
from operator import itemgetter from operator import itemgetter
from typing import ( from typing import (
Callable, DefaultDict, Dict, FrozenSet, Generator, Iterable, List, Callable, DefaultDict, Dict, FrozenSet, Generator, Iterable, List,
@ -123,6 +123,7 @@ all_emoji: Set[int] = set()
emoji_presentation_bases: Set[int] = set() emoji_presentation_bases: Set[int] = set()
narrow_emoji: Set[int] = set() narrow_emoji: Set[int] = set()
wide_emoji: Set[int] = set() wide_emoji: Set[int] = set()
flags: Dict[int, List[int]] = {}
def parse_basic_emoji(spec: str) -> None: def parse_basic_emoji(spec: str) -> None:
@ -149,6 +150,7 @@ def parse_flag_emoji_sequence(spec: str) -> None:
all_emoji.update(chars) all_emoji.update(chars)
wide_emoji.update(chars) wide_emoji.update(chars)
emoji_presentation_bases.update(chars) emoji_presentation_bases.update(chars)
flags.setdefault(left, []).append(right)
def parse_emoji_tag_sequence(spec: str) -> None: def parse_emoji_tag_sequence(spec: str) -> None:
@ -263,6 +265,22 @@ def gen_emoji() -> None:
p('\t}') p('\t}')
p('\treturn false;\n}') p('\treturn false;\n}')
# Emit is_flag_pair(): a 26x26 lookup table indexed by the offsets of the two
# code points from 0x1F1E6. Entry [left][right] is 1 when `flags` records
# `right` as a valid second half for `left`.
p('static inline bool is_flag_pair(char_type a, char_type b) {')
p('static const unsigned char flag_combinations[26][26] = {')
for i in range(26):
    q = 0x1F1E6 + i
    vals = flags.get(q, [])
    arr = list(repeat(0, 26))
    for x in vals:
        idx = x - 0x1F1E6
        arr[idx] = 1
    # Rows after the first are prefixed with the separating comma.
    comma = '' if i == 0 else ','
    p(comma, '{', ', '.join(map(str, arr)), '}')
p('};')
p('if (a < 0x1F1E6 || b < 0x1F1E6 || a >= 0x1F1E6 + 26 || b >= 0x1F1E6 + 26) return false;')
# The table is indexed by offset, so the raw code points must be rebased;
# indexing with a/b directly reads far outside the 26x26 array.
p('return flag_combinations[a - 0x1F1E6][b - 0x1F1E6];')
# Close the function with a bare brace: a trailing ';' here would be a stray
# empty declaration at file scope.
p('}')
def category_test( def category_test(
name: str, name: str,

32
kitty/emoji.h generated
View File

@ -781,5 +781,37 @@ is_symbol(char_type code) {
} }
return false; return false;
} }
/*
 * Return true when the code points a (first) and b (second) are both in the
 * 26-code-point range starting at 0x1F1E6 and the pair is marked as a valid
 * combination in the lookup table below.
 *
 * The table is indexed by each code point's offset from 0x1F1E6:
 * flag_combinations[a - 0x1F1E6][b - 0x1F1E6].
 */
static inline bool is_flag_pair(char_type a, char_type b) {
    static const unsigned char flag_combinations[26][26] = {
     { 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1 }
    , { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1 }
    , { 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1 }
    , { 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }
    , { 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0 }
    , { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }
    , { 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0 }
    , { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0 }
    , { 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0 }
    , { 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
    , { 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1 }
    , { 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0 }
    , { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
    , { 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1 }
    , { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
    , { 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0 }
    , { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
    , { 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0 }
    , { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1 }
    , { 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1 }
    , { 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1 }
    , { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0 }
    , { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 }
    , { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
    , { 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }
    , { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 }
    };
    /* Reject anything outside the 26 code points the table covers. */
    if (a < 0x1F1E6 || b < 0x1F1E6 || a >= 0x1F1E6 + 26 || b >= 0x1F1E6 + 26) return false;
    /* Rebase to 0..25 before indexing; the raw code points are far past the
     * end of the 26x26 table and would read out of bounds. */
    return flag_combinations[a - 0x1F1E6][b - 0x1F1E6];
}
END_ALLOW_CASE_RANGE END_ALLOW_CASE_RANGE

View File

@ -29,3 +29,9 @@ static inline bool
is_private_use(char_type ch) { is_private_use(char_type ch) {
return (0xe000 <= ch && ch <= 0xf8ff) || (0xF0000 <= ch && ch <= 0xFFFFF) || (0x100000 <= ch && ch <= 0x10FFFF); return (0xe000 <= ch && ch <= 0xf8ff) || (0xF0000 <= ch && ch <= 0xFFFFF) || (0x100000 <= ch && ch <= 0x10FFFF);
} }
/* Return true when ch lies in the inclusive range 0x1F1E6..0x1F1FF. */
static inline bool
is_flag_codepoint(char_type ch) {
    const char_type first_flag = 0x1F1E6;
    const char_type last_flag = 0x1F1FF;
    if (ch < first_flag) return false;
    return ch <= last_flag;
}