Implement searching for unicode characters by name

This commit is contained in:
Kovid Goyal 2018-02-12 14:08:54 +05:30
parent 193ac6cd81
commit dab852c412
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 135 additions and 47 deletions

View File

@ -62,7 +62,7 @@ UNDERLINE_STYLES = {name: i + 1 for i, name in enumerate(
'straight double curly'.split())} 'straight double curly'.split())}
def _color(color, intense=False, base=30): def color_code(color, intense=False, base=30):
if isinstance(color, str): if isinstance(color, str):
e = str((base + 60 if intense else base) + STANDARD_COLORS[color]) e = str((base + 60 if intense else base) + STANDARD_COLORS[color])
elif isinstance(color, int): elif isinstance(color, int):
@ -72,23 +72,27 @@ def _color(color, intense=False, base=30):
return e return e
def colored(text, color, intense=False): def sgr(*parts):
e = _color(color, intense) return '\033[{}m'.format(';'.join(parts))
return '\033[{}m{}\033[39m'.format(e, text)
def colored(text, color, intense=False, reset_to=None, reset_to_intense=False):
    """Return *text* wrapped in escape codes that set, then reset, its color.

    The opening code is built by color_code(color, intense). After the text
    the color is reset with code 39 when reset_to is None; otherwise the
    closing code is color_code(reset_to, reset_to_intense), which lets a
    caller return to a specific color instead.
    """
    opening = color_code(color, intense)
    if reset_to is None:
        closing = 39
    else:
        closing = color_code(reset_to, reset_to_intense)
    return '\033[{}m{}\033[{}m'.format(opening, text, closing)
def styled(text, fg=None, bg=None, fg_intense=False, bg_intense=False, italic=None, bold=None, underline=None, underline_color=None, reverse=None): def styled(text, fg=None, bg=None, fg_intense=False, bg_intense=False, italic=None, bold=None, underline=None, underline_color=None, reverse=None):
start, end = [], [] start, end = [], []
if fg is not None: if fg is not None:
start.append(_color(fg, fg_intense)) start.append(color_code(fg, fg_intense))
end.append('39') end.append('39')
if bg is not None: if bg is not None:
start.append(_color(bg, bg_intense, 40)) start.append(color_code(bg, bg_intense, 40))
end.append('49') end.append('49')
if underline_color is not None: if underline_color is not None:
if isinstance(underline_color, str): if isinstance(underline_color, str):
underline_color = STANDARD_COLORS[underline_color] underline_color = STANDARD_COLORS[underline_color]
start.append(_color(underline_color, base=50)) start.append(color_code(underline_color, base=50))
end.append('59') end.append('59')
if underline is not None: if underline is not None:
start.append('4:{}'.format(UNDERLINE_STYLES[underline])) start.append('4:{}'.format(UNDERLINE_STYLES[underline]))

View File

@ -10,13 +10,15 @@ from gettext import gettext as _
from kitty.config import cached_values_for from kitty.config import cached_values_for
from kitty.fast_data_types import wcswidth from kitty.fast_data_types import wcswidth
from kitty.key_encoding import ( from kitty.key_encoding import (
ESCAPE, F1, F2, RELEASE, backspace_key, enter_key DOWN, ESCAPE, F1, F2, LEFT, RELEASE, RIGHT, SHIFT, TAB, UP, backspace_key,
enter_key
) )
from ..tui.handler import Handler from ..tui.handler import Handler
from ..tui.loop import Loop from ..tui.loop import Loop
from ..tui.operations import ( from ..tui.operations import (
clear_screen, colored, cursor, set_line_wrapping, set_window_title, styled clear_screen, color_code, colored, cursor, set_line_wrapping,
set_window_title, sgr, styled
) )
HEX, NAME = 'HEX', 'NAME' HEX, NAME = 'HEX', 'NAME'
@ -25,10 +27,10 @@ HEX, NAME = 'HEX', 'NAME'
@lru_cache(maxsize=256) @lru_cache(maxsize=256)
def points_for_word(w): def points_for_word(w):
from .unicode_names import codepoints_for_word from .unicode_names import codepoints_for_word
return codepoints_for_word(w.lower()) return codepoints_for_word(w)
@lru_cache(maxsize=256) @lru_cache(maxsize=4096)
def name(cp): def name(cp):
from .unicode_names import name_for_codepoint from .unicode_names import name_for_codepoint
if isinstance(cp, str): if isinstance(cp, str):
@ -36,6 +38,25 @@ def name(cp):
return (name_for_codepoint(cp) or '').capitalize() return (name_for_codepoint(cp) or '').capitalize()
@lru_cache(maxsize=256)
def codepoints_matching_search(text):
    """Return a sorted list of codepoints matching the words in *text*.

    The text is lower-cased and split on whitespace. The first word seeds the
    candidate set via points_for_word(). Each further word narrows the set:
    by intersecting with that word's own codepoints when the intersection is
    non-empty, otherwise by keeping only candidates whose name contains the
    word as a substring. Returns an empty list when there are no words or no
    candidates remain.
    """
    words = text.lower().split()
    if not (words and words[0]):
        return []
    first, *rest = words
    matches = points_for_word(first)
    for word in rest:
        candidates = points_for_word(word)
        narrowed = (matches & candidates) if candidates else None
        if narrowed:
            matches = narrowed
        else:
            # No whole-word overlap: fall back to substring matching on names
            matches = {c for c in matches if word in name(c).lower()}
    return sorted(matches) if matches else []
FAINT = 242 FAINT = 242
DEFAULT_SET = tuple(map( DEFAULT_SET = tuple(map(
ord, ord,
@ -65,14 +86,21 @@ class Table:
self.layout_dirty = True self.layout_dirty = True
self.last_rows = self.last_cols = -1 self.last_rows = self.last_cols = -1
self.codepoints = [] self.codepoints = []
self.current_idx = 0
self.text = '' self.text = ''
self.num_cols = self.num_rows = 0 self.num_cols = self.num_rows = 0
self.mode = HEX self.mode = HEX
@property
def current_codepoint(self):
    """The codepoint at current_idx, or None when there are no codepoints."""
    return self.codepoints[self.current_idx] if self.codepoints else None
def set_codepoints(self, codepoints, mode=HEX): def set_codepoints(self, codepoints, mode=HEX):
self.codepoints = codepoints self.codepoints = codepoints
self.mode = HEX self.mode = mode
self.layout_dirty = True self.layout_dirty = True
self.current_idx = 0
def codepoint_at_hint(self, hint): def codepoint_at_hint(self, hint):
return self.codepoints[decode_hint(hint)] return self.codepoints[decode_hint(hint)]
@ -83,9 +111,38 @@ class Table:
self.last_cols, self.last_rows = cols, rows self.last_cols, self.last_rows = cols, rows
self.layout_dirty = False self.layout_dirty = False
if self.mode is NAME:
def as_parts(i, codepoint): def as_parts(i, codepoint):
return encode_hint(i).ljust(idx_size), chr(codepoint), name(codepoint) return encode_hint(i).ljust(idx_size), chr(codepoint), name(codepoint)
def cell(i, idx, c, desc):
is_current = i == self.current_idx
if is_current:
yield sgr(color_code('gray', base=40))
yield colored(idx, 'green') + ' '
yield colored(c, 'black' if is_current else 'gray', True) + ' '
w = wcswidth(c)
if w < 2:
yield ' ' * (2 - w)
if len(desc) > space_for_desc:
desc = desc[:space_for_desc - 1] + '…'
yield colored(desc, FAINT)
extra = space_for_desc - len(desc)
if extra > 0:
yield ' ' * extra
if is_current:
yield sgr('49')
else:
def as_parts(i, codepoint):
return encode_hint(i).ljust(idx_size), chr(codepoint), ''
def cell(i, idx, c, desc):
yield colored(idx, 'green') + ' '
yield colored(c, 'gray', True) + ' '
w = wcswidth(c)
if w < 2:
yield ' ' * (2 - w)
num = len(self.codepoints) num = len(self.codepoints)
if num < 1: if num < 1:
self.text = '' self.text = ''
@ -94,35 +151,40 @@ class Table:
idx_size = len(encode_hint(num - 1)) idx_size = len(encode_hint(num - 1))
parts = [as_parts(i, c) for i, c in enumerate(self.codepoints)] parts = [as_parts(i, c) for i, c in enumerate(self.codepoints)]
if self.mode is NAME:
sizes = [idx_size + 2 + len(p[2]) + 2 for p in parts] sizes = [idx_size + 2 + len(p[2]) + 2 for p in parts]
else:
sizes = [idx_size + 3 for p in parts]
longest = max(sizes) if sizes else 0 longest = max(sizes) if sizes else 0
col_width = longest + 2 col_width = longest + 2
col_width = min(col_width, 40) col_width = min(col_width, 40)
space_for_desc = col_width - 2 - idx_size - 4 space_for_desc = col_width - 2 - idx_size - 4
num_cols = cols // col_width num_cols = self.num_cols = cols // col_width
buf = [] buf = []
rows_left = rows
a = buf.append a = buf.append
rows_left = self.num_rows = rows
for i, (idx, c, desc) in enumerate(parts): for i, (idx, c, desc) in enumerate(parts):
if i > 0 and i % num_cols == 0: if i > 0 and i % num_cols == 0:
rows_left -= 1 rows_left -= 1
if rows_left == 0: if rows_left == 0:
break break
a('\r\n') buf.append('\r\n')
a(colored(idx, 'green')), a(' '), a(colored(c, 'gray', True)), a(' ') buf.extend(cell(i, idx, c, desc))
w = wcswidth(c)
if w < 2:
a(' ' * (2 - w))
if len(desc) > space_for_desc:
desc = desc[:space_for_desc - 1] + '…'
a(colored(desc, FAINT))
extra = space_for_desc - len(desc)
if extra > 0:
a(' ' * extra)
a(' ') a(' ')
self.text = ''.join(buf) self.text = ''.join(buf)
return self.text return self.text
def move_current(self, rows=0, cols=0):
    """Move the current selection by single cells and/or whole rows.

    cols shifts current_idx by that many cells, wrapping around at either
    end of the codepoint list. rows shifts it by rows * num_cols and clamps
    the result to the valid index range. Any movement marks the layout as
    dirty so the table is re-rendered.

    Does nothing when there are no codepoints; without this guard the
    wrap-around modulo would divide by zero on an empty table.
    """
    count = len(self.codepoints)
    if count < 1:
        return
    if cols:
        # Adding count first keeps the left operand non-negative for cols=-1
        self.current_idx = (self.current_idx + count + cols) % count
        self.layout_dirty = True
    if rows:
        target = self.current_idx + rows * self.num_cols
        self.current_idx = max(0, min(target, count - 1))
        self.layout_dirty = True
class UnicodeInput(Handler): class UnicodeInput(Handler):
@ -132,14 +194,27 @@ class UnicodeInput(Handler):
self.current_input = '' self.current_input = ''
self.current_char = None self.current_char = None
self.prompt_template = '{}> ' self.prompt_template = '{}> '
self.last_updated_code_point_at = None
self.choice_line = '' self.choice_line = ''
self.mode = globals().get(cached_values.get('mode', 'HEX'), 'HEX') self.mode = globals().get(cached_values.get('mode', 'HEX'), 'HEX')
self.table = Table() self.table = Table()
if self.mode is HEX:
self.table.set_codepoints(self.recent, self.mode)
self.update_prompt() self.update_prompt()
def update_codepoints(self):
    """Refresh the table's codepoints when the current query has changed.

    The query is (mode, None) in HEX mode, where the table shows self.recent,
    and (mode, current_input) in NAME mode, where the table shows the search
    results for the current input. The table is only updated when the query
    differs from the one recorded in last_updated_code_point_at, so repeated
    calls with the same input do not reset the table.
    """
    previous = self.last_updated_code_point_at
    codepoints = None
    if self.mode is HEX:
        q = (self.mode, None)
        codepoints = self.recent
    elif self.mode is NAME:
        q = (self.mode, self.current_input)
        if q != previous:
            codepoints = codepoints_matching_search(self.current_input)
    # NOTE(review): q is unbound here if mode is neither HEX nor NAME
    if q == previous:
        return
    self.last_updated_code_point_at = q
    self.table.set_codepoints(codepoints, self.mode)
def update_current_char(self): def update_current_char(self):
self.update_codepoints()
self.current_char = None self.current_char = None
if self.mode is HEX: if self.mode is HEX:
try: try:
@ -151,17 +226,9 @@ class UnicodeInput(Handler):
except Exception: except Exception:
pass pass
else: else:
parts = self.current_input.split() cc = self.table.current_codepoint
if parts and parts[0]: if cc:
codepoints = points_for_word(parts[0]) self.current_char = chr(cc)
for word in parts[1:]:
pts = points_for_word(word)
if pts:
codepoints &= pts
if codepoints:
codepoints = tuple(sorted(codepoints))
self.current_char = chr(codepoints[0])
# name_map = {c: name(c) for c in codepoints}
if self.current_char is not None: if self.current_char is not None:
code = ord(self.current_char) code = ord(self.current_char)
if code <= 32 or code == 127 or 128 <= code <= 159 or 0xd800 <= code <= 0xdbff or 0xDC00 <= code <= 0xDFFF: if code <= 32 or code == 127 or 128 <= code <= 159 or 0xd800 <= code <= 0xdbff or 0xDC00 <= code <= 0xDFFF:
@ -210,18 +277,21 @@ class UnicodeInput(Handler):
self.print(text) self.print(text)
y += 1 y += 1
if self.mode is HEX: if self.mode is HEX:
writeln(_('Enter the hex code for the character')) writeln(_('Enter the hex code for the character'))
else: elif self.mode is NAME:
writeln(_('Enter words from the name of the character')) writeln(_('Enter words from the name of the character'))
else:
writeln(_('Enter the index for the character you want from the list below'))
self.write(self.prompt) self.write(self.prompt)
self.write(self.current_input) self.write(self.current_input)
with cursor(self.write): with cursor(self.write):
writeln() writeln()
if self.choice_line:
writeln(self.choice_line) writeln(self.choice_line)
if self.mode is HEX: if self.mode is HEX:
writeln(styled('Use r followed by the index for the recent entries below', fg=FAINT)) writeln(styled(_('Type {} followed by the index for the recent entries below').format('r'), fg=FAINT))
elif self.mode is NAME:
writeln(styled(_('Use Tab or the arrow keys to choose a character from below'), fg=FAINT))
self.table_at = y self.table_at = y
self.write(self.table.layout(self.screen_size.rows - self.table_at, self.screen_size.cols)) self.write(self.table.layout(self.screen_size.rows - self.table_at, self.screen_size.cols))
@ -246,6 +316,20 @@ class UnicodeInput(Handler):
self.switch_mode(HEX) self.switch_mode(HEX)
elif key_event.key is F2: elif key_event.key is F2:
self.switch_mode(NAME) self.switch_mode(NAME)
elif self.mode is NAME:
if key_event.key is TAB:
if key_event.mods == SHIFT:
self.table.move_current(cols=-1), self.refresh()
elif not key_event.mods:
self.table.move_current(cols=1), self.refresh()
elif key_event.key is LEFT and not key_event.mods:
self.table.move_current(cols=-1), self.refresh()
elif key_event.key is RIGHT and not key_event.mods:
self.table.move_current(cols=1), self.refresh()
elif key_event.key is UP and not key_event.mods:
self.table.move_current(rows=-1), self.refresh()
elif key_event.key is DOWN and not key_event.mods:
self.table.move_current(rows=1), self.refresh()
def switch_mode(self, mode): def switch_mode(self, mode):
if mode is not self.mode: if mode is not self.mode:

View File

@ -27,7 +27,7 @@ codepoints_for_word(const char *word, size_t len) {
for (unsigned short i = 1; i <= words[0]; i++) { for (unsigned short i = 1; i <= words[0]; i++) {
unsigned short word_idx = words[i]; unsigned short word_idx = words[i];
const char *w = idx_to_word[word_idx]; const char *w = idx_to_word[word_idx];
if(strncmp(word, w, len) == 0) { if (strncmp(word, w, len) == 0 && strlen(w) == len) {
const char_type* codepoints = codepoints_for_word_idx[word_idx]; const char_type* codepoints = codepoints_for_word_idx[word_idx];
for (char_type i = 1; i <= codepoints[0]; i++) { for (char_type i = 1; i <= codepoints[0]; i++) {
PyObject *t = PyLong_FromUnsignedLong(codepoints[i]); if (t == NULL) { Py_DECREF(ans); return NULL; } PyObject *t = PyLong_FromUnsignedLong(codepoints[i]); if (t == NULL) { Py_DECREF(ans); return NULL; }