Hints kitten: Overhaul to make it more intelligent

It now uses information about the geometry of the screen, so URLs that stretch over multiple lines are detected even when there is a hard line break in between (some programs, such as mutt, generate these).
2018-05-19 14:31:00 +05:30 · 2018-05-19 14:31:00 +05:30 · ad1109b6fe
commit ad1109b6fe
parent c2bd6a66d9
4 changed files with 183 additions and 97 deletions
--- a/gen-wcwidth.py
+++ b/gen-wcwidth.py
@ -219,10 +219,12 @@ def codepoint_to_mark_map(p, mark_map):
    return rmap
-def classes_to_regex(classes):
+def classes_to_regex(classes, exclude=''):
    chars = set()
    for c in classes:
        chars |= class_maps[c]
    for c in map(ord, exclude):
        chars.discard(c)
    def as_string(codepoint):
        if codepoint < 256:
@ -261,7 +263,7 @@ def gen_ucd():
                rmap[0xfe0e], rmap[0xfe0f]
            ))
    with open('kittens/hints/url_regex.py', 'w') as f:
-        f.write("url_delimiters = '{}'  # noqa".format(''.join(classes_to_regex(cz))))
+        f.write("url_delimiters = '{}'  # noqa".format(''.join(classes_to_regex(cz, exclude='\n'))))
 def gen_names():
--- a/kittens/hints/main.py
+++ b/kittens/hints/main.py
@ -12,15 +12,15 @@ from gettext import gettext as _
 from kitty.cli import parse_args
 from kitty.fast_data_types import set_clipboard_string
 from kitty.key_encoding import ESCAPE, backspace_key, enter_key
 from kitty.utils import screen_size_function
 from ..tui.handler import Handler
 from ..tui.loop import Loop
-from ..tui.operations import (
+from ..tui.operations import faint, styled
    clear_screen, faint, set_cursor_visible, set_window_title, styled
 )
 URL_PREFIXES = 'http https file ftp'.split()
 HINT_ALPHABET = string.digits + string.ascii_lowercase
 screen_size = screen_size_function()
 class Mark(object):
@ -46,12 +46,8 @@ def decode_hint(x):
    return int(x, 36)
-def render(lines, current_input):
+def highlight_mark(m, text, current_input):
    ans = []
    def mark(m):
    hint = encode_hint(m.index)
        text = m.text
    if current_input and not hint.startswith(current_input):
        return faint(text)
    hint = hint[len(current_input):] or ' '
@ -65,31 +61,22 @@ def render(lines, current_input):
        text, fg='gray', fg_intense=True, bold=True
    )
    for line, marks in lines:
        if not marks:
            ans.append(faint(line))
            continue
        buf = []
-        for i, m in enumerate(marks):
+def render(text, current_input, all_marks):
-            if i == 0 and m.start:
+    for mark in reversed(all_marks):
-                buf.append(faint(line[:m.start]))
+        mtext = highlight_mark(mark, text[mark.start:mark.end], current_input)
-            buf.append(mark(m))
+        text = text[:mark.start] + mtext + text[mark.end:]
            if m is not marks[-1]:
                buf.append(faint(line[m.end:marks[i + 1].start]))
-        rest = line[marks[-1].end:]
+    text = text.replace('\0', '')
        if rest:
            buf.append(faint(rest))
-        ans.append(''.join(buf))
+    return text.replace('\n', '\r\n').rstrip()
    return '\r\n'.join(ans)
 class Hints(Handler):
-    def __init__(self, lines, index_map, args):
+    def __init__(self, text, all_marks, index_map, args):
-        self.lines, self.index_map = tuple(lines), index_map
+        self.text, self.index_map = text, index_map
        self.all_marks = all_marks
        self.current_input = ''
        self.current_text = None
        self.args = args
@ -97,8 +84,9 @@ class Hints(Handler):
        self.chosen = None
    def init_terminal_state(self):
-        self.write(set_cursor_visible(False))
+        self.cmd.set_cursor_visible(False)
-        self.write(set_window_title(self.window_title))
+        self.cmd.set_window_title(self.window_title)
        self.cmd.set_line_wrapping(False)
    def initialize(self):
        self.init_terminal_state()
@ -152,50 +140,82 @@ class Hints(Handler):
    def draw_screen(self):
        if self.current_text is None:
-            self.current_text = render(self.lines, self.current_input)
+            self.current_text = render(self.text, self.current_input, self.all_marks)
-        self.write(clear_screen())
+        self.cmd.clear_screen()
        self.write(self.current_text)
-def regex_finditer(pat, minimum_match_length, line):
+def regex_finditer(pat, minimum_match_length, text):
-    for m in pat.finditer(line):
+    for m in pat.finditer(text):
        s, e = m.span(pat.groups)
        while e > s + 1 and text[e-1] == '\0':
            e -= 1
        if e - s >= minimum_match_length:
            yield s, e
 closing_bracket_map = {'(': ')', '[': ']', '{': '}', '<': '>'}
 opening_brackets = ''.join(closing_bracket_map)
 postprocessor_map = {}
-def find_urls(pat, line):
+def postprocessor(func):
-    for m in pat.finditer(line):
+    postprocessor_map[func.__name__] = func
-        s, e = m.span()
+    return func
-        if s > 4 and line[s - 5:s] == 'link:':  # asciidoc URLs
+
-            url = line[s:e]
+
@postprocessor
 def url(text, s, e):
    if s > 4 and text[s - 5:s] == 'link:':  # asciidoc URLs
        url = text[s:e]
        idx = url.rfind('[')
        if idx > -1:
            e -= len(url) - idx
-        while line[e - 1] in '.,?!' and e > 1:  # remove trailing punctuation
+    while text[e - 1] in '.,?!' and e > 1:  # remove trailing punctuation
        e -= 1
-        # Detect a bracketed URL
+    # Remove trailing bracket if matched by leading bracket
-        if s > 0 and e > s + 4 and line[s-1] in '({[<' and line[e-1] == closing_bracket_map[line[s-1]]:
+    if s > 0 and e < len(text) and text[s-1] in opening_brackets and text[e-1] == closing_bracket_map[text[s-1]]:
        e -= 1
-        yield s, e
+    # Remove trailing quote if matched by leading quote
    if s > 0 and e < len(text) and text[s-1] in '\'"' and text[e-1] == text[s-1]:
        e -= 1
    return s, e
-def mark(finditer, line, all_marks):
+@postprocessor
-    marks = []
+def brackets(text, s, e):
-    for s, e in finditer(line):
+    # Remove matching brackets
-        idx = len(all_marks)
+    if e > s and e <= len(text):
-        text = line[s:e]
+        before = text[s]
-        marks.append(Mark(idx, s, e, text))
+        if before in '({[<' and text[e-1] == closing_bracket_map[before]:
-        all_marks.append(marks[-1])
+            s += 1
-    return line, marks
+            e -= 1
    return s, e
-def run_loop(args, lines, index_map):
+@postprocessor
 def quotes(text, s, e):
    # Remove matching quotes
    if e > s and e <= len(text):
        before = text[s]
        if before in '\'"' and text[e-1] == before:
            s += 1
            e -= 1
    return s, e
 def mark(pattern, post_processors, text, args):
    pat = re.compile(pattern)
    for idx, (s, e) in enumerate(regex_finditer(pat, args.minimum_match_length, text)):
        for func in post_processors:
            s, e = func(text, s, e)
        mark_text = text[s:e].replace('\n', '').replace('\0', '')
        yield Mark(idx, s, e, mark_text)
 def run_loop(args, text, all_marks, index_map):
    loop = Loop()
-    handler = Hints(lines, index_map, args)
+    handler = Hints(text, all_marks, index_map, args)
    loop.loop(handler)
    if handler.chosen and loop.return_code == 0:
        return {'match': handler.chosen, 'program': args.program}
@ -206,33 +226,56 @@ def escape(chars):
    return chars.replace('\\', '\\\\').replace('-', r'\-').replace(']', r'\]')
-def run(args, text):
+def functions_for(args):
    post_processors = []
    if args.type == 'url':
        from .url_regex import url_delimiters
-        url_pat = '(?:{})://[^{}]{{3,}}'.format(
+        pattern = '(?:{})://[^{}]{{3,}}'.format(
            '|'.join(args.url_prefixes.split(',')), url_delimiters
        )
-        finditer = partial(find_urls, re.compile(url_pat))
+        post_processors.append(url)
    elif args.type == 'path':
-        finditer = partial(regex_finditer, re.compile(r'(?:\S*/\S+)|(?:\S+[.][a-zA-Z0-9]{2,5})'), args.minimum_match_length)
+        pattern = r'(?:\S*/\S+)|(?:\S+[.][a-zA-Z0-9]{2,5})'
        post_processors.extend((brackets, quotes))
    elif args.type == 'line':
-        finditer = partial(regex_finditer, re.compile(r'(?m)^\s*(.+)\s*$'), args.minimum_match_length)
+        pattern = '(?m)^\\s*(.+)[\\s\0]*$'
    elif args.type == 'word':
        chars = args.word_characters
        if chars is None:
            import json
            chars = json.loads(os.environ['KITTY_COMMON_OPTS'])['select_by_word_characters']
-        pat = re.compile(r'(?u)[{}\w]{{{},}}'.format(escape(chars), args.minimum_match_length))
+        pattern = r'(?u)[{}\w]{{{},}}'.format(escape(chars), args.minimum_match_length)
-        finditer = partial(regex_finditer, pat, args.minimum_match_length)
+        post_processors.extend((brackets, quotes))
    else:
-        finditer = partial(regex_finditer, re.compile(args.regex), args.minimum_match_length)
+        pattern = args.regex
    return pattern, post_processors
 def convert_text(text, cols):
    lines = []
-    all_marks = []
+    for full_line in text.split('\n'):
-    for line in text.splitlines():
+        if full_line:
-        marked = mark(finditer, line, all_marks)
+            for line in full_line.split('\r'):
-        lines.append(marked)
+                if line:
                    lines.append(line.ljust(cols, '\0'))
    return '\n'.join(lines)
 def parse_input(text):
    try:
        cols = int(os.environ['OVERLAID_WINDOW_COLS'])
    except KeyError:
        cols = screen_size().cols
    return convert_text(text, cols)
 def run(args, text):
    try:
        pattern, post_processors = functions_for(args)
        text = parse_input(text)
        all_marks = tuple(mark(pattern, post_processors, text, args))
        if not all_marks:
-        input(_('No {} found, press Enter to abort.').format(
+            input(_('No {} found, press Enter to quit.').format(
                'URLs' if args.type == 'url' else 'matches'
                ))
            return
@ -241,10 +284,16 @@ def run(args, text):
        for m in all_marks:
            m.index = largest_index - m.index
        index_map = {m.index: m for m in all_marks}
    except Exception:
        import traceback
        traceback.print_exc()
        input('Press Enter to quit.')
        raise SystemExit(1)
-    return run_loop(args, lines, index_map)
+    return run_loop(args, text, all_marks, index_map)
 # CLI {{{
 OPTIONS = partial(r'''
 --program
 default=default
@ -284,8 +333,12 @@ The minimum number of characters to consider a match.
 '''.format, ','.join(sorted(URL_PREFIXES)))
-def main(args):
+def parse_hints_args(args):
    msg = 'Select text from the screen using the keyboard. Defaults to searching for URLs.'
    return parse_args(args[1:], OPTIONS, '', msg, 'hints')
 def main(args):
    text = ''
    if sys.stdin.isatty():
        if '--help' not in args and '-h' not in args:
@ -296,7 +349,7 @@ def main(args):
        text = sys.stdin.buffer.read().decode('utf-8')
        sys.stdin = open('/dev/tty')
    try:
-        args, items = parse_args(args[1:], OPTIONS, '', msg, 'hints')
+        args, items = parse_hints_args(args[1:])
    except SystemExit as e:
        if e.code != 0:
            print(e.args[0], file=sys.stderr)
@ -325,7 +378,7 @@ def handle_result(args, data, target_window_id, boss):
        boss.open_url(data['match'], None if program == 'default' else program, cwd=cwd)
-handle_result.type_of_input = 'text'
+handle_result.type_of_input = 'screen'
 if __name__ == '__main__':
@ -333,3 +386,4 @@ if __name__ == '__main__':
    ans = main(sys.argv)
    if ans:
        print(ans)
 # }}}
--- a/kittens/hints/url_regex.py
+++ b/kittens/hints/url_regex.py
@ -1 +1 @@
-url_delimiters = '\x00-\x20\x7f-\xa0\xad\u0600-\u0605\u061c\u06dd\u070f\u08e2\u1680\u180e\u2000-\u200f\u2028-\u202f\u205f-\u2064\u2066-\u206f\u3000\ud800-\uf8ff\ufeff\ufff9-\ufffb\U000110bd\U0001bca0-\U0001bca3\U0001d173-\U0001d17a\U000e0001\U000e0020-\U000e007f\U000f0000-\U000ffffd\U00100000-\U0010fffd'  # noqa
+url_delimiters = '\x00-\x09\x0b-\x20\x7f-\xa0\xad\u0600-\u0605\u061c\u06dd\u070f\u08e2\u1680\u180e\u2000-\u200f\u2028-\u202f\u205f-\u2064\u2066-\u206f\u3000\ud800-\uf8ff\ufeff\ufff9-\ufffb\U000110bd\U0001bca0-\U0001bca3\U0001d173-\U0001d17a\U000e0001\U000e0020-\U000e007f\U000f0000-\U000ffffd\U00100000-\U0010fffd'  # noqa
--- a/kitty_tests/hints.py
+++ b/kitty_tests/hints.py
@ -0,0 +1,30 @@
 #!/usr/bin/env python
 # vim:fileencoding=utf-8
 # License: GPL v3 Copyright: 2018, Kovid Goyal <kovid at kovidgoyal.net>
 from . import BaseTest
 class TestHints(BaseTest):
    def test_url_hints(self):
        from kittens.hints.main import parse_hints_args, functions_for, mark, convert_text
        args = parse_hints_args([])[0]
        pattern, post_processors = functions_for(args)
        def create_marks(text, cols=20):
            text = convert_text(text, cols)
            return tuple(mark(pattern, post_processors, text, args))
        def t(text, url, cols=20):
            marks = create_marks(text, cols)
            urls = [m.text for m in marks]
            self.ae(urls, [url])
        u = 'http://test.me/'
        t(u, 'http://test.me/')
        t('"{}"'.format(u), u)
        t('({})'.format(u), u)
        t(u + '\nxxx', u + 'xxx', len(u))
        t('link:{}[xxx]'.format(u), u)
@ -1 +1 @@
-url_delimiters = '\x00-\x20\x7f-\xa0\xad\u0600-\u0605\u061c\u06dd\u070f\u08e2\u1680\u180e\u2000-\u200f\u2028-\u202f\u205f-\u2064\u2066-\u206f\u3000\ud800-\uf8ff\ufeff\ufff9-\ufffb\U000110bd\U0001bca0-\U0001bca3\U0001d173-\U0001d17a\U000e0001\U000e0020-\U000e007f\U000f0000-\U000ffffd\U00100000-\U0010fffd'  # noqa
+url_delimiters = '\x00-\x09\x0b-\x20\x7f-\xa0\xad\u0600-\u0605\u061c\u06dd\u070f\u08e2\u1680\u180e\u2000-\u200f\u2028-\u202f\u205f-\u2064\u2066-\u206f\u3000\ud800-\uf8ff\ufeff\ufff9-\ufffb\U000110bd\U0001bca0-\U0001bca3\U0001d173-\U0001d17a\U000e0001\U000e0020-\U000e007f\U000f0000-\U000ffffd\U00100000-\U0010fffd'  # noqa