From b02dc836fb53ec122d2c5e761768deca23fdf045 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 17 Oct 2016 10:41:55 +0530 Subject: [PATCH] Use a regex for faster filtering of unicode data --- kitty/screen.py | 123 ++++++++++++++++++++++++++--------------------- kitty/unicode.py | 45 +++++++++++++++++ 2 files changed, 113 insertions(+), 55 deletions(-) create mode 100644 kitty/unicode.py diff --git a/kitty/screen.py b/kitty/screen.py index b5363d232..b5bc21b08 100644 --- a/kitty/screen.py +++ b/kitty/screen.py @@ -12,7 +12,8 @@ from PyQt5.QtCore import QObject, pyqtSignal from pyte import charsets as cs, control as ctrl, graphics as g, modes as mo from .data_types import Line, Cursor, rewrap_lines -from .utils import wcwidth +from .utils import wcwidth, is_simple_string +from .unicode import ignore_pat #: A container for screen's scroll margins. @@ -28,7 +29,6 @@ Savepoint = namedtuple("Savepoint", [ "origin", "wrap" ]) -IGNORED_CATEGORIES = ('Cc', 'Cf', 'Cn', 'Cs') class Screen(QObject): @@ -292,64 +292,75 @@ class Screen(QObject): return self.utf8_decoder.decode(data) return "".join(self.g0_charset[b] for b in data) - def draw(self, data: bytes): + def _fast_draw(self, data: str) -> None: + while data: + pass # TODO: Implement me + + def _draw_char(self, char: str, char_width: int) -> None: + # If this was the last column in a line and auto wrap mode is + # enabled, move the cursor to the beginning of the next line, + # otherwise replace characters already displayed with newly + # entered. + if self.cursor.x + char_width > self.columns - 1: + if mo.DECAWM in self.mode: + self.carriage_return() + self.linefeed() + self.linebuf[self.cursor.y].continued = True + else: + extra = self.cursor.x + char_width + 1 - self.columns + self.cursor.x -= extra + + # If Insert mode is set, new characters move old characters to + # the right, otherwise terminal is in Replace mode and new + # characters replace old characters at cursor position. 
+ do_insert = mo.IRM in self.mode and char_width > 0 + if do_insert: + self.insert_characters(char_width) + + cx = self.cursor.x + line = self.linebuf[self.cursor.y] + if char_width: + line.char[cx], line.width[cx] = ord(char), char_width + if char_width > 1: + for i in range(1, char_width): + line.char[cx + i] = line.width[cx + i] = 0 + line.apply_cursor(self.cursor, cx, char_width) + elif unicodedata.combining(char): + # A zero-cell character is combined with the previous + # character either on this or preceeding line. + if cx: + last = chr(line.char[cx - 1]) + normalized = unicodedata.normalize("NFC", last + char) + line.char[cx - 1] = ord(normalized) + elif self.cursor.y: + lline = self.linebuf[self.cursor.y - 1] + last = chr(lline.char[self.columns - 1]) + normalized = unicodedata.normalize("NFC", last + char) + lline.char[self.columns - 1] = ord(normalized) + + # .. note:: We can't use :meth:`cursor_forward()`, because that + # way, we'll never know when to linefeed. + if char_width > 0: + self.cursor.x = min(self.cursor.x + char_width, self.columns - 1) + if not do_insert: + self.update_cell_range(self.cursor.y, cx, self.cursor.x) + + def draw(self, data: bytes) -> None: """ Displays decoded characters at the current cursor position and creates new lines as need if DECAWM is set. """ orig_x, orig_y = self.cursor.x, self.cursor.y self._notify_cursor_position = False + data = self._decode(data) try: - for char in self._decode(data): - if unicodedata.category(char) in IGNORED_CATEGORIES: - continue - char_width = wcwidth(char) - - # If this was the last column in a line and auto wrap mode is - # enabled, move the cursor to the beginning of the next line, - # otherwise replace characters already displayed with newly - # entered. 
- if self.cursor.x + char_width > self.columns - 1: - if mo.DECAWM in self.mode: - self.carriage_return() - self.linefeed() - self.linebuf[self.cursor.y].continued = True - else: - extra = self.cursor.x + char_width + 1 - self.columns - self.cursor.x -= extra - - # If Insert mode is set, new characters move old characters to - # the right, otherwise terminal is in Replace mode and new - # characters replace old characters at cursor position. - do_insert = mo.IRM in self.mode and char_width > 0 - if do_insert: - self.insert_characters(char_width) - - cx = self.cursor.x - line = self.linebuf[self.cursor.y] - if char_width: - line.char[cx], line.width[cx] = ord(char), char_width - if char_width > 1: - for i in range(1, char_width): - line.char[cx + i] = line.width[cx + i] = 0 - line.apply_cursor(self.cursor, cx, char_width) - elif unicodedata.combining(char): - # A zero-cell character is combined with the previous - # character either on this or preceeding line. - if cx: - last = chr(line.char[cx - 1]) - normalized = unicodedata.normalize("NFC", last + char) - line.char[cx - 1] = ord(normalized) - elif self.cursor.y: - lline = self.linebuf[self.cursor.y - 1] - last = chr(lline.char[self.columns - 1]) - normalized = unicodedata.normalize("NFC", last + char) - lline.char[self.columns - 1] = ord(normalized) - - # .. note:: We can't use :meth:`cursor_forward()`, because that - # way, we'll never know when to linefeed. 
- if char_width > 0: - self.cursor.x = min(self.cursor.x + char_width, self.columns - 1) - if not do_insert: - self.update_cell_range(self.cursor.y, cx, self.cursor.x) + if is_simple_string(data): + return self._fast_draw(data) + data = ignore_pat.sub('', data) + if data: + widths = list(map(wcwidth, data)) + if sum(widths) == len(data): + return self._fast_draw(data) + for char, char_width in zip(data, widths): + self._draw_char(char, char_width) finally: self._notify_cursor_position = True if orig_x != self.cursor.x or orig_y != self.cursor.y: @@ -527,6 +538,7 @@ class Screen(QObject): x = self.cursor.x num = min(self.columns - x, count) line = self.linebuf[y] + # TODO: Handle wide chars that get split at the right edge. line.right_shift(x, num) line.apply_cursor(self.cursor, x, num, clear_char=True) self.update_cell_range(y, x, self.columns) @@ -546,6 +558,7 @@ class Screen(QObject): if top <= y <= bottom: x = self.cursor.x num = min(self.columns - x, count) + # TODO: Handle deletion of wide chars line = self.linebuf[y] line.left_shift(x, num) line.apply_cursor(self.cursor, self.columns - num, num, clear_char=True) diff --git a/kitty/unicode.py b/kitty/unicode.py new file mode 100644 index 000000000..ad9d2c6d0 --- /dev/null +++ b/kitty/unicode.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +# License: GPL v3 Copyright: 2016, Kovid Goyal + +import sys +import re +import unicodedata +import itertools +from collections import defaultdict + +IGNORED_CATEGORIES = ('Cc', 'Cf', 'Cn', 'Cs') + +chars_to_ignore = 
'\x00-\x1f\x7f-\x9f\xad\u0378-\u0379\u0380-\u0383\u038b\u038d\u03a2\u0530\u0557-\u0558\u0560\u0588\u058b-\u058c\u0590\u05c8-\u05cf\u05eb-\u05ef\u05f5-\u0605\u061c-\u061d\u06dd\u070e-\u070f\u074b-\u074c\u07b2-\u07bf\u07fb-\u07ff\u082e-\u082f\u083f\u085c-\u085d\u085f-\u089f\u08b5-\u08e2\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09c5-\u09c6\u09c9-\u09ca\u09cf-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09fc-\u0a00\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a50\u0a52-\u0a58\u0a5d\u0a5f-\u0a65\u0a76-\u0a80\u0a84\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0acf\u0ad1-\u0adf\u0ae4-\u0ae5\u0af2-\u0af8\u0afa-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34\u0b3a-\u0b3b\u0b45-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b64-\u0b65\u0b78-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bcf\u0bd1-\u0bd6\u0bd8-\u0be5\u0bfb-\u0bff\u0c04\u0c0d\u0c11\u0c29\u0c3a-\u0c3c\u0c45\u0c49\u0c4e-\u0c54\u0c57\u0c5b-\u0c5f\u0c64-\u0c65\u0c70-\u0c77\u0c80\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbb\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce4-\u0ce5\u0cf0\u0cf3-\u0d00\u0d04\u0d0d\u0d11\u0d3b-\u0d3c\u0d45\u0d49\u0d4f-\u0d56\u0d58-\u0d5e\u0d64-\u0d65\u0d76-\u0d78\u0d80-\u0d81\u0d84\u0d97-\u0d99\u0db2\u0dbc\u0dbe-\u0dbf\u0dc7-\u0dc9\u0dcb-\u0dce\u0dd5\u0dd7\u0de0-\u0de5\u0df0-\u0df1\u0df5-\u0e00\u0e3b-\u0e3e\u0e5c-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0edb\u0ee0-\u0eff\u0f48\u0f6d-\u0f70\u0f98\u0fbd\u0fcd\u0fdb-\u0fff\u10c6\u10c8-\u10cc\u10ce-\u10cf\u1249\u124e-\u124f\u1257\u1259\u125e-\u125f\u1289\u128e-\u128f\u12b1\u12b6-\u12b7\u12bf\u12c1\u12c6-\u12c7\u12d7\u1311\u1316-\u1317\u135b-\u135c\u137d-\u137f\u139a-\u139f\u13f6-\u13f7\u13fe-\u13ff\u169d-\u169f\u16f9-\u16ff
\u170d\u1715-\u171f\u1737-\u173f\u1754-\u175f\u176d\u1771\u1774-\u177f\u17de-\u17df\u17ea-\u17ef\u17fa-\u17ff\u180e-\u180f\u181a-\u181f\u1878-\u187f\u18ab-\u18af\u18f6-\u18ff\u191f\u192c-\u192f\u193c-\u193f\u1941-\u1943\u196e-\u196f\u1975-\u197f\u19ac-\u19af\u19ca-\u19cf\u19db-\u19dd\u1a1c-\u1a1d\u1a5f\u1a7d-\u1a7e\u1a8a-\u1a8f\u1a9a-\u1a9f\u1aae-\u1aaf\u1abf-\u1aff\u1b4c-\u1b4f\u1b7d-\u1b7f\u1bf4-\u1bfb\u1c38-\u1c3a\u1c4a-\u1c4c\u1c80-\u1cbf\u1cc8-\u1ccf\u1cf7\u1cfa-\u1cff\u1df6-\u1dfb\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fc5\u1fd4-\u1fd5\u1fdc\u1ff0-\u1ff1\u1ff5\u1fff\u200b-\u200f\u202a-\u202e\u2060-\u206f\u2072-\u2073\u208f\u209d-\u209f\u20bf-\u20cf\u20f1-\u20ff\u218c-\u218f\u23fb-\u23ff\u2427-\u243f\u244b-\u245f\u2b74-\u2b75\u2b96-\u2b97\u2bba-\u2bbc\u2bc9\u2bd2-\u2beb\u2bf0-\u2bff\u2c2f\u2c5f\u2cf4-\u2cf8\u2d26\u2d28-\u2d2c\u2d2e-\u2d2f\u2d68-\u2d6e\u2d71-\u2d7e\u2d97-\u2d9f\u2da7\u2daf\u2db7\u2dbf\u2dc7\u2dcf\u2dd7\u2ddf\u2e43-\u2e7f\u2e9a\u2ef4-\u2eff\u2fd6-\u2fef\u2ffc-\u2fff\u3040\u3097-\u3098\u3100-\u3104\u312e-\u3130\u318f\u31bb-\u31bf\u31e4-\u31ef\u321f\u32ff\u4db6-\u4dbf\u9fd6-\u9fff\ua48d-\ua48f\ua4c7-\ua4cf\ua62c-\ua63f\ua6f8-\ua6ff\ua7ae-\ua7af\ua7b8-\ua7f6\ua82c-\ua82f\ua83a-\ua83f\ua878-\ua87f\ua8c5-\ua8cd\ua8da-\ua8df\ua8fe-\ua8ff\ua954-\ua95e\ua97d-\ua97f\ua9ce\ua9da-\ua9dd\ua9ff\uaa37-\uaa3f\uaa4e-\uaa4f\uaa5a-\uaa5b\uaac3-\uaada\uaaf7-\uab00\uab07-\uab08\uab0f-\uab10\uab17-\uab1f\uab27\uab2f\uab66-\uab6f\uabee-\uabef\uabfa-\uabff\ud7a4-\ud7af\ud7c7-\ud7ca\ud7fc-\udfff\ufa6e-\ufa6f\ufada-\ufaff\ufb07-\ufb12\ufb18-\ufb1c\ufb37\ufb3d\ufb3f\ufb42\ufb45\ufbc2-\ufbd2\ufd40-\ufd4f\ufd90-\ufd91\ufdc8-\ufdef\ufdfe-\ufdff\ufe1a-\ufe1f\ufe53\ufe67\ufe6c-\ufe6f\ufe75\ufefd-\uff00\uffbf-\uffc1\uffc8-\uffc9\uffd0-\uffd1\uffd8-\uffd9\uffdd-\uffdf\uffe7\uffef-\ufffb\ufffe-\uffff\U0001000c\U00010027\U0001003b\U0001003e\U0001004e-\U0001004f\U0001005e-\U0001007f\U000100fb-\U000100ff\U00010103-\U00010106\
U00010134-\U00010136\U0001018d-\U0001018f\U0001019c-\U0001019f\U000101a1-\U000101cf\U000101fe-\U0001027f\U0001029d-\U0001029f\U000102d1-\U000102df\U000102fc-\U000102ff\U00010324-\U0001032f\U0001034b-\U0001034f\U0001037b-\U0001037f\U0001039e\U000103c4-\U000103c7\U000103d6-\U000103ff\U0001049e-\U0001049f\U000104aa-\U000104ff\U00010528-\U0001052f\U00010564-\U0001056e\U00010570-\U000105ff\U00010737-\U0001073f\U00010756-\U0001075f\U00010768-\U000107ff\U00010806-\U00010807\U00010809\U00010836\U00010839-\U0001083b\U0001083d-\U0001083e\U00010856\U0001089f-\U000108a6\U000108b0-\U000108df\U000108f3\U000108f6-\U000108fa\U0001091c-\U0001091e\U0001093a-\U0001093e\U00010940-\U0001097f\U000109b8-\U000109bb\U000109d0-\U000109d1\U00010a04\U00010a07-\U00010a0b\U00010a14\U00010a18\U00010a34-\U00010a37\U00010a3b-\U00010a3e\U00010a48-\U00010a4f\U00010a59-\U00010a5f\U00010aa0-\U00010abf\U00010ae7-\U00010aea\U00010af7-\U00010aff\U00010b36-\U00010b38\U00010b56-\U00010b57\U00010b73-\U00010b77\U00010b92-\U00010b98\U00010b9d-\U00010ba8\U00010bb0-\U00010bff\U00010c49-\U00010c7f\U00010cb3-\U00010cbf\U00010cf3-\U00010cf9\U00010d00-\U00010e5f\U00010e7f-\U00010fff\U0001104e-\U00011051\U00011070-\U0001107e\U000110bd\U000110c2-\U000110cf\U000110e9-\U000110ef\U000110fa-\U000110ff\U00011135\U00011144-\U0001114f\U00011177-\U0001117f\U000111ce-\U000111cf\U000111e0\U000111f5-\U000111ff\U00011212\U0001123e-\U0001127f\U00011287\U00011289\U0001128e\U0001129e\U000112aa-\U000112af\U000112eb-\U000112ef\U000112fa-\U000112ff\U00011304\U0001130d-\U0001130e\U00011311-\U00011312\U00011329\U00011331\U00011334\U0001133a-\U0001133b\U00011345-\U00011346\U00011349-\U0001134a\U0001134e-\U0001134f\U00011351-\U00011356\U00011358-\U0001135c\U00011364-\U00011365\U0001136d-\U0001136f\U00011375-\U0001147f\U000114c8-\U000114cf\U000114da-\U0001157f\U000115b6-\U000115b7\U000115de-\U000115ff\U00011645-\U0001164f\U0001165a-\U0001167f\U000116b8-\U000116bf\U000116ca-\U000116ff\U0001171a-\U0001171c\U0001172c-\U0001172f\U00011740-\U000
1189f\U000118f3-\U000118fe\U00011900-\U00011abf\U00011af9-\U00011fff\U0001239a-\U000123ff\U0001246f\U00012475-\U0001247f\U00012544-\U00012fff\U0001342f-\U000143ff\U00014647-\U000167ff\U00016a39-\U00016a3f\U00016a5f\U00016a6a-\U00016a6d\U00016a70-\U00016acf\U00016aee-\U00016aef\U00016af6-\U00016aff\U00016b46-\U00016b4f\U00016b5a\U00016b62\U00016b78-\U00016b7c\U00016b90-\U00016eff\U00016f45-\U00016f4f\U00016f7f-\U00016f8e\U00016fa0-\U0001afff\U0001b002-\U0001bbff\U0001bc6b-\U0001bc6f\U0001bc7d-\U0001bc7f\U0001bc89-\U0001bc8f\U0001bc9a-\U0001bc9b\U0001bca0-\U0001cfff\U0001d0f6-\U0001d0ff\U0001d127-\U0001d128\U0001d173-\U0001d17a\U0001d1e9-\U0001d1ff\U0001d246-\U0001d2ff\U0001d357-\U0001d35f\U0001d372-\U0001d3ff\U0001d455\U0001d49d\U0001d4a0-\U0001d4a1\U0001d4a3-\U0001d4a4\U0001d4a7-\U0001d4a8\U0001d4ad\U0001d4ba\U0001d4bc\U0001d4c4\U0001d506\U0001d50b-\U0001d50c\U0001d515\U0001d51d\U0001d53a\U0001d53f\U0001d545\U0001d547-\U0001d549\U0001d551\U0001d6a6-\U0001d6a7\U0001d7cc-\U0001d7cd\U0001da8c-\U0001da9a\U0001daa0\U0001dab0-\U0001e7ff\U0001e8c5-\U0001e8c6\U0001e8d7-\U0001edff\U0001ee04\U0001ee20\U0001ee23\U0001ee25-\U0001ee26\U0001ee28\U0001ee33\U0001ee38\U0001ee3a\U0001ee3c-\U0001ee41\U0001ee43-\U0001ee46\U0001ee48\U0001ee4a\U0001ee4c\U0001ee50\U0001ee53\U0001ee55-\U0001ee56\U0001ee58\U0001ee5a\U0001ee5c\U0001ee5e\U0001ee60\U0001ee63\U0001ee65-\U0001ee66\U0001ee6b\U0001ee73\U0001ee78\U0001ee7d\U0001ee7f\U0001ee8a\U0001ee9c-\U0001eea0\U0001eea4\U0001eeaa\U0001eebc-\U0001eeef\U0001eef2-\U0001efff\U0001f02c-\U0001f02f\U0001f094-\U0001f09f\U0001f0af-\U0001f0b0\U0001f0c0\U0001f0d0\U0001f0f6-\U0001f0ff\U0001f10d-\U0001f10f\U0001f12f\U0001f16c-\U0001f16f\U0001f19b-\U0001f1e5\U0001f203-\U0001f20f\U0001f23b-\U0001f23f\U0001f249-\U0001f24f\U0001f252-\U0001f2ff\U0001f57a\U0001f5a4\U0001f6d1-\U0001f6df\U0001f6ed-\U0001f6ef\U0001f6f4-\U0001f6ff\U0001f774-\U0001f77f\U0001f7d5-\U0001f7ff\U0001f80c-\U0001f80f\U0001f848-\U0001f84f\U0001f85a-\U0001f85f\U0001f888-\U0001f88f\U0001f8ae-\U0
001f90f\U0001f919-\U0001f97f\U0001f985-\U0001f9bf\U0001f9c1-\U0001ffff\U0002a6d7-\U0002a6ff\U0002b735-\U0002b73f\U0002b81e-\U0002b81f\U0002cea2-\U0002f7ff\U0002fa1e-\U000e00ff\U000e01f0-\U000effff\U000ffffe-\U000fffff\U0010fffe-\U0010ffff' # noqa {{{ +# }}} + +ignore_pat = re.compile('[' + chars_to_ignore + ']') + + +def ranges(i): + for a, b in itertools.groupby(enumerate(i), lambda r: r[1] - r[0]): + b = list(b) + yield b[0][1], b[-1][1] + +if __name__ == '__main__': + unicode_category = defaultdict(list) + for c in map(chr, range(sys.maxunicode + 1)): + unicode_category[unicodedata.category(c)].append(c) + igchars = [] + for c in IGNORED_CATEGORIES: + igchars += unicode_category[c] + igchars = sorted(map(ord, igchars)) + ans = [] + for l, r in ranges(igchars): + if l == r: + ans.append(chr(l)) + else: + ans.append('{}-{}'.format(chr(l), chr(r))) + igchars = ''.join(ans) + + with open(__file__, encoding='utf-8') as f: + raw = f.read() + nraw = re.sub(r'^chars_to_ignore = .+$', ('chars_to_ignore = {!r} # noqa {{' '{{' '{{').format(igchars), raw, flags=re.MULTILINE) + print(nraw) + with open(__file__, 'w', encoding='utf-8') as f: + f.write(nraw)