Use a regex for faster filtering of Unicode data

This commit is contained in:
Kovid Goyal 2016-10-17 10:41:55 +05:30
parent f345ac1bdd
commit b02dc836fb
2 changed files with 113 additions and 55 deletions

View File

@ -12,7 +12,8 @@ from PyQt5.QtCore import QObject, pyqtSignal
from pyte import charsets as cs, control as ctrl, graphics as g, modes as mo from pyte import charsets as cs, control as ctrl, graphics as g, modes as mo
from .data_types import Line, Cursor, rewrap_lines from .data_types import Line, Cursor, rewrap_lines
from .utils import wcwidth from .utils import wcwidth, is_simple_string
from .unicode import ignore_pat
#: A container for screen's scroll margins. #: A container for screen's scroll margins.
@ -28,7 +29,6 @@ Savepoint = namedtuple("Savepoint", [
"origin", "origin",
"wrap" "wrap"
]) ])
IGNORED_CATEGORIES = ('Cc', 'Cf', 'Cn', 'Cs')
class Screen(QObject): class Screen(QObject):
@ -292,17 +292,11 @@ class Screen(QObject):
return self.utf8_decoder.decode(data) return self.utf8_decoder.decode(data)
return "".join(self.g0_charset[b] for b in data) return "".join(self.g0_charset[b] for b in data)
def draw(self, data: bytes): def _fast_draw(self, data: str) -> None:
""" Displays decoded characters at the current cursor position and while data:
creates new lines as need if DECAWM is set. """ pass # TODO: Implement me
orig_x, orig_y = self.cursor.x, self.cursor.y
self._notify_cursor_position = False
try:
for char in self._decode(data):
if unicodedata.category(char) in IGNORED_CATEGORIES:
continue
char_width = wcwidth(char)
def _draw_char(self, char: str, char_width: int) -> None:
# If this was the last column in a line and auto wrap mode is # If this was the last column in a line and auto wrap mode is
# enabled, move the cursor to the beginning of the next line, # enabled, move the cursor to the beginning of the next line,
# otherwise replace characters already displayed with newly # otherwise replace characters already displayed with newly
@ -350,6 +344,23 @@ class Screen(QObject):
self.cursor.x = min(self.cursor.x + char_width, self.columns - 1) self.cursor.x = min(self.cursor.x + char_width, self.columns - 1)
if not do_insert: if not do_insert:
self.update_cell_range(self.cursor.y, cx, self.cursor.x) self.update_cell_range(self.cursor.y, cx, self.cursor.x)
def draw(self, data: bytes) -> None:
""" Displays decoded characters at the current cursor position and
creates new lines as need if DECAWM is set. """
orig_x, orig_y = self.cursor.x, self.cursor.y
self._notify_cursor_position = False
data = self._decode(data)
try:
if is_simple_string(data):
return self._fast_draw(data)
data = ignore_pat.sub('', data)
if data:
widths = list(map(wcwidth, data))
if sum(widths) == len(data):
return self._fast_draw(data)
for char, char_width in zip(data, widths):
self._draw_char(char, char_width)
finally: finally:
self._notify_cursor_position = True self._notify_cursor_position = True
if orig_x != self.cursor.x or orig_y != self.cursor.y: if orig_x != self.cursor.x or orig_y != self.cursor.y:
@ -527,6 +538,7 @@ class Screen(QObject):
x = self.cursor.x x = self.cursor.x
num = min(self.columns - x, count) num = min(self.columns - x, count)
line = self.linebuf[y] line = self.linebuf[y]
# TODO: Handle wide chars that get split at the right edge.
line.right_shift(x, num) line.right_shift(x, num)
line.apply_cursor(self.cursor, x, num, clear_char=True) line.apply_cursor(self.cursor, x, num, clear_char=True)
self.update_cell_range(y, x, self.columns) self.update_cell_range(y, x, self.columns)
@ -546,6 +558,7 @@ class Screen(QObject):
if top <= y <= bottom: if top <= y <= bottom:
x = self.cursor.x x = self.cursor.x
num = min(self.columns - x, count) num = min(self.columns - x, count)
# TODO: Handle deletion of wide chars
line = self.linebuf[y] line = self.linebuf[y]
line.left_shift(x, num) line.left_shift(x, num)
line.apply_cursor(self.cursor, self.columns - num, num, clear_char=True) line.apply_cursor(self.cursor, self.columns - num, num, clear_char=True)

45
kitty/unicode.py Normal file

File diff suppressed because one or more lines are too long