Add support for unicode combining chars and wide chars

This commit is contained in:
Kovid Goyal 2016-10-18 20:30:18 +05:30
parent 2aa047a09d
commit 3ae0e4e5ac
5 changed files with 118 additions and 40 deletions

View File

@ -81,7 +81,7 @@ COL_SHIFT = 32
class Line:
__slots__ = 'char color decoration_fg continued'.split()
__slots__ = 'char color decoration_fg continued combining_chars'.split()
def __init__(self, sz: int, other=None):
if other is None:
@ -90,11 +90,13 @@ class Line:
self.color = z8[:]
self.decoration_fg = z4[:]
self.continued = False
self.combining_chars = {}
else:
self.char = other.char[:]
self.color = other.color[:]
self.decoration_fg = other.decoration_fg[:]
self.continued = other.continued
self.combining_chars = other.combining_chars.copy()
def __eq__(self, other):
if not isinstance(other, Line):
@ -102,7 +104,7 @@ class Line:
for x in self.__slots__:
if getattr(self, x) != getattr(other, x):
return False
return self.continued == other.continued
return True
def __ne__(self, other):
return not self.__eq__(other)
@ -117,6 +119,10 @@ class Line:
to.char[dest] = self.char[src]
to.color[dest] = self.color[src]
to.decoration_fg[dest] = self.decoration_fg[src]
to.combining_chars.pop(dest, None)
cc = self.combining_chars.get(src)
if cc is not None:
to.combining_chars[dest] = cc
def cursor_to_attrs(self, c: Cursor) -> int:
return ((c.decoration & 0b11) << DECORATION_SHIFT) | ((c.bold & 0b1) << BOLD_SHIFT) | \
@ -132,6 +138,8 @@ class Line:
w = 1 if clear_char else sattrs & WIDTH_MASK
attrs = w | self.cursor_to_attrs(c)
self.char[i] = (ch & CHAR_MASK) | (attrs << ATTRS_SHIFT)
if clear_char:
self.combining_chars.pop(i, None)
def cursor_from(self, x: int, ypos: int=0) -> Cursor:
c = Cursor(x, ypos)
@ -158,8 +166,19 @@ class Line:
self.char[dx] = ch | (attrs << ATTRS_SHIFT)
self.color[dx], self.decoration_fg[dx] = col, dfg
dx += 1
if self.combining_chars:
for i in range(cursor.x, cursor.x + sz):
self.combining_chars.pop(i, None)
def copy_slice(self, src, dest, num):
if self.combining_chars:
scc = self.combining_chars.copy()
for i in range(num):
cc = scc.get(src + i)
if cc is None:
self.combining_chars.pop(dest + i, None)
else:
self.combining_chars[dest + i] = cc
src, dest = slice(src, src + num), slice(dest, dest + num)
for a in (self.char, self.color, self.decoration_fg):
a[dest] = a[src]
@ -178,8 +197,12 @@ class Line:
if snum:
self.copy_slice(src_start, dest_start, snum)
def __iter__(self):
    """Yield the text of every cell in index order, as produced by text_at()."""
    yield from map(self.text_at, range(len(self)))
def __str__(self) -> str:
return ''.join(map(lambda c: chr(c & CHAR_MASK), filter(None, self.char)))
return ''.join(self)
def __repr__(self) -> str:
return repr(str(self))
@ -187,8 +210,15 @@ class Line:
def width(self, i):
    """Return the two-bit width field stored in the attribute bits of cell *i*."""
    attrs = self.char[i] >> ATTRS_SHIFT
    return attrs & 0b11
def char_at(self, i):
return chr(self.char[i] & CHAR_MASK)
def text_at(self, i):
    """Return the text of cell *i*: its base character plus any combining chars.

    A cell whose masked character code is zero yields the empty string,
    regardless of any combining characters recorded for it.
    """
    codepoint = self.char[i] & CHAR_MASK
    if not codepoint:
        return ''
    base = chr(codepoint)
    marks = self.combining_chars.get(i)
    return base if marks is None else base + marks
def set_char(self, i: int, ch: str, width: int=1, cursor: Cursor=None) -> None:
if cursor is None:
@ -200,6 +230,11 @@ class Line:
self.color[i], self.decoration_fg[i] = col, cursor.decoration_fg
a |= width & WIDTH_MASK
self.char[i] = (a << ATTRS_SHIFT) | (ord(ch) & CHAR_MASK)
self.combining_chars.pop(i, None)
def add_combining_char(self, i: int, ch: str):
    """Append the combining character *ch* to the text stored for cell *i*.

    When *i* is the trailing cell of a double-width character (its own
    width is 0 and the cell before it has width 2), the combining character
    is attached to the leading cell at ``i - 1`` instead. The trailing cell
    holds no character of its own, so text_at() returns '' for it and a
    mark stored there would never be rendered.
    """
    if i > 0 and self.width(i) == 0 and self.width(i - 1) == 2:
        # Redirect to the cell that actually carries the wide character.
        i -= 1
    self.combining_chars[i] = self.combining_chars.get(i, '') + ch
def set_bold(self, i, val):
c = self.char[i]

View File

@ -289,7 +289,7 @@ class Screen(QObject):
return self.utf8_decoder.decode(data)
return "".join(self.g0_charset[b] for b in data)
def _fast_draw(self, data: str) -> None:
def _draw_fast(self, data: str) -> None:
do_insert = mo.IRM in self.mode
pos = 0
while pos < len(data):
@ -302,8 +302,10 @@ class Screen(QObject):
self.linebuf[self.cursor.y].continued = True
space_left_in_line = self.columns
else:
space_left_in_line = min(len_left, self.columns)
self.cursor.x = self.columns - space_left_in_line
space_left_in_line = 1
len_left = 1
pos = len(data) - 1
self.cursor.x = self.columns - 1
write_sz = min(len_left, space_left_in_line)
line = self.linebuf[self.cursor.y]
if do_insert:
@ -316,20 +318,14 @@ class Screen(QObject):
self.update_cell_range(self.cursor.y, cx, right)
def _draw_char(self, char: str, char_width: int) -> None:
# If this was the last column in a line and auto wrap mode is
# enabled, move the cursor to the beginning of the next line,
# otherwise replace characters already displayed with newly
# entered.
space_left_in_line = self.columns - self.cursor.x
if space_left_in_line < char_width:
if mo.DECAWM in self.mode:
self.carriage_return()
self.linefeed()
self.linebuf[self.cursor.y].continued = True
space_left_in_line = self.columns
else:
self.cursor.x = self.columns - char_width
space_left_in_line = char_width
do_insert = mo.IRM in self.mode
@ -339,27 +335,22 @@ class Screen(QObject):
if do_insert:
line.right_shift(self.cursor.x, char_width)
line.set_char(cx, char, char_width, self.cursor)
self.cursor.x += 1
if char_width == 2:
line.set_char(cx, '\0', 0, self.cursor)
elif unicodedata.combining(char):
# A zero-cell character is combined with the previous
# character either on this or the preceding line.
if cx:
last = line.char_at(cx - 1)
normalized = unicodedata.normalize("NFC", last + char)
line.set_char(cx - 1, normalized[0])
elif self.cursor.y:
lline = self.linebuf[self.cursor.y - 1]
last = chr(lline.char_at(self.columns - 1))
normalized = unicodedata.normalize("NFC", last + char)
lline.set_char(self.columns - 1, normalized[0])
# .. note:: We can't use :meth:`cursor_forward()`, because that
# way, we'll never know when to linefeed.
if char_width > 0:
self.cursor.x = min(self.cursor.x + char_width, self.columns)
line.set_char(self.cursor.x, '\0', 0, self.cursor)
self.cursor.x += 1
right = self.columns - 1 if do_insert else max(0, min(self.cursor.x - 1, self.columns - 1))
self.update_cell_range(self.cursor.y, cx, right)
elif unicodedata.combining(char):
# A zero-cell character is combined with the previous
# character either on this or the preceding line.
if cx > 0:
line.add_combining_char(cx - 1, char)
self.update_cell_range(self.cursor.y, cx - 1, cx - 1)
elif self.cursor.y > 0:
lline = self.linebuf[self.cursor.y - 1]
lline.add_combining_char(self.columns - 1, char)
self.update_cell_range(self.cursor.y - 1, self.columns - 1, self.columns - 1)
def draw(self, data: bytes) -> None:
""" Displays decoded characters at the current cursor position and
@ -369,12 +360,12 @@ class Screen(QObject):
data = self._decode(data)
try:
if is_simple_string(data):
return self._fast_draw(data)
return self._draw_fast(data)
data = ignore_pat.sub('', data)
if data:
widths = list(map(wcwidth, data))
if sum(widths) == len(data):
return self._fast_draw(data)
return self._draw_fast(data)
for char, char_width in zip(data, widths):
self._draw_char(char, char_width)
finally:

View File

@ -111,6 +111,6 @@ class TerminalWidget(QWidget):
if bg is not None:
r = QRect(x, y, self.cell_width, self.cell_height)
painter.fillRect(r, bg)
char = line.char[col]
if char not in (0, 32): # 32 = <space>
painter.drawText(x, y + self.baseline_offset, chr(char))
text = line.text_at(col)
if text.rstrip():
painter.drawText(x, y + self.baseline_offset, text)

View File

@ -10,6 +10,7 @@ import shlex
import fcntl
import signal
import ctypes
import unicodedata
from functools import lru_cache
from PyQt5.QtGui import QFontMetrics
@ -26,6 +27,8 @@ wcwidth_native.restype = ctypes.c_int
@lru_cache(maxsize=2**13)
def wcwidth(c: str) -> int:
if unicodedata.combining(c):
return 0
if current_font_metrics is None:
return min(2, wcwidth_native(c))
w = current_font_metrics.widthChar(c)

View File

@ -32,13 +32,13 @@ class TestScreen(BaseTest):
s.reset(), t.reset()
s.reset_mode(mo.DECAWM)
s.draw(b'0123456789')
self.ae(str(s.linebuf[0]), '56789')
self.ae(str(s.linebuf[0]), '01239')
self.ae(s.cursor.x, 5), self.ae(s.cursor.y, 0)
self.assertChanges(t, ignore='cursor', cells={0: ((0, 4),)})
s.draw(b'ab')
self.ae(str(s.linebuf[0]), '567ab')
self.ae(str(s.linebuf[0]), '0123b')
self.ae(s.cursor.x, 5), self.ae(s.cursor.y, 0)
self.assertChanges(t, ignore='cursor', cells={0: ((3, 4),)})
self.assertChanges(t, ignore='cursor', cells={0: ((4, 4),)})
# Now test in insert mode
s.reset(), t.reset()
@ -51,3 +51,52 @@ class TestScreen(BaseTest):
self.ae(str(s.linebuf[4]), 'ab123')
self.ae((s.cursor.x, s.cursor.y), (2, 4))
self.assertChanges(t, ignore='cursor', cells={4: ((0, 4),)})
def test_draw_char(self):
    # Exercises Screen.draw() with double-width and combining characters in
    # three modes: auto-wrap, no auto-wrap, and insert mode.
    # NOTE(review): the assertions below imply a 5-column screen -- confirm
    # against create_screen().
    # Test in line-wrap, non-insert mode
    s, t = self.create_screen()
    s.draw('ココx'.encode('utf-8'))
    self.ae(str(s.linebuf[0]), 'ココx')
    # Each wide character takes a width-2 cell followed by a width-0 cell.
    self.ae(tuple(map(s.linebuf[0].width, range(5))), (2, 0, 2, 0, 1))
    self.ae(s.cursor.x, 5), self.ae(s.cursor.y, 0)
    self.assertChanges(t, ignore='cursor', cells={0: ((0, 4),)})
    # Three more wide characters: row 0 is unchanged, row 1 ends with a
    # blank cell, and the cursor finishes on row 2.
    s.draw('ニチハ'.encode('utf-8'))
    self.ae(str(s.linebuf[0]), 'ココx')
    self.ae(str(s.linebuf[1]), 'ニチ ')
    self.ae(str(s.linebuf[2]), '')
    self.assertChanges(t, ignore='cursor', cells={0: ((5, 5),), 1: ((0, 3),), 2: ((0, 1),)})
    self.ae(s.cursor.x, 2), self.ae(s.cursor.y, 2)
    # A base character followed by two combining characters advances the
    # cursor by a single column and touches a single cell.
    s.draw('Ƶ̧\u0308'.encode('utf-8'))
    self.ae(str(s.linebuf[2]), 'ハƵ̧\u0308 ')
    self.ae(s.cursor.x, 3), self.ae(s.cursor.y, 2)
    self.assertChanges(t, ignore='cursor', cells={2: ((2, 2),)})
    # A combining character arriving in a separate draw() call attaches to
    # the last character drawn by the previous call.
    s.draw(b'xy'), s.draw('\u0306'.encode('utf-8'))
    self.ae(str(s.linebuf[2]), 'ハƵ̧\u0308xy\u0306')
    self.ae(s.cursor.x, 5), self.ae(s.cursor.y, 2)
    self.assertChanges(t, ignore='cursor', cells={2: ((3, 4),)})
    # After 15 more characters row 0 holds the text previously asserted for
    # row 1 (presumably the screen scrolled -- confirm).
    s.draw(b'c' * 15)
    self.ae(str(s.linebuf[0]), 'ニチ ')
    # Now test without line-wrap
    s.reset(), t.reset()
    s.reset_mode(mo.DECAWM)
    # Overflowing characters overwrite only the last column; combining
    # characters stay attached to the characters that remain.
    s.draw('0\u030612345\u03066789\u0306'.encode('utf-8'))
    self.ae(str(s.linebuf[0]), '0\u03061239\u0306')
    self.ae(s.cursor.x, 5), self.ae(s.cursor.y, 0)
    self.assertChanges(t, ignore='cursor', cells={0: ((0, 4),)})
    s.draw('ab\u0306'.encode('utf-8'))
    self.ae(str(s.linebuf[0]), '0\u0306123b\u0306')
    self.ae(s.cursor.x, 5), self.ae(s.cursor.y, 0)
    self.assertChanges(t, ignore='cursor', cells={0: ((4, 4),)})
    # Now test in insert mode
    s.reset(), t.reset()
    s.set_mode(mo.IRM)
    s.draw('1\u03062345'.encode('utf-8') * 5)
    s.cursor_back(5)
    self.ae(s.cursor.x, 0), self.ae(s.cursor.y, 4)
    t.reset()
    # Inserted text shifts existing cells right; the combining character on
    # the shifted '1' is expected to move with it.
    s.draw('a\u0306b'.encode('utf-8'))
    self.ae(str(s.linebuf[4]), 'a\u0306b1\u030623')
    self.ae((s.cursor.x, s.cursor.y), (2, 4))
    self.assertChanges(t, ignore='cursor', cells={4: ((0, 4),)})