From ce1514963bdf9bdadab56cfb55483f59e69db793 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 30 Nov 2016 19:44:41 +0530 Subject: [PATCH] Various charset related fixes Fix changing of charset not taking effect immediately while parsing a single block of bytes Fix incorrect mapping of C0 control codes in some of the charsets --- kitty/charsets.c | 22 ++++++++++---------- kitty/data-types.h | 5 ++--- kitty/modes.h | 3 +++ kitty/parser.c | 48 ++++++++++++++++++------------------------- kitty/screen.c | 2 ++ kitty_tests/parser.py | 6 ++++++ 6 files changed, 44 insertions(+), 42 deletions(-) diff --git a/kitty/charsets.c b/kitty/charsets.c index 25e766661..dbd426497 100644 --- a/kitty/charsets.c +++ b/kitty/charsets.c @@ -51,8 +51,8 @@ static uint32_t charset_translations[5][256] = { 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, - 0x0028, 0x0029, 0x002a, 0x2192, 0x2190, 0x2191, 0x2193, 0x002f, - 0x2588, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, @@ -81,10 +81,10 @@ static uint32_t charset_translations[5][256] = { }, /* IBM Codepage 437 mapped to Unicode */ { - 0x0000, 0x263a, 0x263b, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022, - 0x25d8, 0x25cb, 0x25d9, 0x2642, 0x2640, 0x266a, 0x266b, 0x263c, - 0x25b6, 0x25c0, 0x2195, 0x203c, 0x00b6, 0x00a7, 0x25ac, 0x21a8, - 0x2191, 0x2193, 0x2192, 0x2190, 0x221f, 0x2194, 0x25b2, 0x25bc, + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, @@ -116,11 +116,11 @@ static uint32_t charset_translations[5][256] = { }, // VAX 42 map { - 0x0000, 0x263a, 0x263b, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022, - 0x25d8, 0x25cb, 0x25d9, 0x2642, 0x2640, 0x266a, 0x266b, 0x263c, - 0x25b6, 0x25c0, 0x2195, 0x203c, 0x00b6, 0x00a7, 0x25ac, 0x21a8, - 0x2191, 0x2193, 0x2192, 0x2190, 0x221f, 0x2194, 0x25b2, 0x25bc, - 0x0020, 0x043b, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x0435, diff --git a/kitty/data-types.h b/kitty/data-types.h index 0030ea67e..6a55e62c4 100644 --- a/kitty/data-types.h +++ b/kitty/data-types.h @@ -235,7 +235,7 @@ PyTypeObject ScreenModes_Type; #define SAVEPOINTS_SZ 256 typedef struct { - uint8_t charset; + unsigned int charset; uint32_t utf8_state, *g0_charset, *g1_charset; Cursor cursor; bool mDECOM; @@ -257,8 +257,7 @@ typedef struct { typedef struct { PyObject_HEAD - unsigned int columns, lines, margin_top, margin_bottom; - uint8_t charset; + unsigned int columns, lines, margin_top, margin_bottom, charset; uint32_t utf8_state, *g0_charset, *g1_charset; Cursor *cursor; SavepointBuffer main_savepoints, alt_savepoints; diff --git a/kitty/modes.h b/kitty/modes.h index b1554879c..19d4de4e0 100644 --- a/kitty/modes.h +++ b/kitty/modes.h @@ -55,6 +55,9 @@ // *Text Cursor Enable Mode*: determines if the text cursor is visible. #define DECTCEM (25 << 5) +// National Replacement Character Set Mode +#define DECNRCM (42 << 5) + // Xterm mouse protocol #define MOUSE_BUTTON_TRACKING (1000 << 5) #define MOUSE_MOTION_TRACKING (1002 << 5) diff --git a/kitty/parser.c b/kitty/parser.c index ae2c1bbea..545fda3bd 100644 --- a/kitty/parser.c +++ b/kitty/parser.c @@ -237,7 +237,7 @@ handle_esc_mode_char(Screen *screen, uint32_t ch, PyObject DUMP_UNUSED *dump_cal case '0': case 'U': case 'V': - CALL_ED2(screen_designate_charset, screen->parser_buf[0], ch); + CALL_ED2(screen_designate_charset, screen->parser_buf[0] - '(', ch); break; default: REPORT_ERROR("Unknown charset: 0x%x", ch); break; } @@ -621,38 +621,30 @@ dispatch_unicode_char(Screen *screen, uint32_t codepoint, PyObject DUMP_UNUSED * #undef HANDLE } -static inline void -parse_utf8(Screen *screen, uint8_t *buf, Py_ssize_t len, PyObject DUMP_UNUSED *dump_callback) { +static inline void +_parse_bytes(Screen *screen, uint8_t *buf, Py_ssize_t len, PyObject DUMP_UNUSED *dump_callback) { uint32_t prev = screen->utf8_state, codepoint = 0; - for (unsigned int i = 0; i < len; i++, prev = screen->utf8_state) { - switch (decode_utf8(&screen->utf8_state, &codepoint, buf[i])) { - case UTF8_ACCEPT: - dispatch_unicode_char(screen, codepoint, dump_callback); + for (unsigned int i = 0; i < len; i++) { + switch(screen->charset) { + case 0: + dispatch_unicode_char(screen, screen->g0_charset[buf[i]], dump_callback); break; - case UTF8_REJECT: - screen->utf8_state = UTF8_ACCEPT; - if (prev != UTF8_ACCEPT) i--; + case 1: + dispatch_unicode_char(screen, screen->g1_charset[buf[i]], dump_callback); + break; + default: + switch (decode_utf8(&screen->utf8_state, &codepoint, buf[i])) { + case UTF8_ACCEPT: + dispatch_unicode_char(screen, codepoint, dump_callback); + break; + case UTF8_REJECT: + screen->utf8_state = UTF8_ACCEPT; + if (prev != UTF8_ACCEPT) i--; + break; + } break; } } - -} - -static inline void -_parse_bytes(Screen *screen, uint8_t *buf, Py_ssize_t len, PyObject DUMP_UNUSED *dump_callback) { -#define DECODE(charset) for (unsigned int i = 0; i < len; i++) dispatch_unicode_char(screen, screen->charset[buf[i]], dump_callback); - switch(screen->charset) { - case 0: - DECODE(g0_charset); - break; - case 1: - DECODE(g1_charset); - break; - default: - parse_utf8(screen, buf, len, dump_callback); - break; - } -#undef DECODE FLUSH_DRAW; } // }}} diff --git a/kitty/screen.c b/kitty/screen.c index 3f9ecc65b..1291f30ec 100644 --- a/kitty/screen.c +++ b/kitty/screen.c @@ -352,6 +352,7 @@ set_mode_from_const(Screen *self, unsigned int mode, bool val) { case DECCKM: case DECSCLM: + case DECNRCM: break; // we ignore these modes case DECTCEM: self->modes.mDECTCEM = val; @@ -1151,6 +1152,7 @@ static PyMemberDef members[] = { {"columns", T_UINT, offsetof(Screen, columns), READONLY, "columns"}, {"margin_top", T_UINT, offsetof(Screen, margin_top), READONLY, "margin_top"}, {"margin_bottom", T_UINT, offsetof(Screen, margin_bottom), READONLY, "margin_bottom"}, + {"charset", T_UINT, offsetof(Screen, charset), READONLY, "charset"}, {NULL} }; diff --git a/kitty_tests/parser.py b/kitty_tests/parser.py index f854204de..d2db39451 100644 --- a/kitty_tests/parser.py +++ b/kitty_tests/parser.py @@ -69,6 +69,12 @@ class TestParser(BaseTest): pb('\033c123', ('screen_reset', ), '123') self.ae(str(s.line(0)), '123 ') + def test_charsets(self): + s = self.create_screen() + pb = partial(self.parse_bytes_dump, s) + pb('\033)0\x0e/_', ('screen_designate_charset', 1, ord('0')), ('screen_change_charset', 1), '/\xa0') + pb('\033%G_', ('screen_change_charset', 2), '_') + def test_csi_codes(self): s = self.create_screen() pb = partial(self.parse_bytes_dump, s)