Various charset related fixes

Fix changing of charset not taking effect immediately while parsing a single block of bytes. Fix incorrect mapping of C0 control codes in some of the charsets.
2016-11-30 19:44:41 +05:30 · 2016-11-30 19:44:41 +05:30 · ce1514963b
commit ce1514963b
parent c713712f89
6 changed files with 44 additions and 42 deletions
--- a/kitty/charsets.c
+++ b/kitty/charsets.c
@ -51,8 +51,8 @@ static uint32_t charset_translations[5][256] = {
    0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
    0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
    0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
-    0x0028, 0x0029, 0x002a, 0x2192, 0x2190, 0x2191, 0x2193, 0x002f,
-    0x2588, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+    0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
+    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
    0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
    0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
    0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
@ -81,10 +81,10 @@ static uint32_t charset_translations[5][256] = {
  },
  /* IBM Codepage 437 mapped to Unicode */
  {
-    0x0000, 0x263a, 0x263b, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022, 
-    0x25d8, 0x25cb, 0x25d9, 0x2642, 0x2640, 0x266a, 0x266b, 0x263c,
-    0x25b6, 0x25c0, 0x2195, 0x203c, 0x00b6, 0x00a7, 0x25ac, 0x21a8,
-    0x2191, 0x2193, 0x2192, 0x2190, 0x221f, 0x2194, 0x25b2, 0x25bc,
+    0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
+    0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
+    0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
+    0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
    0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
    0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
@ -116,11 +116,11 @@ static uint32_t charset_translations[5][256] = {
  }, 
  // VAX 42 map
  {
-    0x0000, 0x263a, 0x263b, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022,
-    0x25d8, 0x25cb, 0x25d9, 0x2642, 0x2640, 0x266a, 0x266b, 0x263c,
-    0x25b6, 0x25c0, 0x2195, 0x203c, 0x00b6, 0x00a7, 0x25ac, 0x21a8,
-    0x2191, 0x2193, 0x2192, 0x2190, 0x221f, 0x2194, 0x25b2, 0x25bc,
-    0x0020, 0x043b, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+    0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
+    0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
+    0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
+    0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
+    0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
    0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
    0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x0435,
--- a/kitty/data-types.h
+++ b/kitty/data-types.h
@ -235,7 +235,7 @@ PyTypeObject ScreenModes_Type;
 #define SAVEPOINTS_SZ 256

 typedef struct {
-    uint8_t charset;
+    unsigned int charset;
    uint32_t utf8_state, *g0_charset, *g1_charset;
    Cursor cursor;
    bool mDECOM;
@ -257,8 +257,7 @@ typedef struct {
 typedef struct {
    PyObject_HEAD

-    unsigned int columns, lines, margin_top, margin_bottom;
-    uint8_t charset;
+    unsigned int columns, lines, margin_top, margin_bottom, charset;
    uint32_t utf8_state, *g0_charset, *g1_charset;
    Cursor *cursor;
    SavepointBuffer main_savepoints, alt_savepoints;
--- a/kitty/modes.h
+++ b/kitty/modes.h
@ -55,6 +55,9 @@
 // *Text Cursor Enable Mode*: determines if the text cursor is visible.
 #define DECTCEM (25 << 5)

+// National Replacement Character Set Mode
+#define DECNRCM (42 << 5)
+
 // Xterm mouse protocol
 #define MOUSE_BUTTON_TRACKING (1000 << 5)
 #define MOUSE_MOTION_TRACKING  (1002 << 5)
--- a/kitty/parser.c
+++ b/kitty/parser.c
@ -237,7 +237,7 @@ handle_esc_mode_char(Screen *screen, uint32_t ch, PyObject DUMP_UNUSED *dump_cal
                        case '0':
                        case 'U':
                        case 'V':
-                            CALL_ED2(screen_designate_charset, screen->parser_buf[0], ch);
+                            CALL_ED2(screen_designate_charset, screen->parser_buf[0] - '(', ch); break;
                        default:
                            REPORT_ERROR("Unknown charset: 0x%x", ch); break;
                    }
@ -621,38 +621,30 @@ dispatch_unicode_char(Screen *screen, uint32_t codepoint, PyObject DUMP_UNUSED *
 #undef HANDLE
 }

-static inline void
-parse_utf8(Screen *screen, uint8_t *buf, Py_ssize_t len, PyObject DUMP_UNUSED *dump_callback) {
+static inline void 
+_parse_bytes(Screen *screen, uint8_t *buf, Py_ssize_t len, PyObject DUMP_UNUSED *dump_callback) {
    uint32_t prev = screen->utf8_state, codepoint = 0;
-    for (unsigned int i = 0; i < len; i++, prev = screen->utf8_state) {
-        switch (decode_utf8(&screen->utf8_state, &codepoint, buf[i])) {
-            case UTF8_ACCEPT:
-                dispatch_unicode_char(screen, codepoint, dump_callback);
+    for (unsigned int i = 0; i < len; i++) {
+        switch(screen->charset) {
+            case 0:
+                dispatch_unicode_char(screen, screen->g0_charset[buf[i]], dump_callback);
                break;
-            case UTF8_REJECT:
-                screen->utf8_state = UTF8_ACCEPT;
-                if (prev != UTF8_ACCEPT) i--;
+            case 1:
+                dispatch_unicode_char(screen, screen->g1_charset[buf[i]], dump_callback);
+                break;
+            default:
+                switch (decode_utf8(&screen->utf8_state, &codepoint, buf[i])) {
+                    case UTF8_ACCEPT:
+                        dispatch_unicode_char(screen, codepoint, dump_callback);
+                        break;
+                    case UTF8_REJECT:
+                        screen->utf8_state = UTF8_ACCEPT;
+                        if (prev != UTF8_ACCEPT) i--;
+                        break;
+                }
                break;
        }
    }
-
-}
-
-static inline void 
-_parse_bytes(Screen *screen, uint8_t *buf, Py_ssize_t len, PyObject DUMP_UNUSED *dump_callback) {
-#define DECODE(charset) for (unsigned int i = 0; i < len; i++) dispatch_unicode_char(screen, screen->charset[buf[i]], dump_callback);
-    switch(screen->charset) {
-        case 0:
-            DECODE(g0_charset);
-            break;
-        case 1:
-            DECODE(g1_charset);
-            break;
-        default:
-            parse_utf8(screen, buf, len, dump_callback);
-            break;
-    }
-#undef DECODE
 FLUSH_DRAW;
 }
 // }}}
--- a/kitty/screen.c
+++ b/kitty/screen.c
@ -352,6 +352,7 @@ set_mode_from_const(Screen *self, unsigned int mode, bool val) {

        case DECCKM:
        case DECSCLM:
+        case DECNRCM:
            break;  // we ignore these modes
        case DECTCEM: 
            self->modes.mDECTCEM = val; 
@ -1151,6 +1152,7 @@ static PyMemberDef members[] = {
    {"columns", T_UINT, offsetof(Screen, columns), READONLY, "columns"},
    {"margin_top", T_UINT, offsetof(Screen, margin_top), READONLY, "margin_top"},
    {"margin_bottom", T_UINT, offsetof(Screen, margin_bottom), READONLY, "margin_bottom"},
+    {"charset", T_UINT, offsetof(Screen, charset), READONLY, "charset"},
    {NULL}
 };
 
--- a/kitty_tests/parser.py
+++ b/kitty_tests/parser.py
@ -69,6 +69,12 @@ class TestParser(BaseTest):
        pb('\033c123', ('screen_reset', ), '123')
        self.ae(str(s.line(0)), '123  ')

+    def test_charsets(self):
+        s = self.create_screen()
+        pb = partial(self.parse_bytes_dump, s)
+        pb('\033)0\x0e/_', ('screen_designate_charset', 1, ord('0')), ('screen_change_charset', 1), '/\xa0')
+        pb('\033%G_', ('screen_change_charset', 2), '_')
+
    def test_csi_codes(self):
        s = self.create_screen()
        pb = partial(self.parse_bytes_dump, s)