diff --git a/kitty/data-types.h b/kitty/data-types.h index 4a0c3fbbf..59af1ad01 100644 --- a/kitty/data-types.h +++ b/kitty/data-types.h @@ -253,7 +253,7 @@ PyTypeObject ScreenModes_Type; #define SAVEPOINTS_SZ 256 typedef struct { - uint32_t utf8_state, *g0_charset, *g1_charset, *g_charset; + uint32_t utf8_state, utf8_codepoint, *g0_charset, *g1_charset, *g_charset; bool use_latin1; Cursor cursor; bool mDECOM, mDECAWM, mDECSCNM; @@ -274,7 +274,7 @@ typedef struct { PyObject_HEAD unsigned int columns, lines, margin_top, margin_bottom, charset; - uint32_t utf8_state, *g0_charset, *g1_charset, *g_charset; + uint32_t utf8_state, utf8_codepoint, *g0_charset, *g1_charset, *g_charset; bool use_latin1; Cursor *cursor; SavepointBuffer main_savepoints, alt_savepoints; diff --git a/kitty/parser.c b/kitty/parser.c index 62799392f..3219292d7 100644 --- a/kitty/parser.c +++ b/kitty/parser.c @@ -225,10 +225,10 @@ handle_esc_mode_char(Screen *screen, uint32_t ch, PyObject DUMP_UNUSED *dump_cal switch(ch) { case '@': REPORT_COMMAND(screen_use_latin1, 1); - screen->use_latin1 = true; screen->utf8_state = 0; break; + screen->use_latin1 = true; screen->utf8_state = 0; screen->utf8_codepoint = 0; break; case 'G': REPORT_COMMAND(screen_use_latin1, 0); - screen->use_latin1 = false; screen->utf8_state = 0; break; + screen->use_latin1 = false; screen->utf8_state = 0; screen->utf8_codepoint = 0; break; default: REPORT_ERROR("Unhandled Esc %% code: 0x%x", ch); break; } @@ -688,17 +688,17 @@ dispatch_unicode_char(Screen *screen, uint32_t codepoint, PyObject DUMP_UNUSED * static inline void _parse_bytes(Screen *screen, uint8_t *buf, Py_ssize_t len, PyObject DUMP_UNUSED *dump_callback) { - uint32_t prev = screen->utf8_state, codepoint = 0; + uint32_t prev = screen->utf8_state; for (unsigned int i = 0; i < len; i++) { if (screen->use_latin1) dispatch_unicode_char(screen, latin1_charset[buf[i]], dump_callback); else { - switch (decode_utf8(&screen->utf8_state, &codepoint, buf[i])) { + switch (decode_utf8(&screen->utf8_state, &screen->utf8_codepoint, buf[i])) { case UTF8_ACCEPT: - dispatch_unicode_char(screen, codepoint, dump_callback); + dispatch_unicode_char(screen, screen->utf8_codepoint, dump_callback); break; case UTF8_REJECT: screen->utf8_state = UTF8_ACCEPT; - if (prev != UTF8_ACCEPT) i--; + if (prev != UTF8_ACCEPT && i > 0) i--; break; } prev = screen->utf8_state; diff --git a/kitty/screen.c b/kitty/screen.c index 1e3cc4d70..3fec81a76 100644 --- a/kitty/screen.c +++ b/kitty/screen.c @@ -29,6 +29,7 @@ init_tabstops(bool *tabstops, index_type count) { self->g1_charset = self->g0_charset; \ self->g_charset = self->g0_charset; \ self->utf8_state = 0; \ + self->utf8_codepoint = 0; \ self->use_latin1 = false; static PyObject* @@ -638,6 +639,7 @@ savepoints_pop(SavepointBuffer *self) { #define COPY_CHARSETS(self, sp) \ sp->utf8_state = self->utf8_state; \ + sp->utf8_codepoint = self->utf8_codepoint; \ sp->g0_charset = self->g0_charset; \ sp->g1_charset = self->g1_charset; \ sp->g_charset = self->g_charset; \ diff --git a/kitty_tests/parser.py b/kitty_tests/parser.py index ef3621bd3..fee0c43d0 100644 --- a/kitty_tests/parser.py +++ b/kitty_tests/parser.py @@ -72,6 +72,10 @@ class TestParser(BaseTest): self.ae(str(s.line(0)), '123 ') def test_charsets(self): + s = self.create_screen() + pb = partial(self.parse_bytes_dump, s) + pb(b'\xc3') + pb(b'\xa1', ('draw', b'\xc3\xa1'.decode('utf-8'))) s = self.create_screen() pb = partial(self.parse_bytes_dump, s) pb('\033)0\x0e/_', ('screen_designate_charset', 1, ord('0')), ('screen_change_charset', 1), '/_')