This commit is contained in:
Kovid Goyal 2017-04-28 08:31:07 +05:30
parent f5d957e8ff
commit 898a8075be
4 changed files with 14 additions and 8 deletions

View File

@@ -253,7 +253,7 @@ PyTypeObject ScreenModes_Type;
#define SAVEPOINTS_SZ 256
typedef struct {
uint32_t utf8_state, *g0_charset, *g1_charset, *g_charset;
uint32_t utf8_state, utf8_codepoint, *g0_charset, *g1_charset, *g_charset;
bool use_latin1;
Cursor cursor;
bool mDECOM, mDECAWM, mDECSCNM;
@@ -274,7 +274,7 @@ typedef struct {
PyObject_HEAD
unsigned int columns, lines, margin_top, margin_bottom, charset;
uint32_t utf8_state, *g0_charset, *g1_charset, *g_charset;
uint32_t utf8_state, utf8_codepoint, *g0_charset, *g1_charset, *g_charset;
bool use_latin1;
Cursor *cursor;
SavepointBuffer main_savepoints, alt_savepoints;

View File

@@ -225,10 +225,10 @@ handle_esc_mode_char(Screen *screen, uint32_t ch, PyObject DUMP_UNUSED *dump_cal
switch(ch) {
case '@':
REPORT_COMMAND(screen_use_latin1, 1);
screen->use_latin1 = true; screen->utf8_state = 0; break;
screen->use_latin1 = true; screen->utf8_state = 0; screen->utf8_codepoint = 0; break;
case 'G':
REPORT_COMMAND(screen_use_latin1, 0);
screen->use_latin1 = false; screen->utf8_state = 0; break;
screen->use_latin1 = false; screen->utf8_state = 0; screen->utf8_codepoint = 0; break;
default:
REPORT_ERROR("Unhandled Esc %% code: 0x%x", ch); break;
}
@@ -688,17 +688,17 @@ dispatch_unicode_char(Screen *screen, uint32_t codepoint, PyObject DUMP_UNUSED *
static inline void
_parse_bytes(Screen *screen, uint8_t *buf, Py_ssize_t len, PyObject DUMP_UNUSED *dump_callback) {
uint32_t prev = screen->utf8_state, codepoint = 0;
uint32_t prev = screen->utf8_state;
for (unsigned int i = 0; i < len; i++) {
if (screen->use_latin1) dispatch_unicode_char(screen, latin1_charset[buf[i]], dump_callback);
else {
switch (decode_utf8(&screen->utf8_state, &codepoint, buf[i])) {
switch (decode_utf8(&screen->utf8_state, &screen->utf8_codepoint, buf[i])) {
case UTF8_ACCEPT:
dispatch_unicode_char(screen, codepoint, dump_callback);
dispatch_unicode_char(screen, screen->utf8_codepoint, dump_callback);
break;
case UTF8_REJECT:
screen->utf8_state = UTF8_ACCEPT;
if (prev != UTF8_ACCEPT) i--;
if (prev != UTF8_ACCEPT && i > 0) i--;
break;
}
prev = screen->utf8_state;

View File

@@ -29,6 +29,7 @@ init_tabstops(bool *tabstops, index_type count) {
self->g1_charset = self->g0_charset; \
self->g_charset = self->g0_charset; \
self->utf8_state = 0; \
self->utf8_codepoint = 0; \
self->use_latin1 = false;
static PyObject*
@@ -638,6 +639,7 @@ savepoints_pop(SavepointBuffer *self) {
#define COPY_CHARSETS(self, sp) \
sp->utf8_state = self->utf8_state; \
sp->utf8_codepoint = self->utf8_codepoint; \
sp->g0_charset = self->g0_charset; \
sp->g1_charset = self->g1_charset; \
sp->g_charset = self->g_charset; \

View File

@@ -72,6 +72,10 @@ class TestParser(BaseTest):
self.ae(str(s.line(0)), '123 ')
def test_charsets(self):
s = self.create_screen()
pb = partial(self.parse_bytes_dump, s)
pb(b'\xc3')
pb(b'\xa1', ('draw', b'\xc3\xa1'.decode('utf-8')))
s = self.create_screen()
pb = partial(self.parse_bytes_dump, s)
pb('\033)0\x0e/_', ('screen_designate_charset', 1, ord('0')), ('screen_change_charset', 1), '/_')