More charset fixes
Use the G0/G1 charsets only to map characters as they are drawn to the screen. Decode incoming bytes using only Latin-1 or UTF-8, defaulting to UTF-8 at startup/reset.
This commit is contained in:
parent
83787fbc40
commit
489504cda5
@ -51,8 +51,8 @@ static uint32_t charset_translations[5][256] = {
|
|||||||
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
|
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
|
||||||
0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
|
0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
|
||||||
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
||||||
0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
|
0x0028, 0x0029, 0x002a, 0x2192, 0x2190, 0x2191, 0x2193, 0x002f,
|
||||||
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
0x2588, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
||||||
0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
|
0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
|
||||||
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
||||||
0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
|
0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
|
||||||
@ -81,10 +81,10 @@ static uint32_t charset_translations[5][256] = {
|
|||||||
},
|
},
|
||||||
/* IBM Codepage 437 mapped to Unicode */
|
/* IBM Codepage 437 mapped to Unicode */
|
||||||
{
|
{
|
||||||
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
0x0000, 0x263a, 0x263b, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022,
|
||||||
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
|
0x25d8, 0x25cb, 0x25d9, 0x2642, 0x2640, 0x266a, 0x266b, 0x263c,
|
||||||
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
|
0x25b6, 0x25c0, 0x2195, 0x203c, 0x00b6, 0x00a7, 0x25ac, 0x21a8,
|
||||||
0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
|
0x2191, 0x2193, 0x2192, 0x2190, 0x221f, 0x2194, 0x25b2, 0x25bc,
|
||||||
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
||||||
0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
|
0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
|
||||||
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
||||||
@ -116,11 +116,11 @@ static uint32_t charset_translations[5][256] = {
|
|||||||
},
|
},
|
||||||
// VAX 42 map
|
// VAX 42 map
|
||||||
{
|
{
|
||||||
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
0x0000, 0x263a, 0x263b, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022,
|
||||||
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
|
0x25d8, 0x25cb, 0x25d9, 0x2642, 0x2640, 0x266a, 0x266b, 0x263c,
|
||||||
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
|
0x25b6, 0x25c0, 0x2195, 0x203c, 0x00b6, 0x00a7, 0x25ac, 0x21a8,
|
||||||
0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
|
0x2191, 0x2193, 0x2192, 0x2190, 0x221f, 0x2194, 0x25b2, 0x25bc,
|
||||||
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
0x0020, 0x043b, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
||||||
0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
|
0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
|
||||||
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
||||||
0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x0435,
|
0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x0435,
|
||||||
@ -204,6 +204,9 @@ translation_table(uint32_t which) {
|
|||||||
return charset_translations[0];
|
return charset_translations[0];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint32_t *latin1_charset = charset_translations[0];
|
||||||
|
|
||||||
// UTF-8 decode taken from: http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
|
// UTF-8 decode taken from: http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
|
||||||
|
|
||||||
static const uint8_t utf8_data[] = {
|
static const uint8_t utf8_data[] = {
|
||||||
|
|||||||
@ -235,8 +235,8 @@ PyTypeObject ScreenModes_Type;
|
|||||||
#define SAVEPOINTS_SZ 256
|
#define SAVEPOINTS_SZ 256
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
unsigned int charset;
|
uint32_t utf8_state, *g0_charset, *g1_charset, *g_charset;
|
||||||
uint32_t utf8_state, *g0_charset, *g1_charset;
|
bool use_latin1;
|
||||||
Cursor cursor;
|
Cursor cursor;
|
||||||
bool mDECOM;
|
bool mDECOM;
|
||||||
bool mDECAWM;
|
bool mDECAWM;
|
||||||
@ -258,7 +258,8 @@ typedef struct {
|
|||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
|
|
||||||
unsigned int columns, lines, margin_top, margin_bottom, charset;
|
unsigned int columns, lines, margin_top, margin_bottom, charset;
|
||||||
uint32_t utf8_state, *g0_charset, *g1_charset;
|
uint32_t utf8_state, *g0_charset, *g1_charset, *g_charset;
|
||||||
|
bool use_latin1;
|
||||||
Cursor *cursor;
|
Cursor *cursor;
|
||||||
SavepointBuffer main_savepoints, alt_savepoints;
|
SavepointBuffer main_savepoints, alt_savepoints;
|
||||||
PyObject *callbacks;
|
PyObject *callbacks;
|
||||||
@ -379,6 +380,7 @@ void set_icon(Screen *self, PyObject*);
|
|||||||
void set_dynamic_color(Screen *self, unsigned int code, PyObject*);
|
void set_dynamic_color(Screen *self, unsigned int code, PyObject*);
|
||||||
void set_color_table_color(Screen *self, unsigned int code, PyObject*);
|
void set_color_table_color(Screen *self, unsigned int code, PyObject*);
|
||||||
uint32_t* translation_table(uint32_t which);
|
uint32_t* translation_table(uint32_t which);
|
||||||
|
uint32_t *latin1_charset;
|
||||||
void screen_request_capabilities(Screen *, PyObject *);
|
void screen_request_capabilities(Screen *, PyObject *);
|
||||||
void report_device_attributes(Screen *self, unsigned int UNUSED mode, bool UNUSED secondary);
|
void report_device_attributes(Screen *self, unsigned int UNUSED mode, bool UNUSED secondary);
|
||||||
void select_graphic_rendition(Screen *self, unsigned int *params, unsigned int count);
|
void select_graphic_rendition(Screen *self, unsigned int *params, unsigned int count);
|
||||||
|
|||||||
@ -218,9 +218,11 @@ handle_esc_mode_char(Screen *screen, uint32_t ch, PyObject DUMP_UNUSED *dump_cal
|
|||||||
case '%':
|
case '%':
|
||||||
switch(ch) {
|
switch(ch) {
|
||||||
case '@':
|
case '@':
|
||||||
CALL_ED1(screen_change_charset, 0); break;
|
REPORT_COMMAND(screen_use_latin1, 1);
|
||||||
|
screen->use_latin1 = true; screen->utf8_state = 0; break;
|
||||||
case 'G':
|
case 'G':
|
||||||
CALL_ED1(screen_change_charset, 2); break;
|
REPORT_COMMAND(screen_use_latin1, 0);
|
||||||
|
screen->use_latin1 = false; screen->utf8_state = 0; break;
|
||||||
default:
|
default:
|
||||||
REPORT_ERROR("Unhandled Esc %% code: 0x%x", ch); break;
|
REPORT_ERROR("Unhandled Esc %% code: 0x%x", ch); break;
|
||||||
}
|
}
|
||||||
@ -631,12 +633,9 @@ static inline void
|
|||||||
_parse_bytes(Screen *screen, uint8_t *buf, Py_ssize_t len, PyObject DUMP_UNUSED *dump_callback) {
|
_parse_bytes(Screen *screen, uint8_t *buf, Py_ssize_t len, PyObject DUMP_UNUSED *dump_callback) {
|
||||||
uint32_t prev = screen->utf8_state, codepoint = 0;
|
uint32_t prev = screen->utf8_state, codepoint = 0;
|
||||||
for (unsigned int i = 0; i < len; i++) {
|
for (unsigned int i = 0; i < len; i++) {
|
||||||
switch(screen->charset) {
|
switch(screen->use_latin1) {
|
||||||
case 0:
|
case true:
|
||||||
dispatch_unicode_char(screen, screen->g0_charset[buf[i]], dump_callback);
|
dispatch_unicode_char(screen, latin1_charset[buf[i]], dump_callback);
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
dispatch_unicode_char(screen, screen->g1_charset[buf[i]], dump_callback);
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
switch (decode_utf8(&screen->utf8_state, &codepoint, buf[i])) {
|
switch (decode_utf8(&screen->utf8_state, &codepoint, buf[i])) {
|
||||||
|
|||||||
@ -26,8 +26,9 @@ init_tabstops(bool *tabstops, index_type count) {
|
|||||||
#define RESET_CHARSETS \
|
#define RESET_CHARSETS \
|
||||||
self->g0_charset = translation_table(0); \
|
self->g0_charset = translation_table(0); \
|
||||||
self->g1_charset = self->g0_charset; \
|
self->g1_charset = self->g0_charset; \
|
||||||
self->charset = 2; \
|
self->g_charset = self->g0_charset; \
|
||||||
self->utf8_state = 0;
|
self->utf8_state = 0; \
|
||||||
|
self->use_latin1 = false;
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
new(PyTypeObject *type, PyObject *args, PyObject UNUSED *kwds) {
|
new(PyTypeObject *type, PyObject *args, PyObject UNUSED *kwds) {
|
||||||
@ -152,9 +153,13 @@ dealloc(Screen* self) {
|
|||||||
// Draw text {{{
|
// Draw text {{{
|
||||||
|
|
||||||
void
|
void
|
||||||
screen_change_charset(Screen *self, uint32_t to) {
|
screen_change_charset(Screen *self, uint32_t which) {
|
||||||
self->charset = to;
|
switch(which) {
|
||||||
self->utf8_state = 0;
|
case 0:
|
||||||
|
self->g_charset = self->g0_charset; break;
|
||||||
|
case 1:
|
||||||
|
self->g_charset = self->g1_charset; break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -178,8 +183,9 @@ safe_wcwidth(uint32_t ch) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
screen_draw(Screen *self, uint32_t ch) {
|
screen_draw(Screen *self, uint32_t och) {
|
||||||
if (is_ignored_char(ch)) return;
|
if (is_ignored_char(och)) return;
|
||||||
|
uint32_t ch = och < 256 ? self->g_charset[och] : och;
|
||||||
unsigned int x = self->cursor->x, y = self->cursor->y;
|
unsigned int x = self->cursor->x, y = self->cursor->y;
|
||||||
unsigned int char_width = safe_wcwidth(ch);
|
unsigned int char_width = safe_wcwidth(ch);
|
||||||
if (self->columns - self->cursor->x < char_width) {
|
if (self->columns - self->cursor->x < char_width) {
|
||||||
@ -584,7 +590,8 @@ savepoints_pop(SavepointBuffer *self) {
|
|||||||
sp->utf8_state = self->utf8_state; \
|
sp->utf8_state = self->utf8_state; \
|
||||||
sp->g0_charset = self->g0_charset; \
|
sp->g0_charset = self->g0_charset; \
|
||||||
sp->g1_charset = self->g1_charset; \
|
sp->g1_charset = self->g1_charset; \
|
||||||
sp->charset = self->charset; \
|
sp->g_charset = self->g_charset; \
|
||||||
|
sp->use_latin1 = self->use_latin1;
|
||||||
|
|
||||||
void
|
void
|
||||||
screen_save_cursor(Screen *self) {
|
screen_save_cursor(Screen *self) {
|
||||||
|
|||||||
@ -72,8 +72,9 @@ class TestParser(BaseTest):
|
|||||||
def test_charsets(self):
|
def test_charsets(self):
|
||||||
s = self.create_screen()
|
s = self.create_screen()
|
||||||
pb = partial(self.parse_bytes_dump, s)
|
pb = partial(self.parse_bytes_dump, s)
|
||||||
pb('\033)0\x0e/_', ('screen_designate_charset', 1, ord('0')), ('screen_change_charset', 1), '/\xa0')
|
pb('\033)0\x0e/_', ('screen_designate_charset', 1, ord('0')), ('screen_change_charset', 1), '/_')
|
||||||
pb('\033%G_', ('screen_change_charset', 2), '_')
|
self.ae(str(s.line(0)), '/\xa0 ')
|
||||||
|
pb('\033%G_', ('screen_use_latin1', 0), '_')
|
||||||
|
|
||||||
def test_csi_codes(self):
|
def test_csi_codes(self):
|
||||||
s = self.create_screen()
|
s = self.create_screen()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user