Handle non-BMP combining characters
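Use a level of indirection to store combining characters: each cell stores a small index (a "mark") instead of the codepoint itself. This allows a combining character to be stored using only two bytes, even if its codepoint is greater than USHRT_MAX (for example, a non-BMP combining character).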
This commit is contained in:
parent
32632264ee
commit
80301d465b
@ -231,7 +231,7 @@ PyInit_fast_data_types(void) {
|
|||||||
|
|
||||||
#define OOF(n) #n, offsetof(Cell, n)
|
#define OOF(n) #n, offsetof(Cell, n)
|
||||||
if (PyModule_AddObject(m, "CELL", Py_BuildValue("{sI sI sI sI sI sI sI sI sI}",
|
if (PyModule_AddObject(m, "CELL", Py_BuildValue("{sI sI sI sI sI sI sI sI sI}",
|
||||||
OOF(ch), OOF(fg), OOF(bg), OOF(decoration_fg), OOF(cc), OOF(sprite_x), OOF(sprite_y), OOF(sprite_z), "size", sizeof(Cell))) != 0) return NULL;
|
OOF(ch), OOF(fg), OOF(bg), OOF(decoration_fg), OOF(cc_idx), OOF(sprite_x), OOF(sprite_y), OOF(sprite_z), "size", sizeof(Cell))) != 0) return NULL;
|
||||||
#undef OOF
|
#undef OOF
|
||||||
PyModule_AddIntConstant(m, "BOLD", BOLD_SHIFT);
|
PyModule_AddIntConstant(m, "BOLD", BOLD_SHIFT);
|
||||||
PyModule_AddIntConstant(m, "ITALIC", ITALIC_SHIFT);
|
PyModule_AddIntConstant(m, "ITALIC", ITALIC_SHIFT);
|
||||||
|
|||||||
@ -56,7 +56,6 @@ typedef enum MouseShapes { BEAM, HAND, ARROW } MouseShape;
|
|||||||
#define REVERSE_SHIFT 6
|
#define REVERSE_SHIFT 6
|
||||||
#define STRIKE_SHIFT 7
|
#define STRIKE_SHIFT 7
|
||||||
#define COL_MASK 0xFFFFFFFF
|
#define COL_MASK 0xFFFFFFFF
|
||||||
#define CC_SHIFT 16
|
|
||||||
#define UTF8_ACCEPT 0
|
#define UTF8_ACCEPT 0
|
||||||
#define UTF8_REJECT 1
|
#define UTF8_REJECT 1
|
||||||
#define DECORATION_FG_CODE 58
|
#define DECORATION_FG_CODE 58
|
||||||
@ -138,7 +137,7 @@ typedef struct {
|
|||||||
attrs_type attrs;
|
attrs_type attrs;
|
||||||
// The following are only needed on the CPU, not the GPU
|
// The following are only needed on the CPU, not the GPU
|
||||||
char_type ch;
|
char_type ch;
|
||||||
combining_type cc[2];
|
combining_type cc_idx[2];
|
||||||
} Cell;
|
} Cell;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|||||||
@ -9,6 +9,7 @@
|
|||||||
#include "fonts.h"
|
#include "fonts.h"
|
||||||
#include "state.h"
|
#include "state.h"
|
||||||
#include "emoji.h"
|
#include "emoji.h"
|
||||||
|
#include "unicode-data.h"
|
||||||
|
|
||||||
#define MISSING_GLYPH 4
|
#define MISSING_GLYPH 4
|
||||||
#define MAX_NUM_EXTRA_GLYPHS 8
|
#define MAX_NUM_EXTRA_GLYPHS 8
|
||||||
@ -327,8 +328,8 @@ face_has_codepoint(PyObject* face, char_type cp) {
|
|||||||
static inline bool
|
static inline bool
|
||||||
has_cell_text(Font *self, Cell *cell) {
|
has_cell_text(Font *self, Cell *cell) {
|
||||||
if (!face_has_codepoint(self->face, cell->ch)) return false;
|
if (!face_has_codepoint(self->face, cell->ch)) return false;
|
||||||
for (unsigned i = 0; i < arraysz(cell->cc) && cell->cc[i]; i++) {
|
for (unsigned i = 0; i < arraysz(cell->cc_idx) && cell->cc_idx[i]; i++) {
|
||||||
if (!face_has_codepoint(self->face, cell->cc[i])) return false;
|
if (!face_has_codepoint(self->face, codepoint_for_mark(cell->cc_idx[i]))) return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -492,12 +493,12 @@ load_hb_buffer(Cell *first_cell, index_type num_cells) {
|
|||||||
hb_buffer_clear_contents(harfbuzz_buffer);
|
hb_buffer_clear_contents(harfbuzz_buffer);
|
||||||
while (num_cells) {
|
while (num_cells) {
|
||||||
attrs_type prev_width = 0;
|
attrs_type prev_width = 0;
|
||||||
for (num = 0; num_cells && num < arraysz(shape_buffer) - 20 - arraysz(first_cell->cc); first_cell++, num_cells--) {
|
for (num = 0; num_cells && num < arraysz(shape_buffer) - 20 - arraysz(first_cell->cc_idx); first_cell++, num_cells--) {
|
||||||
if (prev_width == 2) { prev_width = 0; continue; }
|
if (prev_width == 2) { prev_width = 0; continue; }
|
||||||
shape_buffer[num++] = first_cell->ch;
|
shape_buffer[num++] = first_cell->ch;
|
||||||
prev_width = first_cell->attrs & WIDTH_MASK;
|
prev_width = first_cell->attrs & WIDTH_MASK;
|
||||||
for (unsigned i = 0; i < arraysz(first_cell->cc) && first_cell->cc[i]; i++) {
|
for (unsigned i = 0; i < arraysz(first_cell->cc_idx) && first_cell->cc_idx[i]; i++) {
|
||||||
shape_buffer[num++] = first_cell->cc[i];
|
shape_buffer[num++] = codepoint_for_mark(first_cell->cc_idx[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
hb_buffer_add_utf32(harfbuzz_buffer, shape_buffer, num, 0, num);
|
hb_buffer_add_utf32(harfbuzz_buffer, shape_buffer, num, 0, num);
|
||||||
@ -576,7 +577,7 @@ static GroupState group_state = {0};
|
|||||||
static inline unsigned int
|
static inline unsigned int
|
||||||
num_codepoints_in_cell(Cell *cell) {
|
num_codepoints_in_cell(Cell *cell) {
|
||||||
unsigned int ans = 1;
|
unsigned int ans = 1;
|
||||||
for (unsigned i = 0; i < arraysz(cell->cc) && cell->cc[i]; i++) ans++;
|
for (unsigned i = 0; i < arraysz(cell->cc_idx) && cell->cc_idx[i]; i++) ans++;
|
||||||
return ans;
|
return ans;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -654,7 +655,7 @@ check_cell_consumed(CellData *cell_data, Cell *last_cell) {
|
|||||||
cell_data->current_codepoint = cell_data->cell->ch;
|
cell_data->current_codepoint = cell_data->cell->ch;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
cell_data->current_codepoint = cell_data->cell->cc[cell_data->codepoints_consumed - 1];
|
cell_data->current_codepoint = codepoint_for_mark(cell_data->cell->cc_idx[cell_data->codepoints_consumed - 1]);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1044,10 +1045,10 @@ get_fallback_font(PyObject UNUSED *self, PyObject *args) {
|
|||||||
int bold, italic;
|
int bold, italic;
|
||||||
if (!PyArg_ParseTuple(args, "Upp", &text, &bold, &italic)) return NULL;
|
if (!PyArg_ParseTuple(args, "Upp", &text, &bold, &italic)) return NULL;
|
||||||
Cell cell = {0};
|
Cell cell = {0};
|
||||||
static Py_UCS4 char_buf[2 + arraysz(cell.cc)];
|
static Py_UCS4 char_buf[2 + arraysz(cell.cc_idx)];
|
||||||
if (!PyUnicode_AsUCS4(text, char_buf, arraysz(char_buf), 1)) return NULL;
|
if (!PyUnicode_AsUCS4(text, char_buf, arraysz(char_buf), 1)) return NULL;
|
||||||
cell.ch = char_buf[0];
|
cell.ch = char_buf[0];
|
||||||
for (unsigned i = 0; i + 1 < PyUnicode_GetLength(text) && i < arraysz(cell.cc); i++) cell.cc[i] = char_buf[i + 1];
|
for (unsigned i = 0; i + 1 < PyUnicode_GetLength(text) && i < arraysz(cell.cc_idx); i++) cell.cc_idx[i] = mark_for_codepoint(char_buf[i + 1]);
|
||||||
if (bold) cell.attrs |= 1 << BOLD_SHIFT;
|
if (bold) cell.attrs |= 1 << BOLD_SHIFT;
|
||||||
if (italic) cell.attrs |= 1 << ITALIC_SHIFT;
|
if (italic) cell.attrs |= 1 << ITALIC_SHIFT;
|
||||||
ssize_t ans = fallback_font(&cell);
|
ssize_t ans = fallback_font(&cell);
|
||||||
|
|||||||
28
kitty/line.c
28
kitty/line.c
@ -38,9 +38,9 @@ PyObject*
|
|||||||
cell_text(Cell *cell) {
|
cell_text(Cell *cell) {
|
||||||
PyObject *ans;
|
PyObject *ans;
|
||||||
unsigned num = 1;
|
unsigned num = 1;
|
||||||
static Py_UCS4 buf[arraysz(cell->cc) + 1];
|
static Py_UCS4 buf[arraysz(cell->cc_idx) + 1];
|
||||||
buf[0] = cell->ch;
|
buf[0] = cell->ch;
|
||||||
for (unsigned i = 0; i < arraysz(cell->cc) && cell->cc[i]; i++) buf[num++] = cell->cc[i];
|
for (unsigned i = 0; i < arraysz(cell->cc_idx) && cell->cc_idx[i]; i++) buf[num++] = codepoint_for_mark(cell->cc_idx[i]);
|
||||||
ans = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buf, num);
|
ans = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buf, num);
|
||||||
return ans;
|
return ans;
|
||||||
}
|
}
|
||||||
@ -166,7 +166,7 @@ cell_as_unicode(Cell *cell, bool include_cc, Py_UCS4 *buf, char_type zero_char)
|
|||||||
size_t n = 1;
|
size_t n = 1;
|
||||||
buf[0] = cell->ch ? cell->ch : zero_char;
|
buf[0] = cell->ch ? cell->ch : zero_char;
|
||||||
if (include_cc) {
|
if (include_cc) {
|
||||||
for (unsigned i = 0; i < arraysz(cell->cc) && cell->cc[i]; i++) buf[n++] = cell->cc[i];
|
for (unsigned i = 0; i < arraysz(cell->cc_idx) && cell->cc_idx[i]; i++) buf[n++] = codepoint_for_mark(cell->cc_idx[i]);
|
||||||
}
|
}
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
@ -175,7 +175,7 @@ size_t
|
|||||||
cell_as_utf8(Cell *cell, bool include_cc, char *buf, char_type zero_char) {
|
cell_as_utf8(Cell *cell, bool include_cc, char *buf, char_type zero_char) {
|
||||||
size_t n = encode_utf8(cell->ch ? cell->ch : zero_char, buf);
|
size_t n = encode_utf8(cell->ch ? cell->ch : zero_char, buf);
|
||||||
if (include_cc) {
|
if (include_cc) {
|
||||||
for (unsigned i = 0; i < arraysz(cell->cc) && cell->cc[i]; i++) n += encode_utf8(cell->cc[i], buf + n);
|
for (unsigned i = 0; i < arraysz(cell->cc_idx) && cell->cc_idx[i]; i++) n += encode_utf8(codepoint_for_mark(cell->cc_idx[i]), buf + n);
|
||||||
}
|
}
|
||||||
buf[n] = 0;
|
buf[n] = 0;
|
||||||
return n;
|
return n;
|
||||||
@ -188,7 +188,7 @@ unicode_in_range(Line *self, index_type start, index_type limit, bool include_cc
|
|||||||
static Py_UCS4 buf[4096];
|
static Py_UCS4 buf[4096];
|
||||||
if (leading_char) buf[n++] = leading_char;
|
if (leading_char) buf[n++] = leading_char;
|
||||||
char_type previous_width = 0;
|
char_type previous_width = 0;
|
||||||
for(index_type i = start; i < limit && n < arraysz(buf) - 2 - arraysz(self->cells->cc); i++) {
|
for(index_type i = start; i < limit && n < arraysz(buf) - 2 - arraysz(self->cells->cc_idx); i++) {
|
||||||
char_type ch = self->cells[i].ch;
|
char_type ch = self->cells[i].ch;
|
||||||
if (ch == 0) {
|
if (ch == 0) {
|
||||||
if (previous_width == 2) { previous_width = 0; continue; };
|
if (previous_width == 2) { previous_width = 0; continue; };
|
||||||
@ -249,8 +249,8 @@ line_as_ansi(Line *self, Py_UCS4 *buf, index_type buflen) {
|
|||||||
t = prev_cursor; prev_cursor = cursor; cursor = t;
|
t = prev_cursor; prev_cursor = cursor; cursor = t;
|
||||||
if (*sgr) WRITE_SGR(sgr);
|
if (*sgr) WRITE_SGR(sgr);
|
||||||
WRITE_CH(ch);
|
WRITE_CH(ch);
|
||||||
for(unsigned c = 0; c < arraysz(self->cells[pos].cc) && self->cells[pos].cc[c]; c++) {
|
for(unsigned c = 0; c < arraysz(self->cells[pos].cc_idx) && self->cells[pos].cc_idx[c]; c++) {
|
||||||
WRITE_CH(self->cells[pos].cc[c]);
|
WRITE_CH(codepoint_for_mark(self->cells[pos].cc_idx[c]));
|
||||||
}
|
}
|
||||||
previous_width = attrs & WIDTH_MASK;
|
previous_width = attrs & WIDTH_MASK;
|
||||||
}
|
}
|
||||||
@ -301,10 +301,10 @@ void
|
|||||||
line_add_combining_char(Line *self, uint32_t ch, unsigned int x) {
|
line_add_combining_char(Line *self, uint32_t ch, unsigned int x) {
|
||||||
Cell *cell = self->cells + x;
|
Cell *cell = self->cells + x;
|
||||||
if (!cell->ch) return; // dont allow adding combining chars to a null cell
|
if (!cell->ch) return; // dont allow adding combining chars to a null cell
|
||||||
for (unsigned i = 0; i < arraysz(cell->cc); i++) {
|
for (unsigned i = 0; i < arraysz(cell->cc_idx); i++) {
|
||||||
if (!cell->cc[i]) { cell->cc[i] = (combining_type)ch; return; }
|
if (!cell->cc_idx[i]) { cell->cc_idx[i] = mark_for_codepoint(ch); return; }
|
||||||
}
|
}
|
||||||
cell->cc[arraysz(cell->cc) - 1] = (combining_type)ch;
|
cell->cc_idx[arraysz(cell->cc_idx) - 1] = mark_for_codepoint(ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
@ -354,7 +354,7 @@ set_text(Line* self, PyObject *args) {
|
|||||||
self->cells[i].fg = fg;
|
self->cells[i].fg = fg;
|
||||||
self->cells[i].bg = bg;
|
self->cells[i].bg = bg;
|
||||||
self->cells[i].decoration_fg = dfg;
|
self->cells[i].decoration_fg = dfg;
|
||||||
memset(self->cells[i].cc, 0, sizeof(self->cells[i].cc));
|
memset(self->cells[i].cc_idx, 0, sizeof(self->cells[i].cc_idx));
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
@ -386,7 +386,7 @@ line_clear_text(Line *self, unsigned int at, unsigned int num, char_type ch) {
|
|||||||
attrs_type width = ch ? 1 : 0;
|
attrs_type width = ch ? 1 : 0;
|
||||||
#define PREFIX \
|
#define PREFIX \
|
||||||
for (index_type i = at; i < MIN(self->xnum, at + num); i++) { \
|
for (index_type i = at; i < MIN(self->xnum, at + num); i++) { \
|
||||||
self->cells[i].ch = ch; memset(self->cells[i].cc, 0, sizeof(self->cells[i].cc)); \
|
self->cells[i].ch = ch; memset(self->cells[i].cc_idx, 0, sizeof(self->cells[i].cc_idx)); \
|
||||||
self->cells[i].attrs = (self->cells[i].attrs & ATTRS_MASK_WITHOUT_WIDTH) | width; \
|
self->cells[i].attrs = (self->cells[i].attrs & ATTRS_MASK_WITHOUT_WIDTH) | width; \
|
||||||
}
|
}
|
||||||
if (CHAR_IS_BLANK(ch)) {
|
if (CHAR_IS_BLANK(ch)) {
|
||||||
@ -416,7 +416,7 @@ line_apply_cursor(Line *self, Cursor *cursor, unsigned int at, unsigned int num,
|
|||||||
for (index_type i = at; i < self->xnum && i < at + num; i++) {
|
for (index_type i = at; i < self->xnum && i < at + num; i++) {
|
||||||
if (clear_char) {
|
if (clear_char) {
|
||||||
self->cells[i].ch = BLANK_CHAR;
|
self->cells[i].ch = BLANK_CHAR;
|
||||||
memset(self->cells[i].cc, 0, sizeof(self->cells[i].cc));
|
memset(self->cells[i].cc_idx, 0, sizeof(self->cells[i].cc_idx));
|
||||||
self->cells[i].attrs = attrs;
|
self->cells[i].attrs = attrs;
|
||||||
clear_sprite_position(self->cells[i]);
|
clear_sprite_position(self->cells[i]);
|
||||||
} else {
|
} else {
|
||||||
@ -491,7 +491,7 @@ line_set_char(Line *self, unsigned int at, uint32_t ch, unsigned int width, Curs
|
|||||||
self->cells[at].decoration_fg = cursor->decoration_fg & COL_MASK;
|
self->cells[at].decoration_fg = cursor->decoration_fg & COL_MASK;
|
||||||
}
|
}
|
||||||
self->cells[at].ch = ch;
|
self->cells[at].ch = ch;
|
||||||
memset(self->cells[at].cc, 0, sizeof(self->cells[at].cc));
|
memset(self->cells[at].cc_idx, 0, sizeof(self->cells[at].cc_idx));
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
|
|||||||
@ -6,6 +6,8 @@ bool is_ignored_char(char_type ch);
|
|||||||
bool is_word_char(char_type ch);
|
bool is_word_char(char_type ch);
|
||||||
bool is_CZ_category(char_type);
|
bool is_CZ_category(char_type);
|
||||||
bool is_P_category(char_type);
|
bool is_P_category(char_type);
|
||||||
|
char_type codepoint_for_mark(combining_type m);
|
||||||
|
combining_type mark_for_codepoint(char_type c);
|
||||||
|
|
||||||
static inline bool
|
static inline bool
|
||||||
is_url_char(uint32_t ch) {
|
is_url_char(uint32_t ch) {
|
||||||
|
|||||||
@ -140,7 +140,7 @@ class TestDataTypes(BaseTest):
|
|||||||
|
|
||||||
lb = filled_line_buf(5, 5, filled_cursor())
|
lb = filled_line_buf(5, 5, filled_cursor())
|
||||||
l0 = lb.line(0)
|
l0 = lb.line(0)
|
||||||
l0.add_combining_char(1, 'a')
|
l0.add_combining_char(1, '\u0300')
|
||||||
l0.clear_text(1, 2)
|
l0.clear_text(1, 2)
|
||||||
self.ae(str(l0), '0 00')
|
self.ae(str(l0), '0 00')
|
||||||
self.assertEqualAttributes(l0.cursor_from(1), l0.cursor_from(0))
|
self.assertEqualAttributes(l0.cursor_from(1), l0.cursor_from(0))
|
||||||
@ -164,14 +164,14 @@ class TestDataTypes(BaseTest):
|
|||||||
lb.line(0)[lb.xnum]
|
lb.line(0)[lb.xnum]
|
||||||
l0 = lb.line(0)
|
l0 = lb.line(0)
|
||||||
l0.set_text(' ', 0, len(' '), C())
|
l0.set_text(' ', 0, len(' '), C())
|
||||||
l0.add_combining_char(0, '1')
|
l0.add_combining_char(0, '\u0300')
|
||||||
self.ae(l0[0], ' 1')
|
self.ae(l0[0], ' \u0300')
|
||||||
l0.add_combining_char(0, '2')
|
l0.add_combining_char(0, '\U000e0100')
|
||||||
self.ae(l0[0], ' 12')
|
self.ae(l0[0], ' \u0300\U000e0100')
|
||||||
l0.add_combining_char(0, '3')
|
l0.add_combining_char(0, '\u0302')
|
||||||
self.ae(l0[0], ' 13')
|
self.ae(l0[0], ' \u0300\u0302')
|
||||||
self.ae(l0[1], '\0')
|
self.ae(l0[1], '\0')
|
||||||
self.ae(str(l0), ' 13')
|
self.ae(str(l0), ' \u0300\u0302')
|
||||||
t = 'Testing with simple text'
|
t = 'Testing with simple text'
|
||||||
lb = LineBuf(2, len(t))
|
lb = LineBuf(2, len(t))
|
||||||
l0 = lb.line(0)
|
l0 = lb.line(0)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user