diff --git a/kitty/data-types.c b/kitty/data-types.c index bc27c0639..571f43d54 100644 --- a/kitty/data-types.c +++ b/kitty/data-types.c @@ -231,7 +231,7 @@ PyInit_fast_data_types(void) { #define OOF(n) #n, offsetof(Cell, n) if (PyModule_AddObject(m, "CELL", Py_BuildValue("{sI sI sI sI sI sI sI sI sI}", - OOF(ch), OOF(fg), OOF(bg), OOF(decoration_fg), OOF(cc), OOF(sprite_x), OOF(sprite_y), OOF(sprite_z), "size", sizeof(Cell))) != 0) return NULL; + OOF(ch), OOF(fg), OOF(bg), OOF(decoration_fg), OOF(cc_idx), OOF(sprite_x), OOF(sprite_y), OOF(sprite_z), "size", sizeof(Cell))) != 0) return NULL; #undef OOF PyModule_AddIntConstant(m, "BOLD", BOLD_SHIFT); PyModule_AddIntConstant(m, "ITALIC", ITALIC_SHIFT); diff --git a/kitty/data-types.h b/kitty/data-types.h index d5dcc0924..82c9af8e1 100644 --- a/kitty/data-types.h +++ b/kitty/data-types.h @@ -56,7 +56,6 @@ typedef enum MouseShapes { BEAM, HAND, ARROW } MouseShape; #define REVERSE_SHIFT 6 #define STRIKE_SHIFT 7 #define COL_MASK 0xFFFFFFFF -#define CC_SHIFT 16 #define UTF8_ACCEPT 0 #define UTF8_REJECT 1 #define DECORATION_FG_CODE 58 @@ -138,7 +137,7 @@ typedef struct { attrs_type attrs; // The following are only needed on the CPU, not the GPU char_type ch; - combining_type cc[2]; + combining_type cc_idx[2]; } Cell; typedef struct { diff --git a/kitty/fonts.c b/kitty/fonts.c index 28a5620ac..b62e14519 100644 --- a/kitty/fonts.c +++ b/kitty/fonts.c @@ -9,6 +9,7 @@ #include "fonts.h" #include "state.h" #include "emoji.h" +#include "unicode-data.h" #define MISSING_GLYPH 4 #define MAX_NUM_EXTRA_GLYPHS 8 @@ -327,8 +328,8 @@ face_has_codepoint(PyObject* face, char_type cp) { static inline bool has_cell_text(Font *self, Cell *cell) { if (!face_has_codepoint(self->face, cell->ch)) return false; - for (unsigned i = 0; i < arraysz(cell->cc) && cell->cc[i]; i++) { - if (!face_has_codepoint(self->face, cell->cc[i])) return false; + for (unsigned i = 0; i < arraysz(cell->cc_idx) && cell->cc_idx[i]; i++) { + if (!face_has_codepoint(self->face, codepoint_for_mark(cell->cc_idx[i]))) return false; } return true; } @@ -492,12 +493,12 @@ load_hb_buffer(Cell *first_cell, index_type num_cells) { hb_buffer_clear_contents(harfbuzz_buffer); while (num_cells) { attrs_type prev_width = 0; - for (num = 0; num_cells && num < arraysz(shape_buffer) - 20 - arraysz(first_cell->cc); first_cell++, num_cells--) { + for (num = 0; num_cells && num < arraysz(shape_buffer) - 20 - arraysz(first_cell->cc_idx); first_cell++, num_cells--) { if (prev_width == 2) { prev_width = 0; continue; } shape_buffer[num++] = first_cell->ch; prev_width = first_cell->attrs & WIDTH_MASK; - for (unsigned i = 0; i < arraysz(first_cell->cc) && first_cell->cc[i]; i++) { - shape_buffer[num++] = first_cell->cc[i]; + for (unsigned i = 0; i < arraysz(first_cell->cc_idx) && first_cell->cc_idx[i]; i++) { + shape_buffer[num++] = codepoint_for_mark(first_cell->cc_idx[i]); } } hb_buffer_add_utf32(harfbuzz_buffer, shape_buffer, num, 0, num); @@ -576,7 +577,7 @@ static GroupState group_state = {0}; static inline unsigned int num_codepoints_in_cell(Cell *cell) { unsigned int ans = 1; - for (unsigned i = 0; i < arraysz(cell->cc) && cell->cc[i]; i++) ans++; + for (unsigned i = 0; i < arraysz(cell->cc_idx) && cell->cc_idx[i]; i++) ans++; return ans; } @@ -654,7 +655,7 @@ check_cell_consumed(CellData *cell_data, Cell *last_cell) { cell_data->current_codepoint = cell_data->cell->ch; break; default: - cell_data->current_codepoint = cell_data->cell->cc[cell_data->codepoints_consumed - 1]; + cell_data->current_codepoint = codepoint_for_mark(cell_data->cell->cc_idx[cell_data->codepoints_consumed - 1]); break; } } @@ -1044,10 +1045,10 @@ get_fallback_font(PyObject UNUSED *self, PyObject *args) { int bold, italic; if (!PyArg_ParseTuple(args, "Upp", &text, &bold, &italic)) return NULL; Cell cell = {0}; - static Py_UCS4 char_buf[2 + arraysz(cell.cc)]; + static Py_UCS4 char_buf[2 + arraysz(cell.cc_idx)]; if (!PyUnicode_AsUCS4(text, char_buf, arraysz(char_buf), 1)) return NULL; cell.ch = char_buf[0]; - for (unsigned i = 0; i + 1 < PyUnicode_GetLength(text) && i < arraysz(cell.cc); i++) cell.cc[i] = char_buf[i + 1]; + for (unsigned i = 0; i + 1 < PyUnicode_GetLength(text) && i < arraysz(cell.cc_idx); i++) cell.cc_idx[i] = mark_for_codepoint(char_buf[i + 1]); if (bold) cell.attrs |= 1 << BOLD_SHIFT; if (italic) cell.attrs |= 1 << ITALIC_SHIFT; ssize_t ans = fallback_font(&cell); diff --git a/kitty/line.c b/kitty/line.c index c2dc70e03..8a9dc2641 100644 --- a/kitty/line.c +++ b/kitty/line.c @@ -38,9 +38,9 @@ PyObject* cell_text(Cell *cell) { PyObject *ans; unsigned num = 1; - static Py_UCS4 buf[arraysz(cell->cc) + 1]; + static Py_UCS4 buf[arraysz(cell->cc_idx) + 1]; buf[0] = cell->ch; - for (unsigned i = 0; i < arraysz(cell->cc) && cell->cc[i]; i++) buf[num++] = cell->cc[i]; + for (unsigned i = 0; i < arraysz(cell->cc_idx) && cell->cc_idx[i]; i++) buf[num++] = codepoint_for_mark(cell->cc_idx[i]); ans = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buf, num); return ans; } @@ -166,7 +166,7 @@ cell_as_unicode(Cell *cell, bool include_cc, Py_UCS4 *buf, char_type zero_char) size_t n = 1; buf[0] = cell->ch ? cell->ch : zero_char; if (include_cc) { - for (unsigned i = 0; i < arraysz(cell->cc) && cell->cc[i]; i++) buf[n++] = cell->cc[i]; + for (unsigned i = 0; i < arraysz(cell->cc_idx) && cell->cc_idx[i]; i++) buf[n++] = codepoint_for_mark(cell->cc_idx[i]); } return n; } @@ -175,7 +175,7 @@ size_t cell_as_utf8(Cell *cell, bool include_cc, char *buf, char_type zero_char) { size_t n = encode_utf8(cell->ch ? cell->ch : zero_char, buf); if (include_cc) { - for (unsigned i = 0; i < arraysz(cell->cc) && cell->cc[i]; i++) n += encode_utf8(cell->cc[i], buf + n); + for (unsigned i = 0; i < arraysz(cell->cc_idx) && cell->cc_idx[i]; i++) n += encode_utf8(codepoint_for_mark(cell->cc_idx[i]), buf + n); } buf[n] = 0; return n; @@ -188,7 +188,7 @@ unicode_in_range(Line *self, index_type start, index_type limit, bool include_cc static Py_UCS4 buf[4096]; if (leading_char) buf[n++] = leading_char; char_type previous_width = 0; - for(index_type i = start; i < limit && n < arraysz(buf) - 2 - arraysz(self->cells->cc); i++) { + for(index_type i = start; i < limit && n < arraysz(buf) - 2 - arraysz(self->cells->cc_idx); i++) { char_type ch = self->cells[i].ch; if (ch == 0) { if (previous_width == 2) { previous_width = 0; continue; }; @@ -249,8 +249,8 @@ line_as_ansi(Line *self, Py_UCS4 *buf, index_type buflen) { t = prev_cursor; prev_cursor = cursor; cursor = t; if (*sgr) WRITE_SGR(sgr); WRITE_CH(ch); - for(unsigned c = 0; c < arraysz(self->cells[pos].cc) && self->cells[pos].cc[c]; c++) { - WRITE_CH(self->cells[pos].cc[c]); + for(unsigned c = 0; c < arraysz(self->cells[pos].cc_idx) && self->cells[pos].cc_idx[c]; c++) { + WRITE_CH(codepoint_for_mark(self->cells[pos].cc_idx[c])); } previous_width = attrs & WIDTH_MASK; } @@ -301,10 +301,10 @@ void line_add_combining_char(Line *self, uint32_t ch, unsigned int x) { Cell *cell = self->cells + x; if (!cell->ch) return; // dont allow adding combining chars to a null cell - for (unsigned i = 0; i < arraysz(cell->cc); i++) { - if (!cell->cc[i]) { cell->cc[i] = (combining_type)ch; return; } + for (unsigned i = 0; i < arraysz(cell->cc_idx); i++) { + if (!cell->cc_idx[i]) { cell->cc_idx[i] = mark_for_codepoint(ch); return; } } - cell->cc[arraysz(cell->cc) - 1] = (combining_type)ch; + cell->cc_idx[arraysz(cell->cc_idx) - 1] = mark_for_codepoint(ch); } static PyObject* @@ -354,7 +354,7 @@ set_text(Line* self, PyObject *args) { self->cells[i].fg = fg; self->cells[i].bg = bg; self->cells[i].decoration_fg = dfg; - memset(self->cells[i].cc, 0, sizeof(self->cells[i].cc)); + memset(self->cells[i].cc_idx, 0, sizeof(self->cells[i].cc_idx)); } Py_RETURN_NONE; @@ -386,7 +386,7 @@ line_clear_text(Line *self, unsigned int at, unsigned int num, char_type ch) { attrs_type width = ch ? 1 : 0; #define PREFIX \ for (index_type i = at; i < MIN(self->xnum, at + num); i++) { \ - self->cells[i].ch = ch; memset(self->cells[i].cc, 0, sizeof(self->cells[i].cc)); \ + self->cells[i].ch = ch; memset(self->cells[i].cc_idx, 0, sizeof(self->cells[i].cc_idx)); \ self->cells[i].attrs = (self->cells[i].attrs & ATTRS_MASK_WITHOUT_WIDTH) | width; \ } if (CHAR_IS_BLANK(ch)) { @@ -416,7 +416,7 @@ line_apply_cursor(Line *self, Cursor *cursor, unsigned int at, unsigned int num, for (index_type i = at; i < self->xnum && i < at + num; i++) { if (clear_char) { self->cells[i].ch = BLANK_CHAR; - memset(self->cells[i].cc, 0, sizeof(self->cells[i].cc)); + memset(self->cells[i].cc_idx, 0, sizeof(self->cells[i].cc_idx)); self->cells[i].attrs = attrs; clear_sprite_position(self->cells[i]); } else { @@ -491,7 +491,7 @@ line_set_char(Line *self, unsigned int at, uint32_t ch, unsigned int width, Curs self->cells[at].decoration_fg = cursor->decoration_fg & COL_MASK; } self->cells[at].ch = ch; - memset(self->cells[at].cc, 0, sizeof(self->cells[at].cc)); + memset(self->cells[at].cc_idx, 0, sizeof(self->cells[at].cc_idx)); } static PyObject* diff --git a/kitty/unicode-data.h b/kitty/unicode-data.h index 37f446f1e..f21da69d9 100644 --- a/kitty/unicode-data.h +++ b/kitty/unicode-data.h @@ -6,6 +6,8 @@ bool is_ignored_char(char_type ch); bool is_word_char(char_type ch); bool is_CZ_category(char_type); bool is_P_category(char_type); +char_type codepoint_for_mark(combining_type m); +combining_type mark_for_codepoint(char_type c); static inline bool is_url_char(uint32_t ch) { diff --git a/kitty_tests/datatypes.py b/kitty_tests/datatypes.py index 37c251b82..244a1ade7 100644 --- a/kitty_tests/datatypes.py +++ b/kitty_tests/datatypes.py @@ -140,7 +140,7 @@ class TestDataTypes(BaseTest): lb = filled_line_buf(5, 5, filled_cursor()) l0 = lb.line(0) - l0.add_combining_char(1, 'a') + l0.add_combining_char(1, '\u0300') l0.clear_text(1, 2) self.ae(str(l0), '0 00') self.assertEqualAttributes(l0.cursor_from(1), l0.cursor_from(0)) @@ -164,14 +164,14 @@ class TestDataTypes(BaseTest): lb.line(0)[lb.xnum] l0 = lb.line(0) l0.set_text(' ', 0, len(' '), C()) - l0.add_combining_char(0, '1') - self.ae(l0[0], ' 1') - l0.add_combining_char(0, '2') - self.ae(l0[0], ' 12') - l0.add_combining_char(0, '3') - self.ae(l0[0], ' 13') + l0.add_combining_char(0, '\u0300') + self.ae(l0[0], ' \u0300') + l0.add_combining_char(0, '\U000e0100') + self.ae(l0[0], ' \u0300\U000e0100') + l0.add_combining_char(0, '\u0302') + self.ae(l0[0], ' \u0300\u0302') self.ae(l0[1], '\0') - self.ae(str(l0), ' 13') + self.ae(str(l0), ' \u0300\u0302') t = 'Testing with simple text' lb = LineBuf(2, len(t)) l0 = lb.line(0)