Use an enum for UTF8 decoder state

This commit is contained in:
Kovid Goyal 2021-03-31 15:19:27 +05:30
parent 9838a336d5
commit 0372242d12
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
7 changed files with 15 additions and 16 deletions

View File

@@ -30,8 +30,6 @@ typedef uint8_t len_t;
typedef uint32_t text_t; typedef uint32_t text_t;
#define LEN_MAX UINT8_MAX #define LEN_MAX UINT8_MAX
#define UTF8_ACCEPT 0
#define UTF8_REJECT 1
#define IS_LOWERCASE(x) (x) >= 'a' && (x) <= 'z' #define IS_LOWERCASE(x) (x) >= 'a' && (x) <= 'z'
#define IS_UPPERCASE(x) (x) >= 'A' && (x) <= 'Z' #define IS_UPPERCASE(x) (x) >= 'A' && (x) <= 'Z'
#define LOWERCASE(x) ((IS_UPPERCASE(x)) ? (x) + 32 : (x)) #define LOWERCASE(x) ((IS_UPPERCASE(x)) ? (x) + 32 : (x))

11
kitty/charsets.c generated
View File

@@ -7,10 +7,8 @@
// Taken from consolemap.c in the linux vt driver sourcecode // Taken from consolemap.c in the linux vt driver sourcecode
#include <stddef.h> #include "data-types.h"
#include <stdint.h>
#define UTF8_ACCEPT 0
#define UTF8_REJECT 1
static uint32_t charset_translations[5][256] = { static uint32_t charset_translations[5][256] = {
/* 8-bit Latin-1 mapped to Unicode -- trivial mapping */ /* 8-bit Latin-1 mapped to Unicode -- trivial mapping */
@@ -231,7 +229,7 @@ static const uint8_t utf8_data[] = {
}; };
uint32_t uint32_t
decode_utf8(uint32_t* state, uint32_t* codep, uint8_t byte) { decode_utf8(UTF8State* state, uint32_t* codep, uint8_t byte) {
uint32_t type = utf8_data[byte]; uint32_t type = utf8_data[byte];
*codep = (*state != UTF8_ACCEPT) ? *codep = (*state != UTF8_ACCEPT) ?
@@ -245,7 +243,8 @@ decode_utf8(uint32_t* state, uint32_t* codep, uint8_t byte) {
size_t size_t
decode_utf8_string(const char *src, size_t sz, uint32_t *dest) { decode_utf8_string(const char *src, size_t sz, uint32_t *dest) {
// dest must be a zeroed array of size at least sz // dest must be a zeroed array of size at least sz
uint32_t codep = 0, state = 0, prev = UTF8_ACCEPT; uint32_t codep = 0;
UTF8State state = 0, prev = UTF8_ACCEPT;
size_t i, d; size_t i, d;
for (i = 0, d = 0; i < sz; i++) { for (i = 0, d = 0; i < sz; i++) {
switch(decode_utf8(&state, &codep, src[i])) { switch(decode_utf8(&state, &codep, src[i])) {

View File

@@ -80,8 +80,6 @@ typedef enum { TILING, SCALED, MIRRORED } BackgroundImageLayout;
#define ATTRS_MASK_FOR_SGR (ATTRS_MASK_WITHOUT_MARK | ATTRS_MASK_WITHOUT_WIDTH) #define ATTRS_MASK_FOR_SGR (ATTRS_MASK_WITHOUT_MARK | ATTRS_MASK_WITHOUT_WIDTH)
#define MARK_MASK 3 #define MARK_MASK 3
#define COL_MASK 0xFFFFFFFF #define COL_MASK 0xFFFFFFFF
#define UTF8_ACCEPT 0
#define UTF8_REJECT 1
#define DECORATION_FG_CODE 58 #define DECORATION_FG_CODE 58
#define CHAR_IS_BLANK(ch) ((ch) == 32 || (ch) == 0) #define CHAR_IS_BLANK(ch) ((ch) == 32 || (ch) == 0)
#define CONTINUED_MASK 1 #define CONTINUED_MASK 1
@@ -153,6 +151,8 @@ typedef enum { TILING, SCALED, MIRRORED } BackgroundImageLayout;
#endif #endif
typedef enum UTF8State { UTF8_ACCEPT = 0, UTF8_REJECT = 1} UTF8State;
typedef struct { typedef struct {
uint32_t left, top, right, bottom; uint32_t left, top, right, bottom;
} Region; } Region;

View File

@@ -196,7 +196,8 @@ static inline bool
pagerhist_ensure_start_is_valid_utf8(PagerHistoryBuf *ph) { pagerhist_ensure_start_is_valid_utf8(PagerHistoryBuf *ph) {
uint8_t scratch[8]; uint8_t scratch[8];
size_t num = ringbuf_memcpy_from(scratch, ph->ringbuf, arraysz(scratch)); size_t num = ringbuf_memcpy_from(scratch, ph->ringbuf, arraysz(scratch));
uint32_t state = UTF8_ACCEPT, codep; uint32_t codep;
UTF8State state = UTF8_ACCEPT;
size_t count = 0; size_t count = 0;
size_t last_reject_at = 0; size_t last_reject_at = 0;
while (count < num) { while (count < num) {
@@ -333,7 +334,7 @@ get_line(HistoryBuf *self, index_type y, Line *l) { init_line(self, index_of(sel
static inline char_type static inline char_type
pagerhist_remove_char(PagerHistoryBuf *ph, unsigned *count, uint8_t record[8]) { pagerhist_remove_char(PagerHistoryBuf *ph, unsigned *count, uint8_t record[8]) {
uint32_t codep, state = UTF8_ACCEPT; uint32_t codep; UTF8State state = UTF8_ACCEPT;
*count = 0; *count = 0;
size_t num = ringbuf_bytes_used(ph->ringbuf); size_t num = ringbuf_bytes_used(ph->ringbuf);
while (num--) { while (num--) {

View File

@@ -90,7 +90,7 @@ serialize(const EncodingData *data, char *output, const char csi_trailer) {
} }
if (third_field_not_empty) { if (third_field_not_empty) {
const char *p = data->text; const char *p = data->text;
uint32_t codep, state = UTF8_ACCEPT; uint32_t codep; UTF8State state = UTF8_ACCEPT;
bool first = true; bool first = true;
while(*p) { while(*p) {
if (decode_utf8(&state, &codep, *p) == UTF8_ACCEPT) { if (decode_utf8(&state, &codep, *p) == UTF8_ACCEPT) {
@@ -397,7 +397,7 @@ encode_key(const KeyEvent *ev, char *output) {
static inline bool static inline bool
startswith_ascii_control_char(const char *p) { startswith_ascii_control_char(const char *p) {
if (!p || !*p) return true; if (!p || !*p) return true;
uint32_t codep, state = UTF8_ACCEPT; uint32_t codep; UTF8State state = UTF8_ACCEPT;
while(*p) { while(*p) {
if (decode_utf8(&state, &codep, *p) == UTF8_ACCEPT) { if (decode_utf8(&state, &codep, *p) == UTF8_ACCEPT) {
return codep < 32 || codep == 127; return codep < 32 || codep == 127;

View File

@@ -622,7 +622,7 @@ screen_draw_overlay_text(Screen *self, const char *utf8_text) {
self->overlay_line.ynum = self->cursor->y; self->overlay_line.ynum = self->cursor->y;
self->overlay_line.xstart = self->cursor->x; self->overlay_line.xstart = self->cursor->x;
self->overlay_line.xnum = 0; self->overlay_line.xnum = 0;
uint32_t codepoint = 0, state = UTF8_ACCEPT; uint32_t codepoint = 0; UTF8State state = UTF8_ACCEPT;
bool orig_line_wrap_mode = self->modes.mDECAWM; bool orig_line_wrap_mode = self->modes.mDECAWM;
self->modes.mDECAWM = false; self->modes.mDECAWM = false;
self->cursor->reverse ^= true; self->cursor->reverse ^= true;

View File

@@ -83,7 +83,8 @@ typedef struct {
CellPixelSize cell_size; CellPixelSize cell_size;
OverlayLine overlay_line; OverlayLine overlay_line;
id_type window_id; id_type window_id;
uint32_t utf8_state, utf8_codepoint, *g0_charset, *g1_charset, *g_charset; uint32_t utf8_codepoint, *g0_charset, *g1_charset, *g_charset;
UTF8State utf8_state;
unsigned int current_charset; unsigned int current_charset;
Selections selections, url_ranges; Selections selections, url_ranges;
struct { struct {