Rewrite wcswidth as a state machine
This commit is contained in:
parent
0b9a37139e
commit
0862e85577
@ -1867,27 +1867,39 @@ screen_wcswidth(PyObject UNUSED *self, PyObject *str) {
|
|||||||
unsigned long ans = 0;
|
unsigned long ans = 0;
|
||||||
char_type prev_ch = 0;
|
char_type prev_ch = 0;
|
||||||
int prev_width = 0;
|
int prev_width = 0;
|
||||||
bool in_sgr = false;
|
typedef enum {NORMAL, IN_SGR, FLAG_PAIR_STARTED} WCSState;
|
||||||
|
WCSState state = NORMAL;
|
||||||
for (i = 0; i < len; i++) {
|
for (i = 0; i < len; i++) {
|
||||||
char_type ch = PyUnicode_READ(kind, data, i);
|
char_type ch = PyUnicode_READ(kind, data, i);
|
||||||
if (in_sgr) {
|
switch(state) {
|
||||||
if (ch == 'm') in_sgr = false;
|
case IN_SGR: {
|
||||||
continue;
|
if (ch == 'm') state = NORMAL;
|
||||||
}
|
} continue;
|
||||||
if (ch == 0x1b && i + 1 < len && PyUnicode_READ(kind, data, i + 1) == '[') { in_sgr = true; continue; }
|
|
||||||
if (ch == 0xfe0f) {
|
case FLAG_PAIR_STARTED: {
|
||||||
|
state = NORMAL;
|
||||||
|
if (is_flag_pair(prev_ch, ch)) break;
|
||||||
|
} /* fallthrough */
|
||||||
|
|
||||||
|
case NORMAL: {
|
||||||
|
if (ch == 0x1b && i + 1 < len && PyUnicode_READ(kind, data, i + 1) == '[') { state = IN_SGR; continue; }
|
||||||
|
switch(ch) {
|
||||||
|
case 0xfe0f: {
|
||||||
if (is_emoji_presentation_base(prev_ch) && prev_width == 1) {
|
if (is_emoji_presentation_base(prev_ch) && prev_width == 1) {
|
||||||
ans += 1;
|
ans += 1;
|
||||||
prev_width = 2;
|
prev_width = 2;
|
||||||
} else prev_width = 0;
|
} else prev_width = 0;
|
||||||
} else if (ch == 0xfe0e) {
|
} break;
|
||||||
|
|
||||||
|
case 0xfe0e: {
|
||||||
if (is_emoji_presentation_base(prev_ch) && prev_width == 2) {
|
if (is_emoji_presentation_base(prev_ch) && prev_width == 2) {
|
||||||
ans -= 1;
|
ans -= 1;
|
||||||
prev_width = 1;
|
prev_width = 1;
|
||||||
} else prev_width = 0;
|
} else prev_width = 0;
|
||||||
} else if (is_flag_pair(prev_ch, ch)) {
|
} break;
|
||||||
prev_width = 2;
|
|
||||||
} else {
|
default: {
|
||||||
|
if (is_flag_codepoint(ch)) state = FLAG_PAIR_STARTED;
|
||||||
int w = wcwidth_std(ch);
|
int w = wcwidth_std(ch);
|
||||||
switch(w) {
|
switch(w) {
|
||||||
case -1:
|
case -1:
|
||||||
@ -1899,7 +1911,10 @@ screen_wcswidth(PyObject UNUSED *self, PyObject *str) {
|
|||||||
prev_width = 1; break;
|
prev_width = 1; break;
|
||||||
}
|
}
|
||||||
ans += prev_width;
|
ans += prev_width;
|
||||||
}
|
} break;
|
||||||
|
} break; // switch(ch)
|
||||||
|
} break; // case NORMAL
|
||||||
|
} // switch(state)
|
||||||
prev_ch = ch;
|
prev_ch = ch;
|
||||||
}
|
}
|
||||||
return PyLong_FromUnsignedLong(ans);
|
return PyLong_FromUnsignedLong(ans);
|
||||||
|
|||||||
@ -362,6 +362,7 @@ class TestDataTypes(BaseTest):
|
|||||||
self.ae(wcswidth('\U0001f1e6a'), 3)
|
self.ae(wcswidth('\U0001f1e6a'), 3)
|
||||||
self.ae(wcswidth('\U0001F1E6a\U0001F1E8a'), 6)
|
self.ae(wcswidth('\U0001F1E6a\U0001F1E8a'), 6)
|
||||||
self.ae(wcswidth('\U0001F1E6\U0001F1E8a'), 3)
|
self.ae(wcswidth('\U0001F1E6\U0001F1E8a'), 3)
|
||||||
|
self.ae(wcswidth('\U0001F1E6\U0001F1E8\U0001F1E6'), 4)
|
||||||
# Regional indicator symbols (unicode flags) are defined as having
|
# Regional indicator symbols (unicode flags) are defined as having
|
||||||
# Emoji_Presentation so must have width 2
|
# Emoji_Presentation so must have width 2
|
||||||
self.ae(tuple(map(w, '\U0001f1ee\U0001f1f3')), (2, 2))
|
self.ae(tuple(map(w, '\U0001f1ee\U0001f1f3')), (2, 2))
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user