Make wcswidth ignore more escape codes

This is needed so that wcswidth ignores OSC 8 hyperlinks, since we will
eventually have to send those, along with SGR formatting, to the hints
kitten.
This commit is contained in:
Kovid Goyal 2020-09-17 20:16:17 +05:30
parent b06f4f2574
commit 6461dccbdc
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 49 additions and 5 deletions

View File

@ -2061,13 +2061,17 @@ screen_wcswidth(PyObject UNUSED *self, PyObject *str) {
unsigned long ans = 0;
char_type prev_ch = 0;
int prev_width = 0;
typedef enum {NORMAL, IN_SGR, FLAG_PAIR_STARTED} WCSState;
typedef enum {NORMAL, IN_CSI, FLAG_PAIR_STARTED, IN_ST_TERMINATED} WCSState;
WCSState state = NORMAL;
for (i = 0; i < len; i++) {
char_type ch = PyUnicode_READ(kind, data, i);
switch(state) {
case IN_SGR: {
if (ch == 'm') state = NORMAL;
case IN_CSI: {
if (0x40 <= ch && ch <= 0x7e) state = NORMAL;
} continue;
case IN_ST_TERMINATED: {
if (ch == 0x9c) state = NORMAL;
else if (ch == 0x1b && i + 1 < len && PyUnicode_READ(kind, data, i + 1) == '\\') { i++; state = NORMAL; }
} continue;
case FLAG_PAIR_STARTED: {
@ -2076,8 +2080,46 @@ screen_wcswidth(PyObject UNUSED *self, PyObject *str) {
} /* fallthrough */
case NORMAL: {
if (ch == 0x1b && i + 1 < len && PyUnicode_READ(kind, data, i + 1) == '[') { state = IN_SGR; continue; }
switch(ch) {
case 0x1b: {
prev_width = 0;
if (i + 1 < len) {
switch (PyUnicode_READ(kind, data, i + 1)) {
case '[':
state = IN_CSI; i++; continue;
case 'P':
case ']':
case 'X':
case '^':
case '_':
state = IN_ST_TERMINATED; i++; continue;
case 'D':
case 'E':
case 'H':
case 'M':
case 'N':
case 'O':
case 'Z':
case '6':
case '7':
case '8':
case '9':
case '=':
case '>':
case 'F':
case 'c':
case 'l':
case 'm':
case 'n':
case 'o':
case '|':
case '}':
case '~':
i++; continue;
}
}
} break;
case 0xfe0f: {
if (is_emoji_presentation_base(prev_ch) && prev_width == 1) {
ans += 1;

View File

@ -354,9 +354,11 @@ class TestDataTypes(BaseTest):
def test_utils(self):
def w(x):
return wcwidth(ord(x))
self.ae(wcswidth('a\033[2mb'), 2)
self.ae(wcswidth('\033a\033[2mb'), 2)
self.ae(wcswidth('a\033]8;id=moo;https://foo\033\\a'), 2)
self.ae(tuple(map(w, 'a1\0コニチ ✔')), (1, 1, 0, 2, 2, 2, 1, 1))
self.ae(wcswidth('\u2716\u2716\ufe0f\U0001f337'), 5)
self.ae(wcswidth('\033a\033[2mb'), 2)
self.ae(wcswidth('\u25b6\ufe0f'), 2)
self.ae(wcswidth('\U0001f610\ufe0e'), 1)
self.ae(wcswidth('\U0001f1e6a'), 3)