From f9a22d0bc78b06e5570a8e7ee32d7e8c8e972222 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 25 Nov 2022 21:03:09 +0530 Subject: [PATCH] Port ANSI C escape sequence parser to Go --- kitty/data-types.c | 4 +- kitty_tests/datatypes.py | 1 + tools/utils/shlex/ansi_c_escapes.go | 148 ++++++++++++++++++++++++++++ tools/utils/shlex/shlex_test.go | 44 ++++++--- 4 files changed, 179 insertions(+), 18 deletions(-) create mode 100644 tools/utils/shlex/ansi_c_escapes.go diff --git a/kitty/data-types.c b/kitty/data-types.c index 7886e38c8..4f7751d3d 100644 --- a/kitty/data-types.c +++ b/kitty/data-types.c @@ -261,11 +261,11 @@ expand_ansi_c_escapes(PyObject *self UNUSED, PyObject *src) { case CONTROL_CHAR: w(ch & 0x1f); state = NORMAL; break; case HEX_DIGIT: { if (hex_digit_idx < max_num_hex_digits && (('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F'))) add_digit(16) - else { write_digits(16); w(ch); } + else { write_digits(16); idx--; } }; break; case OCT_DIGIT: { if ('0' <= ch && ch <= '7' && hex_digit_idx < max_num_hex_digits) add_digit(16) - else { write_digits(8); w(ch); } + else { write_digits(8); idx--; } }; break; case PREV_ESC: { state = NORMAL; diff --git a/kitty_tests/datatypes.py b/kitty_tests/datatypes.py index b45806d1b..c3be731a0 100644 --- a/kitty_tests/datatypes.py +++ b/kitty_tests/datatypes.py @@ -586,6 +586,7 @@ class TestDataTypes(BaseTest): r'a\128b': 'a\0128b', r'a\u1234e': 'a\u1234e', r'a\U1f1eez': 'a\U0001f1eez', + r'a\x1\\': "a\x01\\", }.items(): actual = expand_ansi_c_escapes(src) self.ae(expected, actual) diff --git a/tools/utils/shlex/ansi_c_escapes.go b/tools/utils/shlex/ansi_c_escapes.go new file mode 100644 index 000000000..945c85c01 --- /dev/null +++ b/tools/utils/shlex/ansi_c_escapes.go @@ -0,0 +1,148 @@ +// License: GPLv3 Copyright: 2022, Kovid Goyal, + +package shlex + +import ( + "fmt" + "strconv" + "strings" +) + +var _ = fmt.Print + +type state int + +const ( + normal state = iota + control_char + backslash
+ hex_digit + oct_digit +) + +type ansi_c struct { + state state + max_num_of_digits, digit_idx int + digits [16]rune + output strings.Builder +} + +func is_hex_char(ch rune) bool { + return ('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F') +} + +func is_oct_char(ch rune) bool { + return '0' <= ch && ch <= '7' +} + +func (self *ansi_c) write_digits(base int) { + if self.digit_idx > 0 { + text := string(self.digits[:self.digit_idx]) + val, err := strconv.ParseUint(text, base, 32) + if err == nil { + self.output.WriteRune(rune(val)) + } + } + self.digit_idx = 0 + self.state = normal +} + +func (self *ansi_c) parse(ch rune) { + switch self.state { + case normal: + if ch == '\\' { + self.state = backslash + } else { + self.output.WriteRune(ch) + } + case control_char: + self.output.WriteRune(ch & 0x1f) + self.state = normal + case hex_digit: + if self.digit_idx < self.max_num_of_digits && is_hex_char(ch) { + self.digits[self.digit_idx] = ch + self.digit_idx++ + } else { + self.write_digits(16) + self.parse(ch) + } + case oct_digit: + if self.digit_idx < self.max_num_of_digits && is_oct_char(ch) { + self.digits[self.digit_idx] = ch + self.digit_idx++ + } else { + self.write_digits(8) + self.parse(ch) + } + case backslash: + self.state = normal + switch ch { + default: + self.output.WriteRune('\\') + self.output.WriteRune(ch) + case 'a': + self.output.WriteRune(7) + case 'b': + self.output.WriteRune(8) + case 'c': + self.state = control_char + case 'e', 'E': + self.output.WriteRune(27) + case 'f': + self.output.WriteRune(12) + case 'n': + self.output.WriteRune(10) + case 'r': + self.output.WriteRune(13) + case 't': + self.output.WriteRune(9) + case 'v': + self.output.WriteRune(11) + case 'x': + self.max_num_of_digits, self.digit_idx, self.state = 2, 0, hex_digit + case 'u': + self.max_num_of_digits, self.digit_idx, self.state = 4, 0, hex_digit + case 'U': + self.max_num_of_digits, self.digit_idx, self.state = 8, 0, hex_digit + case '0', 
'1', '2', '3', '4', '5', '6', '7': + self.max_num_of_digits, self.digit_idx, self.state = 3, 1, oct_digit + self.digits[0] = ch + case '\\': + self.output.WriteRune('\\') + case '?': + self.output.WriteRune('?') + case '"': + self.output.WriteRune('"') + case '\'': + self.output.WriteRune('\'') + + } + } +} + +func (self *ansi_c) finish() string { + switch self.state { + case hex_digit: + self.write_digits(16) + case oct_digit: + self.write_digits(8) + case backslash: + self.output.WriteRune('\\') + case control_char: + self.output.WriteString("\\c") + } + self.state = normal + self.digit_idx = 0 + s := self.output.String() + self.output.Reset() + return s +} + +func ExpandANSICEscapes(src string) string { + p := ansi_c{} + p.output.Grow(len(src)) + for _, ch := range src { + p.parse(ch) + } + return p.finish() +} diff --git a/tools/utils/shlex/shlex_test.go b/tools/utils/shlex/shlex_test.go index 4882a8aec..db5d348f8 100644 --- a/tools/utils/shlex/shlex_test.go +++ b/tools/utils/shlex/shlex_test.go @@ -1,19 +1,3 @@ -/* -Copyright 2012 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - package shlex import ( @@ -133,3 +117,31 @@ func TestSplitForCompletion(t *testing.T) { test(`a "b c"`, 2, "a", "b c") test(`a "b c`, 2, "a", "b c") } + +func TestExpandANSICEscapes(t *testing.T) { + var m = map[string]string{ + "abc": "abc", + `a\ab`: "a\ab", + `a\eb`: "a\x1bb", + `a\r\nb`: "a\r\nb", + `a\c b`: "a\000b", + `a\c`: "a\\c", + `a\x1bb`: "a\x1bb", + `a\x1b`: "a\x1b", + `a\x1`: "a\x01", + `a\x1\\`: "a\x01\\", + `a\x1g`: "a\x01g", + `a\z\"`: "a\\z\"", + `a\123b`: "a\123b", + `a\128b`: "a\0128b", + `a\u1234e`: "a\u1234e", + `a\U1f1eez`: "a\U0001f1eez", + } + for q, expected := range m { + actual := ExpandANSICEscapes(q) + if diff := cmp.Diff(expected, actual); diff != "" { + t.Fatalf("Failed to process: %#v\n%s", q, diff) + } + } + +}