Port ANSI escape code parser to Go

This commit is contained in:
Kovid Goyal 2022-11-25 21:03:09 +05:30
parent 402c8b6803
commit f9a22d0bc7
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 179 additions and 18 deletions

View File

@ -261,11 +261,11 @@ expand_ansi_c_escapes(PyObject *self UNUSED, PyObject *src) {
case CONTROL_CHAR: w(ch & 0x1f); state = NORMAL; break; case CONTROL_CHAR: w(ch & 0x1f); state = NORMAL; break;
case HEX_DIGIT: { case HEX_DIGIT: {
if (hex_digit_idx < max_num_hex_digits && (('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F'))) add_digit(16) if (hex_digit_idx < max_num_hex_digits && (('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F'))) add_digit(16)
else { write_digits(16); w(ch); } else { write_digits(16); idx--; }
}; break; }; break;
case OCT_DIGIT: { case OCT_DIGIT: {
if ('0' <= ch && ch <= '7' && hex_digit_idx < max_num_hex_digits) add_digit(16) if ('0' <= ch && ch <= '7' && hex_digit_idx < max_num_hex_digits) add_digit(16)
else { write_digits(8); w(ch); } else { write_digits(8); idx--; }
}; break; }; break;
case PREV_ESC: { case PREV_ESC: {
state = NORMAL; state = NORMAL;

View File

@ -586,6 +586,7 @@ class TestDataTypes(BaseTest):
r'a\128b': 'a\0128b', r'a\128b': 'a\0128b',
r'a\u1234e': 'a\u1234e', r'a\u1234e': 'a\u1234e',
r'a\U1f1eez': 'a\U0001f1eez', r'a\U1f1eez': 'a\U0001f1eez',
r'a\x1\\': "a\x01\\",
}.items(): }.items():
actual = expand_ansi_c_escapes(src) actual = expand_ansi_c_escapes(src)
self.ae(expected, actual) self.ae(expected, actual)

View File

@ -0,0 +1,148 @@
// License: GPLv3 Copyright: 2022, Kovid Goyal, <kovid at kovidgoyal.net>
package shlex
import (
"fmt"
"strconv"
"strings"
)
var _ = fmt.Print
// state enumerates the modes of the escape-sequence state machine driven by
// ansi_c.parse.
type state int

const (
	// normal: plain text; every rune other than a backslash is copied through.
	normal = state(iota)
	// control_char: `\c` was seen; the next rune is emitted masked with 0x1f.
	control_char
	// backslash: a backslash was seen; the next rune selects the escape.
	backslash
	// hex_digit: collecting hexadecimal digits after `\x`, `\u` or `\U`.
	hex_digit
	// oct_digit: collecting octal digits after a backslash followed by 0-7.
	oct_digit
)
// ansi_c holds the working state of the backslash-escape expander. Its zero
// value is ready to use and starts in the normal state.
type ansi_c struct {
	// state is the current mode of the state machine.
	state state
	// max_num_of_digits caps how many digits the numeric escape currently
	// being read may consume.
	max_num_of_digits int
	// digit_idx is the number of digits buffered in digits so far.
	digit_idx int
	// digits buffers the digits of the numeric escape being read.
	digits [16]rune
	// output accumulates the expanded text.
	output strings.Builder
}
// is_hex_char reports whether ch is an ASCII hexadecimal digit
// (0-9, a-f or A-F).
func is_hex_char(ch rune) bool {
	switch {
	case ch >= '0' && ch <= '9':
		return true
	case ch >= 'a' && ch <= 'f':
		return true
	case ch >= 'A' && ch <= 'F':
		return true
	}
	return false
}
// is_oct_char reports whether ch is an ASCII octal digit (0-7).
func is_oct_char(ch rune) bool {
	return ch >= '0' && ch < '8'
}
// write_digits ends the numeric escape in progress: the buffered digits are
// interpreted as a number in the given base and the rune with that value is
// appended to the output. Nothing is emitted when no digits were buffered or
// when the digits fail to parse. The digit buffer is always emptied and the
// parser always returns to the normal state.
func (self *ansi_c) write_digits(base int) {
	count := self.digit_idx
	// The escape is over regardless of whether anything gets written.
	self.digit_idx, self.state = 0, normal
	if count == 0 {
		return
	}
	if code, err := strconv.ParseUint(string(self.digits[:count]), base, 32); err == nil {
		self.output.WriteRune(rune(code))
	}
}
// parse feeds a single rune into the state machine, appending whatever it
// expands to onto the output.
//
// A rune that cannot extend a numeric escape terminates it: the pending
// digits are flushed and the same rune is then processed again from the
// normal state.
func (self *ansi_c) parse(ch rune) {
	switch self.state {
	case normal:
		if ch != '\\' {
			self.output.WriteRune(ch)
			return
		}
		self.state = backslash

	case control_char:
		self.state = normal
		self.output.WriteRune(ch & 0x1f)

	case hex_digit, oct_digit:
		base, accepts := 16, is_hex_char
		if self.state == oct_digit {
			base, accepts = 8, is_oct_char
		}
		if self.digit_idx < self.max_num_of_digits && accepts(ch) {
			self.digits[self.digit_idx] = ch
			self.digit_idx++
			return
		}
		// ch is not part of the number: emit the number, then handle ch
		// as ordinary input.
		self.write_digits(base)
		self.parse(ch)

	case backslash:
		// Most escapes complete immediately; the multi-rune ones below
		// override this.
		self.state = normal
		switch ch {
		case 'a':
			self.output.WriteRune('\a')
		case 'b':
			self.output.WriteRune('\b')
		case 'e', 'E':
			self.output.WriteRune('\x1b')
		case 'f':
			self.output.WriteRune('\f')
		case 'n':
			self.output.WriteRune('\n')
		case 'r':
			self.output.WriteRune('\r')
		case 't':
			self.output.WriteRune('\t')
		case 'v':
			self.output.WriteRune('\v')
		case '\\', '?', '"', '\'':
			// These escapes stand for the character itself.
			self.output.WriteRune(ch)
		case 'c':
			self.state = control_char
		case 'x', 'u', 'U':
			width := 2
			switch ch {
			case 'u':
				width = 4
			case 'U':
				width = 8
			}
			self.state, self.max_num_of_digits, self.digit_idx = hex_digit, width, 0
		case '0', '1', '2', '3', '4', '5', '6', '7':
			// The rune after the backslash is already the first octal digit.
			self.digits[0] = ch
			self.state, self.max_num_of_digits, self.digit_idx = oct_digit, 3, 1
		default:
			// Unrecognised escape: keep it verbatim, backslash included.
			self.output.WriteRune('\\')
			self.output.WriteRune(ch)
		}
	}
}
// finish signals end of input. Any escape still in progress is resolved:
// pending numeric digits are flushed, and a dangling backslash or `\c` is
// written back literally. It returns the accumulated text and leaves the
// parser reset to the normal state with an empty output buffer.
func (self *ansi_c) finish() string {
	switch pending := self.state; pending {
	case backslash:
		self.output.WriteRune('\\')
	case control_char:
		self.output.WriteString("\\c")
	case hex_digit, oct_digit:
		base := 16
		if pending == oct_digit {
			base = 8
		}
		self.write_digits(base)
	}
	self.digit_idx, self.state = 0, normal
	expanded := self.output.String()
	self.output.Reset()
	return expanded
}
// ExpandANSICEscapes returns src with ANSI C style backslash escapes
// replaced by the characters they denote. Unrecognised escapes, and a
// trailing backslash or `\c` with nothing after it, are kept literally.
func ExpandANSICEscapes(src string) string {
	var parser ansi_c
	// Reserve room for the whole input up front.
	parser.output.Grow(len(src))
	for _, r := range src {
		parser.parse(r)
	}
	return parser.finish()
}

View File

@ -1,19 +1,3 @@
/*
Copyright 2012 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package shlex package shlex
import ( import (
@ -133,3 +117,31 @@ func TestSplitForCompletion(t *testing.T) {
test(`a "b c"`, 2, "a", "b c") test(`a "b c"`, 2, "a", "b c")
test(`a "b c`, 2, "a", "b c") test(`a "b c`, 2, "a", "b c")
} }
func TestExpandANSICEscapes(t *testing.T) {
var m = map[string]string{
"abc": "abc",
`a\ab`: "a\ab",
`a\eb`: "a\x1bb",
`a\r\nb`: "a\r\nb",
`a\c b`: "a\000b",
`a\c`: "a\\c",
`a\x1bb`: "a\x1bb",
`a\x1b`: "a\x1b",
`a\x1`: "a\x01",
`a\x1\\`: "a\x01\\",
`a\x1g`: "a\x01g",
`a\z\"`: "a\\z\"",
`a\123b`: "a\123b",
`a\128b`: "a\0128b",
`a\u1234e`: "a\u1234e",
`a\U1f1eez`: "a\U0001f1eez",
}
for q, expected := range m {
actual := ExpandANSICEscapes(q)
if diff := cmp.Diff(expected, actual); diff != "" {
t.Fatalf("Failed to process: %#v\n%s", q, diff)
}
}
}