Work on an escape code parser

This commit is contained in:
Kovid Goyal 2022-08-22 10:33:31 +05:30
parent 7eb6cb2407
commit 84cb2638d6
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 336 additions and 0 deletions

View File

@ -0,0 +1,287 @@
package utils
import "bytes"
// parser_state identifies which top-level state the escape code parser is in.
type parser_state uint8
// csi_state identifies which section of a CSI sequence is currently being read.
type csi_state uint8
// csi_char_type is the classification of a byte inside a CSI sequence, as
// returned by csi_type.
type csi_char_type uint8
// bracketed_paste_start is the CSI body ("200~") that dispatch_esc_code treats
// as the start of a bracketed paste instead of passing it to a callback.
var bracketed_paste_start = []byte{'2', '0', '0', '~'}
// Values of parser_state.
const (
// normal: plain text, input is UTF-8 decoded and dispatched per character
normal parser_state = iota
// esc: an ESC (0x1b) was seen, the next byte selects the escape code type
esc
// csi: inside a CSI sequence
csi
// st: inside a string type escape code (DCS, OSC, PM, APC, SOS), waiting
// for its terminator
st
// esc_st: an ESC was seen while in st, a following backslash ends the code
esc_st
// c1_st: the byte 0xc2 was seen while in st, a following 0x9c ends the code
c1_st
// bracketed_paste: inside a bracketed paste, input is UTF-8 decoded and
// scanned for the end-of-paste sequence
bracketed_paste
)
// Values of csi_state.
const (
// parameter: no intermediate byte has been seen yet in the current CSI
parameter csi_state = iota
// intermediate: at least one intermediate byte has been seen, parameter
// bytes are no longer valid
intermediate
)
// Values of csi_char_type.
const (
// unknown_csi_char: the byte is outside every range recognised by csi_type
unknown_csi_char csi_char_type = iota
// parameter_csi_char: 0x30-0x3f
parameter_csi_char
// intermediate_csi_char: 0x20-0x2f
intermediate_csi_char
// final_csi_char: 0x40-0x7e, ends the CSI sequence
final_csi_char
)
// EscapeCodeParser incrementally splits a byte stream into plain text
// characters and terminal escape codes, reporting each through the callback
// fields below. Callbacks that are nil are skipped.
type EscapeCodeParser struct {
	state      parser_state
	utf8_state UTF8State
	// Code point being assembled for the UTF-8 sequence in progress. It lives
	// on the parser rather than in Parse so that a character split across two
	// Parse calls is still decoded from all of its bytes.
	// NOTE(review): assumes decode_utf8 accumulates continuation bytes into
	// the codep argument — confirm against its definition.
	utf8_codep             UTF8State
	csi_state              csi_state
	current_buffer         []byte
	bracketed_paste_buffer []UTF8State
	current_callback       func([]byte)

	// Whether to send escape code bytes as soon as they are received or to
	// buffer and send full escape codes
	Streaming bool

	// Callbacks

	// HandleRune receives every decoded character that is not part of an
	// escape code, including the contents of a bracketed paste.
	HandleRune func(rune)
	// The remaining callbacks receive the body of the corresponding escape
	// code, without its introducer. For CSI the body includes the final byte;
	// for the string types the terminator is not included.
	HandleCSI func([]byte)
	HandleOSC func([]byte)
	HandleDCS func([]byte)
	HandlePM  func([]byte)
	HandleSOS func([]byte)
	HandleAPC func([]byte)
}

// Parse feeds data to the parser, invoking the callbacks for every character
// and escape code that is completed by it. Parser state is kept between calls,
// so input may be supplied in arbitrary chunks.
func (self *EscapeCodeParser) Parse(data []byte) {
	for i := 0; i < len(data); i++ {
		switch self.state {
		case normal, bracketed_paste:
			// Remember whether a multi-byte sequence was in progress before
			// this byte, including one left over from a previous Parse call.
			prev := self.utf8_state
			switch decode_utf8(&self.utf8_state, &self.utf8_codep, data[i]) {
			case UTF8_ACCEPT:
				self.dispatch_char(self.utf8_codep)
			case UTF8_REJECT:
				self.utf8_state = UTF8_ACCEPT
				if prev != UTF8_ACCEPT {
					// The byte broke an unfinished sequence. Drop that
					// sequence and look at this byte again as the possible
					// start of a new character. The decoder is now in the
					// accept state, so the byte is retried at most once.
					i--
				}
			}
		default:
			self.dispatch_byte(data[i])
		}
	}
}
// Reset discards any partially parsed input and returns the parser to its
// initial state. The callbacks and the Streaming setting are left untouched.
func (self *EscapeCodeParser) Reset() {
	self.reset_state()
}
// write_ch records one byte of the escape code currently being parsed.
//
// When not streaming the byte is appended to current_buffer. When streaming
// the byte is handed to the current callback immediately, and only the first
// four bytes of a CSI sequence are additionally buffered.
func (self *EscapeCodeParser) write_ch(ch byte) {
	if !self.Streaming {
		self.current_buffer = append(self.current_buffer, ch)
		return
	}
	if cb := self.current_callback; cb != nil {
		cb([]byte{ch})
	}
	// Four bytes is the length of the "200~" body that dispatch_esc_code
	// compares current_buffer against to detect the start of a bracketed paste.
	if self.state == csi && len(self.current_buffer) < 4 {
		self.current_buffer = append(self.current_buffer, ch)
	}
}
// csi_type classifies a byte appearing inside a CSI sequence as an
// intermediate byte (0x20-0x2f), a parameter byte (0x30-0x3f) or a final byte
// (0x40-0x7e). Every other value yields unknown_csi_char.
func csi_type(ch byte) csi_char_type {
	switch {
	case ch >= 0x20 && ch <= 0x2F:
		return intermediate_csi_char
	case ch >= 0x30 && ch <= 0x3f:
		return parameter_csi_char
	case ch >= 0x40 && ch <= 0x7E:
		return final_csi_char
	default:
		return unknown_csi_char
	}
}
// reset_state returns the parser to the normal state with no escape code in
// progress. Both buffers are truncated in place, so their backing storage is
// reused by subsequent escape codes.
func (self *EscapeCodeParser) reset_state() {
	self.state = normal
	self.csi_state = parameter
	self.utf8_state = UTF8_ACCEPT
	self.current_callback = nil
	self.current_buffer = self.current_buffer[:0]
	self.bracketed_paste_buffer = self.bracketed_paste_buffer[:0]
}
// dispatch_esc_code finishes the escape code currently being parsed.
//
// A CSI whose buffered body is exactly "200~" is not reported to any callback;
// it switches the parser into the bracketed_paste state instead. Every other
// escape code is passed to the current callback, if there is one, and the
// parser returns to the normal state.
//
// The slice handed to the callback is the parser's own buffer, which is
// truncated and reused afterwards, so a callback that wants to keep the data
// has to copy it.
//
// NOTE(review): in Streaming mode write_ch has already delivered the bytes
// one at a time, so the callback is invoked once more here with whatever is
// in current_buffer — confirm that this is the intended end-of-code signal.
func (self *EscapeCodeParser) dispatch_esc_code() {
	is_paste_start := self.state == csi && bytes.Equal(self.current_buffer, bracketed_paste_start)
	if !is_paste_start {
		if cb := self.current_callback; cb != nil {
			cb(self.current_buffer)
		}
	}
	self.reset_state()
	if is_paste_start {
		self.state = bracketed_paste
	}
}
// invalid_escape_code abandons a malformed escape code: whatever has been
// buffered for it is discarded without calling any callback and the parser
// goes back to the normal state.
func (self *EscapeCodeParser) invalid_escape_code() {
	self.reset_state()
}
// dispatch_rune reports one decoded character to HandleRune. It does nothing
// when no HandleRune callback has been set.
func (self *EscapeCodeParser) dispatch_rune(ch UTF8State) {
	handler := self.HandleRune
	if handler == nil {
		return
	}
	handler(rune(ch))
}
// bp_buffer_equals reports whether the characters buffered while scanning a
// bracketed paste are exactly chars, in the same order.
func (self *EscapeCodeParser) bp_buffer_equals(chars []UTF8State) bool {
	buffered := self.bracketed_paste_buffer
	if len(buffered) != len(chars) {
		return false
	}
	for i := range buffered {
		if buffered[i] != chars[i] {
			return false
		}
	}
	return true
}
// dispatch_char handles one decoded character while the parser is in the
// normal or bracketed_paste state.
//
// In bracketed_paste every character is reported through HandleRune, except
// that the end-of-paste sequence ESC [ 2 0 1 ~ is swallowed and returns the
// parser to the normal state. Characters that might belong to that sequence
// are held in bracketed_paste_buffer until it either completes or is broken,
// in which case the held characters are reported in their original order.
//
// In the normal state ESC and the C1 introducers (DCS, SOS, CSI, OSC, PM, APC)
// start an escape code; every other character is reported through HandleRune.
func (self *EscapeCodeParser) dispatch_char(ch UTF8State) {
	if self.state == bracketed_paste {
		paste_end := [...]UTF8State{0x1b, '[', '2', '0', '1', '~'}
		// bracketed_paste_buffer only ever holds a prefix of paste_end, so its
		// length alone says which character would extend the match.
		matched := len(self.bracketed_paste_buffer)
		if matched < len(paste_end) && ch == paste_end[matched] {
			self.bracketed_paste_buffer = append(self.bracketed_paste_buffer, ch)
			if matched+1 == len(paste_end) {
				self.reset_state()
			}
			return
		}
		// The candidate end sequence was broken: what was held back is
		// ordinary pasted text after all.
		for _, c := range self.bracketed_paste_buffer {
			self.dispatch_rune(c)
		}
		self.bracketed_paste_buffer = self.bracketed_paste_buffer[:0]
		if ch == paste_end[0] {
			// An ESC that interrupts a partial match may itself begin the real
			// end sequence, so it starts a new candidate instead of being
			// reported as text.
			self.bracketed_paste_buffer = append(self.bracketed_paste_buffer, ch)
			return
		}
		self.dispatch_rune(ch)
		return
	} // end bracketed_paste

	switch ch {
	case 0x1b:
		self.state = esc
	case 0x90:
		self.state = st
		self.current_callback = self.HandleDCS
	case 0x9b:
		self.state = csi
		// Same initialisation as the ESC [ path in dispatch_byte.
		self.csi_state = parameter
		self.current_callback = self.HandleCSI
	case 0x9d:
		self.state = st
		self.current_callback = self.HandleOSC
	case 0x98:
		self.state = st
		self.current_callback = self.HandleSOS
	case 0x9e:
		self.state = st
		self.current_callback = self.HandlePM
	case 0x9f:
		self.state = st
		self.current_callback = self.HandleAPC
	default:
		self.dispatch_rune(ch)
	}
}
// dispatch_byte handles one raw byte while the parser is inside an escape
// code, that is in any state other than normal and bracketed_paste.
func (self *EscapeCodeParser) dispatch_byte(ch byte) {
	switch self.state {
	case esc:
		// The byte after ESC selects the kind of escape code. These are the
		// 7-bit equivalents of the C1 introducers handled in dispatch_char.
		switch ch {
		case 'P':
			self.state = st
			self.current_callback = self.HandleDCS
		case 'X':
			self.state = st
			self.current_callback = self.HandleSOS
		case '[':
			self.state = csi
			self.csi_state = parameter
			self.current_callback = self.HandleCSI
		case ']':
			self.state = st
			self.current_callback = self.HandleOSC
		case '^':
			self.state = st
			self.current_callback = self.HandlePM
		case '_':
			self.state = st
			self.current_callback = self.HandleAPC
		default:
			// Any other two byte escape sequence is dropped.
			self.state = normal
		}
	case csi:
		// The byte is recorded first so that the final byte is part of the
		// body passed to the callback.
		self.write_ch(ch)
		switch self.csi_state {
		case parameter:
			switch csi_type(ch) {
			case intermediate_csi_char:
				self.csi_state = intermediate
			case final_csi_char:
				self.dispatch_esc_code()
			case unknown_csi_char:
				self.invalid_escape_code()
			}
		case intermediate:
			// Once an intermediate byte has been seen, parameter bytes are no
			// longer allowed.
			switch csi_type(ch) {
			case parameter_csi_char, unknown_csi_char:
				self.invalid_escape_code()
			case final_csi_char:
				self.dispatch_esc_code()
			}
		}
	case st:
		// The terminator is either ESC \ or the two bytes 0xc2 0x9c.
		switch ch {
		case 0x1b:
			self.state = esc_st
		case 0xc2:
			self.state = c1_st
		default:
			self.write_ch(ch)
		}
	case esc_st:
		if ch == '\\' {
			self.dispatch_esc_code()
			return
		}
		// The held back ESC was payload. The current byte is then examined
		// again as an ordinary st byte, so that it can itself start a
		// terminator (for example ESC ESC \).
		self.state = st
		self.write_ch(0x1b)
		self.dispatch_byte(ch)
	case c1_st:
		if ch == 0x9c {
			self.dispatch_esc_code()
			return
		}
		// Same as for esc_st: emit the held back 0xc2 and re-examine ch.
		self.state = st
		self.write_ch(0xc2)
		self.dispatch_byte(ch)
	}
}

View File

@ -0,0 +1,49 @@
package utils
import (
"testing"
)
// TestEscapeCodeParsing feeds raw input to an EscapeCodeParser and compares a
// textual log of the callbacks it makes, one "\nKIND: payload" entry per
// callback, against the expected log.
func TestEscapeCodeParsing(t *testing.T) {
	var actual string
	record := func(kind string) func([]byte) {
		return func(payload []byte) {
			actual += "\n" + kind + ": " + string(payload)
		}
	}
	parser := EscapeCodeParser{
		HandleCSI:  record("CSI"),
		HandleOSC:  record("OSC"),
		HandleDCS:  record("DCS"),
		HandleSOS:  record("SOS"),
		HandlePM:   record("PM"),
		HandleAPC:  record("APC"),
		HandleRune: func(r rune) { record("CH")([]byte(string(r))) },
	}

	cases := []struct {
		raw, expected string
	}{
		{"\x1b[31m\xc2\x9bm", "CSI: 31m\nCSI: m"},
		{"ab\nc", "CH: a\nCH: b\nCH: \n\nCH: c"},
		{"a\x1b[200m\x1b[mb\x1b[5:3;2;4~", "CH: a\nCSI: 200m\nCSI: m\nCH: b\nCSI: 5:3;2;4~"},
	}
	for _, c := range cases {
		// Every case starts from a pristine parser and an empty log.
		parser.Reset()
		actual = ""
		expected := "\n" + c.expected
		parser.Parse([]byte(c.raw))
		if actual != expected {
			t.Fatalf("actual != expected: %#v != %#v", actual, expected)
		}
	}
}