Fix the escape code parser not preserving complete UTF-8 decoder state between calls to Parse()
Also allow it to be driven byte-by-byte
This commit is contained in:
parent
d260d2f480
commit
5436408463
@ -4,10 +4,12 @@ package wcswidth
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"kitty/tools/utils"
|
||||
)
|
||||
|
||||
var _ = fmt.Print
|
||||
|
||||
type parser_state uint8
|
||||
type csi_state uint8
|
||||
type csi_char_type uint8
|
||||
@ -39,12 +41,14 @@ const (
|
||||
|
||||
type EscapeCodeParser struct {
|
||||
state parser_state
|
||||
utf8_state utils.UTF8State
|
||||
utf8_state, utf8_codep utils.UTF8State
|
||||
csi_state csi_state
|
||||
current_buffer []byte
|
||||
bracketed_paste_buffer []utils.UTF8State
|
||||
current_callback func([]byte) error
|
||||
|
||||
ReplaceInvalidUtf8Bytes bool
|
||||
|
||||
// Callbacks
|
||||
HandleRune func(rune) error
|
||||
HandleEndOfBracketedPaste func()
|
||||
@ -58,42 +62,50 @@ type EscapeCodeParser struct {
|
||||
|
||||
// InBracketedPaste reports whether the parser's current state is bracketed_paste.
func (self *EscapeCodeParser) InBracketedPaste() bool { return self.state == bracketed_paste }
|
||||
|
||||
var reparse_byte = errors.New("")
|
||||
|
||||
// ParseString parses s by converting it to a byte slice with
// utils.UnsafeStringToBytes and handing the result to Parse, whose error it
// returns unchanged.
// NOTE(review): presumably the conversion shares memory with s rather than
// copying it, so the bytes must not be modified -- confirm against utils.
func (self *EscapeCodeParser) ParseString(s string) error {
|
||||
return self.Parse(utils.UnsafeStringToBytes(s))
|
||||
}
|
||||
|
||||
func (self *EscapeCodeParser) Parse(data []byte) error {
|
||||
prev := utils.UTF8_ACCEPT
|
||||
codep := utils.UTF8_ACCEPT
|
||||
for i := 0; i < len(data); i++ {
|
||||
switch self.state {
|
||||
case normal, bracketed_paste:
|
||||
switch utils.DecodeUtf8(&self.utf8_state, &codep, data[i]) {
|
||||
case utils.UTF8_ACCEPT:
|
||||
err := self.dispatch_char(codep)
|
||||
if err != nil {
|
||||
self.Reset()
|
||||
return err
|
||||
}
|
||||
case utils.UTF8_REJECT:
|
||||
self.utf8_state = utils.UTF8_ACCEPT
|
||||
if prev != utils.UTF8_ACCEPT && i > 0 {
|
||||
i--
|
||||
}
|
||||
}
|
||||
prev = self.utf8_state
|
||||
default:
|
||||
err := self.dispatch_byte(data[i])
|
||||
func (self *EscapeCodeParser) ParseByte(b byte) error {
|
||||
switch self.state {
|
||||
case normal, bracketed_paste:
|
||||
prev_utf8_state := self.utf8_state
|
||||
switch utils.DecodeUtf8(&self.utf8_state, &self.utf8_codep, b) {
|
||||
case utils.UTF8_ACCEPT:
|
||||
err := self.dispatch_char(self.utf8_codep)
|
||||
if err != nil {
|
||||
self.reset_state()
|
||||
if err != reparse_byte {
|
||||
return err
|
||||
}
|
||||
case utils.UTF8_REJECT:
|
||||
self.utf8_state = utils.UTF8_ACCEPT
|
||||
if prev_utf8_state != utils.UTF8_ACCEPT {
|
||||
// reparse this byte with state set to UTF8_ACCEPT
|
||||
return self.ParseByte(b)
|
||||
}
|
||||
if self.ReplaceInvalidUtf8Bytes {
|
||||
err := self.dispatch_char(utils.UTF8State(0xfffd))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
i--
|
||||
}
|
||||
}
|
||||
default:
|
||||
err := self.dispatch_byte(b)
|
||||
if err != nil {
|
||||
self.reset_state()
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Parse feeds every byte of data, in order, to ParseByte. It stops at the
// first byte for which ParseByte returns a non-nil error and returns that
// error; the remaining bytes are not processed. It returns nil once all bytes
// have been consumed without error.
func (self *EscapeCodeParser) Parse(data []byte) error {
|
||||
for _, b := range data {
|
||||
err := self.ParseByte(b)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -124,6 +136,7 @@ func (self *EscapeCodeParser) reset_state() {
|
||||
self.bracketed_paste_buffer = self.bracketed_paste_buffer[:0]
|
||||
self.state = normal
|
||||
self.utf8_state = utils.UTF8_ACCEPT
|
||||
self.utf8_codep = utils.UTF8_ACCEPT
|
||||
self.current_callback = nil
|
||||
self.csi_state = parameter
|
||||
}
|
||||
@ -260,7 +273,9 @@ func (self *EscapeCodeParser) dispatch_byte(ch byte) error {
|
||||
self.current_callback = self.HandleAPC
|
||||
case 'D', 'E', 'H', 'M', 'N', 'O', 'Z', '6', '7', '8', '9', '=', '>', 'F', 'c', 'l', 'm', 'n', 'o', '|', '}', '~':
|
||||
default:
|
||||
return reparse_byte
|
||||
// we drop this dangling Esc and reparse the byte after the esc
|
||||
self.reset_state()
|
||||
return self.ParseByte(ch)
|
||||
}
|
||||
case csi:
|
||||
self.write_ch(ch)
|
||||
|
||||
@ -2,7 +2,13 @@
|
||||
|
||||
package wcswidth
|
||||
|
||||
import "kitty/tools/utils"
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"kitty/tools/utils"
|
||||
)
|
||||
|
||||
var _ = fmt.Print
|
||||
|
||||
func IsFlagCodepoint(ch rune) bool {
|
||||
return 0x1F1E6 <= ch && ch <= 0x1F1FF
|
||||
@ -19,6 +25,7 @@ type WCWidthIterator struct {
|
||||
prev_width, current_width int
|
||||
parser EscapeCodeParser
|
||||
state ecparser_state
|
||||
rune_count uint
|
||||
}
|
||||
|
||||
func CreateWCWidthIterator() *WCWidthIterator {
|
||||
@ -31,10 +38,12 @@ func (self *WCWidthIterator) Reset() {
|
||||
self.prev_ch = 0
|
||||
self.prev_width = 0
|
||||
self.current_width = 0
|
||||
self.rune_count = 0
|
||||
self.parser.Reset()
|
||||
}
|
||||
|
||||
func (self *WCWidthIterator) handle_rune(ch rune) error {
|
||||
self.rune_count += 1
|
||||
const (
|
||||
normal ecparser_state = 0
|
||||
flag_pair_started ecparser_state = 3
|
||||
@ -83,6 +92,11 @@ func (self *WCWidthIterator) handle_rune(ch rune) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// ParseByte feeds the single byte b to the iterator's escape code parser and
// then returns the iterator's current_width field. The error returned by the
// parser's ParseByte is discarded. current_width is not reset here, so the
// returned value reflects whatever has accumulated in that field so far.
func (self *WCWidthIterator) ParseByte(b byte) (ans int) {
|
||||
self.parser.ParseByte(b)
|
||||
return self.current_width
|
||||
}
|
||||
|
||||
func (self *WCWidthIterator) Parse(b []byte) (ans int) {
|
||||
self.current_width = 0
|
||||
self.parser.Parse(b)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user