Re-use the escape code parser for wcswidth calculation

This commit is contained in:
Kovid Goyal 2022-08-25 06:34:06 +05:30
parent 7280c712d6
commit c8292d77f1
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 36 additions and 50 deletions

View File

@ -2,6 +2,7 @@ package wcswidth
import (
"bytes"
"errors"
"kitty/tools/utils"
)
@ -54,6 +55,8 @@ type EscapeCodeParser struct {
// InBracketedPaste reports whether the parser's current state is
// bracketed_paste.
func (self *EscapeCodeParser) InBracketedPaste() bool {
	in_paste := self.state == bracketed_paste
	return in_paste
}
// reparse_byte is a sentinel error value, compared by identity. dispatch_byte
// returns it from the default branch of its escape dispatch after switching
// back to the normal state; Parse then resets its state and steps back one
// byte so that the same byte is dispatched again, instead of returning the
// error to the caller.
var reparse_byte = errors.New("")
func (self *EscapeCodeParser) Parse(data []byte) error {
prev := utils.UTF8_ACCEPT
codep := utils.UTF8_ACCEPT
@ -70,15 +73,18 @@ func (self *EscapeCodeParser) Parse(data []byte) error {
case utils.UTF8_REJECT:
self.utf8_state = utils.UTF8_ACCEPT
if prev != utils.UTF8_ACCEPT && i > 0 {
i = i - 1
i--
}
}
prev = self.utf8_state
default:
err := self.dispatch_byte(data[i])
if err != nil {
self.Reset()
return err
self.reset_state()
if err != reparse_byte {
return err
}
i--
}
}
}
@ -242,8 +248,9 @@ func (self *EscapeCodeParser) dispatch_byte(ch byte) error {
case '_':
self.state = st
self.current_callback = self.HandleAPC
case 'D', 'E', 'H', 'M', 'N', 'O', 'Z', '6', '7', '8', '9', '=', '>', 'F', 'c', 'l', 'm', 'n', 'o', '|', '}', '~':
default:
self.state = normal
return reparse_byte
}
case csi:
self.write_ch(ch)

View File

@ -11,37 +11,31 @@ func IsFlagPair(a rune, b rune) bool {
// ecparser_state is the type of the state field of WCWidthIterator. Its
// values are declared as constants local to the function that switches on it.
type ecparser_state uint8
type WCWidthIterator struct {
prev_ch rune
prev_width int
state ecparser_state
prev_ch rune
prev_width, current_width int
parser EscapeCodeParser
state ecparser_state
}
// CreateWCWidthIterator allocates a WCWidthIterator and installs the
// iterator's own handle_rune method as the HandleRune callback of its parser
// field, so the runes passed to that callback are handled by the iterator.
func CreateWCWidthIterator() *WCWidthIterator {
	it := &WCWidthIterator{}
	// The method value is bound to the same iterator that is returned, so the
	// callback updates the caller's instance.
	it.parser.HandleRune = it.handle_rune
	return it
}
// Reset zeroes the previous rune, the previous and current widths and the
// state, and also resets the parser field.
func (self *WCWidthIterator) Reset() {
	self.prev_ch, self.prev_width, self.current_width = 0, 0, 0
	self.state = 0
	self.parser.Reset()
}
func (self *WCWidthIterator) Step(ch rune) int {
var ans int = 0
func (self *WCWidthIterator) handle_rune(ch rune) error {
const (
normal ecparser_state = 0
in_esc ecparser_state = 1
in_csi ecparser_state = 2
flag_pair_started ecparser_state = 3
in_st_terminated ecparser_state = 4
)
switch self.state {
case in_csi:
self.prev_width = 0
if 0x40 <= ch && ch <= 0x7e {
self.state = normal
}
case in_st_terminated:
self.prev_width = 0
if ch == 0x9c || (ch == '\\' && self.prev_ch == 0x1b) {
self.state = normal
}
case flag_pair_started:
self.state = normal
if IsFlagPair(self.prev_ch, ch) {
@ -50,19 +44,16 @@ func (self *WCWidthIterator) Step(ch rune) int {
fallthrough
case normal:
switch ch {
case 0x1b:
self.prev_width = 0
self.state = in_esc
case 0xfe0f:
if IsEmojiPresentationBase(self.prev_ch) && self.prev_width == 1 {
ans += 1
self.current_width += 1
self.prev_width = 2
} else {
self.prev_width = 0
}
case 0xfe0e:
if IsEmojiPresentationBase(self.prev_ch) && self.prev_width == 2 {
ans -= 1
self.current_width -= 1
self.prev_width = 1
} else {
self.prev_width = 0
@ -81,32 +72,20 @@ func (self *WCWidthIterator) Step(ch rune) int {
default:
self.prev_width = 1
}
ans += self.prev_width
}
case in_esc:
switch ch {
case '[':
self.state = in_csi
case 'P', ']', 'X', '^', '_':
self.state = in_st_terminated
case 'D', 'E', 'H', 'M', 'N', 'O', 'Z', '6', '7', '8', '9', '=', '>', 'F', 'c', 'l', 'm', 'n', 'o', '|', '}', '~':
default:
self.prev_ch = 0x1b
self.prev_width = 0
self.state = normal
return self.Step(ch)
self.current_width += self.prev_width
}
}
self.prev_ch = ch
return ans
return nil
}
// Parse runs b through the parser field and returns the value of
// current_width afterwards. current_width is zeroed first, so the result
// reflects only this call; the other iterator fields are not reset here.
func (self *WCWidthIterator) Parse(b []byte) (ans int) {
	self.current_width = 0
	// The error returned by the parser is discarded, as in the original code.
	_ = self.parser.Parse(b)
	ans = self.current_width
	return ans
}
func Stringwidth(text string) int {
var w WCWidthIterator
ans := 0
for _, ch := range []rune(text) {
ans += w.Step(ch)
}
return ans
w := CreateWCWidthIterator()
return w.Parse([]byte(text))
}