From c8292d77f1fb5a512d57cb0a09796d9f7e815498 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 25 Aug 2022 06:34:06 +0530 Subject: [PATCH] Re-use the escape code parser for wcswidth calculation --- tools/wcswidth/escape-code-parser.go | 15 ++++-- tools/wcswidth/wcswidth.go | 71 ++++++++++------------------ 2 files changed, 36 insertions(+), 50 deletions(-) diff --git a/tools/wcswidth/escape-code-parser.go b/tools/wcswidth/escape-code-parser.go index beda96493..bac4aa92a 100644 --- a/tools/wcswidth/escape-code-parser.go +++ b/tools/wcswidth/escape-code-parser.go @@ -2,6 +2,7 @@ package wcswidth import ( "bytes" + "errors" "kitty/tools/utils" ) @@ -54,6 +55,8 @@ type EscapeCodeParser struct { func (self *EscapeCodeParser) InBracketedPaste() bool { return self.state == bracketed_paste } +var reparse_byte = errors.New("") + func (self *EscapeCodeParser) Parse(data []byte) error { prev := utils.UTF8_ACCEPT codep := utils.UTF8_ACCEPT @@ -70,15 +73,18 @@ func (self *EscapeCodeParser) Parse(data []byte) error { case utils.UTF8_REJECT: self.utf8_state = utils.UTF8_ACCEPT if prev != utils.UTF8_ACCEPT && i > 0 { - i = i - 1 + i-- } } prev = self.utf8_state default: err := self.dispatch_byte(data[i]) if err != nil { - self.Reset() - return err + self.reset_state() + if err != reparse_byte { + return err + } + i-- } } } @@ -242,8 +248,9 @@ func (self *EscapeCodeParser) dispatch_byte(ch byte) error { case '_': self.state = st self.current_callback = self.HandleAPC + case 'D', 'E', 'H', 'M', 'N', 'O', 'Z', '6', '7', '8', '9', '=', '>', 'F', 'c', 'l', 'm', 'n', 'o', '|', '}', '~': default: - self.state = normal + return reparse_byte } case csi: self.write_ch(ch) diff --git a/tools/wcswidth/wcswidth.go b/tools/wcswidth/wcswidth.go index c65949f22..dee72297c 100644 --- a/tools/wcswidth/wcswidth.go +++ b/tools/wcswidth/wcswidth.go @@ -11,37 +11,31 @@ func IsFlagPair(a rune, b rune) bool { type ecparser_state uint8 type WCWidthIterator struct { - prev_ch rune - prev_width int - state ecparser_state + prev_ch rune + prev_width, current_width int + parser EscapeCodeParser + state ecparser_state +} + +func CreateWCWidthIterator() *WCWidthIterator { + var ans WCWidthIterator + ans.parser.HandleRune = ans.handle_rune + return &ans } func (self *WCWidthIterator) Reset() { self.prev_ch = 0 self.prev_width = 0 - self.state = 0 + self.current_width = 0 + self.parser.Reset() } -func (self *WCWidthIterator) Step(ch rune) int { - var ans int = 0 +func (self *WCWidthIterator) handle_rune(ch rune) error { const ( normal ecparser_state = 0 - in_esc ecparser_state = 1 - in_csi ecparser_state = 2 flag_pair_started ecparser_state = 3 - in_st_terminated ecparser_state = 4 ) switch self.state { - case in_csi: - self.prev_width = 0 - if 0x40 <= ch && ch <= 0x7e { - self.state = normal - } - case in_st_terminated: - self.prev_width = 0 - if ch == 0x9c || (ch == '\\' && self.prev_ch == 0x1b) { - self.state = normal - } case flag_pair_started: self.state = normal if IsFlagPair(self.prev_ch, ch) { @@ -50,19 +44,16 @@ func (self *WCWidthIterator) Step(ch rune) int { fallthrough case normal: switch ch { - case 0x1b: - self.prev_width = 0 - self.state = in_esc case 0xfe0f: if IsEmojiPresentationBase(self.prev_ch) && self.prev_width == 1 { - ans += 1 + self.current_width += 1 self.prev_width = 2 } else { self.prev_width = 0 } case 0xfe0e: if IsEmojiPresentationBase(self.prev_ch) && self.prev_width == 2 { - ans -= 1 + self.current_width -= 1 self.prev_width = 1 } else { self.prev_width = 0 @@ -81,32 +72,20 @@ func (self *WCWidthIterator) Step(ch rune) int { default: self.prev_width = 1 } - ans += self.prev_width - } - - case in_esc: - switch ch { - case '[': - self.state = in_csi - case 'P', ']', 'X', '^', '_': - self.state = in_st_terminated - case 'D', 'E', 'H', 'M', 'N', 'O', 'Z', '6', '7', '8', '9', '=', '>', 'F', 'c', 'l', 'm', 'n', 'o', '|', '}', '~': - default: - self.prev_ch = 0x1b - self.prev_width = 0 - self.state = normal - return self.Step(ch) + self.current_width += self.prev_width } } self.prev_ch = ch - return ans + return nil +} + +func (self *WCWidthIterator) Parse(b []byte) (ans int) { + self.current_width = 0 + self.parser.Parse(b) + return self.current_width } func Stringwidth(text string) int { - var w WCWidthIterator - ans := 0 - for _, ch := range []rune(text) { - ans += w.Step(ch) - } - return ans + w := CreateWCWidthIterator() + return w.Parse([]byte(text)) }