Re-use the escape code parser for wcswidth calculation

2022-08-25 06:34:06 +05:30 · 2022-08-25 06:34:06 +05:30 · c8292d77f1
commit c8292d77f1
parent 7280c712d6
2 changed files with 36 additions and 50 deletions
--- a/tools/wcswidth/escape-code-parser.go
+++ b/tools/wcswidth/escape-code-parser.go
@@ -2,6 +2,7 @@ package wcswidth

 import (
 	"bytes"
+	"errors"
 	"kitty/tools/utils"
 )

@@ -54,6 +55,8 @@ type EscapeCodeParser struct {

 func (self *EscapeCodeParser) InBracketedPaste() bool { return self.state == bracketed_paste }

+var reparse_byte = errors.New("")
+
 func (self *EscapeCodeParser) Parse(data []byte) error {
 	prev := utils.UTF8_ACCEPT
 	codep := utils.UTF8_ACCEPT
@@ -70,15 +73,18 @@ func (self *EscapeCodeParser) Parse(data []byte) error {
 			case utils.UTF8_REJECT:
 				self.utf8_state = utils.UTF8_ACCEPT
 				if prev != utils.UTF8_ACCEPT && i > 0 {
-					i = i - 1
+					i--
 				}
 			}
 			prev = self.utf8_state
 		default:
 			err := self.dispatch_byte(data[i])
 			if err != nil {
-				self.Reset()
-				return err
+				self.reset_state()
+				if err != reparse_byte {
+					return err
+				}
+				i--
 			}
 		}
 	}
@@ -242,8 +248,9 @@ func (self *EscapeCodeParser) dispatch_byte(ch byte) error {
 		case '_':
 			self.state = st
 			self.current_callback = self.HandleAPC
+		case 'D', 'E', 'H', 'M', 'N', 'O', 'Z', '6', '7', '8', '9', '=', '>', 'F', 'c', 'l', 'm', 'n', 'o', '|', '}', '~':
 		default:
-			self.state = normal
+			return reparse_byte
 		}
 	case csi:
 		self.write_ch(ch)
--- a/tools/wcswidth/wcswidth.go
+++ b/tools/wcswidth/wcswidth.go
@@ -11,37 +11,31 @@ func IsFlagPair(a rune, b rune) bool {
 type ecparser_state uint8

 type WCWidthIterator struct {
-	prev_ch    rune
-	prev_width int
-	state      ecparser_state
+	prev_ch                   rune
+	prev_width, current_width int
+	parser                    EscapeCodeParser
+	state                     ecparser_state
+}
+
+func CreateWCWidthIterator() *WCWidthIterator {
+	var ans WCWidthIterator
+	ans.parser.HandleRune = ans.handle_rune
+	return &ans
 }

 func (self *WCWidthIterator) Reset() {
 	self.prev_ch = 0
 	self.prev_width = 0
-	self.state = 0
+	self.current_width = 0
+	self.parser.Reset()
 }

-func (self *WCWidthIterator) Step(ch rune) int {
-	var ans int = 0
+func (self *WCWidthIterator) handle_rune(ch rune) error {
 	const (
 		normal            ecparser_state = 0
-		in_esc            ecparser_state = 1
-		in_csi            ecparser_state = 2
 		flag_pair_started ecparser_state = 3
-		in_st_terminated  ecparser_state = 4
 	)
 	switch self.state {
-	case in_csi:
-		self.prev_width = 0
-		if 0x40 <= ch && ch <= 0x7e {
-			self.state = normal
-		}
-	case in_st_terminated:
-		self.prev_width = 0
-		if ch == 0x9c || (ch == '\\' && self.prev_ch == 0x1b) {
-			self.state = normal
-		}
 	case flag_pair_started:
 		self.state = normal
 		if IsFlagPair(self.prev_ch, ch) {
@@ -50,19 +44,16 @@ func (self *WCWidthIterator) Step(ch rune) int {
 		fallthrough
 	case normal:
 		switch ch {
-		case 0x1b:
-			self.prev_width = 0
-			self.state = in_esc
 		case 0xfe0f:
 			if IsEmojiPresentationBase(self.prev_ch) && self.prev_width == 1 {
-				ans += 1
+				self.current_width += 1
 				self.prev_width = 2
 			} else {
 				self.prev_width = 0
 			}
 		case 0xfe0e:
 			if IsEmojiPresentationBase(self.prev_ch) && self.prev_width == 2 {
-				ans -= 1
+				self.current_width -= 1
 				self.prev_width = 1
 			} else {
 				self.prev_width = 0
@@ -81,32 +72,20 @@ func (self *WCWidthIterator) Step(ch rune) int {
 			default:
 				self.prev_width = 1
 			}
-			ans += self.prev_width
-		}
-
-	case in_esc:
-		switch ch {
-		case '[':
-			self.state = in_csi
-		case 'P', ']', 'X', '^', '_':
-			self.state = in_st_terminated
-		case 'D', 'E', 'H', 'M', 'N', 'O', 'Z', '6', '7', '8', '9', '=', '>', 'F', 'c', 'l', 'm', 'n', 'o', '|', '}', '~':
-		default:
-			self.prev_ch = 0x1b
-			self.prev_width = 0
-			self.state = normal
-			return self.Step(ch)
+			self.current_width += self.prev_width
 		}
 	}
 	self.prev_ch = ch
-	return ans
+	return nil
+}
+
+func (self *WCWidthIterator) Parse(b []byte) (ans int) {
+	self.current_width = 0
+	self.parser.Parse(b)
+	return self.current_width
 }

 func Stringwidth(text string) int {
-	var w WCWidthIterator
-	ans := 0
-	for _, ch := range []rune(text) {
-		ans += w.Step(ch)
-	}
-	return ans
+	w := CreateWCWidthIterator()
+	return w.Parse([]byte(text))
 }