From b088ab91cf975698dd57b3494a8df9e211c45acd Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 12 Mar 2023 13:12:17 +0530 Subject: [PATCH] Make code to convert rune offsets to byte offsets re-useable --- tools/cmd/hints/marks.go | 25 ++++--------------------- tools/utils/strings.go | 26 ++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/tools/cmd/hints/marks.go b/tools/cmd/hints/marks.go index f183164cb..78b754b0a 100644 --- a/tools/cmd/hints/marks.go +++ b/tools/cmd/hints/marks.go @@ -325,34 +325,17 @@ type ErrNoMatches struct{ Type string } func adjust_python_offsets(text string, marks []Mark) error { // python returns rune based offsets (unicode chars not utf-8 bytes) - // this adjustment function assumes the marks are non overlapping - bytes := utils.UnsafeStringToBytes(text) - char_offset, byte_offset := 0, 0 - - adjust := func(x int) (sz int) { - x -= char_offset - for x > 0 { - _, d := utf8.DecodeRune(bytes) - sz += d - bytes = bytes[d:] - x-- - char_offset++ - } - byte_offset += sz - return byte_offset - } - last := 0 + adjust := utils.RuneOffsetsToByteOffsets(text) for i := range marks { mark := &marks[i] if mark.End < mark.Start { return fmt.Errorf("The end of a mark must not be before its start") } - if mark.Start < last { + s, e := adjust(mark.Start), adjust(mark.End) + if s < 0 || e < 0 { return fmt.Errorf("Overlapping marks are not supported") } - last = mark.Start - mark.Start = adjust(mark.Start) - mark.End = adjust(mark.End) + mark.Start, mark.End = s, e } return nil } diff --git a/tools/utils/strings.go b/tools/utils/strings.go index 138326646..9a453b7e5 100644 --- a/tools/utils/strings.go +++ b/tools/utils/strings.go @@ -138,3 +138,29 @@ func NewSeparatorScanner(text, separator string) *StringScanner { func Splitlines(x string, expected_number_of_lines ...int) (ans []string) { return NewLineScanner("").Split(x, expected_number_of_lines...) 
// RuneOffsetsToByteOffsets returns a stateful converter that maps rune
// (Unicode code point) offsets into text to the matching UTF-8 byte offsets.
//
// The converter only ever scans forward through text, so it must be called
// with non-decreasing rune offsets:
//   - an offset equal to the previous one returns the same byte offset again;
//   - an offset smaller than the previous one returns -1 and leaves the
//     converter's state untouched;
//   - an offset past the end of text is clamped to len(text).
//
// Bytes that are not valid UTF-8 are counted as one rune each, which is how
// utf8.DecodeRuneInString reports them. The returned function mutates state
// captured by the closure without any locking, so do not share one converter
// between goroutines without external synchronization.
func RuneOffsetsToByteOffsets(text string) func(int) int {
	// last is the rune offset of the most recent accepted request;
	// char_offset/byte_offset record how far the scan has really advanced.
	last, char_offset, byte_offset := 0, 0, 0
	return func(x int) int {
		switch {
		case x == last:
			return byte_offset
		case x < last:
			return -1
		}
		last = x
		// Stop as soon as the text is exhausted: decoding an empty string
		// yields width 0, so continuing would only burn iterations without
		// moving byte_offset.
		for char_offset < x && byte_offset < len(text) {
			_, d := utf8.DecodeRuneInString(text[byte_offset:])
			byte_offset += d
			char_offset++
		}
		return byte_offset
	}
}