diff --git a/tools/cmd/hints/marks.go b/tools/cmd/hints/marks.go index 6ceb41916..d6d10a35b 100644 --- a/tools/cmd/hints/marks.go +++ b/tools/cmd/hints/marks.go @@ -322,6 +322,30 @@ func mark(r *regexp.Regexp, post_processors []PostProcessorFunc, group_processor type ErrNoMatches struct{ Type string } +func adjust_python_offsets(text string, marks []Mark) { + // python returns rune based offsets (unicode chars not utf-8 bytes) + // this adjustment function assumes the marks are non overlapping + bytes := utils.UnsafeStringToBytes(text) + char_offset, byte_offset := 0, 0 + + adjust := func(x int) (sz int) { + x -= char_offset + for x > 0 { + _, d := utf8.DecodeRune(bytes) + sz += d + bytes = bytes[d:] + x-- + char_offset++ + } + byte_offset += sz + return byte_offset + } + for i := range marks { + mark := &marks[i] + mark.Start = adjust(mark.Start) + mark.End = adjust(mark.End) + } +} func (self *ErrNoMatches) Error() string { none_of := "matches" switch self.Type { @@ -369,6 +393,7 @@ func find_marks(text string, opts *Options, cli_args ...string) (sanitized_text if err != nil { return "", nil, nil, fmt.Errorf("Failed to load output from custom processor %#v with error: %w", opts.CustomizeProcessing, err) } + adjust_python_offsets(sanitized_text, ans) } else if opts.Type == "hyperlink" { ans = hyperlinks } else { diff --git a/tools/cmd/hints/marks_test.go b/tools/cmd/hints/marks_test.go index 5daf7a1cf..3f5a851b5 100644 --- a/tools/cmd/hints/marks_test.go +++ b/tools/cmd/hints/marks_test.go @@ -10,6 +10,7 @@ import ( "os" "path/filepath" "strconv" + "strings" "testing" "github.com/google/go-cmp/cmp" @@ -31,7 +32,7 @@ func TestHintMarking(t *testing.T) { r := func(text string, url ...string) (marks []Mark) { ptext := convert_text(text, cols) - _, marks, _, err := find_marks(ptext, opts, cli_args...) + ptext, marks, _, err := find_marks(ptext, opts, cli_args...) if err != nil { var e *ErrNoMatches if len(url) != 0 || !errors.As(err, &e) { @@ -43,6 +44,12 @@ func TestHintMarking(t *testing.T) { if diff := cmp.Diff(url, actual); diff != "" { t.Fatalf("%#v failed:\n%s", text, diff) } + for _, m := range marks { + q := strings.NewReplacer("\n", "", "\r", "", "\x00", "").Replace(ptext[m.Start:m.End]) + if diff := cmp.Diff(m.Text, q); diff != "" { + t.Fatalf("Mark start and end dont point to correct offset in text for %#v\n%s", text, diff) + } + } return } @@ -114,7 +121,7 @@ def mark(text, args, Mark, extra_cli_args, *a): `), 0o600) opts.Type = "regex" opts.CustomizeProcessing = simple - marks := r("a b", `a`, `b`) + marks := r("漢字 b", `漢字`, `b`) if diff := cmp.Diff(marks[0].Groupdict, map[string]any{"idx": float64(0), "args": []any{"extra1"}}); diff != "" { t.Fatalf("Did not get expected groupdict from custom processor:\n%s", diff) }