hints kitten: Switch to using a regex engine that supports lookaround
Note that we lose unicode char matching for --type=word because of https://github.com/dlclark/regexp2/issues/65 and of course user regexps can't use \p{N} escapes any more. Hopefully regexp2 will add support for these soon-ish. IMO lookaround is more important than \p. Fixes #6265
This commit is contained in:
parent
5b8b91b6a3
commit
65f8bb7397
@ -52,6 +52,8 @@ Detailed list of changes
|
|||||||
|
|
||||||
- hints kitten: Fix a regression in 0.28.0 that broke using sub-groups in regexp captures (:iss:`6228`)
|
- hints kitten: Fix a regression in 0.28.0 that broke using sub-groups in regexp captures (:iss:`6228`)
|
||||||
|
|
||||||
|
- hints kitten: Fix a regression in 0.28.0 that broke using lookahead/lookbehind in regexp captures (:iss:`6265`)
|
||||||
|
|
||||||
- diff kitten: Fix a regression in 0.28.0 that broke using relative paths as arguments to the kitten (:iss:`6325`)
|
- diff kitten: Fix a regression in 0.28.0 that broke using relative paths as arguments to the kitten (:iss:`6325`)
|
||||||
|
|
||||||
- Fix re-using the image id of an animated image for a still image causing a crash (:iss:`6244`)
|
- Fix re-using the image id of an animated image for a still image causing a crash (:iss:`6244`)
|
||||||
|
|||||||
2
go.mod
2
go.mod
@ -7,6 +7,7 @@ require (
|
|||||||
github.com/alecthomas/chroma/v2 v2.7.0
|
github.com/alecthomas/chroma/v2 v2.7.0
|
||||||
github.com/bmatcuk/doublestar/v4 v4.6.0
|
github.com/bmatcuk/doublestar/v4 v4.6.0
|
||||||
github.com/disintegration/imaging v1.6.2
|
github.com/disintegration/imaging v1.6.2
|
||||||
|
github.com/dlclark/regexp2 v1.9.0
|
||||||
github.com/google/go-cmp v0.5.9
|
github.com/google/go-cmp v0.5.9
|
||||||
github.com/google/uuid v1.3.0
|
github.com/google/uuid v1.3.0
|
||||||
github.com/jamesruan/go-rfc1924 v0.0.0-20170108144916-2767ca7c638f
|
github.com/jamesruan/go-rfc1924 v0.0.0-20170108144916-2767ca7c638f
|
||||||
@ -18,7 +19,6 @@ require (
|
|||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/dlclark/regexp2 v1.9.0 // indirect
|
|
||||||
github.com/go-ole/go-ole v1.2.6 // indirect
|
github.com/go-ole/go-ole v1.2.6 // indirect
|
||||||
github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a // indirect
|
github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a // indirect
|
||||||
github.com/power-devops/perfstat v0.0.0-20221212215047-62379fc7944b // indirect
|
github.com/power-devops/perfstat v0.0.0-20221212215047-62379fc7944b // indirect
|
||||||
|
|||||||
@ -13,9 +13,11 @@ import (
|
|||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"github.com/dlclark/regexp2"
|
||||||
"github.com/seancfoley/ipaddress-go/ipaddr"
|
"github.com/seancfoley/ipaddress-go/ipaddr"
|
||||||
"golang.org/x/exp/slices"
|
"golang.org/x/exp/slices"
|
||||||
)
|
)
|
||||||
@ -257,9 +259,9 @@ func functions_for(opts *Options) (pattern string, post_processors []PostProcess
|
|||||||
if chars == "" {
|
if chars == "" {
|
||||||
chars = RelevantKittyOpts().Select_by_word_characters
|
chars = RelevantKittyOpts().Select_by_word_characters
|
||||||
}
|
}
|
||||||
chars = regexp.QuoteMeta(chars)
|
chars = regexp2.Escape(chars)
|
||||||
chars = strings.ReplaceAll(chars, "-", "\\-")
|
chars = strings.ReplaceAll(chars, "-", "\\-")
|
||||||
pattern = fmt.Sprintf(`[%s\pL\pN]{%d,}`, chars, opts.MinimumMatchLength)
|
pattern = fmt.Sprintf(`(?u)[%s\w\d]{%d,}`, chars, opts.MinimumMatchLength)
|
||||||
post_processors = append(post_processors, PostProcessorMap()["brackets"], PostProcessorMap()["quotes"])
|
post_processors = append(post_processors, PostProcessorMap()["brackets"], PostProcessorMap()["quotes"])
|
||||||
default:
|
default:
|
||||||
pattern = opts.Regex
|
pattern = opts.Regex
|
||||||
@ -274,11 +276,112 @@ func functions_for(opts *Options) (pattern string, post_processors []PostProcess
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func mark(r *regexp.Regexp, post_processors []PostProcessorFunc, group_processors []GroupProcessorFunc, text string, opts *Options) (ans []Mark) {
|
type Capture struct {
|
||||||
|
Text string
|
||||||
|
Text_as_runes []rune
|
||||||
|
Byte_Offsets struct {
|
||||||
|
Start, End int
|
||||||
|
}
|
||||||
|
Rune_Offsets struct {
|
||||||
|
Start, End int
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (self Capture) String() string {
|
||||||
|
return fmt.Sprintf("Capture(start=%d, end=%d, %#v)", self.Byte_Offsets.Start, self.Byte_Offsets.End, self.Text)
|
||||||
|
}
|
||||||
|
|
||||||
|
type Group struct {
|
||||||
|
Name string
|
||||||
|
IsNamed bool
|
||||||
|
Captures []Capture
|
||||||
|
}
|
||||||
|
|
||||||
|
func (self Group) LastCapture() Capture {
|
||||||
|
if len(self.Captures) == 0 {
|
||||||
|
return Capture{}
|
||||||
|
}
|
||||||
|
return self.Captures[len(self.Captures)-1]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (self Group) String() string {
|
||||||
|
return fmt.Sprintf("Group(name=%#v, captures=%v)", self.Name, self.Captures)
|
||||||
|
}
|
||||||
|
|
||||||
|
type Match struct {
|
||||||
|
Groups []Group
|
||||||
|
}
|
||||||
|
|
||||||
|
func (self Match) HasNamedGroups() bool {
|
||||||
|
for _, g := range self.Groups {
|
||||||
|
if g.IsNamed {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func find_all_matches(re *regexp2.Regexp, text string) (ans []Match, err error) {
|
||||||
|
m, err := re.FindStringMatch(text)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
rune_to_bytes := utils.RuneOffsetsToByteOffsets(text)
|
||||||
|
get_byte_offset_map := func(groups []regexp2.Group) (ans map[int]int, err error) {
|
||||||
|
ans = make(map[int]int, len(groups)*2)
|
||||||
|
rune_offsets := make([]int, 0, len(groups)*2)
|
||||||
|
for _, g := range groups {
|
||||||
|
for _, c := range g.Captures {
|
||||||
|
if _, found := ans[c.Index]; !found {
|
||||||
|
rune_offsets = append(rune_offsets, c.Index)
|
||||||
|
ans[c.Index] = -1
|
||||||
|
}
|
||||||
|
end := c.Index + c.Length
|
||||||
|
if _, found := ans[end]; !found {
|
||||||
|
rune_offsets = append(rune_offsets, end)
|
||||||
|
ans[end] = -1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
slices.Sort(rune_offsets)
|
||||||
|
for _, pos := range rune_offsets {
|
||||||
|
if ans[pos] = rune_to_bytes(pos); ans[pos] < 0 {
|
||||||
|
return nil, fmt.Errorf("Matches are not monotonic cannot map rune offsets to byte offsets")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for m != nil {
|
||||||
|
groups := m.Groups()
|
||||||
|
bom, err := get_byte_offset_map(groups)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
match := Match{Groups: make([]Group, len(groups))}
|
||||||
|
for i, g := range m.Groups() {
|
||||||
|
match.Groups[i].Name = g.Name
|
||||||
|
match.Groups[i].IsNamed = g.Name != "" && g.Name != strconv.Itoa(i)
|
||||||
|
for _, c := range g.Captures {
|
||||||
|
cn := Capture{Text: c.String(), Text_as_runes: c.Runes()}
|
||||||
|
cn.Rune_Offsets.End = c.Index + c.Length
|
||||||
|
cn.Rune_Offsets.Start = c.Index
|
||||||
|
cn.Byte_Offsets.Start, cn.Byte_Offsets.End = bom[c.Index], bom[cn.Rune_Offsets.End]
|
||||||
|
match.Groups[i].Captures = append(match.Groups[i].Captures, cn)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ans = append(ans, match)
|
||||||
|
m, _ = re.FindNextMatch(m)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func mark(r *regexp2.Regexp, post_processors []PostProcessorFunc, group_processors []GroupProcessorFunc, text string, opts *Options) (ans []Mark) {
|
||||||
sanitize_pat := regexp.MustCompile("[\r\n\x00]")
|
sanitize_pat := regexp.MustCompile("[\r\n\x00]")
|
||||||
names := r.SubexpNames()
|
all_matches, _ := find_all_matches(r, text)
|
||||||
for i, v := range r.FindAllStringSubmatchIndex(text, -1) {
|
for i, m := range all_matches {
|
||||||
match_start, match_end := v[0], v[1]
|
full_capture := m.Groups[0].LastCapture()
|
||||||
|
match_start, match_end := full_capture.Byte_Offsets.Start, full_capture.Byte_Offsets.End
|
||||||
for match_end > match_start+1 && text[match_end-1] == 0 {
|
for match_end > match_start+1 && text[match_end-1] == 0 {
|
||||||
match_end--
|
match_end--
|
||||||
}
|
}
|
||||||
@ -296,14 +399,14 @@ func mark(r *regexp.Regexp, post_processors []PostProcessorFunc, group_processor
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
full_match = sanitize_pat.ReplaceAllLiteralString(text[match_start:match_end], "")
|
full_match = sanitize_pat.ReplaceAllLiteralString(text[match_start:match_end], "")
|
||||||
gd := make(map[string]string, len(names))
|
gd := make(map[string]string, len(m.Groups))
|
||||||
for x, name := range names {
|
for idx, g := range m.Groups {
|
||||||
if name != "" {
|
if idx > 0 && g.IsNamed {
|
||||||
idx := 2 * x
|
c := g.LastCapture()
|
||||||
if s, e := v[idx], v[idx+1]; s > -1 && e > -1 {
|
if s, e := c.Byte_Offsets.Start, c.Byte_Offsets.End; s > -1 && e > -1 {
|
||||||
s = utils.Max(s, match_start)
|
s = utils.Max(s, match_start)
|
||||||
e = utils.Min(e, match_end)
|
e = utils.Min(e, match_end)
|
||||||
gd[name] = sanitize_pat.ReplaceAllLiteralString(text[s:e], "")
|
gd[g.Name] = sanitize_pat.ReplaceAllLiteralString(text[s:e], "")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -314,16 +417,19 @@ func mark(r *regexp.Regexp, post_processors []PostProcessorFunc, group_processor
|
|||||||
for k, v := range gd {
|
for k, v := range gd {
|
||||||
gd2[k] = v
|
gd2[k] = v
|
||||||
}
|
}
|
||||||
if opts.Type == "regex" && len(names) > 1 && names[1] == "" {
|
if opts.Type == "regex" && len(m.Groups) > 1 && !m.HasNamedGroups() {
|
||||||
ms, me := v[2], v[3]
|
cp := m.Groups[1].LastCapture()
|
||||||
|
ms, me := cp.Byte_Offsets.Start, cp.Byte_Offsets.End
|
||||||
match_start = utils.Max(match_start, ms)
|
match_start = utils.Max(match_start, ms)
|
||||||
match_end = utils.Min(match_end, me)
|
match_end = utils.Min(match_end, me)
|
||||||
full_match = sanitize_pat.ReplaceAllLiteralString(text[match_start:match_end], "")
|
full_match = sanitize_pat.ReplaceAllLiteralString(text[match_start:match_end], "")
|
||||||
}
|
}
|
||||||
|
if full_match != "" {
|
||||||
ans = append(ans, Mark{
|
ans = append(ans, Mark{
|
||||||
Index: i, Start: match_start, End: match_end, Text: full_match, Groupdict: gd2,
|
Index: i, Start: match_start, End: match_end, Text: full_match, Groupdict: gd2,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -362,7 +468,7 @@ func find_marks(text string, opts *Options, cli_args ...string) (sanitized_text
|
|||||||
|
|
||||||
run_basic_matching := func() error {
|
run_basic_matching := func() error {
|
||||||
pattern, post_processors, group_processors := functions_for(opts)
|
pattern, post_processors, group_processors := functions_for(opts)
|
||||||
r, err := regexp.Compile(pattern)
|
r, err := regexp2.Compile(pattern, regexp2.RE2)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Failed to compile the regex pattern: %#v with error: %w", pattern, err)
|
return fmt.Errorf("Failed to compile the regex pattern: %#v with error: %w", pattern, err)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -111,6 +111,14 @@ func TestHintMarking(t *testing.T) {
|
|||||||
opts.Type = "regex"
|
opts.Type = "regex"
|
||||||
opts.Regex = `(?ms)^[*]?\s(\S+)`
|
opts.Regex = `(?ms)^[*]?\s(\S+)`
|
||||||
r(`* 2b687c2 - test1`, `2b687c2`)
|
r(`* 2b687c2 - test1`, `2b687c2`)
|
||||||
|
opts.Regex = `(?<=got: )sha256.{4}`
|
||||||
|
r(`got: sha256-L8=`, `sha256-L8=`)
|
||||||
|
|
||||||
|
reset()
|
||||||
|
opts.Type = "word"
|
||||||
|
r(`#one (two) 😍 a-1b `, `#one`, `two`, `a-1b`)
|
||||||
|
// non-ascii words dont match because of https://github.com/dlclark/regexp2/issues/65
|
||||||
|
// r(`fōtiz час`, `fōtiz`, `час`)
|
||||||
|
|
||||||
reset()
|
reset()
|
||||||
tdir := t.TempDir()
|
tdir := t.TempDir()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user