More work on porting the diff kitten

This commit is contained in:
Kovid Goyal 2023-03-17 16:31:58 +05:30
parent bf1f0c00f4
commit 293c0ab845
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 330 additions and 0 deletions

View File

@ -4,13 +4,64 @@ package diff
import ( import (
"fmt" "fmt"
"kitty/tools/utils"
"os" "os"
"path/filepath" "path/filepath"
"strings"
) )
var _ = fmt.Print var _ = fmt.Print
var path_name_map, remote_dirs map[string]string var path_name_map, remote_dirs map[string]string
// Path-keyed LRU caches for the diff kitten. They are nil until
// init_caches() has been called.
var (
	mimetypes_cache *utils.LRUCache[string, string]
	data_cache      *utils.LRUCache[string, string]
	lines_cache     *utils.LRUCache[string, []string]
)
// init_caches creates the three path-keyed LRU caches (mime types, raw file
// data and split lines). Every cache is constructed with the same size
// argument.
func init_caches() {
	const cache_size = 4096
	lines_cache = utils.NewLRUCache[string, []string](cache_size)
	data_cache = utils.NewLRUCache[string, string](cache_size)
	mimetypes_cache = utils.NewLRUCache[string, string](cache_size)
}
func mimetype_for_path(path string) string {
return mimetypes_cache.MustGetOrCreate(path, func(path string) string {
mt := utils.GuessMimeTypeWithFileSystemAccess(path)
if mt == "" {
mt = "application/octet-stream"
}
if utils.KnownTextualMimes[mt] {
if _, a, found := strings.Cut(mt, "/"); found {
mt = "text/" + a
}
}
return mt
})
}
func data_for_path(path string) (string, error) {
return data_cache.GetOrCreate(path, func(path string) (string, error) {
ans, err := os.ReadFile(path)
return utils.UnsafeBytesToString(ans), err
})
}
// sanitize makes text safe to display: every CRLF pair is rewritten as a
// visible "⏎" followed by a newline, and the result is passed through
// utils.SanitizeControlCodes with "░" as the replacement.
func sanitize(x string) string {
	visible_crlf := strings.ReplaceAll(x, "\r\n", "⏎\n")
	return utils.SanitizeControlCodes(visible_crlf, "░")
}
func lines_for_path(path string) ([]string, error) {
return lines_cache.GetOrCreate(path, func(path string) ([]string, error) {
ans, err := data_for_path(path)
if err != nil {
return nil, err
}
ans = sanitize(strings.ReplaceAll(ans, "\t", conf.Replace_tab_by))
lines := make([]string, 0, 256)
splitlines_like_git(ans, false, func(line string) { lines = append(lines, line) })
return lines, nil
})
}
type Collection struct { type Collection struct {
} }

View File

@ -51,6 +51,7 @@ func main(_ *cli.Command, opts_ *Options, args []string) (rc int, err error) {
if err = set_diff_command(conf.Diff_cmd); err != nil { if err = set_diff_command(conf.Diff_cmd); err != nil {
return 1, err return 1, err
} }
init_caches()
left, right := get_remote_file(args[0]), get_remote_file(args[1]) left, right := get_remote_file(args[0]), get_remote_file(args[1])
if isdir(left) != isdir(right) { if isdir(left) != isdir(right) {
return 1, fmt.Errorf("The items to be diffed should both be either directories or files. Comparing a directory to a file is not valid.'") return 1, fmt.Errorf("The items to be diffed should both be either directories or files. Comparing a directory to a file is not valid.'")

View File

@ -7,6 +7,7 @@ import (
"errors" "errors"
"fmt" "fmt"
"kitty/tools/utils" "kitty/tools/utils"
"kitty/tools/utils/images"
"kitty/tools/utils/shlex" "kitty/tools/utils/shlex"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
@ -52,6 +53,223 @@ func set_diff_command(q string) error {
return err return err
} }
// Chunk is a run of consecutive lines inside a Hunk that are either all
// context lines or all changed (added/removed) lines.
type Chunk struct {
	// is_context is true when the chunk consists of context lines.
	is_context bool
	// left_start and right_start are the zero-based indices of the first
	// line of the chunk in the left and right files.
	left_start  int
	right_start int
	// left_count and right_count are the number of lines the chunk covers
	// on each side.
	left_count  int
	right_count int
	// centers is filled in by finalize() for changed chunks with the same
	// number of lines on both sides: one entry per line pair, recording how
	// many leading and trailing bytes the two lines have in common.
	centers []struct{ prefix_count, suffix_count int }
}
// add_line records one added line: only the right side grows.
func (self *Chunk) add_line() {
	self.right_count += 1
}
// remove_line records one removed line: only the left side grows.
func (self *Chunk) remove_line() {
	self.left_count += 1
}
// context_line records one context line, which is present on both sides.
func (self *Chunk) context_line() {
	self.right_count += 1
	self.left_count += 1
}
// changed_center measures how much two lines have in common at their ends.
//
// prefix_count is the number of leading bytes that are equal in left and
// right. suffix_count is the number of trailing bytes that are equal, capped
// so that prefix and suffix never overlap in the shorter line. Both counts
// are in bytes, not runes. If either line is empty, or one line is a prefix
// of the other, suffix_count is left at zero.
func changed_center(left, right string) (ans struct{ prefix_count, suffix_count int }) {
	left_len, right_len := len(left), len(right)
	if left_len == 0 || right_len == 0 {
		return
	}
	shorter := utils.Min(left_len, right_len)

	prefix := 0
	for prefix < shorter && left[prefix] == right[prefix] {
		prefix++
	}
	ans.prefix_count = prefix
	if prefix >= shorter {
		// The shorter line is entirely a prefix of the other one.
		return
	}

	suffix, limit := 0, shorter-prefix
	for suffix < limit && left[left_len-1-suffix] == right[right_len-1-suffix] {
		suffix++
	}
	ans.suffix_count = suffix
	return
}
// finalize computes the per-line changed centers of the chunk.
//
// Nothing is done for context chunks or for chunks whose two sides have a
// different number of lines. Otherwise line i of the left side is paired
// with line i of the right side and the result of changed_center is
// appended to self.centers. left_lines and right_lines are indexed with the
// chunk's zero-based start offsets.
func (self *Chunk) finalize(left_lines, right_lines []string) {
	if self.is_context || self.left_count != self.right_count {
		return
	}
	for offset := 0; offset < self.left_count; offset++ {
		left := left_lines[self.left_start+offset]
		right := right_lines[self.right_start+offset]
		self.centers = append(self.centers, changed_center(left, right))
	}
}
// Hunk is one "@@ ... @@" section of a unified diff, split into chunks of
// context and changed lines.
type Hunk struct {
	// Zero-based start index and line count on the left side, as given by
	// the hunk header.
	left_start int
	left_count int
	// Zero-based start index and line count on the right side, as given by
	// the hunk header.
	right_start int
	right_count int
	// title is the text following the second "@@" of the header, if any.
	title string
	// Number of added and removed lines seen in the hunk body.
	added_count   int
	removed_count int
	// chunks holds the completed chunks in order; current_chunk is the one
	// still being filled and is appended to chunks when the chunk kind
	// changes or when the hunk is finalized.
	chunks        []*Chunk
	current_chunk *Chunk
	// largest_line_number is the larger of the two end positions
	// (start + count) from the header.
	largest_line_number int
}
// new_chunk creates (but does not store) a chunk that starts where the last
// chunk in self.chunks ends, or at the start of the hunk when there are no
// chunks yet. Callers must therefore append the current chunk to
// self.chunks before calling this.
func (self *Hunk) new_chunk(is_context bool) *Chunk {
	chunk := &Chunk{
		is_context:  is_context,
		left_start:  self.left_start,
		right_start: self.right_start,
	}
	if n := len(self.chunks); n > 0 {
		prev := self.chunks[n-1]
		chunk.left_start = prev.left_start + prev.left_count
		chunk.right_start = prev.right_start + prev.right_count
	}
	return chunk
}
// ensure_diff_chunk makes sure self.current_chunk is a changed (non-context)
// chunk. An existing context chunk is moved to self.chunks first and a
// fresh changed chunk is started.
func (self *Hunk) ensure_diff_chunk() {
	cur := self.current_chunk
	if cur != nil && !cur.is_context {
		// Already collecting changed lines.
		return
	}
	if cur != nil {
		self.chunks = append(self.chunks, cur)
	}
	self.current_chunk = self.new_chunk(false)
}
// ensure_context_chunk makes sure self.current_chunk is a context chunk. An
// existing changed chunk is moved to self.chunks first and a fresh context
// chunk is started.
func (self *Hunk) ensure_context_chunk() {
	cur := self.current_chunk
	if cur != nil && cur.is_context {
		// Already collecting context lines.
		return
	}
	if cur != nil {
		self.chunks = append(self.chunks, cur)
	}
	self.current_chunk = self.new_chunk(true)
}
// add_line records an added ("+") line in the hunk: it bumps added_count
// and counts the line in the current changed chunk, starting one if needed.
func (self *Hunk) add_line() {
	self.added_count += 1
	self.ensure_diff_chunk()
	self.current_chunk.add_line()
}
// remove_line records a removed ("-") line in the hunk: it bumps
// removed_count and counts the line in the current changed chunk, starting
// one if needed.
func (self *Hunk) remove_line() {
	self.removed_count += 1
	self.ensure_diff_chunk()
	self.current_chunk.remove_line()
}
// context_line records an unchanged line in the hunk, counting it in the
// current context chunk and starting one if needed.
func (self *Hunk) context_line() {
	self.ensure_context_chunk()
	chunk := self.current_chunk
	chunk.context_line()
}
// finalize completes the hunk after all of its body lines have been seen.
//
// The chunk still being filled (if any) is appended to self.chunks. Then the
// end position reached by the chunks on each side is compared with the end
// position announced by the hunk header (start + count); a mismatch is
// reported as an error. Finally every chunk is finalized so that changed
// centers are computed from left_lines and right_lines.
//
// A hunk without any body lines has no chunks; in that case the end
// position is the hunk start itself, so the sanity check succeeds only if
// the header announced zero lines on both sides (previously this case
// panicked with an out of range index).
func (self *Hunk) finalize(left_lines, right_lines []string) error {
	if self.current_chunk != nil {
		self.chunks = append(self.chunks, self.current_chunk)
	}
	// Sanity check
	left_end, right_end := self.left_start, self.right_start
	if n := len(self.chunks); n > 0 {
		last := self.chunks[n-1]
		left_end = last.left_start + last.left_count
		right_end = last.right_start + last.right_count
	}
	if left_end != self.left_start+self.left_count {
		return fmt.Errorf("Left side line mismatch %d != %d", left_end, self.left_start+self.left_count)
	}
	if right_end != self.right_start+self.right_count {
		return fmt.Errorf("Right side line mismatch %d != %d", right_end, self.right_start+self.right_count)
	}
	for _, c := range self.chunks {
		c.finalize(left_lines, right_lines)
	}
	return nil
}
// Patch is the parsed form of a unified diff between two files.
type Patch struct {
	// all_hunks lists the hunks in the order they appear in the diff.
	all_hunks []*Hunk
	// largest_line_number is taken from the last hunk of the patch.
	largest_line_number int
	// added_count and removed_count are the totals over all hunks.
	added_count   int
	removed_count int
}
// Len returns the number of hunks in the patch.
func (self *Patch) Len() int {
	return len(self.all_hunks)
}
// splitlines_like_git splits raw into lines and calls process_line once per
// line, in order, with the line terminator removed.
//
// "\n", "\r\n" and a lone "\r" are all treated as line terminators. A final
// line without a terminator is still reported; a terminator at the very end
// does not produce an extra empty line. When strip_trailing_lines is true,
// all trailing '\n' and '\r' bytes are dropped before splitting, so trailing
// empty lines are not reported.
func splitlines_like_git(raw string, strip_trailing_lines bool, process_line func(string)) {
	limit := len(raw)
	if strip_trailing_lines {
		for limit > 0 {
			if last := raw[limit-1]; last != '\n' && last != '\r' {
				break
			}
			limit--
		}
	}

	line_start, pos := 0, 0
	for pos < limit {
		ch := raw[pos]
		if ch != '\n' && ch != '\r' {
			pos++
			continue
		}
		process_line(raw[line_start:pos])
		pos++
		// A "\r\n" pair counts as a single terminator.
		if ch == '\r' && pos < limit && raw[pos] == '\n' {
			pos++
		}
		line_start = pos
	}
	if line_start < limit {
		process_line(raw[line_start:limit])
	}
}
// parse_range parses one side of a hunk header range such as "-12,3" or
// "+7" into a start line and a line count.
//
// The sign of the start number is discarded. When no ",count" part is
// present the count is 1. Conversion errors from strconv.Atoi are
// deliberately ignored; whatever value Atoi returned alongside the error
// (zero for malformed numbers) is used.
func parse_range(x string) (start, count int) {
	start_text, count_text, has_count := strings.Cut(x, ",")
	start, _ = strconv.Atoi(start_text)
	if start < 0 {
		start = -start
	}
	if !has_count {
		return start, 1
	}
	count, _ = strconv.Atoi(count_text)
	return start, count
}
// parse_hunk_header builds a Hunk from a unified diff header line of the
// form "@@ -l,c +l,c @@ title".
//
// The one-based line numbers from the header are stored as zero-based start
// indices. The text after the second "@@", if present, becomes the title.
// line must contain at least one "@@"; the caller checks for the "@@ "
// prefix before calling.
func parse_hunk_header(line string) *Hunk {
	parts := strings.SplitN(line, "@@", 3)
	left_spec, right_spec, _ := strings.Cut(strings.TrimSpace(parts[1]), " ")
	var title string
	if len(parts) == 3 {
		title = strings.TrimSpace(parts[2])
	}
	left_line, left_count := parse_range(left_spec)
	right_line, right_count := parse_range(right_spec)
	hunk := &Hunk{
		title:       title,
		left_start:  left_line - 1,
		left_count:  left_count,
		right_start: right_line - 1,
		right_count: right_count,
	}
	hunk.largest_line_number = utils.Max(hunk.left_start+hunk.left_count, hunk.right_start+hunk.right_count)
	return hunk
}
// parse_patch parses the unified diff text in raw into a Patch.
//
// Lines starting with "@@ " open a new hunk. Inside a hunk, lines starting
// with '+' are additions, lines starting with '-' are removals, lines
// starting with '\' are ignored and everything else (including empty lines)
// is a context line. Lines before the first hunk header are ignored.
//
// After parsing, every hunk is finalized against left_lines and right_lines
// and the added/removed totals are accumulated. If a hunk fails its sanity
// check, the error is returned together with the partially filled Patch.
func parse_patch(raw string, left_lines, right_lines []string) (ans *Patch, err error) {
	ans = &Patch{all_hunks: make([]*Hunk, 0, 32)}
	var hunk *Hunk
	handle_line := func(line string) {
		if strings.HasPrefix(line, "@@ ") {
			hunk = parse_hunk_header(line)
			ans.all_hunks = append(ans.all_hunks, hunk)
			return
		}
		if hunk == nil {
			return
		}
		switch {
		case strings.HasPrefix(line, "+"):
			hunk.add_line()
		case strings.HasPrefix(line, "-"):
			hunk.remove_line()
		case strings.HasPrefix(line, "\\"):
			// Lines starting with a backslash carry no diff content.
		default:
			hunk.context_line()
		}
	}
	splitlines_like_git(raw, true, handle_line)

	for _, h := range ans.all_hunks {
		if err = h.finalize(left_lines, right_lines); err != nil {
			return
		}
		ans.added_count += h.added_count
		ans.removed_count += h.removed_count
	}
	if n := len(ans.all_hunks); n > 0 {
		ans.largest_line_number = ans.all_hunks[n-1].largest_line_number
	}
	return
}
func run_diff(file1, file2 string, num_of_context_lines int) (ok, is_different bool, patch string, err error) { func run_diff(file1, file2 string, num_of_context_lines int) (ok, is_different bool, patch string, err error) {
context := strconv.Itoa(num_of_context_lines) context := strconv.Itoa(num_of_context_lines)
cmd := utils.Map(func(x string) string { cmd := utils.Map(func(x string) string {
@ -82,3 +300,50 @@ func run_diff(file1, file2 string, num_of_context_lines int) (ok, is_different b
} }
return true, false, stdout.String(), nil return true, false, stdout.String(), nil
} }
// do_diff runs the diff command on file1 and file2 with context_count lines
// of context and parses its output into a Patch.
//
// If run_diff reports failure (ok is false) an error embedding its output
// is returned; note that this check comes before the check of run_diff's
// own error value. Errors from loading the lines of either file or from
// parsing the patch are returned as is.
func do_diff(file1, file2 string, context_count int) (ans *Patch, err error) {
	ok, _, output, err := run_diff(file1, file2, context_count)
	switch {
	case !ok:
		return nil, fmt.Errorf("Failed to diff %s vs. %s with errors:\n%s", file1, file2, output)
	case err != nil:
		return nil, err
	}
	var left_lines, right_lines []string
	if left_lines, err = lines_for_path(file1); err != nil {
		return nil, err
	}
	if right_lines, err = lines_for_path(file2); err != nil {
		return nil, err
	}
	return parse_patch(output, left_lines, right_lines)
}
// diff computes the patches for all jobs, spreading the work over the
// workers started by images.Context.Parallel.
//
// The returned map is keyed by the file1 of each job. If any job fails, the
// first error read back from the workers is returned and the map is nil.
func diff(jobs []struct{ file1, file2 string }, context_count int) (ans map[string]*Patch, err error) {
	ans = make(map[string]*Patch, len(jobs))
	ctx := images.Context{}
	type result struct {
		file1, file2 string
		err          error
		patch        *Patch
	}
	// The channel must be able to hold one result per job: nothing receives
	// from it until Parallel has returned, so with an unbuffered channel the
	// very first send would block forever.
	// NOTE(review): this relies on Parallel returning only after all workers
	// are done, which is also what makes the close() below safe; confirm
	// against the images package.
	results := make(chan result, len(jobs))
	ctx.Parallel(0, len(jobs), func(nums <-chan int) {
		for i := range nums {
			job := jobs[i]
			r := result{file1: job.file1, file2: job.file2}
			r.patch, r.err = do_diff(job.file1, job.file2, context_count)
			results <- r
		}
	})
	close(results)
	for r := range results {
		if r.err != nil {
			return nil, r.err
		}
		ans[r.file1] = r.patch
	}
	return ans, nil
}

View File

@ -4,6 +4,7 @@ package utils
import ( import (
"fmt" "fmt"
"regexp"
"sort" "sort"
"golang.org/x/exp/constraints" "golang.org/x/exp/constraints"
@ -143,3 +144,15 @@ func Memset[T any](dest []T, pattern ...T) []T {
} }
return dest return dest
} }
// ControlCodesPat returns the regular expression matching control
// characters: the C0 controls except newline (U+0000-U+0009 and
// U+000B-U+001F), DEL (U+007F) and the C1 controls (U+0080-U+009F). The
// pattern is compiled through Once rather than at package initialisation.
//
// The C1 range is written with \u escapes on purpose: \x80-\x9f in an
// interpreted string literal yields raw bytes that are not valid UTF-8,
// which regexp.MustCompile rejects with a panic.
var ControlCodesPat = (&Once[*regexp.Regexp]{Run: func() *regexp.Regexp {
	return regexp.MustCompile("[\x00-\x09\x0b-\x1f\x7f\u0080-\u009f]")
}}).Get
// SanitizeControlCodes returns raw with every match of ControlCodesPat
// replaced by the first element of replace_with. When replace_with is
// empty the matches are removed. The replacement is inserted literally;
// additional elements of replace_with are ignored.
func SanitizeControlCodes(raw string, replace_with ...string) string {
	var replacement string
	if len(replace_with) != 0 {
		replacement = replace_with[0]
	}
	pat := ControlCodesPat()
	return pat.ReplaceAllLiteralString(raw, replacement)
}