diff --git a/tools/cmd/diff/collect.go b/tools/cmd/diff/collect.go index c18417384..787d35168 100644 --- a/tools/cmd/diff/collect.go +++ b/tools/cmd/diff/collect.go @@ -4,13 +4,64 @@ package diff import ( "fmt" + "kitty/tools/utils" "os" "path/filepath" + "strings" ) var _ = fmt.Print var path_name_map, remote_dirs map[string]string +var mimetypes_cache, data_cache *utils.LRUCache[string, string] +var lines_cache *utils.LRUCache[string, []string] + +func init_caches() { + mimetypes_cache = utils.NewLRUCache[string, string](4096) + data_cache = utils.NewLRUCache[string, string](4096) + lines_cache = utils.NewLRUCache[string, []string](4096) +} + +func mimetype_for_path(path string) string { + return mimetypes_cache.MustGetOrCreate(path, func(path string) string { + mt := utils.GuessMimeTypeWithFileSystemAccess(path) + if mt == "" { + mt = "application/octet-stream" + } + if utils.KnownTextualMimes[mt] { + if _, a, found := strings.Cut(mt, "/"); found { + mt = "text/" + a + } + } + return mt + }) +} + +func data_for_path(path string) (string, error) { + return data_cache.GetOrCreate(path, func(path string) (string, error) { + ans, err := os.ReadFile(path) + return utils.UnsafeBytesToString(ans), err + }) +} + +func sanitize(x string) string { + x = strings.ReplaceAll(x, "\r\n", "⏎\n") + return utils.SanitizeControlCodes(x, "░") +} + +func lines_for_path(path string) ([]string, error) { + return lines_cache.GetOrCreate(path, func(path string) ([]string, error) { + ans, err := data_for_path(path) + if err != nil { + return nil, err + } + ans = sanitize(strings.ReplaceAll(ans, "\t", conf.Replace_tab_by)) + lines := make([]string, 0, 256) + splitlines_like_git(ans, false, func(line string) { lines = append(lines, line) }) + return lines, nil + }) +} + type Collection struct { } diff --git a/tools/cmd/diff/main.go b/tools/cmd/diff/main.go index eda62d2af..48eb0d6b9 100644 --- a/tools/cmd/diff/main.go +++ b/tools/cmd/diff/main.go @@ -51,6 +51,7 @@ func main(_ *cli.Command, opts_ *Options, args []string) (rc int, err error) { if err = set_diff_command(conf.Diff_cmd); err != nil { return 1, err } + init_caches() left, right := get_remote_file(args[0]), get_remote_file(args[1]) if isdir(left) != isdir(right) { return 1, fmt.Errorf("The items to be diffed should both be either directories or files. Comparing a directory to a file is not valid.'") diff --git a/tools/cmd/diff/patch.go b/tools/cmd/diff/patch.go index d4dc8345c..055f1a4f7 100644 --- a/tools/cmd/diff/patch.go +++ b/tools/cmd/diff/patch.go @@ -7,6 +7,7 @@ import ( "errors" "fmt" "kitty/tools/utils" + "kitty/tools/utils/images" "kitty/tools/utils/shlex" "os/exec" "path/filepath" @@ -52,6 +53,223 @@ func set_diff_command(q string) error { return err } +type Chunk struct { + is_context bool + left_start, right_start int + left_count, right_count int + centers []struct{ prefix_count, suffix_count int } +} + +func (self *Chunk) add_line() { + self.right_count++ +} + +func (self *Chunk) remove_line() { + self.left_count++ +} + +func (self *Chunk) context_line() { + self.left_count++ + self.right_count++ +} + +func changed_center(left, right string) (ans struct{ prefix_count, suffix_count int }) { + if len(left) > 0 && len(right) > 0 { + ll, rl := len(left), len(right) + ml := utils.Min(ll, rl) + for ans.prefix_count < ml && left[ans.prefix_count] == right[ans.prefix_count] { + ans.prefix_count++ + } + if ans.prefix_count < ml { + for ans.suffix_count < ml-ans.prefix_count && left[ll-1-ans.suffix_count] == right[rl-1-ans.suffix_count] { + ans.suffix_count++ + } + } + } + return +} + +func (self *Chunk) finalize(left_lines, right_lines []string) { + if !self.is_context && self.left_count == self.right_count { + for i := 0; i < self.left_count; i++ { + self.centers = append(self.centers, changed_center(left_lines[self.left_start+i], right_lines[self.right_start+i])) + } + } +} + +type Hunk struct { + left_start, left_count int + right_start, right_count int + title string + added_count, removed_count int + chunks []*Chunk + current_chunk *Chunk + largest_line_number int +} + +func (self *Hunk) new_chunk(is_context bool) *Chunk { + left_start, right_start := self.left_start, self.right_start + if len(self.chunks) > 0 { + c := self.chunks[len(self.chunks)-1] + left_start = c.left_start + c.left_count + right_start = c.right_start + c.right_count + } + return &Chunk{is_context: is_context, left_start: left_start, right_start: right_start} +} + +func (self *Hunk) ensure_diff_chunk() { + if self.current_chunk == nil || self.current_chunk.is_context { + if self.current_chunk != nil { + self.chunks = append(self.chunks, self.current_chunk) + } + self.current_chunk = self.new_chunk(false) + } +} + +func (self *Hunk) ensure_context_chunk() { + if self.current_chunk == nil || !self.current_chunk.is_context { + if self.current_chunk != nil { + self.chunks = append(self.chunks, self.current_chunk) + } + self.current_chunk = self.new_chunk(true) + } +} + +func (self *Hunk) add_line() { + self.ensure_diff_chunk() + self.current_chunk.add_line() + self.added_count++ +} + +func (self *Hunk) remove_line() { + self.ensure_diff_chunk() + self.current_chunk.remove_line() + self.removed_count++ +} + +func (self *Hunk) context_line() { + self.ensure_context_chunk() + self.current_chunk.context_line() +} + +func (self *Hunk) finalize(left_lines, right_lines []string) error { + if self.current_chunk != nil { + self.chunks = append(self.chunks, self.current_chunk) + } + // Sanity check + c := self.chunks[len(self.chunks)-1] + if c.left_start+c.left_count != self.left_start+self.left_count { + return fmt.Errorf("Left side line mismatch %d != %d", c.left_start+c.left_count, self.left_start+self.left_count) + } + if c.right_start+c.right_count != self.right_start+self.right_count { + return fmt.Errorf("Right side line mismatch %d != %d", c.right_start+c.right_count, self.right_start+self.right_count) + } + for _, c := range self.chunks { + c.finalize(left_lines, right_lines) + } + return nil +} + +type Patch struct { + all_hunks []*Hunk + largest_line_number, added_count, removed_count int +} + +func (self *Patch) Len() int { return len(self.all_hunks) } + +func splitlines_like_git(raw string, strip_trailing_lines bool, process_line func(string)) { + sz := len(raw) + if strip_trailing_lines { + for sz > 0 && (raw[sz-1] == '\n' || raw[sz-1] == '\r') { + sz-- + } + } + start := 0 + for i := 0; i < sz; i++ { + switch raw[i] { + case '\n': + process_line(raw[start:i]) + start = i + 1 + case '\r': + process_line(raw[start:i]) + start = i + 1 + if start < sz && raw[start] == '\n' { + i++ + start++ + } + } + } + if start < sz { + process_line(raw[start:sz]) + } +} + +func parse_range(x string) (start, count int) { + s, c, found := strings.Cut(x, ",") + start, _ = strconv.Atoi(s) + if start < 0 { + start = -start + } + count = 1 + if found { + count, _ = strconv.Atoi(c) + } + return +} + +func parse_hunk_header(line string) *Hunk { + parts := strings.SplitN(line, "@@", 3) + linespec := strings.TrimSpace(parts[1]) + title := "" + if len(parts) == 3 { + title = strings.TrimSpace(parts[2]) + } + left, right, _ := strings.Cut(linespec, " ") + ls, lc := parse_range(left) + rs, rc := parse_range(right) + return &Hunk{ + title: title, left_start: ls - 1, left_count: lc, right_start: rs - 1, right_count: rc, + largest_line_number: utils.Max(ls-1+lc, rs-1+rc), + } +} + +func parse_patch(raw string, left_lines, right_lines []string) (ans *Patch, err error) { + ans = &Patch{all_hunks: make([]*Hunk, 0, 32)} + var current_hunk *Hunk + splitlines_like_git(raw, true, func(line string) { + if strings.HasPrefix(line, "@@ ") { + current_hunk = parse_hunk_header(line) + ans.all_hunks = append(ans.all_hunks, current_hunk) + } else if current_hunk != nil { + var ch byte + if len(line) > 0 { + ch = line[0] + } + switch ch { + case '+': + current_hunk.add_line() + case '-': + current_hunk.remove_line() + case '\\': + default: + current_hunk.context_line() + } + } + }) + for _, h := range ans.all_hunks { + err = h.finalize(left_lines, right_lines) + if err != nil { + return + } + ans.added_count += h.added_count + ans.removed_count += h.removed_count + } + if len(ans.all_hunks) > 0 { + ans.largest_line_number = ans.all_hunks[len(ans.all_hunks)-1].largest_line_number + } + return +} + func run_diff(file1, file2 string, num_of_context_lines int) (ok, is_different bool, patch string, err error) { context := strconv.Itoa(num_of_context_lines) cmd := utils.Map(func(x string) string { @@ -82,3 +300,50 @@ func run_diff(file1, file2 string, num_of_context_lines int) (ok, is_different b } return true, false, stdout.String(), nil } + +func do_diff(file1, file2 string, context_count int) (ans *Patch, err error) { + ok, _, raw, err := run_diff(file1, file2, context_count) + if !ok { + return nil, fmt.Errorf("Failed to diff %s vs. %s with errors:\n%s", file1, file2, raw) + } + if err != nil { + return + } + left_lines, err := lines_for_path(file1) + if err != nil { + return + } + right_lines, err := lines_for_path(file2) + if err != nil { + return + } + ans, err = parse_patch(raw, left_lines, right_lines) + return +} + +func diff(jobs []struct{ file1, file2 string }, context_count int) (ans map[string]*Patch, err error) { + ans = make(map[string]*Patch) + ctx := images.Context{} + type result struct { + file1, file2 string + err error + patch *Patch + } + results := make(chan result) + ctx.Parallel(0, len(jobs), func(nums <-chan int) { + for i := range nums { + job := jobs[i] + r := result{file1: job.file1, file2: job.file2} + r.patch, r.err = do_diff(job.file1, job.file2, context_count) + results <- r + } + }) + close(results) + for r := range results { + if r.err != nil { + return nil, r.err + } + ans[r.file1] = r.patch + } + return ans, nil +} diff --git a/tools/utils/misc.go b/tools/utils/misc.go index 5e54fd5de..f6e63b607 100644 --- a/tools/utils/misc.go +++ b/tools/utils/misc.go @@ -4,6 +4,7 @@ package utils import ( "fmt" + "regexp" "sort" "golang.org/x/exp/constraints" @@ -143,3 +144,15 @@ func Memset[T any](dest []T, pattern ...T) []T { } return dest } + +var ControlCodesPat = (&Once[*regexp.Regexp]{Run: func() *regexp.Regexp { + return regexp.MustCompile("[\x00-\x09\x0b-\x1f\x7f\x80-\x9f]") +}}).Get + +func SanitizeControlCodes(raw string, replace_with ...string) string { + r := "" + if len(replace_with) > 0 { + r = replace_with[0] + } + return ControlCodesPat().ReplaceAllLiteralString(raw, r) +}