From e4d936b5ed4cf6ed8ac383ac3ea81e0ed4c51505 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Fri, 17 Mar 2023 21:25:07 +0530
Subject: [PATCH] More work on porting the diff kitten

---
Review notes. Nothing between the "---" line above and the first
"diff --git" line below belongs to the commit message.

This copy of the patch arrived with its line breaks collapsed. Line breaks
and indentation were restored from the Go syntax, blank context lines from
the hunk line counts. The blob ids on the "index" lines are those of the
original submission and were not recomputed.

What the patch does:

* collect.go: adds per-path caches for the "is this text" check and for the
  md5 hash of the contents, fills in Collection (changes, renames, adds,
  removals) and adds collect_files(), which walks both trees, records files
  present on both sides whose contents differ as changes, and pairs removed
  and added files that have identical contents as renames.
* main.go: arguments of the form ssh:hostname:path are fetched by running
  tar on the remote host via ssh and unpacking its output into a temporary
  directory. The temporary directories are removed when main() returns.
* tar.go: new helper utils.ExtractAllFromTar().

Changes made during review:

* tar.go: when tr.Next() returned io.EOF the loop ended with the named
  result err still holding io.EOF, so the final bare return handed io.EOF to
  the caller after every archive that was read to its end, and
  get_ssh_file() treated that as a failure to untar. err is now cleared
  before leaving the loop.
* tar.go: the parent directory of a regular file is created before the file
  itself, since os.Create() fails when it does not exist.
  NOTE(review): this matters if the remote tar emits no entries for the
  parent directories of a single path argument, please confirm.
* tar.go: hard link targets get the same treatment as entry names: leading
  slashes are stripped, the result is checked with filepath.IsLocal() and
  joined to dest_path. Before, hdr.Linkname was handed to os.Link()
  unchanged. NOTE(review): assumes the Linkname of a hard link entry names
  another member of the same archive, please confirm.
* collect.go, walk(): the err argument of the WalkDir callback is checked
  before d is used.
* collect.go, collect_files(): the error returned by the walk of the right
  hand tree is now checked, before it was never looked at.
* main.go, get_ssh_file(): same WalkDir callback check as in walk().

Left alone, candidates for a follow up:

* tar.go: symlink targets are not validated and the result of d.Close() is
  discarded.
* collect.go, walk(): filepath.Rel(base, path) is called with an absolute
  path. NOTE(review): looks like this needs base to be absolute as well,
  verify against the callers.
* collect.go, add_remote_dir(): the host name is taken from the text after
  the last "-" of the directory name. NOTE(review): presumably truncates
  host names that contain a "-", please confirm.
* main.go: the "Comparing a directory to a file is not valid.'" message ends
  in a stray single quote.

 tools/cmd/diff/collect.go | 265 ++++++++++++++++++++++++++++++++++++--
 tools/cmd/diff/main.go    |  73 ++++++++++-
 tools/utils/tar.go        | 117 +++++++++++++++++
 3 files changed, 442 insertions(+), 13 deletions(-)
 create mode 100644 tools/utils/tar.go

diff --git a/tools/cmd/diff/collect.go b/tools/cmd/diff/collect.go
index 787d35168..6568b99c5 100644
--- a/tools/cmd/diff/collect.go
+++ b/tools/cmd/diff/collect.go
@@ -3,23 +3,43 @@
 package diff
 
 import (
+	"crypto/md5"
 	"fmt"
+	"io/fs"
 	"kitty/tools/utils"
 	"os"
 	"path/filepath"
 	"strings"
+	"unicode/utf8"
 )
 
 var _ = fmt.Print
 var path_name_map, remote_dirs map[string]string
 
-var mimetypes_cache, data_cache *utils.LRUCache[string, string]
+var mimetypes_cache, data_cache, hash_cache *utils.LRUCache[string, string]
 var lines_cache *utils.LRUCache[string, []string]
+var is_text_cache *utils.LRUCache[string, bool]
 
 func init_caches() {
-	mimetypes_cache = utils.NewLRUCache[string, string](4096)
-	data_cache = utils.NewLRUCache[string, string](4096)
-	lines_cache = utils.NewLRUCache[string, []string](4096)
+	path_name_map = make(map[string]string, 32)
+	remote_dirs = make(map[string]string, 32)
+	const sz = 4096
+	mimetypes_cache = utils.NewLRUCache[string, string](sz)
+	data_cache = utils.NewLRUCache[string, string](sz)
+	is_text_cache = utils.NewLRUCache[string, bool](sz)
+	lines_cache = utils.NewLRUCache[string, []string](sz)
+	hash_cache = utils.NewLRUCache[string, string](sz)
+}
+
+func add_remote_dir(val string) {
+	x := filepath.Base(val)
+	idx := strings.LastIndex(x, "-")
+	if idx > -1 {
+		x = x[idx+1:]
+	} else {
+		x = ""
+	}
+	remote_dirs[val] = x
 }
 
 func mimetype_for_path(path string) string {
@@ -44,6 +64,38 @@ func data_for_path(path string) (string, error) {
 	})
 }
 
+func is_path_text(path string) bool {
+	return is_text_cache.MustGetOrCreate(path, func(path string) bool {
+		if strings.HasPrefix(mimetype_for_path(path), "image/") {
+			return false
+		}
+		s1, err := os.Stat(path)
+		if err == nil {
+			s2, err := os.Stat("/dev/null")
+			if err == nil && os.SameFile(s1, s2) {
+				return false
+			}
+		}
+		d, err := data_for_path(path)
+		if err != nil {
+			return false
+		}
+		return utf8.ValidString(d)
+	})
+}
+
+func hash_for_path(path string) (string, error) {
+	return hash_cache.GetOrCreate(path, func(path string) (string, error) {
+		ans, err := data_for_path(path)
+		if err != nil {
+			return "", err
+		}
+		hash := md5.Sum(utils.UnsafeStringToBytes(ans))
+		return utils.UnsafeBytesToString(hash[:]), err
+	})
+
+}
+
 func sanitize(x string) string {
 	x = strings.ReplaceAll(x, "\r\n", "⏎\n")
 	return utils.SanitizeControlCodes(x, "░")
@@ -63,12 +115,204 @@ func lines_for_path(path string) ([]string, error) {
 }
 
 type Collection struct {
+	changes, renames, type_map map[string]string
+	adds, removes              *utils.Set[string]
+	all_paths                  []string
+	added_count, removed_count int
+}
+
+func (self *Collection) add_change(left, right string) {
+	self.changes[left] = right
+	self.all_paths = append(self.all_paths, left)
+	self.type_map[left] = `diff`
+}
+
+func (self *Collection) add_rename(left, right string) {
+	self.renames[left] = right
+	self.all_paths = append(self.all_paths, left)
+	self.type_map[left] = `rename`
+}
+
+func (self *Collection) add_add(right string) {
+	self.adds.Add(right)
+	self.all_paths = append(self.all_paths, right)
+	self.type_map[right] = `add`
+	if is_path_text(right) {
+		num, _ := lines_for_path(right)
+		self.added_count += len(num)
+	}
+}
+
+func (self *Collection) add_removal(left string) {
+	self.removes.Add(left)
+	self.all_paths = append(self.all_paths, left)
+	self.type_map[left] = `removal`
+	if is_path_text(left) {
+		num, _ := lines_for_path(left)
+		self.removed_count += len(num)
+	}
+}
+
+func (self *Collection) finalize() {
+	utils.StableSortWithKey(self.all_paths, func(path string) string {
+		return path_name_map[path]
+	})
+}
+
+func (self *Collection) Len() int { return len(self.all_paths) }
+
+func (self *Collection) Items() int { return len(self.all_paths) }
+
+func (self *Collection) Apply(f func(path, typ, data string) error) error {
+	for _, path := range self.all_paths {
+		typ := self.type_map[path]
+		data := ""
+		switch typ {
+		case "diff":
+			data = self.changes[path]
+		case "rename":
+			data = self.renames[path]
+		}
+		if err := f(path, typ, data); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func allowed(path string) bool {
+	name := filepath.Base(path)
+	for _, pat := range conf.Ignore_name {
+		if matched, err := filepath.Match(pat, name); err == nil && matched {
+			return false
+		}
+	}
+	return true
+}
+
+func remote_hostname(path string) (string, string) {
+	for q, val := range remote_dirs {
+		if strings.HasPrefix(path, q) {
+			return q, val
+		}
+	}
+	return "", ""
+}
+
+func resolve_remote_name(path, defval string) string {
+	remote_dir, rh := remote_hostname(path)
+	if remote_dir != "" && rh != "" {
+		r, err := filepath.Rel(remote_dir, path)
+		if err == nil {
+			return rh + ":" + r
+		}
+	}
+	return defval
+}
+
+func walk(base string, names *utils.Set[string], pmap map[string]string) error {
+	return filepath.WalkDir(base, func(path string, d fs.DirEntry, err error) error {
+		if err != nil {
+			// d may be nil when WalkDir reports an error, so stop before using it
+			return err
+		}
+		is_allowed := allowed(path)
+		if !is_allowed {
+			if d.IsDir() {
+				return fs.SkipDir
+			}
+			return nil
+		}
+		path, err = filepath.Abs(path)
+		if err != nil {
+			return err
+		}
+		name, err := filepath.Rel(base, path)
+		if err != nil {
+			return err
+		}
+		path_name_map[path] = name
+		names.Add(name)
+		pmap[name] = path
+		return nil
+	})
+}
+
+func (self *Collection) collect_files(left, right string) error {
+	left_names, right_names := utils.NewSet[string](16), utils.NewSet[string](16)
+	left_path_map, right_path_map := make(map[string]string, 16), make(map[string]string, 16)
+	err := walk(left, left_names, left_path_map)
+	if err != nil {
+		return err
+	}
+	err = walk(right, right_names, right_path_map)
+	if err != nil {
+		return err
+	}
+	common_names := left_names.Intersect(right_names)
+	changed_names := utils.NewSet[string](common_names.Len())
+	for n := range common_names.Iterable() {
+		ld, err := data_for_path(left_path_map[n])
+		var rd string
+		if err == nil {
+			rd, err = data_for_path(right_path_map[n])
+		}
+		if err != nil {
+			return err
+		}
+		if ld != rd {
+			changed_names.Add(n)
+			self.add_change(left_path_map[n], right_path_map[n])
+		}
+	}
+	removed := left_names.Subtract(common_names)
+	added := right_names.Subtract(common_names)
+	ahash, rhash := make(map[string]string, added.Len()), make(map[string]string, removed.Len())
+	for a := range added.Iterable() {
+		ahash[a], err = hash_for_path(right_path_map[a])
+		if err != nil {
+			return err
+		}
+	}
+	for r := range removed.Iterable() {
+		rhash[r], err = hash_for_path(left_path_map[r])
+		if err != nil {
+			return err
+		}
+	}
+	for name, rh := range rhash {
+		found := false
+		for n, ah := range ahash {
+			if ah == rh {
+				ld, _ := data_for_path(left_path_map[name])
+				rd, _ := data_for_path(right_path_map[n])
+				if ld == rd {
+					self.add_rename(left_path_map[name], right_path_map[n])
+					added.Discard(n)
+					found = true
+					break
+				}
+			}
+		}
+		if !found {
+			self.add_removal(left_path_map[name])
+		}
+	}
+	for name := range added.Iterable() {
+		self.add_add(right_path_map[name])
+	}
+	return nil
 }
 
 func create_collection(left, right string) (ans *Collection, err error) {
-	path_name_map = make(map[string]string, 32)
-	remote_dirs = make(map[string]string, 32)
-	ans = &Collection{}
+	ans = &Collection{
+		changes:   make(map[string]string),
+		renames:   make(map[string]string),
+		type_map:  make(map[string]string),
+		adds:      utils.NewSet[string](32),
+		removes:   utils.NewSet[string](32),
+		all_paths: make([]string, 0, 32),
+	}
 	left_stat, err := os.Stat(left)
 	if err != nil {
 		return nil, err
@@ -89,11 +333,8 @@ func create_collection(left, right string) (ans *Collection, err error) {
 		}
 		path_name_map[pl] = resolve_remote_name(pl, left)
 		path_name_map[pr] = resolve_remote_name(pr, right)
-		err = ans.add_change(pl, pr)
-		if err != nil {
-			return nil, err
-		}
+		ans.add_change(pl, pr)
 	}
-	err = ans.finalize()
+	ans.finalize()
 	return ans, err
 }
diff --git a/tools/cmd/diff/main.go b/tools/cmd/diff/main.go
index 48eb0d6b9..ab4b51587 100644
--- a/tools/cmd/diff/main.go
+++ b/tools/cmd/diff/main.go
@@ -3,12 +3,20 @@
 package diff
 
 import (
+	"archive/tar"
+	"bytes"
 	"fmt"
+	"io/fs"
 	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
 
 	"kitty/tools/cli"
+	"kitty/tools/cmd/ssh"
 	"kitty/tools/config"
 	"kitty/tools/tui/loop"
+	"kitty/tools/utils"
 )
 
 var _ = fmt.Print
@@ -39,6 +47,57 @@ func exists(path string) bool {
 	return err == nil
 }
 
+func get_ssh_file(hostname, rpath string) (string, error) {
+	tdir, err := os.MkdirTemp("", "*-"+hostname)
+	if err != nil {
+		return "", err
+	}
+	add_remote_dir(tdir)
+	is_abs := strings.HasPrefix(rpath, "/")
+	for strings.HasPrefix(rpath, "/") {
+		rpath = rpath[1:]
+	}
+	cmd := []string{ssh.SSHExe(), hostname, "tar", "-c", "-f", "-"}
+	if is_abs {
+		cmd = append(cmd, "-C", "/")
+	}
+	cmd = append(cmd, rpath)
+	c := exec.Command(cmd[0], cmd[1:]...)
+	stdout, err := c.Output()
+	if err != nil {
+		return "", fmt.Errorf("Failed to ssh into remote host %s to get file %s with error: %w", hostname, rpath, err)
+	}
+	tf := tar.NewReader(bytes.NewReader(stdout))
+	count, err := utils.ExtractAllFromTar(tf, tdir)
+	if err != nil {
+		return "", fmt.Errorf("Failed to untar data from remote host %s to get file %s with error: %w", hostname, rpath, err)
+	}
+	ans := filepath.Join(tdir, rpath)
+	if count == 1 {
+		filepath.WalkDir(tdir, func(path string, d fs.DirEntry, err error) error {
+			if err != nil {
+				return err
+			}
+			if !d.IsDir() {
+				ans = path
+				return fs.SkipAll
+			}
+			return nil
+		})
+	}
+	return ans, nil
+}
+
+func get_remote_file(path string) (string, error) {
+	if strings.HasPrefix(path, "ssh:") {
+		parts := strings.SplitN(path, ":", 3)
+		if len(parts) == 3 {
+			return get_ssh_file(parts[1], parts[2])
+		}
+	}
+	return path, nil
+}
+
 func main(_ *cli.Command, opts_ *Options, args []string) (rc int, err error) {
 	opts = opts_
 	conf, err = load_config(opts)
@@ -52,7 +111,19 @@ func main(_ *cli.Command, opts_ *Options, args []string) (rc int, err error) {
 		return 1, err
 	}
 	init_caches()
-	left, right := get_remote_file(args[0]), get_remote_file(args[1])
+	defer func() {
+		for tdir := range remote_dirs {
+			os.RemoveAll(tdir)
+		}
+	}()
+	left, err := get_remote_file(args[0])
+	if err != nil {
+		return 1, err
+	}
+	right, err := get_remote_file(args[1])
+	if err != nil {
+		return 1, err
+	}
 	if isdir(left) != isdir(right) {
 		return 1, fmt.Errorf("The items to be diffed should both be either directories or files. Comparing a directory to a file is not valid.'")
 	}
diff --git a/tools/utils/tar.go b/tools/utils/tar.go
new file mode 100644
index 000000000..1d6ad9963
--- /dev/null
+++ b/tools/utils/tar.go
@@ -0,0 +1,117 @@
+// License: GPLv3 Copyright: 2023, Kovid Goyal,
+
+package utils
+
+import (
+	"archive/tar"
+	"errors"
+	"fmt"
+	"io"
+	"io/fs"
+	"os"
+	"path/filepath"
+	"strings"
+)
+
+var _ = fmt.Print
+
+type TarExtractOptions struct {
+	DontPreservePermissions bool
+}
+
+// ExtractAllFromTar extracts the directories, regular files, hard links and
+// symlinks read from tr below dest_path and returns the number of entries
+// extracted. Entries whose name is not local to dest_path are skipped.
+// Permission bits from the archive are applied, except to symlinks, unless
+// optss[0].DontPreservePermissions is set.
+func ExtractAllFromTar(tr *tar.Reader, dest_path string, optss ...TarExtractOptions) (count int, err error) {
+	opts := TarExtractOptions{}
+	if len(optss) > 0 {
+		opts = optss[0]
+	}
+	dest_path, err = filepath.Abs(dest_path)
+	if err != nil {
+		return
+	}
+
+	mode := func(hdr *tar.Header) fs.FileMode {
+		return fs.FileMode(hdr.Mode) & (fs.ModePerm | fs.ModeSetgid | fs.ModeSetuid | fs.ModeSticky)
+	}
+
+	set_metadata := func(chmod func(mode fs.FileMode) error, hdr *tar.Header) (err error) {
+		if !opts.DontPreservePermissions && chmod != nil {
+			perms := mode(hdr)
+			if err = chmod(perms); err != nil {
+				return err
+			}
+		}
+		count++
+		return
+	}
+
+	for {
+		var hdr *tar.Header
+		hdr, err = tr.Next()
+		if errors.Is(err, io.EOF) {
+			// the end of the archive is the normal way out and must not reach the caller as an error
+			err = nil
+			break
+		}
+		if err != nil {
+			return count, err
+		}
+		dest := hdr.Name
+		dest = strings.TrimLeft(dest, "/")
+		if !filepath.IsLocal(dest) {
+			continue
+		}
+		dest = filepath.Join(dest_path, dest)
+		switch hdr.Typeflag {
+		case tar.TypeDir:
+			err = os.MkdirAll(dest, 0o700)
+			if err != nil {
+				return
+			}
+			if err = set_metadata(func(m fs.FileMode) error { return os.Chmod(dest, m) }, hdr); err != nil {
+				return
+			}
+		case tar.TypeReg, tar.TypeRegA:
+			// the archive is not required to contain entries for the directories above a file
+			if err = os.MkdirAll(filepath.Dir(dest), 0o700); err != nil {
+				return
+			}
+			var d *os.File
+			if d, err = os.Create(dest); err != nil {
+				return
+			}
+			err = set_metadata(d.Chmod, hdr)
+			if err == nil {
+				_, err = io.Copy(d, tr)
+			}
+			d.Close()
+			if err != nil {
+				return
+			}
+		case tar.TypeLink:
+			// resolve the link target inside dest_path, the same way entry names are resolved
+			link_target := strings.TrimLeft(hdr.Linkname, "/")
+			if !filepath.IsLocal(link_target) {
+				continue
+			}
+			if err = os.Link(filepath.Join(dest_path, link_target), dest); err != nil {
+				return
+			}
+			if err = set_metadata(func(m fs.FileMode) error { return os.Chmod(dest, m) }, hdr); err != nil {
+				return
+			}
+		case tar.TypeSymlink:
+			if err = os.Symlink(hdr.Linkname, dest); err != nil {
+				return
+			}
+			if err = set_metadata(nil, hdr); err != nil {
+				return
+			}
+		}
+	}
+	return
+}