From 0a8dc4be0a239aae53aff782deadd346abdd820b Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 17 Apr 2018 17:43:56 +0530
Subject: [PATCH] Start work on the diff kitten

---
 kittens/diff/__init__.py |   0
 kittens/diff/collect.py  | 129 +++++++++++++++++++++++++++++++++
 kittens/diff/git.py      | 154 +++++++++++++++++++++++++++++++++++++++
 kittens/diff/main.py     |  62 ++++++++++++++++
 4 files changed, 345 insertions(+)
 create mode 100644 kittens/diff/__init__.py
 create mode 100644 kittens/diff/collect.py
 create mode 100644 kittens/diff/git.py
 create mode 100644 kittens/diff/main.py

diff --git a/kittens/diff/__init__.py b/kittens/diff/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/kittens/diff/collect.py b/kittens/diff/collect.py
new file mode 100644
index 000000000..4f93ca2d1
--- /dev/null
+++ b/kittens/diff/collect.py
@@ -0,0 +1,129 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+# License: GPL v3 Copyright: 2018, Kovid Goyal
+
+import os
+from functools import lru_cache
+from hashlib import md5
+from mimetypes import guess_type
+
+
+class Collection:
+    '''Accumulates the differences found between the left and right sides.'''
+
+    def __init__(self):
+        self.changes = []  # (left_path, right_path) pairs with differing contents
+        self.renames = {}  # left_path -> right_path for identical files that moved
+        self.adds = []  # paths that exist only on the right
+        self.removes = []  # paths that exist only on the left
+
+    def add_change(self, left_path, right_path):
+        self.changes.append((left_path, right_path))
+
+    def add_rename(self, left_path, right_path):
+        self.renames[left_path] = right_path
+
+    def add_add(self, right_path):
+        self.adds.append(right_path)
+
+    def add_removal(self, left_path):
+        self.removes.append(left_path)
+
+
+def collect_files(collection, left, right):
+    '''Compare the directory trees left and right, recording the results in
+    collection.
+
+    Files are matched by their path relative to the tree root. A file that
+    exists on only one side is recorded as a rename if a file with identical
+    contents exists only on the other side, otherwise as a removal (left
+    only) or an addition (right only).'''
+    left_names, right_names = set(), set()
+    left_path_map, right_path_map = {}, {}
+
+    def walk(base, names, pmap):
+        for dirpath, dirnames, filenames in os.walk(base):
+            for filename in filenames:
+                path = os.path.abspath(os.path.join(dirpath, filename))
+                path_name_map[path] = name = os.path.relpath(path, base)
+                names.add(name)
+                pmap[name] = path
+
+    walk(left, left_names, left_path_map), walk(right, right_names, right_path_map)
+    common_names = left_names & right_names
+    changed_names = {n for n in common_names if data_for_path(left_path_map[n]) != data_for_path(right_path_map[n])}
+    for n in changed_names:
+        collection.add_change(left_path_map[n], right_path_map[n])
+
+    removed = left_names - common_names
+    added = right_names - common_names
+    ahash = {a: hash_for_path(right_path_map[a]) for a in added}
+    rhash = {r: hash_for_path(left_path_map[r]) for r in removed}
+    for name, rh in rhash.items():
+        for n, ah in ahash.items():
+            # An added file can be the target of only one rename. The hash is
+            # a cheap pre-filter, the data comparison below is authoritative.
+            if n not in added or ah != rh:
+                continue
+            if data_for_path(left_path_map[name]) == data_for_path(right_path_map[n]):
+                # name exists only on the left, the right side path comes from n
+                collection.add_rename(left_path_map[name], right_path_map[n])
+                added.discard(n)
+                break
+        else:
+            collection.add_removal(left_path_map[name])
+
+    for name in added:
+        collection.add_add(right_path_map[name])
+
+
+@lru_cache(maxsize=1024)
+def mime_type_for_path(path):
+    '''Guess the MIME type of path from its name, falling back to
+    application/octet-stream.'''
+    return guess_type(path)[0] or 'application/octet-stream'
+
+
+@lru_cache(maxsize=1024)
+def data_for_path(path):
+    '''Return the contents of path as str if it is not an image and is valid
+    UTF-8, otherwise as bytes.'''
+    with open(path, 'rb') as f:
+        ans = f.read()
+    if not mime_type_for_path(path).startswith('image/'):
+        try:
+            ans = ans.decode('utf-8')
+        except UnicodeDecodeError:
+            pass
+    return ans
+
+
+@lru_cache(maxsize=1024)
+def hash_for_path(path):
+    '''Return the MD5 digest of the contents of path. It is used only to
+    cheaply find candidate identical files, not for security.'''
+    data = data_for_path(path)
+    if isinstance(data, str):
+        # md5() needs bytes, but data_for_path() decodes text files
+        data = data.encode('utf-8')
+    return md5(data).digest()
+
+
+# Maps the absolute path of a file to the name it was found under
+path_name_map = {}
+
+
+def create_collection(left, right):
+    '''Return the Collection of differences between left and right. If left
+    is a directory the two trees are compared, otherwise left and right are
+    treated as a single pair of files to diff.'''
+    collection = Collection()
+    if os.path.isdir(left):
+        collect_files(collection, left, right)
+    else:
+        pl, pr = os.path.abspath(left), os.path.abspath(right)
+        # Keyed by absolute path, the same as in collect_files()
+        path_name_map[pl] = left
+        path_name_map[pr] = right
+        collection.add_change(pl, pr)
+    return collection
diff --git a/kittens/diff/git.py b/kittens/diff/git.py
new file mode 100644
index 000000000..d084b2a68
--- /dev/null
+++ b/kittens/diff/git.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+# License: GPL v3 Copyright: 2018, Kovid Goyal
+
+import concurrent.futures
+import os
+import subprocess
+
+
+def run_diff(file1, file2, context=3):
+    '''Diff file1 against file2 using git, showing context lines of context.
+
+    Returns (True, is_different, patch) if git ran successfully and
+    (False, returncode, stderr) otherwise.'''
+    # diff is the git sub-command to run, --no-index allows diffing paths that
+    # are not in a repository and the line count must be attached to -U
+    cmd = [
+        'git', 'diff', '--no-color', '--no-ext-diff', '--exit-code',
+        '-U' + str(context), '--no-index', '--', file1, file2
+    ]
+    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.DEVNULL)
+    stdout, stderr = p.communicate()
+    returncode = p.returncode
+    # With --exit-code, 0 means no differences and 1 means there are differences
+    if returncode in (0, 1):
+        # The diffed files need not be UTF-8, so never fail on decoding
+        return True, returncode == 1, stdout.decode('utf-8', 'replace')
+    return False, returncode, stderr.decode('utf-8', 'replace')
+
+
+class Hunk:
+    '''A hunk of a unified diff, as two parallel lists of lines.
+
+    Every entry in left_lines and right_lines is a (position, is_changed)
+    tuple. position is the zero based index of the line in that side of the
+    hunk, or None for a filler entry added to keep the two sides aligned.'''
+
+    def __init__(self, title, left, right):
+        self.left_start, self.left_count = left
+        self.right_start, self.right_count = right
+        self.title = title
+        self.left_lines = []
+        self.right_lines = []
+        self.left_pos = self.right_pos = 0
+
+    def add_line(self):
+        self.right_lines.append((self.right_pos, True))
+        self.right_pos += 1
+
+    def remove_line(self):
+        self.left_lines.append((self.left_pos, True))
+        self.left_pos += 1
+
+    def _even_up_sides(self):
+        # Pad the shorter side with fillers so both sides have the same length
+        delta = len(self.left_lines) - len(self.right_lines)
+        if delta != 0:
+            dest = self.left_lines if delta < 0 else self.right_lines
+            for i in range(abs(delta)):
+                dest.append((None, True))
+
+    def context_line(self):
+        self._even_up_sides()
+        self.left_lines.append((self.left_pos, False))
+        self.right_lines.append((self.right_pos, False))
+        self.left_pos += 1
+        self.right_pos += 1
+
+    def finalize(self):
+        self._even_up_sides()
+        # Sanity check
+        if self.left_pos != self.left_count:
+            raise ValueError('Left side line mismatch {} != {}'.format(self.left_pos, self.left_count))
+        if self.right_pos != self.right_count:
+            raise ValueError('Right side line mismatch {} != {}'.format(self.right_pos, self.right_count))
+
+
+def parse_range(x):
+    '''Parse one range of a hunk header, such as -12,5 or +7, returning
+    (start, count). count is 1 if it is not specified.'''
+    parts = x[1:].split(',', 1)
+    start = abs(int(parts[0]))
+    count = 1 if len(parts) < 2 else int(parts[1])
+    return start, count
+
+
+def parse_hunk_header(line):
+    '''Create a Hunk from a header line of the form: @@ -l,c +l,c @@ title'''
+    parts = tuple(filter(None, line.split('@@', 2)))
+    linespec = parts[0].strip()
+    title = ''
+    if len(parts) == 2:
+        title = parts[1].strip()
+    left, right = map(parse_range, linespec.split())
+    return Hunk(title, left, right)
+
+
+def parse_patch(raw):
+    '''Parse the unified diff raw, returning its hunks as a list of Hunk.
+    Anything before the first hunk header is ignored.'''
+    all_hunks = []
+    for line in raw.splitlines():
+        if line.startswith('@@ '):
+            all_hunks.append(parse_hunk_header(line))
+        else:
+            if not all_hunks:
+                continue
+            q = line[:1]  # line can be empty, so do not use line[0]
+            if q == '+':
+                all_hunks[-1].add_line()
+            elif q == '-':
+                all_hunks[-1].remove_line()
+            elif q == '\\':
+                # "\ No newline at end of file" marker, not a line of content
+                continue
+            else:
+                all_hunks[-1].context_line()
+    for h in all_hunks:
+        h.finalize()
+    return all_hunks
+
+
+class Differ:
+    '''Runs a batch of diffs in parallel.
+
+    Queue the diffs with add_diff() then call the instance to get a dict
+    mapping (file1, file2) to the list of Hunk for that pair.'''
+
+    def __init__(self):
+        self.jobs = []
+
+    def add_diff(self, file1, file2):
+        key = file1, file2
+        self.jobs.append(key)
+
+    def __call__(self):
+        context = 3
+        ans = {}
+        with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
+            jobs = {executor.submit(run_diff, key[0], key[1], context): key for key in self.jobs}
+            for future in concurrent.futures.as_completed(jobs):
+                key = jobs[future]
+                try:
+                    ok, returncode, output = future.result()
+                except FileNotFoundError:
+                    raise SystemExit('Could not find the git executable. Is it in your PATH?')
+                except Exception:
+                    print('Running git diff for {} vs. {} generated an exception'.format(key[0], key[1]))
+                    raise
+                if not ok:
+                    print(output)
+                    raise SystemExit('Running git diff for {} vs. {} failed'.format(key[0], key[1]))
+                ans[key] = parse_patch(output)
+        return ans
diff --git a/kittens/diff/main.py b/kittens/diff/main.py
new file mode 100644
index 000000000..6c471847b
--- /dev/null
+++ b/kittens/diff/main.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+# License: GPL v3 Copyright: 2018, Kovid Goyal
+
+import os
+import sys
+from functools import partial
+
+from kitty.cli import parse_args
+
+from ..tui.handler import Handler
+from ..tui.loop import Loop
+from ..tui.operations import set_line_wrapping, set_window_title
+from .collect import create_collection
+
+
+class DiffHandler(Handler):
+    '''TUI handler for the diff kitten. For now it only stores the collection
+    and sets up the terminal state.'''
+
+    def __init__(self, collection):
+        self.collection = collection
+
+    def init_terminal_state(self):
+        self.write(set_line_wrapping(False))
+        self.write(set_window_title('kitty +diff'))
+
+    def initialize(self, *args):
+        Handler.initialize(self, *args)
+        self.init_terminal_state()
+
+
+OPTIONS = partial('''\
+'''.format, )
+
+
+def main(args):
+    '''Entry point: validate the two paths in args, collect their
+    differences and run the UI loop. Exits with the return code of the
+    loop if it is non-zero.'''
+    msg = 'Show a side-by-side diff of the specified files/directories'
+    args, items = parse_args(args[1:], OPTIONS, 'file_or_directory file_or_directory', msg, 'kitty +kitten diff')
+    if len(items) != 2:
+        raise SystemExit('You must specify exactly two files/directories to compare')
+    left, right = items
+    if os.path.isdir(left) != os.path.isdir(right):
+        raise SystemExit('The items to be diffed should both be either directories or files. Comparing a directory to a file is not valid.')
+    collection = create_collection(left, right)
+
+    loop = Loop()
+    handler = DiffHandler(collection)
+    loop.loop(handler)
+    if loop.return_code != 0:
+        raise SystemExit(loop.return_code)
+
+
+def handle_result(args, current_char, target_window_id, boss):
+    pass
+
+
+if __name__ == '__main__':
+    main(sys.argv)