Start work on the diff kitten

This commit is contained in:
Kovid Goyal 2018-04-17 17:43:56 +05:30
parent 3d99361790
commit 0a8dc4be0a
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 286 additions and 0 deletions

0
kittens/diff/__init__.py Normal file
View File

101
kittens/diff/collect.py Normal file
View File

@ -0,0 +1,101 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2018, Kovid Goyal <kovid at kovidgoyal.net>
import os
from functools import lru_cache
from hashlib import md5
from mimetypes import guess_type
class Collection:
    """Accumulates the differences found between a left and a right side.

    The instance is a plain container; the ``add_*`` methods only record
    what they are given and perform no validation.
    """

    def __init__(self):
        # changes: list of (left_path, right_path) tuples
        # renames: dict mapping left_path -> right_path
        self.changes, self.renames = [], {}
        # adds: list of right-side paths; removes: list of left-side paths
        self.adds, self.removes = [], []

    def add_change(self, left_path, right_path):
        """Record a pair of paths whose contents differ."""
        entry = (left_path, right_path)
        self.changes.append(entry)

    def add_rename(self, left_path, right_path):
        """Record that left_path corresponds to right_path on the other side."""
        self.renames[left_path] = right_path

    def add_add(self, right_path):
        """Record a path that exists only on the right side."""
        self.adds.append(right_path)

    def add_removal(self, left_path):
        """Record a path that exists only on the left side."""
        self.removes.append(left_path)
def collect_files(collection, left, right):
    """Compare two directory trees and record the differences in *collection*.

    Every file below *left* and *right* is indexed by its path relative to
    the respective base directory. Names present on both sides whose data
    differs are recorded as changes. A left-only file whose content is
    identical to a right-only file is recorded as a rename; the remaining
    left-only files are removals and the remaining right-only files are adds.

    As a side effect the module-level ``path_name_map`` gains an entry
    (absolute path -> relative name) for every file visited.
    """
    left_names, right_names = set(), set()
    left_path_map, right_path_map = {}, {}

    def walk(base, names, pmap):
        # Index every file under base by its path relative to base.
        for dirpath, _dirnames, filenames in os.walk(base):
            for filename in filenames:
                path = os.path.abspath(os.path.join(dirpath, filename))
                path_name_map[path] = name = os.path.relpath(path, base)
                names.add(name)
                pmap[name] = path

    walk(left, left_names, left_path_map)
    walk(right, right_names, right_path_map)

    common_names = left_names & right_names
    # Sorted iteration keeps the recorded order stable between runs.
    for n in sorted(common_names):
        if data_for_path(left_path_map[n]) != data_for_path(right_path_map[n]):
            collection.add_change(left_path_map[n], right_path_map[n])

    removed = left_names - common_names
    added = right_names - common_names
    ahash = {a: hash_for_path(right_path_map[a]) for a in added}
    rhash = {r: hash_for_path(left_path_map[r]) for r in removed}

    for name in sorted(removed):
        rh = rhash[name]
        left_path = left_path_map[name]
        # Iterate over a sorted copy: matched names are dropped from `added`
        # so that one added file is never paired with two removed files.
        for n in sorted(added):
            # The hash is a cheap pre-filter; the data comparison confirms it.
            if ahash[n] == rh and data_for_path(left_path) == data_for_path(right_path_map[n]):
                # The rename target is the *added* file, so it must be looked
                # up under n; `name` only exists on the left side.
                collection.add_rename(left_path, right_path_map[n])
                added.discard(n)
                break
        else:
            collection.add_removal(left_path)

    for name in sorted(added):
        collection.add_add(right_path_map[name])
@lru_cache(maxsize=1024)
def mime_type_for_path(path):
    """Return the MIME type guessed for *path*.

    Falls back to 'application/octet-stream' when no type can be guessed.
    Results are memoized for up to 1024 distinct paths.
    """
    guessed, _encoding = guess_type(path)
    if guessed:
        return guessed
    return 'application/octet-stream'
@lru_cache(maxsize=1024)
def data_for_path(path):
    """Return the contents of the file at *path*.

    Files whose guessed MIME type starts with 'image/' are returned as raw
    bytes. Every other file is returned as text decoded from UTF-8, or as
    raw bytes when it is not valid UTF-8. Results are memoized for up to
    1024 distinct paths.
    """
    with open(path, 'rb') as stream:
        raw = stream.read()
    if mime_type_for_path(path).startswith('image/'):
        return raw
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        # Not valid UTF-8: hand back the undecoded bytes instead.
        return raw
@lru_cache(maxsize=1024)
def hash_for_path(path):
    """Return the MD5 digest (bytes) of the contents of the file at *path*.

    The digest is used only to compare file contents, not for security.
    Results are memoized for up to 1024 distinct paths.
    """
    data = data_for_path(path)
    if isinstance(data, str):
        # data_for_path() hands back decoded text for UTF-8 files, but md5()
        # only accepts bytes; re-encoding restores the original byte content.
        data = data.encode('utf-8')
    return md5(data).digest()
# Module-level registry shared by the collection functions. collect_files()
# stores one entry per visited file: absolute path -> path relative to the
# directory that was walked.
path_name_map = dict()
def create_collection(left, right):
    """Build a Collection describing the differences between *left* and *right*.

    When *left* is a directory both arguments are treated as directory trees
    and compared file by file. Otherwise they are treated as two single
    files and recorded as one change, without comparing their contents.

    Returns the populated Collection.
    """
    collection = Collection()
    if os.path.isdir(left):
        collect_files(collection, left, right)
    else:
        pl, pr = os.path.abspath(left), os.path.abspath(right)
        # Key the registry by absolute path, exactly as collect_files() does,
        # so a lookup by the paths stored in the collection finds the name.
        path_name_map[pl] = left
        path_name_map[pr] = right
        collection.add_change(pl, pr)
    return collection

128
kittens/diff/git.py Normal file
View File

@ -0,0 +1,128 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2018, Kovid Goyal <kovid at kovidgoyal.net>
import concurrent.futures
import os
import subprocess
def run_diff(file1, file2, context=3):
    """Diff two files with ``git diff --no-index``.

    *context* is the number of context lines requested around each change.

    Returns a 3-tuple:
      * ``(True, is_different, patch)`` when git exits with 0 or 1, where
        ``is_different`` is True for exit status 1 and ``patch`` is git's
        standard output decoded as UTF-8;
      * ``(False, returncode, error_text)`` for any other exit status, where
        ``error_text`` is git's standard error decoded as UTF-8.

    Raises FileNotFoundError (from subprocess) when the git executable
    cannot be found.
    """
    cmd = [
        # `diff` is the subcommand; --no-index lets it compare arbitrary
        # paths outside of a repository.
        'git', 'diff', '--no-color', '--no-ext-diff', '--exit-code',
        # The count is attached to -U so that it can never be mistaken for
        # a path argument.
        '-U{}'.format(context), '--no-index', '--', file1, file2,
    ]
    p = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.DEVNULL)
    stdout, stderr = p.communicate()
    # communicate() has already waited for the process to exit.
    returncode = p.returncode
    if returncode in (0, 1):
        return True, returncode == 1, stdout.decode('utf-8')
    return False, returncode, stderr.decode('utf-8')
class Hunk:
    """One hunk of a patch, tracked as two parallel line lists.

    ``left_lines`` and ``right_lines`` hold ``(position, is_changed)``
    tuples, where position counts lines from 0 within the hunk. A position
    of None marks a filler entry inserted to keep both lists the same
    length.
    """

    def __init__(self, title, left, right):
        # left and right are (start, count) pairs from the hunk header.
        self.left_start, self.left_count = left
        self.right_start, self.right_count = right
        self.title = title
        self.left_lines, self.right_lines = [], []
        self.left_pos = 0
        self.right_pos = 0

    def add_line(self):
        """Record a line that exists only on the right side."""
        entry = (self.right_pos, True)
        self.right_lines.append(entry)
        self.right_pos += 1

    def remove_line(self):
        """Record a line that exists only on the left side."""
        entry = (self.left_pos, True)
        self.left_lines.append(entry)
        self.left_pos += 1

    def _even_up_sides(self):
        """Pad the shorter list with filler entries until both are equal."""
        n_left, n_right = len(self.left_lines), len(self.right_lines)
        filler = (None, True)
        if n_left < n_right:
            self.left_lines.extend([filler] * (n_right - n_left))
        elif n_right < n_left:
            self.right_lines.extend([filler] * (n_left - n_right))

    def context_line(self):
        """Record a line common to both sides, aligning the lists first."""
        self._even_up_sides()
        self.left_lines.append((self.left_pos, False))
        self.right_lines.append((self.right_pos, False))
        self.left_pos += 1
        self.right_pos += 1

    def finalize(self):
        """Align both sides and verify the line totals against the header.

        Raises ValueError when the number of lines consumed on a side does
        not match the count given at construction.
        """
        self._even_up_sides()
        # Sanity check
        seen_left, want_left = self.left_pos, self.left_count
        if seen_left != want_left:
            raise ValueError('Left side line mismatch {} != {}'.format(seen_left, want_left))
        seen_right, want_right = self.right_pos, self.right_count
        if seen_right != want_right:
            raise ValueError('Right side line mismatch {} != {}'.format(seen_right, want_right))
def parse_range(x):
    """Parse one range token of a hunk header, such as '-12,5' or '+7'.

    The first character (the side marker) is discarded. Returns a
    ``(start, count)`` tuple; count is 1 when the token has no comma.
    """
    start_text, comma, count_text = x[1:].partition(',')
    start = abs(int(start_text))
    count = int(count_text) if comma else 1
    return start, count
def parse_hunk_header(line):
    """Build a Hunk from a header line such as '@@ -1,3 +1,4 @@ title'.

    The text between the '@@' markers supplies the left and right ranges;
    any text after the second marker becomes the hunk title.
    """
    pieces = tuple(piece for piece in line.split('@@', 2) if piece)
    linespec = pieces[0].strip()
    title = pieces[1].strip() if len(pieces) == 2 else ''
    left, right = (parse_range(spec) for spec in linespec.split())
    return Hunk(title, left, right)
def parse_patch(raw):
    """Parse unified diff text into a list of Hunk objects.

    Lines before the first '@@ ' header (the per-file header lines) are
    ignored. Within a hunk, '+' lines are additions, '-' lines are removals
    and everything else is context.

    Returns the list of finalized hunks, in the order they appear in *raw*
    (an empty list when there are none).

    Raises ValueError (from Hunk.finalize) when a hunk's line totals do not
    match its header.
    """
    all_hunks = []
    current_hunk = None
    for line in raw.splitlines():
        if line.startswith('@@ '):
            current_hunk = parse_hunk_header(line)
            all_hunks.append(current_hunk)
            continue
        if current_hunk is None:
            continue
        # Slice instead of index so an empty line cannot raise IndexError;
        # it is treated as an (empty) context line.
        marker = line[:1]
        if marker == '+':
            current_hunk.add_line()
        elif marker == '-':
            current_hunk.remove_line()
        elif marker == '\\':
            # "\ No newline at end of file" annotates the previous line and
            # is not itself a line of either file.
            continue
        else:
            current_hunk.context_line()
    for h in all_hunks:
        h.finalize()
    return all_hunks
class Differ:
    """Queues file pairs and diffs all of them on a thread pool."""

    def __init__(self):
        # Queued (file1, file2) tuples, in the order they were added.
        self.jobs = []

    def add_diff(self, file1, file2):
        """Queue the pair (file1, file2) for diffing."""
        self.jobs.append((file1, file2))

    def __call__(self):
        """Run every queued diff and return {(file1, file2): parsed patch}.

        Raises SystemExit when the git executable is missing or when a diff
        fails; any other exception is reported on stdout and re-raised.
        """
        context = 3
        results = {}
        with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count()) as pool:
            pending = {}
            for pair in self.jobs:
                pending[pool.submit(run_diff, pair[0], pair[1], context)] = pair
            for finished in concurrent.futures.as_completed(pending):
                key = pending[finished]
                file1, file2 = key
                try:
                    ok, returncode, output = finished.result()
                except FileNotFoundError:
                    raise SystemExit('Could not find the git executable. Is it in your PATH?')
                except Exception:
                    print('Running git diff for {} vs. {} generated an exception'.format(file1, file2))
                    raise
                if not ok:
                    print(output)
                    raise SystemExit('Running git diff for {} vs. {} failed'.format(file1, file2))
                results[key] = parse_patch(output)
        return results

57
kittens/diff/main.py Normal file
View File

@ -0,0 +1,57 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2018, Kovid Goyal <kovid at kovidgoyal.net>
import os
import sys
from functools import partial
from kitty.cli import parse_args
from ..tui.handler import Handler
from ..tui.loop import Loop
from ..tui.operations import set_line_wrapping, set_window_title
from .collect import create_collection
class DiffHandler(Handler):
    """Handler for the diff kitten; holds the collection being shown."""

    def __init__(self, collection):
        self.collection = collection

    def init_terminal_state(self):
        """Write the terminal set-up operations, one after the other."""
        wrapping_op = set_line_wrapping(False)
        self.write(wrapping_op)
        title_op = set_window_title('kitty +diff')
        self.write(title_op)

    def initialize(self, *args):
        """Run the base class initialization, then set up the terminal."""
        Handler.initialize(self, *args)
        self.init_terminal_state()
# Callable passed to parse_args() in main(); calling it yields the formatted
# template text. The template is currently empty: the backslash right after
# the opening quotes suppresses the first newline, so the literal is ''.
# NOTE(review): presumably this is meant to hold the option definitions for
# the kitten -- confirm against kitty.cli.parse_args.
OPTIONS = partial('''\
'''.format, )
def main(args):
    """Entry point: parse the command line, collect the diff and run the UI.

    *args* is the full argument vector; its first element is skipped.
    Raises SystemExit when the arguments are not exactly two items of the
    same kind (both directories or both not), or with the loop's return
    code when that code is non-zero.
    """
    msg = 'Show a side-by-side diff of the specified files/directories'
    usage = 'file_or_directory file_or_directory'
    cli_opts, items = parse_args(args[1:], OPTIONS, usage, msg, 'kitty +kitten diff')
    if len(items) != 2:
        raise SystemExit('You must specify exactly two files/directories to compare')
    left, right = items
    left_is_dir = os.path.isdir(left)
    right_is_dir = os.path.isdir(right)
    if left_is_dir != right_is_dir:
        raise SystemExit('The items to be diffed should both be either directories or files. Comparing a directory to a file is not valid.')
    collection = create_collection(left, right)
    loop = Loop()
    loop.loop(DiffHandler(collection))
    if loop.return_code != 0:
        raise SystemExit(loop.return_code)
def handle_result(args, current_char, target_window_id, boss):
    """Do nothing with the supplied arguments and return None.

    NOTE(review): presumably the result hook kitty calls after the kitten
    finishes -- confirm against the kitten API.
    """
    return None
# Script entry point: pass the full argument vector to main(), which skips
# the first element itself.
# NOTE(review): this module uses relative imports, so it has to be executed
# as part of its package rather than as a stand-alone file.
if __name__ == '__main__':
    main(sys.argv)