More work on the diff kitten

This commit is contained in:
Kovid Goyal 2018-04-21 15:45:16 +05:30
parent 82acd6b511
commit 6929358976
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
5 changed files with 223 additions and 33 deletions

View File

@ -8,25 +8,47 @@ from hashlib import md5
from mimetypes import guess_type
path_name_map = {}
class Collection:
    """Record of the paths that differ between the two sides of a comparison.

    Every registered path is remembered in ``all_paths`` (the iteration
    order) and classified in ``type_map`` as one of ``'diff'``,
    ``'rename'``, ``'add'`` or ``'removal'``.
    """

    def __init__(self):
        self.changes = {}   # left path -> right path, for modified files
        self.renames = {}   # left path -> right path, for renamed files
        self.adds = set()   # right-side paths
        self.removes = set()  # left-side paths
        self.all_paths = []
        self.type_map = {}

    def add_change(self, left_path, right_path):
        """Register a pair of paths whose contents differ."""
        self.changes[left_path] = right_path
        self.all_paths.append(left_path)
        self.type_map[left_path] = 'diff'

    def add_rename(self, left_path, right_path):
        """Register a rename from left_path to right_path."""
        self.renames[left_path] = right_path
        self.all_paths.append(left_path)
        self.type_map[left_path] = 'rename'

    def add_add(self, right_path):
        """Register a path recorded as an addition."""
        self.adds.add(right_path)
        self.all_paths.append(right_path)
        self.type_map[right_path] = 'add'

    def add_removal(self, left_path):
        """Register a path recorded as a removal."""
        self.removes.add(left_path)
        self.all_paths.append(left_path)
        self.type_map[left_path] = 'removal'

    def finalize(self):
        """Sort the registered paths by their entry in ``path_name_map``."""
        self.all_paths.sort(key=path_name_map.get)

    def __iter__(self):
        """Yield ``(path, type, data)`` for every registered path.

        ``data`` is the right-hand path for ``'diff'`` entries and ``None``
        for every other type.
        """
        for path in self.all_paths:
            typ = self.type_map[path]
            data = self.changes[path] if typ == 'diff' else None
            yield path, typ, data
def collect_files(collection, left, right):
@ -86,9 +108,6 @@ def hash_for_path(path):
md5(data_for_path(path)).digest()
path_name_map = {}
def create_collection(left, right):
collection = Collection()
if os.path.isdir(left):
@ -98,4 +117,5 @@ def create_collection(left, right):
path_name_map[left] = pl
path_name_map[right] = pr
collection.add_change(pl, pr)
collection.finalize()
return collection

9
kittens/diff/config.py Normal file
View File

@ -0,0 +1,9 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2018, Kovid Goyal <kovid at kovidgoyal.net>
# Per-role format strings, keyed by the kind of text being rendered.
# NOTE(review): every value is empty here; presumably these are meant to hold
# terminal formatting codes -- confirm against the consumer of this table.
formats = {
'title': '',
'margin': '',
'text': '',
}

View File

@ -10,7 +10,7 @@ import subprocess
def run_diff(file1, file2, context=3):
# returns: ok, is_different, patch
p = subprocess.Popen([
'git', '--no-index', '--no-color', '--no-ext-diff', '--exit-code', '-U', str(context), '--'
'git', 'diff', '--no-color', '--no-ext-diff', '--exit-code', '-U' + str(context), '--no-index', '--'
] + [file1, file2],
stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.DEVNULL)
stdout, stderr = p.communicate()
@ -29,6 +29,7 @@ class Hunk:
self.left_lines = []
self.right_lines = []
self.left_pos = self.right_pos = 0
self.largest_line_number = max(self.left_start + self.left_count, self.right_start + self.right_count)
def add_line(self):
self.right_lines.append((self.right_pos, True))
@ -78,6 +79,19 @@ def parse_hunk_header(line):
return Hunk(title, left, right)
class Patch:
    """Sequence-like wrapper around the hunks that make up one diff."""

    def __init__(self, all_hunks):
        self.all_hunks = all_hunks
        # The figure is taken from the final hunk; an empty patch reports 0.
        if self.all_hunks:
            largest = self.all_hunks[-1].largest_line_number
        else:
            largest = 0
        self.largest_line_number = largest

    def __iter__(self):
        """Iterate over the hunks in order."""
        hunks = self.all_hunks
        return iter(hunks)

    def __len__(self):
        """Number of hunks in the patch."""
        hunks = self.all_hunks
        return len(hunks)
def parse_patch(raw):
all_hunks = []
for line in raw.splitlines():
@ -96,32 +110,38 @@ def parse_patch(raw):
all_hunks[-1].context_line()
for h in all_hunks:
h.finalize()
return Patch(all_hunks)
class Differ:
    """Queue pairs of files and diff them all concurrently."""

    def __init__(self):
        self.jmap = {}   # left path -> right path
        self.jobs = []   # left paths, in the order they were queued

    def add_diff(self, file1, file2):
        """Queue a diff of file1 against file2."""
        # Queue each left path once only, otherwise the same diff would be
        # run twice when a path is registered again.
        if file1 not in self.jmap:
            self.jobs.append(file1)
        self.jmap[file1] = file2

    def __call__(self, context=3):
        """Run every queued diff with the given number of context lines.

        Returns a dict mapping each left path to its parsed patch. On the
        first failure a ``str`` describing the problem is returned instead,
        so callers must check the type of the result.
        """
        ans = {}
        with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
            jobs = {executor.submit(run_diff, key, self.jmap[key], context): key for key in self.jobs}
            for future in concurrent.futures.as_completed(jobs):
                key = jobs[future]
                # The key is the left path itself, so indexing it would give
                # single characters; look the right path up explicitly.
                left_path, right_path = key, self.jmap[key]
                try:
                    ok, returncode, output = future.result()
                except FileNotFoundError as err:
                    return 'Could not find the {} executable. Is it in your PATH?'.format(err.filename)
                except Exception as e:
                    return 'Running git diff for {} vs. {} generated an exception: {}'.format(left_path, right_path, e)
                if not ok:
                    return output + '\nRunning git diff for {} vs. {} failed'.format(left_path, right_path)
                try:
                    patch = parse_patch(output)
                except Exception:
                    import traceback
                    return traceback.format_exc() + '\nParsing diff for {} vs. {} failed'.format(left_path, right_path)
                else:
                    ans[key] = patch
        return ans

View File

@ -13,20 +13,42 @@ from kitty.key_encoding import ESCAPE
from ..tui.handler import Handler
from ..tui.loop import Loop
from ..tui.operations import clear_screen, set_line_wrapping, set_window_title
from .collect import create_collection
from .collect import create_collection, data_for_path
from .git import Differ
from .render import render_diff
INITIALIZING, READY, RENDERED = range(3)
INITIALIZING, COLLECTED, DIFFED = range(3)
def generate_diff(collection, context):
    """Diff every changed, non-binary pair of files in the collection.

    Returns whatever the Differ call returns for the requested number of
    context lines.
    """
    differ = Differ()
    for left, kind, right in collection:
        if kind != 'diff':
            continue
        # Content that comes back as bytes is treated as binary and skipped.
        if isinstance(data_for_path(left), bytes):
            continue
        differ.add_diff(left, right)
    return differ(context)
class DiffHandler(Handler):
def __init__(self, args, left, right):
    """Remember what is being compared and start in the INITIALIZING state.

    args holds the parsed command line options; left and right are the two
    items to compare.
    """
    self.state = INITIALIZING
    self.left, self.right = left, right
    # Text to be reported on exit when a job fails; None while all is well.
    self.report_traceback_on_exit = None
    self.args = args
    self.scroll_pos = 0
def create_collection(self):
    """Start the 'collect' job that builds the collection for both sides."""
    # Only one job is started, and under the 'collect' id: the 'diff' id is
    # used for the diff job itself.
    self.start_job('collect', create_collection, self.left, self.right)
def generate_diff(self):
    """Start the 'diff' job for the collected paths."""
    context_lines = self.args.context
    self.start_job('diff', generate_diff, self.collection, context_lines)
def render_diff(self):
    """Render the diff for the current screen width and store it as a tuple."""
    columns = self.screen_size.cols
    rendered = render_diff(self.collection, self.diff_map, self.args, columns)
    self.diff_lines = tuple(rendered)
def init_terminal_state(self):
self.write(set_line_wrapping(False))
@ -39,14 +61,11 @@ class DiffHandler(Handler):
self.create_collection()
def draw_screen(self):
    """Redraw the screen for the current state.

    Until the diff has been computed only a waiting message is shown;
    afterwards the screen is simply cleared.
    """
    if self.state < DIFFED:
        self.write(clear_screen())
        self.write(_('Calculating diff, please wait...'))
        return
    self.write(clear_screen())
def on_key(self, key_event):
if self.state is INITIALIZING:
@ -54,15 +73,30 @@ class DiffHandler(Handler):
self.quit_loop(0)
return
def on_resize(self, screen_size):
    """Record the new screen size, re-render if possible, then redraw."""
    self.screen_size = screen_size
    # Rendering is only attempted once the state has moved past COLLECTED.
    past_collected = self.state > COLLECTED
    if past_collected:
        self.render_diff()
    self.draw_screen()
def on_job_done(self, job_id, job_result):
    """Advance the handler when a background job finishes.

    A result carrying a 'tb' entry, or a 'diff' result that is a string,
    is a failure: the text is kept for reporting on exit and the loop is
    stopped with return code 1.
    """
    if 'tb' in job_result:
        self.report_traceback_on_exit = job_result['tb']
        self.quit_loop(1)
        return
    if job_id == 'collect':
        self.collection = job_result['result']
        self.generate_diff()
    elif job_id == 'diff':
        diff_map = job_result['result']
        # The diff job reports failure by returning an error message
        # instead of a mapping.
        if isinstance(diff_map, str):
            self.report_traceback_on_exit = diff_map
            self.quit_loop(1)
            return
        self.state = DIFFED
        self.diff_map = diff_map
        self.render_diff()
        self.draw_screen()
def on_interrupt(self):
self.quit_loop(1)
@ -72,6 +106,10 @@ class DiffHandler(Handler):
OPTIONS = partial('''\
--context
type=int
default=3
Number of lines of context to show between changes.
'''.format, )
@ -85,7 +123,7 @@ def main(args):
raise SystemExit('The items to be diffed should both be either directories or files. Comparing a directory to a file is not valid.')
loop = Loop()
handler = DiffHandler(left, right)
handler = DiffHandler(args, left, right)
loop.loop(handler)
if loop.return_code != 0:
if handler.report_traceback_on_exit:

103
kittens/diff/render.py Normal file
View File

@ -0,0 +1,103 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2018, Kovid Goyal <kovid at kovidgoyal.net>
import re
from gettext import gettext as _
from functools import partial
from kitty.fast_data_types import wcswidth
from .collect import data_for_path, path_name_map
from .config import formats
# Matches a single C0 control character, DEL, or C1 control character.
sanitize_pat = re.compile('[\x00-\x1f\x7f\x80-\x9f]')
def human_readable(size, sep=' '):
    """ Convert a size in bytes into a human readable form

    The value is scaled to the largest unit that keeps it below 1024, cut
    (not rounded) to one decimal place, and a trailing ``.0`` is dropped.
    ``sep`` is placed between the number and the unit suffix.
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB')
    # Default to the largest unit, so that a size beyond the end of the table
    # is not labelled as plain bytes (and then mangled by the truncation
    # below once its float form switches to exponent notation).
    divisor, suffix = 1 << ((len(units) - 1) * 10), units[-1]
    for i, candidate in enumerate(units):
        if size < (1 << ((i + 1) * 10)):
            divisor, suffix = (1 << (i * 10)), candidate
            break
    text = str(float(size) / divisor)
    dot = text.find('.')
    if dot > -1:
        text = text[:dot + 2]
    if text.endswith('.0'):
        text = text[:-2]
    return text + sep + suffix
def sanitize_sub(m):
    """Return the matched character as its hex code point in angle brackets."""
    code_point = ord(m.group()[0])
    return '<' + format(code_point, 'x') + '>'
def sanitize(text):
    """Replace every character matched by sanitize_pat with a visible marker."""
    cleaned = sanitize_pat.sub(sanitize_sub, text)
    return cleaned
def fit_in(text, count):
    """Shorten text so that it occupies at most count cells.

    Text that already fits is returned unchanged. Otherwise it is cut to
    count - 1 cells and an ellipsis is appended to mark the truncation.
    A count of zero or less yields an empty string.
    """
    if count <= 0:
        # Nothing can fit; without this guard the trimming loop below never
        # terminates, because an empty string still measures more than
        # count - 1.
        return ''
    w = wcswidth(text)
    if w <= count:
        return text
    text = text[:count-1]
    # Slicing counts code points, not cells, so keep trimming until the
    # measured width leaves one cell free for the marker.
    while wcswidth(text) > count - 1:
        text = text[:-1]
    return text + '…'
def formatted(fmt, text):
    """Wrap text in an escape sequence built from fmt, followed by a reset."""
    pieces = ('\x1b[', fmt, 'm', text, '\x1b[0m')
    return ''.join(pieces)
# Build one ready-to-use formatter per role by pre-binding its entry from
# `formats`; the generic helper is then removed from the module namespace.
title_format = partial(formatted, formats['title'])
margin_format = partial(formatted, formats['margin'])
text_format = partial(formatted, formats['text'])
del formatted
def place_in(text, sz):
    """Return text truncated and then space-padded to exactly sz cells."""
    fitted = fit_in(text, sz)
    # Pad by measured cell width rather than by code point count, so that
    # wide characters do not push the result past sz cells.
    padding = sz - wcswidth(fitted)
    if padding > 0:
        fitted += ' ' * padding
    return fitted
def title_lines(left_path, right_path, args, columns, margin_size):
    """Yield the three formatted header lines for one file.

    The lines are the sanitized display name of left_path, a horizontal
    rule, and a blank line. right_path and args are currently unused.
    """
    name = fit_in(sanitize(path_name_map[left_path]), columns - 2 * margin_size)
    heading = ' ' + name
    # Pad by measured cell width so that names containing wide characters
    # still produce a line of exactly `columns` cells.
    heading += ' ' * max(0, columns - wcswidth(heading))
    yield title_format(heading)
    # A full-width rule; multiplying an empty string would always produce
    # an empty line.
    yield title_format('━' * columns)
    yield title_format(' ' * columns)
def binary_lines(path, other_path, columns, margin_size):
    """Yield the single line that describes a pair of binary files.

    Each file gets half of the available columns: a blank margin followed
    by a note giving the size of its data.
    """
    template = _('Binary file: {}')

    def fl(path):
        text = template.format(human_readable(len(data_for_path(path))))
        text = place_in(text, columns // 2 - margin_size)
        return margin_format(' ' * margin_size) + text_format(text)

    # Yield the line rather than returning it: this function is consumed
    # with `yield from`, which would otherwise iterate the returned string
    # one character at a time.
    yield fl(path) + fl(other_path)
def lines_for_diff(left_path, right_path, hunks, args, columns, margin_size):
    """Return an iterator over the rendered lines; currently always empty."""
    no_lines = ()
    return iter(no_lines)
def render_diff(collection, diff_map, args, columns):
    """Yield the rendered output for every 'diff' entry in the collection.

    A first pass over the collection sizes the margin from the largest line
    number found in the available patches; a second pass emits the title
    lines and then either the binary summary or the diff lines.
    """
    widest = 0
    for path, item_type, other_path in collection:
        if item_type != 'diff':
            continue
        patch = diff_map.get(path)
        if patch is None:
            continue
        widest = max(widest, patch.largest_line_number)
    # One cell more than the digit count, and never narrower than three.
    margin_size = max(3, len(str(widest)) + 1)

    for path, item_type, other_path in collection:
        if item_type != 'diff':
            continue
        yield from title_lines(path, other_path, args, columns, margin_size)
        # Content that comes back as bytes is treated as binary.
        if isinstance(data_for_path(path), bytes):
            yield from binary_lines(path, other_path, columns, margin_size)
        else:
            yield from lines_for_diff(path, other_path, diff_map[path], args, columns, margin_size)