Add instrumentation to analyse kitty performance

This commit is contained in:
Kovid Goyal 2017-08-26 10:24:12 +05:30
parent e43bf891e4
commit a922b4c789
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
6 changed files with 107 additions and 38 deletions

1
.gitignore vendored
View File

@ -8,3 +8,4 @@ README.html
linux-package linux-package
logo/*.iconset logo/*.iconset
test-launcher test-launcher
kitty-profile

View File

@ -292,6 +292,21 @@ come in handy for applications like powerline, without the need to use patched
fonts. See the various font related configuration directives in the fonts. See the various font related configuration directives in the
link:kitty/kitty.conf[config file]. link:kitty/kitty.conf[config file].
== Performance
The main goals for kitty performance are low user perceived latency and
"smoothness" while scrolling. kitty tries hard to optimize for these. To that end
it keeps a cache of each rendered glyph in video RAM so that font rendering is
not a bottleneck. Interaction with child programs takes place in a separate
thread from rendering, to improve smoothness.
You can generate detailed per-function performance data using
link:https://github.com/gperftools/gperftools[gperftools]. Build kitty with the
`--profile` flag which will create an executable called `kitty-profile`. Run
that and perform the task you want to analyse, for example, scrolling a large
file with `less`. After you quit, the profile data is post-processed with
`pprof`: if *kcachegrind* is installed it is launched on the resulting call
graph for detailed analysis, otherwise function call statistics are printed to
`stdout`.
== Note for Linux/macOS packagers == Note for Linux/macOS packagers
While kitty does use python, it is not a traditional python package, so please do not install it in site-packages. While kitty does use python, it is not a traditional python package, so please do not install it in site-packages.

View File

@ -9,6 +9,9 @@
#include "glfw.h" #include "glfw.h"
#include "gl.h" #include "gl.h"
#include "modes.h" #include "modes.h"
#ifdef WITH_PROFILER
#include <gperftools/profiler.h>
#endif
static char drain_buf[1024] = {0}; static char drain_buf[1024] = {0};
@ -41,6 +44,22 @@ redirect_std_streams(PyObject UNUSED *self, PyObject *args) {
Py_RETURN_NONE; Py_RETURN_NONE;
} }
#ifdef WITH_PROFILER
/* Python-callable wrappers around the gperftools CPU profiler entry points.
 * They are compiled only when WITH_PROFILER is defined. */

/* start_profiler(path): parse a single string argument and hand it to
 * ProfilerStart(). Returns None; returns NULL (with the Python error set by
 * PyArg_ParseTuple) when the argument is not a string. The return value of
 * ProfilerStart() is not inspected. */
static PyObject*
start_profiler(PyObject UNUSED *self, PyObject *args) {
    char *profile_path = NULL;
    if (!PyArg_ParseTuple(args, "s", &profile_path)) { return NULL; }
    ProfilerStart(profile_path);
    Py_RETURN_NONE;
}

/* stop_profiler(): call ProfilerStop() and return None. Takes no arguments. */
static PyObject*
stop_profiler(PyObject UNUSED *self) {
    ProfilerStop();
    Py_RETURN_NONE;
}
#endif
#ifdef __APPLE__ #ifdef __APPLE__
#include "core_text.h" #include "core_text.h"
#endif #endif
@ -61,6 +80,10 @@ static PyMethodDef module_methods[] = {
{"get_fontconfig_font", (PyCFunction)get_fontconfig_font, METH_VARARGS, ""}, {"get_fontconfig_font", (PyCFunction)get_fontconfig_font, METH_VARARGS, ""},
#endif #endif
GLFW_FUNC_WRAPPERS GLFW_FUNC_WRAPPERS
#ifdef WITH_PROFILER
{"start_profiler", (PyCFunction)start_profiler, METH_VARARGS, ""},
{"stop_profiler", (PyCFunction)stop_profiler, METH_NOARGS, ""},
#endif
{NULL, NULL, 0, NULL} /* Sentinel */ {NULL, NULL, 0, NULL} /* Sentinel */
}; };

View File

@ -6,7 +6,7 @@ import argparse
import locale import locale
import os import os
import sys import sys
import tempfile from contextlib import contextmanager
from gettext import gettext as _ from gettext import gettext as _
from queue import Empty from queue import Empty
@ -87,12 +87,6 @@ def option_parser():
action='version', action='version',
version='{} {} by Kovid Goyal'.format(appname, str_version) version='{} {} by Kovid Goyal'.format(appname, str_version)
) )
a(
'--profile',
action='store_true',
default=False,
help=_('Show profiling data after exit')
)
a( a(
'--dump-commands', '--dump-commands',
action='store_true', action='store_true',
@ -259,6 +253,29 @@ def ensure_osx_locale():
os.environ['LANG'] = lang + '.UTF-8' os.environ['LANG'] = lang + '.UTF-8'
@contextmanager
def setup_profiling(args):
    """Profile the enclosed block when the profiler bindings are available.

    If ``start_profiler``/``stop_profiler`` cannot be imported from
    ``fast_data_types`` this context manager does nothing. Otherwise the
    profiler is started before the block runs and is always stopped when the
    block finishes, even if it raises. After a clean exit the collected data
    in ``/tmp/kitty-profile.log`` is converted with ``pprof`` to callgrind
    format (``/tmp/kitty-profile.callgrind``) and ``kcachegrind`` is launched
    on it; if ``kcachegrind`` is not installed, a text report is printed
    instead.

    :param args: parsed command line arguments (currently unused).
    :raises subprocess.CalledProcessError: if ``pprof`` exits with an error.
    """
    try:
        from .fast_data_types import start_profiler, stop_profiler
    except ImportError:
        # The profiler functions are not available in this build.
        start_profiler = stop_profiler = None
    if start_profiler is None:
        yield
        return

    import subprocess
    profile_log = '/tmp/kitty-profile.log'
    cg = '/tmp/kitty-profile.callgrind'
    start_profiler(profile_log)
    try:
        yield
    finally:
        # Stop the profiler even when the profiled code raised, so it is not
        # left running while the exception propagates.
        stop_profiler()

    exe = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'kitty-profile')
    print('Post processing profile data for', exe, '...')
    # Close the output file deterministically instead of leaking the handle.
    with open(cg, 'wb') as cg_file:
        subprocess.check_call(['pprof', '--callgrind', exe, profile_log], stdout=cg_file)
    try:
        subprocess.Popen(['kcachegrind', cg])
    except FileNotFoundError:
        # No kcachegrind available: fall back to a plain text report.
        subprocess.check_call(['pprof', '--text', exe, profile_log])
        print('To view the graphical call data, use: kcachegrind', cg)
def main(): def main():
if isosx: if isosx:
ensure_osx_locale() ensure_osx_locale()
@ -299,23 +316,7 @@ def main():
if not glfw_init(): if not glfw_init():
raise SystemExit('GLFW initialization failed') raise SystemExit('GLFW initialization failed')
try: try:
if args.profile: with setup_profiling(args):
tf = tempfile.NamedTemporaryFile(prefix='kitty-profiling-stats-')
args.profile = tf.name
import cProfile
import pstats
pr = cProfile.Profile()
pr.enable()
run_app(opts, args)
pr.disable()
pr.create_stats()
s = pstats.Stats(pr)
s.add(args.profile)
tf.close()
s.strip_dirs()
s.sort_stats('time', 'name')
s.print_stats(30)
else:
run_app(opts, args) run_app(opts, args)
finally: finally:
glfw_terminate() glfw_terminate()

View File

@ -84,9 +84,13 @@ int main(int argc, char *argv[]) {
char lib[PATH_MAX+1] = {0}; char lib[PATH_MAX+1] = {0};
char *final_argv[MAX_ARGC + 1] = {0}; char *final_argv[MAX_ARGC + 1] = {0};
wchar_t *argvw[MAX_ARGC + 1] = {0}; wchar_t *argvw[MAX_ARGC + 1] = {0};
#ifdef WITH_PROFILER
num = snprintf(lib, PATH_MAX, "%s%s", exe_dir, "/");
#else
num = snprintf(lib, PATH_MAX, "%s%s", exe_dir, "/../lib/kitty"); num = snprintf(lib, PATH_MAX, "%s%s", exe_dir, "/../lib/kitty");
#endif
if (num < 0 || num >= PATH_MAX) { fprintf(stderr, "Failed to create path to /../lib/kitty\n"); return 1; } if (num < 0 || num >= PATH_MAX) { fprintf(stderr, "Failed to create path to kitty lib\n"); return 1; }
final_argv[0] = exe; final_argv[0] = exe;
final_argv[1] = lib; final_argv[1] = lib;
for (i = 1, num_args=2; i < argc && i + 1 <= MAX_ARGC; i++) { for (i = 1, num_args=2; i < argc && i + 1 <= MAX_ARGC; i++) {

View File

@ -99,7 +99,7 @@ def get_sanitize_args(cc, ccver):
return sanitize_args return sanitize_args
def init_env(debug=False, sanitize=False, native_optimizations=True): def init_env(debug=False, sanitize=False, native_optimizations=True, profile=False):
global cflags, ldflags, cc, ldpaths global cflags, ldflags, cc, ldpaths
native_optimizations = native_optimizations and not sanitize and not debug native_optimizations = native_optimizations and not sanitize and not debug
cc, ccver = cc_version() cc, ccver = cc_version()
@ -111,14 +111,16 @@ def init_env(debug=False, sanitize=False, native_optimizations=True):
if ccver < (5, 2) and cc == 'gcc': if ccver < (5, 2) and cc == 'gcc':
missing_braces = '-Wno-missing-braces' missing_braces = '-Wno-missing-braces'
optimize = '-ggdb' if debug or sanitize else '-O3' optimize = '-ggdb' if debug or sanitize else '-O3'
if profile:
optimize = '-g'
sanitize_args = get_sanitize_args(cc, ccver) if sanitize else set() sanitize_args = get_sanitize_args(cc, ccver) if sanitize else set()
cflags = os.environ.get( cflags = os.environ.get(
'OVERRIDE_CFLAGS', ( 'OVERRIDE_CFLAGS', (
'-Wextra -Wno-missing-field-initializers -Wall -std=c99 -D_XOPEN_SOURCE=700' '-Wextra -Wno-missing-field-initializers -Wall -std=c99 -D_XOPEN_SOURCE=700'
' -pedantic-errors -Werror {} {} -D{}DEBUG -fwrapv {} {} -pipe {} -fvisibility=hidden' ' -pedantic-errors -Werror {} {} -D{}DEBUG -fwrapv {} {} -pipe {} -fvisibility=hidden'
).format( ).format(
optimize, ' '.join(sanitize_args), ('' if debug else 'N'), stack_protector, missing_braces, '-march=native' optimize, ' '.join(sanitize_args), ('' if debug else 'N'), stack_protector, missing_braces,
if native_optimizations else '' '-march=native' if native_optimizations else '',
) )
) )
cflags = shlex.split(cflags cflags = shlex.split(cflags
@ -130,6 +132,9 @@ def init_env(debug=False, sanitize=False, native_optimizations=True):
cflags += shlex.split(os.environ.get('CFLAGS', '')) cflags += shlex.split(os.environ.get('CFLAGS', ''))
ldflags += shlex.split(os.environ.get('LDFLAGS', '')) ldflags += shlex.split(os.environ.get('LDFLAGS', ''))
if profile:
cflags.append('-DWITH_PROFILER')
ldflags.append('-lprofiler')
cflags.append('-pthread') cflags.append('-pthread')
# We add 4000 to the primary version because vim turns on SGR mouse mode # We add 4000 to the primary version because vim turns on SGR mouse mode
# automatically if this version is high enough # automatically if this version is high enough
@ -260,6 +265,12 @@ def option_parser():
action='store_true', action='store_true',
help='Only build changed files' help='Only build changed files'
) )
# The profiling build does not use -pg: it compiles with -g and
# -DWITH_PROFILER and links against gperftools' libprofiler.
p.add_argument(
    '--profile',
    default=False,
    action='store_true',
    help='Build with gperftools CPU profiler support, creating the kitty-profile executable'
)
return p return p
@ -281,7 +292,7 @@ def find_c_files():
def build(args, native_optimizations=True): def build(args, native_optimizations=True):
init_env(args.debug, args.sanitize, native_optimizations) init_env(args.debug, args.sanitize, native_optimizations, args.profile)
compile_c_extension( compile_c_extension(
'kitty/fast_data_types', args.incremental, *find_c_files() 'kitty/fast_data_types', args.incremental, *find_c_files()
) )
@ -303,6 +314,25 @@ def build_test_launcher(args):
run_tool(cmd) run_tool(cmd)
def build_linux_launcher(args, launcher_dir='.', for_bundle=False):
    """Compile ``linux-launcher.c`` into the launcher executable.

    With ``args.profile`` set, the launcher is built with ``-g`` and
    ``-DWITH_PROFILER``, linked against ``-lprofiler`` and named
    ``kitty-profile``; otherwise it is built with ``-O3`` and named ``kitty``.

    :param args: parsed build options; only ``args.profile`` is read here.
    :param launcher_dir: directory the executable is written to.
    :param for_bundle: when true, ``-DFOR_BUNDLE`` is added to the flags.
    """
    profiling = args.profile
    cflags = ['-Wall', '-Werror', '-fpie']
    if profiling:
        cflags.extend(['-DWITH_PROFILER', '-g'])
        libs = ['-lprofiler']
        exe = 'kitty-profile'
    else:
        cflags.append('-O3')
        libs = []
        exe = 'kitty'
    if for_bundle:
        cflags.append('-DFOR_BUNDLE')
    cflags.append('-DPYVER="{}"'.format(sysconfig.get_python_version()))
    # cflags is handed to get_python_flags, which may extend it in place
    # (TODO confirm), so call it before the command line is assembled.
    pylib = get_python_flags(cflags)
    output = os.path.join(launcher_dir, exe)
    run_tool([cc] + cflags + ['linux-launcher.c', '-o', output] + libs + pylib)
def package(args, for_bundle=False): # {{{ def package(args, for_bundle=False): # {{{
ddir = args.prefix ddir = args.prefix
libdir = os.path.join(ddir, 'lib', 'kitty') libdir = os.path.join(ddir, 'lib', 'kitty')
@ -331,15 +361,7 @@ def package(args, for_bundle=False): # {{{
os.chmod(path, 0o755 if f.endswith('.so') else 0o644) os.chmod(path, 0o755 if f.endswith('.so') else 0o644)
launcher_dir = os.path.join(ddir, 'bin') launcher_dir = os.path.join(ddir, 'bin')
safe_makedirs(launcher_dir) safe_makedirs(launcher_dir)
cflags = '-O3 -Wall -Werror -fpie'.split() build_linux_launcher(args, launcher_dir, for_bundle)
if for_bundle:
cflags.append('-DFOR_BUNDLE')
cflags.append('-DPYVER="{}"'.format(sysconfig.get_python_version()))
pylib = get_python_flags(cflags)
cmd = [cc] + cflags + [
'linux-launcher.c', '-o', os.path.join(launcher_dir, 'kitty')
] + pylib
run_tool(cmd)
if not isosx: # {{{ linux desktop gunk if not isosx: # {{{ linux desktop gunk
icdir = os.path.join(ddir, 'share', 'icons', 'hicolor', '256x256') icdir = os.path.join(ddir, 'share', 'icons', 'hicolor', '256x256')
safe_makedirs(icdir) safe_makedirs(icdir)
@ -383,6 +405,9 @@ def main():
if args.action == 'build': if args.action == 'build':
build(args) build(args)
build_test_launcher(args) build_test_launcher(args)
if args.profile:
build_linux_launcher(args)
print('kitty profile executable is', 'kitty-profile')
elif args.action == 'test': elif args.action == 'test':
os.execlp( os.execlp(
sys.executable, sys.executable, os.path.join(base, 'test.py') sys.executable, sys.executable, os.path.join(base, 'test.py')