diff --git a/.gitignore b/.gitignore index 20cb7aac5..34118efaf 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ README.html linux-package logo/*.iconset test-launcher +kitty-profile diff --git a/README.asciidoc b/README.asciidoc index 942c593de..5fe477570 100644 --- a/README.asciidoc +++ b/README.asciidoc @@ -292,6 +292,21 @@ come in handy for applications like powerline, without the need to use patched fonts. See the various font related configuration directives in the link:kitty/kitty.conf[config file]. +== Performance + +The main goals for kitty performance are user-perceived latency and +"smoothness" while scrolling. kitty tries hard to optimize these. To that end +it keeps a cache of each rendered glyph in video RAM so that font rendering is +not a bottleneck. Interaction with child programs takes place in a separate +thread from rendering, to improve smoothness. + +You can generate detailed per-function performance data using +link:https://github.com/gperftools/gperftools[gperftools]. Build kitty with the +`--profile` flag, which will create an executable called `kitty-profile`. Run +that and perform the task you want to analyse, for example, scrolling a large +file with `less`. After you quit, function call statistics will be printed to +`stdout`, and you can use tools like *kcachegrind* for more detailed analysis. + == Note for Linux/macOS packagers While kitty does use python, it is not a traditional python package, so please do not install it in site-packages. 
@contextmanager
def setup_profiling(args):
    """Profile the body of the ``with`` block when profiler support is built in.

    If ``start_profiler``/``stop_profiler`` cannot be imported from
    ``fast_data_types`` this is a no-op context manager. Otherwise the
    profiler writes to ``/tmp/kitty-profile.log`` while the body runs. After
    a clean exit the log is converted to callgrind format with ``pprof`` and
    opened in ``kcachegrind``; if ``kcachegrind`` is not installed, a text
    report is printed instead.

    :param args: parsed command line arguments (currently unused, kept so
        callers do not need to change).
    :raises subprocess.CalledProcessError: if ``pprof`` exits with an error.
    """
    try:
        from .fast_data_types import start_profiler, stop_profiler
    except ImportError:
        start_profiler = stop_profiler = None
    if start_profiler is None:
        # No profiler hooks available: just run the body.
        yield
        return

    import subprocess
    profile_log = '/tmp/kitty-profile.log'
    callgrind_out = '/tmp/kitty-profile.callgrind'

    start_profiler(profile_log)
    try:
        yield
    finally:
        # Always stop the profiler, even when the body raises, so that it is
        # not left running while the exception propagates.
        stop_profiler()

    # Post-processing only happens when the body finished without raising.
    exe = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'kitty-profile')
    print('Post processing profile data for', exe, '...')
    # Close the output file deterministically instead of leaking the handle.
    with open(callgrind_out, 'wb') as out:
        subprocess.check_call(['pprof', '--callgrind', exe, profile_log], stdout=out)
    try:
        subprocess.Popen(['kcachegrind', callgrind_out])
    except FileNotFoundError:
        # kcachegrind is not installed: fall back to a plain text report.
        subprocess.check_call(['pprof', '--text', exe, profile_log])
        print('To view the graphical call data, use: kcachegrind', callgrind_out)
diff --git a/setup.py b/setup.py index 1e82f70fe..a9b00c875 100755 --- a/setup.py +++ b/setup.py @@ -99,7 +99,7 @@ def get_sanitize_args(cc, ccver): return sanitize_args -def init_env(debug=False, sanitize=False, native_optimizations=True): +def init_env(debug=False, sanitize=False, native_optimizations=True, profile=False): global cflags, ldflags, cc, ldpaths native_optimizations = native_optimizations and not sanitize and not debug cc, ccver = cc_version() @@ -111,14 +111,16 @@ def init_env(debug=False, sanitize=False, native_optimizations=True): if ccver < (5, 2) and cc == 'gcc': missing_braces = '-Wno-missing-braces' optimize = '-ggdb' if debug or sanitize else '-O3' + if profile: + optimize = '-g' sanitize_args = get_sanitize_args(cc, ccver) if sanitize else set() cflags = os.environ.get( 'OVERRIDE_CFLAGS', ( '-Wextra -Wno-missing-field-initializers -Wall -std=c99 -D_XOPEN_SOURCE=700' ' -pedantic-errors -Werror {} {} -D{}DEBUG -fwrapv {} {} -pipe {} -fvisibility=hidden' ).format( - optimize, ' '.join(sanitize_args), ('' if debug else 'N'), stack_protector, missing_braces, '-march=native' - if native_optimizations else '' + optimize, ' '.join(sanitize_args), ('' if debug else 'N'), stack_protector, missing_braces, + '-march=native' if native_optimizations else '', ) ) cflags = shlex.split(cflags @@ -130,6 +132,9 @@ def init_env(debug=False, sanitize=False, native_optimizations=True): cflags += shlex.split(os.environ.get('CFLAGS', '')) ldflags += shlex.split(os.environ.get('LDFLAGS', '')) + if profile: + cflags.append('-DWITH_PROFILER') + ldflags.append('-lprofiler') cflags.append('-pthread') # We add 4000 to the primary version because vim turns on SGR mouse mode # automatically if this version is high enough @@ -260,6 +265,12 @@ def option_parser(): action='store_true', help='Only build changed files' ) + p.add_argument( + '--profile', + default=False, + action='store_true', + help='Use the -pg compile flag to add profiling information' + ) return p @@ -281,7 
def build_linux_launcher(args, launcher_dir='.', for_bundle=False):
    """Compile ``linux-launcher.c`` into a launcher executable.

    With ``args.profile`` set, the launcher is built with ``-g`` and
    ``-DWITH_PROFILER``, linked against ``-lprofiler`` and named
    ``kitty-profile``; otherwise it is built with ``-O3`` and named ``kitty``.

    :param args: parsed build options; only ``args.profile`` is read here.
    :param launcher_dir: directory the executable is written to.
    :param for_bundle: when true, also define ``FOR_BUNDLE`` and ``PYVER``.
    """
    profiling = args.profile
    compile_flags = ['-Wall', '-Werror', '-fpie']
    extra_libs = []
    if profiling:
        compile_flags += ['-DWITH_PROFILER', '-g']
        extra_libs.append('-lprofiler')
    else:
        compile_flags.append('-O3')
    if for_bundle:
        compile_flags += [
            '-DFOR_BUNDLE',
            '-DPYVER="{}"'.format(sysconfig.get_python_version()),
        ]
    # get_python_flags is handed the flag list itself (presumably it may add
    # to it -- verify against its definition), so call it before the command
    # line is assembled, exactly as the original ordering did.
    python_libs = get_python_flags(compile_flags)
    output = os.path.join(launcher_dir, 'kitty-profile' if profiling else 'kitty')
    run_tool(
        [cc] + compile_flags + ['linux-launcher.c', '-o', output]
        + extra_libs + python_libs
    )
sys.executable, os.path.join(base, 'test.py')