From 728f33700ad361093cbefeceb246d274bce1cb1d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 16 Sep 2017 08:10:19 +0530 Subject: [PATCH] Fine tune the artificial delays in the render loop There are now two numbers, repaint_delay and input_delay that control how often the screen is repainted and how frequently input received from the child process is processed. This halves the CPU usage in intensive cases such as scrolling a file in less. The CPU usage of kitty + X when scrolling is now significantly lower than all the other terminals on my system. MROAWR! ... --- README.asciidoc | 15 +++++++---- kitty/boss.py | 2 +- kitty/child-monitor.c | 62 +++++++++++++++++-------------------------- kitty/config.py | 1 + kitty/data-types.h | 2 +- kitty/kitty.conf | 13 ++++++--- kitty/state.c | 7 +++++ kitty/state.h | 1 + 8 files changed, 56 insertions(+), 47 deletions(-) diff --git a/README.asciidoc b/README.asciidoc index 3ffd9cb23..1d6d96d3c 100644 --- a/README.asciidoc +++ b/README.asciidoc @@ -124,7 +124,7 @@ or a similar package manager) kitty is designed for power keyboard users. To that end all its controls work with the keyboard (although it fully supports mouse interactions as well). Its configuration is a simple, human editable, single file for -easy reproducability (I like to store config files in source control). +easy reproducibility (I like to store config files in source control). The code in kitty is designed to be simple, modular and hackable. It is written in a mix of C (for performance sensitive parts) and Python (for @@ -296,10 +296,15 @@ link:kitty/kitty.conf[config file]. == Performance The main goals for kitty performance are user perceived latency while typing -and "smoothness" while scrolling. kitty tries hard to optimize these. To that -end it keeps a cache of each rendered glyph in video RAM so that font rendering -is not a bottleneck. Interaction with child programs takes place in a separate -thread from rendering, to improve smoothness. 
+and "smoothness" while scrolling as well as CPU usage. kitty tries hard to find +an optimum balance for these. To that end it keeps a cache of each rendered +glyph in video RAM so that font rendering is not a bottleneck. Interaction +with child programs takes place in a separate thread from rendering, to improve +smoothness. + +There are two parameters you can tune to adjust the performance: ``repaint_delay`` +and ``input_delay``. These control the artificial delays introduced into the +render loop to reduce CPU usage. See the link:kitty/kitty.conf[config file] for details. You can generate detailed per-function performance data using link:https://github.com/gperftools/gperftools[gperftools]. Build kitty with the diff --git a/kitty/boss.py b/kitty/boss.py index fdd9848e0..6b497dada 100644 --- a/kitty/boss.py +++ b/kitty/boss.py @@ -60,7 +60,7 @@ class Boss: self.glfw_window_title = None self.shutting_down = False self.child_monitor = ChildMonitor( - opts.repaint_delay / 1000.0, glfw_window.window_id(), + glfw_window.window_id(), self.on_child_death, DumpCommands(args) if args.dump_commands or args.dump_bytes else None) set_boss(self) diff --git a/kitty/child-monitor.c b/kitty/child-monitor.c index eb52717dc..d6ab5d22d 100644 --- a/kitty/child-monitor.c +++ b/kitty/child-monitor.c @@ -30,6 +30,7 @@ extern int pthread_setname_np(const char *name); #include #define EXTRA_FDS 2 +#define wakeup_main_loop glfwPostEmptyEvent static void (*parse_func)(Screen*, PyObject*); @@ -123,10 +124,9 @@ new(PyTypeObject *type, PyObject *args, PyObject UNUSED *kwds) { ChildMonitor *self; PyObject *dump_callback, *death_notify, *wid; int ret; - double repaint_delay; if (the_monitor) { PyErr_SetString(PyExc_RuntimeError, "Can have only a single ChildMonitor instance"); return NULL; } - if (!PyArg_ParseTuple(args, "dOOO", &repaint_delay, &wid, &death_notify, &dump_callback)) return NULL; + if (!PyArg_ParseTuple(args, "OOO", &wid, &death_notify, &dump_callback)) return NULL; 
glfw_window_id = PyLong_AsVoidPtr(wid); if ((ret = pthread_mutex_init(&children_lock, NULL)) != 0) { PyErr_Format(PyExc_RuntimeError, "Failed to create children_lock mutex: %s", strerror(ret)); @@ -148,7 +148,6 @@ new(PyTypeObject *type, PyObject *args, PyObject UNUSED *kwds) { self->count = 0; fds[0].fd = wakeup_fds[0]; fds[1].fd = signal_fds[0]; fds[0].events = POLLIN; fds[1].events = POLLIN; - self->repaint_delay = repaint_delay; the_monitor = self; return (PyObject*) self; @@ -175,7 +174,7 @@ dealloc(ChildMonitor* self) { } static void -wakeup_() { +wakeup_io_loop() { while(true) { ssize_t ret = write(wakeup_fds[1], "w", 1); if (ret < 0) { @@ -208,7 +207,7 @@ join(ChildMonitor *self) { static PyObject * wakeup(ChildMonitor UNUSED *self) { #define wakeup_doc "wakeup() -> wakeup the ChildMonitor I/O thread, forcing it to exit from poll() if it is waiting there." - wakeup_(); + wakeup_io_loop(); Py_RETURN_NONE; } @@ -258,7 +257,7 @@ schedule_write_to_child(unsigned long id, const char *data, size_t sz) { screen->write_buf = PyMem_RawRealloc(screen->write_buf, screen->write_buf_sz); if (screen->write_buf == NULL) { fatal("Out of memory."); } } - if (screen->write_buf_used) wakeup_(); + if (screen->write_buf_used) wakeup_io_loop(); screen_mutex(unlock, write); break; } @@ -286,31 +285,26 @@ shutdown(ChildMonitor *self) { Py_RETURN_NONE; } -static inline bool -do_parse(ChildMonitor *self, Screen *screen) { - bool updated = false; +static inline void +do_parse(ChildMonitor *self, Screen *screen, double now) { screen_mutex(lock, read); if (screen->read_buf_sz) { - parse_func(screen, self->dump_callback); - if (screen->read_buf_sz >= READ_BUF_SZ) wakeup_(); // Ensure the read fd has POLLIN set - screen->read_buf_sz = 0; - updated = true; + double time_since_new_input = now - screen->new_input_at; + if (time_since_new_input >= OPT(input_delay)) { + parse_func(screen, self->dump_callback); + if (screen->read_buf_sz >= READ_BUF_SZ) wakeup_io_loop(); // Ensure the read fd 
has POLLIN set + screen->read_buf_sz = 0; + screen->new_input_at = 0; + } else set_maximum_wait(OPT(input_delay) - time_since_new_input); } screen_mutex(unlock, read); - if (LIKELY(updated)) { - glfwPostEmptyEvent(); - } - return updated; } -static double last_parse_at = -1000; static void parse_input(ChildMonitor *self) { // Parse all available input that was read in the I/O thread. size_t count = 0, remove_count = 0; double now = monotonic(); - double time_since_last_parse = now - last_parse_at; - bool parse_needed = time_since_last_parse >= self->repaint_delay ? true : false; children_mutex(lock); while (remove_queue_count) { remove_queue_count--; @@ -321,15 +315,11 @@ parse_input(ChildMonitor *self) { if (UNLIKELY(signal_received)) { glfwSetWindowShouldClose(glfw_window_id, true); - glfwPostEmptyEvent(); } else { - if (parse_needed) { - count = self->count; - for (size_t i = 0; i < count; i++) { - scratch[i] = children[i]; - INCREF_CHILD(scratch[i]); - } - last_parse_at = now; + count = self->count; + for (size_t i = 0; i < count; i++) { + scratch[i] = children[i]; + INCREF_CHILD(scratch[i]); } } children_mutex(unlock); @@ -345,13 +335,10 @@ parse_input(ChildMonitor *self) { for (size_t i = 0; i < count; i++) { if (!scratch[i].needs_removal) { - do_parse(self, scratch[i].screen); + do_parse(self, scratch[i].screen, now); } DECREF_CHILD(scratch[i]); } - if (!parse_needed) { - set_maximum_wait(self->repaint_delay - time_since_last_parse); - } } static PyObject * @@ -494,9 +481,9 @@ render_cursor(Window *w, double now) { } static inline bool -render(ChildMonitor *self, double now) { +render(double now) { double time_since_last_render = now - last_render_at; - if (time_since_last_render > self->repaint_delay) { + if (time_since_last_render > OPT(repaint_delay)) { draw_borders(); #define TD global_state.tab_bar_render_data if (TD.screen && global_state.num_tabs > 1) draw_cells(TD.vao_idx, TD.xstart, TD.ystart, TD.dx, TD.dy, TD.screen); @@ -536,7 +523,7 @@ 
render(ChildMonitor *self, double now) { glfwSwapBuffers(glfw_window_id); last_render_at = now; } else { - set_maximum_wait(self->repaint_delay - time_since_last_render); + set_maximum_wait(OPT(repaint_delay) - time_since_last_render); } return true; } @@ -595,7 +582,7 @@ main_loop(ChildMonitor *self) { while (!glfwWindowShouldClose(glfw_window_id)) { double now = monotonic(); maximum_wait = -1; - if (!render(self, now)) break; + if (!render(now)) break; if (global_state.mouse_visible && OPT(mouse_hide_wait) > 0 && now - global_state.last_mouse_activity_at > OPT(mouse_hide_wait)) { glfwSetInputMode(glfw_window_id, GLFW_CURSOR, GLFW_CURSOR_HIDDEN); global_state.mouse_visible = false; @@ -714,6 +701,7 @@ read_bytes(int fd, Screen *screen) { break; } if (UNLIKELY(len == 0)) return false; + if (screen->new_input_at == 0) screen->new_input_at = monotonic(); screen_mutex(lock, read); if (orig_sz != screen->read_buf_sz) { // The other thread consumed some of the screen read buffer @@ -828,7 +816,7 @@ io_loop(void *data) { perror("Call to poll() failed"); } } - if (data_received) glfwPostEmptyEvent(); + if (data_received) wakeup_main_loop(); } children_mutex(lock); for (i = 0; i < self->count; i++) children[i].needs_removal = true; diff --git a/kitty/config.py b/kitty/config.py index a6d06f526..d34c7a266 100644 --- a/kitty/config.py +++ b/kitty/config.py @@ -215,6 +215,7 @@ type_map = { 'cursor_opacity': to_opacity, 'open_url_modifiers': to_open_url_modifiers, 'repaint_delay': positive_int, + 'input_delay': positive_int, 'window_border_width': positive_float, 'window_margin_width': positive_float, 'window_padding_width': positive_float, diff --git a/kitty/data-types.h b/kitty/data-types.h index 10d55f897..95900d98f 100644 --- a/kitty/data-types.h +++ b/kitty/data-types.h @@ -251,6 +251,7 @@ typedef struct { unsigned int parser_state, parser_text_start, parser_buf_pos; bool parser_has_pending_text; uint8_t read_buf[READ_BUF_SZ], *write_buf; + double new_input_at; size_t 
read_buf_sz, write_buf_sz, write_buf_used; pthread_mutex_t read_buf_lock, write_buf_lock; @@ -267,7 +268,6 @@ typedef struct { PyObject_HEAD PyObject *dump_callback, *update_screen, *death_notify; - double repaint_delay; unsigned int count; bool shutting_down; pthread_t io_thread; diff --git a/kitty/kitty.conf b/kitty/kitty.conf index a19918958..8f44334c5 100644 --- a/kitty/kitty.conf +++ b/kitty/kitty.conf @@ -105,11 +105,18 @@ remember_window_size yes initial_window_width 640 initial_window_height 400 -# Delay (in milliseconds) between screen updates. Decreasing it, increases fps -# at the cost of more CPU usage. The default value yields ~100fps which is more -# than sufficient for most uses. +# Delay (in milliseconds) between screen updates. Decreasing it increases +# frames-per-second (FPS) at the cost of more CPU usage. The default value +# yields ~100 FPS, which is more than sufficient for most uses. repaint_delay 10 +# Delay (in milliseconds) before input from the program running in the terminal +# is processed. Note that decreasing it will increase responsiveness, but also +# increase CPU usage and might cause flicker in full-screen programs that +# redraw the entire screen on each loop, because kitty is so fast that partial +# screen updates will be drawn. +input_delay 3 + # Visual bell duration. Flash the screen when a bell occurs for the specified number of # seconds. Set to zero to disable. 
visual_bell_duration 0.0 diff --git a/kitty/state.c b/kitty/state.c index de75d9520..ab4915070 100644 --- a/kitty/state.c +++ b/kitty/state.c @@ -126,6 +126,11 @@ color_as_int(PyObject *color) { #undef I } +static inline double +repaint_delay(PyObject *val) { + return (double)(PyLong_AsUnsignedLong(val)) / 1000.0; +} + #define dict_iter(d) { \ PyObject *key, *value; Py_ssize_t pos = 0; \ while (PyDict_Next(d, &pos, &key, &value)) @@ -155,6 +160,8 @@ PYWRAP1(set_options) { S(open_url_modifiers, PyLong_AsUnsignedLong); S(click_interval, PyFloat_AsDouble); S(url_color, color_as_int); + S(repaint_delay, repaint_delay); + S(input_delay, repaint_delay); PyObject *chars = PyObject_GetAttrString(args, "select_by_word_characters"); if (chars == NULL) return NULL; diff --git a/kitty/state.h b/kitty/state.h index 450c67f1b..6c5d9961e 100644 --- a/kitty/state.h +++ b/kitty/state.h @@ -16,6 +16,7 @@ typedef struct { unsigned int open_url_modifiers; char_type select_by_word_characters[256]; size_t select_by_word_characters_count; color_type url_color; + double repaint_delay, input_delay; } Options; typedef struct {