Fine tune the artificial delays in the render loop
There are now two numbers, repaint_delay and input_delay, that control how often the screen is repainted and how frequently input received from the child process is processed. This halves the CPU usage in intensive cases such as scrolling a file in less. The CPU usage of kitty + X when scrolling is now significantly lower than that of all the other terminals on my system. MROAWR! ...
This commit is contained in:
parent
43ebddc28f
commit
728f33700a
@ -124,7 +124,7 @@ or a similar package manager)
|
|||||||
kitty is designed for power keyboard users. To that end all its controls
|
kitty is designed for power keyboard users. To that end all its controls
|
||||||
work with the keyboard (although it fully supports mouse interactions as
|
work with the keyboard (although it fully supports mouse interactions as
|
||||||
well). Its configuration is a simple, human editable, single file for
|
well). Its configuration is a simple, human editable, single file for
|
||||||
easy reproducability (I like to store config files in source control).
|
easy reproducibility (I like to store config files in source control).
|
||||||
|
|
||||||
The code in kitty is designed to be simple, modular and hackable. It is
|
The code in kitty is designed to be simple, modular and hackable. It is
|
||||||
written in a mix of C (for performance sensitive parts) and Python (for
|
written in a mix of C (for performance sensitive parts) and Python (for
|
||||||
@ -296,10 +296,15 @@ link:kitty/kitty.conf[config file].
|
|||||||
== Performance
|
== Performance
|
||||||
|
|
||||||
The main goals for kitty performance are user perceived latency while typing
|
The main goals for kitty performance are user perceived latency while typing
|
||||||
and "smoothness" while scrolling. kitty tries hard to optimize these. To that
|
and "smoothness" while scrolling as well as CPU usage. kitty tries hard to find
|
||||||
end it keeps a cache of each rendered glyph in video RAM so that font rendering
|
an optimum balance for these. To that end it keeps a cache of each rendered
|
||||||
is not a bottleneck. Interaction with child programs takes place in a separate
|
glyph in video RAM so that font rendering is not a bottleneck. Interaction
|
||||||
thread from rendering, to improve smoothness.
|
with child programs takes place in a separate thread from rendering, to improve
|
||||||
|
smoothness.
|
||||||
|
|
||||||
|
There are two parameters you can tune to adjust the performance: ``repaint_delay``
|
||||||
|
and ``input_delay``. These control the artificial delays introduced into the
|
||||||
|
render loop to reduce CPU usage. See the link:kitty/kitty.conf[config file] for details.
|
||||||
|
|
||||||
You can generate detailed per-function performance data using
|
You can generate detailed per-function performance data using
|
||||||
link:https://github.com/gperftools/gperftools[gperftools]. Build kitty with the
|
link:https://github.com/gperftools/gperftools[gperftools]. Build kitty with the
|
||||||
|
|||||||
@ -60,7 +60,7 @@ class Boss:
|
|||||||
self.glfw_window_title = None
|
self.glfw_window_title = None
|
||||||
self.shutting_down = False
|
self.shutting_down = False
|
||||||
self.child_monitor = ChildMonitor(
|
self.child_monitor = ChildMonitor(
|
||||||
opts.repaint_delay / 1000.0, glfw_window.window_id(),
|
glfw_window.window_id(),
|
||||||
self.on_child_death,
|
self.on_child_death,
|
||||||
DumpCommands(args) if args.dump_commands or args.dump_bytes else None)
|
DumpCommands(args) if args.dump_commands or args.dump_bytes else None)
|
||||||
set_boss(self)
|
set_boss(self)
|
||||||
|
|||||||
@ -30,6 +30,7 @@ extern int pthread_setname_np(const char *name);
|
|||||||
#include <GLFW/glfw3.h>
|
#include <GLFW/glfw3.h>
|
||||||
|
|
||||||
#define EXTRA_FDS 2
|
#define EXTRA_FDS 2
|
||||||
|
#define wakeup_main_loop glfwPostEmptyEvent
|
||||||
|
|
||||||
static void (*parse_func)(Screen*, PyObject*);
|
static void (*parse_func)(Screen*, PyObject*);
|
||||||
|
|
||||||
@ -123,10 +124,9 @@ new(PyTypeObject *type, PyObject *args, PyObject UNUSED *kwds) {
|
|||||||
ChildMonitor *self;
|
ChildMonitor *self;
|
||||||
PyObject *dump_callback, *death_notify, *wid;
|
PyObject *dump_callback, *death_notify, *wid;
|
||||||
int ret;
|
int ret;
|
||||||
double repaint_delay;
|
|
||||||
|
|
||||||
if (the_monitor) { PyErr_SetString(PyExc_RuntimeError, "Can have only a single ChildMonitor instance"); return NULL; }
|
if (the_monitor) { PyErr_SetString(PyExc_RuntimeError, "Can have only a single ChildMonitor instance"); return NULL; }
|
||||||
if (!PyArg_ParseTuple(args, "dOOO", &repaint_delay, &wid, &death_notify, &dump_callback)) return NULL;
|
if (!PyArg_ParseTuple(args, "OOO", &wid, &death_notify, &dump_callback)) return NULL;
|
||||||
glfw_window_id = PyLong_AsVoidPtr(wid);
|
glfw_window_id = PyLong_AsVoidPtr(wid);
|
||||||
if ((ret = pthread_mutex_init(&children_lock, NULL)) != 0) {
|
if ((ret = pthread_mutex_init(&children_lock, NULL)) != 0) {
|
||||||
PyErr_Format(PyExc_RuntimeError, "Failed to create children_lock mutex: %s", strerror(ret));
|
PyErr_Format(PyExc_RuntimeError, "Failed to create children_lock mutex: %s", strerror(ret));
|
||||||
@ -148,7 +148,6 @@ new(PyTypeObject *type, PyObject *args, PyObject UNUSED *kwds) {
|
|||||||
self->count = 0;
|
self->count = 0;
|
||||||
fds[0].fd = wakeup_fds[0]; fds[1].fd = signal_fds[0];
|
fds[0].fd = wakeup_fds[0]; fds[1].fd = signal_fds[0];
|
||||||
fds[0].events = POLLIN; fds[1].events = POLLIN;
|
fds[0].events = POLLIN; fds[1].events = POLLIN;
|
||||||
self->repaint_delay = repaint_delay;
|
|
||||||
the_monitor = self;
|
the_monitor = self;
|
||||||
|
|
||||||
return (PyObject*) self;
|
return (PyObject*) self;
|
||||||
@ -175,7 +174,7 @@ dealloc(ChildMonitor* self) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
wakeup_() {
|
wakeup_io_loop() {
|
||||||
while(true) {
|
while(true) {
|
||||||
ssize_t ret = write(wakeup_fds[1], "w", 1);
|
ssize_t ret = write(wakeup_fds[1], "w", 1);
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
@ -208,7 +207,7 @@ join(ChildMonitor *self) {
|
|||||||
static PyObject *
|
static PyObject *
|
||||||
wakeup(ChildMonitor UNUSED *self) {
|
wakeup(ChildMonitor UNUSED *self) {
|
||||||
#define wakeup_doc "wakeup() -> wakeup the ChildMonitor I/O thread, forcing it to exit from poll() if it is waiting there."
|
#define wakeup_doc "wakeup() -> wakeup the ChildMonitor I/O thread, forcing it to exit from poll() if it is waiting there."
|
||||||
wakeup_();
|
wakeup_io_loop();
|
||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -258,7 +257,7 @@ schedule_write_to_child(unsigned long id, const char *data, size_t sz) {
|
|||||||
screen->write_buf = PyMem_RawRealloc(screen->write_buf, screen->write_buf_sz);
|
screen->write_buf = PyMem_RawRealloc(screen->write_buf, screen->write_buf_sz);
|
||||||
if (screen->write_buf == NULL) { fatal("Out of memory."); }
|
if (screen->write_buf == NULL) { fatal("Out of memory."); }
|
||||||
}
|
}
|
||||||
if (screen->write_buf_used) wakeup_();
|
if (screen->write_buf_used) wakeup_io_loop();
|
||||||
screen_mutex(unlock, write);
|
screen_mutex(unlock, write);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -286,31 +285,26 @@ shutdown(ChildMonitor *self) {
|
|||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool
|
static inline void
|
||||||
do_parse(ChildMonitor *self, Screen *screen) {
|
do_parse(ChildMonitor *self, Screen *screen, double now) {
|
||||||
bool updated = false;
|
|
||||||
screen_mutex(lock, read);
|
screen_mutex(lock, read);
|
||||||
if (screen->read_buf_sz) {
|
if (screen->read_buf_sz) {
|
||||||
parse_func(screen, self->dump_callback);
|
double time_since_new_input = now - screen->new_input_at;
|
||||||
if (screen->read_buf_sz >= READ_BUF_SZ) wakeup_(); // Ensure the read fd has POLLIN set
|
if (time_since_new_input >= OPT(input_delay)) {
|
||||||
screen->read_buf_sz = 0;
|
parse_func(screen, self->dump_callback);
|
||||||
updated = true;
|
if (screen->read_buf_sz >= READ_BUF_SZ) wakeup_io_loop(); // Ensure the read fd has POLLIN set
|
||||||
|
screen->read_buf_sz = 0;
|
||||||
|
screen->new_input_at = 0;
|
||||||
|
} else set_maximum_wait(OPT(input_delay) - time_since_new_input);
|
||||||
}
|
}
|
||||||
screen_mutex(unlock, read);
|
screen_mutex(unlock, read);
|
||||||
if (LIKELY(updated)) {
|
|
||||||
glfwPostEmptyEvent();
|
|
||||||
}
|
|
||||||
return updated;
|
|
||||||
}
|
}
|
||||||
static double last_parse_at = -1000;
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
parse_input(ChildMonitor *self) {
|
parse_input(ChildMonitor *self) {
|
||||||
// Parse all available input that was read in the I/O thread.
|
// Parse all available input that was read in the I/O thread.
|
||||||
size_t count = 0, remove_count = 0;
|
size_t count = 0, remove_count = 0;
|
||||||
double now = monotonic();
|
double now = monotonic();
|
||||||
double time_since_last_parse = now - last_parse_at;
|
|
||||||
bool parse_needed = time_since_last_parse >= self->repaint_delay ? true : false;
|
|
||||||
children_mutex(lock);
|
children_mutex(lock);
|
||||||
while (remove_queue_count) {
|
while (remove_queue_count) {
|
||||||
remove_queue_count--;
|
remove_queue_count--;
|
||||||
@ -321,15 +315,11 @@ parse_input(ChildMonitor *self) {
|
|||||||
|
|
||||||
if (UNLIKELY(signal_received)) {
|
if (UNLIKELY(signal_received)) {
|
||||||
glfwSetWindowShouldClose(glfw_window_id, true);
|
glfwSetWindowShouldClose(glfw_window_id, true);
|
||||||
glfwPostEmptyEvent();
|
|
||||||
} else {
|
} else {
|
||||||
if (parse_needed) {
|
count = self->count;
|
||||||
count = self->count;
|
for (size_t i = 0; i < count; i++) {
|
||||||
for (size_t i = 0; i < count; i++) {
|
scratch[i] = children[i];
|
||||||
scratch[i] = children[i];
|
INCREF_CHILD(scratch[i]);
|
||||||
INCREF_CHILD(scratch[i]);
|
|
||||||
}
|
|
||||||
last_parse_at = now;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
children_mutex(unlock);
|
children_mutex(unlock);
|
||||||
@ -345,13 +335,10 @@ parse_input(ChildMonitor *self) {
|
|||||||
|
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
if (!scratch[i].needs_removal) {
|
if (!scratch[i].needs_removal) {
|
||||||
do_parse(self, scratch[i].screen);
|
do_parse(self, scratch[i].screen, now);
|
||||||
}
|
}
|
||||||
DECREF_CHILD(scratch[i]);
|
DECREF_CHILD(scratch[i]);
|
||||||
}
|
}
|
||||||
if (!parse_needed) {
|
|
||||||
set_maximum_wait(self->repaint_delay - time_since_last_parse);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
@ -494,9 +481,9 @@ render_cursor(Window *w, double now) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline bool
|
static inline bool
|
||||||
render(ChildMonitor *self, double now) {
|
render(double now) {
|
||||||
double time_since_last_render = now - last_render_at;
|
double time_since_last_render = now - last_render_at;
|
||||||
if (time_since_last_render > self->repaint_delay) {
|
if (time_since_last_render > OPT(repaint_delay)) {
|
||||||
draw_borders();
|
draw_borders();
|
||||||
#define TD global_state.tab_bar_render_data
|
#define TD global_state.tab_bar_render_data
|
||||||
if (TD.screen && global_state.num_tabs > 1) draw_cells(TD.vao_idx, TD.xstart, TD.ystart, TD.dx, TD.dy, TD.screen);
|
if (TD.screen && global_state.num_tabs > 1) draw_cells(TD.vao_idx, TD.xstart, TD.ystart, TD.dx, TD.dy, TD.screen);
|
||||||
@ -536,7 +523,7 @@ render(ChildMonitor *self, double now) {
|
|||||||
glfwSwapBuffers(glfw_window_id);
|
glfwSwapBuffers(glfw_window_id);
|
||||||
last_render_at = now;
|
last_render_at = now;
|
||||||
} else {
|
} else {
|
||||||
set_maximum_wait(self->repaint_delay - time_since_last_render);
|
set_maximum_wait(OPT(repaint_delay) - time_since_last_render);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -595,7 +582,7 @@ main_loop(ChildMonitor *self) {
|
|||||||
while (!glfwWindowShouldClose(glfw_window_id)) {
|
while (!glfwWindowShouldClose(glfw_window_id)) {
|
||||||
double now = monotonic();
|
double now = monotonic();
|
||||||
maximum_wait = -1;
|
maximum_wait = -1;
|
||||||
if (!render(self, now)) break;
|
if (!render(now)) break;
|
||||||
if (global_state.mouse_visible && OPT(mouse_hide_wait) > 0 && now - global_state.last_mouse_activity_at > OPT(mouse_hide_wait)) {
|
if (global_state.mouse_visible && OPT(mouse_hide_wait) > 0 && now - global_state.last_mouse_activity_at > OPT(mouse_hide_wait)) {
|
||||||
glfwSetInputMode(glfw_window_id, GLFW_CURSOR, GLFW_CURSOR_HIDDEN);
|
glfwSetInputMode(glfw_window_id, GLFW_CURSOR, GLFW_CURSOR_HIDDEN);
|
||||||
global_state.mouse_visible = false;
|
global_state.mouse_visible = false;
|
||||||
@ -714,6 +701,7 @@ read_bytes(int fd, Screen *screen) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (UNLIKELY(len == 0)) return false;
|
if (UNLIKELY(len == 0)) return false;
|
||||||
|
if (screen->new_input_at == 0) screen->new_input_at = monotonic();
|
||||||
screen_mutex(lock, read);
|
screen_mutex(lock, read);
|
||||||
if (orig_sz != screen->read_buf_sz) {
|
if (orig_sz != screen->read_buf_sz) {
|
||||||
// The other thread consumed some of the screen read buffer
|
// The other thread consumed some of the screen read buffer
|
||||||
@ -828,7 +816,7 @@ io_loop(void *data) {
|
|||||||
perror("Call to poll() failed");
|
perror("Call to poll() failed");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (data_received) glfwPostEmptyEvent();
|
if (data_received) wakeup_main_loop();
|
||||||
}
|
}
|
||||||
children_mutex(lock);
|
children_mutex(lock);
|
||||||
for (i = 0; i < self->count; i++) children[i].needs_removal = true;
|
for (i = 0; i < self->count; i++) children[i].needs_removal = true;
|
||||||
|
|||||||
@ -215,6 +215,7 @@ type_map = {
|
|||||||
'cursor_opacity': to_opacity,
|
'cursor_opacity': to_opacity,
|
||||||
'open_url_modifiers': to_open_url_modifiers,
|
'open_url_modifiers': to_open_url_modifiers,
|
||||||
'repaint_delay': positive_int,
|
'repaint_delay': positive_int,
|
||||||
|
'input_delay': positive_int,
|
||||||
'window_border_width': positive_float,
|
'window_border_width': positive_float,
|
||||||
'window_margin_width': positive_float,
|
'window_margin_width': positive_float,
|
||||||
'window_padding_width': positive_float,
|
'window_padding_width': positive_float,
|
||||||
|
|||||||
@ -251,6 +251,7 @@ typedef struct {
|
|||||||
unsigned int parser_state, parser_text_start, parser_buf_pos;
|
unsigned int parser_state, parser_text_start, parser_buf_pos;
|
||||||
bool parser_has_pending_text;
|
bool parser_has_pending_text;
|
||||||
uint8_t read_buf[READ_BUF_SZ], *write_buf;
|
uint8_t read_buf[READ_BUF_SZ], *write_buf;
|
||||||
|
double new_input_at;
|
||||||
size_t read_buf_sz, write_buf_sz, write_buf_used;
|
size_t read_buf_sz, write_buf_sz, write_buf_used;
|
||||||
pthread_mutex_t read_buf_lock, write_buf_lock;
|
pthread_mutex_t read_buf_lock, write_buf_lock;
|
||||||
|
|
||||||
@ -267,7 +268,6 @@ typedef struct {
|
|||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
|
|
||||||
PyObject *dump_callback, *update_screen, *death_notify;
|
PyObject *dump_callback, *update_screen, *death_notify;
|
||||||
double repaint_delay;
|
|
||||||
unsigned int count;
|
unsigned int count;
|
||||||
bool shutting_down;
|
bool shutting_down;
|
||||||
pthread_t io_thread;
|
pthread_t io_thread;
|
||||||
|
|||||||
@ -105,11 +105,18 @@ remember_window_size yes
|
|||||||
initial_window_width 640
|
initial_window_width 640
|
||||||
initial_window_height 400
|
initial_window_height 400
|
||||||
|
|
||||||
# Delay (in milliseconds) between screen updates. Decreasing it, increases fps
|
# Delay (in milliseconds) between screen updates. Decreasing it increases
|
||||||
# at the cost of more CPU usage. The default value yields ~100fps which is more
|
# frames-per-second (FPS) at the cost of more CPU usage. The default value
|
||||||
# than sufficient for most uses.
|
# yields ~100 FPS which is more than sufficient for most uses.
|
||||||
repaint_delay 10
|
repaint_delay 10
|
||||||
|
|
||||||
|
# Delay (in milliseconds) before input from the program running in the terminal
|
||||||
|
# is processed. Note that decreasing it will increase responsiveness, but also
|
||||||
|
# increase CPU usage and might cause flicker in full screen programs that
|
||||||
|
# redraw the entire screen on each loop, because kitty is so fast that partial
|
||||||
|
# screen updates will be drawn.
|
||||||
|
input_delay 3
|
||||||
|
|
||||||
# Visual bell duration. Flash the screen when a bell occurs for the specified number of
|
# Visual bell duration. Flash the screen when a bell occurs for the specified number of
|
||||||
# seconds. Set to zero to disable.
|
# seconds. Set to zero to disable.
|
||||||
visual_bell_duration 0.0
|
visual_bell_duration 0.0
|
||||||
|
|||||||
@ -126,6 +126,11 @@ color_as_int(PyObject *color) {
|
|||||||
#undef I
|
#undef I
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline double
|
||||||
|
repaint_delay(PyObject *val) {
|
||||||
|
return (double)(PyLong_AsUnsignedLong(val)) / 1000.0;
|
||||||
|
}
|
||||||
|
|
||||||
#define dict_iter(d) { \
|
#define dict_iter(d) { \
|
||||||
PyObject *key, *value; Py_ssize_t pos = 0; \
|
PyObject *key, *value; Py_ssize_t pos = 0; \
|
||||||
while (PyDict_Next(d, &pos, &key, &value))
|
while (PyDict_Next(d, &pos, &key, &value))
|
||||||
@ -155,6 +160,8 @@ PYWRAP1(set_options) {
|
|||||||
S(open_url_modifiers, PyLong_AsUnsignedLong);
|
S(open_url_modifiers, PyLong_AsUnsignedLong);
|
||||||
S(click_interval, PyFloat_AsDouble);
|
S(click_interval, PyFloat_AsDouble);
|
||||||
S(url_color, color_as_int);
|
S(url_color, color_as_int);
|
||||||
|
S(repaint_delay, repaint_delay);
|
||||||
|
S(input_delay, repaint_delay);
|
||||||
|
|
||||||
PyObject *chars = PyObject_GetAttrString(args, "select_by_word_characters");
|
PyObject *chars = PyObject_GetAttrString(args, "select_by_word_characters");
|
||||||
if (chars == NULL) return NULL;
|
if (chars == NULL) return NULL;
|
||||||
|
|||||||
@ -16,6 +16,7 @@ typedef struct {
|
|||||||
unsigned int open_url_modifiers;
|
unsigned int open_url_modifiers;
|
||||||
char_type select_by_word_characters[256]; size_t select_by_word_characters_count;
|
char_type select_by_word_characters[256]; size_t select_by_word_characters_count;
|
||||||
color_type url_color;
|
color_type url_color;
|
||||||
|
double repaint_delay, input_delay;
|
||||||
} Options;
|
} Options;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user