From 7931654a7c29b926d91b3dd46a507e1d5c8747e0 Mon Sep 17 00:00:00 2001
From: Luflosi
Date: Mon, 17 Feb 2020 15:14:35 +0100
Subject: [PATCH] Ensure valid UTF-8 when cutting string

---
 glfw/backend_utils.c | 24 +++++++++++++++++++
 glfw/backend_utils.h |  1 +
 glfw/wl_window.c     |  6 ++---
 kitty_tests/glfw.py  | 55 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 82 insertions(+), 4 deletions(-)
 create mode 100644 kitty_tests/glfw.py

diff --git a/glfw/backend_utils.c b/glfw/backend_utils.c
index 87af03f5d..2557111db 100644
--- a/glfw/backend_utils.c
+++ b/glfw/backend_utils.c
@@ -332,3 +332,27 @@ pollForEvents(EventLoopData *eld, monotonic_t timeout, watch_callback_func displ
     }
     return read_ok;
 }
+
+// Duplicate a UTF-8 encoded string, keeping at most max_length bytes (plus
+// the terminating null byte) without cutting a multi-byte sequence in half.
+// This does not take combining characters into account.
+// Returns NULL if source is NULL or if the allocation fails; otherwise the
+// result is heap-allocated and the caller must free() it.
+GLFWAPI char* utf_8_strndup(const char* source, size_t max_length) {
+    if (!source) return NULL;
+    size_t length = strnlen(source, max_length);
+    if (length >= max_length) {
+        // strnlen() found no null byte in the first max_length bytes, so
+        // source[max_length] is still part of the string (or its terminator).
+        // Step back while the first dropped byte is a continuation byte (10xxxxxx).
+        for (length = max_length; length > 0; length--) {
+            if ((source[length] & 0xC0) != 0x80) break;
+        }
+    }
+
+    char* result = malloc(length + 1);
+    if (!result) return NULL;
+    memcpy(result, source, length);
+    result[length] = 0;
+    return result;
+}
diff --git a/glfw/backend_utils.h b/glfw/backend_utils.h
index 7ae4f90eb..cdbbd3eee 100644
--- a/glfw/backend_utils.h
+++ b/glfw/backend_utils.h
@@ -95,3 +95,4 @@ unsigned dispatchTimers(EventLoopData *eld);
 void finalizePollData(EventLoopData *eld);
 bool initPollData(EventLoopData *eld, int display_fd);
 void wakeupEventLoop(EventLoopData *eld);
+char* utf_8_strndup(const char* source, size_t max_length);
diff --git a/glfw/wl_window.c b/glfw/wl_window.c
index 166683c2c..64e5799a0 100644
--- a/glfw/wl_window.c
+++ b/glfw/wl_window.c
@@ -957,12 +957,10 @@ void _glfwPlatformSetWindowTitle(_GLFWwindow* window, const char* title)
 {
     if (window->wl.title)
         free(window->wl.title);
-    window->wl.title = _glfw_strdup(title);
     // Wayland cannot handle requests larger than ~8200 bytes. Sending
     // one causes an abort(). Since titles this large are meaningless anyway
-    // ensure they do not happen. One should really truncate ensuring valid UTF-8
-    // but I cant be bothered.
-    if (title && strnlen(title, 2048) >= 2048) window->wl.title[2048] = 0;
+    // ensure they do not happen.
+    window->wl.title = utf_8_strndup(title, 2048);
     if (window->wl.xdg.toplevel)
         xdg_toplevel_set_title(window->wl.xdg.toplevel, window->wl.title);
 }
diff --git a/kitty_tests/glfw.py b/kitty_tests/glfw.py
new file mode 100644
index 000000000..be80b7548
--- /dev/null
+++ b/kitty_tests/glfw.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+# vim:fileencoding=utf-8
+# License: GPL v3 Copyright: 2020, Kovid Goyal
+
+import sys
+import unittest
+from . import BaseTest
+
+_plat = sys.platform.lower()
+is_macos = 'darwin' in _plat
+
+
+class TestGLFW(BaseTest):
+
+    @unittest.skipIf(is_macos, 'Skipping test on macOS because glfw-cocoa.so is not built with backend_utils')
+    def test_utf_8_strndup(self):
+        """Check that utf_8_strndup() only ever cuts at UTF-8 character boundaries."""
+        import os
+        import ctypes
+
+        base = os.path.dirname(os.path.abspath(__file__))
+        backend_utils = os.path.join(base, '..', 'kitty', 'glfw-x11.so')
+        if not os.path.exists(backend_utils):
+            raise Exception('Module x11 not found')
+        lib = ctypes.CDLL(backend_utils)
+        utf_8_strndup = lib.utf_8_strndup
+        utf_8_strndup.restype = ctypes.c_char_p
+        utf_8_strndup.argtypes = (ctypes.c_char_p, ctypes.c_size_t)
+
+        def test(string):
+            # For every byte budget from 0 to len(string_bytes) + 1 the result
+            # must be the longest prefix of whole characters that fits.
+            string_bytes = bytes(string, 'utf-8')
+            prev_part_bytes = b''
+            prev_length_bytes = -1
+            for length in range(len(string) + 1):
+                part = string[:length]
+                part_bytes = bytes(part, 'utf-8')
+                length_bytes = len(part_bytes)
+                for length_bytes_2 in range(prev_length_bytes + 1, length_bytes):
+                    self.ae(utf_8_strndup(string_bytes, length_bytes_2), prev_part_bytes)
+                self.ae(utf_8_strndup(string_bytes, length_bytes), part_bytes)
+                prev_part_bytes = part_bytes
+                prev_length_bytes = length_bytes
+            self.ae(utf_8_strndup(string_bytes, len(string_bytes) + 1), string_bytes)  # Try to go one character after the end of the string
+
+        self.ae(utf_8_strndup(None, 2), None)
+        self.ae(utf_8_strndup(b'', 2), b'')
+
+        test('ö')
+        test('>a<')
+        test('>ä<')
+        test('>ế<')
+        test('>𐍈<')
+        test('∮ E⋅da = Q, n → ∞, 𐍈∑ f(i) = ∏ g(i)')