From fe1050fc0ed97d3f84b085cbc3884888db070051 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 30 Dec 2020 14:26:38 +0530
Subject: [PATCH] More work on disk cache

---
Review notes (git am ignores the text between "---" and the diffstat):

* The line breaks of this patch had been collapsed. They are restored here; the
  placement of blank lines and the 4-space indentation are inferred, and were
  chosen so that every hunk matches the line counts in its header.
* Text inside angle brackets was lost before this copy was made. The two
  includes added to disk-cache.c are now <fcntl.h> and <unistd.h>, which the
  added code needs for open()/openat()/O_* and read()/unlinkat(). The bare
  "#include" context line in disk-cache.c, the removed "#include" line in
  disk-cache.h and the e-mail address of the From: header could not be
  recovered and must be restored from the target tree before applying.
* ensure_state(): the cache directory is opened with O_RDONLY instead of
  O_RDWR, because open() of a directory with write access fails with EISDIR.
* read_from_disk_cache(): *data is set to NULL on entry and freed on failure,
  previously the buffer leaked when an error occurred after the allocation.
* pyxor_data(): an empty key raises ValueError instead of reaching the
  "data_sz % key_sz" division by zero in xor_data(); test_xor_data checks this.
* read_from_cache_entry(): the openat() retry loop tests "fd > -1" like the
  loop in ensure_state() does.
* disk-cache.h: read_from_disk_cache() is declared next to add_to_disk_cache().
* Hunk headers and the diffstat are updated for the added lines, the "index"
  lines still carry the blob ids of the original patch.

 kitty/disk-cache.c      | 199 +++++++++++++++++++++++++++++++++++++++-
 kitty/disk-cache.h      |   4 +-
 kitty_tests/graphics.py |  28 +++++-
 3 files changed, 225 insertions(+), 6 deletions(-)

diff --git a/kitty/disk-cache.c b/kitty/disk-cache.c
index a2ec15d91..901adc6ed 100644
--- a/kitty/disk-cache.c
+++ b/kitty/disk-cache.c
@@ -5,21 +5,53 @@
  * Distributed under terms of the GPL3 license.
  */
 
+#define EXTRA_INIT if (PyModule_AddFunctions(module, module_methods) != 0) return false;
+
 #include "disk-cache.h"
-#include "state.h"
+#include "uthash.h"
 #include "loop-utils.h"
 #include
+#include <fcntl.h>
+#include <unistd.h>
+
+
+// One cached item: its key, its in-memory data (if any) and the name and XOR key of the file used for it on disk
+typedef struct {
+    void *hash_key;
+    uint8_t *data;
+    size_t hash_keylen, data_sz;
+    bool written_to_disk;
+    uint8_t encryption_key[64];
+    char filename[8];
+    UT_hash_handle hh;
+} CacheEntry;
+
 
 typedef struct {
     PyObject_HEAD
     char *path;
+    int path_fd;
     pthread_mutex_t lock;
     pthread_t write_thread;
     bool thread_started, lock_inited, loop_data_inited, shutting_down, fully_initialized;
     LoopData loop_data;
     PyObject *rmtree;
+    CacheEntry *entries, currently_writing;
 } DiskCache;
 
+
+// Free e and the buffers it owns, also removing its file (if it has one) from the cache directory
+void
+free_cache_entry(const DiskCache *self, CacheEntry *e) {
+    if (e->hash_key) { free(e->hash_key); e->hash_key = NULL; }
+    if (e->data) { free(e->data); e->data = NULL; }
+    if (self->path_fd > -1 && e->filename[0]) {
+        unlinkat(self->path_fd, e->filename, 0);
+        e->filename[0] = 0;
+    }
+    free(e);
+}
+
 #define mutex(op) pthread_mutex_##op(&self->lock)
 
 static PyObject*
@@ -27,6 +59,7 @@ new(PyTypeObject *type, PyObject UNUSED *args, PyObject UNUSED *kwds) {
     DiskCache *self;
     self = (DiskCache*)type->tp_alloc(type, 0);
     if (self) {
+        self->path_fd = -1;
         PyObject *shutil = PyImport_ImportModule("shutil");
         if (!shutil) { Py_CLEAR(self); return NULL; }
         self->rmtree = PyObject_GetAttrString(shutil, "rmtree");
@@ -83,6 +116,18 @@ ensure_state(DiskCache *self) {
         if (PyErr_Occurred()) return false;
     }
 
+    if (self->path_fd < 0) {
+        while (self->path_fd < 0) {
+            // open() of a directory with write access fails with EISDIR, the fd is the directory handle for openat()/unlinkat()
+            self->path_fd = open(self->path, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
+            if (self->path_fd > -1 || errno != EINTR) break;
+        }
+        if (self->path_fd < 0) {
+            PyErr_SetFromErrnoWithFilename(PyExc_OSError, self->path);
+            return false;
+        }
+    }
+
     self->fully_initialized = true;
     return true;
 }
@@ -108,21 +153,163 @@ dealloc(DiskCache* self) {
         free_loop_data(&self->loop_data);
         self->loop_data_inited = false;
     }
-
+    if (self->entries) {
+        CacheEntry *tmp, *s;
+        HASH_ITER(hh, self->entries, s, tmp) {
+            HASH_DEL(self->entries, s);
+            free_cache_entry(self, s); s = NULL;
+        }
+        self->entries = NULL;
+    }
+    if (self->path_fd > -1) {
+        safe_close(self->path_fd, __FILE__, __LINE__);
+        self->path_fd = -1;
+    }
     if (self->path) {
         PyObject_CallFunction(self->rmtree, "sO", self->path, Py_True);
         free(self->path); self->path = NULL;
     }
+    if (self->currently_writing.hash_key) free(self->currently_writing.hash_key);
+    if (self->currently_writing.data) free(self->currently_writing.data);
     Py_CLEAR(self->rmtree);
     Py_TYPE(self)->tp_free((PyObject*)self);
 }
 
-#define PYWRAP0(name) static PyObject* py##name(DiskCache *self, PyObject *args UNUSED)
-PYWRAP0(ensure_state) {
+// Store a private copy of data under key, replacing the data of an existing entry with the same key, then
+// wake up the write loop. Returns false with a Python exception set on failure.
+bool
+add_to_disk_cache(PyObject *self_, const void *key, size_t key_sz, const uint8_t *data, size_t data_sz) {
+    DiskCache *self = (DiskCache*)self_;
+    if (!ensure_state(self)) return false;
+    CacheEntry *s = NULL;
+    uint8_t *copied_data = malloc(data_sz);
+    if (!copied_data) { PyErr_NoMemory(); return false; }
+    memcpy(copied_data, data, data_sz);
+
+    mutex(lock);
+    HASH_FIND(hh, self->entries, key, key_sz, s);
+    if (s == NULL) {
+        s = calloc(1, sizeof(CacheEntry));
+        if (!s) { PyErr_NoMemory(); goto end; }
+        s->hash_key = malloc(key_sz);
+        if (!s->hash_key) { free(s); PyErr_NoMemory(); goto end; }
+        s->hash_keylen = key_sz;
+        memcpy(s->hash_key, key, key_sz);
+        HASH_ADD_KEYPTR(hh, self->entries, s->hash_key, s->hash_keylen, s);
+    } else {
+        s->written_to_disk = false;
+        if (s->data) free(s->data);
+    }
+    s->data = copied_data; s->data_sz = data_sz; copied_data = NULL;
+end:
+    mutex(unlock);
+
+    if (copied_data) free(copied_data);
+    if (PyErr_Occurred()) return false;
+    wakeup_write_loop(self);
+    return true;
+}
+
+// XOR data in place with key, repeating key as often as needed. key_sz must not be zero.
+static void
+xor_data(const uint8_t* restrict key, const size_t key_sz, uint8_t* restrict data, const size_t data_sz) {
+    size_t unaligned_sz = data_sz % key_sz;
+    size_t aligned_sz = data_sz - unaligned_sz;
+    for (size_t offset = 0; offset < aligned_sz; offset += key_sz) {
+        for (size_t i = 0; i < key_sz; i++) data[offset + i] ^= key[i];
+    }
+    for (size_t i = 0; i < unaligned_sz; i++) data[aligned_sz + i] ^= key[i];
+}
+
+// Read exactly s->data_sz bytes from the file of s in the cache directory into dest.
+// Nothing is returned: on failure a Python exception is set, callers must check PyErr_Occurred().
+static void
+read_from_cache_entry(const DiskCache *self, const CacheEntry *s, uint8_t *dest) {
+    int fd = -1;
+    while (fd < 0) {
+        fd = openat(self->path_fd, s->filename, O_CLOEXEC | O_RDONLY);
+        if (fd > -1 || errno != EINTR) break;
+    }
+    if (fd < 0) {
+        PyErr_SetFromErrnoWithFilename(PyExc_OSError, s->filename);
+        return;
+    }
+    uint8_t *p = dest;
+    size_t sz = s->data_sz;
+    while (sz) {
+        ssize_t n = read(fd, p, sz);
+        if (n > 0) {
+            sz -= n;
+            p += n;
+            continue;
+        }
+        if (n < 0) {
+            if (errno == EINTR || errno == EAGAIN) continue;
+            PyErr_SetFromErrnoWithFilename(PyExc_OSError, s->filename);
+            goto end;
+        }
+        if (n == 0) {
+            PyErr_SetString(PyExc_OSError, "Disk cache file truncated");
+            goto end;
+        }
+    }
+end:
+    safe_close(fd, __FILE__, __LINE__);
+}
+
+// Look up key and return a malloc()ed copy of its data in *data and its size in *data_sz, the caller must free() *data.
+// Returns false with a Python exception set on failure, in which case *data is NULL.
+bool
+read_from_disk_cache(PyObject *self_, const void *key, size_t key_sz, uint8_t **data, size_t *data_sz) {
+    DiskCache *self = (DiskCache*)self_;
+    *data = NULL;
+    if (!ensure_state(self)) return false;
+    mutex(lock);
+    CacheEntry *s = NULL;
+    HASH_FIND(hh, self->entries, key, key_sz, s);
+    if (!s) { PyErr_SetString(PyExc_KeyError, "No cached entry with specified key found"); goto end; }
+
+    *data = (uint8_t*)malloc(s->data_sz);
+    if (!*data) { PyErr_NoMemory(); goto end; }
+    *data_sz = s->data_sz;
+
+    if (s->data) { memcpy(*data, s->data, *data_sz); }
+    else if (self->currently_writing.hash_key && self->currently_writing.hash_keylen == key_sz && memcmp(self->currently_writing.hash_key, key, key_sz) == 0) {
+        memcpy(*data, self->currently_writing.data, *data_sz);
+        xor_data(self->currently_writing.encryption_key, sizeof(self->currently_writing.encryption_key), *data, *data_sz);
+    }
+    else {
+        read_from_cache_entry(self, s, *data);
+        xor_data(s->encryption_key, sizeof(s->encryption_key), *data, *data_sz);
+    }
+end:
+    mutex(unlock);
+    if (PyErr_Occurred()) { free(*data); *data = NULL; return false; }
+    return true;
+}
+
+#define PYWRAP(name) static PyObject* py##name(DiskCache *self, PyObject *args)
+#define PA(fmt, ...) if (!PyArg_ParseTuple(args, fmt, __VA_ARGS__)) return NULL;
+PYWRAP(ensure_state) {
+    (void)args;
     ensure_state(self);
     Py_RETURN_NONE;
 }
 
+PYWRAP(xor_data) {
+    (void) self;
+    const char *key, *data;
+    Py_ssize_t keylen, data_sz;
+    PA("y#y#", &key, &keylen, &data, &data_sz);
+    if (keylen < 1) { PyErr_SetString(PyExc_ValueError, "The key must not be empty"); return NULL; }
+    PyObject *ans = PyBytes_FromStringAndSize(NULL, data_sz);
+    if (ans == NULL) return NULL;
+    void *dest = PyBytes_AS_STRING(ans);
+    memcpy(dest, data, data_sz);
+    xor_data((const uint8_t*)key, keylen, dest, data_sz);
+    return ans;
+}
+
 #define MW(name, arg_type) {#name, (PyCFunction)py##name, arg_type, NULL}
 static PyMethodDef methods[] = {
     MW(ensure_state, METH_NOARGS),
@@ -141,6 +328,10 @@ PyTypeObject DiskCache_Type = {
     .tp_new = new,
 };
 
+static PyMethodDef module_methods[] = {
+    MW(xor_data, METH_VARARGS),
+    {NULL, NULL, 0, NULL}  /* Sentinel */
+};
 INIT_TYPE(DiskCache)
 
 PyObject* create_disk_cache(void) { return new(&DiskCache_Type, NULL, NULL); }
diff --git a/kitty/disk-cache.h b/kitty/disk-cache.h
index e6de3a8ec..22b9e053d 100644
--- a/kitty/disk-cache.h
+++ b/kitty/disk-cache.h
@@ -6,6 +6,8 @@
 
 #pragma once
 
-#include
+#include "data-types.h"
 
 PyObject* create_disk_cache(void);
+bool add_to_disk_cache(PyObject *self, const void *key, size_t key_sz, const uint8_t *data, size_t data_sz);
+bool read_from_disk_cache(PyObject *self, const void *key, size_t key_sz, uint8_t **data, size_t *data_sz);
diff --git a/kitty_tests/graphics.py b/kitty_tests/graphics.py
index 289babb9e..c8d82c440 100644
--- a/kitty_tests/graphics.py
+++ b/kitty_tests/graphics.py
@@ -3,14 +3,18 @@
 # License: GPL v3 Copyright: 2016, Kovid Goyal
 
 import os
+import shutil
 import tempfile
 import unittest
 import zlib
+from itertools import cycle
 from base64 import standard_b64decode, standard_b64encode
 from io import BytesIO
 
+from kitty.constants import cache_dir
 from kitty.fast_data_types import (
-    load_png_data, parse_bytes, set_send_to_gpu, shm_unlink, shm_write
+    load_png_data, parse_bytes, set_send_to_gpu, shm_unlink, shm_write,
+    xor_data
 )
 
 from . import BaseTest
@@ -132,6 +136,28 @@ def put_helpers(self, cw, ch):
 
 
 class TestGraphics(BaseTest):
 
+    def setUp(self):
+        self.cache_dir = cache_dir.override_dir = tempfile.mkdtemp()
+        self.disk_cache_dir = os.path.join(self.cache_dir, 'disk-cache')
+
+    def tearDown(self):
+        shutil.rmtree(self.cache_dir)
+        cache_dir.override_dir = None
+
+    def test_xor_data(self):
+
+        def xor(skey, data):
+            ckey = cycle(bytearray(skey))
+            return bytes(bytearray(k ^ d for k, d in zip(ckey, bytearray(data))))
+
+        base_data = os.urandom(64)
+        key = os.urandom(len(base_data))
+        for base in (b'', base_data):
+            for extra in range(len(base_data)):
+                data = base + base_data[:extra]
+                self.assertEqual(xor_data(key, data), xor(key, data))
+        self.assertRaises(ValueError, xor_data, b'', base_data)
+
     def test_load_images(self):
         s, g, l, sl = load_helpers(self)