From d5d52ec8b95711303854c7a47b23d0838ebafb1d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 19 Sep 2021 13:32:19 +0530 Subject: [PATCH] When driving a job on a file avoid mallocs for each read --- kittens/transfer/librsync.py | 7 ++++--- kittens/transfer/rsync.c | 11 ++++++----- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/kittens/transfer/librsync.py b/kittens/transfer/librsync.py index d0b7291c9..395d9422e 100644 --- a/kittens/transfer/librsync.py +++ b/kittens/transfer/librsync.py @@ -38,7 +38,7 @@ class StreamingJob: if sz_of_unused_input > 0 and not self.finished: if no_more_data: raise RsyncError(f"{sz_of_unused_input} bytes of input data were not used") - self.prev_unused_input = input_data[-sz_of_unused_input:] + self.prev_unused_input = bytes(input_data[-sz_of_unused_input:]) if self.finished: self.commit() elif self.calls_with_no_data > 3: @@ -51,9 +51,10 @@ class StreamingJob: def drive_job_on_file(f: IO[bytes], job: 'JobCapsule') -> Iterator[bytes]: sj = StreamingJob(job) + input_buf = bytearray(IO_BUFFER_SIZE) while not sj.finished: - input_data = f.read(IO_BUFFER_SIZE) - yield sj(input_data) + sz = f.readinto(input_buf) # type: ignore + yield sj(memoryview(input_buf)[:sz]) def signature_of_file(path: str) -> Iterator[bytes]: diff --git a/kittens/transfer/rsync.c b/kittens/transfer/rsync.c index 61bc60837..35016625f 100644 --- a/kittens/transfer/rsync.c +++ b/kittens/transfer/rsync.c @@ -72,17 +72,18 @@ begin_create_signature(PyObject *self UNUSED, PyObject *args) { if (!job) { PyErr_SetString(PyExc_TypeError, "Not a job capsule"); return NULL; } \ +#define FREE_BUFFER_AFTER_FUNCTION __attribute__((cleanup(PyBuffer_Release))) + static PyObject* iter_job(PyObject *self UNUSED, PyObject *args) { - Py_ssize_t input_data_size; - char *input_data; + FREE_BUFFER_AFTER_FUNCTION Py_buffer input_buf = {0}; int eof = -1, expecting_output = 1; PyObject *job_capsule; - if (!PyArg_ParseTuple(args, "O!y#|pp", &PyCapsule_Type, &job_capsule, &input_data, &input_data_size, &eof, &expecting_output)) return NULL; + if (!PyArg_ParseTuple(args, "O!y*|pp", &PyCapsule_Type, &job_capsule, &input_buf, &eof, &expecting_output)) return NULL; GET_JOB_FROM_CAPSULE; - if (eof == -1) eof = input_data_size > 0 ? 0 : 1; + if (eof == -1) eof = input_buf.len > 0 ? 0 : 1; rs_buffers_t buffer = { - .avail_in=input_data_size, .next_in = input_data, .eof_in=eof, + .avail_in=input_buf.len, .next_in = input_buf.buf, .eof_in=eof, .avail_out=expecting_output ? IO_BUFFER_SIZE : 64 }; PyObject *ans = PyBytes_FromStringAndSize(NULL, buffer.avail_out);