When driving a job on a file, avoid mallocs for each read

This commit is contained in:
Kovid Goyal 2021-09-19 13:32:19 +05:30
parent ecb0d1f325
commit d5d52ec8b9
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 10 additions and 8 deletions

View File

@@ -38,7 +38,7 @@ class StreamingJob:
if sz_of_unused_input > 0 and not self.finished: if sz_of_unused_input > 0 and not self.finished:
if no_more_data: if no_more_data:
raise RsyncError(f"{sz_of_unused_input} bytes of input data were not used") raise RsyncError(f"{sz_of_unused_input} bytes of input data were not used")
self.prev_unused_input = input_data[-sz_of_unused_input:] self.prev_unused_input = bytes(input_data[-sz_of_unused_input:])
if self.finished: if self.finished:
self.commit() self.commit()
elif self.calls_with_no_data > 3: elif self.calls_with_no_data > 3:
@@ -51,9 +51,10 @@ class StreamingJob:
def drive_job_on_file(f: IO[bytes], job: 'JobCapsule') -> Iterator[bytes]: def drive_job_on_file(f: IO[bytes], job: 'JobCapsule') -> Iterator[bytes]:
sj = StreamingJob(job) sj = StreamingJob(job)
input_buf = bytearray(IO_BUFFER_SIZE)
while not sj.finished: while not sj.finished:
input_data = f.read(IO_BUFFER_SIZE) sz = f.readinto(input_buf) # type: ignore
yield sj(input_data) yield sj(memoryview(input_buf)[:sz])
def signature_of_file(path: str) -> Iterator[bytes]: def signature_of_file(path: str) -> Iterator[bytes]:

View File

@@ -72,17 +72,18 @@ begin_create_signature(PyObject *self UNUSED, PyObject *args) {
if (!job) { PyErr_SetString(PyExc_TypeError, "Not a job capsule"); return NULL; } \ if (!job) { PyErr_SetString(PyExc_TypeError, "Not a job capsule"); return NULL; } \
#define FREE_BUFFER_AFTER_FUNCTION __attribute__((cleanup(PyBuffer_Release)))
static PyObject* static PyObject*
iter_job(PyObject *self UNUSED, PyObject *args) { iter_job(PyObject *self UNUSED, PyObject *args) {
Py_ssize_t input_data_size; FREE_BUFFER_AFTER_FUNCTION Py_buffer input_buf = {0};
char *input_data;
int eof = -1, expecting_output = 1; int eof = -1, expecting_output = 1;
PyObject *job_capsule; PyObject *job_capsule;
if (!PyArg_ParseTuple(args, "O!y#|pp", &PyCapsule_Type, &job_capsule, &input_data, &input_data_size, &eof, &expecting_output)) return NULL; if (!PyArg_ParseTuple(args, "O!y*|pp", &PyCapsule_Type, &job_capsule, &input_buf, &eof, &expecting_output)) return NULL;
GET_JOB_FROM_CAPSULE; GET_JOB_FROM_CAPSULE;
if (eof == -1) eof = input_data_size > 0 ? 0 : 1; if (eof == -1) eof = input_buf.len > 0 ? 0 : 1;
rs_buffers_t buffer = { rs_buffers_t buffer = {
.avail_in=input_data_size, .next_in = input_data, .eof_in=eof, .avail_in=input_buf.len, .next_in = input_buf.buf, .eof_in=eof,
.avail_out=expecting_output ? IO_BUFFER_SIZE : 64 .avail_out=expecting_output ? IO_BUFFER_SIZE : 64
}; };
PyObject *ans = PyBytes_FromStringAndSize(NULL, buffer.avail_out); PyObject *ans = PyBytes_FromStringAndSize(NULL, buffer.avail_out);