From 19bce0c23c9c4ac76dee58fe772203e97a85cd46 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 23 Jan 2019 19:38:08 +0530
Subject: [PATCH] Start work on a choose kitten for fuzzy selection

---
 kittens/choose/__init__.py         |   0
 kittens/choose/choose-data-types.h | 103 ++++++++++
 kittens/choose/main.c              | 290 +++++++++++++++++++++++++++++
 kittens/choose/output.c            | 119 ++++++++++++
 kittens/choose/score.c             | 201 ++++++++++++++++++++
 kittens/choose/unix_compat.c       |  52 +++++
 kittens/choose/vector.h            |  48 +++++
 kittens/choose/windows_compat.c    | 113 +++++++++++
 kitty/charsets.c                   |   2 +-
 kitty/charsets.h                   |   2 +-
 setup.py                           |  25 ++-
 11 files changed, 948 insertions(+), 7 deletions(-)
 create mode 100644 kittens/choose/__init__.py
 create mode 100644 kittens/choose/choose-data-types.h
 create mode 100644 kittens/choose/main.c
 create mode 100644 kittens/choose/output.c
 create mode 100644 kittens/choose/score.c
 create mode 100644 kittens/choose/unix_compat.c
 create mode 100644 kittens/choose/vector.h
 create mode 100644 kittens/choose/windows_compat.c

diff --git a/kittens/choose/__init__.py b/kittens/choose/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/kittens/choose/choose-data-types.h b/kittens/choose/choose-data-types.h
new file mode 100644
index 000000000..4f67f5941
--- /dev/null
+++ b/kittens/choose/choose-data-types.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2017 Kovid Goyal
+ *
+ * Distributed under terms of the GPL3 license.
+ */
+
+#pragma once
+#if defined(_MSC_VER)
+#define ISWINDOWS
+#define STDCALL __stdcall
+#ifndef ssize_t
+#include <BaseTsd.h>
+typedef SSIZE_T ssize_t;
+#ifndef SSIZE_MAX
+#if defined(_WIN64)
+    #define SSIZE_MAX _I64_MAX
+#else
+    #define SSIZE_MAX LONG_MAX
+#endif
+#endif
+#endif
+#else
+#define STDCALL
+#endif
+#ifndef _POSIX_C_SOURCE
+#define _POSIX_C_SOURCE 200809L
+#endif
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#ifndef ISWINDOWS
+#include <sys/types.h>
+#endif
+
+#include "vector.h"
+
+// len_t bounds every haystack/needle length to LEN_MAX; text_t holds one Unicode code point
+typedef uint8_t len_t;
+typedef uint32_t text_t;
+
+#define LEN_MAX UINT8_MAX
+#define UNUSED(x) (void)(x)
+#define UTF8_ACCEPT 0
+#define UTF8_REJECT 1
+#define IS_LOWERCASE(x) ((x) >= 'a' && (x) <= 'z')
+#define IS_UPPERCASE(x) ((x) >= 'A' && (x) <= 'Z')
+#define LOWERCASE(x) (IS_UPPERCASE(x) ? (x) + 32 : (x))
+#define arraysz(x) (sizeof(x)/sizeof((x)[0]))
+
+// One input line together with its scoring result
+typedef struct {
+    text_t* src;            // decoded code points of the line
+    ssize_t src_sz;         // number of code points in src
+    len_t haystack_len;     // src_sz clamped to LEN_MAX; only this prefix is scored
+    len_t *positions;       // needle_len slots receiving the matched indices
+    double score;           // 0 means the needle did not match
+    ssize_t idx;            // original input order, used as the sort tie-breaker
+} Candidate;
+
+// State shared by all scoring jobs plus the accumulated output buffer
+typedef struct {
+    Candidate *haystack;
+    size_t haystack_count;
+    text_t level1[LEN_MAX], level2[LEN_MAX], level3[LEN_MAX], needle[LEN_MAX];
+    len_t level1_len, level2_len, level3_len, needle_len;
+    size_t haystack_size;   // sum of haystack_len over all candidates
+    text_t *output;
+    size_t output_sz, output_pos;
+    int oom;                // set non-zero when an allocation or thread start failed
+} GlobalData;
+
+// Per-call settings decoded from the Python arguments
+typedef struct {
+    bool output_positions;
+    size_t limit;           // 0 means no limit
+    int num_threads;
+    text_t mark_before[128], mark_after[128];
+    size_t mark_before_sz, mark_after_sz;
+} Options;
+
+VECTOR_OF(len_t, Positions)
+VECTOR_OF(text_t, Chars)
+VECTOR_OF(Candidate, Candidates)
+
+
+void output_results(GlobalData *, Candidate *haystack, size_t count, Options *opts, len_t needle_len, text_t delim);
+void* alloc_workspace(len_t max_haystack_len, GlobalData*);
+void* free_workspace(void *v);
+double score_item(void *v, text_t *haystack, len_t haystack_len, len_t *match_positions);
+unsigned int encode_codepoint(text_t ch, char* dest);
+size_t unescape(const char *src, char *dest, size_t destlen);
+int cpu_count(void);
+void* alloc_threads(size_t num_threads);
+#ifdef ISWINDOWS
+bool start_thread(void* threads, size_t i, unsigned int (STDCALL *start_routine) (void *), void *arg);
+ssize_t getdelim(char **lineptr, size_t *n, int delim, FILE *stream);
+#else
+bool start_thread(void* threads, size_t i, void *(*start_routine) (void *), void *arg);
+#endif
+void wait_for_thread(void *threads, size_t i);
+void free_threads(void *threads);
diff --git a/kittens/choose/main.c b/kittens/choose/main.c
new file mode 100644
index 000000000..ca4907a00
--- /dev/null
+++ b/kittens/choose/main.c
@@ -0,0 +1,290 @@
+/*
+ * main.c
+ * Copyright (C) 2017 Kovid Goyal
+ *
+ * Distributed under terms of the GPL3 license.
+ */
+
+// Python.h must be included before any standard header
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+
+#include "choose-data-types.h"
+#include "charsets.h"
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#ifndef ISWINDOWS
+#include <unistd.h>
+#endif
+
+// Slice of the haystack that is scored by a single worker
+typedef struct {
+    size_t start, count;
+    void *workspace;
+    len_t max_haystack_len;
+    bool started;
+    GlobalData *global;
+} JobData;
+
+
+// Thread entry point: score every candidate in [start, start + count).
+// Takes void* so that its type matches the start routine expected by start_thread().
+static unsigned int STDCALL
+run_scoring(void *data) {
+    JobData *job_data = (JobData*)data;
+    GlobalData *global = job_data->global;
+    for (size_t i = job_data->start; i < job_data->start + job_data->count; i++) {
+        global->haystack[i].score = score_item(job_data->workspace, global->haystack[i].src, global->haystack[i].haystack_len, global->haystack[i].positions);
+    }
+    return 0;
+}
+
+#ifdef ISWINDOWS
+#define START_FUNC run_scoring
+#else
+// pthreads expects a start routine returning void*, so adapt run_scoring()
+static void*
+run_scoring_pthreads(void *job_data) {
+    run_scoring(job_data);
+    return NULL;
+}
+#define START_FUNC run_scoring_pthreads
+#endif
+
+// Build the job for block number i. A job owns the half-open range
+// [i * blocksz, (i + 1) * blocksz) clipped to the haystack, so jobs never overlap.
+static JobData*
+create_job(size_t i, size_t blocksz, GlobalData *global) {
+    JobData *ans = (JobData*)calloc(1, sizeof(JobData));
+    if (ans == NULL) return NULL;
+    ans->start = i * blocksz;
+    if (ans->start >= global->haystack_count) ans->count = 0;
+    else ans->count = MIN(blocksz, global->haystack_count - ans->start);
+    ans->max_haystack_len = 0;
+    for (size_t k = ans->start; k < ans->start + ans->count; k++) ans->max_haystack_len = MAX(ans->max_haystack_len, global->haystack[k].haystack_len);
+    if (ans->count > 0) {
+        ans->workspace = alloc_workspace(ans->max_haystack_len, global);
+        if (!ans->workspace) { free(ans); return NULL; }
+    }
+    ans->global = global;
+    return ans;
+}
+
+// Release a job and its workspace; always returns NULL so callers can reset their pointer
+static JobData*
+free_job(JobData *job) {
+    if (job) {
+        if (job->workspace) free_workspace(job->workspace);
+        free(job);
+    }
+    return NULL;
+}
+
+
+// Score the whole haystack, spreading the work over threads when the haystack is large enough.
+// Returns 0 on success and 1 if memory or a thread could not be obtained.
+static int
+run_threaded(int num_threads_asked, GlobalData *global) {
+    int ret = 0;
+    size_t i, blocksz;
+    int wanted = num_threads_asked > 0 ? num_threads_asked : cpu_count();
+    size_t num_threads = (size_t)MAX(1, wanted);
+    if (global->haystack_size < 10000) num_threads = 1;
+    /* printf("num_threads: %lu asked: %d sysconf: %ld\n", num_threads, num_threads_asked, sysconf(_SC_NPROCESSORS_ONLN)); */
+
+    void *threads = alloc_threads(num_threads);
+    JobData **job_data = calloc(num_threads, sizeof(JobData*));
+    if (threads == NULL || job_data == NULL) { ret = 1; goto end; }
+
+    blocksz = global->haystack_count / num_threads + global->haystack_count % num_threads;
+
+    for (i = 0; i < num_threads; i++) {
+        job_data[i] = create_job(i, blocksz, global);
+        if (job_data[i] == NULL) { ret = 1; goto end; }
+    }
+
+    if (num_threads == 1) {
+        run_scoring(job_data[0]);
+    } else {
+        for (i = 0; i < num_threads; i++) {
+            job_data[i]->started = false;
+            if (job_data[i]->count > 0) {
+                if (!start_thread(threads, i, START_FUNC, job_data[i])) ret = 1;
+                else job_data[i]->started = true;
+            }
+        }
+    }
+
+end:
+    // job_data is NULL when its own allocation failed, so guard every access to it
+    if (job_data) {
+        if (num_threads > 1) {
+            for (i = 0; i < num_threads; i++) {
+                if (job_data[i] && job_data[i]->started) wait_for_thread(threads, i);
+            }
+        }
+        for (i = 0; i < num_threads; i++) job_data[i] = free_job(job_data[i]);
+    }
+    free(job_data);
+    free_threads(threads);
+    return ret;
+}
+
+
+// Decode every line into code points, score all candidates and append the results
+// to global->output. Returns 0 on success; on failure global->oom is set as well.
+static int
+run_search(Options *opts, GlobalData *global, const char * const *lines, const size_t* sizes, size_t num_lines) {
+    const char *linebuf = NULL;
+    size_t idx = 0;
+    ssize_t sz = 0;
+    int ret = 0;
+    len_t *positions = NULL;
+    Candidates candidates = {0};
+    Chars chars = {0};
+
+    ALLOC_VEC(text_t, chars, 8192 * 20);
+    ALLOC_VEC(Candidate, candidates, 8192);
+    if (chars.data == NULL || candidates.data == NULL) { FREE_VEC(chars); FREE_VEC(candidates); return 1; }
+
+    for (size_t i = 0; i < num_lines; i++) {
+        sz = sizes[i];
+        linebuf = lines[i];
+        if (sz > 0) {
+            ENSURE_SPACE(text_t, chars, sz);
+            ENSURE_SPACE(Candidate, candidates, 1);
+            // decode_utf8_string() documents that dest must be a zeroed array
+            memset(&(NEXT(chars)), 0, sizeof(text_t) * sz);
+            sz = decode_utf8_string(linebuf, sz, &(NEXT(chars)));
+            NEXT(candidates).src_sz = sz;
+            NEXT(candidates).haystack_len = (len_t)(MIN(LEN_MAX, sz));
+            global->haystack_size += NEXT(candidates).haystack_len;
+            NEXT(candidates).idx = idx++;
+            INC(candidates, 1); INC(chars, sz);
+        }
+    }
+
+    // ENSURE_SPACE leaves the loop with ret set when a vector cannot grow;
+    // an input without any non-empty line simply produces no output
+    if (ret == 0 && SIZE(candidates) > 0) {
+        // Prepare the haystack, allocating space for the positions arrays and setting
+        // up the src pointers to point to the correct locations
+        Candidate *haystack = &ITEM(candidates, 0);
+        positions = (len_t*)calloc(SIZE(candidates), sizeof(len_t) * global->needle_len);
+        if (positions) {
+            text_t *cdata = &ITEM(chars, 0);
+            for (size_t i = 0, off = 0; i < SIZE(candidates); i++) {
+                haystack[i].positions = positions + (i * global->needle_len);
+                haystack[i].src = cdata + off;
+                off += haystack[i].src_sz;
+            }
+            global->haystack = haystack;
+            global->haystack_count = SIZE(candidates);
+            ret = run_threaded(opts->num_threads, global);
+            if (ret == 0) output_results(global, haystack, SIZE(candidates), opts, global->needle_len, '\n');
+            else { REPORT_OOM; }
+        } else { ret = 1; REPORT_OOM; }
+    }
+
+    FREE_VEC(chars); free(positions); FREE_VEC(candidates);
+    return ret;
+}
+
+// Copy at most dest_sz code points of src into dest and return how many were copied.
+// Longer strings are truncated, so the result never exceeds the size of the destination.
+static size_t
+copy_unicode_object(PyObject *src, text_t *dest, size_t dest_sz) {
+    if (PyUnicode_READY(src) != 0) return 0;
+    int kind = PyUnicode_KIND(src);
+    void *data = PyUnicode_DATA(src);
+    size_t len = (size_t)PyUnicode_GET_LENGTH(src);
+    if (len > dest_sz) len = dest_sz;
+    for (size_t i = 0; i < len; i++) {
+        dest[i] = PyUnicode_READ(kind, data, i);
+    }
+    return len;
+}
+
+// match(lines, levels, needle, output_positions, limit, num_threads, mark_before, mark_after)
+// lines is a list of bytes objects and levels a tuple of three strings. Returns the
+// formatted matches as a str, or None when nothing was written to the output.
+static PyObject*
+match(PyObject *self, PyObject *args) {
+    (void)(self);
+    int output_positions;
+    unsigned long limit;
+    PyObject *lines, *levels, *needle, *mark_before, *mark_after;
+    Options opts = {0};
+    GlobalData global = {0};
+    // For "O!" the type object comes first, followed by the address receiving the object
+    if (!PyArg_ParseTuple(args, "O!O!O!pkiO!O!",
+                &PyList_Type, &lines, &PyTuple_Type, &levels, &PyUnicode_Type, &needle,
+                &output_positions, &limit, &opts.num_threads,
+                &PyUnicode_Type, &mark_before, &PyUnicode_Type, &mark_after
+    )) return NULL;
+    if (PyTuple_GET_SIZE(levels) != 3) { PyErr_SetString(PyExc_TypeError, "levels must be a tuple of three strings"); return NULL; }
+    for (Py_ssize_t i = 0; i < 3; i++) {
+        if (!PyUnicode_Check(PyTuple_GET_ITEM(levels, i))) { PyErr_SetString(PyExc_TypeError, "levels must be a tuple of three strings"); return NULL; }
+    }
+    opts.output_positions = output_positions ? true : false;
+    opts.limit = limit;
+    global.level1_len = copy_unicode_object(PyTuple_GET_ITEM(levels, 0), global.level1, arraysz(global.level1));
+    global.level2_len = copy_unicode_object(PyTuple_GET_ITEM(levels, 1), global.level2, arraysz(global.level2));
+    global.level3_len = copy_unicode_object(PyTuple_GET_ITEM(levels, 2), global.level3, arraysz(global.level3));
+    global.needle_len = copy_unicode_object(needle, global.needle, arraysz(global.needle));
+    opts.mark_before_sz = copy_unicode_object(mark_before, opts.mark_before, arraysz(opts.mark_before));
+    opts.mark_after_sz = copy_unicode_object(mark_after, opts.mark_after, arraysz(opts.mark_after));
+    if (PyErr_Occurred()) return NULL;
+    // The scorer indexes with needle_len - 1, so an empty needle cannot be searched for
+    if (global.needle_len == 0) { PyErr_SetString(PyExc_ValueError, "needle must not be empty"); return NULL; }
+    size_t num_lines = PyList_GET_SIZE(lines);
+    // calloc() may return NULL for a zero sized request, so always ask for at least one slot
+    char **clines = calloc(MAX(num_lines, 1), sizeof(char*));
+    size_t *sizes = calloc(MAX(num_lines, 1), sizeof(size_t));
+    if (!clines || !sizes) { free(clines); free(sizes); return PyErr_NoMemory(); }
+    for (size_t i = 0; i < num_lines; i++) {
+        PyObject *line = PyList_GET_ITEM(lines, i);
+        if (!PyBytes_Check(line)) {
+            free(clines); free(sizes);
+            PyErr_SetString(PyExc_TypeError, "lines must be a list of bytes objects");
+            return NULL;
+        }
+        clines[i] = PyBytes_AS_STRING(line);
+        sizes[i] = PyBytes_GET_SIZE(line);
+    }
+    Py_BEGIN_ALLOW_THREADS;
+    run_search(&opts, &global, (const char* const *)clines, sizes, num_lines);
+    Py_END_ALLOW_THREADS;
+    free(clines); free(sizes);
+    if (global.oom) { free(global.output); return PyErr_NoMemory(); }
+    if (global.output) {
+        PyObject *ans = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, global.output, global.output_pos);
+        free(global.output);
+        return ans;
+    }
+    Py_RETURN_NONE;
+}
+
+static PyMethodDef module_methods[] = {
+    {"match", match, METH_VARARGS, ""},
+    {NULL, NULL, 0, NULL}  /* Sentinel */
+};
+
+static struct PyModuleDef module = {
+   .m_base = PyModuleDef_HEAD_INIT,
+   .m_name = "subseq_matcher",   /* name of module */
+   .m_doc = NULL,
+   .m_size = -1,
+   .m_methods = module_methods
+};
+
+PyMODINIT_FUNC
+PyInit_subseq_matcher(void) {
+    PyObject *m;
+
+    m = PyModule_Create(&module);
+    if (m == NULL) return NULL;
+    return m;
+}
diff --git a/kittens/choose/output.c b/kittens/choose/output.c
new file mode 100644
index 000000000..2f8fed166
--- /dev/null
+++ b/kittens/choose/output.c
@@ -0,0 +1,119 @@
+/*
+ * output.c
+ * Copyright (C) 2017 Kovid Goyal
+ *
+ * Distributed under terms of the GPL3 license.
+ */
+
+#include "choose-data-types.h"
+#include <stdlib.h>
+#include <string.h>
+#ifdef ISWINDOWS
+#include <io.h>
+#define STDOUT_FILENO 1
+static inline ssize_t ms_write(int fd, const void* buf, size_t count) { return _write(fd, buf, (unsigned int)count); }
+#define write ms_write
+#else
+#include <unistd.h>
+#endif
+#include <wchar.h>
+
+
+#define FIELD(x, which) (((const Candidate*)(x))->which)
+
+// Make room for sz more code points in global->output. On failure the existing
+// buffer is kept intact, global->oom is set and false is returned.
+static inline bool
+ensure_space(GlobalData *global, size_t sz) {
+    if (global->output_sz < sz + global->output_pos) {
+        size_t new_sz = global->output_sz + MAX(sz, (64 * 1024));
+        text_t *grown = realloc(global->output, sizeof(text_t) * new_sz);
+        if (!grown) {
+            global->oom = 1;
+            return false;
+        }
+        global->output = grown;
+        global->output_sz = new_sz;
+    }
+    return true;
+}
+
+// Append sz code points to the output buffer; nothing is appended if it cannot grow
+static inline void
+output_text(GlobalData *global, const text_t *data, size_t sz) {
+    if (sz > 0 && ensure_space(global, sz)) {
+        memcpy(global->output + global->output_pos, data, sizeof(text_t) * sz);
+        global->output_pos += sz;
+    }
+}
+
+// qsort comparator: higher scores first, ties keep the original input order
+static int
+cmpscore(const void *a, const void *b) {
+    double sa = FIELD(a, score), sb = FIELD(b, score);
+    if (sa != sb) return (sa > sb) ? -1 : 1;
+    // Compare rather than subtract: the difference of two ssize_t need not fit in an int
+    ssize_t ia = FIELD(a, idx), ib = FIELD(b, idx);
+    return (ia > ib) - (ia < ib);
+}
+
+// Emit src with every matched character wrapped in the before/after marks
+static void
+output_with_marks(GlobalData *global, Options *opts, text_t *src, size_t src_sz, len_t *positions, len_t poslen) {
+    // Without positions there is nothing to mark and positions[poslen - 1] would be out of bounds
+    if (poslen == 0) { output_text(global, src, src_sz); return; }
+    size_t pos, i = 0;
+    for (pos = 0; pos < poslen; pos++, i++) {
+        output_text(global, src + i, MIN(src_sz, positions[pos]) - i);
+        i = positions[pos];
+        if (i < src_sz) {
+            if (opts->mark_before_sz > 0) output_text(global, opts->mark_before, opts->mark_before_sz);
+            output_text(global, src + i, 1);
+            if (opts->mark_after_sz > 0) output_text(global, opts->mark_after, opts->mark_after_sz);
+        }
+    }
+    i = positions[poslen - 1];
+    if (i + 1 < src_sz) output_text(global, src + i + 1, src_sz - i - 1);
+}
+
+// Emit the matched indices as "p1,p2,...,pn:" in front of the candidate text
+static void
+output_positions(GlobalData *global, len_t *positions, len_t num) {
+    wchar_t buf[128];
+    for (len_t i = 0; i < num; i++) {
+        int ndigits = swprintf(buf, sizeof(buf)/sizeof(buf[0]), L"%u", (unsigned int)positions[i]);
+        if (ndigits > 0 && ensure_space(global, (size_t)ndigits + 1)) {
+            for (int k = 0; k < ndigits; k++) global->output[global->output_pos++] = buf[k];
+            // The last position is terminated by ':', all the others are followed by ','
+            global->output[global->output_pos++] = (i == num - 1) ? ':' : ',';
+        }
+    }
+}
+
+
+// Write one candidate: optional positions prefix, the (optionally marked) text, then delim
+static void
+output_result(GlobalData *global, Candidate *c, Options *opts, len_t needle_len, text_t delim) {
+    if (opts->output_positions) output_positions(global, c->positions, needle_len);
+    if (opts->mark_before_sz > 0 || opts->mark_after_sz > 0) {
+        output_with_marks(global, opts, c->src, c->src_sz, c->positions, needle_len);
+    } else {
+        output_text(global, c->src, c->src_sz);
+    }
+    output_text(global, &delim, 1);
+}
+
+
+// Sort the haystack best-first and write the matching candidates found among the
+// first opts->limit entries (all entries when the limit is 0).
+void
+output_results(GlobalData *global, Candidate *haystack, size_t count, Options *opts, len_t needle_len, text_t delim) {
+    Candidate *c;
+    qsort(haystack, count, sizeof(*haystack), cmpscore);
+    // Never look past the end of the haystack, whatever limit was requested
+    size_t left = opts->limit > 0 ? MIN(opts->limit, count) : count;
+    for (size_t i = 0; i < left; i++) {
+        c = haystack + i;
+        if (c->score > 0) output_result(global, c, opts, needle_len, delim);
+    }
+}
diff --git a/kittens/choose/score.c b/kittens/choose/score.c
new file mode 100644
index 000000000..5c4cf6af1
--- /dev/null
+++ b/kittens/choose/score.c
@@ -0,0 +1,201 @@
+/*
+ * score.c
+ * Copyright (C) 2017 Kovid Goyal
+ *
+ * Distributed under terms of the GPL3 license.
+ */
+
+#include "choose-data-types.h"
+#include <stdlib.h>
+#include <string.h>
+
+// Scratch state that is reused for every haystack scored through one workspace
+typedef struct {
+    len_t *positions_buf;  // buffer to store positions for every char in needle
+    len_t **positions;  // Array of pointers into positions_buf
+    len_t *positions_count;  // Array of counts for positions
+    len_t needle_len;  // Length of the needle
+    len_t max_haystack_len;  // Max length of a string in the haystack
+    len_t haystack_len;  // Length of the current string in the haystack
+    len_t *address;  // Array of offsets into the positions array
+    double max_score_per_char;
+    uint8_t *level_factors;  // Array of score factors for every character in the current haystack that matches a character in the needle
+    text_t *level1, *level2, *level3;  // The characters in the levels
+    len_t level1_len, level2_len, level3_len;
+    text_t *needle;  // The current needle
+    text_t *haystack;  // The current haystack
+} WorkSpace;
+
+// Allocate the scratch buffers needed to score haystacks of up to max_haystack_len
+// characters against global->needle. Returns NULL if memory cannot be obtained.
+void*
+alloc_workspace(len_t max_haystack_len, GlobalData *global) {
+    WorkSpace *ans = calloc(1, sizeof(WorkSpace));
+    if (ans == NULL) return NULL;
+    // Ask for at least one element: calloc() may return NULL for a zero sized request
+    size_t needle_len = MAX(1, global->needle_len), haystack_len = MAX(1, max_haystack_len);
+    ans->positions_buf = (len_t*) calloc(needle_len, sizeof(len_t) * haystack_len);
+    ans->positions = (len_t**)calloc(needle_len, sizeof(len_t*));
+    ans->positions_count = (len_t*)calloc(2*needle_len, sizeof(len_t));
+    ans->level_factors = (uint8_t*)calloc(haystack_len, sizeof(uint8_t));
+    if (ans->positions == NULL || ans->positions_buf == NULL || ans->positions_count == NULL || ans->level_factors == NULL) { free_workspace(ans); return NULL; }
+    ans->needle = global->needle;
+    ans->needle_len = global->needle_len;
+    ans->max_haystack_len = max_haystack_len;
+    ans->level1 = global->level1; ans->level2 = global->level2; ans->level3 = global->level3;
+    ans->level1_len = global->level1_len; ans->level2_len = global->level2_len; ans->level3_len = global->level3_len;
+    // address is the second half of the positions_count allocation (pointer arithmetic already scales by the element size)
+    ans->address = ans->positions_count + global->needle_len;
+    for (len_t i = 0; i < global->needle_len; i++) ans->positions[i] = ans->positions_buf + i * max_haystack_len;
+    return ans;
+}
+
+#define NUKE(x) free(x); x = NULL;
+
+// Release a workspace created by alloc_workspace(); accepts NULL. Always returns NULL.
+void*
+free_workspace(void *v) {
+    WorkSpace *w = (WorkSpace*)v;
+    if (w == NULL) return NULL;
+    NUKE(w->positions_buf);
+    NUKE(w->positions);
+    NUKE(w->positions_count);
+    NUKE(w->level_factors);
+    free(w);
+    return NULL;
+}
+
+// Linear membership test for ch in text[0..sz)
+static inline bool
+has_char(text_t *text, len_t sz, text_t ch) {
+    for(len_t i = 0; i < sz; i++) {
+        if(text[i] == ch) return true;
+    }
+    return false;
+}
+
+// Factor for a match at `current` given the character before it (`last`, lower-cased for the lookup):
+// 90/80/70 when last is a level1/level2/level3 character, 80 at a camelCase boundary, else 0
+static inline uint8_t
+level_factor_for(text_t current, text_t last, WorkSpace *w) {
+    text_t lch = LOWERCASE(last);
+    if (has_char(w->level1, w->level1_len, lch)) return 90;
+    if (has_char(w->level2, w->level2_len, lch)) return 80;
+    if (IS_LOWERCASE(last) && IS_UPPERCASE(current)) return 80; // CamelCase
+    if (has_char(w->level3, w->level3_len, lch)) return 70;
+    return 0;
+}
+
+static void
+init_workspace(WorkSpace *w, text_t *haystack, len_t haystack_len) {
+    // Calculate the positions and level_factors arrays for the specified haystack
+    bool level_factor_calculated = false;
+    memset(w->positions_count, 0, sizeof(*(w->positions_count)) * 2 * w->needle_len);
+    memset(w->level_factors, 0, sizeof(*(w->level_factors)) * w->max_haystack_len);
+    for (len_t i = 0; i < haystack_len; i++) {
+        level_factor_calculated = false;
+        for (len_t j = 0; j < w->needle_len; j++) {
+            if (w->needle[j] == LOWERCASE(haystack[i])) {
+                if (!level_factor_calculated) {
+                    level_factor_calculated = true;
+                    w->level_factors[i] = i > 0 ? level_factor_for(haystack[i], haystack[i-1], w) : 0;
+                }
+                w->positions[j][w->positions_count[j]++] = i;
+            }
+        }
+    }
+    w->haystack = haystack;
+    w->haystack_len = haystack_len;
+    w->max_score_per_char = (1.0 / haystack_len + 1.0 / w->needle_len) / 2.0;
+}
+
+
+// Cheap pre-check: can the needle be found as an in-order subsequence of the haystack?
+static inline bool
+has_atleast_one_match(WorkSpace *w) {
+    int p = -1;
+    bool found;
+    for (len_t i = 0; i < w->needle_len; i++) {
+        if (w->positions_count[i] == 0) return false;  // Not all characters of the needle are present in the haystack
+        found = false;
+        for (len_t j = 0; j < w->positions_count[i]; j++) {
+            if (w->positions[i][j] > p) { p = w->positions[i][j]; found = true; break; }
+        }
+        if (!found) return false;  // Characters of needle not present in sequence in haystack
+    }
+    return true;
+}
+
+#define POSITION(x) w->positions[x][w->address[x]]
+
+// Advance address to the next combination of candidate positions; the entry of the last
+// needle character changes fastest. Returns false once every combination has been visited.
+static inline bool
+increment_address(WorkSpace *w) {
+    len_t pos = w->needle_len - 1;
+    while(true) {
+        w->address[pos]++;
+        if (w->address[pos] < w->positions_count[pos]) return true;
+        if (pos == 0) break;
+        w->address[pos--] = 0;
+    }
+    return false;
+}
+
+static inline bool
+address_is_monotonic(WorkSpace *w) {
+    // Check if the character positions pointed to by the current address are monotonic
+    for (len_t i = 1; i < w->needle_len; i++) {
+        if (POSITION(i) <= POSITION(i-1)) return false;
+    }
+    return true;
+}
+
+// Score the combination of positions selected by the current address
+static inline double
+calc_score(WorkSpace *w) {
+    double ans = 0;
+    len_t distance, pos;
+    for (len_t i = 0; i < w->needle_len; i++) {
+        pos = POSITION(i);
+        if (i == 0) distance = pos < LEN_MAX ? pos + 1 : LEN_MAX;
+        else {
+            distance = pos - POSITION(i-1);
+            if (distance < 2) {
+                ans += w->max_score_per_char;  // consecutive characters
+                continue;
+            }
+        }
+        // NOTE(review): 100 / level_factor is above 1, so a special location earns more than a consecutive
+        // match does; max_score_per_char * level_factor / 100 may have been intended - confirm before changing
+        if (w->level_factors[pos]) ans += (100 * w->max_score_per_char) / w->level_factors[pos];  // at a special location
+        else ans += (0.75 * w->max_score_per_char) / distance;
+    }
+    return ans;
+}
+
+// Try every combination of positions and keep the best scoring monotonic one.
+// match_positions is only written when a score above zero is found.
+static double
+process_item(WorkSpace *w, len_t *match_positions) {
+    double highscore = 0, score;
+    do {
+        if (!address_is_monotonic(w)) continue;
+        score = calc_score(w);
+        if (score > highscore) {
+            highscore = score;
+            for (len_t i = 0; i < w->needle_len; i++) match_positions[i] = POSITION(i);
+        }
+    } while(increment_address(w));
+    return highscore;
+}
+
+// Score haystack against the needle of workspace v. Returns 0 when the needle is not an in-order
+// subsequence of the haystack, otherwise the best score, with its positions stored in match_positions.
+double
+score_item(void *v, text_t *haystack, len_t haystack_len, len_t *match_positions) {
+    WorkSpace *w = (WorkSpace*)v;
+    init_workspace(w, haystack, haystack_len);
+    if (!has_atleast_one_match(w)) return 0;
+    return process_item(w, match_positions);
+}
diff --git a/kittens/choose/unix_compat.c b/kittens/choose/unix_compat.c
new file mode 100644
index 000000000..fdcffbab1
--- /dev/null
+++ b/kittens/choose/unix_compat.c
@@ -0,0 +1,52 @@
+/*
+ * unix_compat.c
+ * Copyright (C) 2017 Kovid Goyal
+ *
+ * Distributed under terms of the GPL3 license.
+ */
+
+#include "choose-data-types.h"
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#ifdef __APPLE__
+#ifndef _SC_NPROCESSORS_ONLN
+#define _SC_NPROCESSORS_ONLN 58
+#endif
+#endif
+
+int
+cpu_count(void) {
+    // sysconf() returns -1 when the value is unavailable; run_threaded() clamps the result to at least 1
+    return (int)sysconf(_SC_NPROCESSORS_ONLN);
+}
+
+
+void*
+alloc_threads(size_t num_threads) {
+    return calloc(num_threads, sizeof(pthread_t));
+}
+
+// Start thread number i running start_routine(arg); reports on stderr and returns false on failure
+bool
+start_thread(void* threads, size_t i, void *(*start_routine) (void *), void *arg) {
+    int rc;
+    if ((rc = pthread_create(((pthread_t*)threads) + i, NULL, start_routine, arg))) {
+        fprintf(stderr, "Failed to create thread, with error: %s\n", strerror(rc));
+        return false;
+    }
+    return true;
+}
+
+void
+wait_for_thread(void *threads, size_t i) {
+    pthread_join(((pthread_t*)(threads))[i], NULL);
+}
+
+void
+free_threads(void *threads) {
+    free(threads);
+}
diff --git a/kittens/choose/vector.h b/kittens/choose/vector.h
new file mode 100644
index 000000000..33e13b5f1
--- /dev/null
+++ b/kittens/choose/vector.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2017 Kovid Goyal
+ *
+ * Distributed under terms of the GPL3 license.
+ */
+
+#pragma once
+
+#include <stdlib.h>
+
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+
+// Needs a GlobalData pointer named `global` in the scope where it is used
+#define REPORT_OOM global->oom = 1;
+
+#define VECTOR_OF(TYPE, NAME) typedef struct { \
+    TYPE *data; \
+    size_t size; \
+    size_t capacity; \
+} NAME;
+
+#define ALLOC_VEC(TYPE, vec, cap) \
+    vec.size = 0; vec.capacity = cap; \
+    vec.data = (TYPE*)malloc(vec.capacity * sizeof(TYPE)); \
+    if (vec.data == NULL) { REPORT_OOM; }
+
+#define FREE_VEC(vec) \
+    if (vec.data) { free(vec.data); vec.data = NULL; } \
+    vec.size = 0; vec.capacity = 0;
+
+// Grow vec so that amt more items fit. Must be used inside a loop, in a function that has an
+// int ret: on failure the old buffer is kept, ret is set to 1 and the loop is left via break.
+#define ENSURE_SPACE(TYPE, vec, amt) \
+    if (vec.size + (amt) >= vec.capacity) { \
+        size_t new_capacity_ = MAX(vec.capacity * 2, vec.size + (amt)); \
+        TYPE *new_data_ = (TYPE*)realloc(vec.data, sizeof(TYPE) * new_capacity_); \
+        if (new_data_ == NULL) { REPORT_OOM; ret = 1; break; } \
+        vec.data = new_data_; vec.capacity = new_capacity_; \
+    }
+
+#define NEXT(vec) (vec.data[vec.size])
+
+#define INC(vec, amt) vec.size += amt;
+
+#define SIZE(vec) (vec.size)
+
+#define ITEM(vec, n) (vec.data[n])
diff --git a/kittens/choose/windows_compat.c b/kittens/choose/windows_compat.c
new file mode 100644
index 000000000..a25bc36b0
--- /dev/null
+++ b/kittens/choose/windows_compat.c
@@ -0,0 +1,113 @@
+/*
+ * windows_compat.c
+ * Copyright (C) 2017 Kovid Goyal
+ *
+ * Distributed under terms of the GPL3 license.
+ */
+
+#include "choose-data-types.h"
+
+#include <windows.h>
+#include <process.h>
+#include <errno.h>
+#include <stdio.h>
+
+int
+cpu_count(void) {
+    SYSTEM_INFO sysinfo;
+    GetSystemInfo(&sysinfo);
+    return sysinfo.dwNumberOfProcessors;
+}
+
+void*
+alloc_threads(size_t num_threads) {
+    return calloc(num_threads, sizeof(uintptr_t));
+}
+
+bool
+start_thread(void* vt, size_t i, unsigned int (STDCALL *start_routine) (void *), void *arg) {
+    uintptr_t *threads = (uintptr_t*)vt;
+    errno = 0;
+    threads[i] = _beginthreadex(NULL, 0, start_routine, arg, 0, NULL);
+    if (threads[i] == 0) {
+        perror("Failed to create thread, with error");
+        return false;
+    }
+    return true;
+}
+
+void
+wait_for_thread(void *vt, size_t i) {
+    uintptr_t *threads = vt;
+    WaitForSingleObject((HANDLE)threads[i], INFINITE);
+    CloseHandle((HANDLE)threads[i]);
+    threads[i] = 0;
+}
+
+void
+free_threads(void *threads) {
+    free(threads);
+}
+
+// Minimal getdelim() replacement: read from stream up to and including delim (or up to EOF) into
+// the malloc'ed buffer *lineptr of capacity *n, growing it as needed. Returns the length or -1.
+ssize_t
+getdelim(char **lineptr, size_t *n, int delim, FILE *stream) {
+    int c;  // int rather than char, so that EOF stays distinguishable from every byte value
+    char *cur_pos, *new_lineptr;
+    size_t new_lineptr_len;
+
+    if (lineptr == NULL || n == NULL || stream == NULL) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    if (*lineptr == NULL) {
+        *n = 8192; /* init len */
+        if ((*lineptr = (char *)malloc(*n)) == NULL) {
+            errno = ENOMEM;
+            return -1;
+        }
+    }
+
+    cur_pos = *lineptr;
+    for (;;) {
+        c = getc(stream);
+
+        if (ferror(stream) || (c == EOF && cur_pos == *lineptr))
+            return -1;
+
+        if (c == EOF)
+            break;
+
+        if ((*lineptr + *n - cur_pos) < 2) {
+            if (SSIZE_MAX / 2 < *n) {
+#ifdef EOVERFLOW
+                errno = EOVERFLOW;
+#else
+                errno = ERANGE; /* no EOVERFLOW defined */
+#endif
+                return -1;
+            }
+            new_lineptr_len = *n * 2;
+            size_t used = (size_t)(cur_pos - *lineptr);
+
+            if ((new_lineptr = (char *)realloc(*lineptr, new_lineptr_len)) == NULL) {
+                errno = ENOMEM;
+                return -1;
+            }
+            // realloc() may move the buffer, so re-derive the write position from the offset
+            cur_pos = new_lineptr + used;
+            *lineptr = new_lineptr;
+            *n = new_lineptr_len;
+        }
+
+        *cur_pos++ = (char)c;
+
+        if (c == delim)
+            break;
+    }
+
+    *cur_pos = '\0';
+    return (ssize_t)(cur_pos - *lineptr);
+}
diff --git a/kitty/charsets.c b/kitty/charsets.c
index c8c8ad6e5..83682903e 100644
--- a/kitty/charsets.c
+++ b/kitty/charsets.c
@@ -243,7 +243,7 @@ decode_utf8(uint32_t* state, uint32_t* codep, uint8_t byte) {
 }
 
 size_t
-decode_utf8_string(char *src, size_t sz, uint32_t *dest) {
+decode_utf8_string(const char *src, size_t sz, uint32_t *dest) {
     // dest must be a zeroed array of size at least sz
     uint32_t codep = 0, state = 0, prev = UTF8_ACCEPT;
     size_t i, d;
diff --git a/kitty/charsets.h b/kitty/charsets.h
index b3b144881..cb27e51f5 100644
--- a/kitty/charsets.h
+++ b/kitty/charsets.h
@@ -10,5 +10,5 @@
 #include
 
 uint32_t decode_utf8(uint32_t*, uint32_t*, uint8_t byte);
-size_t decode_utf8_string(char *src, size_t sz, uint32_t *dest);
+size_t decode_utf8_string(const char *src, size_t sz, uint32_t *dest);
 unsigned int encode_utf8(uint32_t ch, char* dest);
diff --git a/setup.py b/setup.py
index b1440601a..d9443b992 100755
--- a/setup.py
+++ b/setup.py
@@ -464,11 +464,26 @@ def kittens_env():
 
 def compile_kittens(incremental, compilation_database, all_keys):
     kenv = kittens_env()
-    for sources, all_headers, dest in [
-        (['kittens/unicode_input/unicode_names.c'], ['kittens/unicode_input/names.h', 'kitty/data-types.h'], 'kittens/unicode_input/unicode_names'),
-        (['kittens/diff/speedup.c'], ['kitty/data-types.h'], 'kittens/diff/diff_speedup'),
-    ]:
-        compile_c_extension(kenv, dest, incremental, compilation_database, all_keys, sources, all_headers)
+
+    def list_files(q):
+        return [os.path.relpath(x, base) for x in glob.glob(q)]
+
+    def files(kitten, output, extra_headers=(), extra_sources=(), filter_sources=None):
+        sources = list(filter(filter_sources, list(extra_sources) + list_files(os.path.join('kittens', kitten, '*.c'))))
+        headers = list_files(os.path.join('kittens', kitten, '*.h')) + list(extra_headers)
+        return (sources, headers, 'kittens/{}/{}'.format(kitten, output))
+
+    for sources, all_headers, dest in (
+            files('unicode_input', 'unicode_names'),
+            files('diff', 'diff_speedup'),
+            files(
+                'choose', 'subseq_matcher',
+                extra_headers=('kitty/charsets.h',),
+                extra_sources=('kitty/charsets.c',),
+                filter_sources=lambda x: 'windows_compat.c' not in x),
+    ):
+        compile_c_extension(
+            kenv, dest, incremental, compilation_database, all_keys, sources, all_headers + ['kitty/data-types.h'])
 
 
 def build(args, native_optimizations=True):