From 19bce0c23c9c4ac76dee58fe772203e97a85cd46 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 23 Jan 2019 19:38:08 +0530
Subject: [PATCH] Start work on a choose kitten for fuzzy selection

---
 kittens/choose/__init__.py         |   0
 kittens/choose/choose-data-types.h |  93 +++++++++++
 kittens/choose/main.c              | 245 +++++++++++++++++++++++++++++
 kittens/choose/output.c            | 105 +++++++++++++
 kittens/choose/score.c             | 182 +++++++++++++++++++++
 kittens/choose/unix_compat.c       |  50 ++++++
 kittens/choose/vector.h            |  44 ++++++
 kittens/choose/windows_compat.c    | 107 +++++++++++++
 kitty/charsets.c                   |   2 +-
 kitty/charsets.h                   |   2 +-
 setup.py                           |  25 ++-
 11 files changed, 848 insertions(+), 7 deletions(-)
 create mode 100644 kittens/choose/__init__.py
 create mode 100644 kittens/choose/choose-data-types.h
 create mode 100644 kittens/choose/main.c
 create mode 100644 kittens/choose/output.c
 create mode 100644 kittens/choose/score.c
 create mode 100644 kittens/choose/unix_compat.c
 create mode 100644 kittens/choose/vector.h
 create mode 100644 kittens/choose/windows_compat.c
diff --git a/kittens/choose/__init__.py b/kittens/choose/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/kittens/choose/choose-data-types.h b/kittens/choose/choose-data-types.h
new file mode 100644
index 000000000..4f67f5941
--- /dev/null
+++ b/kittens/choose/choose-data-types.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2017 Kovid Goyal <kovid at kovidgoyal.net>
+ *
+ * Distributed under terms of the GPL3 license.
+ */
+
+#pragma once
+#if defined(_MSC_VER)
+#define ISWINDOWS
+#define STDCALL __stdcall
+#ifndef ssize_t
+#include <BaseTsd.h>
+typedef SSIZE_T ssize_t;
+#ifndef SSIZE_MAX
+#if defined(_WIN64)
+    #define SSIZE_MAX _I64_MAX
+#else
+    #define SSIZE_MAX LONG_MAX
+#endif
+#endif
+#endif
+#else
+#define STDCALL
+#endif
+#define _POSIX_C_SOURCE 200809L
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "vector.h"
+
+typedef uint8_t len_t;
+typedef uint32_t text_t;
+
+#define LEN_MAX UINT8_MAX
+#define UNUSED(x) (void)(x)
+#define UTF8_ACCEPT 0
+#define UTF8_REJECT 1
+#define IS_LOWERCASE(x) ((x) >= 'a' && (x) <= 'z')  // ASCII-only test; fully parenthesised so it composes safely inside larger expressions
+#define IS_UPPERCASE(x) ((x) >= 'A' && (x) <= 'Z')  // ASCII-only test
+#define LOWERCASE(x) (IS_UPPERCASE(x) ? (x) + 32 : (x))  // ASCII-only case folding; evaluates x more than once
+#define arraysz(x) (sizeof(x)/sizeof(x[0]))
+
+typedef struct {
+    text_t* src;  // start of this candidate's decoded text inside the shared character buffer
+    ssize_t src_sz;  // value returned by decode_utf8_string() for the line
+    len_t haystack_len;  // src_sz clamped to LEN_MAX; this is the length handed to score_item()
+    len_t *positions;  // needle_len slots that receive the positions of the best match
+    double score;  // result of score_item() for this candidate
+    ssize_t idx;  // order in which the line was read; cmpscore() uses it to break ties
+} Candidate;
+
+typedef struct {
+    Candidate *haystack;  // all candidates; every scoring job indexes into this one array
+    size_t haystack_count;  // number of entries in haystack
+    text_t level1[LEN_MAX], level2[LEN_MAX], level3[LEN_MAX], needle[LEN_MAX];  // character sets consulted by level_factor_for() and the text being searched for
+    len_t level1_len, level2_len, level3_len, needle_len;  // number of valid entries in the arrays above
+    size_t haystack_size;  // sum of haystack_len over all candidates; run_threaded() stays single threaded below 10000
+    text_t *output;  // growable result buffer, see ensure_space() in output.c
+    size_t output_sz, output_pos;  // capacity and used length of output, both counted in text_t units
+    int oom;  // set to 1 by REPORT_OOM when an allocation fails
+} GlobalData;
+
+typedef struct {
+    bool output_positions;  // when true every result is prefixed with its match positions
+    size_t limit;  // how many of the sorted candidates output_results() looks at; 0 means all of them
+    int num_threads;  // requested thread count; values <= 0 make run_threaded() ask cpu_count()
+    text_t mark_before[128], mark_after[128];  // text emitted before/after every matched character
+    size_t mark_before_sz, mark_after_sz;  // number of valid entries in the two arrays above
+} Options;
+
+VECTOR_OF(len_t, Positions)
+VECTOR_OF(text_t, Chars)
+VECTOR_OF(Candidate, Candidates)
+
+
+void output_results(GlobalData *, Candidate *haystack, size_t count, Options *opts, len_t needle_len, text_t delim);
+void* alloc_workspace(len_t max_haystack_len, GlobalData*);
+void* free_workspace(void *v);
+double score_item(void *v, text_t *haystack, len_t haystack_len, len_t *match_positions);
+unsigned int encode_codepoint(text_t ch, char* dest);
+size_t unescape(const char *src, char *dest, size_t destlen);
+int cpu_count();
+void* alloc_threads(size_t num_threads);
+#ifdef ISWINDOWS
+bool start_thread(void* threads, size_t i, unsigned int (STDCALL *start_routine) (void *), void *arg);
+ssize_t getdelim(char **lineptr, size_t *n, int delim, FILE *stream);
+#else
+bool start_thread(void* threads, size_t i, void *(*start_routine) (void *), void *arg);
+#endif
+void wait_for_thread(void *threads, size_t i);
+void free_threads(void *threads);
diff --git a/kittens/choose/main.c b/kittens/choose/main.c
new file mode 100644
index 000000000..ca4907a00
--- /dev/null
+++ b/kittens/choose/main.c
@@ -0,0 +1,245 @@
+/*
+ * main.c
+ * Copyright (C) 2017 Kovid Goyal <kovid at kovidgoyal.net>
+ *
+ * Distributed under terms of the GPL3 license.
+ */
+
+#include "choose-data-types.h"
+#include "charsets.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <fcntl.h>
+#ifndef ISWINDOWS
+#include <unistd.h>
+#endif
+
+typedef struct {
+    size_t start, count;  // the job scores global->haystack[start] .. global->haystack[start + count - 1]
+    void *workspace;  // scratch memory from alloc_workspace(); stays NULL when count is 0
+    len_t max_haystack_len;  // longest haystack_len in the job's range, used to size the workspace
+    bool started;  // true once start_thread() succeeded for this job
+    GlobalData *global;  // shared state (haystack, needle, levels)
+} JobData;
+
+
+static unsigned int STDCALL
+run_scoring(JobData *job_data) {  // Score every candidate in this job's range; always returns 0
+    const size_t first = job_data->start, past_end = job_data->start + job_data->count;
+    for (size_t n = first; n < past_end; n++) {
+        Candidate *c = job_data->global->haystack + n;
+        c->score = score_item(job_data->workspace, c->src, c->haystack_len, c->positions);
+    }
+}
+
+static void*
+run_scoring_pthreads(void *job_data) {  // Adapter giving run_scoring() the void *(*)(void *) signature that the non-Windows start_thread() expects
+    run_scoring((JobData*)job_data);
+    return NULL;
+}
+#ifdef ISWINDOWS
+#define START_FUNC run_scoring
+#else
+#define START_FUNC run_scoring_pthreads
+#endif
+
+static JobData*
+create_job(size_t i, size_t blocksz, GlobalData *global) {
+    // Build the job for block number i: at most blocksz candidates starting at index i * blocksz. Returns NULL on allocation failure.
+    JobData *ans = (JobData*)calloc(1, sizeof(JobData));
+    if (ans == NULL) return NULL;
+    ans->start = i * blocksz;
+    // Clamp to the block size so that jobs never overlap; blocks that start past the end of the haystack are empty
+    ans->count = ans->start < global->haystack_count ? MIN(blocksz, global->haystack_count - ans->start) : 0;
+    for (size_t k = ans->start; k < ans->start + ans->count; k++) ans->max_haystack_len = MAX(ans->max_haystack_len, global->haystack[k].haystack_len);
+    if (ans->count > 0) {
+        ans->workspace = alloc_workspace(ans->max_haystack_len, global);
+        if (!ans->workspace) { free(ans); return NULL; }
+    }
+    ans->global = global;
+    return ans;
+}
+
+// Release a job together with its workspace. NULL is accepted. Always returns NULL.
+static JobData*
+free_job(JobData *job) {
+    if (job == NULL) return NULL;
+    if (job->workspace != NULL) free_workspace(job->workspace);
+    free(job);
+    return NULL;
+}
+
+
+static int
+run_threaded(int num_threads_asked, GlobalData *global) {
+    // Score every candidate in global->haystack, using several threads for large inputs. Returns 0 on success, 1 on failure.
+    int ret = 0;
+    size_t i, blocksz;
+    int wanted = num_threads_asked > 0 ? num_threads_asked : cpu_count();  // evaluated once, MAX() repeats its arguments
+    size_t num_threads = MAX(1, wanted);
+    if (global->haystack_size < 10000) num_threads = 1;
+    void *threads = alloc_threads(num_threads);
+    JobData **job_data = calloc(num_threads, sizeof(JobData*));
+    if (threads == NULL || job_data == NULL) { ret = 1; goto end; }
+
+    blocksz = global->haystack_count / num_threads + global->haystack_count % num_threads;
+
+    for (i = 0; i < num_threads; i++) {
+        job_data[i] = create_job(i, blocksz, global);
+        if (job_data[i] == NULL) { ret = 1; goto end; }
+    }
+
+    if (num_threads == 1) {
+        run_scoring(job_data[0]);
+    } else {
+        for (i = 0; i < num_threads; i++) {
+            job_data[i]->started = false;
+            if (job_data[i]->count > 0) {
+                if (!start_thread(threads, i, START_FUNC, job_data[i])) ret = 1;
+                else job_data[i]->started = true;
+            }
+        }
+    }
+
+end:
+    if (num_threads > 1 && job_data) {
+        for (i = 0; i < num_threads; i++) {
+            if (job_data[i] && job_data[i]->started) wait_for_thread(threads, i);
+        }
+    }
+    if (job_data) for (i = 0; i < num_threads; i++) job_data[i] = free_job(job_data[i]);  // job_data is NULL when its own allocation failed
+    free(job_data);
+    free_threads(threads);
+    return ret;
+}
+
+
+static int
+run_search(Options *opts, GlobalData *global, const char * const *lines, const size_t* sizes, size_t num_lines) {
+    // Decode the non-empty lines into candidates, score them and write the results to global->output. Returns 0 on success.
+    const char *linebuf = NULL;
+    size_t idx = 0;
+    ssize_t sz = 0;
+    int ret = 0;
+    Candidates candidates = {0};
+    Chars chars = {0};
+    ALLOC_VEC(text_t, chars, 8192 * 20);
+    ALLOC_VEC(Candidate, candidates, 8192);
+    if (chars.data == NULL || candidates.data == NULL) { FREE_VEC(chars); FREE_VEC(candidates); return 1; }
+
+    for (size_t i = 0; i < num_lines; i++) {
+        sz = sizes[i];
+        linebuf = lines[i];
+        if (sz > 0) {
+            ENSURE_SPACE(text_t, chars, sz);
+            ENSURE_SPACE(Candidate, candidates, 1);
+            sz = decode_utf8_string(linebuf, sz, &(NEXT(chars)));
+            NEXT(candidates).src_sz = sz;
+            NEXT(candidates).haystack_len = (len_t)(MIN(LEN_MAX, sz));
+            global->haystack_size += NEXT(candidates).haystack_len;
+            NEXT(candidates).idx = idx++;
+            INC(candidates, 1); INC(chars, sz);
+        }
+    }
+    if (ret != 0) { FREE_VEC(chars); FREE_VEC(candidates); return ret; }  // ENSURE_SPACE failed: the vectors can no longer be trusted
+    // Prepare the haystack allocating space for positions arrays and setting
+    // up the src pointers to point to the correct locations
+    Candidate *haystack = &ITEM(candidates, 0);
+    len_t *positions = (len_t*)calloc(MAX(1, SIZE(candidates)), sizeof(len_t) * MAX(1, global->needle_len));  // never a zero sized request, which may return NULL
+    if (positions) {
+        text_t *cdata = &ITEM(chars, 0);
+        for (size_t i = 0, off = 0; i < SIZE(candidates); i++) {
+            haystack[i].positions = positions + (i * global->needle_len);
+            haystack[i].src = cdata + off;
+            off += haystack[i].src_sz;
+        }
+        global->haystack = haystack;
+        global->haystack_count = SIZE(candidates);
+        ret = run_threaded(opts->num_threads, global);
+        if (ret == 0) output_results(global, haystack, SIZE(candidates), opts, global->needle_len, '\n');
+        else { REPORT_OOM; }
+    } else { ret = 1; REPORT_OOM; }
+
+    FREE_VEC(chars); free(positions); FREE_VEC(candidates);
+    return ret;
+}
+
+// Copy at most dest_sz code points of the str object src into dest.
+// Returns the number of code points actually stored, so callers can use it as the length of dest.
+static size_t
+copy_unicode_object(PyObject *src, text_t *dest, size_t dest_sz) {
+    PyUnicode_READY(src);
+    int kind = PyUnicode_KIND(src);
+    void *data = PyUnicode_DATA(src);
+    size_t len = MIN((size_t)PyUnicode_GetLength(src), dest_sz);
+    for (size_t i = 0; i < len; i++) dest[i] = PyUnicode_READ(kind, data, i);
+    return len;
+}
+
+static PyObject*
+match(PyObject *self, PyObject *args) {
+    // match(lines: list of bytes, levels: tuple of 3 str, needle, output_positions, limit, num_threads, mark_before, mark_after) -> str, or None if nothing was output
+    (void)(self);
+    int output_positions; unsigned long limit;
+    PyObject *lines, *levels, *needle, *mark_before, *mark_after;
+    Options opts = {0};
+    GlobalData global = {0};
+    if (!PyArg_ParseTuple(args, "O!O!O!pkiO!O!",  // every O! takes the type object first, then the address that receives the object
+            &PyList_Type, &lines, &PyTuple_Type, &levels, &PyUnicode_Type, &needle,
+            &output_positions, &limit, &opts.num_threads,
+            &PyUnicode_Type, &mark_before, &PyUnicode_Type, &mark_after
+    )) return NULL;
+    if (PyTuple_GET_SIZE(levels) < 3 || !PyUnicode_Check(PyTuple_GET_ITEM(levels, 0)) || !PyUnicode_Check(PyTuple_GET_ITEM(levels, 1)) ||
+            !PyUnicode_Check(PyTuple_GET_ITEM(levels, 2))) { PyErr_SetString(PyExc_TypeError, "levels must be a tuple of three strings"); return NULL; }
+    opts.output_positions = output_positions ? true : false; opts.limit = limit;
+    global.level1_len = copy_unicode_object(PyTuple_GET_ITEM(levels, 0), global.level1, arraysz(global.level1));
+    global.level2_len = copy_unicode_object(PyTuple_GET_ITEM(levels, 1), global.level2, arraysz(global.level2));
+    global.level3_len = copy_unicode_object(PyTuple_GET_ITEM(levels, 2), global.level3, arraysz(global.level3));
+    global.needle_len = copy_unicode_object(needle, global.needle, arraysz(global.needle));
+    opts.mark_before_sz = copy_unicode_object(mark_before, opts.mark_before, arraysz(opts.mark_before));
+    opts.mark_after_sz = copy_unicode_object(mark_after, opts.mark_after, arraysz(opts.mark_after));
+    size_t num_lines = PyList_GET_SIZE(lines);
+    char **clines = malloc(sizeof(char*) * MAX(1, num_lines));  // never request zero bytes: malloc(0) may legitimately return NULL
+    size_t *sizes = malloc(sizeof(size_t) * MAX(1, num_lines));
+    if (!clines || !sizes) { free(clines); free(sizes); return PyErr_NoMemory(); }
+    for (size_t i = 0; i < num_lines; i++) {
+        PyObject *line = PyList_GET_ITEM(lines, i);  // borrowed reference; lines is a list, not a tuple
+        if (!PyBytes_Check(line)) { free(clines); free(sizes); PyErr_SetString(PyExc_TypeError, "lines must contain only bytes objects"); return NULL; }
+        clines[i] = PyBytes_AS_STRING(line); sizes[i] = PyBytes_GET_SIZE(line);
+    }
+    Py_BEGIN_ALLOW_THREADS;
+    run_search(&opts, &global, (const char* const *)clines, sizes, num_lines);
+    Py_END_ALLOW_THREADS;
+    free(clines); free(sizes);
+    if (global.oom) { free(global.output); return PyErr_NoMemory(); }
+    if (!global.output) { Py_RETURN_NONE; }
+    PyObject *ans = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, global.output, global.output_pos);
+    free(global.output);
+    return ans;
+}
+
+static PyMethodDef module_methods[] = {
+    {"match", match, METH_VARARGS, ""},
+    {NULL, NULL, 0, NULL}        /* Sentinel */
+};
+
+static struct PyModuleDef module = {
+   .m_base = PyModuleDef_HEAD_INIT,
+   .m_name = "subseq_matcher",   /* name of module */
+   .m_doc = NULL,
+   .m_size = -1,
+   .m_methods = module_methods
+};
+
+PyMODINIT_FUNC
+PyInit_subseq_matcher(void) {
+    // Module initialisation function for the subseq_matcher extension.
+    // The value of PyModule_Create() is handed back unchanged: the new
+    // module object on success and NULL on failure, which is the same
+    // pair of outcomes a separate NULL check would produce.
+    return PyModule_Create(&module);
+}
diff --git a/kittens/choose/output.c b/kittens/choose/output.c
new file mode 100644
index 000000000..2f8fed166
--- /dev/null
+++ b/kittens/choose/output.c
@@ -0,0 +1,105 @@
+/*
+ * output.c
+ * Copyright (C) 2017 Kovid Goyal <kovid at kovidgoyal.net>
+ *
+ * Distributed under terms of the GPL3 license.
+ */
+
+#include "choose-data-types.h"
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <stdio.h>
+#ifdef ISWINDOWS
+#include <io.h>
+#define STDOUT_FILENO 1
+static inline ssize_t ms_write(int fd, const void* buf, size_t count) { return _write(fd, buf, (unsigned int)count); }
+#define write ms_write
+#else
+#include <unistd.h>
+#endif
+#include <errno.h>
+
+
+#define FIELD(x, which) (((Candidate*)(x))->which)
+
+static inline bool
+ensure_space(GlobalData *global, size_t sz) {  // Make room for sz more code points in global->output; on failure flags oom and returns false
+    if (global->output_sz < sz + global->output_pos) {
+        size_t wanted = global->output_sz + MAX(sz, (64 * 1024));
+        // Keep the old pointer until realloc() has succeeded, otherwise the buffer leaks and later writes go through NULL
+        text_t *grown = realloc(global->output, sizeof(text_t) * wanted);
+        if (!grown) { global->oom = 1; return false; }
+        global->output = grown;
+        global->output_sz = wanted;
+    }
+    return true;
+}
+
+static inline void
+output_text(GlobalData *global, const text_t *data, size_t sz) {  // Append sz code points to the output buffer; does nothing if the buffer cannot grow
+    if (!ensure_space(global, sz)) return;
+    text_t *dest = global->output + global->output_pos;
+    memcpy(dest, data, sizeof(text_t) * sz);
+    global->output_pos += sz;
+}
+
+static int
+cmpscore(const void *a, const void *b) {  // qsort() comparator: higher score first, equal scores keep input order
+    double sa = FIELD(a, score), sb = FIELD(b, score);
+    ssize_t ia = FIELD(a, idx), ib = FIELD(b, idx);  // compared, not subtracted: narrowing to int and subtracting can wrap
+    return (sa > sb) ? -1 : ((sa == sb) ? ((ia > ib) - (ia < ib)) : 1);
+}
+
+static void
+output_with_marks(GlobalData *global, Options *opts, text_t *src, size_t src_sz, len_t *positions, len_t poslen) {
+    // Emit src with every matched character (positions[0] .. positions[poslen - 1]) wrapped in the configured marks
+    size_t pos, i = 0;
+    for (pos = 0; pos < poslen; pos++, i++) {
+        output_text(global, src + i, MIN(src_sz, positions[pos]) - i);
+        i = positions[pos];
+        if (i < src_sz) {
+            if (opts->mark_before_sz > 0) output_text(global, opts->mark_before, opts->mark_before_sz);
+            output_text(global, src + i, 1);
+            if (opts->mark_after_sz > 0) output_text(global, opts->mark_after, opts->mark_after_sz);
+        }
+    }
+    if (i < src_sz) output_text(global, src + i, src_sz - i);  // i is one past the last match, or 0 when poslen is 0, so positions[] is never indexed with -1
+}
+
+static void
+output_positions(GlobalData *global, len_t *positions, len_t num) {  // Emit the positions as decimal numbers separated by ',' and terminated by ':'
+    wchar_t buf[128];
+    for (len_t i = 0; i < num; i++) {
+        int ndigits = swprintf(buf, sizeof(buf)/sizeof(buf[0]), L"%u", (unsigned int)positions[i]);  // must not shadow num or i: the separator test needs the outer values
+        if (ndigits > 0 && ensure_space(global, ndigits + 1)) {
+            for (int k = 0; k < ndigits; k++) global->output[global->output_pos++] = buf[k];
+            global->output[global->output_pos++] = (i + 1 == num) ? ':' : ',';
+        }
+    }
+}
+
+
+static void
+output_result(GlobalData *global, Candidate *c, Options *opts, len_t needle_len, text_t delim) {
+    // One result record: the optional position list, then the candidate text
+    // (with marks around the matched characters if any mark is configured), then delim
+    const bool want_marks = opts->mark_before_sz > 0 || opts->mark_after_sz > 0;
+    if (opts->output_positions) output_positions(global, c->positions, needle_len);
+    if (!want_marks) output_text(global, c->src, c->src_sz);
+    else output_with_marks(global, opts, c->src, c->src_sz, c->positions, needle_len);
+    output_text(global, &delim, 1);
+}
+
+
+void
+output_results(GlobalData *global, Candidate *haystack, size_t count, Options *opts, len_t needle_len, text_t delim) {
+    // Sort the candidates best first and emit the ones with a positive score, looking at no more than opts->limit of them
+    qsort(haystack, count, sizeof(*haystack), cmpscore);
+    // A limit of 0 means unlimited; clamp to count so that a large limit cannot run past the end of the array
+    size_t left = opts->limit > 0 ? MIN(opts->limit, count) : count;
+    for (size_t i = 0; i < left; i++) {
+        if (haystack[i].score > 0) output_result(global, haystack + i, opts, needle_len, delim);
+    }
+}
diff --git a/kittens/choose/score.c b/kittens/choose/score.c
new file mode 100644
index 000000000..5c4cf6af1
--- /dev/null
+++ b/kittens/choose/score.c
@@ -0,0 +1,182 @@
+/*
+ * score.c
+ * Copyright (C) 2017 Kovid Goyal <kovid at kovidgoyal.net>
+ *
+ * Distributed under terms of the GPL3 license.
+ */
+
+#include "choose-data-types.h"
+#include <stdlib.h>
+#include <string.h>
+#include <float.h>
+#include <stdio.h>
+
+typedef struct {
+    len_t *positions_buf;  // buffer to store positions for every char in needle
+    len_t **positions;  // Array of pointers into positions_buf
+    len_t *positions_count; // Array of counts for positions
+    len_t needle_len;  // Length of the needle
+    len_t max_haystack_len;  // Max length of a string in the haystack
+    len_t haystack_len; // Length of the current string in the haystack
+    len_t *address; // Array of offsets into the positions array
+    double max_score_per_char;
+    uint8_t *level_factors;  // Array of score factors for every character in the current haystack that matches a character in the needle
+    text_t *level1, *level2, *level3;  // The characters in the levels
+    len_t level1_len, level2_len, level3_len;
+    text_t *needle;  // The current needle
+    text_t *haystack; //The current haystack
+} WorkSpace;
+
+void*
+alloc_workspace(len_t max_haystack_len, GlobalData *global) {
+    // Allocate the scratch space needed to score haystacks of up to max_haystack_len characters against global->needle; NULL on failure
+    WorkSpace *ans = calloc(1, sizeof(WorkSpace));
+    if (ans == NULL) return NULL;
+    ans->positions_buf = (len_t*) calloc(global->needle_len, sizeof(len_t) * max_haystack_len);
+    ans->positions = (len_t**)calloc(global->needle_len, sizeof(len_t*));
+    ans->positions_count = (len_t*)calloc(2*global->needle_len, sizeof(len_t));
+    ans->level_factors = (uint8_t*)calloc(max_haystack_len, sizeof(uint8_t));
+    if (ans->positions == NULL || ans->positions_buf == NULL || ans->positions_count == NULL || ans->level_factors == NULL) { free_workspace(ans); return NULL; }
+    ans->needle = global->needle; ans->needle_len = global->needle_len; ans->max_haystack_len = max_haystack_len;
+    ans->level1 = global->level1; ans->level2 = global->level2; ans->level3 = global->level3;
+    ans->level1_len = global->level1_len; ans->level2_len = global->level2_len; ans->level3_len = global->level3_len;
+    // address is the second half of the positions_count allocation; pointer arithmetic already scales by sizeof(len_t)
+    ans->address = ans->positions_count + global->needle_len;
+    for (len_t i = 0; i < global->needle_len; i++) ans->positions[i] = ans->positions_buf + i * max_haystack_len;
+    return ans;
+}
+
+#define NUKE(x) free(x); x = NULL;
+
+void*
+free_workspace(void *v) {  // Release a WorkSpace and the buffers it owns; v must not be NULL. Always returns NULL.
+    WorkSpace *w = (WorkSpace*)v;
+    NUKE(w->positions_buf);
+    NUKE(w->positions);
+    NUKE(w->positions_count);  // w->address points into this allocation and is released with it
+    NUKE(w->level_factors);
+    free(w);
+    return NULL;
+}
+
+static inline bool
+has_char(text_t *text, len_t sz, text_t ch) {  // Linear search for ch among the first sz entries of text
+    for (const text_t *p = text, *end = text + sz; p != end; p++) {
+        if (*p == ch) return true;
+    }
+    return false;
+}
+
+static inline uint8_t
+level_factor_for(text_t current, text_t last, WorkSpace *w) {  // Factor for a match on current, decided by the character before it (last); 0 means no special location
+    text_t lch = LOWERCASE(last);  // the level sets are compared against the case folded previous character
+    if (has_char(w->level1, w->level1_len, lch)) return 90;
+    if (has_char(w->level2, w->level2_len, lch)) return 80;
+    if (IS_LOWERCASE(last) && IS_UPPERCASE(current)) return 80; // CamelCase
+    if (has_char(w->level3, w->level3_len, lch)) return 70;
+    return 0;
+}
+
+static void
+init_workspace(WorkSpace *w, text_t *haystack, len_t haystack_len) {
+    // Calculate the positions and level_factors arrays for the specified haystack
+    bool level_factor_calculated = false;
+    memset(w->positions_count, 0, sizeof(*(w->positions_count)) * 2 * w->needle_len);  // twice needle_len: this also zeroes w->address, which lives in the second half of the allocation
+    memset(w->level_factors, 0, sizeof(*(w->level_factors)) * w->max_haystack_len);
+    for (len_t i = 0; i < haystack_len; i++) {
+        level_factor_calculated = false;
+        for (len_t j = 0; j < w->needle_len; j++) {
+            if (w->needle[j] == LOWERCASE(haystack[i])) {  // only the haystack character is case folded here
+                if (!level_factor_calculated) {
+                    level_factor_calculated = true;  // the factor depends only on i, so compute it once even if several needle characters match
+                    w->level_factors[i] = i > 0 ? level_factor_for(haystack[i], haystack[i-1], w) : 0;
+                }
+                w->positions[j][w->positions_count[j]++] = i;  // remember i as a place where needle character j occurs
+            }
+        }
+    }
+    w->haystack = haystack;
+    w->haystack_len = haystack_len;
+    w->max_score_per_char = (1.0 / haystack_len + 1.0 / w->needle_len) / 2.0;
+}
+
+
+static inline bool
+has_atleast_one_match(WorkSpace *w) {  // Pre-check run before process_item(): can the needle characters be found in order at all?
+    int p = -1;  // haystack position chosen for the previous needle character
+    bool found;
+    for (len_t i = 0; i < w->needle_len; i++) {
+        if (w->positions_count[i] == 0) return false;  // All characters of the needle are not present in the haystack
+        found = false;
+        for (len_t j = 0; j < w->positions_count[i]; j++) {
+            if (w->positions[i][j] > p) { p = w->positions[i][j]; found = true; break; }  // take the first occurrence after p
+        }
+        if (!found) return false; // Characters of needle not present in sequence in haystack
+    }
+    return true;
+}
+
+#define POSITION(x) w->positions[x][w->address[x]]
+
+static inline bool
+increment_address(WorkSpace *w) {  // Step w->address to the next combination of positions; returns false once every combination has been visited
+    len_t pos = w->needle_len - 1;  // start with the index belonging to the last needle character
+    while(true) {
+        w->address[pos]++;
+        if (w->address[pos] < w->positions_count[pos]) return true;
+        if (pos == 0) break;
+        w->address[pos--] = 0;  // this index ran past its last position: reset it and carry into the previous needle character
+    }
+    return false;
+}
+
+static inline bool
+address_is_monotonic(WorkSpace *w) {
+    // The addressed haystack positions must be strictly increasing from one needle character to the next
+    for (len_t cur = 1; cur < w->needle_len; cur++) {
+        if (!(POSITION(cur-1) < POSITION(cur))) return false;
+    }
+    return true;
+}
+
+static inline double
+calc_score(WorkSpace *w) {  // Score the combination of positions currently selected by w->address
+    double ans = 0;
+    len_t distance, pos;
+    for (len_t i = 0; i < w->needle_len; i++) {
+        pos = POSITION(i);
+        if (i == 0) distance = pos < LEN_MAX ? pos + 1 : LEN_MAX;  // first needle character: 1-based distance from the start of the haystack, capped at LEN_MAX
+        else {
+            distance = pos - POSITION(i-1);  // gap to the previous matched character
+            if (distance < 2) {
+                ans += w->max_score_per_char; // consecutive characters
+                continue;
+            }
+        }
+        if (w->level_factors[pos]) ans += (100 * w->max_score_per_char) / w->level_factors[pos];  // at a special location
+        else ans += (0.75 * w->max_score_per_char) / distance;  // otherwise the contribution shrinks as the distance grows
+    }
+    return ans;
+}
+
+static double
+process_item(WorkSpace *w, len_t *match_positions) {  // Walk every combination of positions, return the best score and store its positions in match_positions
+    double highscore = 0, score;
+    do {
+        if (!address_is_monotonic(w)) continue;  // continue in a do/while jumps to the loop condition, so the address still advances
+        score = calc_score(w);
+        if (score > highscore) {
+            highscore = score;
+            for (len_t i = 0; i < w->needle_len; i++) match_positions[i] = POSITION(i);
+        }
+    } while(increment_address(w));
+    return highscore;
+}
+
+double
+score_item(void *v, text_t *haystack, len_t haystack_len, len_t *match_positions) {  // Best score of haystack against the workspace's needle; 0 when it does not match
+    WorkSpace *w = (WorkSpace*)v;
+    if (w->needle_len == 0) return 0;  // nothing to match; the address bookkeeping in process_item() needs at least one needle character
+    init_workspace(w, haystack, haystack_len);
+    return has_atleast_one_match(w) ? process_item(w, match_positions) : 0;
+}
diff --git a/kittens/choose/unix_compat.c b/kittens/choose/unix_compat.c
new file mode 100644
index 000000000..fdcffbab1
--- /dev/null
+++ b/kittens/choose/unix_compat.c
@@ -0,0 +1,50 @@
+/*
+ * unix_compat.c
+ * Copyright (C) 2017 Kovid Goyal <kovid at kovidgoyal.net>
+ *
+ * Distributed under terms of the GPL3 license.
+ */
+
+#include "choose-data-types.h"
+#include <unistd.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#ifdef __APPLE__
+#ifndef _SC_NPROCESSORS_ONLN
+#define _SC_NPROCESSORS_ONLN 58
+#endif
+#endif
+
+int
+cpu_count() {  // Whatever sysconf() reports for _SC_NPROCESSORS_ONLN, narrowed to int; run_threaded() treats values below 1 as 1
+    return sysconf(_SC_NPROCESSORS_ONLN);
+}
+
+
+void*
+alloc_threads(size_t num_threads) {
+    return calloc(num_threads, sizeof(pthread_t));
+}
+
+bool
+start_thread(void* threads, size_t i, void *(*start_routine) (void *), void *arg) {
+    // Start thread number i running start_routine(arg); on failure the reason is printed to stderr and false is returned
+    pthread_t *slot = ((pthread_t*)threads) + i;
+    int rc = pthread_create(slot, NULL, start_routine, arg);
+    if (rc == 0) return true;
+    fprintf(stderr, "Failed to create thread, with error: %s\n", strerror(rc));
+    return false;
+}
+
+void
+wait_for_thread(void *threads, size_t i) {
+    pthread_join(((pthread_t*)(threads))[i], NULL);
+}
+
+void
+free_threads(void *threads) {
+    free(threads);
+}
diff --git a/kittens/choose/vector.h b/kittens/choose/vector.h
new file mode 100644
index 000000000..33e13b5f1
--- /dev/null
+++ b/kittens/choose/vector.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2017 Kovid Goyal <kovid at kovidgoyal.net>
+ *
+ * Distributed under terms of the GPL3 license.
+ */
+
+#pragma once
+
+#include <Python.h>
+
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+
+#define REPORT_OOM global->oom = 1;
+
+#define VECTOR_OF(TYPE, NAME) typedef struct { \
+    TYPE *data; \
+    size_t size; \
+    size_t capacity; \
+} NAME;
+
+#define ALLOC_VEC(TYPE, vec, cap) \
+    vec.size = 0; vec.capacity = cap; \
+    vec.data = (TYPE*)malloc(vec.capacity * sizeof(TYPE)); \
+    if (vec.data == NULL) { REPORT_OOM; }
+
+#define FREE_VEC(vec) \
+    if (vec.data) { free(vec.data); vec.data = NULL; } \
+    vec.size = 0; vec.capacity = 0;
+
+#define ENSURE_SPACE(TYPE, vec, amt) /* make room for amt more items; on failure vec is left intact, ret is set to 1 and the enclosing loop is left */ \
+    if (vec.size + amt >= vec.capacity) { \
+        TYPE *grown_ = (TYPE*)realloc(vec.data, sizeof(TYPE) * MAX(vec.capacity * 2, vec.size + amt)); \
+        if (grown_ == NULL) { REPORT_OOM; ret = 1; break; }  \
+        vec.data = grown_; vec.capacity = MAX(vec.capacity * 2, vec.size + amt); \
+    }
+
+#define NEXT(vec) (vec.data[vec.size])
+
+#define INC(vec, amt) vec.size += amt;
+
+#define SIZE(vec) (vec.size)
+
+#define ITEM(vec, n) (vec.data[n])
diff --git a/kittens/choose/windows_compat.c b/kittens/choose/windows_compat.c
new file mode 100644
index 000000000..a25bc36b0
--- /dev/null
+++ b/kittens/choose/windows_compat.c
@@ -0,0 +1,107 @@
+/*
+ * windows_compat.c
+ * Copyright (C) 2017 Kovid Goyal <kovid at kovidgoyal.net>
+ *
+ * Distributed under terms of the GPL3 license.
+ */
+
+#include "choose-data-types.h"
+
+#include <windows.h>
+#include <process.h>
+#include <stdio.h>
+#include <errno.h>
+
+int
+cpu_count() {
+    SYSTEM_INFO sysinfo;
+    GetSystemInfo(&sysinfo);
+    return sysinfo.dwNumberOfProcessors;
+}
+
+void*
+alloc_threads(size_t num_threads) {
+    return calloc(num_threads, sizeof(uintptr_t));
+}
+
+bool
+start_thread(void* vt, size_t i, unsigned int (STDCALL *start_routine) (void *), void *arg) {
+    uintptr_t *threads = (uintptr_t*)vt;
+    errno = 0;
+    threads[i] = _beginthreadex(NULL, 0, start_routine, arg, 0, NULL);
+    if (threads[i] == 0) {
+        perror("Failed to create thread, with error");
+        return false;
+    }
+    return true;
+}
+
+void
+wait_for_thread(void *vt, size_t i) {  // Block until thread i has finished, then release its handle
+    uintptr_t *threads = vt;
+    WaitForSingleObject((HANDLE)threads[i], INFINITE);
+    CloseHandle((HANDLE)threads[i]);
+    threads[i] = 0;  // the handle is closed, clear the slot so it is not used again
+}
+
+void
+free_threads(void *threads) {
+    free(threads);
+}
+
+ssize_t
+getdelim(char **lineptr, size_t *n, int delim, FILE *stream) {
+    int c; char *cur_pos, *new_lineptr;  /* c must be an int: getc() returns EOF out of band and a char cannot tell it apart from a 0xFF byte */
+    size_t new_lineptr_len, used;
+    /* Stand-in for POSIX getdelim(): read up to and including delim into *lineptr, growing it as needed. Returns the length read or -1. */
+    if (lineptr == NULL || n == NULL || stream == NULL) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    if (*lineptr == NULL) {
+        *n = 8192; /* init len */
+        if ((*lineptr = (char *)malloc(*n)) == NULL) {
+            errno = ENOMEM;
+            return -1;
+        }
+    }
+
+    cur_pos = *lineptr;
+    for (;;) {
+        c = getc(stream);
+
+        if (ferror(stream) || (c == EOF && cur_pos == *lineptr))
+            return -1;
+
+        if (c == EOF)
+            break;
+
+        if ((*lineptr + *n - cur_pos) < 2) {
+            if (SSIZE_MAX / 2 < *n) {
+#ifdef EOVERFLOW
+                errno = EOVERFLOW;
+#else
+                errno = ERANGE; /* no EOVERFLOW defined */
+#endif
+                return -1;
+            }
+            new_lineptr_len = *n * 2;
+            used = (size_t)(cur_pos - *lineptr); /* remember the write offset, realloc() may move the buffer */
+            if ((new_lineptr = (char *)realloc(*lineptr, new_lineptr_len)) == NULL) {
+                errno = ENOMEM;
+                return -1;
+            }
+            *lineptr = new_lineptr; *n = new_lineptr_len;
+            cur_pos = new_lineptr + used;
+        }
+
+        *cur_pos++ = (char)c;
+
+        if (c == delim)
+            break;
+    }
+
+    *cur_pos = '\0';
+    return (ssize_t)(cur_pos - *lineptr);
+}
diff --git a/kitty/charsets.c b/kitty/charsets.c
index c8c8ad6e5..83682903e 100644
--- a/kitty/charsets.c
+++ b/kitty/charsets.c
@@ -243,7 +243,7 @@ decode_utf8(uint32_t* state, uint32_t* codep, uint8_t byte) {
 }
 
 size_t
-decode_utf8_string(char *src, size_t sz, uint32_t *dest) {
+decode_utf8_string(const char *src, size_t sz, uint32_t *dest) {
     // dest must be a zeroed array of size at least sz
     uint32_t codep = 0, state = 0, prev = UTF8_ACCEPT;
     size_t i, d;
diff --git a/kitty/charsets.h b/kitty/charsets.h
index b3b144881..cb27e51f5 100644
--- a/kitty/charsets.h
+++ b/kitty/charsets.h
@@ -10,5 +10,5 @@
 #include <stddef.h>
 
 uint32_t decode_utf8(uint32_t*, uint32_t*, uint8_t byte);
-size_t decode_utf8_string(char *src, size_t sz, uint32_t *dest);
+size_t decode_utf8_string(const char *src, size_t sz, uint32_t *dest);
 unsigned int encode_utf8(uint32_t ch, char* dest);
diff --git a/setup.py b/setup.py
index b1440601a..d9443b992 100755
--- a/setup.py
+++ b/setup.py
@@ -464,11 +464,26 @@ def kittens_env():
 
 def compile_kittens(incremental, compilation_database, all_keys):
     kenv = kittens_env()
-    for sources, all_headers, dest in [
-        (['kittens/unicode_input/unicode_names.c'], ['kittens/unicode_input/names.h', 'kitty/data-types.h'],  'kittens/unicode_input/unicode_names'),
-        (['kittens/diff/speedup.c'], ['kitty/data-types.h'], 'kittens/diff/diff_speedup'),
-    ]:
-        compile_c_extension(kenv, dest, incremental, compilation_database, all_keys, sources, all_headers)
+
+    def list_files(q):  # expand the glob pattern q and return the matches as paths relative to base
+        return [os.path.relpath(x, base) for x in glob.glob(q)]
+
+    def files(kitten, output, extra_headers=(), extra_sources=(), filter_sources=None):  # -> (sources, headers, dest) for one kitten
+        sources = list(filter(filter_sources, list(extra_sources) + list_files(os.path.join('kittens', kitten, '*.c'))))  # filter_sources=None keeps every non-empty path
+        headers = list_files(os.path.join('kittens', kitten, '*.h')) + list(extra_headers)
+        return (sources, headers, 'kittens/{}/{}'.format(kitten, output))
+
+    for sources, all_headers, dest in (
+        files('unicode_input', 'unicode_names'),
+        files('diff', 'diff_speedup'),
+        files(
+            'choose', 'subseq_matcher',
+            extra_headers=('kitty/charsets.h',),
+            extra_sources=('kitty/charsets.c',),
+            filter_sources=lambda x: 'windows_compat.c' not in x),
+    ):
+        compile_c_extension(
+            kenv, dest, incremental, compilation_database, all_keys, sources, all_headers + ['kitty/data-types.h'])
 
 
 def build(args, native_optimizations=True):