diff --git a/kittens/choose/__init__.py b/kittens/choose/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/kittens/choose/choose-data-types.h b/kittens/choose/choose-data-types.h deleted file mode 100644 index 65da19799..000000000 --- a/kittens/choose/choose-data-types.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (C) 2017 Kovid Goyal - * - * Distributed under terms of the GPL3 license. - */ - -#pragma once -#include "data-types.h" -#if defined(_MSC_VER) -#define ISWINDOWS -#define STDCALL __stdcall -#ifndef ssize_t -#include -typedef SSIZE_T ssize_t; -#ifndef SSIZE_MAX -#if defined(_WIN64) - #define SSIZE_MAX _I64_MAX -#else - #define SSIZE_MAX LONG_MAX -#endif -#endif -#endif -#else -#define STDCALL -#endif - -#include "vector.h" - -typedef uint8_t len_t; -typedef uint32_t text_t; - -#define LEN_MAX UINT8_MAX -#define IS_LOWERCASE(x) (x) >= 'a' && (x) <= 'z' -#define IS_UPPERCASE(x) (x) >= 'A' && (x) <= 'Z' -#define LOWERCASE(x) ((IS_UPPERCASE(x)) ? (x) + 32 : (x)) -#define arraysz(x) (sizeof(x)/sizeof(x[0])) - -typedef struct { - text_t* src; - ssize_t src_sz; - len_t haystack_len; - len_t *positions; - double score; - ssize_t idx; -} Candidate; - -typedef struct { - Candidate *haystack; - size_t haystack_count; - text_t level1[LEN_MAX], level2[LEN_MAX], level3[LEN_MAX], needle[LEN_MAX]; - len_t level1_len, level2_len, level3_len, needle_len; - size_t haystack_size; - text_t *output; - size_t output_sz, output_pos; - int oom; -} GlobalData; - -typedef struct { - bool output_positions; - size_t limit; - int num_threads; - text_t mark_before[128], mark_after[128], delimiter[128]; - size_t mark_before_sz, mark_after_sz, delimiter_sz; -} Options; - -VECTOR_OF(len_t, Positions) -VECTOR_OF(text_t, Chars) -VECTOR_OF(Candidate, Candidates) - - -void output_results(GlobalData *, Candidate *haystack, size_t count, Options *opts, len_t needle_len); -void* alloc_workspace(len_t max_haystack_len, GlobalData*); -void* free_workspace(void *v); -double score_item(void *v, text_t *haystack, len_t haystack_len, len_t *match_positions); -unsigned int encode_codepoint(text_t ch, char* dest); -size_t unescape(const char *src, char *dest, size_t destlen); -int cpu_count(void); -void* alloc_threads(size_t num_threads); -#ifdef ISWINDOWS -bool start_thread(void* threads, size_t i, unsigned int (STDCALL *start_routine) (void *), void *arg); -ssize_t getdelim(char **lineptr, size_t *n, int delim, FILE *stream); -#else -bool start_thread(void* threads, size_t i, void *(*start_routine) (void *), void *arg); -#endif -void wait_for_thread(void *threads, size_t i); -void free_threads(void *threads); diff --git a/kittens/choose/main.c b/kittens/choose/main.c deleted file mode 100644 index 788f140cc..000000000 --- a/kittens/choose/main.c +++ /dev/null @@ -1,244 +0,0 @@ -/* - * main.c - * Copyright (C) 2017 Kovid Goyal - * - * Distributed under terms of the GPL3 license. - */ - -#include "choose-data-types.h" -#include "charsets.h" - -#include -#include -#include -#include -#include -#include -#ifndef ISWINDOWS -#include -#endif - -typedef struct { - size_t start, count; - void *workspace; - len_t max_haystack_len; - bool started; - GlobalData *global; -} JobData; - - -static unsigned int STDCALL -run_scoring(JobData *job_data) { - GlobalData *global = job_data->global; - for (size_t i = job_data->start; i < job_data->start + job_data->count; i++) { - global->haystack[i].score = score_item(job_data->workspace, global->haystack[i].src, global->haystack[i].haystack_len, global->haystack[i].positions); - } - return 0; -} - -static void* -run_scoring_pthreads(void *job_data) { - run_scoring((JobData*)job_data); - return NULL; -} -#ifdef ISWINDOWS -#define START_FUNC run_scoring -#else -#define START_FUNC run_scoring_pthreads -#endif - -static JobData* -create_job(size_t i, size_t blocksz, GlobalData *global) { - JobData *ans = (JobData*)calloc(1, sizeof(JobData)); - if (ans == NULL) return NULL; - ans->start = i * blocksz; - if (ans->start >= global->haystack_count) ans->count = 0; - else ans->count = global->haystack_count - ans->start; - ans->max_haystack_len = 0; - for (size_t j = ans->start; j < ans->start + ans->count; j++) ans->max_haystack_len = MAX(ans->max_haystack_len, global->haystack[j].haystack_len); - if (ans->count > 0) { - ans->workspace = alloc_workspace(ans->max_haystack_len, global); - if (!ans->workspace) { free(ans); return NULL; } - } - ans->global = global; - return ans; -} - -static JobData* -free_job(JobData *job) { - if (job) { - if (job->workspace) free_workspace(job->workspace); - free(job); - } - return NULL; -} - - -static int -run_threaded(int num_threads_asked, GlobalData *global) { - int ret = 0; - size_t i, blocksz; - size_t num_threads = MAX(1, num_threads_asked > 0 ? num_threads_asked : cpu_count()); - if (global->haystack_size < 10000) num_threads = 1; - /* printf("num_threads: %lu asked: %d sysconf: %ld\n", num_threads, num_threads_asked, sysconf(_SC_NPROCESSORS_ONLN)); */ - - void *threads = alloc_threads(num_threads); - JobData **job_data = calloc(num_threads, sizeof(JobData*)); - if (threads == NULL || job_data == NULL) { ret = 1; goto end; } - - blocksz = global->haystack_count / num_threads + global->haystack_count % num_threads; - - for (i = 0; i < num_threads; i++) { - job_data[i] = create_job(i, blocksz, global); - if (job_data[i] == NULL) { ret = 1; goto end; } - } - - if (num_threads == 1) { - run_scoring(job_data[0]); - } else { - for (i = 0; i < num_threads; i++) { - job_data[i]->started = false; - if (job_data[i]->count > 0) { - if (!start_thread(threads, i, START_FUNC, job_data[i])) ret = 1; - else job_data[i]->started = true; - } - } - } - -end: - if (num_threads > 1 && job_data) { - for (i = 0; i < num_threads; i++) { - if (job_data[i] && job_data[i]->started) wait_for_thread(threads, i); - } - } - if (job_data) { for (i = 0; i < num_threads; i++) job_data[i] = free_job(job_data[i]); } - free(job_data); - free_threads(threads); - return ret; -} - - -static int -run_search(Options *opts, GlobalData *global, const char * const *lines, const size_t* sizes, size_t num_lines) { - const char *linebuf = NULL; - size_t idx = 0; - ssize_t sz = 0; - int ret = 0; - Candidates candidates = {0}; - Chars chars = {0}; - - ALLOC_VEC(text_t, chars, 8192 * 20); - if (chars.data == NULL) return 1; - ALLOC_VEC(Candidate, candidates, 8192); - if (candidates.data == NULL) { FREE_VEC(chars); return 1; } - - for (size_t i = 0; i < num_lines; i++) { - sz = sizes[i]; - linebuf = lines[i]; - if (sz > 0) { - ENSURE_SPACE(text_t, chars, sz); - ENSURE_SPACE(Candidate, candidates, 1); - sz = decode_utf8_string(linebuf, sz, &(NEXT(chars))); - NEXT(candidates).src_sz = sz; - NEXT(candidates).haystack_len = (len_t)(MIN(LEN_MAX, sz)); - global->haystack_size += NEXT(candidates).haystack_len; - NEXT(candidates).idx = idx++; - INC(candidates, 1); INC(chars, sz); - } - } - - // Prepare the haystack allocating space for positions arrays and settings - // up the src pointers to point to the correct locations - Candidate *haystack = &ITEM(candidates, 0); - len_t *positions = (len_t*)calloc(SIZE(candidates), sizeof(len_t) * global->needle_len); - if (positions) { - text_t *cdata = &ITEM(chars, 0); - for (size_t i = 0, off = 0; i < SIZE(candidates); i++) { - haystack[i].positions = positions + (i * global->needle_len); - haystack[i].src = cdata + off; - off += haystack[i].src_sz; - } - global->haystack = haystack; - global->haystack_count = SIZE(candidates); - ret = run_threaded(opts->num_threads, global); - if (ret == 0) output_results(global, haystack, SIZE(candidates), opts, global->needle_len); - else { REPORT_OOM; } - } else { ret = 1; REPORT_OOM; } - - FREE_VEC(chars); free(positions); FREE_VEC(candidates); - return ret; -} - -static size_t -copy_unicode_object(PyObject *src, text_t *dest, size_t dest_sz) { - PyUnicode_READY(src); - int kind = PyUnicode_KIND(src); - void *data = PyUnicode_DATA(src); - size_t len = PyUnicode_GetLength(src); - for (size_t i = 0; i < len && i < dest_sz; i++) { - dest[i] = PyUnicode_READ(kind, data, i); - } - return len; -} - -static PyObject* -match(PyObject *self, PyObject *args) { - (void)(self); - int output_positions; - unsigned long limit; - PyObject *lines, *levels, *needle, *mark_before, *mark_after, *delimiter; - Options opts = {0}; - GlobalData global = {0}; - if (!PyArg_ParseTuple(args, "O!O!UpkiUUU", - &PyList_Type, &lines, &PyTuple_Type, &levels, &needle, - &output_positions, &limit, &opts.num_threads, - &mark_before, &mark_after, &delimiter - )) return NULL; - opts.output_positions = output_positions ? true : false; - opts.limit = limit; - global.level1_len = copy_unicode_object(PyTuple_GET_ITEM(levels, 0), global.level1, arraysz(global.level1)); - global.level2_len = copy_unicode_object(PyTuple_GET_ITEM(levels, 1), global.level2, arraysz(global.level2)); - global.level3_len = copy_unicode_object(PyTuple_GET_ITEM(levels, 2), global.level3, arraysz(global.level3)); - global.needle_len = copy_unicode_object(needle, global.needle, arraysz(global.needle)); - opts.mark_before_sz = copy_unicode_object(mark_before, opts.mark_before, arraysz(opts.mark_before)); - opts.mark_after_sz = copy_unicode_object(mark_after, opts.mark_after, arraysz(opts.mark_after)); - opts.delimiter_sz = copy_unicode_object(delimiter, opts.delimiter, arraysz(opts.delimiter)); - size_t num_lines = PyList_GET_SIZE(lines); - char **clines = malloc(sizeof(char*) * num_lines); - if (!clines) { return PyErr_NoMemory(); } - size_t *sizes = malloc(sizeof(size_t) * num_lines); - if (!sizes) { free(clines); clines = NULL; return PyErr_NoMemory(); } - for (size_t i = 0; i < num_lines; i++) { - clines[i] = PyBytes_AS_STRING(PyList_GET_ITEM(lines, i)); - sizes[i] = PyBytes_GET_SIZE(PyList_GET_ITEM(lines, i)); - } - Py_BEGIN_ALLOW_THREADS; - run_search(&opts, &global, (const char* const *)clines, sizes, num_lines); - Py_END_ALLOW_THREADS; - free(clines); free(sizes); - if (global.oom) { free(global.output); return PyErr_NoMemory(); } - if (global.output) { - PyObject *ans = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, global.output, global.output_pos); - free(global.output); - return ans; - } - Py_RETURN_NONE; -} - -static PyMethodDef module_methods[] = { - {"match", match, METH_VARARGS, ""}, - {NULL, NULL, 0, NULL} /* Sentinel */ -}; - -static struct PyModuleDef module = { - .m_base = PyModuleDef_HEAD_INIT, - .m_name = "subseq_matcher", /* name of module */ - .m_doc = NULL, - .m_size = -1, - .m_methods = module_methods -}; - -EXPORTED PyMODINIT_FUNC -PyInit_subseq_matcher(void) { - return PyModule_Create(&module); -} diff --git a/kittens/choose/main.py b/kittens/choose/main.py deleted file mode 100644 index e7194601a..000000000 --- a/kittens/choose/main.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python3 -# License: GPL v3 Copyright: 2018, Kovid Goyal - -import sys -from typing import List - -from kitty.key_encoding import KeyEvent - -from ..tui.handler import Handler -from ..tui.loop import Loop - - -class ChooseHandler(Handler): - - def initialize(self) -> None: - pass - - def on_text(self, text: str, in_bracketed_paste: bool = False) -> None: - pass - - def on_key(self, key_event: KeyEvent) -> None: - pass - - def on_interrupt(self) -> None: - self.quit_loop(1) - - def on_eot(self) -> None: - self.quit_loop(1) - - -def main(args: List[str]) -> None: - loop = Loop() - handler = ChooseHandler() - loop.loop(handler) - raise SystemExit(loop.return_code) - - -if __name__ == '__main__': - main(sys.argv) diff --git a/kittens/choose/match.py b/kittens/choose/match.py deleted file mode 100644 index 15ef54103..000000000 --- a/kittens/choose/match.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python -# License: GPLv3 Copyright: 2021, Kovid Goyal - -from typing import Iterable, List, Union - -from . import subseq_matcher - - -def match( - input_data: Union[str, bytes, Iterable[Union[str, bytes]]], - query: str, - threads: int = 0, - positions: bool = False, - level1: str = '/', - level2: str = '-_0123456789', - level3: str = '.', - limit: int = 0, - mark_before: str = '', - mark_after: str = '', - delimiter: str = '\n' -) -> List[str]: - if isinstance(input_data, str): - idata = [x.encode('utf-8') for x in input_data.split(delimiter)] - elif isinstance(input_data, bytes): - idata = input_data.split(delimiter.encode('utf-8')) - else: - idata = [x.encode('utf-8') if isinstance(x, str) else x for x in input_data] - query = query.lower() - level1 = level1.lower() - level2 = level2.lower() - level3 = level3.lower() - data = subseq_matcher.match( - idata, (level1, level2, level3), query, - positions, limit, threads, - mark_before, mark_after, delimiter) - if data is None: - return [] - return list(filter(None, data.split(delimiter or '\n'))) diff --git a/kittens/choose/output.c b/kittens/choose/output.c deleted file mode 100644 index f1e822318..000000000 --- a/kittens/choose/output.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * output.c - * Copyright (C) 2017 Kovid Goyal - * - * Distributed under terms of the GPL3 license. - */ - -#include "choose-data-types.h" -#include "../../kitty/iqsort.h" -#include -#include -#include -#include -#ifdef ISWINDOWS -#include -#define STDOUT_FILENO 1 -static ssize_t ms_write(int fd, const void* buf, size_t count) { return _write(fd, buf, (unsigned int)count); } -#define write ms_write -#else -#include -#endif -#include - - -#define FIELD(x, which) (((Candidate*)(x))->which) - -static bool -ensure_space(GlobalData *global, size_t sz) { - if (global->output_sz < sz + global->output_pos || !global->output) { - size_t before = global->output_sz; - global->output_sz += MAX(sz, (64u * 1024u)); - global->output = realloc(global->output, sizeof(text_t) * global->output_sz); - if (!global->output) { - global->output_sz = before; - return false; - } - } - return true; -} - -static void -output_text(GlobalData *global, const text_t *data, size_t sz) { - if (ensure_space(global, sz)) { - memcpy(global->output + global->output_pos, data, sizeof(text_t) * sz); - global->output_pos += sz; - } -} - -static void -output_with_marks(GlobalData *global, Options *opts, text_t *src, size_t src_sz, len_t *positions, len_t poslen) { - size_t pos, i = 0; - for (pos = 0; pos < poslen; pos++, i++) { - output_text(global, src + i, MIN(src_sz, positions[pos]) - i); - i = positions[pos]; - if (i < src_sz) { - if (opts->mark_before_sz > 0) output_text(global, opts->mark_before, opts->mark_before_sz); - output_text(global, src + i, 1); - if (opts->mark_after_sz > 0) output_text(global, opts->mark_after, opts->mark_after_sz); - } - } - i = positions[poslen - 1]; - if (i + 1 < src_sz) output_text(global, src + i + 1, src_sz - i - 1); -} - -static void -output_positions(GlobalData *global, len_t *positions, len_t num) { - wchar_t buf[128]; - for (len_t i = 0; i < num; i++) { - int pnum = swprintf(buf, arraysz(buf), L"%u", positions[i]); - if (pnum > 0 && ensure_space(global, pnum + 1)) { - for (int k = 0; k < pnum; k++) global->output[global->output_pos++] = buf[k]; - global->output[global->output_pos++] = (i == num - 1) ? ':' : ','; - } - } -} - - -static void -output_result(GlobalData *global, Candidate *c, Options *opts, len_t needle_len) { - if (opts->output_positions) output_positions(global, c->positions, needle_len); - if (opts->mark_before_sz > 0 || opts->mark_after_sz > 0) { - output_with_marks(global, opts, c->src, c->src_sz, c->positions, needle_len); - } else { - output_text(global, c->src, c->src_sz); - } - output_text(global, opts->delimiter, opts->delimiter_sz); -} - - -void -output_results(GlobalData *global, Candidate *haystack, size_t count, Options *opts, len_t needle_len) { - Candidate *c; -#define lt(b, a) ( (a)->score < (b)->score || ((a)->score == (b)->score && (a->idx < b->idx)) ) - QSORT(Candidate, haystack, count, lt); -#undef lt - size_t left = opts->limit > 0 ? opts->limit : count; - for (size_t i = 0; i < left; i++) { - c = haystack + i; - if (c->score > 0) output_result(global, c, opts, needle_len); - } -} diff --git a/kittens/choose/score.c b/kittens/choose/score.c deleted file mode 100644 index fa48efd80..000000000 --- a/kittens/choose/score.c +++ /dev/null @@ -1,182 +0,0 @@ -/* - * score.c - * Copyright (C) 2017 Kovid Goyal - * - * Distributed under terms of the GPL3 license. - */ - -#include "choose-data-types.h" -#include -#include -#include -#include - -typedef struct { - len_t *positions_buf; // buffer to store positions for every char in needle - len_t **positions; // Array of pointers into positions_buf - len_t *positions_count; // Array of counts for positions - len_t needle_len; // Length of the needle - len_t max_haystack_len; // Max length of a string in the haystack - len_t haystack_len; // Length of the current string in the haystack - len_t *address; // Array of offsets into the positions array - double max_score_per_char; - uint8_t *level_factors; // Array of score factors for every character in the current haystack that matches a character in the needle - text_t *level1, *level2, *level3; // The characters in the levels - len_t level1_len, level2_len, level3_len; - text_t *needle; // The current needle - text_t *haystack; //The current haystack -} WorkSpace; - -void* -alloc_workspace(len_t max_haystack_len, GlobalData *global) { - WorkSpace *ans = calloc(1, sizeof(WorkSpace)); - if (ans == NULL) return NULL; - ans->positions_buf = (len_t*) calloc(global->needle_len, sizeof(len_t) * max_haystack_len); - ans->positions = (len_t**)calloc(global->needle_len, sizeof(len_t*)); - ans->positions_count = (len_t*)calloc(2*global->needle_len, sizeof(len_t)); - ans->level_factors = (uint8_t*)calloc(max_haystack_len, sizeof(uint8_t)); - if (ans->positions == NULL || ans->positions_buf == NULL || ans->positions_count == NULL || ans->level_factors == NULL) { free_workspace(ans); return NULL; } - ans->needle = global->needle; - ans->needle_len = global->needle_len; - ans->max_haystack_len = max_haystack_len; - ans->level1 = global->level1; ans->level2 = global->level2; ans->level3 = global->level3; - ans->level1_len = global->level1_len; ans->level2_len = global->level2_len; ans->level3_len = global->level3_len; - ans->address = ans->positions_count + sizeof(len_t) * global->needle_len; - for (len_t i = 0; i < global->needle_len; i++) ans->positions[i] = ans->positions_buf + i * max_haystack_len; - return ans; -} - -#define NUKE(x) free(x); x = NULL; - -void* -free_workspace(void *v) { - WorkSpace *w = (WorkSpace*)v; - NUKE(w->positions_buf); - NUKE(w->positions); - NUKE(w->positions_count); - NUKE(w->level_factors); - free(w); - return NULL; -} - -static bool -has_char(text_t *text, len_t sz, text_t ch) { - for(len_t i = 0; i < sz; i++) { - if(text[i] == ch) return true; - } - return false; -} - -static uint8_t -level_factor_for(text_t current, text_t last, WorkSpace *w) { - text_t lch = LOWERCASE(last); - if (has_char(w->level1, w->level1_len, lch)) return 90; - if (has_char(w->level2, w->level2_len, lch)) return 80; - if (IS_LOWERCASE(last) && IS_UPPERCASE(current)) return 80; // CamelCase - if (has_char(w->level3, w->level3_len, lch)) return 70; - return 0; -} - -static void -init_workspace(WorkSpace *w, text_t *haystack, len_t haystack_len) { - // Calculate the positions and level_factors arrays for the specified haystack - bool level_factor_calculated = false; - memset(w->positions_count, 0, sizeof(*(w->positions_count)) * 2 * w->needle_len); - memset(w->level_factors, 0, sizeof(*(w->level_factors)) * w->max_haystack_len); - for (len_t i = 0; i < haystack_len; i++) { - level_factor_calculated = false; - for (len_t j = 0; j < w->needle_len; j++) { - if (w->needle[j] == LOWERCASE(haystack[i])) { - if (!level_factor_calculated) { - level_factor_calculated = true; - w->level_factors[i] = i > 0 ? level_factor_for(haystack[i], haystack[i-1], w) : 0; - } - w->positions[j][w->positions_count[j]++] = i; - } - } - } - w->haystack = haystack; - w->haystack_len = haystack_len; - w->max_score_per_char = (1.0 / haystack_len + 1.0 / w->needle_len) / 2.0; -} - - -static bool -has_atleast_one_match(WorkSpace *w) { - int p = -1; - bool found; - for (len_t i = 0; i < w->needle_len; i++) { - if (w->positions_count[i] == 0) return false; // All characters of the needle are not present in the haystack - found = false; - for (len_t j = 0; j < w->positions_count[i]; j++) { - if (w->positions[i][j] > p) { p = w->positions[i][j]; found = true; break; } - } - if (!found) return false; // Characters of needle not present in sequence in haystack - } - return true; -} - -#define POSITION(x) w->positions[x][w->address[x]] - -static bool -increment_address(WorkSpace *w) { - len_t pos = w->needle_len - 1; - while(true) { - w->address[pos]++; - if (w->address[pos] < w->positions_count[pos]) return true; - if (pos == 0) break; - w->address[pos--] = 0; - } - return false; -} - -static bool -address_is_monotonic(WorkSpace *w) { - // Check if the character positions pointed to by the current address are monotonic - for (len_t i = 1; i < w->needle_len; i++) { - if (POSITION(i) <= POSITION(i-1)) return false; - } - return true; -} - -static double -calc_score(WorkSpace *w) { - double ans = 0; - len_t distance, pos; - for (len_t i = 0; i < w->needle_len; i++) { - pos = POSITION(i); - if (i == 0) distance = pos < LEN_MAX ? pos + 1 : LEN_MAX; - else { - distance = pos - POSITION(i-1); - if (distance < 2) { - ans += w->max_score_per_char; // consecutive characters - continue; - } - } - if (w->level_factors[pos]) ans += (100 * w->max_score_per_char) / w->level_factors[pos]; // at a special location - else ans += (0.75 * w->max_score_per_char) / distance; - } - return ans; -} - -static double -process_item(WorkSpace *w, len_t *match_positions) { - double highscore = 0, score; - do { - if (!address_is_monotonic(w)) continue; - score = calc_score(w); - if (score > highscore) { - highscore = score; - for (len_t i = 0; i < w->needle_len; i++) match_positions[i] = POSITION(i); - } - } while(increment_address(w)); - return highscore; -} - -double -score_item(void *v, text_t *haystack, len_t haystack_len, len_t *match_positions) { - WorkSpace *w = (WorkSpace*)v; - init_workspace(w, haystack, haystack_len); - if (!has_atleast_one_match(w)) return 0; - return process_item(w, match_positions); -} diff --git a/kittens/choose/subseq_matcher.pyi b/kittens/choose/subseq_matcher.pyi deleted file mode 100644 index b8fc440ca..000000000 --- a/kittens/choose/subseq_matcher.pyi +++ /dev/null @@ -1,8 +0,0 @@ -from typing import List, Optional, Tuple - -def match( - lines: List[bytes], levels: Tuple[str, str, str], needle: str, - output_positions: bool, limit: int, num_threads: int, mark_before: str, - mark_after: str, delimiter: str -) -> Optional[str]: - pass diff --git a/kittens/choose/unix_compat.c b/kittens/choose/unix_compat.c deleted file mode 100644 index 1acab4e57..000000000 --- a/kittens/choose/unix_compat.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * unix_compat.c - * Copyright (C) 2017 Kovid Goyal - * - * Distributed under terms of the GPL3 license. - */ - -#include "choose-data-types.h" -#include -#include -#include -#include -#include - -#ifdef __APPLE__ -#ifndef _SC_NPROCESSORS_ONLN -#define _SC_NPROCESSORS_ONLN 58 -#endif -#endif - -int -cpu_count(void) { - return sysconf(_SC_NPROCESSORS_ONLN); -} - - -void* -alloc_threads(size_t num_threads) { - return calloc(num_threads, sizeof(pthread_t)); -} - -bool -start_thread(void* threads, size_t i, void *(*start_routine) (void *), void *arg) { - int rc; - if ((rc = pthread_create(((pthread_t*)threads) + i, NULL, start_routine, arg))) { - fprintf(stderr, "Failed to create thread, with error: %s\n", strerror(rc)); - return false; - } - return true; -} - -void -wait_for_thread(void *threads, size_t i) { - pthread_join(((pthread_t*)(threads))[i], NULL); -} - -void -free_threads(void *threads) { - free(threads); -} diff --git a/kittens/choose/vector.h b/kittens/choose/vector.h deleted file mode 100644 index 742a8412f..000000000 --- a/kittens/choose/vector.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (C) 2017 Kovid Goyal - * - * Distributed under terms of the GPL3 license. - */ - -#pragma once - -#include "data-types.h" - -#define REPORT_OOM global->oom = 1; - -#define VECTOR_OF(TYPE, NAME) typedef struct { \ - TYPE *data; \ - size_t size; \ - size_t capacity; \ -} NAME; - -#define ALLOC_VEC(TYPE, vec, cap) \ - vec.size = 0; vec.capacity = cap; \ - vec.data = (TYPE*)malloc(vec.capacity * sizeof(TYPE)); \ - if (vec.data == NULL) { REPORT_OOM; } - -#define FREE_VEC(vec) \ - if (vec.data) { free(vec.data); vec.data = NULL; } \ - vec.size = 0; vec.capacity = 0; - -#define ENSURE_SPACE(TYPE, vec, amt) \ - if (vec.size + amt >= vec.capacity) { \ - vec.capacity = MAX(vec.capacity * 2, vec.size + amt); \ - void *temp = realloc(vec.data, sizeof(TYPE) * vec.capacity); \ - if (temp == NULL) { REPORT_OOM; ret = 1; free(vec.data); vec.data = NULL; vec.size = 0; vec.capacity = 0; break; } \ - else vec.data = temp; \ - } - -#define NEXT(vec) (vec.data[vec.size]) - -#define INC(vec, amt) vec.size += amt; - -#define SIZE(vec) (vec.size) - -#define ITEM(vec, n) (vec.data[n]) diff --git a/kittens/choose/windows_compat.c b/kittens/choose/windows_compat.c deleted file mode 100644 index a25bc36b0..000000000 --- a/kittens/choose/windows_compat.c +++ /dev/null @@ -1,107 +0,0 @@ -/* - * windows_compat.c - * Copyright (C) 2017 Kovid Goyal - * - * Distributed under terms of the GPL3 license. - */ - -#include "choose-data-types.h" - -#include -#include -#include -#include - -int -cpu_count() { - SYSTEM_INFO sysinfo; - GetSystemInfo(&sysinfo); - return sysinfo.dwNumberOfProcessors; -} - -void* -alloc_threads(size_t num_threads) { - return calloc(num_threads, sizeof(uintptr_t)); -} - -bool -start_thread(void* vt, size_t i, unsigned int (STDCALL *start_routine) (void *), void *arg) { - uintptr_t *threads = (uintptr_t*)vt; - errno = 0; - threads[i] = _beginthreadex(NULL, 0, start_routine, arg, 0, NULL); - if (threads[i] == 0) { - perror("Failed to create thread, with error"); - return false; - } - return true; -} - -void -wait_for_thread(void *vt, size_t i) { - uintptr_t *threads = vt; - WaitForSingleObject((HANDLE)threads[i], INFINITE); - CloseHandle((HANDLE)threads[i]); - threads[i] = 0; -} - -void -free_threads(void *threads) { - free(threads); -} - -ssize_t -getdelim(char **lineptr, size_t *n, int delim, FILE *stream) { - char c, *cur_pos, *new_lineptr; - size_t new_lineptr_len; - - if (lineptr == NULL || n == NULL || stream == NULL) { - errno = EINVAL; - return -1; - } - - if (*lineptr == NULL) { - *n = 8192; /* init len */ - if ((*lineptr = (char *)malloc(*n)) == NULL) { - errno = ENOMEM; - return -1; - } - } - - cur_pos = *lineptr; - for (;;) { - c = getc(stream); - - if (ferror(stream) || (c == EOF && cur_pos == *lineptr)) - return -1; - - if (c == EOF) - break; - - if ((*lineptr + *n - cur_pos) < 2) { - if (SSIZE_MAX / 2 < *n) { -#ifdef EOVERFLOW - errno = EOVERFLOW; -#else - errno = ERANGE; /* no EOVERFLOW defined */ -#endif - return -1; - } - new_lineptr_len = *n * 2; - - if ((new_lineptr = (char *)realloc(*lineptr, new_lineptr_len)) == NULL) { - errno = ENOMEM; - return -1; - } - *lineptr = new_lineptr; - *n = new_lineptr_len; - } - - *cur_pos++ = c; - - if (c == delim) - break; - } - - *cur_pos = '\0'; - return (ssize_t)(cur_pos - *lineptr); -} diff --git a/kitty_tests/choose.py b/kitty_tests/choose.py deleted file mode 100644 index cbb0e7de0..000000000 --- a/kitty_tests/choose.py +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env python3 -# License: GPLv3 Copyright: 2019, Kovid Goyal - -import random -import string - -from . import BaseTest - - -def run(input_data, query, **kw): - kw['threads'] = kw.get('threads', 1) - mark = kw.pop('mark', False) - from kittens.choose.match import match - mark_before = mark_after = '' - if mark: - if mark is True: - mark_before, mark_after = '\033[32m', '\033[39m' - else: - mark_before = mark_after = mark - kw['mark_before'], kw['mark_after'] = mark_before, mark_after - return match(input_data, query, **kw) - - -class TestMatcher(BaseTest): - - def run_matcher(self, *args, **kwargs): - result = run(*args, **kwargs) - return result - - def basic_test(self, inp, query, out, **k): - result = self.run_matcher(inp, query, **k) - if out is not None: - if hasattr(out, 'splitlines'): - out = list(filter(None, out.split(k.get('delimiter', '\n')))) - self.assertEqual(list(out), result) - return out - - def test_filtering(self): - ' Non matching entries must be removed ' - self.basic_test('test\nxyz', 'te', 'test') - self.basic_test('abc\nxyz', 'ba', '') - self.basic_test('abc\n123', 'abc', 'abc') - - def test_case_insensitive(self): - self.basic_test('test\nxyz', 'Te', 'test') - self.basic_test('test\nxyz', 'XY', 'xyz') - self.basic_test('test\nXYZ', 'xy', 'XYZ') - self.basic_test('test\nXYZ', 'mn', '') - - def test_marking(self): - ' Marking of matched characters ' - self.basic_test( - 'test\nxyz', - 'ts', - '\x1b[32mt\x1b[39me\x1b[32ms\x1b[39mt', - mark=True) - - def test_positions(self): - ' Output of positions ' - self.basic_test('abc\nac', 'ac', '0,1:ac\n0,2:abc', positions=True) - self.basic_test('abc\nv', 'a', '0:abc', positions=True) - - def test_delimiter(self): - ' Test using a custom line delimiter ' - self.basic_test('abc\n21ac', 'ac', 'ac1abc\n2', delimiter='1') - - def test_scoring(self): - ' Scoring algorithm ' - # Match at start - self.basic_test('archer\nelementary', 'e', 'elementary\narcher') - # Match at level factor - self.basic_test('xxxy\nxx/y', 'y', 'xx/y\nxxxy') - # CamelCase - self.basic_test('xxxy\nxxxY', 'y', 'xxxY\nxxxy') - # Total length - self.basic_test('xxxya\nxxxy', 'y', 'xxxy\nxxxya') - # Distance - self.basic_test('abbc\nabc', 'ac', 'abc\nabbc') - # Extreme chars - self.basic_test('xxa\naxx', 'a', 'axx\nxxa') - # Highest score - self.basic_test('xa/a', 'a', 'xa/|a|', mark='|') - - def test_threading(self): - ' Test matching on a large data set with different number of threads ' - alphabet = string.ascii_lowercase + string.ascii_uppercase + string.digits - - def random_word(): - sz = random.randint(2, 10) - return ''.join(random.choice(alphabet) for x in range(sz)) - words = [random_word() for i in range(400)] - - def random_item(): - num = random.randint(2, 7) - return '/'.join(random.choice(words) for w in range(num)) - - data = '\n'.join(random_item() for x in range(25123)) - - for threads in range(4): - self.basic_test(data, 'foo', None, threads=threads) diff --git a/kitty_tests/main.py b/kitty_tests/main.py index 9ac16cf52..e671f1f36 100644 --- a/kitty_tests/main.py +++ b/kitty_tests/main.py @@ -172,11 +172,13 @@ def run_python_tests(args: Any, go_proc: 'Optional[subprocess.Popen[bytes]]' = N def print_go() -> None: try: - print(go_proc.stdout.read().decode('utf-8', 'replace'), end='', flush=True) + go_proc.wait() except KeyboardInterrupt: go_proc.terminate() if go_proc.wait(0.1) is None: go_proc.kill() + + print(go_proc.stdout.read().decode('utf-8', 'replace'), end='', flush=True) go_proc.stdout.close() go_proc.wait() diff --git a/tools/tui/subseq/score.go b/tools/tui/subseq/score.go new file mode 100644 index 000000000..4b10f22e2 --- /dev/null +++ b/tools/tui/subseq/score.go @@ -0,0 +1,234 @@ +// License: GPLv3 Copyright: 2023, Kovid Goyal, + +package subseq + +import ( + "fmt" + "strings" + + "kitty/tools/utils" + "kitty/tools/utils/images" + + "golang.org/x/exp/slices" +) + +var _ = fmt.Print + +const ( + LEVEL1 = "/" + LEVEL2 = "-_0123456789" + LEVEL3 = "." +) + +type resolved_options_type struct { + level1, level2, level3 []rune +} + +type Options struct { + Level1, Level2, Level3 string + NumberOfThreads int +} + +type Match struct { + Positions []int + Score float64 + idx int + Text string +} + +func level_factor_for(current_lcase, last_lcase, current_cased, last_cased rune, opts *resolved_options_type) int { + switch { + case slices.Contains(opts.level1, last_lcase): + return 90 + case slices.Contains(opts.level2, last_lcase): + return 80 + case last_lcase == last_cased && current_lcase != current_cased: // camelCase + return 80 + case slices.Contains(opts.level3, last_lcase): + return 70 + default: + return 0 + } +} + +type workspace_type struct { + positions [][]int // positions of each needle char in haystack + level_factors []int + address []int + max_score_per_char float64 +} + +func (w *workspace_type) initialize(haystack_sz, needle_sz int) { + if cap(w.positions) < needle_sz { + w.positions = make([][]int, needle_sz) + } else { + w.positions = w.positions[:needle_sz] + } + if cap(w.level_factors) < haystack_sz { + w.level_factors = make([]int, 2*haystack_sz) + } else { + w.level_factors = w.level_factors[:haystack_sz] + } + for i, s := range w.positions { + if cap(s) < haystack_sz { + w.positions[i] = make([]int, 0, 2*haystack_sz) + } else { + w.positions[i] = w.positions[i][:0] + } + } + if cap(w.address) < needle_sz { + w.address = make([]int, needle_sz) + } + w.address = utils.Memset(w.address) +} + +func (w *workspace_type) position(x int) int { // the position of xth needle char in the haystack for the current address + return w.positions[x][w.address[x]] +} + +func (w *workspace_type) increment_address() bool { + pos := len(w.positions) - 1 // the last needle char + for { + w.address[pos]++ + if w.address[pos] < len(w.positions[pos]) { + return true + } + if pos == 0 { + break + } + w.address[pos] = 0 + pos-- + } + return false +} + +func (w *workspace_type) address_is_monotonic() bool { + // Check if the character positions pointed to by the current address are monotonic + for i := 1; i < len(w.positions); i++ { + if w.position(i) <= w.position(i-1) { + return false + } + } + return true +} + +func (w *workspace_type) calc_score() (ans float64) { + distance, pos := 0, 0 + for i := 0; i < len(w.positions); i++ { + pos = w.position(i) + if i == 0 { + distance = pos + 1 + } else { + distance = pos - w.position(i-1) + if distance < 2 { + ans += w.max_score_per_char // consecutive chars + continue + } + } + if w.level_factors[pos] > 0 { + ans += (100.0 * w.max_score_per_char) / float64(w.level_factors[pos]) // at a special location + } else { + ans += (0.75 * w.max_score_per_char) / float64(distance) + } + } + return +} + +func has_atleast_one_match(w *workspace_type) (found bool) { + p := -1 + for i := 0; i < len(w.positions); i++ { + if len(w.positions[i]) == 0 { // all chars of needle not in haystack + return false + } + found = false + for _, pos := range w.positions[i] { + if pos > p { + p = pos + found = true + break + } + } + if !found { // chars of needle not present in sequence in haystack + return false + } + } + return true +} + +func score_item(item string, idx int, needle []rune, opts *resolved_options_type, w *workspace_type) *Match { + ans := &Match{idx: idx, Text: item, Positions: make([]int, len(needle))} + haystack := []rune(strings.ToLower(item)) + orig_haystack := []rune(item) + w.initialize(len(orig_haystack), len(needle)) + for i := 0; i < len(haystack); i++ { + level_factor_calculated := false + for j := 0; j < len(needle); j++ { + if needle[j] == haystack[i] { + if !level_factor_calculated { + level_factor_calculated = true + if i > 0 { + w.level_factors[i] = level_factor_for(haystack[i], haystack[i-1], orig_haystack[i], orig_haystack[i-1], opts) + } + } + w.positions[j] = append(w.positions[j], i) + } + } + } + w.max_score_per_char = (1.0/float64(len(orig_haystack)) + 1.0/float64(len(needle))) / 2.0 + if !has_atleast_one_match(w) { + return ans + } + var score float64 + for { + if w.address_is_monotonic() { + score = w.calc_score() + if score > ans.Score { + ans.Score = score + for i := range ans.Positions { + ans.Positions[i] = w.position(i) + } + } + } + if !w.increment_address() { + break + } + } + if ans.Score > 0 { + adjust := utils.RuneOffsetsToByteOffsets(item) + for i := range ans.Positions { + ans.Positions[i] = adjust(ans.Positions[i]) + } + } + return ans +} + +func ScoreItems(query string, items []string, opts Options) []*Match { + ctx := images.Context{} + ctx.SetNumberOfThreads(opts.NumberOfThreads) + ans := make([]*Match, len(items)) + results := make(chan *Match, len(items)) + nr := []rune(strings.ToLower(query)) + if opts.Level1 == "" { + opts.Level1 = LEVEL1 + } + if opts.Level2 == "" { + opts.Level2 = LEVEL2 + } + if opts.Level3 == "" { + opts.Level3 = LEVEL3 + } + ropts := resolved_options_type{ + level1: []rune(opts.Level1), level2: []rune(opts.Level2), level3: []rune(opts.Level3), + } + ctx.Parallel(0, len(items), func(nums <-chan int) { + w := workspace_type{} + for i := range nums { + results <- score_item(items[i], i, nr, &ropts, &w) + } + }) + close(results) + for x := range results { + ans[x.idx] = x + } + return ans +} diff --git a/tools/tui/subseq/score_test.go b/tools/tui/subseq/score_test.go new file mode 100644 index 000000000..0be713d7b --- /dev/null +++ b/tools/tui/subseq/score_test.go @@ -0,0 +1,91 @@ +// License: GPLv3 Copyright: 2023, Kovid Goyal, + +package subseq + +import ( + "fmt" + "kitty/tools/utils" + "strconv" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" +) + +var _ = fmt.Print + +func TestSubseq(t *testing.T) { + var positions [][]int + sort_by_score := false + + simple := func(items, query string, expected ...string) { + matches := ScoreItems(query, utils.Splitlines(items), Options{}) + if sort_by_score { + matches = utils.StableSort(matches, func(a, b *Match) bool { return a.Score > b.Score }) + } + actual := make([]string, 0, len(matches)) + actual_positions := make([][]int, 0, len(matches)) + for _, m := range matches { + if m.Score > 0 { + actual = append(actual, m.Text) + actual_positions = append(actual_positions, m.Positions) + } + } + if expected == nil { + expected = []string{} + } + if diff := cmp.Diff(expected, actual); diff != "" { + t.Fatalf("Failed for items: %v\nMatches: %#v\n%s", utils.Splitlines(items), matches, diff) + } + if positions != nil { + if diff := cmp.Diff(positions, actual_positions); diff != "" { + t.Fatalf("Failed positions for items: %v\n%s", utils.Splitlines(items), diff) + } + positions = nil + } + } + simple("test\nxyz", "te", "test") + simple("abc\nxyz", "ba") + simple("abc\n123", "abc", "abc") + simple("test\nxyz", "Te", "test") + simple("test\nxyz", "XY", "xyz") + simple("test\nXYZ", "xy", "XYZ") + simple("test\nXYZ", "mn") + + positions = [][]int{{0, 2}, {0, 1}} + simple("abc\nac", "ac", "abc", "ac") + positions = [][]int{{0}} + simple("abc\nv", "a", "abc") + positions = [][]int{{len("汉"), 7}} + simple("汉a字b\nxyz", "ab", "汉a字b") + + sort_by_score = true + // Match at start + simple("archer\nelementary", "e", "elementary", "archer") + // Match at level factor + simple("xxxy\nxx/y", "y", "xx/y", "xxxy") + // CamelCase + simple("xxxy\nxxxY", "y", "xxxY", "xxxy") + // Total length + simple("xxxya\nxxxy", "y", "xxxy", "xxxya") + // Distance + simple("abbc\nabc", "ac", "abc", "abbc") + // Extreme chars + simple("xxa\naxx", "a", "axx", "xxa") + // Highest score + positions = [][]int{{3}} + simple("xa/a", "a", "xa/a") + + sort_by_score = false + items := make([]string, 256) + for i := range items { + items[i] = strconv.Itoa(i) + } + expected := make([]string, 0, len(items)) + for _, x := range items { + if strings.ContainsRune(x, rune('2')) { + expected = append(expected, x) + } + } + simple(strings.Join(items, "\n"), "2", expected...) +} diff --git a/tools/utils/strings.go b/tools/utils/strings.go index 9a453b7e5..1373b4b6f 100644 --- a/tools/utils/strings.go +++ b/tools/utils/strings.go @@ -139,6 +139,9 @@ func Splitlines(x string, expected_number_of_lines ...int) (ans []string) { return NewLineScanner("").Split(x, expected_number_of_lines...) } +// Return a function that can be called sequentially with rune based offsets +// converting them to byte based offsets. The rune offsets must be monotonic, +// otherwise the function returns -1 func RuneOffsetsToByteOffsets(text string) func(int) int { self := struct { char_offset, byte_offset, last int