kitty/kittens/choose/score.c
Kovid Goyal be34af4555
Remove inline from .c files
It is ignored by compilers and hides unused static functions
2021-08-03 09:11:17 +05:30

183 lines
6.4 KiB
C

/*
* score.c
* Copyright (C) 2017 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#include "choose-data-types.h"
#include <stdlib.h>
#include <string.h>
#include <float.h>
#include <stdio.h>
typedef struct {
len_t *positions_buf; // buffer to store positions for every char in needle
len_t **positions; // Array of pointers into positions_buf
len_t *positions_count; // Array of counts for positions
len_t needle_len; // Length of the needle
len_t max_haystack_len; // Max length of a string in the haystack
len_t haystack_len; // Length of the current string in the haystack
len_t *address; // Array of offsets into the positions array
double max_score_per_char;
uint8_t *level_factors; // Array of score factors for every character in the current haystack that matches a character in the needle
text_t *level1, *level2, *level3; // The characters in the levels
len_t level1_len, level2_len, level3_len;
text_t *needle; // The current needle
text_t *haystack; //The current haystack
} WorkSpace;
void*
alloc_workspace(len_t max_haystack_len, GlobalData *global) {
WorkSpace *ans = calloc(1, sizeof(WorkSpace));
if (ans == NULL) return NULL;
ans->positions_buf = (len_t*) calloc(global->needle_len, sizeof(len_t) * max_haystack_len);
ans->positions = (len_t**)calloc(global->needle_len, sizeof(len_t*));
ans->positions_count = (len_t*)calloc(2*global->needle_len, sizeof(len_t));
ans->level_factors = (uint8_t*)calloc(max_haystack_len, sizeof(uint8_t));
if (ans->positions == NULL || ans->positions_buf == NULL || ans->positions_count == NULL || ans->level_factors == NULL) { free_workspace(ans); return NULL; }
ans->needle = global->needle;
ans->needle_len = global->needle_len;
ans->max_haystack_len = max_haystack_len;
ans->level1 = global->level1; ans->level2 = global->level2; ans->level3 = global->level3;
ans->level1_len = global->level1_len; ans->level2_len = global->level2_len; ans->level3_len = global->level3_len;
ans->address = ans->positions_count + sizeof(len_t) * global->needle_len;
for (len_t i = 0; i < global->needle_len; i++) ans->positions[i] = ans->positions_buf + i * max_haystack_len;
return ans;
}
#define NUKE(x) free(x); x = NULL;
void*
free_workspace(void *v) {
WorkSpace *w = (WorkSpace*)v;
NUKE(w->positions_buf);
NUKE(w->positions);
NUKE(w->positions_count);
NUKE(w->level_factors);
free(w);
return NULL;
}
static bool
has_char(text_t *text, len_t sz, text_t ch) {
for(len_t i = 0; i < sz; i++) {
if(text[i] == ch) return true;
}
return false;
}
static uint8_t
level_factor_for(text_t current, text_t last, WorkSpace *w) {
text_t lch = LOWERCASE(last);
if (has_char(w->level1, w->level1_len, lch)) return 90;
if (has_char(w->level2, w->level2_len, lch)) return 80;
if (IS_LOWERCASE(last) && IS_UPPERCASE(current)) return 80; // CamelCase
if (has_char(w->level3, w->level3_len, lch)) return 70;
return 0;
}
static void
init_workspace(WorkSpace *w, text_t *haystack, len_t haystack_len) {
// Calculate the positions and level_factors arrays for the specified haystack
bool level_factor_calculated = false;
memset(w->positions_count, 0, sizeof(*(w->positions_count)) * 2 * w->needle_len);
memset(w->level_factors, 0, sizeof(*(w->level_factors)) * w->max_haystack_len);
for (len_t i = 0; i < haystack_len; i++) {
level_factor_calculated = false;
for (len_t j = 0; j < w->needle_len; j++) {
if (w->needle[j] == LOWERCASE(haystack[i])) {
if (!level_factor_calculated) {
level_factor_calculated = true;
w->level_factors[i] = i > 0 ? level_factor_for(haystack[i], haystack[i-1], w) : 0;
}
w->positions[j][w->positions_count[j]++] = i;
}
}
}
w->haystack = haystack;
w->haystack_len = haystack_len;
w->max_score_per_char = (1.0 / haystack_len + 1.0 / w->needle_len) / 2.0;
}
static bool
has_atleast_one_match(WorkSpace *w) {
int p = -1;
bool found;
for (len_t i = 0; i < w->needle_len; i++) {
if (w->positions_count[i] == 0) return false; // All characters of the needle are not present in the haystack
found = false;
for (len_t j = 0; j < w->positions_count[i]; j++) {
if (w->positions[i][j] > p) { p = w->positions[i][j]; found = true; break; }
}
if (!found) return false; // Characters of needle not present in sequence in haystack
}
return true;
}
#define POSITION(x) w->positions[x][w->address[x]]
static bool
increment_address(WorkSpace *w) {
len_t pos = w->needle_len - 1;
while(true) {
w->address[pos]++;
if (w->address[pos] < w->positions_count[pos]) return true;
if (pos == 0) break;
w->address[pos--] = 0;
}
return false;
}
static bool
address_is_monotonic(WorkSpace *w) {
// Check if the character positions pointed to by the current address are monotonic
for (len_t i = 1; i < w->needle_len; i++) {
if (POSITION(i) <= POSITION(i-1)) return false;
}
return true;
}
static double
calc_score(WorkSpace *w) {
double ans = 0;
len_t distance, pos;
for (len_t i = 0; i < w->needle_len; i++) {
pos = POSITION(i);
if (i == 0) distance = pos < LEN_MAX ? pos + 1 : LEN_MAX;
else {
distance = pos - POSITION(i-1);
if (distance < 2) {
ans += w->max_score_per_char; // consecutive characters
continue;
}
}
if (w->level_factors[pos]) ans += (100 * w->max_score_per_char) / w->level_factors[pos]; // at a special location
else ans += (0.75 * w->max_score_per_char) / distance;
}
return ans;
}
static double
process_item(WorkSpace *w, len_t *match_positions) {
double highscore = 0, score;
do {
if (!address_is_monotonic(w)) continue;
score = calc_score(w);
if (score > highscore) {
highscore = score;
for (len_t i = 0; i < w->needle_len; i++) match_positions[i] = POSITION(i);
}
} while(increment_address(w));
return highscore;
}
double
score_item(void *v, text_t *haystack, len_t haystack_len, len_t *match_positions) {
WorkSpace *w = (WorkSpace*)v;
init_workspace(w, haystack, haystack_len);
if (!has_atleast_one_match(w)) return 0;
return process_item(w, match_positions);
}