Inline qsort

Cleaner, more local code and faster sorting by virtue of inlining the
comparisons. What's not to like.
This commit is contained in:
Kovid Goyal 2021-01-04 14:14:15 +05:30
parent 0f4156c564
commit ca65ad6fa3
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 308 additions and 40 deletions

View File

@ -6,6 +6,7 @@
*/
#include "choose-data-types.h"
#include "../../kitty/iqsort.h"
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
@ -45,13 +46,6 @@ output_text(GlobalData *global, const text_t *data, size_t sz) {
}
}
static int
cmpscore(const void *a, const void *b) {
double sa = FIELD(a, score), sb = FIELD(b, score);
// Sort descending
return (sa > sb) ? -1 : ((sa == sb) ? ((int)FIELD(a, idx) - (int)FIELD(b, idx)) : 1);
}
static void
output_with_marks(GlobalData *global, Options *opts, text_t *src, size_t src_sz, len_t *positions, len_t poslen) {
size_t pos, i = 0;
@ -96,7 +90,9 @@ output_result(GlobalData *global, Candidate *c, Options *opts, len_t needle_len)
void
output_results(GlobalData *global, Candidate *haystack, size_t count, Options *opts, len_t needle_len) {
Candidate *c;
qsort(haystack, count, sizeof(*haystack), cmpscore);
/* QSORT "less than" predicate: the first argument sorts before the second when
 * its score is higher (descending by score); equal scores are ordered by
 * ascending idx. The parameters are deliberately named (b, a). */
#define lt(b, a) ( (a)->score < (b)->score || ((a)->score == (b)->score && (b)->idx < (a)->idx) )
QSORT(Candidate, haystack, count, lt);
#undef lt
size_t left = opts->limit > 0 ? opts->limit : count;
for (size_t i = 0; i < left; i++) {
c = haystack + i;

View File

@ -8,6 +8,7 @@
#include "graphics.h"
#include "state.h"
#include "disk-cache.h"
#include "iqsort.h"
#include <sys/types.h>
#include <sys/stat.h>
@ -132,19 +133,15 @@ trim_predicate(Image *img) {
}
static int
oldest_last(const void* a, const void *b) {
monotonic_t ans = ((Image*)(b))->atime - ((Image*)(a))->atime;
return ans < 0 ? -1 : (ans == 0 ? 0 : 1);
}
static inline void
apply_storage_quota(GraphicsManager *self, size_t storage_limit, id_type currently_added_image_internal_id) {
// First remove unreferenced images, even if they have an id
remove_images(self, trim_predicate, currently_added_image_internal_id);
if (self->used_storage < storage_limit) return;
qsort(self->images, self->image_count, sizeof(self->images[0]), oldest_last);
#define oldest_last(a, b) ((b)->atime < (a)->atime)
QSORT(Image, self->images, self->image_count, oldest_last)
#undef oldest_last
while (self->used_storage > storage_limit && self->image_count > 0) {
remove_image(self, self->image_count - 1);
}
@ -320,12 +317,6 @@ find_or_create_image(GraphicsManager *self, uint32_t id, bool *existing) {
return ans;
}
/* qsort() comparator ordering uint32_t values ascending.
 * Returns -1, 0 or 1. Comparisons are used instead of subtraction because an
 * unsigned difference wraps and yields the wrong sign once converted to int. */
static int
cmp_client_ids(const void* a, const void* b) {
    const uint32_t x = *(const uint32_t*)a, y = *(const uint32_t*)b;
    return (x > y) - (x < y);
}
static inline uint32_t
get_free_client_id(const GraphicsManager *self) {
if (!self->image_count) return 1;
@ -336,7 +327,9 @@ get_free_client_id(const GraphicsManager *self) {
if (q->client_id) client_ids[count++] = q->client_id;
}
if (!count) { free(client_ids); return 1; }
qsort(client_ids, count, sizeof(uint32_t), cmp_client_ids);
#define int_lt(a, b) ((*a)<(*b))
QSORT(u_int32_t, client_ids, count, int_lt)
#undef int_lt
uint32_t prev_id = 0, ans = 1;
for (size_t i = 0; i < count; i++) {
if (client_ids[i] == prev_id) continue;
@ -614,14 +607,6 @@ handle_put_command(GraphicsManager *self, const GraphicsCommand *g, Cursor *c, b
return img->client_id;
}
/* qsort() comparator for ImageRenderData: ascending z_index, with ties broken
 * by ascending image_id. Returns a negative, zero or positive value.
 * Fields are compared directly rather than subtracted, so the result cannot be
 * corrupted by overflow or by truncating a wide difference to int. */
static int
cmp_by_zindex_and_image(const void *a_, const void *b_) {
    const ImageRenderData *a = (const ImageRenderData*)a_, *b = (const ImageRenderData*)b_;
    if (a->z_index != b->z_index) return a->z_index < b->z_index ? -1 : 1;
    return (a->image_id > b->image_id) - (a->image_id < b->image_id);
}
static inline void
set_vertex_data(ImageRenderData *rd, const ImageRef *ref, const ImageRect *dest_rect) {
#define R(n, a, b) rd->vertices[n*4] = ref->src_rect.a; rd->vertices[n*4 + 1] = ref->src_rect.b; rd->vertices[n*4 + 2] = dest_rect->a; rd->vertices[n*4 + 3] = dest_rect->b;
@ -686,7 +671,9 @@ grman_update_layers(GraphicsManager *self, unsigned int scrolled_by, float scree
}}
if (!self->count) return false;
// Sort visible refs in draw order (z-index, img)
qsort(self->render_data, self->count, sizeof(self->render_data[0]), cmp_by_zindex_and_image);
#define lt(a, b) ( (a)->z_index < (b)->z_index || ((a)->z_index == (b)->z_index && (a)->image_id < (b)->image_id) )
QSORT(ImageRenderData, self->render_data, self->count, lt);
#undef lt
// Calculate the group counts
i = 0;
while (i < self->count) {

290
kitty/iqsort.h Normal file
View File

@ -0,0 +1,290 @@
/* $Id: qsort.h,v 1.5 2008-01-28 18:16:49 mjt Exp $
* Adopted from GNU glibc by Mjt.
* See stdlib/qsort.c in glibc */
/* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
/* in-line qsort implementation. Differs from traditional qsort() routine
* in that it is a macro, not a function, and instead of passing an address
* of a comparison routine to the function, it is possible to inline
* comparison routine, thus speeding up sorting a lot.
*
* Usage:
* #include "iqsort.h"
* #define islt(a,b) (strcmp((*a),(*b))<0)
* char *arr[];
* int n;
* QSORT(char*, arr, n, islt);
*
* The "prototype" and 4 arguments are:
* QSORT(TYPE,BASE,NELT,ISLT)
* 1) type of each element, TYPE,
* 2) address of the beginning of the array, of type TYPE*,
* 3) number of elements in the array, and
* 4) comparison routine.
* Array pointer and number of elements are referenced only once.
* This is similar to a call
* qsort(BASE,NELT,sizeof(TYPE),ISLT)
* with the difference in last parameter.
* Note the islt macro/routine (it receives pointers to two elements):
* the only condition of interest is whether one element is less than
* another; no other conditions (greater than, equal to, etc.) are tested.
* So, for example, to define integer sort, use:
* #define islt(a,b) ((*a)<(*b))
* QSORT(int, arr, n, islt)
*
* The macro could be used to implement a sorting function (see examples
* below), or to implement the sorting algorithm inline. That is, either
* create a sorting function and use it whenever you want to sort something,
* or use the QSORT() macro directly instead of a call to such a routine. Note that
* the macro expands to quite some code (compiled size of int qsort on x86
* is about 700..800 bytes).
*
* Using this macro directly it isn't possible to implement traditional
* qsort() routine, because the macro assumes sizeof(element) == sizeof(TYPE),
* while qsort() allows element size to be different.
*
* Several ready-to-use examples:
*
* Sorting array of integers:
* void int_qsort(int *arr, unsigned n) {
* #define int_lt(a,b) ((*a)<(*b))
* QSORT(int, arr, n, int_lt);
* }
*
* Sorting array of string pointers:
* void str_qsort(char *arr[], unsigned n) {
* #define str_lt(a,b) (strcmp((*a),(*b)) < 0)
* QSORT(char*, arr, n, str_lt);
* }
*
* Sorting array of structures:
*
* struct elt {
* int key;
* ...
* };
* void elt_qsort(struct elt *arr, unsigned n) {
* #define elt_lt(a,b) ((a)->key < (b)->key)
* QSORT(struct elt, arr, n, elt_lt);
* }
*
* And so on.
*/
/* Swap the two items pointed to by A and B using the temporary T, which must
   be an lvalue of the pointed-to type. A and B are each evaluated twice, so
   they must not have side effects. Every parameter is parenthesized so that
   pointer expressions such as `p + 1' can be passed. */
#define _QSORT_SWAP(a, b, t) ((void)(((t) = *(a)), (*(a) = *(b)), (*(b) = (t))))
/* Discontinue quicksort algorithm when partition gets below this size:
QSORT() only runs its partitioning loop for arrays with more than this many
elements, and skips sub-ranges whose first and last element are at most this
many positions apart; the final insertion sort pass orders whatever was
skipped.
This particular magic number was chosen to work best on a Sun 4/260. */
#define _QSORT_MAX_THRESH 4
/* Stack node declarations used to store unfulfilled partition obligations
* (inlined in QSORT).
typedef struct {
QSORT_TYPE *_lo, *_hi;
} qsort_stack_node;
*/
/* The next 4 #defines implement a very fast in-line stack abstraction. */
/* The stack needs log (total_elements) entries (we could even subtract
   log(MAX_THRESH)). Since total_elements has type unsigned, we get as
   upper bound for log (total_elements):
   bits per byte * sizeof(unsigned), with bits per byte hard-coded as 8
   rather than taken from CHAR_BIT. */
#define _QSORT_STACK_SIZE (8 * sizeof(unsigned))
/* Store the range [low, high] in the entry TOP points at, then advance TOP. */
#define _QSORT_PUSH(top, low, high) \
    ((((top)->_lo = (low)), ((top)->_hi = (high)), ++(top)))
/* Step TOP back one entry and load that entry's range into LOW and HIGH. */
#define _QSORT_POP(low, high, top) \
    ((--(top), ((low) = (top)->_lo), ((high) = (top)->_hi)))
/* True while entries remain on the stack. Relies on the locals named _stack
   and _top being in scope at the point of use. */
#define _QSORT_STACK_NOT_EMPTY (_stack < _top)
/* Order size using quicksort. This implementation incorporates
four optimizations discussed in Sedgewick:
1. Non-recursive, using an explicit stack of pointers that stores the
next array partition to sort. To save time, the maximum amount
of space that can ever be needed is allocated on the stack up front:
_QSORT_STACK_SIZE entries of two pointers each. With a 32-bit unsigned
that is 32 entries, i.e. 256 bytes with 4-byte pointers (512 bytes with
8-byte pointers). Pretty cheap, actually.
2. Choose the pivot element using a median-of-three decision tree.
This reduces the probability of selecting a bad pivot value and
eliminates certain extraneous comparisons.
3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
insertion sort to order the MAX_THRESH items within each partition.
This is a big win, since insertion sort is faster for small, mostly
sorted array segments.
4. The larger of the two sub-partitions is always pushed onto the
stack first, with the algorithm then concentrating on the
smaller partition. This *guarantees* no more than log (total_elems)
stack size is needed (actually O(1) in this case)! */
/* The main code starts here... */
/* QSORT(QSORT_TYPE, QSORT_BASE, QSORT_NELT, QSORT_LT)
 *
 * Sort in place the QSORT_NELT elements of type QSORT_TYPE starting at
 * QSORT_BASE. QSORT_LT(a, b) is invoked with two QSORT_TYPE pointers and must
 * be non-zero exactly when *a has to be ordered before *b.
 *
 * The expansion is a braced compound statement, not an expression.
 * QSORT_BASE and QSORT_NELT are each evaluated once; QSORT_NELT is converted
 * to unsigned. Arrays with fewer than two elements are left untouched without
 * any pointer arithmetic on QSORT_BASE, so an empty array with a null base is
 * accepted. */
#define QSORT(QSORT_TYPE,QSORT_BASE,QSORT_NELT,QSORT_LT) \
{ \
    QSORT_TYPE *const _base = (QSORT_BASE); \
    const unsigned _elems = (QSORT_NELT); \
    QSORT_TYPE _hold; \
    \
    /* Don't declare two variables of type QSORT_TYPE in a single \
     * statement: eg `TYPE a, b;', in case if TYPE is a pointer, \
     * expands to `type* a, b;' which isn't what we want. \
     */ \
    \
    if (_elems > _QSORT_MAX_THRESH) { \
        QSORT_TYPE *_lo = _base; \
        QSORT_TYPE *_hi = _lo + _elems - 1; \
        struct { \
            QSORT_TYPE *_hi; QSORT_TYPE *_lo; \
        } _stack[_QSORT_STACK_SIZE], *_top = _stack; \
        \
        /* Bottom-of-stack sentinel: popping it ends the loop below. It is \
           pushed explicitly so that the final pop reads initialised \
           values. */ \
        _QSORT_PUSH (_top, 0, 0); \
        \
        while (_QSORT_STACK_NOT_EMPTY) { \
            QSORT_TYPE *_left_ptr; QSORT_TYPE *_right_ptr; \
            \
            /* Select median value from among LO, MID, and HI. Rearrange \
               LO and HI so the three values are sorted. This lowers the \
               probability of picking a pathological pivot value and \
               skips a comparison for both the LEFT_PTR and RIGHT_PTR in \
               the while loops. */ \
            \
            QSORT_TYPE *_mid = _lo + ((_hi - _lo) >> 1); \
            \
            if (QSORT_LT (_mid, _lo)) \
                _QSORT_SWAP (_mid, _lo, _hold); \
            if (QSORT_LT (_hi, _mid)) { \
                _QSORT_SWAP (_mid, _hi, _hold); \
                if (QSORT_LT (_mid, _lo)) \
                    _QSORT_SWAP (_mid, _lo, _hold); \
            } \
            \
            _left_ptr = _lo + 1; \
            _right_ptr = _hi - 1; \
            \
            /* Here's the famous ``collapse the walls'' section of quicksort. \
               Gotta like those tight inner loops! They are the main reason \
               that this algorithm runs much faster than others. */ \
            do { \
                while (QSORT_LT (_left_ptr, _mid)) \
                    ++_left_ptr; \
                \
                while (QSORT_LT (_mid, _right_ptr)) \
                    --_right_ptr; \
                \
                if (_left_ptr < _right_ptr) { \
                    _QSORT_SWAP (_left_ptr, _right_ptr, _hold); \
                    /* The pivot itself may have been moved by the swap. */ \
                    if (_mid == _left_ptr) \
                        _mid = _right_ptr; \
                    else if (_mid == _right_ptr) \
                        _mid = _left_ptr; \
                    ++_left_ptr; \
                    --_right_ptr; \
                } \
                else if (_left_ptr == _right_ptr) { \
                    ++_left_ptr; \
                    --_right_ptr; \
                    break; \
                } \
            } while (_left_ptr <= _right_ptr); \
            \
            /* Set up pointers for next iteration. First determine whether \
               left and right partitions are below the threshold size. If so, \
               ignore one or both. Otherwise, push the larger partition's \
               bounds on the stack and continue sorting the smaller one. */ \
            \
            if (_right_ptr - _lo <= _QSORT_MAX_THRESH) { \
                if (_hi - _left_ptr <= _QSORT_MAX_THRESH) \
                    /* Ignore both small partitions. */ \
                    _QSORT_POP (_lo, _hi, _top); \
                else \
                    /* Ignore small left partition. */ \
                    _lo = _left_ptr; \
            } \
            else if (_hi - _left_ptr <= _QSORT_MAX_THRESH) \
                /* Ignore small right partition. */ \
                _hi = _right_ptr; \
            else if (_right_ptr - _lo > _hi - _left_ptr) { \
                /* Push larger left partition indices. */ \
                _QSORT_PUSH (_top, _lo, _right_ptr); \
                _lo = _left_ptr; \
            } \
            else { \
                /* Push larger right partition indices. */ \
                _QSORT_PUSH (_top, _left_ptr, _hi); \
                _hi = _right_ptr; \
            } \
        } \
    } \
    \
    /* Once the BASE array is partially sorted by quicksort the rest \
       is completely sorted using insertion sort, since this is efficient \
       for partitions below MAX_THRESH size. BASE points to the \
       beginning of the array to sort, and END_PTR points at the very \
       last element in the array (*not* one beyond it!). Nothing needs \
       doing for fewer than two elements; skipping that case also keeps \
       every pointer computed below inside the array (or one past it). */ \
    \
    if (_elems > 1) { \
        QSORT_TYPE *const _end_ptr = _base + _elems - 1; \
        QSORT_TYPE *_tmp_ptr = _base; \
        QSORT_TYPE *_run_ptr; \
        QSORT_TYPE *const _thresh = (_elems > _QSORT_MAX_THRESH) \
            ? _base + _QSORT_MAX_THRESH : _end_ptr; \
        \
        /* Find smallest element in first threshold and place it at the \
           array's beginning. This is the smallest array element, \
           and the operation speeds up insertion sort's inner loop. */ \
        \
        for (_run_ptr = _tmp_ptr + 1; _run_ptr <= _thresh; ++_run_ptr) \
            if (QSORT_LT (_run_ptr, _tmp_ptr)) \
                _tmp_ptr = _run_ptr; \
        \
        if (_tmp_ptr != _base) \
            _QSORT_SWAP (_tmp_ptr, _base, _hold); \
        \
        /* Insertion sort, running from left-hand-side \
         * up to right-hand-side. The first two elements are already in \
         * order because the smallest element now sits at BASE. */ \
        \
        _run_ptr = _base + 1; \
        while (++_run_ptr <= _end_ptr) { \
            _tmp_ptr = _run_ptr - 1; \
            /* The minimum at BASE stops this scan before it leaves the \
               array. */ \
            while (QSORT_LT (_run_ptr, _tmp_ptr)) \
                --_tmp_ptr; \
            \
            ++_tmp_ptr; \
            if (_tmp_ptr != _run_ptr) { \
                /* Shift [_tmp_ptr, _run_ptr) up by one slot and drop the \
                   saved element into the gap. */ \
                QSORT_TYPE *_dst = _run_ptr; \
                _hold = *_run_ptr; \
                for (; _dst > _tmp_ptr; --_dst) \
                    *_dst = *(_dst - 1); \
                *_dst = _hold; \
            } \
        } \
    } \
    \
}

View File

@ -11,6 +11,7 @@
}
#include "state.h"
#include "iqsort.h"
#include "fonts.h"
#include "lineops.h"
#include "hyperlink.h"
@ -2487,14 +2488,6 @@ mark_hyperlinks_in_line(Screen *self, Line *line, hyperlink_id_type id, index_ty
return found;
}
/* qsort() comparator for Selection items: ascending sort_y, with ties broken
 * by ascending sort_x. Returns a negative, zero or positive value.
 * The fields are compared directly rather than subtracted, so the sign cannot
 * be corrupted by overflow or unsigned wrap-around of the difference. */
static int
compare_ranges(const void *a_, const void* b_) {
    const Selection *a = a_, *b = b_;
    if (a->sort_y != b->sort_y) return a->sort_y < b->sort_y ? -1 : 1;
    return (a->sort_x > b->sort_x) - (a->sort_x < b->sort_x);
}
static void
sort_ranges(const Screen *self, Selections *s) {
IterationData a;
@ -2503,7 +2496,9 @@ sort_ranges(const Screen *self, Selections *s) {
s->items[i].sort_x = a.first.x;
s->items[i].sort_y = a.y;
}
qsort(s->items, s->count, sizeof(Selection), compare_ranges);
#define range_lt(a, b) ((a)->sort_y < (b)->sort_y || ((a)->sort_y == (b)->sort_y && (a)->sort_x < (b)->sort_x))
QSORT(Selection, s->items, s->count, range_lt);
#undef range_lt
}
hyperlink_id_type