From 952f0ad98ce844f002d7775f96d181803d9f06ff Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 4 Oct 2017 21:54:59 +0530 Subject: [PATCH] Accommodate the underpowered Apple lineup macOS is missing glDrawArraysInstancedBaseInstance --- kitty/graphics.c | 8 +++++--- kitty/graphics.h | 2 +- kitty/graphics_vertex.glsl | 16 ++-------------- kitty/shaders.c | 20 ++++++++++---------- 4 files changed, 18 insertions(+), 28 deletions(-) diff --git a/kitty/graphics.c b/kitty/graphics.c index a66efecdc..180ba0aac 100644 --- a/kitty/graphics.c +++ b/kitty/graphics.c @@ -595,9 +595,11 @@ grman_update_layers(GraphicsManager *self, unsigned int scrolled_by, float scree if (ref->z_index < 0) self->num_of_negative_refs++; else self->num_of_positive_refs++; ensure_space_for(self, render_data, ImageRenderData, self->count + 1, capacity, 100); ImageRenderData *rd = self->render_data + self->count; +#define R(n, a, b) rd->vertices[n*4] = ref->src_rect.a; rd->vertices[n*4 + 1] = ref->src_rect.b; rd->vertices[n*4 + 2] = r.a; rd->vertices[n*4 + 3] = r.b; + R(0, right, top); R(1, right, bottom); R(2, left, bottom); R(3, left, top); +#undef R self->count++; rd->z_index = ref->z_index; rd->image_id = img->internal_id; - rd->src_rect = ref->src_rect; rd->dest_rect = r; rd->texture_id = img->texture_id; }} if (!self->count) return false; @@ -720,9 +722,9 @@ W(update_layers) { PyObject *ans = PyTuple_New(self->count); for (size_t i = 0; i < self->count; i++) { ImageRenderData *r = self->render_data + i; -#define R(attr) Py_BuildValue("{sf sf sf sf}", "left", r->attr.left, "top", r->attr.top, "right", r->attr.right, "bottom", r->attr.bottom) +#define R(offset) Py_BuildValue("{sf sf sf sf}", "left", r->vertices[offset + 8], "top", r->vertices[offset + 1], "right", r->vertices[offset], "bottom", r->vertices[offset + 5]) PyTuple_SET_ITEM(ans, i, - Py_BuildValue("{sN sN sI si sI}", "src_rect", R(src_rect), "dest_rect", R(dest_rect), "group_count", r->group_count, "z_index", r->z_index, "image_id", r->image_id) + Py_BuildValue("{sN sN sI si sI}", "src_rect", R(0), "dest_rect", R(2), "group_count", r->group_count, "z_index", r->z_index, "image_id", r->image_id) ); #undef R } diff --git a/kitty/graphics.h b/kitty/graphics.h index e5da9837d..4b573590c 100644 --- a/kitty/graphics.h +++ b/kitty/graphics.h @@ -53,7 +53,7 @@ typedef struct { } Image; typedef struct { - ImageRect src_rect, dest_rect; + float vertices[16]; uint32_t texture_id, group_count; int z_index; size_t image_id; diff --git a/kitty/graphics_vertex.glsl b/kitty/graphics_vertex.glsl index d1abf1396..12d72b368 100644 --- a/kitty/graphics_vertex.glsl +++ b/kitty/graphics_vertex.glsl @@ -1,21 +1,9 @@ #version 330 layout(location=0) in vec4 src; -layout(location=1) in vec4 position; out vec2 texcoord; -const uint LEFT = uint(0), TOP = uint(1), RIGHT = uint(2), BOTTOM = uint(3); - -const uvec2 pos_map[] = uvec2[4]( - uvec2(RIGHT, TOP), - uvec2(RIGHT, BOTTOM), - uvec2(LEFT, BOTTOM), - uvec2(LEFT, TOP) -); - - void main() { - uvec2 pos = pos_map[gl_VertexID]; - gl_Position = vec4(position[pos.x], position[pos.y], 0, 1); - texcoord = vec2(src[pos.x], src[pos.y]); + texcoord = vec2(src[0], src[1]); + gl_Position = vec4(src[2], src[3], 0, 1); } diff --git a/kitty/shaders.c b/kitty/shaders.c index 980ce040a..2b2e0a7d3 100644 --- a/kitty/shaders.c +++ b/kitty/shaders.c @@ -202,7 +202,7 @@ create_cell_vao() { add_attribute_to_vao(CELL_PROGRAM, vao_idx, #name, \ /*size=*/size, /*dtype=*/dtype, /*stride=*/stride, /*offset=*/offset, /*divisor=*/1); #define A1(name, size, dtype, offset) A(name, size, dtype, (void*)(offsetof(Cell, offset)), sizeof(Cell)) -#define AL(p, name, size, dtype, offset, stride) { GLint aloc = attrib_location(p, name); if (aloc == -1 ) fatal("No attribute named: %s found in this program", name); add_located_attribute_to_vao(vao_idx, aloc, size, dtype, stride, (void*)offset, 1); } +#define AL(p, name, size, dtype, offset, stride) { GLint aloc = attrib_location(p, name); if (aloc == -1 ) fatal("No attribute named: %s found in this program", name); add_located_attribute_to_vao(vao_idx, aloc, size, dtype, stride, offset, 0); } add_buffer_to_vao(vao_idx, GL_ARRAY_BUFFER); A1(sprite_coords, 4, GL_UNSIGNED_SHORT, sprite_x); @@ -215,8 +215,7 @@ create_cell_vao() { alloc_vao_buffer(vao_idx, cell_program_layouts[CELL_PROGRAM].render_data.size, bufnum, GL_STREAM_DRAW); add_buffer_to_vao(vao_idx, GL_ARRAY_BUFFER); - AL(GRAPHICS_PROGRAM, "src", 4, GL_FLOAT, 0, sizeof(ImageRenderData)); - AL(GRAPHICS_PROGRAM, "position", 4, GL_FLOAT, offsetof(ImageRenderData, dest_rect), sizeof(ImageRenderData)); + AL(GRAPHICS_PROGRAM, "src", 4, GL_FLOAT, NULL, 0); return vao_idx; #undef A @@ -288,10 +287,10 @@ cell_prepare_to_render(ssize_t vao_idx, Screen *screen, GLfloat xstart, GLfloat } if (grman_update_layers(screen->grman, screen->scrolled_by, xstart, ystart, dx, dy, screen->columns, screen->lines)) { - sz = sizeof(ImageRenderData) * screen->grman->count; - address = alloc_and_map_vao_buffer(vao_idx, sz, graphics_buffer, GL_STREAM_DRAW, GL_WRITE_ONLY); - memcpy(address, screen->grman->render_data, sz); - unmap_vao_buffer(vao_idx, graphics_buffer); address = NULL; + sz = sizeof(GLfloat) * 16 * screen->grman->count; + GLfloat *a = alloc_and_map_vao_buffer(vao_idx, sz, graphics_buffer, GL_STREAM_DRAW, GL_WRITE_ONLY); + for (size_t i = 0; i < screen->grman->count; i++, a += 16) memcpy(a, screen->grman->render_data[i].vertices, sizeof(screen->grman->render_data[0].vertices)); + unmap_vao_buffer(vao_idx, graphics_buffer); a = NULL; } cell_update_uniform_block(vao_idx, screen, uniform_buffer, xstart, ystart, dx, dy, cursor); @@ -317,9 +316,10 @@ draw_graphics(ImageRenderData *data, GLuint start, GLuint count) { for (GLuint i=0; i < count;) { ImageRenderData *rd = data + start + i; glBindTexture(GL_TEXTURE_2D, rd->texture_id); check_gl(); - glDrawArraysInstancedBaseInstance(GL_TRIANGLE_FAN, 0, 4, rd->group_count, base); - base += rd->group_count; - i += rd->group_count; + // You could reduce the number of draw calls by using + // glDrawArraysInstancedBaseInstance but Apple chose to abandon OpenGL + // before implementing it. + for (GLuint k=0; k < rd->group_count; k++, base += 4, i++) glDrawArrays(GL_TRIANGLE_FAN, base, 4); } }