From c76dc917ccb38cba70b75fec65213874258ddb94 Mon Sep 17 00:00:00 2001
From: Matthew
Date: Sat, 29 Mar 2025 17:38:04 +1100
Subject: [PATCH] add async to vulkan buffer transfers

---
 src/allocators.c             | 111 +++++++++++++++++++++++++++++++++++
 src/allocators.h             |  70 ++++++++++++++++++++++
 src/arena.c                  |  56 ------------------
 src/arena.h                  |  25 --------
 src/ds.c                     |   2 +
 src/ds.h                     |   3 +
 src/entry_linux.c            |  36 ++++++++----
 src/entry_linux.h            |   3 +-
 src/entry_windows.c          |   3 +-
 src/entry_windows.h          |   3 +-
 src/game.c                   |  43 +++++++++-----
 src/platform_linux.c         |   5 ++
 src/platform_linux.h         |   2 +
 src/renderer.h               |   4 +-
 src/renderer_vulkan.c        |  63 ++++++++++++++------
 src/renderer_vulkan.h        |  12 ++--
 src/renderer_vulkan_public.c |  69 +++++++++++++++++++---
 17 files changed, 367 insertions(+), 143 deletions(-)
 create mode 100644 src/allocators.c
 create mode 100644 src/allocators.h
 delete mode 100644 src/arena.c
 delete mode 100644 src/arena.h
 create mode 100644 src/ds.c
 create mode 100644 src/ds.h

diff --git a/src/allocators.c b/src/allocators.c
new file mode 100644
index 0000000..f8e5324
--- /dev/null
+++ b/src/allocators.c
@@ -0,0 +1,111 @@
+// ::Allocator::Arena::Start::
+
+static Arena *CreateArena(rawptr buffer, isize length)
+{
+    Arena *arena = (Arena *)buffer;
+    buffer = PtrAdd(buffer, ARENA_HEADER_SIZE);
+
+    arena->buffer = buffer;
+    arena->length = length;
+    arena->pos = 0;
+
+    return arena;
+}
+
+static rawptr ArenaAllocAlign(Arena *arena, isize size, isize align)
+{
+    rawptr ptr = NULL;
+
+    uintptr curr_ptr = (uintptr)arena->buffer + (uintptr)arena->pos;
+    uintptr offset = AlignPow2(curr_ptr, align);
+    offset -= (uintptr)arena->buffer;
+
+    if (offset+size <= arena->length)
+    {
+        ptr = &arena->buffer[offset];
+        arena->pos = offset+size;
+    }
+    else
+    {
+        Printfln("Out of memory: %d", arena->init_line_no);
+        Assert(0, "Memory Failure");
+    }
+
+    return ptr;
+}
+
+static rawptr ArenaAlloc(Arena *arena, isize size)
+{
+    return ArenaAllocAlign(arena, size, DEFAULT_ALIGNMENT);
+}
+
+static void ArenaFree(Arena *arena)
+{
+    arena->pos = 0;
+}
+
+static void ArenaFreeZeroed(Arena *arena)
+{
+    MemZero(arena->buffer, arena->pos);
+    ArenaFree(arena);
+}
+
+static void DeallocArena(Arena *arena)
+{
+    MemFree(arena, arena->length);
+}
+
+static Arena *CreateArenaDebug(rawptr buffer, isize length, u32 init_line_no)
+{
+    Arena *arena = CreateArena(buffer, length);
+    arena->init_line_no = init_line_no;
+    return arena;
+}
+
+// ::Allocator::Arena::End::
+
+// ::Allocator::FreeList::Start::
+
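+// Memory layout of the initial allocation carved out below:
+// [FreeListAlloc header][FreeListBuffer slot][nil FLNode][remaining bytes, exposed as buffers[0].buf].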
+static FreeListAlloc *CreateFreeListAlloc(isize init_size, isize grow_size)
+{
+    isize size = init_size + sizeof(FreeListAlloc) + sizeof(FreeListBuffer) + sizeof(rawptr) + sizeof(FLNode);
+    u8 *mem = (u8 *)MemAllocZeroed(size);
+    FreeListAlloc *fl_alloc = (FreeListAlloc *)mem;
+
+    u8 *pre_mem = mem;
+    mem = (u8 *)PtrAdd(mem, sizeof(FreeListAlloc));
+    isize rem_size = size - (mem - pre_mem);
+
+    fl_alloc->buf_len = 1;
+    fl_alloc->grow_size = grow_size;
+    fl_alloc->buffers = (FreeListBuffer *)mem;
+
+    pre_mem = mem;
+    mem = (u8 *)PtrAdd(mem, sizeof(FreeListBuffer));
+    rem_size -= mem - pre_mem;
+
+    fl_alloc->nil = (FLNode *)mem;
+
+    pre_mem = mem;
+    mem = (u8 *)PtrAdd(mem, sizeof(FLNode));
+    rem_size -= mem - pre_mem;
+
+    fl_alloc->buffers[0].buf = mem;
+    fl_alloc->buffers[0].size = (u32)size;
+    fl_alloc->buffers[0].free_size = (u32)rem_size;
+
+    return fl_alloc;
+}
+
+static void DeallocFreeListAlloc(FreeListAlloc *alloc)
+{
+    for (i32 i = alloc->buf_len-1; i >= 0; i--)
+    {
+        if (i == 0)
+            MemFree(alloc, alloc->buffers[i].size);
+        else
+            MemFree(alloc->buffers[i].buf, alloc->buffers[i].size);
+    }
+}
+
+// ::Allocator::FreeList::End::
diff --git a/src/allocators.h b/src/allocators.h
new file mode 100644
index 0000000..923cdff
--- /dev/null
+++ b/src/allocators.h
@@ -0,0 +1,70 @@
+#pragma once
+
+// ::Allocator::Arena::Header::
+
+#define ARENA_HEADER_SIZE 64
+
+typedef struct
+{
+    u8 *buffer;
+    isize length;
+    isize pos;
+    u32 init_line_no;
+} Arena;
+
+typedef struct
+{
+    Arena *arena;
+    u64 pos;
+} TempArena;
+
+static Arena *CreateArena(rawptr buffer, isize length);
+static rawptr ArenaAllocAlign(Arena *arena, isize size, isize align);
+static rawptr ArenaAlloc(Arena *arena, isize size);
+static void ArenaFree(Arena *arena);
+static void ArenaFreeZeroed(Arena *arena);
+static void DeallocArena(Arena *arena);
+static Arena *CreateArenaDebug(rawptr buffer, isize length, u32 init_line_no);
+
+// ::Allocator::FreeList::Header::
+
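+// Free blocks are tracked in a red-black tree; value presumably holds a block size and data
+// the block pointer (the insert/delete/rotate helpers are declared at the end of this header).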
+typedef enum u8
+{
+    RB_RED,
+    RB_BLACK,
+} FLNodeColor;
+
+typedef struct FLNode_t FLNode;
+
+typedef struct FLNode_t
+{
+    FLNode *parent;
+    FLNode *left;
+    FLNode *right;
+    i32 value;
+    rawptr data;
+    FLNodeColor color;
+} FLNode;
+
+typedef struct FreeListBuffer_t
+{
+    rawptr buf;
+    u32 size;
+    u32 free_size;
+} FreeListBuffer;
+
+typedef struct FreeListAlloc_t
+{
+    FLNode *head;
+    FLNode *nil;
+    FreeListBuffer *buffers;
+    isize grow_size;
+    u8 buf_len;
+} FreeListAlloc;
+
+static FreeListAlloc *CreateFreeListAlloc(isize init_size, isize grow_size);
+static void DeallocFreeListAlloc(FreeListAlloc *alloc);
+static inline b32 FLNodeInsert(FreeListAlloc *alloc, FLNode *node);
+static inline void FLNodeDelete(FLNode *node);
+static inline void FLNodeLeftRotate(FLNode *node);
+static inline void FLNodeRightRotate(FLNode *node);
diff --git a/src/arena.c b/src/arena.c
deleted file mode 100644
index 14790cc..0000000
--- a/src/arena.c
+++ /dev/null
@@ -1,56 +0,0 @@
-static Arena *CreateArena(rawptr buffer, isize length)
-{
-    Arena *arena = (Arena *)buffer;
-    buffer = PtrAdd(buffer, ARENA_HEADER_SIZE);
-
-    arena->buffer = buffer;
-    arena->length = length;
-    arena->pos = 0;
-
-    return arena;
-}
-
-static rawptr ArenaAllocAlign(Arena *arena, isize size, isize align)
-{
-    rawptr ptr = NULL;
-
-    uintptr curr_ptr = (uintptr)arena->buffer + (uintptr)arena->pos;
-    uintptr offset = AlignPow2(curr_ptr, align);
-    offset -= (uintptr)arena->buffer;
-
-    if (offset+size <= arena->length)
-    {
-        ptr = &arena->buffer[offset];
-        arena->pos = offset+size;
-    }
-    else
-    {
-        Printfln("Out of memory: %d", arena->init_line_no);
-        Assert(0, "Memory Failure");
-    }
-
-    return ptr;
-}
-
-static rawptr ArenaAlloc(Arena *arena, isize size)
-{
-    return ArenaAllocAlign(arena, size, DEFAULT_ALIGNMENT);
-}
-
-static void ArenaFree(Arena *arena)
-{
-    arena->pos = 0;
-}
-
-static void ArenaFreeZeroed(Arena *arena)
-{
-    MemZero(arena->buffer, arena->pos);
-    ArenaFree(arena);
-}
-
-static Arena *CreateArenaDebug(rawptr buffer, isize length, u32 init_line_no)
-{
-    Arena *arena = CreateArena(buffer, length);
-    arena->init_line_no = init_line_no;
-    return arena;
-}
diff --git a/src/arena.h b/src/arena.h
deleted file mode 100644
index cea2923..0000000
--- a/src/arena.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#pragma once
-
-#define ARENA_HEADER_SIZE 64
-
-typedef struct
-{
-    u8 *buffer;
-    isize length;
-    isize pos;
-    u32 init_line_no;
-} Arena;
-
-typedef struct
-{
-    Arena *arena;
-    u64 pos;
-} TempArena;
-
-static Arena *CreateArena(rawptr buffer, isize length);
-static rawptr ArenaAllocAlign(Arena *arena, isize size, isize align);
-static rawptr ArenaAlloc(Arena *arena, isize size);
-static void ArenaFree(Arena *arena);
-static void ArenaFreeZeroed(Arena *arena);
-static Arena *CreateArenaDebug(rawptr buffer, isize length, u32 init_line_no);
-
diff --git a/src/ds.c b/src/ds.c
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/src/ds.c
@@ -0,0 +1,2 @@
+
+
diff --git a/src/ds.h b/src/ds.h
new file mode 100644
index 0000000..45dcbb0
--- /dev/null
+++ b/src/ds.h
@@ -0,0 +1,3 @@
+#pragma once
+
+
diff --git a/src/entry_linux.c b/src/entry_linux.c
index 38dd01f..a6d1066 100644
--- a/src/entry_linux.c
+++ b/src/entry_linux.c
@@ -3,8 +3,9 @@
 #include "entry_linux.h"
 
 #include "platform.c"
+#include "ds.c"
 #include "util.c"
-#include "arena.c"
+#include "allocators.c"
 #include "renderer.c"
 #include "game.c"
 
@@ -21,23 +22,34 @@ const char *strs[10] = {
     "String 10",
 };
 u32 volatile str_index = 0;
+pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+pthread_cond_t cond1 = PTHREAD_COND_INITIALIZER;
+pthread_cond_t cond2 = PTHREAD_COND_INITIALIZER;
 
 #include
 
 void *ThreadFunc(void *i)
 {
-    for (;;)
-    {
-        u32 val = __atomic_fetch_add(&str_index, 1, __ATOMIC_RELEASE);
-        if (val < 10)
-        {
-            Printfln("Thread %d: %s", *(u32 *)i, strs[val]);
-            sleep(1);
-        }
-        else
-            break;
-
-    }
+    pthread_mutex_t mut;
+
+    u32 val = *(u32 *)i;
+
+    pthread_mutex_init(&mut, NULL);
+
+    Printfln("Thread %d Started", *(u32 *)i);
+
+    pthread_cond_t *c = val == 1 ? &cond1 : &cond2;
+
+    pthread_mutex_lock(&mut);
+    pthread_cond_wait(c, &mut);
+
+    Printfln("Thread %d woken up", *(u32 *)i);
+
+    pthread_cond_wait(&cond, &mut);
+
+    Printfln("Thread %d global wake up", val);
+
+    pthread_mutex_unlock(&mut);
 
     pthread_exit(NULL);
 }
diff --git a/src/entry_linux.h b/src/entry_linux.h
index e107561..ad7db7a 100644
--- a/src/entry_linux.h
+++ b/src/entry_linux.h
@@ -11,9 +11,10 @@
 #include "stb/stb_sprintf.h"
 
 #include "shared_types.h"
+#include "ds.h"
 #include "platform.h"
 #include "util.h"
-#include "arena.h"
+#include "allocators.h"
 #include "renderer.h"
 #include "game.h"
 
diff --git a/src/entry_windows.c b/src/entry_windows.c
index 9b849e4..caa2e59 100644
--- a/src/entry_windows.c
+++ b/src/entry_windows.c
@@ -3,8 +3,9 @@
 #include "entry_windows.h"
 
 #include "platform.c"
+#include "ds.c"
 #include "util.c"
-#include "arena.c"
+#include "allocators.c"
 #include "renderer.c"
 #include "game.c"
 
diff --git a/src/entry_windows.h b/src/entry_windows.h
index a91f6ea..f47048d 100644
--- a/src/entry_windows.h
+++ b/src/entry_windows.h
@@ -9,9 +9,10 @@
 #include "stb/stb_sprintf.h"
 
 #include "shared_types.h"
+#include "ds.h"
 #include "platform.h"
 #include "util.h"
-#include "arena.h"
+#include "allocators.h"
 #include "renderer.h"
 #include "game.h"
 
diff --git a/src/game.c b/src/game.c
index d87f4e2..00549ff 100644
--- a/src/game.c
+++ b/src/game.c
@@ -73,11 +73,13 @@ static void HandleInputs(GameInput *inputs, u32 count)
 
 static void RunCycle(GameContext *ctx, GameInput *inputs, u32 i_count)
 {
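+    // Reset the async upload queue for this frame: job_count is the number of queued uploads,
+    // remaining_count how many of them the loader threads have yet to finish.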
+    __atomic_store_n(&renderer.vk.imm.job_count, 0, __ATOMIC_RELEASE);
+    __atomic_store_n(&renderer.vk.imm.remaining_count, 0, __ATOMIC_RELEASE);
+
     GetViewportSize(&ctx->pc.res);
 
     HandleInputs(inputs, i_count);
-
     if (mouse_pressed
         && !ctx->window.grabbed
         && mouse_pos_x > ctx->window.p0.x
         && mouse_pos_x < ctx->window.p1.x
@@ -110,18 +112,27 @@ static void RunCycle(GameContext *ctx, GameInput *inputs, u32 i_count)
 
     DrawRect(&ctx->gui, ctx->window.p0, ctx->window.p1, (Vec4){ .r = 0.1f, .g = 0.3f, .b = 0.8f, .a = 1.0f });
 
-    RenderBuffer vertex_buffer = {
-        .type = RENDER_BUFFER_TYPE_VERTEX,
-        .size = sizeof(GUIVertex) * ctx->gui.vertices_len,
-    };
+    RenderBuffer *vertex_buffer = ArenaAlloc(ctx->arena, sizeof(RenderBuffer));
+    vertex_buffer->type = RENDER_BUFFER_TYPE_VERTEX;
+    vertex_buffer->size = sizeof(GUIVertex) * ctx->gui.vertices_len;
 
-    RenderBuffer index_buffer = {
-        .type = RENDER_BUFFER_TYPE_INDEX,
-        .size = sizeof(u32) * ctx->gui.indices_len,
-    };
+    RenderBuffer *index_buffer = ArenaAlloc(ctx->arena, sizeof(RenderBuffer));
+    index_buffer->type = RENDER_BUFFER_TYPE_INDEX;
+    index_buffer->size = sizeof(u32) * ctx->gui.indices_len;
 
-    CreateAndUploadToBuffer(&vertex_buffer, ctx->gui.vertices);
-    CreateAndUploadToBuffer(&index_buffer, ctx->gui.indices);
+    CreateAndUploadToBuffer(vertex_buffer, ctx->gui.vertices);
+    CreateAndUploadToBuffer(index_buffer, ctx->gui.indices);
+
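+    // Wake the loader threads, then spin until remaining_count hits zero so both buffers are
+    // fully uploaded before the frame below binds them.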
+    for (u32 i = 0; i < renderer.vk_conf.avail_threads; i++)
+    {
+        pthread_cond_signal(&cond);
+    }
+
+    i32 count = -1;
+    while (count != 0)
+    {
+        __atomic_load(&renderer.vk.imm.remaining_count, &count, __ATOMIC_SEQ_CST);
+    }
 
     BeginFrame();
 
@@ -129,20 +140,22 @@ static void RunCycle(GameContext *ctx, GameInput *inputs, u32 i_count)
 
     SetPushConstants(&ctx->pc);
 
-    BindVertexBuffer(&vertex_buffer);
-    BindIndexBuffer(&index_buffer);
+    BindVertexBuffer(vertex_buffer);
+    BindIndexBuffer(index_buffer);
 
     DrawIndexed(6, ctx->gui.instance_count);
 
     FinishFrame();
 
-    FreeBuffers(&vertex_buffer, 1);
-    FreeBuffers(&index_buffer, 1);
+    FreeBuffers(vertex_buffer, 1);
+    FreeBuffers(index_buffer, 1);
 
     ctx->gui.vertices_len = 0;
     ctx->gui.indices_len = 0;
     ctx->gui.instance_count = 0;
 
     ArenaFree(ctx->arena);
+
+    Printfln("end of frame %d", renderer.frame_state.frame_cnt);
 }
 
 static void DrawRect(GUIContext *ctx, Vec2 p0, Vec2 p1, Vec4 col)
diff --git a/src/platform_linux.c b/src/platform_linux.c
index cba3f78..643dc52 100644
--- a/src/platform_linux.c
+++ b/src/platform_linux.c
@@ -132,6 +132,11 @@ rawptr _MemAllocZeroed(isize size)
     return ptr;
 }
 
+void MemFree(rawptr ptr, isize size)
+{
+    Assert(munmap(ptr, size) == 0, "munmap failed");
+}
+
 isize _GetPageSize()
 {
     return (isize)sysconf(_SC_PAGESIZE);
diff --git a/src/platform_linux.h b/src/platform_linux.h
index b5ed92d..e65f3a6 100644
--- a/src/platform_linux.h
+++ b/src/platform_linux.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include
+#include
 #include
 #include
 #include
@@ -72,6 +73,7 @@ b32 _InitPlatform();
 // Memory Functions
 rawptr _MemAlloc(isize size);
 rawptr _MemAllocZeroed(isize size);
+void MemFree(rawptr ptr, isize size);
 isize _GetPageSize();
 
 // Print Functions
diff --git a/src/renderer.h b/src/renderer.h
index ced9085..9584376 100644
--- a/src/renderer.h
+++ b/src/renderer.h
@@ -60,8 +60,8 @@ void DestroyRenderer();
 
 // ::Buffers::Header::
 static b32 CreateBuffer(RenderBuffer *buffer);
 static void FreeBuffers(RenderBuffer *buffers, u32 buffer_count);
-static b32 UploadToBuffer(RenderBuffer *buffer, rawptr ptr, u8 thr_ix);
-static b32 CreateAndUploadToBuffer(RenderBuffer *buffer, rawptr ptr);
+static b32 UploadToBuffer(RenderBuffer **buffer, rawptr *ptr, u32 count, u8 thr_ix);
+static void CreateAndUploadToBuffer(RenderBuffer *buffer, rawptr ptr);
 static void BindVertexBuffer(RenderBuffer *buffer);
 static void BindIndexBuffer(RenderBuffer *buffer);
diff --git a/src/renderer_vulkan.c b/src/renderer_vulkan.c
index 4d8e1f2..73877bd 100644
--- a/src/renderer_vulkan.c
+++ b/src/renderer_vulkan.c
@@ -482,7 +482,7 @@ static b32 CreateDevice()
         count++;
     }
 
-device_info.queueCreateInfoCount = count;
+    device_info.queueCreateInfoCount = count;
     device_info.pQueueCreateInfos = &queue_info[0];
 
     VkResult result = vkCreateDevice(renderer.vk.phys_device, &device_info, NULL, &renderer.vk.device);
@@ -755,21 +755,22 @@ static b32 CreateImmediateStructures()
     VkResult result;
     VkDevice device = renderer.vk.device;
     ImmediateStructures *imm = &renderer.vk.imm;
-    u8 thread_count = 1;
 
     pool_create_info.queueFamilyIndex = renderer.vk.queues.transfer;
 
     if (renderer.vk_conf.avail_threads >= 10)
-        thread_count = 3;
+        renderer.vk_conf.avail_threads = 3;
    else if (renderer.vk_conf.avail_threads >= 8)
-        thread_count = 2;
+        renderer.vk_conf.avail_threads = 2;
+    else
+        renderer.vk_conf.avail_threads = 1;
 
-    imm->pools = ArenaAlloc(renderer.perm_arena, sizeof(VkCommandPool) * thread_count);
-    imm->cmds = ArenaAlloc(renderer.perm_arena, sizeof(VkCommandBuffer) * thread_count);
-    imm->fences = ArenaAlloc(renderer.perm_arena, sizeof(VkFence) * thread_count);
+    imm->pools = ArenaAlloc(renderer.perm_arena, sizeof(VkCommandPool) * renderer.vk_conf.avail_threads);
+    imm->cmds = ArenaAlloc(renderer.perm_arena, sizeof(VkCommandBuffer) * renderer.vk_conf.avail_threads);
+    imm->fences = ArenaAlloc(renderer.perm_arena, sizeof(VkFence) * renderer.vk_conf.avail_threads);
     imm->queued_buffers = ArenaAlloc(renderer.perm_arena, sizeof(RenderBuffer) * BUFFER_QUEUE_LEN);
     imm->data = ArenaAlloc(renderer.perm_arena, sizeof(void *) * BUFFER_QUEUE_LEN);
 
-    for (u32 i = 0; i < thread_count && success; i++)
+    for (u32 i = 0; i < renderer.vk_conf.avail_threads && success; i++)
     {
         result = vkCreateCommandPool(device, &pool_create_info, NULL, &imm->pools[i]);
         if (result != VK_SUCCESS)
@@ -1133,35 +1134,61 @@ static b32 CreateShaderModule(u8 *bytes, u32 len, VkShaderModule *module)
 
 #ifdef __linux__
 
+pthread_cond_t cond;
+
 void *VkLoaderStart(void *i)
 {
     u32 index = *(u32 *)i;
     pthread_t self = pthread_self();
+    pthread_mutex_t mut;
+    pthread_mutex_init(&mut, NULL);
 
     for (;;)
-    {
-        u32 job_count = __atomic_load_n(&renderer.vk_conf.job_count, __ATOMIC_RELEASE);
+    {
+        // TODO: Benchmark and test with __ATOMIC_RELEASE
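+        // Ticket lock: each worker (and the producer in CreateAndUploadToBuffer) takes a ticket
+        // and spins until next_ticket matches it, so only one thread touches the job queue at a time.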
+        u32 ticket = __atomic_add_fetch(&renderer.vk.imm.ticket, 1, __ATOMIC_SEQ_CST);
+
+        while (ticket != renderer.vk.imm.next_ticket);
+
+        i32 job_count = __atomic_load_n(&renderer.vk.imm.job_count, __ATOMIC_SEQ_CST);
         if (job_count < 0)
         {
+            __atomic_store_n(&renderer.vk.imm.next_ticket, renderer.vk.imm.next_ticket+1, __ATOMIC_SEQ_CST);
             pthread_exit(NULL);
         }
-
         else if (job_count == 0)
         {
-            __atomic_add_fetch(&renderer.vk_conf.sleeping_count, __ATOMIC_RELEASE);
-            pthread_suspend(self);
+            __atomic_store_n(&renderer.vk.imm.next_ticket, renderer.vk.imm.next_ticket+1, __ATOMIC_SEQ_CST);
+            pthread_mutex_lock(&mut);
+            pthread_cond_wait(&cond, &mut);
+            pthread_mutex_unlock(&mut);
         }
-
-        else if (__atomic_compare_exchange_n(&renderer.vk_conf.job_count, &job_count, job_count-1, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED))
+        else
         {
-            job_count -= 1;
+            // Drain up to 16 queued jobs while holding the ticket, then release it before the
+            // (slow) Vulkan work so other threads can keep queueing.
+            RenderBuffer *buffers[16];
+            rawptr data[16];
+            u32 count = 0;
+            for (u32 i = 0; i < (u32)job_count && i < 16; i++)
+            {
+                buffers[i] = renderer.vk.imm.queued_buffers[i];
+                data[i] = renderer.vk.imm.data[i];
+                count += 1;
+            }
+            __atomic_sub_fetch(&renderer.vk.imm.job_count, count, __ATOMIC_SEQ_CST);
+            __atomic_store_n(&renderer.vk.imm.next_ticket, renderer.vk.imm.next_ticket+1, __ATOMIC_SEQ_CST);
+
+            for (u32 i = 0; i < count; i++)
+                Assert(CreateBuffer(buffers[i]), "VkLoader CreateBuffer failure");
+
+            Assert(UploadToBuffer(buffers, data, count, index), "VkLoader UploadToBuffer failure");
+
+            __atomic_sub_fetch(&renderer.vk.imm.remaining_count, count, __ATOMIC_SEQ_CST);
         }
-
     }
 }
 
-static b32 StartVkLoaderThreads()
+static void StartVkLoaderThreads()
 {
     u32 count = renderer.vk_conf.avail_threads;
     pthread_t *threads = ArenaAlloc(renderer.perm_arena, sizeof(pthread_t) * count);
diff --git a/src/renderer_vulkan.h b/src/renderer_vulkan.h
index f7d3581..fd8af47 100644
--- a/src/renderer_vulkan.h
+++ b/src/renderer_vulkan.h
@@ -226,10 +226,11 @@ typedef struct
     VkCommandBuffer *cmds;
     VkFence *fences;
     RenderBuffer **queued_buffers;
-    void *data;
+    rawptr *data;
     i32 volatile job_count;
-    i32 volatile completed_count;
-    i32 volatile sleeping_count;
+    i32 volatile remaining_count;
+    u32 volatile ticket;
+    u32 volatile next_ticket;
 } ImmediateStructures;
 
 typedef struct {
@@ -345,7 +346,7 @@ static VkFormat GetImageFormat();
 static b32 CreateDescriptors();
 static b32 CreatePipelines();
 static b32 CreateShaderModule(u8 *bytes, u32 len, VkShaderModule *module);
-static b32 StartVkLoaderThreads();
+static void StartVkLoaderThreads();
 
 // ::Vulkan::Util::Header::
 
@@ -413,6 +414,9 @@ static Renderer renderer = {
         .color_space = INT_MAX,
         .present_mode = INT_MAX,
     },
+    .imm = {
+        .next_ticket = 1,
+    },
     }
 };
 
diff --git a/src/renderer_vulkan_public.c b/src/renderer_vulkan_public.c
index b45583a..9556522 100644
--- a/src/renderer_vulkan_public.c
+++ b/src/renderer_vulkan_public.c
@@ -41,7 +41,8 @@ b32 InitRenderer(Arena *arena)
     Assert(CreateImmediateStructures(), "Unable to create immediate structures");
     Assert(CreateDescriptors(), "Unable to initialize descriptors.");
     Assert(CreatePipelines(), "Unable to initialize pipelines.");
-    Assert(StartVkLoaderThreads(), "Unable to initialize vulkan loader threads");
+
+    StartVkLoaderThreads();
 
     ArenaFree(renderer.arena);
 
@@ -132,7 +133,6 @@ static b32 CreateBuffer(RenderBuffer *buffer)
     VkResult result;
 
     DeviceQueues *queues = &renderer.vk.queues;
-    ImmediateStructures *imm = &renderer.vk.imm;
     VmaAllocator alloc = renderer.vk.alloc;
 
     VkBufferCreateInfo buffer_info = {
@@ -199,13 +199,15 @@ static b32 CreateBuffer(RenderBuffer *buffer)
         success = false;
     }
 
+    Printfln("buffer ptr: %d", buffer->buffer);
+
     return success;
 }
 
-static b32 UploadToBuffer(RenderBuffer *buffer, rawptr ptr, u8 thr_ix)
+static b32 UploadToBuffer(RenderBuffer **buffers, rawptr *ptrs, u32 count, u8 thr_ix)
 {
-    Assert(buffer, "UploadToBuffer: buffer must not be null");
-    Assert(ptr, "UploadToBuffer: ptr must not be null");
+    Assert(buffers, "UploadToBuffer: buffer must not be null");
+    Assert(ptrs, "UploadToBuffer: ptr must not be null");
 
     b32 success = true;
 
@@ -214,10 +216,49 @@ static b32 UploadToBuffer(RenderBuffer *buffer, rawptr ptr, u8 thr_ix)
     VkDevice device = renderer.vk.device;
     VkQueue queue = renderer.vk.queues.transfer_queue;
     VmaAllocator alloc = renderer.vk.alloc;
-    rawptr mapped_buffer = NULL;
+    rawptr mapped_buffers[16] = {};
+    RenderBuffer staging_buffers[16] = {};
 
-    b32 host_visible = buffer->type & HOST_VISIBLE_BUFFERS;
+    u32 copy_count = 0;
+
+    success = BeginImmSubmit(device, &fence, cmd);
+    for (u32 i = 0; i < count && success; i++)
+    {
+        b32 host_visible = buffers[i]->type & HOST_VISIBLE_BUFFERS;
+        if (host_visible)
+        {
+            vmaMapMemory(alloc, buffers[i]->alloc, &mapped_buffers[i]);
+            MemCpy(mapped_buffers[i], ptrs[i], buffers[i]->size);
+        }
+        else
+        {
+            // Staging buffers are packed by copy_count so the cleanup loop below can unmap and
+            // destroy exactly the ones that were created.
+            staging_buffers[copy_count].type = RENDER_BUFFER_TYPE_STAGING;
+            staging_buffers[copy_count].size = buffers[i]->size;
+            success = CreateBuffer(&staging_buffers[copy_count]);
+
+            if (success)
+            {
+                vmaMapMemory(alloc, staging_buffers[copy_count].alloc, &mapped_buffers[i]);
+                MemCpy(mapped_buffers[i], ptrs[i], staging_buffers[copy_count].size);
+
+                VkBufferCopy buffer_copy = { .size = (VkDeviceSize)buffers[i]->size };
+                vkCmdCopyBuffer(cmd, staging_buffers[copy_count].buffer, buffers[i]->buffer, 1, &buffer_copy);
+
+                copy_count += 1;
+            }
+        }
+    }
+
+    FinishImmSubmit(device, &fence, cmd, queue);
+    vkWaitForFences(device, 1, &fence, VK_TRUE, 999999999);
+
+    for (u32 i = 0; i < copy_count; i++)
+    {
+        vmaUnmapMemory(alloc, staging_buffers[i].alloc);
+        vmaDestroyBuffer(alloc, staging_buffers[i].buffer, staging_buffers[i].alloc);
+    }
+
+    /*
     if (host_visible)
     {
         vmaMapMemory(alloc, buffer->alloc, &mapped_buffer);
         MemCpy(mapped_buffer, ptr, buffer->size);
@@ -245,6 +286,7 @@ static b32 UploadToBuffer(RenderBuffer *buffer, rawptr ptr, u8 thr_ix)
         if (success)
         {
             VkBufferCopy buffer_copy = { .size = (VkDeviceSize)buffer->size };
+            Printfln("copy buffer: %d", buffer->buffer);
             vkCmdCopyBuffer(cmd, staging_buffer.buffer, buffer->buffer, 1, &buffer_copy);
         }
 
@@ -257,6 +299,7 @@ static b32 UploadToBuffer(RenderBuffer *buffer, rawptr ptr, u8 thr_ix)
             vmaDestroyBuffer(alloc, staging_buffer.buffer, staging_buffer.alloc);
         }
     }
+    */
 
     return success;
 }
 
@@ -264,11 +307,21 @@ static b32 UploadToBuffer(RenderBuffer *buffer, rawptr ptr, u8 thr_ix)
 static void CreateAndUploadToBuffer(RenderBuffer *buffer, rawptr ptr)
 {
     // TODO: revisit this to see if it could be done better
+    Assert(renderer.vk.imm.job_count+1 < BUFFER_QUEUE_LEN, "CreateAndUploadToBuffer out of bounds");
 
-    u32 i = __atomic_fetch_add(&renderer.vk.imm.job_count, 1, __ATOMIC_RELEASE);
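+    // Producer side of the ticket lock: wait for our turn, append the job, then publish it to the
+    // loader threads by bumping next_ticket after the release fence.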
+    u32 ticket = __atomic_add_fetch(&renderer.vk.imm.ticket, 1, __ATOMIC_SEQ_CST);
+
+    while (ticket != renderer.vk.imm.next_ticket);
+
+    u32 i = __atomic_fetch_add(&renderer.vk.imm.job_count, 1, __ATOMIC_SEQ_CST);
+    __atomic_fetch_add(&renderer.vk.imm.remaining_count, 1, __ATOMIC_SEQ_CST);
 
     renderer.vk.imm.queued_buffers[i] = buffer;
     renderer.vk.imm.data[i] = ptr;
+
+    __atomic_thread_fence(__ATOMIC_RELEASE);
+
+    __atomic_add_fetch(&renderer.vk.imm.next_ticket, 1, __ATOMIC_SEQ_CST);
 }
 
 /* TODO: DELETE