add async to vulkan buffer transfers

This commit is contained in:
Matthew 2025-03-29 17:38:04 +11:00
parent 4ab1abbc27
commit c76dc917cc
17 changed files with 367 additions and 143 deletions

111
src/allocators.c Normal file
View File

@ -0,0 +1,111 @@
// ::Allocator::Arena::Start::
// Places an Arena header at the front of `buffer` and exposes the rest as
// the allocation region.
// NOTE(review): `length` is stored verbatim even though ARENA_HEADER_SIZE
// bytes are consumed by the header — confirm callers map length +
// ARENA_HEADER_SIZE bytes, otherwise ArenaAllocAlign can run past the end.
static Arena *CreateArena(rawptr buffer, isize length)
{
    Arena *result = (Arena *)buffer;
    result->buffer = PtrAdd(buffer, ARENA_HEADER_SIZE);
    result->length = length;
    result->pos = 0;
    return result;
}
// Bump-allocates `size` bytes from `arena`, aligning the returned address
// to `align` (assumed to be a power of two by AlignPow2). On exhaustion it
// prints the arena's creation line and asserts — it does not return NULL
// to the caller in practice.
static rawptr ArenaAllocAlign(Arena *arena, isize size, isize align)
{
    rawptr result = NULL;
    uintptr base = (uintptr)arena->buffer;
    uintptr aligned = AlignPow2(base + (uintptr)arena->pos, align);
    uintptr offset = aligned - base;
    if (offset + size > arena->length)
    {
        Printfln("Out of memory: %d", arena->init_line_no);
        Assert(0, "Memory Failure");
    }
    else
    {
        result = &arena->buffer[offset];
        arena->pos = offset + size;
    }
    return result;
}
// Convenience wrapper: allocate `size` bytes at the default alignment.
static rawptr ArenaAlloc(Arena *arena, isize size)
{
    return ArenaAllocAlign(arena, size, DEFAULT_ALIGNMENT);
}
// Resets the arena so its whole region can be reused.
// Memory contents are left untouched; use ArenaFreeZeroed to scrub them.
static void ArenaFree(Arena *arena)
{
    arena->pos = 0;
}
// Zeroes everything allocated so far, then resets the arena.
static void ArenaFreeZeroed(Arena *arena)
{
    isize used = arena->pos;
    MemZero(arena->buffer, used);
    ArenaFree(arena);
}
// Returns the arena's backing memory to the OS.
// NOTE(review): this frees `arena->length` bytes starting at the header;
// if the original mapping was length + ARENA_HEADER_SIZE bytes this
// under-frees — confirm against the allocation site.
static void DeallocArena(Arena *arena)
{
    MemFree(arena, arena->length);
}
// Same as CreateArena, but records the caller's source line so
// out-of-memory reports can point back at the arena's creation site.
static Arena *CreateArenaDebug(rawptr buffer, isize length, u32 init_line_no)
{
    Arena *result = CreateArena(buffer, length);
    result->init_line_no = init_line_no;
    return result;
}
// ::Allocator::Arena::End::
// ::Allocator::FreeList::Start::
// Allocates one contiguous region holding, in order: the FreeListAlloc
// header, the first FreeListBuffer descriptor, the red-black sentinel
// node, and `init_size` bytes of pool. `grow_size` is the amount each
// additional buffer will request later.
static FreeListAlloc *CreateFreeListAlloc(isize init_size, isize grow_size)
{
    isize size = init_size + sizeof(FreeListAlloc) + sizeof(FreeListBuffer) + sizeof(rawptr) + sizeof(FLNode);
    u8 *mem = (u8 *)MemAllocZeroed(size);
    FreeListAlloc *fl_alloc = (FreeListAlloc *)mem;
    u8 *pre_mem = mem;
    mem = (u8 *)PtrAdd(mem, sizeof(FreeListAlloc));
    // BUG FIX: the remaining size must shrink by (mem - pre_mem). The old
    // code computed (pre_mem - mem) — a negative delta — so rem_size GREW
    // by each header's size and free_size overstated the usable pool.
    isize rem_size = size - (mem - pre_mem);
    fl_alloc->buf_len = 1;
    fl_alloc->grow_size = grow_size;
    fl_alloc->buffers = (FreeListBuffer *)mem;
    pre_mem = mem;
    mem = (u8 *)PtrAdd(mem, sizeof(FreeListBuffer));
    rem_size -= mem - pre_mem;
    fl_alloc->nil = (FLNode *)mem;
    pre_mem = mem;
    mem = (u8 *)PtrAdd(mem, sizeof(FLNode));
    rem_size -= mem - pre_mem;
    fl_alloc->buffers[0].buf = mem;
    // size covers the whole mapping (header included) so the deallocator
    // can release the entire region via buffers[0]; free_size is only the
    // usable pool past the headers.
    fl_alloc->buffers[0].size = (u32)size;
    fl_alloc->buffers[0].free_size = (u32)rem_size;
    return fl_alloc;
}
// Frees every buffer owned by the allocator, releasing the allocator's
// own mapping (buffers[0], which contains the header and descriptor
// array) last so that `alloc` stays valid while iterating.
// BUG FIX: the old loop used `for (u8 i = buf_len-1; i >= 0; i--)` — with
// an unsigned counter `i >= 0` is always true, so after freeing `alloc`
// at i == 0 the counter wrapped to 255 and the loop read freed memory.
// The `i-- > 0` countdown idiom terminates correctly.
static void DeallocFreeListAlloc(FreeListAlloc *alloc)
{
    for (u8 i = alloc->buf_len; i-- > 0;)
    {
        if (i == 0)
            MemFree(alloc, alloc->buffers[i].size);
        else
            MemFree(alloc->buffers[i].buf, alloc->buffers[i].size);
    }
}
// ::Allocator::FreeList::End::

70
src/allocators.h Normal file
View File

@ -0,0 +1,70 @@
#pragma once
// ::Allocator::Arena::Header::
// Bytes reserved at the front of the backing buffer for the Arena struct
// itself; the usable region begins after this header.
#define ARENA_HEADER_SIZE 64
// Linear (bump) allocator over a single fixed-size buffer.
typedef struct
{
u8 *buffer; // start of the allocatable region (just past the header)
isize length; // capacity checked by ArenaAllocAlign
isize pos; // current bump offset into buffer
u32 init_line_no; // source line that created the arena (set by CreateArenaDebug)
} Arena;
// Saved arena position for scoped/temporary allocations.
typedef struct
{
Arena *arena; // arena being scoped
u64 pos; // position to restore when the scope ends
} TempArena;
static Arena *CreateArena(rawptr buffer, isize length);
static rawptr ArenaAllocAlign(Arena *arena, isize size, isize align);
static rawptr ArenaAlloc(Arena *arena, isize size);
static void ArenaFree(Arena *arena);
static void ArenaFreeZeroed(Arena *arena);
static void DeallocArena(Arena *arena);
static Arena *CreateArenaDebug(rawptr buffer, isize length, u32 init_line_no);
// ::Allocator::FreeList::Header::
// Node colors for the red-black tree that indexes free blocks.
typedef enum u8
{
RB_RED,
RB_BLACK,
} FLNodeColor;
typedef struct FLNode_t FLNode;
// Red-black tree node describing one entry in the free list.
typedef struct FLNode_t
{
FLNode *parent;
FLNode *left;
FLNode *right;
i32 value; // tree key — presumably the block size; confirm in tree code
rawptr data; // payload pointer — presumably the free block; TODO confirm
FLNodeColor color;
} FLNode;
// One backing memory region owned by the free-list allocator.
typedef struct FreeListBuffer_t
{
rawptr buf; // start of the usable region
u32 size; // total bytes of the region (buffers[0] includes the headers)
u32 free_size; // bytes still available in the region
} FreeListBuffer;
// Growable allocator backed by one or more buffers, with free blocks
// indexed by a red-black tree rooted at `head` (`nil` is the sentinel).
typedef struct FreeListAlloc_t
{
FLNode *head; // root of the free-block tree
FLNode *nil; // shared sentinel leaf node
FreeListBuffer *buffers; // backing regions; buf_len entries
isize grow_size; // size requested for each additional buffer
u8 buf_len; // number of entries in buffers
} FreeListAlloc;
static FreeListAlloc *CreateFreeListAlloc(isize init_size, isize grow_size);
static void DeallocFreeListAlloc(FreeListAlloc *alloc);
static inline b32 FLNodeInsert(FreeListAlloc *alloc, FLNode *node);
static inline void FLNodeDelete(FLNode *node);
static inline void FLNodeLeftRotate(FLNode *node);
static inline void FLNodeRightRotate(FLNode *node);

View File

@ -1,56 +0,0 @@
static Arena *CreateArena(rawptr buffer, isize length)
{
Arena *arena = (Arena *)buffer;
buffer = PtrAdd(buffer, ARENA_HEADER_SIZE);
arena->buffer = buffer;
arena->length = length;
arena->pos = 0;
return arena;
}
static rawptr ArenaAllocAlign(Arena *arena, isize size, isize align)
{
rawptr ptr = NULL;
uintptr curr_ptr = (uintptr)arena->buffer + (uintptr)arena->pos;
uintptr offset = AlignPow2(curr_ptr, align);
offset -= (uintptr)arena->buffer;
if (offset+size <= arena->length)
{
ptr = &arena->buffer[offset];
arena->pos = offset+size;
}
else
{
Printfln("Out of memory: %d", arena->init_line_no);
Assert(0, "Memory Failure");
}
return ptr;
}
static rawptr ArenaAlloc(Arena *arena, isize size)
{
return ArenaAllocAlign(arena, size, DEFAULT_ALIGNMENT);
}
static void ArenaFree(Arena *arena)
{
arena->pos = 0;
}
static void ArenaFreeZeroed(Arena *arena)
{
MemZero(arena->buffer, arena->pos);
ArenaFree(arena);
}
static Arena * CreateArenaDebug(rawptr buffer, isize length, u32 init_line_no)
{
Arena *arena = CreateArena(buffer, length);
arena->init_line_no = init_line_no;
return arena;
}

View File

@ -1,25 +0,0 @@
#pragma once
#define ARENA_HEADER_SIZE 64
typedef struct
{
u8 *buffer;
isize length;
isize pos;
u32 init_line_no;
} Arena;
typedef struct
{
Arena *arena;
u64 pos;
} TempArena;
static Arena *CreateArena(rawptr buffer, isize length);
static rawptr ArenaAllocAlign(Arena *arena, isize size, isize align);
static rawptr ArenaAlloc(Arena *arena, isize size);
static void ArenaFree(Arena *arena);
static void ArenaFreeZeroed(Arena *arena);
static Arena *CreateArenaDebug(rawptr buffer, isize length, u32 init_line_no);

2
src/ds.c Normal file
View File

@ -0,0 +1,2 @@

3
src/ds.h Normal file
View File

@ -0,0 +1,3 @@
#pragma once

View File

@ -3,8 +3,9 @@
#include "entry_linux.h" #include "entry_linux.h"
#include "platform.c" #include "platform.c"
#include "ds.c"
#include "util.c" #include "util.c"
#include "arena.c" #include "allocators.c"
#include "renderer.c" #include "renderer.c"
#include "game.c" #include "game.c"
@ -21,23 +22,34 @@ const char *strs[10] = {
"String 10", "String 10",
}; };
u32 volatile str_index = 0; u32 volatile str_index = 0;
pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
pthread_cond_t cond1 = PTHREAD_COND_INITIALIZER;
pthread_cond_t cond2 = PTHREAD_COND_INITIALIZER;
#include <unistd.h> #include <unistd.h>
void *ThreadFunc(void *i) void *ThreadFunc(void *i)
{ {
for (;;) pthread_mutex_t mut;
{
u32 val = __atomic_fetch_add(&str_index, 1, __ATOMIC_RELEASE);
if (val < 10)
{
Printfln("Thread %d: %s", *(u32 *)i, strs[val]);
sleep(1);
}
else
break;
} u32 val = *(u32 *)i;
pthread_mutex_init(&mut, NULL);
Printfln("Thread %d Started", *(u32 *)i);
pthread_cond_t *c = val == 1 ? &cond1 : &cond2;
pthread_mutex_lock(&mut);
pthread_cond_wait(c, &mut);
Printfln("Thread %d woken up", *(u32 *)i);
pthread_cond_wait(&cond, &mut);
Printfln("Thread %d global wake up", val);
pthread_mutex_unlock(&mut);
pthread_exit(NULL); pthread_exit(NULL);
} }

View File

@ -11,9 +11,10 @@
#include "stb/stb_sprintf.h" #include "stb/stb_sprintf.h"
#include "shared_types.h" #include "shared_types.h"
#include "ds.h"
#include "platform.h" #include "platform.h"
#include "util.h" #include "util.h"
#include "arena.h" #include "allocators.h"
#include "renderer.h" #include "renderer.h"
#include "game.h" #include "game.h"

View File

@ -3,8 +3,9 @@
#include "entry_windows.h" #include "entry_windows.h"
#include "platform.c" #include "platform.c"
#include "ds.c"
#include "util.c" #include "util.c"
#include "arena.c" #include "allocators.c"
#include "renderer.c" #include "renderer.c"
#include "game.c" #include "game.c"

View File

@ -9,9 +9,10 @@
#include "stb/stb_sprintf.h" #include "stb/stb_sprintf.h"
#include "shared_types.h" #include "shared_types.h"
#include "ds.h"
#include "platform.h" #include "platform.h"
#include "util.h" #include "util.h"
#include "arena.h" #include "allocators.h"
#include "renderer.h" #include "renderer.h"
#include "game.h" #include "game.h"

View File

@ -73,11 +73,13 @@ static void HandleInputs(GameInput *inputs, u32 count)
static void RunCycle(GameContext *ctx, GameInput *inputs, u32 i_count) static void RunCycle(GameContext *ctx, GameInput *inputs, u32 i_count)
{ {
__atomic_store_n(&renderer.vk.imm.job_count, 0, __ATOMIC_RELEASE);
__atomic_store_n(&renderer.vk.imm.remaining_count, 0, __ATOMIC_RELEASE);
GetViewportSize(&ctx->pc.res); GetViewportSize(&ctx->pc.res);
HandleInputs(inputs, i_count); HandleInputs(inputs, i_count);
if (mouse_pressed && !ctx->window.grabbed if (mouse_pressed && !ctx->window.grabbed
&& mouse_pos_x > ctx->window.p0.x && mouse_pos_x > ctx->window.p0.x
&& mouse_pos_x < ctx->window.p1.x && mouse_pos_x < ctx->window.p1.x
@ -110,18 +112,27 @@ static void RunCycle(GameContext *ctx, GameInput *inputs, u32 i_count)
DrawRect(&ctx->gui, ctx->window.p0, ctx->window.p1, (Vec4){ .r = 0.1f, .g = 0.3f, .b = 0.8f, .a = 1.0f }); DrawRect(&ctx->gui, ctx->window.p0, ctx->window.p1, (Vec4){ .r = 0.1f, .g = 0.3f, .b = 0.8f, .a = 1.0f });
RenderBuffer vertex_buffer = { RenderBuffer *vertex_buffer = ArenaAlloc(ctx->arena, sizeof(RenderBuffer));
.type = RENDER_BUFFER_TYPE_VERTEX, vertex_buffer->type = RENDER_BUFFER_TYPE_VERTEX;
.size = sizeof(GUIVertex) * ctx->gui.vertices_len, vertex_buffer->size = sizeof(GUIVertex) * ctx->gui.vertices_len;
};
RenderBuffer index_buffer = { RenderBuffer *index_buffer = ArenaAlloc(ctx->arena, sizeof(RenderBuffer));
.type = RENDER_BUFFER_TYPE_INDEX, index_buffer->type = RENDER_BUFFER_TYPE_INDEX,
.size = sizeof(u32) * ctx->gui.indices_len, index_buffer->size = sizeof(u32) * ctx->gui.indices_len,
};
CreateAndUploadToBuffer(&vertex_buffer, ctx->gui.vertices); CreateAndUploadToBuffer(vertex_buffer, ctx->gui.vertices);
CreateAndUploadToBuffer(&index_buffer, ctx->gui.indices); CreateAndUploadToBuffer(index_buffer, ctx->gui.indices);
for (u32 i = 0; i < renderer.vk_conf.avail_threads; i++)
{
pthread_cond_signal(&cond);
}
u32 count = -1;
while (count != 0)
{
__atomic_load(&renderer.vk.imm.remaining_count, &count, __ATOMIC_SEQ_CST);
}
BeginFrame(); BeginFrame();
@ -129,20 +140,22 @@ static void RunCycle(GameContext *ctx, GameInput *inputs, u32 i_count)
SetPushConstants(&ctx->pc); SetPushConstants(&ctx->pc);
BindVertexBuffer(&vertex_buffer); BindVertexBuffer(vertex_buffer);
BindIndexBuffer(&index_buffer); BindIndexBuffer(index_buffer);
DrawIndexed(6, ctx->gui.instance_count); DrawIndexed(6, ctx->gui.instance_count);
FinishFrame(); FinishFrame();
FreeBuffers(&vertex_buffer, 1); FreeBuffers(vertex_buffer, 1);
FreeBuffers(&index_buffer, 1); FreeBuffers(index_buffer, 1);
ctx->gui.vertices_len = 0; ctx->gui.vertices_len = 0;
ctx->gui.indices_len = 0; ctx->gui.indices_len = 0;
ctx->gui.instance_count = 0; ctx->gui.instance_count = 0;
ArenaFree(ctx->arena); ArenaFree(ctx->arena);
Printfln("end of frame %d", renderer.frame_state.frame_cnt);
} }
static void DrawRect(GUIContext *ctx, Vec2 p0, Vec2 p1, Vec4 col) static void DrawRect(GUIContext *ctx, Vec2 p0, Vec2 p1, Vec4 col)

View File

@ -132,6 +132,11 @@ rawptr _MemAllocZeroed(isize size)
return ptr; return ptr;
} }
void MemFree(rawptr ptr, isize size)
{
Assert(munmap(ptr, size) == 0, "munmap failed");
}
isize _GetPageSize() isize _GetPageSize()
{ {
return (isize)sysconf(_SC_PAGESIZE); return (isize)sysconf(_SC_PAGESIZE);

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include <pthread.h> #include <pthread.h>
#include <signal.h>
#include <limits.h> #include <limits.h>
#include <stdint.h> #include <stdint.h>
#include <stdbool.h> #include <stdbool.h>
@ -72,6 +73,7 @@ b32 _InitPlatform();
// Memory Functions // Memory Functions
rawptr _MemAlloc(isize size); rawptr _MemAlloc(isize size);
rawptr _MemAllocZeroed(isize size); rawptr _MemAllocZeroed(isize size);
void MemFree(rawptr ptr, isize size);
isize _GetPageSize(); isize _GetPageSize();
// Print Functions // Print Functions

View File

@ -60,8 +60,8 @@ void DestroyRenderer();
// ::Buffers::Header:: // ::Buffers::Header::
static b32 CreateBuffer(RenderBuffer *buffer); static b32 CreateBuffer(RenderBuffer *buffer);
static void FreeBuffers(RenderBuffer *buffers, u32 buffer_count); static void FreeBuffers(RenderBuffer *buffers, u32 buffer_count);
static b32 UploadToBuffer(RenderBuffer *buffer, rawptr ptr, u8 thr_ix); static b32 UploadToBuffer(RenderBuffer **buffer, rawptr *ptr, u32 count, u8 thr_ix);
static b32 CreateAndUploadToBuffer(RenderBuffer *buffer, rawptr ptr); static void CreateAndUploadToBuffer(RenderBuffer *buffer, rawptr ptr);
static void BindVertexBuffer(RenderBuffer *buffer); static void BindVertexBuffer(RenderBuffer *buffer);
static void BindIndexBuffer(RenderBuffer *buffer); static void BindIndexBuffer(RenderBuffer *buffer);

View File

@ -482,7 +482,7 @@ static b32 CreateDevice()
count++; count++;
} }
device_info.queueCreateInfoCount = count; device_info.queueCreateInfoCount = count;
device_info.pQueueCreateInfos = &queue_info[0]; device_info.pQueueCreateInfos = &queue_info[0];
VkResult result = vkCreateDevice(renderer.vk.phys_device, &device_info, NULL, &renderer.vk.device); VkResult result = vkCreateDevice(renderer.vk.phys_device, &device_info, NULL, &renderer.vk.device);
@ -755,21 +755,22 @@ static b32 CreateImmediateStructures()
VkResult result; VkResult result;
VkDevice device = renderer.vk.device; VkDevice device = renderer.vk.device;
ImmediateStructures *imm = &renderer.vk.imm; ImmediateStructures *imm = &renderer.vk.imm;
u8 thread_count = 1;
pool_create_info.queueFamilyIndex = renderer.vk.queues.transfer; pool_create_info.queueFamilyIndex = renderer.vk.queues.transfer;
if (renderer.vk_conf.avail_threads >= 10) if (renderer.vk_conf.avail_threads >= 10)
thread_count = 3; renderer.vk_conf.avail_threads = 3;
else if (renderer.vk_conf.avail_threads >= 8) else if (renderer.vk_conf.avail_threads >= 8)
thread_count = 2; renderer.vk_conf.avail_threads = 2;
else
renderer.vk_conf.avail_threads = 1;
imm->pools = ArenaAlloc(renderer.perm_arena, sizeof(VkCommandPool) * thread_count); imm->pools = ArenaAlloc(renderer.perm_arena, sizeof(VkCommandPool) * renderer.vk_conf.avail_threads);
imm->cmds = ArenaAlloc(renderer.perm_arena, sizeof(VkCommandBuffer) * thread_count); imm->cmds = ArenaAlloc(renderer.perm_arena, sizeof(VkCommandBuffer) * renderer.vk_conf.avail_threads);
imm->fences = ArenaAlloc(renderer.perm_arena, sizeof(VkFence) * thread_count); imm->fences = ArenaAlloc(renderer.perm_arena, sizeof(VkFence) * renderer.vk_conf.avail_threads);
imm->queued_buffers = ArenaAlloc(renderer.perm_arena, sizeof(RenderBuffer) * BUFFER_QUEUE_LEN); imm->queued_buffers = ArenaAlloc(renderer.perm_arena, sizeof(RenderBuffer) * BUFFER_QUEUE_LEN);
imm->data = ArenaAlloc(renderer.perm_arena, sizeof(void *) * BUFFER_QUEUE_LEN); imm->data = ArenaAlloc(renderer.perm_arena, sizeof(void *) * BUFFER_QUEUE_LEN);
for (u32 i = 0; i < thread_count && success; i++) for (u32 i = 0; i < renderer.vk_conf.avail_threads && success; i++)
{ {
result = vkCreateCommandPool(device, &pool_create_info, NULL, &imm->pools[i]); result = vkCreateCommandPool(device, &pool_create_info, NULL, &imm->pools[i]);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
@ -1133,35 +1134,61 @@ static b32 CreateShaderModule(u8 *bytes, u32 len, VkShaderModule *module)
#ifdef __linux__ #ifdef __linux__
pthread_cond_t cond;
void *VkLoaderStart(void *i) void *VkLoaderStart(void *i)
{ {
u32 index = *(u32 *)i; u32 index = *(u32 *)i;
pthread_t self = pthread_self(); pthread_t self = pthread_self();
pthread_mutex_t mut;
pthread_mutex_init(&mut, NULL);
for (;;) for (;;)
{ {
u32 job_count = __atomic_load_n(&renderer.vk_conf.job_count, __ATOMIC_RELEASE); // TODO: Benchmark and test with __ATOMIC_RELEASE
u32 ticket = __atomic_add_fetch(&renderer.vk.imm.ticket, 1, __ATOMIC_SEQ_CST);
while (ticket != renderer.vk.imm.next_ticket);
u32 job_count = __atomic_load_n(&renderer.vk.imm.job_count, __ATOMIC_SEQ_CST);
if (job_count < 0) if (job_count < 0)
{ {
__atomic_store_n(&renderer.vk.imm.next_ticket, renderer.vk.imm.next_ticket+1, __ATOMIC_SEQ_CST);
pthread_exit(NULL); pthread_exit(NULL);
} }
else if (job_count == 0) else if (job_count == 0)
{ {
__atomic_add_fetch(&renderer.vk_conf.sleeping_count, __ATOMIC_RELEASE); __atomic_store_n(&renderer.vk.imm.next_ticket, renderer.vk.imm.next_ticket+1, __ATOMIC_SEQ_CST);
pthread_suspend(self); pthread_mutex_lock(&mut);
pthread_cond_wait(&cond, &mut);
pthread_mutex_unlock(&mut);
} }
else
else if (__atomic_compare_exchange_n(&renderer.vk_conf.job_count, &job_count, job_count-1, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED))
{ {
job_count -= 1; RenderBuffer *buffers[16];
rawptr data[16];
u32 count = 0;
for (u32 i = 0; i < job_count && i < 16; i++)
{
buffers[i] = renderer.vk.imm.queued_buffers[i];
data[i] = renderer.vk.imm.data[i];
count += 1;
}
__atomic_sub_fetch(&renderer.vk.imm.job_count, count, __ATOMIC_SEQ_CST);
__atomic_store_n(&renderer.vk.imm.next_ticket, renderer.vk.imm.next_ticket+1, __ATOMIC_SEQ_CST);
for (u32 i = 0; i < count; i++)
Assert(CreateBuffer(buffers[i]), "VkLoader CreateBuffer failure");
Assert(UploadToBuffer(buffers, data, count, index), "VkLoader UploadToBuffer failure");
u32 rem = __atomic_sub_fetch(&renderer.vk.imm.remaining_count, count, __ATOMIC_SEQ_CST);
} }
} }
} }
static b32 StartVkLoaderThreads() static void StartVkLoaderThreads()
{ {
u32 count = renderer.vk_conf.avail_threads; u32 count = renderer.vk_conf.avail_threads;
pthread_t *threads = ArenaAlloc(renderer.perm_arena, sizeof(pthread_t) * count); pthread_t *threads = ArenaAlloc(renderer.perm_arena, sizeof(pthread_t) * count);

View File

@ -226,10 +226,11 @@ typedef struct
VkCommandBuffer *cmds; VkCommandBuffer *cmds;
VkFence *fences; VkFence *fences;
RenderBuffer **queued_buffers; RenderBuffer **queued_buffers;
void *data; rawptr *data;
i32 volatile job_count; i32 volatile job_count;
i32 volatile completed_count; i32 volatile remaining_count;
i32 volatile sleeping_count; u32 volatile ticket;
u32 volatile next_ticket;
} ImmediateStructures; } ImmediateStructures;
typedef struct { typedef struct {
@ -345,7 +346,7 @@ static VkFormat GetImageFormat();
static b32 CreateDescriptors(); static b32 CreateDescriptors();
static b32 CreatePipelines(); static b32 CreatePipelines();
static b32 CreateShaderModule(u8 *bytes, u32 len, VkShaderModule *module); static b32 CreateShaderModule(u8 *bytes, u32 len, VkShaderModule *module);
static b32 StartVkLoaderThreads(); static void StartVkLoaderThreads();
// ::Vulkan::Util::Header:: // ::Vulkan::Util::Header::
@ -413,6 +414,9 @@ static Renderer renderer = {
.color_space = INT_MAX, .color_space = INT_MAX,
.present_mode = INT_MAX, .present_mode = INT_MAX,
}, },
.imm = {
.next_ticket = 1,
},
} }
}; };

View File

@ -41,7 +41,8 @@ b32 InitRenderer(Arena *arena)
Assert(CreateImmediateStructures(), "Unable to create immediate structures"); Assert(CreateImmediateStructures(), "Unable to create immediate structures");
Assert(CreateDescriptors(), "Unable to initialize descriptors."); Assert(CreateDescriptors(), "Unable to initialize descriptors.");
Assert(CreatePipelines(), "Unable to initialize pipelines."); Assert(CreatePipelines(), "Unable to initialize pipelines.");
Assert(StartVkLoaderThreads(), "Unable to initialize vulkan loader threads");
StartVkLoaderThreads();
ArenaFree(renderer.arena); ArenaFree(renderer.arena);
@ -132,7 +133,6 @@ static b32 CreateBuffer(RenderBuffer *buffer)
VkResult result; VkResult result;
DeviceQueues *queues = &renderer.vk.queues; DeviceQueues *queues = &renderer.vk.queues;
ImmediateStructures *imm = &renderer.vk.imm;
VmaAllocator alloc = renderer.vk.alloc; VmaAllocator alloc = renderer.vk.alloc;
VkBufferCreateInfo buffer_info = { VkBufferCreateInfo buffer_info = {
@ -199,13 +199,15 @@ static b32 CreateBuffer(RenderBuffer *buffer)
success = false; success = false;
} }
Printfln("buffer ptr: %d", buffer->buffer);
return success; return success;
} }
static b32 UploadToBuffer(RenderBuffer *buffer, rawptr ptr, u8 thr_ix) static b32 UploadToBuffer(RenderBuffer **buffers, rawptr *ptrs, u32 count, u8 thr_ix)
{ {
Assert(buffer, "UploadToBuffer: buffer must not be null"); Assert(buffers, "UploadToBuffer: buffer must not be null");
Assert(ptr, "UploadToBuffer: ptr must not be null"); Assert(ptrs, "UploadToBuffer: ptr must not be null");
b32 success = true; b32 success = true;
@ -214,10 +216,49 @@ static b32 UploadToBuffer(RenderBuffer *buffer, rawptr ptr, u8 thr_ix)
VkDevice device = renderer.vk.device; VkDevice device = renderer.vk.device;
VkQueue queue = renderer.vk.queues.transfer_queue; VkQueue queue = renderer.vk.queues.transfer_queue;
VmaAllocator alloc = renderer.vk.alloc; VmaAllocator alloc = renderer.vk.alloc;
rawptr mapped_buffer = NULL; rawptr mapped_buffers[16] = {};
RenderBuffer staging_buffers[16] = {};
b32 host_visible = buffer->type & HOST_VISIBLE_BUFFERS; u32 copy_count = 0;
b32 imm_started = success = BeginImmSubmit(device, &fence, cmd);
for (u32 i = 0; i < count && success; i++)
{
b32 host_visible = buffers[i]->type & HOST_VISIBLE_BUFFERS;
if (host_visible)
{
vmaMapMemory(alloc, buffers[i]->alloc, &mapped_buffers[i]);
MemCpy(mapped_buffers[i], ptrs[i], buffers[i]->size);
}
else
{
staging_buffers[copy_count].type = RENDER_BUFFER_TYPE_STAGING;
staging_buffers[copy_count].size = buffers[i]->size;
success = CreateBuffer(&staging_buffers[i]);
if (success)
{
vmaMapMemory(alloc, staging_buffers[i].alloc, &mapped_buffers[i]);
MemCpy(mapped_buffers[i], ptrs[i], staging_buffers[i].size);
VkBufferCopy buffer_copy = { .size = (VkDeviceSize)buffers[i]->size };
vkCmdCopyBuffer(cmd, staging_buffers[i].buffer, buffers[i]->buffer, 1, &buffer_copy);
copy_count += 1;
}
}
}
FinishImmSubmit(device, &fence, cmd, queue);
vkWaitForFences(device, 1, &fence, VK_TRUE, 999999999);
for (u32 i = 0; i < copy_count; i++)
{
vmaUnmapMemory(alloc, staging_buffers[i].alloc);
vmaDestroyBuffer(alloc, staging_buffers[i].buffer, staging_buffers[i].alloc);
}
/*
if (host_visible) if (host_visible)
{ {
vmaMapMemory(alloc, buffer->alloc, &mapped_buffer); vmaMapMemory(alloc, buffer->alloc, &mapped_buffer);
@ -245,6 +286,7 @@ static b32 UploadToBuffer(RenderBuffer *buffer, rawptr ptr, u8 thr_ix)
if (success) if (success)
{ {
VkBufferCopy buffer_copy = { .size = (VkDeviceSize)buffer->size }; VkBufferCopy buffer_copy = { .size = (VkDeviceSize)buffer->size };
Printfln("copy buffer: %d", buffer->buffer);
vkCmdCopyBuffer(cmd, staging_buffer.buffer, buffer->buffer, 1, &buffer_copy); vkCmdCopyBuffer(cmd, staging_buffer.buffer, buffer->buffer, 1, &buffer_copy);
} }
@ -257,6 +299,7 @@ static b32 UploadToBuffer(RenderBuffer *buffer, rawptr ptr, u8 thr_ix)
vmaDestroyBuffer(alloc, staging_buffer.buffer, staging_buffer.alloc); vmaDestroyBuffer(alloc, staging_buffer.buffer, staging_buffer.alloc);
} }
} }
*/
return success; return success;
} }
@ -264,11 +307,21 @@ static b32 UploadToBuffer(RenderBuffer *buffer, rawptr ptr, u8 thr_ix)
static void CreateAndUploadToBuffer(RenderBuffer *buffer, rawptr ptr) static void CreateAndUploadToBuffer(RenderBuffer *buffer, rawptr ptr)
{ {
// TODO: revisit this to see if it could be done better // TODO: revisit this to see if it could be done better
Assert(renderer.vk.imm.job_count+1 < BUFFER_QUEUE_LEN, "CreateAndUploadToBuffer out of bounds");
u32 i = __atomic_fetch_add(&renderer.vk.imm.job_count, 1, __ATOMIC_RELEASE); u32 ticket = __atomic_add_fetch(&renderer.vk.imm.ticket, 1, __ATOMIC_SEQ_CST);
while (ticket != renderer.vk.imm.next_ticket);
u32 i = __atomic_fetch_add(&renderer.vk.imm.job_count, 1, __ATOMIC_SEQ_CST);
__atomic_fetch_add(&renderer.vk.imm.remaining_count, 1, __ATOMIC_SEQ_CST);
renderer.vk.imm.queued_buffers[i] = buffer; renderer.vk.imm.queued_buffers[i] = buffer;
renderer.vk.imm.data[i] = ptr; renderer.vk.imm.data[i] = ptr;
__atomic_thread_fence(__ATOMIC_RELEASE);
__atomic_add_fetch(&renderer.vk.imm.next_ticket, 1, __ATOMIC_SEQ_CST);
} }
/* TODO: DELETE /* TODO: DELETE