From 72e1b91e2a2dda8eb7b951c9f31512793281244f Mon Sep 17 00:00:00 2001 From: Matthew Date: Fri, 25 Apr 2025 18:48:13 +1000 Subject: [PATCH] work on asset loading queues --- build.sh | 2 +- src/assets.c | 5 +- src/assets.h | 1 + src/ds.c | 23 ++- src/ds.h | 1 + src/game.c | 2 +- src/platform/platform.h | 19 ++ src/platform/platform_linux.h | 37 ++++ src/platform/platform_linux_public.c | 19 ++ src/renderer.h | 34 +++- src/renderer_vulkan.c | 271 ++++++++++++++++++--------- src/renderer_vulkan.h | 62 ++++-- src/renderer_vulkan_public.c | 34 +++- src/util.c | 31 ++- src/util.h | 73 +++++--- src/vulkan_config.c | 2 +- 16 files changed, 452 insertions(+), 164 deletions(-) diff --git a/build.sh b/build.sh index 29067da..d1ebcdd 100755 --- a/build.sh +++ b/build.sh @@ -47,7 +47,7 @@ glsl_stage_geom="-fshader-stage=geom" glsl_stage_comp="-fshader-stage=comp" glsl_out="-o./shaders/glsl/" -clang_common="${include_flags} ${render_flag} -DCOMPILER_CLANG -std=c23 -fuse-ld=mold -Xclang -flto-visibility-public-std -Wno-unknown-warning-option -fdiagnostics-absolute-paths -Wall -Wno-missing-braces -Wno-unused-function -Wno-writable-strings -Wno-unused-value -Wno-unused-variable -Wno-unused-local-typedef -Wno-deprecated-register -Wno-deprecated-declarations -Wno-unused-but-set-variable -Wno-single-bit-bitfield-constant-conversion -Wno-compare-distinct-pointer-types -Wno-initializer-overrides -Wno-incompatible-pointer-types-discards-qualifiers -Wno-for-loop-analysis -DVMA_STATIC_VULKAN_FUNCTIONS=0" +clang_common="${include_flags} ${render_flag} -DCOMPILER_CLANG -std=c23 -fuse-ld=mold -Xclang -flto-visibility-public-std -Wno-unknown-warning-option -fdiagnostics-absolute-paths -Wall -Wno-missing-braces -Wno-unused-function -Wno-writable-strings -Wno-unused-value -Wno-unused-variable -Wno-unused-local-typedef -Wno-deprecated-register -Wno-deprecated-declarations -Wno-unused-but-set-variable -Wno-single-bit-bitfield-constant-conversion -Wno-compare-distinct-pointer-types 
-Wno-initializer-overrides -Wno-incompatible-pointer-types-discards-qualifiers -Wno-for-loop-analysis -DVMA_STATIC_VULKAN_FUNCTIONS=0 -ferror-limit=200"
 clang_debug="$compiler -g -O0 -DBUILD_DEBUG=1 ${clang_common}"
 clang_release="$compiler -O2 ${clang_common}"
 clang_test="$compiler -O2 -DBUILD_TEST=1 ${clang_common}"
diff --git a/src/assets.c b/src/assets.c
index 54d0c13..be8dd72 100644
--- a/src/assets.c
+++ b/src/assets.c
@@ -67,8 +67,9 @@ static Asset AssetPackLoadTexture(TextureAsset asset_id)
     asset.bytes = stbi_load_from_memory(img, asset_info->len, &x, &y, &ch, 4);
 
     asset.len = asset_info->len;
-    asset.texture_meta.w = u32(x);
-    asset.texture_meta.h = u32(y);
+    asset.texture_meta.w = u32(x);
+    asset.texture_meta.h = u32(y);
+    asset.texture_meta.ch = u32(ch);
 
     Texture_Asset_Lookup[asset_id] = asset;
 
diff --git a/src/assets.h b/src/assets.h
index 7894cfe..b3a3067 100644
--- a/src/assets.h
+++ b/src/assets.h
@@ -65,6 +65,7 @@ typedef struct TextureAssetMeta
 {
     u32 w;
     u32 h;
+    u32 ch;
 } TextureAssetMeta;
 
 typedef struct Asset
diff --git a/src/ds.c b/src/ds.c
index ad96d32..9245b3f 100644
--- a/src/ds.c
+++ b/src/ds.c
@@ -300,7 +300,7 @@ static b32 RBTreeSearchNearest(RBTree *tree, u64 key, RBNode **out_node)
             break;
 
         u64 diff = node->key - key;
-        diff = Abs(diff);
+        diff = Absu64(diff);
 
         if (diff == 0)
         {
@@ -546,6 +546,39 @@ static KeyValuePair *HashTableSearchRawptr(HashTable *table, rawptr key)
     return result;
 }
 
+// Removes the entry stored under `key` from `table`, if there is one.
+// The matching node is unlinked from its bucket, its key and value are zeroed, and it is
+// handed to HTQueuePush on the table's free_lists. Does nothing when the key is absent.
+static void HashTableDeleteU64(HashTable *table, u64 key)
+{
+    u64 hash = HashFromString(String8Struct(&key));
+    u64 index = hash % table->cap;
+    HashList *list = table->lists + index;
+    HashNode *prev = P_HT_NIL;
+    for (HashNode *node = list->first; node != P_HT_NIL; node = node->next)
+    {
+        if (node->v.key_u64 == key)
+        {
+            // Unlink: patch the predecessor, or the bucket head when the node is first.
+            if (prev != P_HT_NIL)
+                prev->next = node->next;
+            else
+                list->first = node->next;
+
+            // NOTE(review): assumes HashList carries a `last` pointer like free_lists does - confirm.
+            if (list->last == node)
+                list->last = prev;
+
+            node->v.key_u64 = 0;
+            node->v.value_u64 = 0;
+            HTQueuePush(table->free_lists.first, table->free_lists.last, node);
+            break;
+        }
+
+        prev = node;
+    }
+}
+
 // ::DataStructures::HashTable::Functions::End::
 
 
diff --git a/src/ds.h b/src/ds.h
index 9e2fe5b..7c8c820 100644
--- a/src/ds.h
+++ b/src/ds.h
@@ -121,4 +121,5 @@ static HashNode *HashTablePushU64U32(HashTable *table, u64 key, u32 value);
 static HashNode *HashTablePushU64U64(HashTable *table, u64 key, u64 value);
 static HashNode *HashTablePushU64String8(HashTable *table, u64 key, String8 value);
 static HashNode *HashTablePushU64Rawptr(HashTable *table, u64 key, rawptr value);
+static void HashTableDeleteU64(HashTable *table, u64 key);
 
diff --git a/src/game.c b/src/game.c
index f547cfd..f0721be 100644
--- a/src/game.c
+++ b/src/game.c
@@ -84,7 +84,7 @@ static void RunCycle(GameContext *ctx, GameInput *inputs, u32 i_count)
     CreateAndUploadToBuffer(vertex_buffer, ctx->gui.vertices);
     CreateAndUploadToBuffer(index_buffer, ctx->gui.indices);
 
-    JobQueueWaitForCompletion(&renderer.vk.imm.queue);
+    WaitForBufferQueue();
 
     BeginFrame();
 
diff --git a/src/platform/platform.h b/src/platform/platform.h
index 441615c..b5892d2 100644
--- a/src/platform/platform.h
+++ b/src/platform/platform.h
@@ -73,8 +73,29 @@ static inline u64 ReadCPUTimer();
 
 // ::Platform::Atomics::Header::
 
+// DefSig<Op>(T) spells the prototype of <Op><T>; DefScalarSig(<Op>) expands it once per scalar type.
+// CompareExchange reports success as a b32 so the prototype matches the platform implementation macro.
+#define DefSigAtomicFetchIncr(T) static inline T AtomicFetchIncr##T(T *ptr)
+#define DefSigAtomicFetchSub(T) static inline T AtomicFetchSub##T(T *ptr, T count)
+#define DefSigAtomicIncr(T) static inline void AtomicIncr##T(T *ptr)
+#define DefSigAtomicStore(T) static inline void AtomicStore##T(T *ptr, T value)
+#define DefSigAtomicLoad(T) static inline T AtomicLoad##T(T *ptr)
+#define DefSigAtomicCompareExchange(T) static inline b32 AtomicCompareExchange##T(T *ptr, T *expected, T desired)
+
+DefScalarSig(AtomicFetchIncr);
+DefScalarSig(AtomicFetchSub);
+DefScalarSig(AtomicIncr);
+DefScalarSig(AtomicStore);
+DefScalarSig(AtomicLoad);
+DefScalarSig(AtomicCompareExchange);
+
+/*
 static inline u32 AtomicFetchIncrU32(u32 *ptr);
 static inline u32 AtomicFetchSubU32(u32 *ptr, u32 count);
 static inline void AtomicIncrU32(u32 *ptr);
 static inline u32 AtomicFetchU32(u32
*ptr); +static inline void AtomicStoreB32(b32 *ptr, b32 value); static inline u32 AtomicLoadU32(u32 *ptr); +static inline b32 AtomicCompareExchangeU32(u32 *ptr, u32 *expected, u32 desired); +static inline b32 AtomicCompareExchangeB32(b32 *ptr, b32 *expected, b32 desired); +*/ diff --git a/src/platform/platform_linux.h b/src/platform/platform_linux.h index 2930e2b..3000e36 100644 --- a/src/platform/platform_linux.h +++ b/src/platform/platform_linux.h @@ -87,3 +87,40 @@ KeyboardInput ConvertInputEvent(u32 x_key); b32 CheckSyscallErr(void *ptr); +// ::Platform::Linux::Atomics::Header:: + +#define DefAtomicFetchIncr(T) \ +static inline T AtomicFetchIncr##T(T *ptr) \ +{ \ + return __atomic_fetch_add(ptr, (T)1, __ATOMIC_ACQUIRE); \ +} + +#define DefAtomicIncr(T) \ +static inline void AtomicIncr##T(T *ptr) \ +{ \ + __atomic_fetch_add(ptr, (T)1, __ATOMIC_RELEASE); \ +} + +#define DefAtomicStore(T) \ +static inline void AtomicStore##T(T *ptr, T value) \ +{ \ + __atomic_store_n(ptr, value, __ATOMIC_RELEASE); \ +} + +#define DefAtomicLoad(T) \ +static inline T AtomicLoad##T(T *ptr) \ +{ \ + return __atomic_load_n(ptr, __ATOMIC_ACQUIRE); \ +} + +#define DefAtomicFetchSub(T) \ +static inline T AtomicFetchSub##T(T *ptr, T count) \ +{ \ + return __atomic_fetch_sub(ptr, count, __ATOMIC_ACQUIRE); \ +} + +#define DefAtomicCompareExchange(T) \ +static inline b32 AtomicCompareExchange##T(T *ptr, T *expected, T desired) \ +{ \ + return __atomic_compare_exchange_n(ptr, expected, desired, true, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); \ +} diff --git a/src/platform/platform_linux_public.c b/src/platform/platform_linux_public.c index 7821cc0..6fc7b4a 100644 --- a/src/platform/platform_linux_public.c +++ b/src/platform/platform_linux_public.c @@ -334,6 +334,14 @@ static inline u64 ReadCPUTimer() // ::Platform::Atomics::Functions::Start:: +DefScalarImpl(AtomicFetchIncr); +DefScalarImpl(AtomicIncr); +DefScalarImpl(AtomicStore); +DefScalarImpl(AtomicLoad); +DefScalarImpl(AtomicFetchSub); 
+DefScalarImpl(AtomicCompareExchange); + +/* static inline u32 AtomicFetchIncrU32(u32 *ptr) { return __atomic_fetch_add(ptr, 1, __ATOMIC_ACQUIRE); @@ -354,4 +362,15 @@ static inline u32 AtomicFetchSubU32(u32 *ptr, u32 count) return __atomic_sub_fetch(ptr, count, __ATOMIC_ACQUIRE); } +static inline b32 AtomicCompareExchangeU32(u32 *ptr, u32 *expected, u32 desired) +{ + return __atomic_compare_exchange_n(ptr, expected, desired, true, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); +} + +static inline b32 AtomicCompareExchangeB32(b32 *ptr, b32 *expected, b32 desired) +{ + return __atomic_compare_exchange_n(ptr, expected, desired, true, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); +} +*/ + // ::Platform::Atomics::Functions::End:: diff --git a/src/renderer.h b/src/renderer.h index 99cbc5d..4d72207 100644 --- a/src/renderer.h +++ b/src/renderer.h @@ -4,9 +4,17 @@ typedef u32 DescHandle; -// @requirement RenderBuffer type; -// @requirement u32 size; +// @requirement RenderBufferType type +// @requirement u32 size +// @requirement u32 index typedef struct RenderBuffer RenderBuffer; + +// @requirement TextureBufferType type +// @requirement u32 width +// @requirement u32 height +// @requirement u32 index +typedef struct TextureBuffer TextureBuffer; + typedef struct PushConst PushConst; typedef struct ShaderGlobals ShaderGlobals; @@ -28,14 +36,21 @@ typedef enum PipelineType_e typedef enum RenderBufferType_e { - RENDER_BUFFER_TYPE_NONE = 0x0000, - RENDER_BUFFER_TYPE_VERTEX = 0x001, - RENDER_BUFFER_TYPE_INDEX = 0x002, - RENDER_BUFFER_TYPE_UNIFORM = 0x004, - RENDER_BUFFER_TYPE_STAGING = 0x008, - RENDER_BUFFER_TYPE_STORAGE = 0x010, + RENDER_BUFFER_TYPE_NONE = 0x0000, + RENDER_BUFFER_TYPE_VERTEX = 0x0001, + RENDER_BUFFER_TYPE_INDEX = 0x0002, + RENDER_BUFFER_TYPE_UNIFORM = 0x0004, + RENDER_BUFFER_TYPE_STAGING = 0x0008, + RENDER_BUFFER_TYPE_STORAGE = 0x0010, } RenderBufferType; +typedef enum TextureBufferType_e +{ + IMAGE_BUFFER_TYPE_NONE = 0x0000, + IMAGE_BUFFER_TYPE_IMAGE = 0x0001, + 
IMAGE_BUFFER_TYPE_SAMPLER = 0x0002, +} TextureBufferType; + typedef enum VertexAttrType_e { VERTEX_ATTRIBUTE_TYPE_VERTEX = 0, @@ -61,8 +76,9 @@ void DestroyRenderer(); static b32 CreateBuffer(RenderBuffer *buffer); static void FreeBuffers(RenderBuffer *buffers, u32 buffer_count); -static b32 UploadToBuffer(RenderBuffer **buffer, rawptr *ptr, u32 count, u8 thr_ix); +static b32 UploadToBuffer(RenderBuffer **buffer, rawptr *ptr, u32 count, u32 thr_ix); static void CreateAndUploadToBuffer(RenderBuffer *buffer, rawptr ptr); +static DescHandle CreateAndUploadToTexture(TextureBuffer *buffer, rawptr ptr); static void BindVertexBuffer(RenderBuffer *buffer); static void BindIndexBuffer(RenderBuffer *buffer); static AssetHandle RendererLoadTexture(TextureAsset asset_id); diff --git a/src/renderer_vulkan.c b/src/renderer_vulkan.c index 42eeede..f12c1bc 100644 --- a/src/renderer_vulkan.c +++ b/src/renderer_vulkan.c @@ -319,17 +319,14 @@ static void ResizeSwapchain() // ::Vulkan::Images::Functions::Start:: -static b32 CreateVkSampler(Image *image, u32 thread_idx, u8 *buf, u32 width, u32 height) +static b32 CreateVkSampler(TextureBuffer *buffer) { b32 success = true; - VkDevice device = renderer.vk.device; - VkCommandBuffer cmd = renderer.vk.imm.cmds[thread_idx]; - VkFence fence = renderer.vk.imm.fences[thread_idx]; - VkQueue queue = renderer.vk.queues.transfer_queue; + Image *image = &buffer->image; RenderBuffer staging_buffer = { .type = RENDER_BUFFER_TYPE_STAGING, - .size = width * height, + .size = buffer->width * buffer->height, }; VmaAllocationCreateInfo alloc_create_info = { @@ -348,8 +345,8 @@ static b32 CreateVkSampler(Image *image, u32 thread_idx, u8 *buf, u32 width, u32 .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .extent = { - .width = width, - .height = height, + .width = buffer->width, + .height = buffer->height, .depth = 1, }, }; @@ -372,44 +369,7 @@ static b32 CreateVkSampler(Image *image, u32 
thread_idx, u8 *buf, u32 width, u32 if (success) { image->curr_layout = VK_IMAGE_LAYOUT_UNDEFINED; - TransitionImage(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - success = CreateBuffer(&staging_buffer); - } - - if (success) - success = BeginImmSubmit(device, fence, cmd); - - if (success) - { - rawptr mapped_buf = NULL; - vmaMapMemory(renderer.vk.alloc, staging_buffer.alloc, &mapped_buf); - MemCpy(mapped_buf, buf, width * height); - - VkBufferImageCopy buffer_copy = { - .bufferRowLength = width, - .bufferImageHeight = height, - .imageSubresource = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .layerCount = 1, - }, - .imageExtent = { - .width = width, - .height = height, - .depth = 1, - }, - }; - - vkCmdCopyBufferToImage(cmd, staging_buffer.buffer, image->img, image->curr_layout, 1, &buffer_copy); - - vmaUnmapMemory(renderer.vk.alloc, staging_buffer.alloc); - vmaDestroyBuffer(renderer.vk.alloc, staging_buffer.buffer, staging_buffer.alloc); - - success = FinishImmSubmit(device, fence, cmd, queue); - } - - if (success) - { VkImageViewCreateInfo view_info = { .sType = STYPE(IMAGE_VIEW_CREATE_INFO), .image = image->img, @@ -422,7 +382,7 @@ static b32 CreateVkSampler(Image *image, u32 thread_idx, u8 *buf, u32 width, u32 } }; - result = vkCreateImageView(device, &view_info, NULL, &image->view); + result = vkCreateImageView(renderer.vk.device, &view_info, NULL, &image->view); if (result != VK_SUCCESS) success = false; } @@ -447,7 +407,7 @@ static b32 CreateVkSampler(Image *image, u32 thread_idx, u8 *buf, u32 width, u32 .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR, }; - result = vkCreateSampler(device, &sampler_info, NULL, &image->sampler); + result = vkCreateSampler(renderer.vk.device, &sampler_info, NULL, &image->sampler); if (result != VK_SUCCESS) success = false; } @@ -455,6 +415,56 @@ static b32 CreateVkSampler(Image *image, u32 thread_idx, u8 *buf, u32 width, u32 return success; } +static void UploadToImage(TextureBuffer *buffer, u8 *data, u32 thread_idx) +{ + 
b32 success = true;
+
+    VkDevice device = renderer.vk.device;
+    VkCommandBuffer cmd = renderer.vk.imm.cmds[thread_idx];
+    VkFence fence = renderer.vk.imm.fences[thread_idx];
+    VkQueue queue = renderer.vk.queues.transfer_queue;
+    Image *image = &buffer->image;
+    u32 width = buffer->width;
+    u32 height = buffer->height;
+    u32 channels = buffer->channels;
+    RenderBuffer staging_buffer = { .type = RENDER_BUFFER_TYPE_STAGING, .size = width * height * channels }; // was uninitialized; size matches the MemCpy below
+
+    success = CreateBuffer(&staging_buffer);
+
+    if (success)
+        success = BeginImmSubmit(device, fence, cmd);
+
+    if (success)
+    {
+        TransitionImage(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); // records into cmd, so it follows BeginImmSubmit (NOTE(review): assumes BeginImmSubmit begins recording - confirm)
+
+        rawptr mapped_buf = NULL;
+        vmaMapMemory(renderer.vk.alloc, staging_buffer.alloc, &mapped_buf);
+        MemCpy(mapped_buf, data, width * height * channels);
+
+        VkBufferImageCopy buffer_copy = {
+            .bufferRowLength = width,
+            .bufferImageHeight = height,
+            .imageSubresource = {
+                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                .layerCount = 1,
+            },
+            .imageExtent = {
+                .width = width,
+                .height = height,
+                .depth = 1,
+            },
+        };
+
+        vkCmdCopyBufferToImage(cmd, staging_buffer.buffer, image->img, image->curr_layout, 1, &buffer_copy);
+
+        vmaUnmapMemory(renderer.vk.alloc, staging_buffer.alloc);
+
+        success = FinishImmSubmit(device, fence, cmd, queue);
+        vmaDestroyBuffer(renderer.vk.alloc, staging_buffer.buffer, staging_buffer.alloc); // freed only after the copy is submitted (NOTE(review): assumes FinishImmSubmit waits on the fence - confirm)
+    }
+}
+
 // ::Vulkan::Images::Functions::End::
 
 
@@ -488,6 +498,14 @@ static void DescriptorTableInsert(DescType type, u64 asset_id, DescHandle handle
     HashTablePushU64Rawptr(table, asset_id, asset_info);
 }
 
+static void DescriptorTableDelete(DescType type, u64 asset_id)
+{
+    HashTable *table = &renderer.vk.pipe.bindings[type].lookup_table;
+
+    // TODO: not implemented yet - the entry for asset_id is never removed from `table`.
+
+}
+
 // ::Vulkan::Descriptors::Functions::End::
 
 
@@ -971,11 +989,9 @@ static b32 CreateImmediateStructures()
     else
         renderer.vk_conf.avail_threads = 0;
 
-    imm->pools = ArenaAlloc(renderer.perm_arena, sizeof(VkCommandPool) * renderer.vk_conf.avail_threads);
-    imm->cmds = ArenaAlloc(renderer.perm_arena, sizeof(VkCommandBuffer) *
renderer.vk_conf.avail_threads); - imm->fences = ArenaAlloc(renderer.perm_arena, sizeof(VkFence) * renderer.vk_conf.avail_threads); - imm->queued_buffers = ArenaAlloc(renderer.perm_arena, sizeof(RenderBuffer) * BUFFER_QUEUE_LEN); - imm->data = ArenaAlloc(renderer.perm_arena, sizeof(void *) * BUFFER_QUEUE_LEN); + imm->pools = MakeArray(renderer.perm_arena, VkCommandPool, renderer.vk_conf.avail_threads); + imm->cmds = MakeArray(renderer.perm_arena, VkCommandBuffer, renderer.vk_conf.avail_threads); + imm->fences = MakeArray(renderer.perm_arena, VkFence, renderer.vk_conf.avail_threads); for (u32 i = 0; i < renderer.vk_conf.avail_threads && success; i++) { @@ -997,6 +1013,15 @@ static b32 CreateImmediateStructures() return success; } +static void CreateUploadQueues() +{ + for (u32 i = 0; i < DESC_TYPE_MAX; i++) + { + renderer.upload_queues[i].queued_ptrs = MakeArray(renderer.perm_arena, rawptr, BUFFER_QUEUE_LEN); + renderer.upload_queues[i].data = MakeArray(renderer.perm_arena, rawptr, BUFFER_QUEUE_LEN); + } +} + static b32 CreateSwapchain() { b32 success = true; @@ -1016,8 +1041,8 @@ static b32 CreateSwapchain() u32 width = renderer.vk.sc.extent.width; u32 height = renderer.vk.sc.extent.height; - extent.width = Clamp((u32)width, capabilities.minImageExtent.width, capabilities.maxImageExtent.width); - extent.height = Clamp((u32)height, capabilities.minImageExtent.height, capabilities.maxImageExtent.height); + extent.width = Clampu32((u32)width, capabilities.minImageExtent.width, capabilities.maxImageExtent.width); + extent.height = Clampu32((u32)height, capabilities.minImageExtent.height, capabilities.maxImageExtent.height); if (present_mode == INT_MAX || format == INT_MAX || color_space == INT_MAX) { @@ -1194,7 +1219,7 @@ static b32 CreateDescriptors() if (result != VK_SUCCESS) success = false; - for (u32 i = DESC_TYPE_COMBINED_SAMPLER; i < DESC_TYPE_MAX; i++) + for (u32 i = DESC_TYPE_SAMPLER; i < DESC_TYPE_MAX; i++) { bindless_layout_binding.descriptorType = 
desc_type_map[i];
         result = vkCreateDescriptorSetLayout(device, &bindless_layout_create_info, NULL, &renderer.vk.pipe.layouts[i]);
@@ -1382,6 +1407,97 @@ static void StartVkLoaderThreads()
 
 // ::Vulkan::Async::Functions::Start::
 
+// Drains up to 16 queued entries from the DESC_TYPE_BUFFER upload queue.
+// Entries are copied out while holding the queue's ticket mutex; CreateBuffer and
+// UploadToBuffer then run outside the lock. Returns the number of entries processed.
+static u32 VkLoaderProcessBuffers(u32 thread_index)
+{
+    JobQueue *job_queue = &renderer.upload_queues[DESC_TYPE_BUFFER].job_queue;
+    TicketMut *ticket_mut = &renderer.upload_queues[DESC_TYPE_BUFFER].ticket_mut;
+    RenderBuffer **render_buffers = renderer.upload_queues[DESC_TYPE_BUFFER].queued_buffers;
+    rawptr *buffer_data = renderer.upload_queues[DESC_TYPE_BUFFER].data;
+    u32 buffer_count = JobQueueGetCount(job_queue);
+
+    u32 count = 0;
+    if (buffer_count > 0)
+    {
+        TicketMutLock(ticket_mut);
+
+        // Re-read under the lock so the indices taken below match what JobQueueMarkUnqueued removes.
+        buffer_count = JobQueueGetCount(job_queue);
+
+        RenderBuffer *buffers[16];
+        rawptr data[16];
+        // Take entries from the end of the queue: indices [buffer_count - count, buffer_count).
+        for (u32 i = buffer_count; i > 0 && count < 16; i--)
+        {
+            buffers[count] = render_buffers[i - 1];
+            data[count] = buffer_data[i - 1];
+            count += 1;
+        }
+
+        JobQueueMarkUnqueued(job_queue, count);
+
+        TicketMutUnlock(ticket_mut);
+
+        for (u32 i = 0; i < count; i++)
+            Assert(CreateBuffer(buffers[i]), "VkLoaderProcessBuffers failure: CreateBuffer failed");
+
+        Assert(UploadToBuffer(buffers, data, count, thread_index), "VkLoaderProcessBuffers failure: UploadToBuffer failed");
+
+        JobQueueMarkCompleted(job_queue, count);
+    }
+
+    return count;
+}
+
+// Drains up to 16 queued entries from the DESC_TYPE_SAMPLER upload queue.
+// Entries are copied out while holding the queue's ticket mutex; CreateVkSampler and
+// UploadToImage then run for each one outside the lock. Returns the number processed.
+static u32 VkLoaderProcessSamplers(u32 thread_index)
+{
+    JobQueue *job_queue = &renderer.upload_queues[DESC_TYPE_SAMPLER].job_queue;
+    TicketMut *ticket_mut = &renderer.upload_queues[DESC_TYPE_SAMPLER].ticket_mut;
+    TextureBuffer **texture_buffers = renderer.upload_queues[DESC_TYPE_SAMPLER].queued_textures;
+    rawptr *texture_data = renderer.upload_queues[DESC_TYPE_SAMPLER].data;
+    u32 buffer_count = JobQueueGetCount(job_queue);
+
+    u32 count = 0;
+    if (buffer_count > 0)
+    {
+        TicketMutLock(ticket_mut);
+
+        // Re-read under the lock so the indices taken below match what JobQueueMarkUnqueued removes.
+        buffer_count = JobQueueGetCount(job_queue);
+
+        TextureBuffer *buffers[16];
+        rawptr data[16];
+        // Take entries from the end of the queue: indices [buffer_count - count, buffer_count).
+        for (u32 i = buffer_count; i > 0 && count < 16; i--)
+        {
+            buffers[count] = texture_buffers[i - 1];
+            data[count] = texture_data[i - 1];
+            count += 1;
+        }
+
+        JobQueueMarkUnqueued(job_queue, count);
+
+        TicketMutUnlock(ticket_mut);
+
+        for (u32 i = 0; i < count; i++)
+        {
+            Assert(CreateVkSampler(buffers[i]), "Unable to create VkSampler");
+            UploadToImage(buffers[i], data[i], thread_index);
+        }
+
+        // Complete on the same queue the jobs were counted on, as VkLoaderProcessBuffers does.
+        JobQueueMarkCompleted(job_queue, count);
+    }
+
+    return count;
+}
+
+
 #ifdef __linux__
 
 void *VkLoaderStart(void *i)
@@ -1391,46 +1507,44 @@ void *VkLoaderStart(void *i)
     pthread_mutex_t mut;
     pthread_mutex_init(&mut, NULL);
 
+    u32 processed_count = 0;
+    u32 iter_count = 0;
     for (;;)
     {
-        TicketMutLock(&renderer.vk.imm.mut);
+        for (u32 i = 0; i < DESC_TYPE_MAX; i++)
+        {
+            Mut *mut = &renderer.upload_queues[i].mut;
+            if (MutTryLock(mut))
+            {
+                switch (i)
+                {
+                    case DESC_TYPE_BUFFER:
+                    {
+                        processed_count += VkLoaderProcessBuffers(index);
+                    } break;
+                    case DESC_TYPE_SAMPLER:
+                    {
+                        processed_count += VkLoaderProcessSamplers(index);
+                    } break;
+                    default:
+                        break;
+                }
 
-        u32 job_count = JobQueueGetCount(&renderer.vk.imm.queue);
-        if (job_count < 0)
-        {
-            TicketMutUnlock(&renderer.vk.imm.mut);
-            pthread_exit(NULL);
+                MutUnlock(mut);
+            }
         }
-        else if (job_count == 0)
+
+        iter_count += 1;
+
+        if (processed_count < 0)
+            pthread_exit(NULL);
+        else if (processed_count == 0 && iter_count >= 3)
         {
-            TicketMutUnlock(&renderer.vk.imm.mut);
+            iter_count = 0;
             pthread_mutex_lock(&mut);
             pthread_cond_wait(&cond, &mut);
             pthread_mutex_unlock(&mut);
         }
-        else
-        {
-            RenderBuffer *buffers[16];
-            rawptr data[16];
-            u32 count = 0;
-            for (u32 i = 0; i < job_count && i < 16; i++)
-            {
-                buffers[i] = renderer.vk.imm.queued_buffers[i];
-                data[i] = renderer.vk.imm.data[i];
-                count += 1;
-            }
-
-            JobQueueMarkUnqueued(&renderer.vk.imm.queue, count);
-
-            TicketMutUnlock(&renderer.vk.imm.mut);
-
-            for (u32 i = 0; i < count; i++)
-                Assert(CreateBuffer(buffers[i]), "VkLoader CreateBuffer failure");
-
-            Assert(UploadToBuffer(buffers, data, count, index), "VkLoader UploadToBuffer failure");
-
-
JobQueueMarkCompleted(&renderer.vk.imm.queue, count); - } } } diff --git a/src/renderer_vulkan.h b/src/renderer_vulkan.h index 4a2cc92..11e90e9 100644 --- a/src/renderer_vulkan.h +++ b/src/renderer_vulkan.h @@ -173,9 +173,10 @@ VK_DECLARE(vkCreateSampler); typedef enum DescType_e { DESC_TYPE_SHARED, - DESC_TYPE_COMBINED_SAMPLER, + DESC_TYPE_SAMPLER, // DO NOT MOVE FROM POSITION 1 !! DESC_TYPE_STORAGE_IMAGE, DESC_TYPE_UNIFORM, + DESC_TYPE_BUFFER, DESC_TYPE_MAX, } DescType; @@ -185,6 +186,26 @@ typedef struct ShaderGlobals Vec2 res; } ShaderGlobals; +typedef struct Image +{ + VkImage img; + VkImageView view; + VkSampler sampler; + VmaAllocation alloc; + VkFormat fmt; + VkImageLayout curr_layout; +} Image; + +typedef struct TextureBuffer +{ + TextureBufferType type; + Image image; + u32 width; + u32 height; + u32 channels; + u32 index; +} TextureBuffer; + typedef struct RenderBuffer { RenderBufferType type; @@ -192,7 +213,7 @@ typedef struct RenderBuffer VmaAllocation alloc; VmaAllocationInfo info; u32 size; - i32 mem_index; // TODO(MA): use this + u32 index; // TODO(MA): use this } RenderBuffer; typedef struct MeshBuffer @@ -254,15 +275,27 @@ typedef struct FrameStructures u32 *buffer_counts; } FrameStructures; +typedef struct UploadQueue +{ + union + { + RenderBuffer **queued_buffers; + TextureBuffer **queued_textures; + rawptr *queued_ptrs; + }; + rawptr *data; + TicketMut ticket_mut; + Mut mut; + JobQueue job_queue; +} UploadQueue; + typedef struct ImmediateStructures { VkCommandPool *pools; VkCommandBuffer *cmds; VkFence *fences; - RenderBuffer **queued_buffers; - rawptr *data; - JobQueue queue; - TicketMut mut; + JobQueue buffer_queue; + JobQueue texture_queue; } ImmediateStructures; typedef struct DeviceQueues @@ -272,16 +305,6 @@ typedef struct DeviceQueues b8 single_queue; } DeviceQueues; -typedef struct Image -{ - VkImage img; - VkImageView view; - VkSampler sampler; - VmaAllocation alloc; - VkFormat fmt; - VkImageLayout curr_layout; -} Image; - typedef 
struct SwapchainStructures
 {
     VkFormat format;
@@ -350,6 +373,7 @@ typedef struct Renderer
     PendingUpdates pending;
     Arena *arena;
     Arena *perm_arena;
+    UploadQueue upload_queues[DESC_TYPE_MAX];
 } Renderer;
 
 // ::Vulkan::Debug::Functions::Header::
@@ -384,6 +408,7 @@ static VkFormat GetImageFormat();
 static b32 CreateDescriptors();
 static b32 CreatePipelines();
 static b32 CreateShaderModule(u8 *bytes, u32 len, VkShaderModule *module);
+static void CreateUploadQueues();
 static void StartVkLoaderThreads();
 
 // ::Vulkan::Util::Functions::Header::
@@ -409,6 +434,8 @@ static inline void CopyImageToImage(VkCommandBuffer cmd, VkImage src, VkImage ds
 #endif
 
 void VkLoaderWake();
+static u32 VkLoaderProcessBuffers(u32 thread_index);
+static u32 VkLoaderProcessSamplers(u32 thread_index);
 
 // ::Vulkan::ImmediateSubmit::Functions::Header::
 
@@ -425,7 +452,8 @@ static void ResizeSwapchain();
 
 // ::Vulkan::Images::Functions::Header::
 
-static b32 CreateVkSampler(Image *image, u32 thread_idx, u8 *buf, u32 width, u32 height);
+static b32 CreateVkSampler(TextureBuffer *buffer);
+static void UploadToImage(TextureBuffer *buffer, u8 *data, u32 thread_idx);
 
 // ::Vulkan::Descriptors::Functions::Header::
 
diff --git a/src/renderer_vulkan_public.c b/src/renderer_vulkan_public.c
index 752819e..1358c25 100644
--- a/src/renderer_vulkan_public.c
+++ b/src/renderer_vulkan_public.c
@@ -40,6 +40,7 @@ b32 InitRenderer(Arena *arena)
     Assert(CreateImmediateStructures(), "Unable to create immediate structures");
     Assert(CreateDescriptors(), "Unable to initialize descriptors.");
     Assert(CreatePipelines(), "Unable to initialize pipelines.");
+    CreateUploadQueues();
 
     StartVkLoaderThreads();
 
@@ -197,7 +198,7 @@ static b32 CreateBuffer(RenderBuffer *buffer)
     return success;
 }
 
-static b32 UploadToBuffer(RenderBuffer **buffers, rawptr *ptrs, u32 count, u8 thr_ix)
+static b32 UploadToBuffer(RenderBuffer **buffers, rawptr *ptrs, u32 count, u32 thr_ix)
 {
     Assert(buffers, "UploadToBuffer: buffer must not be null");
     Assert(ptrs, "UploadToBuffer: ptr must not be null");
@@ -257,13 +258,34 @@ static b32 UploadToBuffer(RenderBuffer **buffers, rawptr *ptrs, u32 count, u8 th
 
+// Adds `buffer` and its source data to the DESC_TYPE_BUFFER upload queue under the queue's
+// ticket mutex, then calls VkLoaderWake(). The upload itself happens on a loader thread.
 static void CreateAndUploadToBuffer(RenderBuffer *buffer, rawptr ptr)
 {
-    TicketMutLock(&renderer.vk.imm.mut);
-
-    u32 job_idx = JobQueueAdd(&renderer.vk.imm.queue, 1);
-    renderer.vk.imm.queued_buffers[job_idx] = buffer;
-    renderer.vk.imm.data[job_idx] = ptr;
+    TicketMutLock(&renderer.upload_queues[DESC_TYPE_BUFFER].ticket_mut);
 
-    TicketMutUnlock(&renderer.vk.imm.mut);
+    u32 job_idx = JobQueueAdd(&renderer.upload_queues[DESC_TYPE_BUFFER].job_queue, 1);
+    renderer.upload_queues[DESC_TYPE_BUFFER].queued_buffers[job_idx] = buffer;
+    renderer.upload_queues[DESC_TYPE_BUFFER].data[job_idx] = ptr;
+
+    TicketMutUnlock(&renderer.upload_queues[DESC_TYPE_BUFFER].ticket_mut);
+
+    VkLoaderWake();
+}
+
+// Adds `buffer` and its source data to the DESC_TYPE_SAMPLER upload queue under the queue's
+// ticket mutex, then calls VkLoaderWake().
+// NOTE(review): nothing here allocates a descriptor; buffer->index is returned as the handle
+// on the assumption that the caller has already assigned it - confirm.
+static DescHandle CreateAndUploadToTexture(TextureBuffer *buffer, rawptr ptr)
+{
+    TicketMutLock(&renderer.upload_queues[DESC_TYPE_SAMPLER].ticket_mut);
+
+    u32 job_idx = JobQueueAdd(&renderer.upload_queues[DESC_TYPE_SAMPLER].job_queue, 1);
+    renderer.upload_queues[DESC_TYPE_SAMPLER].queued_textures[job_idx] = buffer;
+    renderer.upload_queues[DESC_TYPE_SAMPLER].data[job_idx] = ptr;
+
+    TicketMutUnlock(&renderer.upload_queues[DESC_TYPE_SAMPLER].ticket_mut);
 
     VkLoaderWake();
+
+    return buffer->index;
 }
 
@@ -313,12 +335,14 @@ static AssetHandle RendererLoadTexture(TextureAsset asset_id)
 
 static void WaitForBufferQueue()
 {
-    JobQueueWaitForCompletion(&renderer.vk.imm.queue);
+    JobQueueWaitForCompletion(&renderer.upload_queues[DESC_TYPE_BUFFER].job_queue); // the queue CreateAndUploadToBuffer adds jobs to
+    JobQueueWaitForCompletion(&renderer.upload_queues[DESC_TYPE_SAMPLER].job_queue); // the queue CreateAndUploadToTexture adds jobs to
 }
 
 static void ResetBufferQueue()
 {
-    JobQueueReset(&renderer.vk.imm.queue);
+    JobQueueReset(&renderer.upload_queues[DESC_TYPE_SAMPLER].job_queue);
+    JobQueueReset(&renderer.upload_queues[DESC_TYPE_BUFFER].job_queue);
 }
 
 // ::Vulkan::Renderer::Buffers::Functions::End::
diff --git a/src/util.c b/src/util.c
index 50c85b5..51b793f 100644
--- a/src/util.c
+++ b/src/util.c
@@ -302,51 +302,62 @@ static inline void
EndProfileBlock(ProfileBlock *block)
 
 // ::Util::Async::Functions::Start::
 
+static inline b32 MutTryLock(Mut *mut)
+{
+    b32 expected = false; // unlocked is false (the value MutUnlock stores); acquire by swapping in true
+    return AtomicCompareExchangeb32(&mut->lock, &expected, true); // nonzero only when this caller took the lock
+}
+
+static inline void MutUnlock(Mut *mut)
+{
+    AtomicStoreb32(&mut->lock, false);
+}
+
 static inline void TicketMutLock(TicketMut *mut)
 {
-    u32 ticket = AtomicFetchIncrU32(&mut->ticket);
+    u32 ticket = AtomicFetchIncru32(&mut->ticket);
     while (ticket != mut->next_ticket);
 }
 
 static inline void TicketMutUnlock(TicketMut *mut)
 {
-    AtomicIncrU32(&mut->next_ticket);
+    AtomicIncru32(&mut->next_ticket);
 }
 
 static inline u32 JobQueueAdd(JobQueue *queue, u32 count)
 {
-    u32 job_idx = AtomicFetchIncrU32(&queue->queued);
-    AtomicFetchIncrU32(&queue->remaining);
+    u32 job_idx = AtomicFetchIncru32(&queue->queued);
+    AtomicFetchIncru32(&queue->remaining);
     return job_idx;
 }
 
 static inline u32 JobQueueGetCount(JobQueue *queue)
 {
-    return AtomicLoadU32(&queue->queued);
+    return AtomicLoadu32(&queue->queued);
 }
 
 static inline void JobQueueMarkUnqueued(JobQueue *queue, u32 count)
 {
-    AtomicFetchSubU32(&queue->queued, count);
+    AtomicFetchSubu32(&queue->queued, count);
 }
 
 static inline void JobQueueMarkCompleted(JobQueue *queue, u32 count)
 {
-    AtomicFetchSubU32(&queue->remaining, count);
+    AtomicFetchSubu32(&queue->remaining, count);
 }
 
 static inline void JobQueueReset(JobQueue *queue)
 {
-    AtomicFetchSubU32(&queue->queued, queue->queued);
-    AtomicFetchSubU32(&queue->remaining, queue->remaining);
+    AtomicFetchSubu32(&queue->queued, queue->queued);
+    AtomicFetchSubu32(&queue->remaining, queue->remaining);
 }
 
 static inline void JobQueueWaitForCompletion(JobQueue *queue)
 {
     u32 remaining;
     do {
-        remaining = AtomicLoadU32(&queue->remaining);
+        remaining = AtomicLoadu32(&queue->remaining);
     } while (remaining != 0);
 }
 
diff --git a/src/util.h b/src/util.h
index 51408fa..f42e068 100644
--- a/src/util.h
+++ b/src/util.h
@@ -48,6 +48,39 @@ typedef struct Arena Arena;
 
 #define cast(T, x) ((T)(x))
 
+#define
DefScalarSig(def) \ + DefSig##def(i8); \ + DefSig##def(i16); \ + DefSig##def(i32); \ + DefSig##def(i64); \ + DefSig##def(u8); \ + DefSig##def(u16); \ + DefSig##def(u32); \ + DefSig##def(u64); \ + DefSig##def(f32); \ + DefSig##def(f64); \ + DefSig##def(b8); \ + DefSig##def(b32); \ + DefSig##def(uintptr); \ + DefSig##def(intptr) + +#define DefScalarImpl(def) \ + Def##def(i8); \ + Def##def(i16); \ + Def##def(i32); \ + Def##def(i64); \ + Def##def(u8); \ + Def##def(u16); \ + Def##def(u32); \ + Def##def(u64); \ + Def##def(f32); \ + Def##def(f64); \ + Def##def(b8); \ + Def##def(b32); \ + Def##def(uintptr); \ + Def##def(intptr) + + // ::Util::LinkedList::Macros:: #define CheckNil(nil, p) ((p) == 0 || (p) == nil) @@ -129,49 +162,28 @@ void MemCpy(rawptr dst, rawptr src, usize len); #define DefMathImpl(def) \ DefIntegerImpl(def); \ - DefFloatImpl(def); - -#define MathGeneric(fn, params) _Generic(params, \ - i8: i8##fn, \ - i16: i16##fn, \ - i32: i32##fn, \ - i64: i64##fn, \ - u8: u8##fn, \ - u16: u16##fn, \ - u32: u32##fn, \ - u64: u64##fn, \ - f32: f32##fn, \ - f64: f64##fn \ -)params - -#define Min(l, r) MathGeneric(Min, (l, r)) - -#define Max(l, r) MathGeneric(Max, (l, r)) - -#define Clamp(v, min, max) MathGeneric(Clamp, (v, min, max)) - -#define Abs(v) MathGeneric(Abs, (v)) + DefFloatImpl(def) #define DefMin(T) \ -T T##Min(T l, T r) \ +T Min##T(T l, T r) \ { \ return l < r ? l : r; \ } #define DefMax(T) \ -T T##Max(T l, T r) \ +T Max##T(T l, T r) \ { \ return l > r ? l : r; \ } #define DefClamp(T) \ -T T##Clamp(T v, T min, T max) \ +T Clamp##T(T v, T min, T max) \ { \ - return Min(max, Max(v, min)); \ + return Min##T(max, Max##T(v, min)); \ } #define DefAbs(T) \ -T T##Abs(T v) \ +T Abs##T(T v) \ { \ return v < (T)0 ? 
-v : v; \ } @@ -242,12 +254,19 @@ typedef struct TicketMut u32 volatile next_ticket; } TicketMut; +typedef struct Mut +{ + b32 volatile lock; +} Mut; + typedef struct JobQueue { u32 volatile queued; u32 volatile remaining; } JobQueue; +static inline b32 MutTryLock(Mut *mut); +static inline void MutUnlock(Mut *mut); static inline void TicketMutLock(TicketMut *mut); static inline void TicketMutUnlock(TicketMut *mut); static inline u32 JobQueueAdd(JobQueue *queue, u32 count); diff --git a/src/vulkan_config.c b/src/vulkan_config.c index 28a75f8..85b953f 100644 --- a/src/vulkan_config.c +++ b/src/vulkan_config.c @@ -259,7 +259,7 @@ static VkDescriptorSetLayoutCreateInfo shared_layout_create_info = { }; static VkDescriptorType desc_type_map[DESC_TYPE_MAX] = { - [DESC_TYPE_COMBINED_SAMPLER] = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + [DESC_TYPE_SAMPLER] = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, [DESC_TYPE_STORAGE_IMAGE] = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, [DESC_TYPE_UNIFORM] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, };