From 44668e87428e0d73765cfba05ceb39b2f44b00f0 Mon Sep 17 00:00:00 2001
From: Matthew
Date: Wed, 23 Apr 2025 18:39:43 +1000
Subject: [PATCH] job queue abstraction, create image sampler function and misc fixes

---
Review notes (this section is ignored when the patch is applied):
 - Indentation and blank context lines were reconstructed; the index hashes
   below describe the patch as originally generated.
 - AtomicFetchSubU32 now returns the previous value and uses acquire-release
   ordering; AtomicFetchAddU32 and AtomicStoreU32 were added and the undefined
   AtomicFetchU32 prototype was dropped.
 - JobQueueAdd honours its count argument, JobQueueReset stores zero, and
   JobQueueGetCount is declared in util.h.
 - CreateAndUploadToBuffer keeps its BUFFER_QUEUE_LEN assert, and game.c waits
   through WaitForBufferQueue, which is now declared in renderer.h.
 - CreateVkSampler sizes the staging copy for 4 bytes per texel, records the
   layout transition after BeginImmSubmit, frees the staging buffer after
   FinishImmSubmit, checks vmaMapMemory and no longer keeps a pointer to an
   out-of-scope compound literal.

 src/game.c                           |  14 +-
 src/platform/platform.h              |   4 +
 src/platform/platform_linux_public.c |  26 ++
 src/renderer.h                       |   2 +
 src/renderer_vulkan.c                | 203 +++++++++++++++++++++++++--
 src/renderer_vulkan.h                |  19 ++-
 src/renderer_vulkan_public.c         |  30 ++--
 src/util.c                           |  45 ++++++
 src/util.h                           |  13 ++
 src/vulkan_config.c                  |   5 +-
 10 files changed, 323 insertions(+), 38 deletions(-)

diff --git a/src/game.c b/src/game.c
index b0943b0..f547cfd 100644
--- a/src/game.c
+++ b/src/game.c
@@ -48,8 +48,7 @@ static void DestroyGame()
 
 static void RunCycle(GameContext *ctx, GameInput *inputs, u32 i_count)
 {
-    __atomic_store_n(&renderer.vk.imm.job_count, 0, __ATOMIC_RELEASE);
-    __atomic_store_n(&renderer.vk.imm.remaining_count, 0, __ATOMIC_RELEASE);
+    ResetBufferQueue();
 
     PrepareGUICtx(ctx);
 
@@ -85,16 +84,7 @@ static void RunCycle(GameContext *ctx, GameInput *inputs, u32 i_count)
     CreateAndUploadToBuffer(vertex_buffer, ctx->gui.vertices);
     CreateAndUploadToBuffer(index_buffer, ctx->gui.indices);
 
-    for (u32 i = 0; i < renderer.vk_conf.avail_threads; i++)
-    {
-        pthread_cond_signal(&cond);
-    }
-
-    i32 count = -1;
-    while (count != 0)
-    {
-        __atomic_load(&renderer.vk.imm.remaining_count, &count, __ATOMIC_SEQ_CST);
-    }
+    WaitForBufferQueue();
 
     BeginFrame();
 
diff --git a/src/platform/platform.h b/src/platform/platform.h
index a7edf32..441615c 100644
--- a/src/platform/platform.h
+++ b/src/platform/platform.h
@@ -74,4 +74,8 @@ static inline u64 ReadCPUTimer();
 // ::Platform::Atomics::Header::
 
 static inline u32 AtomicFetchIncrU32(u32 *ptr);
+static inline u32 AtomicFetchAddU32(u32 *ptr, u32 count);
+static inline u32 AtomicFetchSubU32(u32 *ptr, u32 count);
 static inline void AtomicIncrU32(u32 *ptr);
+static inline u32 AtomicLoadU32(u32 *ptr);
+static inline void AtomicStoreU32(u32 *ptr, u32 value);
diff --git a/src/platform/platform_linux_public.c b/src/platform/platform_linux_public.c
index 6f6ec15..7821cc0 100644
--- a/src/platform/platform_linux_public.c
+++ b/src/platform/platform_linux_public.c
@@ -344,4 +344,30 @@ static inline void AtomicIncrU32(u32 *ptr)
     __atomic_fetch_add(ptr, 1, __ATOMIC_RELEASE);
 }
 
+// Atomically loads *ptr with acquire ordering.
+static inline u32 AtomicLoadU32(u32 *ptr)
+{
+    return __atomic_load_n(ptr, __ATOMIC_ACQUIRE);
+}
+
+// Atomically stores value into *ptr with release ordering.
+static inline void AtomicStoreU32(u32 *ptr, u32 value)
+{
+    __atomic_store_n(ptr, value, __ATOMIC_RELEASE);
+}
+
+// Atomically adds count to *ptr and returns the value *ptr held before the add.
+static inline u32 AtomicFetchAddU32(u32 *ptr, u32 count)
+{
+    return __atomic_fetch_add(ptr, count, __ATOMIC_ACQ_REL);
+}
+
+// Atomically subtracts count from *ptr and returns the value *ptr held before
+// the subtraction. Acquire-release, so writes made before the decrement are
+// visible to a thread that observes the new value with an acquire load.
+static inline u32 AtomicFetchSubU32(u32 *ptr, u32 count)
+{
+    return __atomic_fetch_sub(ptr, count, __ATOMIC_ACQ_REL);
+}
+
 // ::Platform::Atomics::Functions::End::
diff --git a/src/renderer.h b/src/renderer.h
index 36a22d2..99cbc5d 100644
--- a/src/renderer.h
+++ b/src/renderer.h
@@ -66,6 +66,8 @@ static void CreateAndUploadToBuffer(RenderBuffer *buffer, rawptr ptr);
 static void BindVertexBuffer(RenderBuffer *buffer);
 static void BindIndexBuffer(RenderBuffer *buffer);
 static AssetHandle RendererLoadTexture(TextureAsset asset_id);
+static void WaitForBufferQueue();
+static void ResetBufferQueue();
 
 // ::Renderer::Uniforms::Header::
 // ::Renderer::PushConstants::Header::
diff --git a/src/renderer_vulkan.c b/src/renderer_vulkan.c
index 03d4662..42eeede 100644
--- a/src/renderer_vulkan.c
+++ b/src/renderer_vulkan.c
@@ -204,11 +204,12 @@ static void BeginRendering()
 
 // ::Vulkan::ImmediateSubmit::Functions::Start::
 
-static b32 BeginImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd)
+static b32 BeginImmSubmit(VkDevice device, VkFence fence, VkCommandBuffer cmd)
 {
     b32 success = true;
 
-    VkResult result = vkResetFences(device, 1, fence);
+    VkFence f = fence;
+    VkResult result = vkResetFences(device, 1, &f);
     if (result != VK_SUCCESS)
     {
         Printfln("vkResetFences failure: %s", VkResultStr(result));
@@ -243,9 +244,10 @@ static b32 BeginImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd)
     return success;
 }
 
-static b32 FinishImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd, VkQueue queue)
+static b32 FinishImmSubmit(VkDevice device, VkFence fence, VkCommandBuffer cmd, VkQueue queue)
 {
     b32 success = true;
+    VkFence f = fence;
 
     VkResult result = vkEndCommandBuffer(cmd);
     if (result != VK_SUCCESS)
@@ -267,7 +269,7 @@ static b32 FinishImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd,
             .pCommandBufferInfos = &cmd_submit_info,
         };
 
-        result = vkQueueSubmit2(queue, 1, &submit_info, *fence);
+        result = vkQueueSubmit2(queue, 1, &submit_info, f);
         if (result != VK_SUCCESS)
         {
             Printfln("vkQueueSubmit2 imm failure: %s", VkResultStr(result));
@@ -277,7 +279,7 @@ static b32 FinishImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd,
 
     if (success)
     {
-        result = vkWaitForFences(device, 1, fence, true, 9999999999);
+        result = vkWaitForFences(device, 1, &f, true, 9999999999);
         if (result != VK_SUCCESS)
         {
             Printfln("vkWaitForFences imm failure: %s", VkResultStr(result));
@@ -315,6 +317,184 @@ static void ResizeSwapchain()
 
 
 
+// ::Vulkan::Images::Functions::Start::
+
+// Creates a device-local 2D image in VK_FORMAT_R8G8B8A8_SRGB of width x height
+// texels, copies buf into it through a staging buffer using thread_idx's
+// immediate-submit command buffer and fence on the transfer queue, then creates
+// the image view and sampler. buf must hold width * height * 4 bytes. Returns
+// true on success; on failure nothing already stored in *image is destroyed.
+static b32 CreateVkSampler(Image *image, u32 thread_idx, u8 *buf, u32 width, u32 height)
+{
+    b32 success = true;
+    b32 staging_created = false;
+    VkDevice device = renderer.vk.device;
+    VkCommandBuffer cmd = renderer.vk.imm.cmds[thread_idx];
+    VkFence fence = renderer.vk.imm.fences[thread_idx];
+    VkQueue queue = renderer.vk.queues.transfer_queue;
+
+    // VK_FORMAT_R8G8B8A8_SRGB is 4 bytes per texel.
+    u32 image_size = width * height * 4;
+
+    // Function scope: image_info keeps a pointer to this array until
+    // vmaCreateImage is called.
+    u32 queue_families[2] = {renderer.vk.queues.graphics, renderer.vk.queues.transfer};
+
+    RenderBuffer staging_buffer = {
+        .type = RENDER_BUFFER_TYPE_STAGING,
+        .size = image_size,
+    };
+
+    VmaAllocationCreateInfo alloc_create_info = {
+        .usage = VMA_MEMORY_USAGE_GPU_ONLY,
+        .requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+    };
+
+    VkImageCreateInfo image_info = {
+        .sType = STYPE(IMAGE_CREATE_INFO),
+        .imageType = VK_IMAGE_TYPE_2D,
+        .mipLevels = 1,
+        .arrayLayers = 1,
+        .format = VK_FORMAT_R8G8B8A8_SRGB,
+        .tiling = VK_IMAGE_TILING_OPTIMAL,
+        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+        .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .extent = {
+            .width = width,
+            .height = height,
+            .depth = 1,
+        },
+    };
+
+    if (renderer.vk.queues.graphics != renderer.vk.queues.transfer)
+    {
+        image_info.sharingMode = VK_SHARING_MODE_CONCURRENT;
+        image_info.queueFamilyIndexCount = 2;
+        image_info.pQueueFamilyIndices = queue_families;
+    }
+
+    VkResult result = vmaCreateImage(renderer.vk.alloc, &image_info,
+            &alloc_create_info, &image->img, &image->alloc, NULL);
+    if (result != VK_SUCCESS)
+    {
+        success = false;
+        Printfln("vmaCreateImage failure: %s", VkResultStr(result));
+    }
+
+    if (success)
+    {
+        image->curr_layout = VK_IMAGE_LAYOUT_UNDEFINED;
+        success = staging_created = CreateBuffer(&staging_buffer);
+    }
+
+    if (success)
+    {
+        rawptr mapped_buf = NULL;
+        result = vmaMapMemory(renderer.vk.alloc, staging_buffer.alloc, &mapped_buf);
+        if (result == VK_SUCCESS)
+        {
+            MemCpy(mapped_buf, buf, image_size);
+            vmaUnmapMemory(renderer.vk.alloc, staging_buffer.alloc);
+        }
+        else
+        {
+            success = false;
+            Printfln("vmaMapMemory failure: %s", VkResultStr(result));
+        }
+    }
+
+    if (success)
+        success = BeginImmSubmit(device, fence, cmd);
+
+    if (success)
+    {
+        // Recorded after BeginImmSubmit so the layout transition is part of
+        // the command buffer that FinishImmSubmit submits.
+        TransitionImage(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+
+        VkBufferImageCopy buffer_copy = {
+            .bufferRowLength = width,
+            .bufferImageHeight = height,
+            .imageSubresource = {
+                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                .layerCount = 1,
+            },
+            .imageExtent = {
+                .width = width,
+                .height = height,
+                .depth = 1,
+            },
+        };
+
+        vkCmdCopyBufferToImage(cmd, staging_buffer.buffer, image->img, image->curr_layout, 1, &buffer_copy);
+
+        // FinishImmSubmit submits the copy and then waits on the fence.
+        success = FinishImmSubmit(device, fence, cmd, queue);
+    }
+
+    // Destroyed only after FinishImmSubmit has returned (or was never
+    // reached), not while the recorded copy is still waiting to be submitted.
+    if (staging_created)
+        vmaDestroyBuffer(renderer.vk.alloc, staging_buffer.buffer, staging_buffer.alloc);
+
+    if (success)
+    {
+        VkImageViewCreateInfo view_info = {
+            .sType = STYPE(IMAGE_VIEW_CREATE_INFO),
+            .image = image->img,
+            .viewType = VK_IMAGE_VIEW_TYPE_2D,
+            .format = VK_FORMAT_R8G8B8A8_SRGB,
+            .subresourceRange = {
+                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                .levelCount = 1,
+                .layerCount = 1,
+            }
+        };
+
+        result = vkCreateImageView(device, &view_info, NULL, &image->view);
+        if (result != VK_SUCCESS)
+        {
+            success = false;
+            Printfln("vkCreateImageView failure: %s", VkResultStr(result));
+        }
+    }
+
+    if (success)
+    {
+        VkPhysicalDeviceProperties properties;
+        vkGetPhysicalDeviceProperties(renderer.vk.phys_device, &properties);
+
+        // TODO: handle no anisotropy
+        VkSamplerCreateInfo sampler_info = {
+            .sType = STYPE(SAMPLER_CREATE_INFO),
+            .magFilter = VK_FILTER_LINEAR,
+            .minFilter = VK_FILTER_LINEAR,
+            .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+            .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+            .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+            .anisotropyEnable = VK_TRUE,
+            .maxAnisotropy = properties.limits.maxSamplerAnisotropy,
+            .borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK,
+            .compareOp = VK_COMPARE_OP_ALWAYS,
+            .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
+        };
+
+        result = vkCreateSampler(device, &sampler_info, NULL, &image->sampler);
+        if (result != VK_SUCCESS)
+        {
+            success = false;
+            Printfln("vkCreateSampler failure: %s", VkResultStr(result));
+        }
+    }
+
+    return success;
+}
+
+// ::Vulkan::Images::Functions::End::
+
+
+
 // ::Vulkan::Descriptors::Functions::Start::
 
 static DescAssetInfo *DescriptorTableSearch(DescType type, u64 asset_id)
@@ -702,6 +882,7 @@ static b32 InitVkDeviceFunctions() {
     INIT_DEV_FN(vkCmdDraw);
     INIT_DEV_FN(vkDeviceWaitIdle);
     INIT_DEV_FN(vkCmdClearColorImage);
+    INIT_DEV_FN(vkCreateSampler);
 
     return true;
 }
@@ -1250,7 +1431,7 @@ void *VkLoaderStart(void *i)
     {
         TicketMutLock(&renderer.vk.imm.mut);
 
-        u32 job_count = __atomic_load_n(&renderer.vk.imm.job_count, __ATOMIC_SEQ_CST);
+        u32 job_count = JobQueueGetCount(&renderer.vk.imm.queue);
         if (job_count < 0)
         {
             TicketMutUnlock(&renderer.vk.imm.mut);
@@ -1275,7 +1456,7 @@ void *VkLoaderStart(void *i)
                 count += 1;
             }
 
-            __atomic_sub_fetch(&renderer.vk.imm.job_count, count, __ATOMIC_SEQ_CST);
+            JobQueueMarkUnqueued(&renderer.vk.imm.queue, count);
 
             TicketMutUnlock(&renderer.vk.imm.mut);
 
@@ -1284,11 +1465,17 @@ void *VkLoaderStart(void *i)
 
             Assert(UploadToBuffer(buffers, data, count, index), "VkLoader UploadToBuffer failure");
 
-            u32 rem = __atomic_sub_fetch(&renderer.vk.imm.remaining_count, count, __ATOMIC_RELEASE);
+            JobQueueMarkCompleted(&renderer.vk.imm.queue, count);
         }
     }
 }
 
+// Signals cond once for each of renderer.vk_conf.avail_threads.
+void VkLoaderWake()
+{
+    for (u32 i = 0; i < renderer.vk_conf.avail_threads; i++)
+        pthread_cond_signal(&cond);
+}
 
 #elif _WIN32
 
diff --git a/src/renderer_vulkan.h b/src/renderer_vulkan.h
index 5014e93..4a2cc92 100644
--- a/src/renderer_vulkan.h
+++ b/src/renderer_vulkan.h
@@ -157,6 +157,7 @@ VK_DECLARE(vkQueuePresentKHR);
 VK_DECLARE(vkCmdDraw);
 VK_DECLARE(vkDeviceWaitIdle);
 VK_DECLARE(vkCmdClearColorImage);
+VK_DECLARE(vkCreateSampler);
 
 #include "vma/vk_mem_alloc.h"
 
@@ -260,8 +261,7 @@ typedef struct ImmediateStructures
     VkFence *fences;
     RenderBuffer **queued_buffers;
     rawptr *data;
-    i32 volatile job_count;
-    i32 volatile remaining_count;
+    JobQueue queue;
     TicketMut mut;
 } ImmediateStructures;
 
@@ -276,6 +276,7 @@ typedef struct Image
 {
     VkImage img;
     VkImageView view;
+    VkSampler sampler;
     VmaAllocation alloc;
     VkFormat fmt;
     VkImageLayout curr_layout;
@@ -402,15 +403,17 @@ static inline void CopyImageToImage(VkCommandBuffer cmd, VkImage src, VkImage ds
 // ::Vulkan::Async::Functions::Header::
 
 #ifdef __linux__
-void *VkLoaderStart(void *thread_data);
+  void *VkLoaderStart(void *thread_data);
 #elif _WIN32
-#error not yet implemented
+# error not yet implemented
 #endif
 
+void VkLoaderWake();
+
 // ::Vulkan::ImmediateSubmit::Functions::Header::
 
-static b32 BeginImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd);
-static b32 FinishImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd, VkQueue queue);
+static b32 BeginImmSubmit(VkDevice device, VkFence fence, VkCommandBuffer cmd);
+static b32 FinishImmSubmit(VkDevice device, VkFence fence, VkCommandBuffer cmd, VkQueue queue);
 
 // ::Vulkan::Rendering::Functions::Header::
 
@@ -420,6 +423,10 @@ static void BeginRendering();
 
 static void ResizeSwapchain();
 
+// ::Vulkan::Images::Functions::Header::
+
+static b32 CreateVkSampler(Image *image, u32 thread_idx, u8 *buf, u32 width, u32 height);
+
 // ::Vulkan::Descriptors::Functions::Header::
 
 static DescAssetInfo *DescriptorTableSearch(DescType type, u64 asset_id);
diff --git a/src/renderer_vulkan_public.c b/src/renderer_vulkan_public.c
index 0c5b585..752819e 100644
--- a/src/renderer_vulkan_public.c
+++ b/src/renderer_vulkan_public.c
@@ -214,7 +214,7 @@ static b32 UploadToBuffer(RenderBuffer **buffers, rawptr *ptrs, u32 count, u8 th
 
     u32 copy_count = 0;
 
-    b32 imm_started = success = BeginImmSubmit(device, &fence, cmd);
+    b32 imm_started = success = BeginImmSubmit(device, fence, cmd);
     for (u32 i = 0; i < count && success; i++)
     {
         b32 host_visible = buffers[i]->type & HOST_VISIBLE_BUFFERS;
@@ -243,7 +243,7 @@ static b32 UploadToBuffer(RenderBuffer **buffers, rawptr *ptrs, u32 count, u8 th
         }
     }
 
-    FinishImmSubmit(device, &fence, cmd, queue);
+    FinishImmSubmit(device, fence, cmd, queue);
     vkWaitForFences(device, 1, &fence, VK_TRUE, 999999999);
 
     for (u32 i = 0; i < copy_count; i++)
@@ -257,18 +257,16 @@ static b32 UploadToBuffer(RenderBuffer **buffers, rawptr *ptrs, u32 count, u8 th
 
 static void CreateAndUploadToBuffer(RenderBuffer *buffer, rawptr ptr)
 {
-    // TODO: revisit this to see if it could be done better
-    Assert(renderer.vk.imm.job_count+1 < BUFFER_QUEUE_LEN, "CreateAndUploadToBuffer out of bounds");
-
     TicketMutLock(&renderer.vk.imm.mut);
 
-    u32 i = __atomic_fetch_add(&renderer.vk.imm.job_count, 1, __ATOMIC_SEQ_CST);
-    __atomic_fetch_add(&renderer.vk.imm.remaining_count, 1, __ATOMIC_SEQ_CST);
-
-    renderer.vk.imm.queued_buffers[i] = buffer;
-    renderer.vk.imm.data[i] = ptr;
+    u32 job_idx = JobQueueAdd(&renderer.vk.imm.queue, 1);
+    Assert(job_idx + 1 < BUFFER_QUEUE_LEN, "CreateAndUploadToBuffer out of bounds");
+    renderer.vk.imm.queued_buffers[job_idx] = buffer;
+    renderer.vk.imm.data[job_idx] = ptr;
 
     TicketMutUnlock(&renderer.vk.imm.mut);
+
+    VkLoaderWake();
 }
 
 static void FreeBuffers(RenderBuffer *buffers, u32 buffer_count)
@@ -314,6 +312,18 @@ static AssetHandle RendererLoadTexture(TextureAsset asset_id)
     return handle;
 }
 
+// Blocks until the immediate-upload job queue reports no remaining jobs.
+static void WaitForBufferQueue()
+{
+    JobQueueWaitForCompletion(&renderer.vk.imm.queue);
+}
+
+// Resets the immediate-upload job queue counters.
+static void ResetBufferQueue()
+{
+    JobQueueReset(&renderer.vk.imm.queue);
+}
+
 // ::Vulkan::Renderer::Buffers::Functions::End::
 
 
diff --git a/src/util.c b/src/util.c
index 36f7bca..50c85b5 100644
--- a/src/util.c
+++ b/src/util.c
@@ -313,4 +313,49 @@ static inline void TicketMutUnlock(TicketMut *mut)
     AtomicIncrU32(&mut->next_ticket);
 }
 
+// Reserves count consecutive job slots and returns the index of the first one.
+// Both the queued and the remaining counter grow by count.
+static inline u32 JobQueueAdd(JobQueue *queue, u32 count)
+{
+    u32 job_idx = AtomicFetchAddU32(&queue->queued, count);
+    AtomicFetchAddU32(&queue->remaining, count);
+
+    return job_idx;
+}
+
+// Returns the current value of the queued counter.
+static inline u32 JobQueueGetCount(JobQueue *queue)
+{
+    return AtomicLoadU32(&queue->queued);
+}
+
+// Subtracts count from the queued counter.
+static inline void JobQueueMarkUnqueued(JobQueue *queue, u32 count)
+{
+    AtomicFetchSubU32(&queue->queued, count);
+}
+
+// Subtracts count from the remaining counter.
+static inline void JobQueueMarkCompleted(JobQueue *queue, u32 count)
+{
+    AtomicFetchSubU32(&queue->remaining, count);
+}
+
+// Zeroes both counters with atomic stores, so the result does not depend on a
+// separate non-atomic read of the current values.
+static inline void JobQueueReset(JobQueue *queue)
+{
+    AtomicStoreU32(&queue->queued, 0);
+    AtomicStoreU32(&queue->remaining, 0);
+}
+
+// Spins until the remaining counter reads zero.
+static inline void JobQueueWaitForCompletion(JobQueue *queue)
+{
+    u32 remaining;
+    do {
+        remaining = AtomicLoadU32(&queue->remaining);
+    } while (remaining != 0);
+}
+
 // ::Util::Async::Functions::End::
diff --git a/src/util.h b/src/util.h
index 42d6b40..51408fa 100644
--- a/src/util.h
+++ b/src/util.h
@@ -242,5 +242,18 @@ typedef struct TicketMut
     u32 volatile next_ticket;
 } TicketMut;
 
+// Pair of counters tracking jobs handed to worker threads.
+typedef struct JobQueue
+{
+    u32 volatile queued;    // incremented by JobQueueAdd, decremented by JobQueueMarkUnqueued
+    u32 volatile remaining; // incremented by JobQueueAdd, decremented by JobQueueMarkCompleted
+} JobQueue;
+
 static inline void TicketMutLock(TicketMut *mut);
 static inline void TicketMutUnlock(TicketMut *mut);
+static inline u32 JobQueueAdd(JobQueue *queue, u32 count);
+static inline u32 JobQueueGetCount(JobQueue *queue);
+static inline void JobQueueMarkUnqueued(JobQueue *queue, u32 count);
+static inline void JobQueueMarkCompleted(JobQueue *queue, u32 count);
+static inline void JobQueueReset(JobQueue *queue);
+static inline void JobQueueWaitForCompletion(JobQueue *queue);
diff --git a/src/vulkan_config.c b/src/vulkan_config.c
index 28ebae6..28a75f8 100644
--- a/src/vulkan_config.c
+++ b/src/vulkan_config.c
@@ -90,9 +90,10 @@ static VkPhysicalDeviceVulkan11Features vk_11_features = {
 
 static const VkPhysicalDeviceFeatures vk_features = {
     .shaderUniformBufferArrayDynamicIndexing = VK_TRUE,
-    .shaderSampledImageArrayDynamicIndexing = VK_TRUE,
+    .shaderSampledImageArrayDynamicIndexing  = VK_TRUE,
     .shaderStorageBufferArrayDynamicIndexing = VK_TRUE,
-    .shaderStorageImageArrayDynamicIndexing = VK_TRUE
+    .shaderStorageImageArrayDynamicIndexing  = VK_TRUE,
+    .samplerAnisotropy = VK_TRUE,
 };
 
 static const VkPhysicalDeviceFeatures2 vk_features_2 = {