job queue abstraction, create image sampler function and misc fixes

Author: Matthew
Date:   2025-04-23 18:39:43 +10:00
Commit: 44668e8742 (parent 45b8f96d34)

10 changed files with 255 additions and 38 deletions


@@ -48,8 +48,7 @@ static void DestroyGame()
 static void RunCycle(GameContext *ctx, GameInput *inputs, u32 i_count)
 {
-    __atomic_store_n(&renderer.vk.imm.job_count, 0, __ATOMIC_RELEASE);
-    __atomic_store_n(&renderer.vk.imm.remaining_count, 0, __ATOMIC_RELEASE);
+    ResetBufferQueue();
 
     PrepareGUICtx(ctx);
@@ -85,16 +84,7 @@ static void RunCycle(GameContext *ctx, GameInput *inputs, u32 i_count)
     CreateAndUploadToBuffer(vertex_buffer, ctx->gui.vertices);
     CreateAndUploadToBuffer(index_buffer, ctx->gui.indices);
 
-    for (u32 i = 0; i < renderer.vk_conf.avail_threads; i++)
-    {
-        pthread_cond_signal(&cond);
-    }
-
-    i32 count = -1;
-    while (count != 0)
-    {
-        __atomic_load(&renderer.vk.imm.remaining_count, &count, __ATOMIC_SEQ_CST);
-    }
+    JobQueueWaitForCompletion(&renderer.vk.imm.queue);
 
     BeginFrame();


@@ -74,4 +74,7 @@ static inline u64 ReadCPUTimer();
 // ::Platform::Atomics::Header::
 static inline u32 AtomicFetchIncrU32(u32 *ptr);
+static inline u32 AtomicFetchSubU32(u32 *ptr, u32 count);
 static inline void AtomicIncrU32(u32 *ptr);
-static inline u32 AtomicFetchU32(u32 *ptr);
+static inline u32 AtomicLoadU32(u32 *ptr);


@@ -344,4 +344,14 @@ static inline void AtomicIncrU32(u32 *ptr)
     __atomic_fetch_add(ptr, 1, __ATOMIC_RELEASE);
 }
 
+static inline u32 AtomicLoadU32(u32 *ptr)
+{
+    return __atomic_load_n(ptr, __ATOMIC_ACQUIRE);
+}
+
+static inline u32 AtomicFetchSubU32(u32 *ptr, u32 count)
+{
+    return __atomic_sub_fetch(ptr, count, __ATOMIC_ACQUIRE);
+}
 // ::Platform::Atomics::Functions::End::
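Note: the two new helpers wrap GCC/Clang builtins with different return-value semantics. __atomic_sub_fetch returns the value after the subtraction, whereas __atomic_fetch_sub returns the value before it, so AtomicFetchSubU32 as defined here yields the post-decrement count. A standalone sketch of the difference (hypothetical variable names, not repository code):

    u32 counter = 5;
    u32 after  = __atomic_sub_fetch(&counter, 2, __ATOMIC_ACQUIRE); // after == 3, counter == 3
    u32 before = __atomic_fetch_sub(&counter, 2, __ATOMIC_ACQUIRE); // before == 3, counter == 1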


@@ -66,6 +66,7 @@ static void CreateAndUploadToBuffer(RenderBuffer *buffer, rawptr ptr);
 static void BindVertexBuffer(RenderBuffer *buffer);
 static void BindIndexBuffer(RenderBuffer *buffer);
 static AssetHandle RendererLoadTexture(TextureAsset asset_id);
+static void ResetBufferQueue();
 // ::Renderer::Uniforms::Header::
 // ::Renderer::PushConstants::Header::


@@ -204,11 +204,12 @@ static void BeginRendering()
 // ::Vulkan::ImmediateSubmit::Functions::Start::
-static b32 BeginImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd)
+static b32 BeginImmSubmit(VkDevice device, VkFence fence, VkCommandBuffer cmd)
 {
     b32 success = true;
-    VkResult result = vkResetFences(device, 1, fence);
+    VkFence f = fence;
+    VkResult result = vkResetFences(device, 1, &f);
     if (result != VK_SUCCESS)
     {
         Printfln("vkResetFences failure: %s", VkResultStr(result));
@@ -243,9 +244,10 @@ static b32 BeginImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd)
     return success;
 }
 
-static b32 FinishImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd, VkQueue queue)
+static b32 FinishImmSubmit(VkDevice device, VkFence fence, VkCommandBuffer cmd, VkQueue queue)
 {
     b32 success = true;
+    VkFence f = fence;
 
     VkResult result = vkEndCommandBuffer(cmd);
     if (result != VK_SUCCESS)
@@ -267,7 +269,7 @@ static b32 FinishImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd,
         .pCommandBufferInfos = &cmd_submit_info,
     };
 
-    result = vkQueueSubmit2(queue, 1, &submit_info, *fence);
+    result = vkQueueSubmit2(queue, 1, &submit_info, f);
     if (result != VK_SUCCESS)
     {
         Printfln("vkQueueSubmit2 imm failure: %s", VkResultStr(result));
@@ -277,7 +279,7 @@ static b32 FinishImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd,
     if (success)
     {
-        result = vkWaitForFences(device, 1, fence, true, 9999999999);
+        result = vkWaitForFences(device, 1, &f, true, 9999999999);
         if (result != VK_SUCCESS)
         {
             Printfln("vkWaitForFences imm failure: %s", VkResultStr(result));
@@ -315,6 +317,148 @@ static void ResizeSwapchain()
+// ::Vulkan::Images::Functions::Start::
+static b32 CreateVkSampler(Image *image, u32 thread_idx, u8 *buf, u32 width, u32 height)
+{
+    b32 success = true;
+    VkDevice device = renderer.vk.device;
+    VkCommandBuffer cmd = renderer.vk.imm.cmds[thread_idx];
+    VkFence fence = renderer.vk.imm.fences[thread_idx];
+    VkQueue queue = renderer.vk.queues.transfer_queue;
+
+    RenderBuffer staging_buffer = {
+        .type = RENDER_BUFFER_TYPE_STAGING,
+        .size = width * height,
+    };
+
+    VmaAllocationCreateInfo alloc_create_info = {
+        .usage = VMA_MEMORY_USAGE_GPU_ONLY,
+        .requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+    };
+
+    VkImageCreateInfo image_info = {
+        .sType = STYPE(IMAGE_CREATE_INFO),
+        .imageType = VK_IMAGE_TYPE_2D,
+        .mipLevels = 1,
+        .arrayLayers = 1,
+        .format = VK_FORMAT_R8G8B8A8_SRGB,
+        .tiling = VK_IMAGE_TILING_OPTIMAL,
+        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+        .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .extent = {
+            .width = width,
+            .height = height,
+            .depth = 1,
+        },
+    };
+
+    if (renderer.vk.queues.graphics != renderer.vk.queues.transfer)
+    {
+        image_info.sharingMode = VK_SHARING_MODE_CONCURRENT;
+        image_info.queueFamilyIndexCount = 2;
+        image_info.pQueueFamilyIndices = (u32[]){renderer.vk.queues.graphics, renderer.vk.queues.transfer};
+    }
+
+    VkResult result = vmaCreateImage(renderer.vk.alloc, &image_info,
+                                     &alloc_create_info, &image->img, &image->alloc, NULL);
+    if (result != VK_SUCCESS)
+    {
+        success = false;
+        Printfln("vmaCreateImage failure: %s", VkResultStr(result));
+    }
+
+    if (success)
+    {
+        image->curr_layout = VK_IMAGE_LAYOUT_UNDEFINED;
+        TransitionImage(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+        success = CreateBuffer(&staging_buffer);
+    }
+
+    if (success)
+        success = BeginImmSubmit(device, fence, cmd);
+
+    if (success)
+    {
+        rawptr mapped_buf = NULL;
+        vmaMapMemory(renderer.vk.alloc, staging_buffer.alloc, &mapped_buf);
+        MemCpy(mapped_buf, buf, width * height);
+
+        VkBufferImageCopy buffer_copy = {
+            .bufferRowLength = width,
+            .bufferImageHeight = height,
+            .imageSubresource = {
+                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                .layerCount = 1,
+            },
+            .imageExtent = {
+                .width = width,
+                .height = height,
+                .depth = 1,
+            },
+        };
+
+        vkCmdCopyBufferToImage(cmd, staging_buffer.buffer, image->img, image->curr_layout, 1, &buffer_copy);
+        vmaUnmapMemory(renderer.vk.alloc, staging_buffer.alloc);
+        vmaDestroyBuffer(renderer.vk.alloc, staging_buffer.buffer, staging_buffer.alloc);
+
+        success = FinishImmSubmit(device, fence, cmd, queue);
+    }
+
+    if (success)
+    {
+        VkImageViewCreateInfo view_info = {
+            .sType = STYPE(IMAGE_VIEW_CREATE_INFO),
+            .image = image->img,
+            .viewType = VK_IMAGE_VIEW_TYPE_2D,
+            .format = VK_FORMAT_R8G8B8A8_SRGB,
+            .subresourceRange = {
+                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                .levelCount = 1,
+                .layerCount = 1,
+            }
+        };
+        result = vkCreateImageView(device, &view_info, NULL, &image->view);
+        if (result != VK_SUCCESS)
+            success = false;
+    }
+
+    if (success)
+    {
+        VkPhysicalDeviceProperties properties;
+        vkGetPhysicalDeviceProperties(renderer.vk.phys_device, &properties);
+
+        // TODO: handle no anisotropy
+        VkSamplerCreateInfo sampler_info = {
+            .sType = STYPE(SAMPLER_CREATE_INFO),
+            .magFilter = VK_FILTER_LINEAR,
+            .minFilter = VK_FILTER_LINEAR,
+            .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+            .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+            .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+            .anisotropyEnable = VK_TRUE,
+            .maxAnisotropy = properties.limits.maxSamplerAnisotropy,
+            .borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK,
+            .compareOp = VK_COMPARE_OP_ALWAYS,
+            .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
+        };
+        result = vkCreateSampler(device, &sampler_info, NULL, &image->sampler);
+        if (result != VK_SUCCESS)
+            success = false;
+    }
+
+    return success;
+}
+// ::Vulkan::Images::Functions::End::
+
 // ::Vulkan::Descriptors::Functions::Start::
 static DescAssetInfo *DescriptorTableSearch(DescType type, u64 asset_id)
@@ -702,6 +846,7 @@ static b32 InitVkDeviceFunctions() {
     INIT_DEV_FN(vkCmdDraw);
     INIT_DEV_FN(vkDeviceWaitIdle);
    INIT_DEV_FN(vkCmdClearColorImage);
+    INIT_DEV_FN(vkCreateSampler);
 
     return true;
 }
@@ -1250,7 +1395,7 @@ void *VkLoaderStart(void *i)
     {
         TicketMutLock(&renderer.vk.imm.mut);
-        u32 job_count = __atomic_load_n(&renderer.vk.imm.job_count, __ATOMIC_SEQ_CST);
+        u32 job_count = JobQueueGetCount(&renderer.vk.imm.queue);
         if (job_count < 0)
         {
             TicketMutUnlock(&renderer.vk.imm.mut);
@@ -1275,7 +1420,7 @@ void *VkLoaderStart(void *i)
             count += 1;
         }
 
-        __atomic_sub_fetch(&renderer.vk.imm.job_count, count, __ATOMIC_SEQ_CST);
+        JobQueueMarkUnqueued(&renderer.vk.imm.queue, count);
 
         TicketMutUnlock(&renderer.vk.imm.mut);
@@ -1284,11 +1429,16 @@ void *VkLoaderStart(void *i)
             Assert(UploadToBuffer(buffers, data, count, index), "VkLoader UploadToBuffer failure");
 
-            u32 rem = __atomic_sub_fetch(&renderer.vk.imm.remaining_count, count, __ATOMIC_RELEASE);
+            JobQueueMarkCompleted(&renderer.vk.imm.queue, count);
         }
     }
 }
 
+void VkLoaderWake()
+{
+    for (u32 i = 0; i < renderer.vk_conf.avail_threads; i++)
+        pthread_cond_signal(&cond);
+}
+
 #elif _WIN32
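Taken together with the buffer-queue changes further down, the immediate-upload path now follows a produce/consume pattern around the new JobQueue. A condensed sketch of the intended flow, using only names introduced or kept by this commit; locking granularity, error handling, and the loader's actual drain loop are simplified:

    // Producer (render thread), see CreateAndUploadToBuffer:
    TicketMutLock(&renderer.vk.imm.mut);
    u32 job_idx = JobQueueAdd(&renderer.vk.imm.queue, 1);   // bumps both queued and remaining
    renderer.vk.imm.queued_buffers[job_idx] = buffer;
    renderer.vk.imm.data[job_idx] = ptr;
    TicketMutUnlock(&renderer.vk.imm.mut);
    VkLoaderWake();                                         // signal the loader threads

    // Consumer (loader thread), see VkLoaderStart:
    TicketMutLock(&renderer.vk.imm.mut);
    u32 count = JobQueueGetCount(&renderer.vk.imm.queue);   // jobs still queued
    // ... copy out up to `count` buffer/data pairs ...
    JobQueueMarkUnqueued(&renderer.vk.imm.queue, count);
    TicketMutUnlock(&renderer.vk.imm.mut);
    // ... UploadToBuffer(...) outside the lock ...
    JobQueueMarkCompleted(&renderer.vk.imm.queue, count);   // lets waiters make progress

    // Frame boundary, see RunCycle:
    JobQueueWaitForCompletion(&renderer.vk.imm.queue);      // block until all uploads finish
    // ... the next frame starts by calling ResetBufferQueue() ...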


@@ -157,6 +157,7 @@ VK_DECLARE(vkQueuePresentKHR);
 VK_DECLARE(vkCmdDraw);
 VK_DECLARE(vkDeviceWaitIdle);
 VK_DECLARE(vkCmdClearColorImage);
+VK_DECLARE(vkCreateSampler);
 
 #include "vma/vk_mem_alloc.h"
@@ -260,8 +261,7 @@ typedef struct ImmediateStructures
     VkFence *fences;
     RenderBuffer **queued_buffers;
     rawptr *data;
-    i32 volatile job_count;
-    i32 volatile remaining_count;
+    JobQueue queue;
     TicketMut mut;
 } ImmediateStructures;
@@ -276,6 +276,7 @@ typedef struct Image
 {
     VkImage img;
     VkImageView view;
+    VkSampler sampler;
     VmaAllocation alloc;
     VkFormat fmt;
     VkImageLayout curr_layout;
@@ -402,15 +403,17 @@ static inline void CopyImageToImage(VkCommandBuffer cmd, VkImage src, VkImage ds
 // ::Vulkan::Async::Functions::Header::
 #ifdef __linux__
 void *VkLoaderStart(void *thread_data);
 #elif _WIN32
-#error not yet implemented
+# error not yet implemented
 #endif
+void VkLoaderWake();
 
 // ::Vulkan::ImmediateSubmit::Functions::Header::
-static b32 BeginImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd);
+static b32 BeginImmSubmit(VkDevice device, VkFence fence, VkCommandBuffer cmd);
-static b32 FinishImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd, VkQueue queue);
+static b32 FinishImmSubmit(VkDevice device, VkFence fence, VkCommandBuffer cmd, VkQueue queue);
@@ -420,6 +423,10 @@ static void BeginRendering();
 static void ResizeSwapchain();
 
+// ::Vulkan::Images::Functions::Header::
+static b32 CreateVkSampler(Image *image, u32 thread_idx, u8 *buf, u32 width, u32 height);
+
 // ::Vulkan::Descriptors::Functions::Header::
 static DescAssetInfo *DescriptorTableSearch(DescType type, u64 asset_id);


@@ -214,7 +214,7 @@ static b32 UploadToBuffer(RenderBuffer **buffers, rawptr *ptrs, u32 count, u8 th
     u32 copy_count = 0;
-    b32 imm_started = success = BeginImmSubmit(device, &fence, cmd);
+    b32 imm_started = success = BeginImmSubmit(device, fence, cmd);
 
     for (u32 i = 0; i < count && success; i++)
     {
         b32 host_visible = buffers[i]->type & HOST_VISIBLE_BUFFERS;
@@ -243,7 +243,7 @@ static b32 UploadToBuffer(RenderBuffer **buffers, rawptr *ptrs, u32 count, u8 th
         }
     }
 
-    FinishImmSubmit(device, &fence, cmd, queue);
+    FinishImmSubmit(device, fence, cmd, queue);
     vkWaitForFences(device, 1, &fence, VK_TRUE, 999999999);
 
     for (u32 i = 0; i < copy_count; i++)
@@ -257,18 +257,15 @@ static b32 UploadToBuffer(RenderBuffer **buffers, rawptr *ptrs, u32 count, u8 th
 static void CreateAndUploadToBuffer(RenderBuffer *buffer, rawptr ptr)
 {
-    // TODO: revisit this to see if it could be done better
-    Assert(renderer.vk.imm.job_count+1 < BUFFER_QUEUE_LEN, "CreateAndUploadToBuffer out of bounds");
     TicketMutLock(&renderer.vk.imm.mut);
-    u32 i = __atomic_fetch_add(&renderer.vk.imm.job_count, 1, __ATOMIC_SEQ_CST);
-    __atomic_fetch_add(&renderer.vk.imm.remaining_count, 1, __ATOMIC_SEQ_CST);
-    renderer.vk.imm.queued_buffers[i] = buffer;
-    renderer.vk.imm.data[i] = ptr;
+    u32 job_idx = JobQueueAdd(&renderer.vk.imm.queue, 1);
+    renderer.vk.imm.queued_buffers[job_idx] = buffer;
+    renderer.vk.imm.data[job_idx] = ptr;
     TicketMutUnlock(&renderer.vk.imm.mut);
+    VkLoaderWake();
 }
 
 static void FreeBuffers(RenderBuffer *buffers, u32 buffer_count)
@@ -314,6 +311,16 @@ static AssetHandle RendererLoadTexture(TextureAsset asset_id)
     return handle;
 }
 
+static void WaitForBufferQueue()
+{
+    JobQueueWaitForCompletion(&renderer.vk.imm.queue);
+}
+
+static void ResetBufferQueue()
+{
+    JobQueueReset(&renderer.vk.imm.queue);
+}
 // ::Vulkan::Renderer::Buffers::Functions::End::


@@ -313,4 +313,41 @@ static inline void TicketMutUnlock(TicketMut *mut)
     AtomicIncrU32(&mut->next_ticket);
 }
 
+static inline u32 JobQueueAdd(JobQueue *queue, u32 count)
+{
+    u32 job_idx = AtomicFetchIncrU32(&queue->queued);
+    AtomicFetchIncrU32(&queue->remaining);
+    return job_idx;
+}
+
+static inline u32 JobQueueGetCount(JobQueue *queue)
+{
+    return AtomicLoadU32(&queue->queued);
+}
+
+static inline void JobQueueMarkUnqueued(JobQueue *queue, u32 count)
+{
+    AtomicFetchSubU32(&queue->queued, count);
+}
+
+static inline void JobQueueMarkCompleted(JobQueue *queue, u32 count)
+{
+    AtomicFetchSubU32(&queue->remaining, count);
+}
+
+static inline void JobQueueReset(JobQueue *queue)
+{
+    AtomicFetchSubU32(&queue->queued, queue->queued);
+    AtomicFetchSubU32(&queue->remaining, queue->remaining);
+}
+
+static inline void JobQueueWaitForCompletion(JobQueue *queue)
+{
+    u32 remaining;
+    do {
+        remaining = AtomicLoadU32(&queue->remaining);
+    } while (remaining != 0);
+}
 // ::Util::Async::Functions::End::
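The two counters play distinct roles: queued tracks jobs that have been published but not yet claimed by a worker, while remaining tracks jobs that have not yet finished; JobQueueWaitForCompletion simply spins until remaining reaches zero. A minimal single-threaded sketch of the lifecycle (illustration only, not repository code):

    JobQueue q = {0};
    u32 idx = JobQueueAdd(&q, 1);        // slot index for the job; queued == 1, remaining == 1
    u32 pending = JobQueueGetCount(&q);  // pending == 1
    JobQueueMarkUnqueued(&q, pending);   // a worker claims the job: queued == 0
    JobQueueMarkCompleted(&q, pending);  // work finished: remaining == 0
    JobQueueWaitForCompletion(&q);       // returns immediately once remaining == 0
    JobQueueReset(&q);                   // both counters back to zero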


@@ -242,5 +242,16 @@ typedef struct TicketMut
     u32 volatile next_ticket;
 } TicketMut;
 
+typedef struct JobQueue
+{
+    u32 volatile queued;
+    u32 volatile remaining;
+} JobQueue;
+
 static inline void TicketMutLock(TicketMut *mut);
 static inline void TicketMutUnlock(TicketMut *mut);
+static inline u32 JobQueueAdd(JobQueue *queue, u32 count);
+static inline void JobQueueMarkUnqueued(JobQueue *queue, u32 count);
+static inline void JobQueueMarkCompleted(JobQueue *queue, u32 count);
+static inline void JobQueueReset(JobQueue *queue);
+static inline void JobQueueWaitForCompletion(JobQueue *queue);


@@ -90,9 +90,10 @@ static VkPhysicalDeviceVulkan11Features vk_11_features = {
 static const VkPhysicalDeviceFeatures vk_features = {
     .shaderUniformBufferArrayDynamicIndexing = VK_TRUE,
     .shaderSampledImageArrayDynamicIndexing = VK_TRUE,
     .shaderStorageBufferArrayDynamicIndexing = VK_TRUE,
-    .shaderStorageImageArrayDynamicIndexing = VK_TRUE
+    .shaderStorageImageArrayDynamicIndexing = VK_TRUE,
+    .samplerAnisotropy = VK_TRUE,
 };
 
 static const VkPhysicalDeviceFeatures2 vk_features_2 = {