Add job queue abstraction, create-image-sampler function, and misc fixes

This commit is contained in:
Matthew 2025-04-23 18:39:43 +10:00
parent 45b8f96d34
commit 44668e8742
10 changed files with 255 additions and 38 deletions

View File

@ -48,8 +48,7 @@ static void DestroyGame()
static void RunCycle(GameContext *ctx, GameInput *inputs, u32 i_count)
{
__atomic_store_n(&renderer.vk.imm.job_count, 0, __ATOMIC_RELEASE);
__atomic_store_n(&renderer.vk.imm.remaining_count, 0, __ATOMIC_RELEASE);
ResetBufferQueue();
PrepareGUICtx(ctx);
@ -85,16 +84,7 @@ static void RunCycle(GameContext *ctx, GameInput *inputs, u32 i_count)
CreateAndUploadToBuffer(vertex_buffer, ctx->gui.vertices);
CreateAndUploadToBuffer(index_buffer, ctx->gui.indices);
for (u32 i = 0; i < renderer.vk_conf.avail_threads; i++)
{
pthread_cond_signal(&cond);
}
i32 count = -1;
while (count != 0)
{
__atomic_load(&renderer.vk.imm.remaining_count, &count, __ATOMIC_SEQ_CST);
}
JobQueueWaitForCompletion(&renderer.vk.imm.queue);
BeginFrame();

View File

@ -74,4 +74,7 @@ static inline u64 ReadCPUTimer();
// ::Platform::Atomics::Header::
static inline u32 AtomicFetchIncrU32(u32 *ptr);
static inline u32 AtomicFetchSubU32(u32 *ptr, u32 count);
static inline void AtomicIncrU32(u32 *ptr);
static inline u32 AtomicFetchU32(u32 *ptr);
static inline u32 AtomicLoadU32(u32 *ptr);

View File

@ -344,4 +344,14 @@ static inline void AtomicIncrU32(u32 *ptr)
__atomic_fetch_add(ptr, 1, __ATOMIC_RELEASE);
}
// Atomically reads *ptr with acquire ordering and returns the observed value.
static inline u32 AtomicLoadU32(u32 *ptr)
{
    u32 value = __atomic_load_n(ptr, __ATOMIC_ACQUIRE);
    return value;
}
// Atomically subtracts `count` from *ptr with acquire ordering.
// NOTE(review): despite the "FetchSub" name this uses __atomic_sub_fetch, so it
// returns the NEW (post-subtraction) value, not the prior one — confirm callers
// expect that before renaming or switching to __atomic_fetch_sub.
static inline u32 AtomicFetchSubU32(u32 *ptr, u32 count)
{
return __atomic_sub_fetch(ptr, count, __ATOMIC_ACQUIRE);
}
// ::Platform::Atomics::Functions::End::

View File

@ -66,6 +66,7 @@ static void CreateAndUploadToBuffer(RenderBuffer *buffer, rawptr ptr);
static void BindVertexBuffer(RenderBuffer *buffer);
static void BindIndexBuffer(RenderBuffer *buffer);
static AssetHandle RendererLoadTexture(TextureAsset asset_id);
static void ResetBufferQueue();
// ::Renderer::Uniforms::Header::
// ::Renderer::PushConstants::Header::

View File

@ -204,11 +204,12 @@ static void BeginRendering()
// ::Vulkan::ImmediateSubmit::Functions::Start::
static b32 BeginImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd)
static b32 BeginImmSubmit(VkDevice device, VkFence fence, VkCommandBuffer cmd)
{
b32 success = true;
VkResult result = vkResetFences(device, 1, fence);
VkFence f = fence;
VkResult result = vkResetFences(device, 1, &f);
if (result != VK_SUCCESS)
{
Printfln("vkResetFences failure: %s", VkResultStr(result));
@ -243,9 +244,10 @@ static b32 BeginImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd)
return success;
}
static b32 FinishImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd, VkQueue queue)
static b32 FinishImmSubmit(VkDevice device, VkFence fence, VkCommandBuffer cmd, VkQueue queue)
{
b32 success = true;
VkFence f = fence;
VkResult result = vkEndCommandBuffer(cmd);
if (result != VK_SUCCESS)
@ -267,7 +269,7 @@ static b32 FinishImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd,
.pCommandBufferInfos = &cmd_submit_info,
};
result = vkQueueSubmit2(queue, 1, &submit_info, *fence);
result = vkQueueSubmit2(queue, 1, &submit_info, f);
if (result != VK_SUCCESS)
{
Printfln("vkQueueSubmit2 imm failure: %s", VkResultStr(result));
@ -277,7 +279,7 @@ static b32 FinishImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd,
if (success)
{
result = vkWaitForFences(device, 1, fence, true, 9999999999);
result = vkWaitForFences(device, 1, &f, true, 9999999999);
if (result != VK_SUCCESS)
{
Printfln("vkWaitForFences imm failure: %s", VkResultStr(result));
@ -315,6 +317,148 @@ static void ResizeSwapchain()
// ::Vulkan::Images::Functions::Start::
// Creates a sampled 2D texture from raw pixel data: allocates a
// VK_FORMAT_R8G8B8A8_SRGB VkImage, uploads `buf` through a staging buffer on
// this thread's immediate-submit command buffer, then creates the image view
// and sampler on `image`. Returns true on success.
// Assumes `buf` holds tightly packed RGBA8 texels (width * height * 4 bytes)
// — TODO(review): confirm against callers; the chosen format is 4 B/texel.
static b32 CreateVkSampler(Image *image, u32 thread_idx, u8 *buf, u32 width, u32 height)
{
    b32 success = true;
    VkDevice device = renderer.vk.device;
    VkCommandBuffer cmd = renderer.vk.imm.cmds[thread_idx];
    VkFence fence = renderer.vk.imm.fences[thread_idx];
    VkQueue queue = renderer.vk.queues.transfer_queue;
    // R8G8B8A8 is 4 bytes per texel; the previous `width * height` size
    // under-allocated the staging buffer, so the copy read past its end.
    u32 upload_size = width * height * 4;
    RenderBuffer staging_buffer = {
        .type = RENDER_BUFFER_TYPE_STAGING,
        .size = upload_size,
    };
    VmaAllocationCreateInfo alloc_create_info = {
        .usage = VMA_MEMORY_USAGE_GPU_ONLY,
        .requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
    };
    VkImageCreateInfo image_info = {
        .sType = STYPE(IMAGE_CREATE_INFO),
        .imageType = VK_IMAGE_TYPE_2D,
        .mipLevels = 1,
        .arrayLayers = 1,
        .format = VK_FORMAT_R8G8B8A8_SRGB,
        .tiling = VK_IMAGE_TILING_OPTIMAL,
        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
        .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .extent = {
            .width = width,
            .height = height,
            .depth = 1,
        },
    };
    // Distinct transfer/graphics queue families: the image must be shared so
    // the upload (transfer) and later sampling (graphics) are both valid.
    if (renderer.vk.queues.graphics != renderer.vk.queues.transfer)
    {
        image_info.sharingMode = VK_SHARING_MODE_CONCURRENT;
        image_info.queueFamilyIndexCount = 2;
        image_info.pQueueFamilyIndices = (u32[]){renderer.vk.queues.graphics, renderer.vk.queues.transfer};
    }
    VkResult result = vmaCreateImage(renderer.vk.alloc, &image_info,
        &alloc_create_info, &image->img, &image->alloc, NULL);
    if (result != VK_SUCCESS)
    {
        success = false;
        Printfln("vmaCreateImage failure: %s", VkResultStr(result));
    }
    b32 staging_created = false;
    if (success)
    {
        image->curr_layout = VK_IMAGE_LAYOUT_UNDEFINED;
        staging_created = success = CreateBuffer(&staging_buffer);
    }
    if (success)
        success = BeginImmSubmit(device, fence, cmd);
    if (success)
    {
        // Record the layout transition only after BeginImmSubmit has begun the
        // command buffer — it was previously recorded before the begin, which
        // is invalid command buffer usage.
        TransitionImage(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
        rawptr mapped_buf = NULL;
        vmaMapMemory(renderer.vk.alloc, staging_buffer.alloc, &mapped_buf);
        MemCpy(mapped_buf, buf, upload_size);
        vmaUnmapMemory(renderer.vk.alloc, staging_buffer.alloc);
        VkBufferImageCopy buffer_copy = {
            .bufferRowLength = width,
            .bufferImageHeight = height,
            .imageSubresource = {
                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                .layerCount = 1,
            },
            .imageExtent = {
                .width = width,
                .height = height,
                .depth = 1,
            },
        };
        vkCmdCopyBufferToImage(cmd, staging_buffer.buffer, image->img, image->curr_layout, 1, &buffer_copy);
        success = FinishImmSubmit(device, fence, cmd, queue);
    }
    // FinishImmSubmit waits on the fence, so the GPU has finished reading the
    // staging buffer by now. Previously it was destroyed BEFORE submission
    // (the copy could read freed memory), and it leaked when Begin/Finish
    // failed; destroying here fixes both.
    if (staging_created)
        vmaDestroyBuffer(renderer.vk.alloc, staging_buffer.buffer, staging_buffer.alloc);
    if (success)
    {
        VkImageViewCreateInfo view_info = {
            .sType = STYPE(IMAGE_VIEW_CREATE_INFO),
            .image = image->img,
            .viewType = VK_IMAGE_VIEW_TYPE_2D,
            .format = VK_FORMAT_R8G8B8A8_SRGB,
            .subresourceRange = {
                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                .levelCount = 1,
                .layerCount = 1,
            }
        };
        result = vkCreateImageView(device, &view_info, NULL, &image->view);
        if (result != VK_SUCCESS)
            success = false;
    }
    if (success)
    {
        // NOTE(review): the image is left in TRANSFER_DST_OPTIMAL here;
        // sampling normally wants SHADER_READ_ONLY_OPTIMAL — confirm a later
        // transition happens elsewhere before the texture is read in a shader.
        VkPhysicalDeviceProperties properties;
        vkGetPhysicalDeviceProperties(renderer.vk.phys_device, &properties);
        // TODO: handle no anisotropy
        VkSamplerCreateInfo sampler_info = {
            .sType = STYPE(SAMPLER_CREATE_INFO),
            .magFilter = VK_FILTER_LINEAR,
            .minFilter = VK_FILTER_LINEAR,
            .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
            .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
            .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
            .anisotropyEnable = VK_TRUE,
            .maxAnisotropy = properties.limits.maxSamplerAnisotropy,
            .borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK,
            .compareOp = VK_COMPARE_OP_ALWAYS,
            .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
        };
        result = vkCreateSampler(device, &sampler_info, NULL, &image->sampler);
        if (result != VK_SUCCESS)
            success = false;
    }
    return success;
}
// ::Vulkan::Images::Functions::End::
// ::Vulkan::Descriptors::Functions::Start::
static DescAssetInfo *DescriptorTableSearch(DescType type, u64 asset_id)
@ -702,6 +846,7 @@ static b32 InitVkDeviceFunctions() {
INIT_DEV_FN(vkCmdDraw);
INIT_DEV_FN(vkDeviceWaitIdle);
INIT_DEV_FN(vkCmdClearColorImage);
INIT_DEV_FN(vkCreateSampler);
return true;
}
@ -1250,7 +1395,7 @@ void *VkLoaderStart(void *i)
{
TicketMutLock(&renderer.vk.imm.mut);
u32 job_count = __atomic_load_n(&renderer.vk.imm.job_count, __ATOMIC_SEQ_CST);
u32 job_count = JobQueueGetCount(&renderer.vk.imm.queue);
if (job_count < 0)
{
TicketMutUnlock(&renderer.vk.imm.mut);
@ -1275,7 +1420,7 @@ void *VkLoaderStart(void *i)
count += 1;
}
__atomic_sub_fetch(&renderer.vk.imm.job_count, count, __ATOMIC_SEQ_CST);
JobQueueMarkUnqueued(&renderer.vk.imm.queue, count);
TicketMutUnlock(&renderer.vk.imm.mut);
@ -1284,11 +1429,16 @@ void *VkLoaderStart(void *i)
Assert(UploadToBuffer(buffers, data, count, index), "VkLoader UploadToBuffer failure");
u32 rem = __atomic_sub_fetch(&renderer.vk.imm.remaining_count, count, __ATOMIC_RELEASE);
JobQueueMarkCompleted(&renderer.vk.imm.queue, count);
}
}
}
// Wakes every loader thread parked on `cond` so each re-checks the job queue.
// NOTE(review): signals without holding the condvar's mutex — legal, but a
// thread between its queue check and its wait can miss the wakeup; confirm the
// loader loop tolerates that.
void VkLoaderWake()
{
    u32 thread_count = renderer.vk_conf.avail_threads;
    for (u32 i = 0; i < thread_count; i++)
    {
        pthread_cond_signal(&cond);
    }
}
#elif _WIN32

View File

@ -157,6 +157,7 @@ VK_DECLARE(vkQueuePresentKHR);
VK_DECLARE(vkCmdDraw);
VK_DECLARE(vkDeviceWaitIdle);
VK_DECLARE(vkCmdClearColorImage);
VK_DECLARE(vkCreateSampler);
#include "vma/vk_mem_alloc.h"
@ -260,8 +261,7 @@ typedef struct ImmediateStructures
VkFence *fences;
RenderBuffer **queued_buffers;
rawptr *data;
i32 volatile job_count;
i32 volatile remaining_count;
JobQueue queue;
TicketMut mut;
} ImmediateStructures;
@ -276,6 +276,7 @@ typedef struct Image
{
VkImage img;
VkImageView view;
VkSampler sampler;
VmaAllocation alloc;
VkFormat fmt;
VkImageLayout curr_layout;
@ -402,15 +403,17 @@ static inline void CopyImageToImage(VkCommandBuffer cmd, VkImage src, VkImage ds
// ::Vulkan::Async::Functions::Header::
#ifdef __linux__
void *VkLoaderStart(void *thread_data);
void *VkLoaderStart(void *thread_data);
#elif _WIN32
#error not yet implemented
# error not yet implemented
#endif
void VkLoaderWake();
// ::Vulkan::ImmediateSubmit::Functions::Header::
static b32 BeginImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd);
static b32 FinishImmSubmit(VkDevice device, VkFence *fence, VkCommandBuffer cmd, VkQueue queue);
static b32 BeginImmSubmit(VkDevice device, VkFence fence, VkCommandBuffer cmd);
static b32 FinishImmSubmit(VkDevice device, VkFence fence, VkCommandBuffer cmd, VkQueue queue);
// ::Vulkan::Rendering::Functions::Header::
@ -420,6 +423,10 @@ static void BeginRendering();
static void ResizeSwapchain();
// ::Vulkan::Images::Functions::Header::
static b32 CreateVkSampler(Image *image, u32 thread_idx, u8 *buf, u32 width, u32 height);
// ::Vulkan::Descriptors::Functions::Header::
static DescAssetInfo *DescriptorTableSearch(DescType type, u64 asset_id);

View File

@ -214,7 +214,7 @@ static b32 UploadToBuffer(RenderBuffer **buffers, rawptr *ptrs, u32 count, u8 th
u32 copy_count = 0;
b32 imm_started = success = BeginImmSubmit(device, &fence, cmd);
b32 imm_started = success = BeginImmSubmit(device, fence, cmd);
for (u32 i = 0; i < count && success; i++)
{
b32 host_visible = buffers[i]->type & HOST_VISIBLE_BUFFERS;
@ -243,7 +243,7 @@ static b32 UploadToBuffer(RenderBuffer **buffers, rawptr *ptrs, u32 count, u8 th
}
}
FinishImmSubmit(device, &fence, cmd, queue);
FinishImmSubmit(device, fence, cmd, queue);
vkWaitForFences(device, 1, &fence, VK_TRUE, 999999999);
for (u32 i = 0; i < copy_count; i++)
@ -257,18 +257,15 @@ static b32 UploadToBuffer(RenderBuffer **buffers, rawptr *ptrs, u32 count, u8 th
static void CreateAndUploadToBuffer(RenderBuffer *buffer, rawptr ptr)
{
// TODO: revisit this to see if it could be done better
Assert(renderer.vk.imm.job_count+1 < BUFFER_QUEUE_LEN, "CreateAndUploadToBuffer out of bounds");
TicketMutLock(&renderer.vk.imm.mut);
u32 i = __atomic_fetch_add(&renderer.vk.imm.job_count, 1, __ATOMIC_SEQ_CST);
__atomic_fetch_add(&renderer.vk.imm.remaining_count, 1, __ATOMIC_SEQ_CST);
renderer.vk.imm.queued_buffers[i] = buffer;
renderer.vk.imm.data[i] = ptr;
u32 job_idx = JobQueueAdd(&renderer.vk.imm.queue, 1);
renderer.vk.imm.queued_buffers[job_idx] = buffer;
renderer.vk.imm.data[job_idx] = ptr;
TicketMutUnlock(&renderer.vk.imm.mut);
VkLoaderWake();
}
static void FreeBuffers(RenderBuffer *buffers, u32 buffer_count)
@ -314,6 +311,16 @@ static AssetHandle RendererLoadTexture(TextureAsset asset_id)
return handle;
}
// Blocks (busy-waits) until every queued immediate-submit buffer upload job
// has been marked completed by the loader threads.
static void WaitForBufferQueue()
{
JobQueueWaitForCompletion(&renderer.vk.imm.queue);
}
// Zeroes the immediate-submit job queue counters at the start of a cycle
// (called from RunCycle before new upload jobs are enqueued).
static void ResetBufferQueue()
{
JobQueueReset(&renderer.vk.imm.queue);
}
// ::Vulkan::Renderer::Buffers::Functions::End::

View File

@ -313,4 +313,41 @@ static inline void TicketMutUnlock(TicketMut *mut)
AtomicIncrU32(&mut->next_ticket);
}
// Atomically reserves `count` job slots and returns the index of the first
// reserved slot (the pre-increment value of `queued`).
// Fix: the previous implementation ignored `count` and always reserved exactly
// one slot; this honors it while staying identical for the count == 1 callers.
static inline u32 JobQueueAdd(JobQueue *queue, u32 count)
{
    u32 job_idx = __atomic_fetch_add(&queue->queued, count, __ATOMIC_RELEASE);
    __atomic_fetch_add(&queue->remaining, count, __ATOMIC_RELEASE);
    return job_idx;
}
// Returns the current number of queued (not-yet-picked-up) jobs.
static inline u32 JobQueueGetCount(JobQueue *queue)
{
    u32 queued = AtomicLoadU32(&queue->queued);
    return queued;
}
// Removes `count` jobs from the queued counter once a worker has claimed them;
// they stay in `remaining` until JobQueueMarkCompleted is called.
static inline void JobQueueMarkUnqueued(JobQueue *queue, u32 count)
{
    (void)AtomicFetchSubU32(&queue->queued, count);
}
// Marks `count` claimed jobs as finished, releasing waiters in
// JobQueueWaitForCompletion once `remaining` reaches zero.
static inline void JobQueueMarkCompleted(JobQueue *queue, u32 count)
{
    (void)AtomicFetchSubU32(&queue->remaining, count);
}
// Resets both counters to zero (start-of-cycle; see ResetBufferQueue).
// Fix: the previous subtract-current-value form did a plain volatile read of
// each counter and then an atomic subtract — a concurrent JobQueueAdd between
// the two could leave a nonzero count. Direct atomic stores of 0 match the
// pre-refactor behavior (__atomic_store_n(..., 0, __ATOMIC_RELEASE)).
static inline void JobQueueReset(JobQueue *queue)
{
    __atomic_store_n(&queue->queued, 0, __ATOMIC_RELEASE);
    __atomic_store_n(&queue->remaining, 0, __ATOMIC_RELEASE);
}
// Busy-waits until every reserved job has been marked completed.
// NOTE(review): pure spin with no backoff or yield — burns a core while
// waiting; acceptable only if waits are expected to be short.
static inline void JobQueueWaitForCompletion(JobQueue *queue)
{
    while (AtomicLoadU32(&queue->remaining) != 0)
    {
        // spin until workers drain the queue
    }
}
// ::Util::Async::Functions::End::

View File

@ -242,5 +242,16 @@ typedef struct TicketMut
u32 volatile next_ticket;
} TicketMut;
// Counter pair tracking immediate-submit jobs: `queued` is the number of jobs
// waiting to be claimed by a worker; `remaining` is the number not yet
// completed (claimed jobs stay in `remaining` until finished).
typedef struct JobQueue
{
u32 volatile queued;
u32 volatile remaining;
} JobQueue;
static inline void TicketMutLock(TicketMut *mut);
static inline void TicketMutUnlock(TicketMut *mut);
static inline u32 JobQueueAdd(JobQueue *queue, u32 count);
static inline u32 JobQueueGetCount(JobQueue *queue); // was missing from this list; defined and used by the loader thread
static inline void JobQueueMarkUnqueued(JobQueue *queue, u32 count);
static inline void JobQueueMarkCompleted(JobQueue *queue, u32 count);
static inline void JobQueueReset(JobQueue *queue);
static inline void JobQueueWaitForCompletion(JobQueue *queue);

View File

@ -92,7 +92,8 @@ static const VkPhysicalDeviceFeatures vk_features = {
.shaderUniformBufferArrayDynamicIndexing = VK_TRUE,
.shaderSampledImageArrayDynamicIndexing = VK_TRUE,
.shaderStorageBufferArrayDynamicIndexing = VK_TRUE,
.shaderStorageImageArrayDynamicIndexing = VK_TRUE
.shaderStorageImageArrayDynamicIndexing = VK_TRUE,
.samplerAnisotropy = VK_TRUE,
};
static const VkPhysicalDeviceFeatures2 vk_features_2 = {