From 9e4333b9fbeb6a55aa3f64c481f5e28265a06d9b Mon Sep 17 00:00:00 2001 From: Benjamin Kraft Date: Sat, 5 Oct 2024 18:02:49 +0200 Subject: [PATCH] double buffered vertex buffer in two threads with two different queues if possible --- include/application.hpp | 16 +- include/vulkan/command_pool.hpp | 8 +- include/vulkan/instance.hpp | 48 +++--- shaders/pbd.comp | 10 +- src/application.cpp | 255 ++++++++++++++++++++++---------- src/vulkan/buffer.cpp | 6 +- src/vulkan/command_pool.cpp | 19 +-- src/vulkan/instance.cpp | 53 +++---- src/vulkan/swapchain.cpp | 13 +- 9 files changed, 255 insertions(+), 173 deletions(-) diff --git a/include/application.hpp b/include/application.hpp index 86a5d6e..86c1c4c 100644 --- a/include/application.hpp +++ b/include/application.hpp @@ -44,9 +44,12 @@ private: void createSyncObjects(); unique_ptr imageAvailable; unique_ptr renderFinished; - unique_ptr computeFinished; - unique_ptr renderInFlight; - unique_ptr computeInFlight; + unique_ptr computeSemaphore; + unique_ptr transferFinished; + unique_ptr renderFence; + unique_ptr computeFence; + unique_ptr transferFence; + std::mutex submitMutex; unique_ptr swapchain; unique_ptr descriptorPool; @@ -56,7 +59,8 @@ private: unique_ptr camera; void createMeshBuffers(); - unique_ptr vertexBuffer; + size_t currentDrawVertexBuffer = 0; + unique_ptr vertexBuffers[2]; unique_ptr faceBuffer; unique_ptr edgeBuffer; unique_ptr triangleBuffer; @@ -75,6 +79,7 @@ private: struct Properties { glm::vec3 gravity; + // Delta time in seconds float dt; uint32_t k; }; @@ -86,8 +91,7 @@ private: unique_ptr normalPipeline; void updateUniformBuffer(); - void recordGraphicsCommandBuffer(uint32_t imageIndex); - void recordDrawCommands(); + void recordDrawCommands(VkCommandBuffer cmdBuffer); void drawFrame(); void recordComputeCommands(VkCommandBuffer cmdBuffer); diff --git a/include/vulkan/command_pool.hpp b/include/vulkan/command_pool.hpp index 49d6139..7800c1c 100644 --- a/include/vulkan/command_pool.hpp +++ 
b/include/vulkan/command_pool.hpp @@ -7,12 +7,10 @@ class Instance; class CommandPool { public: - explicit CommandPool(VkSurfaceKHR surface); + explicit CommandPool(uint32_t queueFamilyIndex, uint32_t bufferCount); ~CommandPool(); - VkCommandBuffer graphicsBuffer = VK_NULL_HANDLE; - VkCommandBuffer computeBuffer = VK_NULL_HANDLE; VkCommandPool handle = VK_NULL_HANDLE; + std::vector buffers; private: - - void createBuffers(); + void allocateBuffers(uint32_t count); }; \ No newline at end of file diff --git a/include/vulkan/instance.hpp b/include/vulkan/instance.hpp index fc5f81e..dafddfe 100644 --- a/include/vulkan/instance.hpp +++ b/include/vulkan/instance.hpp @@ -4,8 +4,11 @@ #include #include #include +#include #include "vk_mem_alloc.h" +using std::optional, std::vector; + class CommandPool; class Instance { @@ -14,35 +17,37 @@ public: ~Instance(); GLFWwindow *window = nullptr; - VkQueue graphicsQueue = VK_NULL_HANDLE; - VkQueue presentQueue = VK_NULL_HANDLE; - VkQueue computeQueue = VK_NULL_HANDLE; + VkQueue graphicsAndPresentQueue = VK_NULL_HANDLE; + VkQueue computeAndTransferQueue = VK_NULL_HANDLE; bool windowResized = false; - CommandPool* commandPool = nullptr; + CommandPool* renderingCommandPool = nullptr; + CommandPool* computeCommandPool = nullptr; + struct QueueFamilyIndices { - std::optional graphicsFamily; - std::optional computeFamily; - std::optional presentFamily; - std::optional graphicsAndComputeFamily; - bool isComplete() const { - return graphicsFamily.has_value() && - computeFamily.has_value() && - presentFamily.has_value() && - graphicsAndComputeFamily.has_value(); - } + vector graphicsAndPresent; + vector computeAndTransfer; std::set uniqueQueueFamilies(){ - return { - graphicsFamily.value(), - presentFamily.value(), - computeFamily.value(), - graphicsAndComputeFamily.value() - }; + std::set unique; + unique.insert(graphicsAndPresent.begin(), graphicsAndPresent.end()); + unique.insert(computeAndTransfer.begin(), computeAndTransfer.end()); + 
return unique; + } + uint32_t tryComputeAndTransferDedicated(){ + for (uint32_t family : computeAndTransfer){ + if (std::find(graphicsAndPresent.begin(), graphicsAndPresent.end(), family) == graphicsAndPresent.end()){ + return family; + } + } + return computeAndTransfer[0]; + } + bool isEnough(){ + return !graphicsAndPresent.empty() && !computeAndTransfer.empty(); } }; - static QueueFamilyIndices findQueueFamilies(VkPhysicalDevice device, VkSurfaceKHR surface); + QueueFamilyIndices indices {}; static Instance* instance; static VkDevice GetDevice(); @@ -67,5 +72,6 @@ private: bool isDeviceSuitable(VkPhysicalDevice potentialPhysicalDevice); static bool checkDeviceExtensionSupport(VkPhysicalDevice device); + static QueueFamilyIndices findQueueFamilies(VkPhysicalDevice device, VkSurfaceKHR surface); }; \ No newline at end of file diff --git a/shaders/pbd.comp b/shaders/pbd.comp index 543f8ae..248a26a 100644 --- a/shaders/pbd.comp +++ b/shaders/pbd.comp @@ -68,8 +68,8 @@ void preSolve(uint vID){ if (vertices[vID].w == 0){ return; } - vertices[vID].velocity += dt * gravity; - vertices[vID].position += dt * vertices[vID].velocity; + vertices[vID].velocity += dt / k * gravity; + vertices[vID].position += dt / k * vertices[vID].velocity; float dist = vertices[vID].position.y + 5; if (dist < 0){ @@ -102,7 +102,7 @@ void solveEdge(uint eID){ vec3 diff = v1.position - v2.position; float currentLength = length(diff); - float alpha = edge.compliance / dt / dt; + float alpha = edge.compliance / (dt / k) / (dt / k); float s = -(currentLength - edge.restLength) / (v1.w + v2.w + alpha); @@ -144,7 +144,7 @@ void solveTetrahedron(uint tetID){ if (w == 0) return; - float alpha = tetrahedron.compliance / dt / dt; + float alpha = tetrahedron.compliance / (dt / k) / (dt / k); float s = -volumeError / (w + alpha); @@ -163,7 +163,7 @@ void postSolve(uint vID){ if (vertices[vID].w == 0){ return; } - vertices[vID].velocity = (vertices[vID].position - vertices[vID].prevPosition) / dt; + 
vertices[vID].velocity = (vertices[vID].position - vertices[vID].prevPosition) / (dt / k); } void main() { diff --git a/src/application.cpp b/src/application.cpp index abfed1c..23d7bfa 100644 --- a/src/application.cpp +++ b/src/application.cpp @@ -12,6 +12,7 @@ #include "soft_body.hpp" #include "mesh.hpp" #include "constraints.hpp" +#include "timer.hpp" Application::Application() { createSyncObjects(); @@ -34,7 +35,7 @@ Application::Application() { createMeshBuffers(); SizeInformation sizeInformation {}; - sizeInformation.vertexCount = vertexBuffer->size / sizeof(Vertex); + sizeInformation.vertexCount = vertexBuffers[0]->size / sizeof(Vertex); sizeInformation.faceCount = faceBuffer->size / sizeof(Face); sizeInformationBuffer = make_unique( @@ -44,14 +45,14 @@ Application::Application() { properties.gravity = {0, -9.81, 0}; properties.k = 10; - properties.dt = 1.f / 60.f / static_cast(properties.k); + properties.dt = 1.f / 60.f; propertiesBuffer = make_unique( sizeof(Properties), &properties, sizeof(properties), VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, 0); - descriptorPool->bindBuffer(*vertexBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, DescriptorSet::MESH, 0); + descriptorPool->bindBuffer(*vertexBuffers[1 - currentDrawVertexBuffer], VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, DescriptorSet::MESH, 0); descriptorPool->bindBuffer(*faceBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, DescriptorSet::MESH, 1); descriptorPool->bindBuffer(*edgeBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, DescriptorSet::MESH, 2); descriptorPool->bindBuffer(*triangleBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, DescriptorSet::MESH, 3); @@ -68,12 +69,32 @@ Application::Application() { vmaFreeStatsString(Instance::GetAllocator(), stats); } +#include +#include + +using namespace std::chrono; + void Application::mainLoop() { + std::future compute = std::async(std::launch::async, [this](){ + while (!glfwWindowShouldClose(Instance::instance->window)){ + Timer timer; + + auto t1 = 
system_clock::now(); + update(); + auto t2 = system_clock::now(); + + microseconds updateDuration = duration_cast(t2 - t1); + + microseconds sleepDuration(static_cast(properties.dt * 1000 * 1000)); + std::this_thread::sleep_for(sleepDuration - updateDuration); + } + }); + while (!glfwWindowShouldClose(Instance::instance->window)){ glfwPollEvents(); - update(); drawFrame(); } + compute.wait(); vkDeviceWaitIdle(Instance::GetDevice()); } @@ -84,9 +105,11 @@ Application::~Application() { void Application::createSyncObjects() { imageAvailable = make_unique(); renderFinished = make_unique(); - computeFinished = make_unique(); - renderInFlight = make_unique(true); - computeInFlight = make_unique(true); + computeSemaphore = make_unique(); + transferFinished = make_unique(); + renderFence = make_unique(true); + computeFence = make_unique(true); + transferFence = make_unique(true); } void Application::createMeshBuffers() { @@ -102,7 +125,7 @@ void Application::createMeshBuffers() { } body = std::make_unique(&bunny, 1.f / 3); - for (size_t i = 0; i < 5; i++){ + for (size_t i = 0; i < 10; i++){ auto copy = std::make_unique(*body.get()); copy->applyVertexOffset({i * 2, 0, 2}); softBodies.push_back(std::move(copy)); @@ -196,7 +219,10 @@ void Application::createMeshBuffers() { : Buffer(size, data, size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | additionalUsageFlags, VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, 0) {} }; - vertexBuffer = make_unique(vertices.data(), vertices.size() * sizeof(Vertex), VK_BUFFER_USAGE_VERTEX_BUFFER_BIT); + vertexBuffers[0] = make_unique(vertices.data(), vertices.size() * sizeof(Vertex), + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); + vertexBuffers[1] = make_unique(vertices.data(), vertices.size() * sizeof(Vertex), + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); faceBuffer = make_unique(faces.data(), faces.size() * sizeof(Face), 
VK_BUFFER_USAGE_INDEX_BUFFER_BIT); edgeBuffer = make_unique(constraintData.edges.data(), constraintData.edges.size() * sizeof(Edge)); triangleBuffer = make_unique(constraintData.triangles.data(), constraintData.triangles.size() * sizeof(Triangle)); @@ -246,54 +272,18 @@ void Application::updateUniformBuffer() { ubo.projection[1][1] *= -1; memcpy(uniformBuffer->allocationInfo.pMappedData, &ubo, sizeof(UniformBufferObject)); -} - -void Application::recordGraphicsCommandBuffer(uint32_t imageIndex) { - VkCommandBuffer cmdBuffer = Instance::instance->commandPool->graphicsBuffer; - - VkCommandBufferBeginInfo beginInfo {}; - beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - - vkBeginCommandBuffer(cmdBuffer, &beginInfo); - - VkRenderPassBeginInfo renderPassInfo {}; - renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - renderPassInfo.renderPass = swapchain->renderPass; - renderPassInfo.framebuffer = swapchain->frameBuffers[imageIndex]; - renderPassInfo.renderArea.offset = {0, 0}; - renderPassInfo.renderArea.extent = swapchain->extent; - - VkClearValue clearValues[2] {}; - clearValues[0].color = {{0, 0, 0, 1}}; - clearValues[1].depthStencil = {1.0f, 0}; - - renderPassInfo.clearValueCount = 2; - renderPassInfo.pClearValues = clearValues; - - vkCmdBeginRenderPass(cmdBuffer, &renderPassInfo, VK_SUBPASS_CONTENTS_INLINE); - - VkViewport viewport {}; - viewport.x = 0; - viewport.y = 0; - viewport.width = static_cast(swapchain->extent.width); - viewport.height = static_cast(swapchain->extent.height); - viewport.minDepth = 0; - viewport.maxDepth = 1; - vkCmdSetViewport(cmdBuffer, 0, 1, &viewport); - VkRect2D scissor {}; - scissor.offset = {0, 0}; - scissor.extent = swapchain->extent; - vkCmdSetScissor(cmdBuffer, 0, 1, &scissor); + VkMappedMemoryRange mappedMemoryRange {}; + mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + mappedMemoryRange.offset = uniformBuffer->allocationInfo.offset; + mappedMemoryRange.size = 
uniformBuffer->allocationInfo.size; + mappedMemoryRange.memory = uniformBuffer->allocationInfo.deviceMemory; - recordDrawCommands(); - - vkCmdEndRenderPass(cmdBuffer); - vkEndCommandBuffer(cmdBuffer); + vkFlushMappedMemoryRanges(Instance::GetDevice(), 1, &mappedMemoryRange); } void Application::drawFrame() { - vkWaitForFences(Instance::GetDevice(), 1, &renderInFlight->handle, VK_TRUE, UINT64_MAX); + vkWaitForFences(Instance::GetDevice(), 1, &renderFence->handle, VK_TRUE, UINT64_MAX); uint32_t imageIndex; VkResult result = vkAcquireNextImageKHR(Instance::GetDevice(), swapchain->handle, UINT64_MAX, imageAvailable->handle, VK_NULL_HANDLE, &imageIndex); @@ -302,30 +292,96 @@ void Application::drawFrame() { return; } - vkResetFences(Instance::GetDevice(), 1, &renderInFlight->handle); - - vkResetCommandBuffer(Instance::instance->commandPool->graphicsBuffer, 0); - recordGraphicsCommandBuffer(imageIndex); + vkResetFences(Instance::GetDevice(), 1, &renderFence->handle); camera->update(0.017); updateUniformBuffer(); + VkCommandBuffer cmdBuffer = Instance::instance->renderingCommandPool->buffers[0]; + { + vkResetCommandBuffer(cmdBuffer, 0); + + VkCommandBufferBeginInfo beginInfo{}; + beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + + vkBeginCommandBuffer(cmdBuffer, &beginInfo); + + VkBufferMemoryBarrier vertexBufferBarrier{}; + vertexBufferBarrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + vertexBufferBarrier.size = vertexBuffers[currentDrawVertexBuffer]->size; + vertexBufferBarrier.offset = 0; + vertexBufferBarrier.buffer = vertexBuffers[currentDrawVertexBuffer]->handle; + vertexBufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT; + vertexBufferBarrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + + vkCmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, + nullptr, 1, &vertexBufferBarrier, 0, nullptr); + + VkBufferMemoryBarrier uniformBufferBarrier {}; + 
uniformBufferBarrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + uniformBufferBarrier.size = uniformBuffer->size; + uniformBufferBarrier.offset = 0; + uniformBufferBarrier.buffer = uniformBuffer->handle; + uniformBufferBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + uniformBufferBarrier.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT; + + vkCmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, + 0, nullptr, 1, &uniformBufferBarrier, 0, nullptr); + + VkRenderPassBeginInfo renderPassInfo{}; + renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + renderPassInfo.renderPass = swapchain->renderPass; + renderPassInfo.framebuffer = swapchain->frameBuffers[imageIndex]; + renderPassInfo.renderArea.offset = {0, 0}; + renderPassInfo.renderArea.extent = swapchain->extent; + + VkClearValue clearValues[2]{}; + clearValues[0].color = {{0, 0, 0, 1}}; + clearValues[1].depthStencil = {1.0f, 0}; + + renderPassInfo.clearValueCount = 2; + renderPassInfo.pClearValues = clearValues; + + vkCmdBeginRenderPass(cmdBuffer, &renderPassInfo, VK_SUBPASS_CONTENTS_INLINE); + + VkViewport viewport{}; + viewport.x = 0; + viewport.y = 0; + viewport.width = static_cast(swapchain->extent.width); + viewport.height = static_cast(swapchain->extent.height); + viewport.minDepth = 0; + viewport.maxDepth = 1; + vkCmdSetViewport(cmdBuffer, 0, 1, &viewport); + + VkRect2D scissor{}; + scissor.offset = {0, 0}; + scissor.extent = swapchain->extent; + vkCmdSetScissor(cmdBuffer, 0, 1, &scissor); + + recordDrawCommands(cmdBuffer); + + vkCmdEndRenderPass(cmdBuffer); + vkEndCommandBuffer(cmdBuffer); + } + VkSubmitInfo submitInfo {}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - VkSemaphore waitSemaphores[] = {imageAvailable->handle, computeFinished->handle}; - VkPipelineStageFlags waitStages[] = {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT}; - submitInfo.waitSemaphoreCount 
= 2; + VkSemaphore waitSemaphores[] = {imageAvailable->handle}; + VkPipelineStageFlags waitStages[] = {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT}; + submitInfo.waitSemaphoreCount = 1; submitInfo.pWaitSemaphores = waitSemaphores; submitInfo.pWaitDstStageMask = waitStages; submitInfo.commandBufferCount = 1; - submitInfo.pCommandBuffers = &Instance::instance->commandPool->graphicsBuffer; + submitInfo.pCommandBuffers = &cmdBuffer; VkSemaphore signalSemaphores[] = {renderFinished->handle}; submitInfo.signalSemaphoreCount = 1; submitInfo.pSignalSemaphores = signalSemaphores; - vkQueueSubmit(Instance::instance->graphicsQueue, 1, &submitInfo, renderInFlight->handle); + submitMutex.lock(); + vkQueueSubmit(Instance::instance->graphicsAndPresentQueue, 1, &submitInfo, renderFence->handle); + submitMutex.unlock(); VkPresentInfoKHR presentInfo {}; presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; @@ -337,7 +393,10 @@ void Application::drawFrame() { presentInfo.pSwapchains = swapchains; presentInfo.pImageIndices = &imageIndex; - result = vkQueuePresentKHR(Instance::instance->presentQueue, &presentInfo); + submitMutex.lock(); + result = vkQueuePresentKHR(Instance::instance->graphicsAndPresentQueue, &presentInfo); + submitMutex.unlock(); + if (result == VK_ERROR_OUT_OF_DATE_KHR || result == VK_SUBOPTIMAL_KHR || Instance::instance->windowResized){ Instance::instance->windowResized = false; swapchain->recreateSwapchain(); @@ -345,37 +404,69 @@ void Application::drawFrame() { } void Application::update() { - vkWaitForFences(Instance::GetDevice(), 1, &computeInFlight->handle, VK_TRUE, UINT64_MAX); - vkResetFences(Instance::GetDevice(), 1, &computeInFlight->handle); - + vkWaitForFences(Instance::GetDevice(), 1, &transferFence->handle, VK_TRUE, UINT64_MAX); + vkResetFences(Instance::GetDevice(), 1, &transferFence->handle); - VkCommandBuffer cmdBuffer = Instance::instance->commandPool->computeBuffer; - vkResetCommandBuffer(cmdBuffer, 0); + currentDrawVertexBuffer = 1 - 
currentDrawVertexBuffer; + descriptorPool->bindBuffer(*vertexBuffers[1 - currentDrawVertexBuffer], VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, DescriptorSet::MESH, 0); VkCommandBufferBeginInfo beginInfo {}; beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; beginInfo.flags = 0; - vkBeginCommandBuffer(cmdBuffer, &beginInfo); - recordComputeCommands(cmdBuffer); - vkEndCommandBuffer(cmdBuffer); + VkCommandBuffer cmdBuffer = Instance::instance->computeCommandPool->buffers[0]; + { + vkResetCommandBuffer(cmdBuffer, 0); + vkBeginCommandBuffer(cmdBuffer, &beginInfo); + recordComputeCommands(cmdBuffer); + vkEndCommandBuffer(cmdBuffer); + + VkSubmitInfo submit {}; + submit.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit.commandBufferCount = 1; + submit.pCommandBuffers = &cmdBuffer; + submit.signalSemaphoreCount = 1; + submit.pSignalSemaphores = &computeSemaphore->handle; + + submitMutex.lock(); + vkQueueSubmit(Instance::instance->computeAndTransferQueue, 1, &submit, nullptr); + submitMutex.unlock(); + } - VkSubmitInfo submit {}; - submit.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit.commandBufferCount = 1; - submit.pCommandBuffers = &Instance::instance->commandPool->computeBuffer; - submit.signalSemaphoreCount = 1; - submit.pSignalSemaphores = &computeFinished->handle; + cmdBuffer = Instance::instance->computeCommandPool->buffers[1]; + vkResetCommandBuffer(cmdBuffer, 0); + { + vkBeginCommandBuffer(cmdBuffer, &beginInfo); - vkQueueSubmit(Instance::instance->computeQueue, 1, &submit, computeInFlight->handle); -} + VkBufferCopy copyRegion {}; + copyRegion.size = vertexBuffers[0]->size; + copyRegion.srcOffset = 0; + copyRegion.dstOffset = 0; + + vkCmdCopyBuffer(cmdBuffer, vertexBuffers[1 - currentDrawVertexBuffer]->handle, vertexBuffers[currentDrawVertexBuffer]->handle, 1, &copyRegion); + + vkEndCommandBuffer(cmdBuffer); -void Application::recordDrawCommands() { - VkCommandBuffer cmdBuffer = Instance::instance->commandPool->graphicsBuffer; + VkPipelineStageFlags waitStage = 
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + + VkSubmitInfo submit {}; + submit.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit.commandBufferCount = 1; + submit.pCommandBuffers = &cmdBuffer; + submit.waitSemaphoreCount = 1; + submit.pWaitSemaphores = &computeSemaphore->handle; + submit.pWaitDstStageMask = &waitStage; + + submitMutex.lock(); + vkQueueSubmit(Instance::instance->computeAndTransferQueue, 1, &submit, transferFence->handle); + submitMutex.unlock(); + } +} +void Application::recordDrawCommands(VkCommandBuffer cmdBuffer) { vkCmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, graphicsPipeline->handle); - VkBuffer buffers[] = {vertexBuffer->handle}; + VkBuffer buffers[] = {vertexBuffers[currentDrawVertexBuffer]->handle}; VkDeviceSize offsets[] = {0}; vkCmdBindVertexBuffers(cmdBuffer, 0, 1, buffers, offsets); vkCmdBindIndexBuffer(cmdBuffer, faceBuffer->handle, 0, VK_INDEX_TYPE_UINT32); @@ -393,7 +484,7 @@ void Application::recordComputeCommands(VkCommandBuffer cmdBuffer) { return (threads - 1) / blockSize + 1; }; - uint32_t vertexGroupCount = getGroupCount(vertexBuffer->size / sizeof(Vertex), BlOCK_SIZE); + uint32_t vertexGroupCount = getGroupCount(vertexBuffers[1 - currentDrawVertexBuffer]->size / sizeof(Vertex), BlOCK_SIZE); uint32_t faceGroupCount = getGroupCount(faceBuffer->size / sizeof(Face), BlOCK_SIZE); VkMemoryBarrier barrier {}; diff --git a/src/vulkan/buffer.cpp b/src/vulkan/buffer.cpp index 4a7fe29..39e6007 100644 --- a/src/vulkan/buffer.cpp +++ b/src/vulkan/buffer.cpp @@ -43,7 +43,7 @@ void Buffer::copyTo(Buffer *dst) { VkCommandBufferAllocateInfo allocateInfo {}; allocateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; allocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - allocateInfo.commandPool = Instance::instance->commandPool->handle; + allocateInfo.commandPool = Instance::instance->renderingCommandPool->handle; allocateInfo.commandBufferCount = 1; VkCommandBuffer commandBuffer; @@ -64,8 +64,8 @@ void Buffer::copyTo(Buffer 
*dst) { submitInfo.commandBufferCount = 1; submitInfo.pCommandBuffers = &commandBuffer; - vkQueueSubmit(Instance::instance->graphicsQueue, 1, &submitInfo, VK_NULL_HANDLE); + vkQueueSubmit(Instance::instance->graphicsAndPresentQueue, 1, &submitInfo, VK_NULL_HANDLE); vkDeviceWaitIdle(Instance::GetDevice()); - vkFreeCommandBuffers(Instance::GetDevice(), Instance::instance->commandPool->handle, 1, &commandBuffer); + vkFreeCommandBuffers(Instance::GetDevice(), Instance::instance->renderingCommandPool->handle, 1, &commandBuffer); } diff --git a/src/vulkan/command_pool.cpp b/src/vulkan/command_pool.cpp index b41f103..a1d0c47 100644 --- a/src/vulkan/command_pool.cpp +++ b/src/vulkan/command_pool.cpp @@ -2,32 +2,29 @@ #include "application.hpp" #include "vulkan/instance.hpp" -CommandPool::CommandPool(VkSurfaceKHR surface) { - Instance::QueueFamilyIndices indices = Instance::findQueueFamilies(Instance::GetPhysicalDevice(), surface); - +CommandPool::CommandPool(uint32_t queueFamilyIndex, uint32_t bufferCount) { VkCommandPoolCreateInfo poolInfo {}; poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; poolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - poolInfo.queueFamilyIndex = indices.graphicsAndComputeFamily.value(); + poolInfo.queueFamilyIndex = queueFamilyIndex; vkCreateCommandPool(Instance::GetDevice(), &poolInfo, nullptr, &handle); - createBuffers(); + allocateBuffers(bufferCount); } -void CommandPool::createBuffers() { +void CommandPool::allocateBuffers(uint32_t count) { VkCommandBufferAllocateInfo allocateInfo {}; allocateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; allocateInfo.commandPool = handle; allocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - allocateInfo.commandBufferCount = 1; + allocateInfo.commandBufferCount = count; - vkAllocateCommandBuffers(Instance::GetDevice(), &allocateInfo, &graphicsBuffer); - vkAllocateCommandBuffers(Instance::GetDevice(), &allocateInfo, &computeBuffer); + buffers.resize(count); + 
vkAllocateCommandBuffers(Instance::GetDevice(), &allocateInfo, buffers.data()); } CommandPool::~CommandPool() { - vkFreeCommandBuffers(Instance::GetDevice(), handle, 1, &graphicsBuffer); - vkFreeCommandBuffers(Instance::GetDevice(), handle, 1, &computeBuffer); + vkFreeCommandBuffers(Instance::GetDevice(), handle, buffers.size(), buffers.data()); vkDestroyCommandPool(Instance::GetDevice(), handle, nullptr); } diff --git a/src/vulkan/instance.cpp b/src/vulkan/instance.cpp index 1379caf..22d85b2 100644 --- a/src/vulkan/instance.cpp +++ b/src/vulkan/instance.cpp @@ -59,7 +59,8 @@ Instance::Instance() { pickPhysicalDevice(); createLogicalDevice(); createAllocator(); - commandPool = new CommandPool(surface); + renderingCommandPool = new CommandPool(indices.graphicsAndPresent[0], 1); + computeCommandPool = new CommandPool(indices.tryComputeAndTransferDedicated(), 2); } void Instance::initWindow() { @@ -118,9 +119,10 @@ void Instance::pickPhysicalDevice() { std::vector devices(deviceCount); vkEnumeratePhysicalDevices(handle, &deviceCount, devices.data()); - for (const VkPhysicalDevice &device : devices){ - if (isDeviceSuitable(device)){ - physicalDevice = device; + for (const VkPhysicalDevice &potentialPhysicalDevice : devices){ + if (isDeviceSuitable(potentialPhysicalDevice)){ + physicalDevice = potentialPhysicalDevice; + indices = findQueueFamilies(physicalDevice, surface); break; } } @@ -135,8 +137,6 @@ void Instance::pickPhysicalDevice() { } void Instance::createLogicalDevice() { - QueueFamilyIndices indices = findQueueFamilies(physicalDevice, surface); - std::vector queueCreateInfos; float queuePriority = 1.0f; @@ -167,9 +167,8 @@ void Instance::createLogicalDevice() { vkCreateDevice(physicalDevice, &createInfo, nullptr, &device); - vkGetDeviceQueue(device, indices.graphicsFamily.value(), 0, &graphicsQueue); - vkGetDeviceQueue(device, indices.presentFamily.value(), 0, &presentQueue); - vkGetDeviceQueue(device, indices.graphicsAndComputeFamily.value(), 0, 
&computeQueue); + vkGetDeviceQueue(device, indices.graphicsAndPresent[0], 0, &graphicsAndPresentQueue); + vkGetDeviceQueue(device, indices.tryComputeAndTransferDedicated(), 0, &computeAndTransferQueue); } void Instance::createAllocator() { @@ -196,23 +195,18 @@ Instance::QueueFamilyIndices Instance::findQueueFamilies(VkPhysicalDevice device uint32_t i = 0; for (const VkQueueFamilyProperties& queueFamilyProperties : queueFamilies){ - if (queueFamilyProperties.queueFlags & VK_QUEUE_GRAPHICS_BIT){ - indices.graphicsFamily = i; - } - if (queueFamilyProperties.queueFlags & VK_QUEUE_COMPUTE_BIT){ - indices.computeFamily = i; - } - if (indices.graphicsFamily == i && indices.computeFamily == i){ - indices.graphicsAndComputeFamily = i; - } - VkBool32 presentSupport = false; - vkGetPhysicalDeviceSurfaceSupportKHR(device, i, surface, &presentSupport); - if (presentSupport){ - indices.presentFamily = i; - } - if (indices.isComplete()){ - break; - } + VkBool32 present = false; + vkGetPhysicalDeviceSurfaceSupportKHR(device, i, surface, &present); + + bool graphics = queueFamilyProperties.queueFlags & VK_QUEUE_GRAPHICS_BIT; + bool compute = queueFamilyProperties.queueFlags & VK_QUEUE_COMPUTE_BIT; + bool transfer = queueFamilyProperties.queueFlags & VK_QUEUE_TRANSFER_BIT; + + if (graphics && present) + indices.graphicsAndPresent.push_back(i); + if (compute && transfer) + indices.computeAndTransfer.push_back(i); + i++; } return indices; @@ -225,7 +219,7 @@ bool Instance::isDeviceSuitable(VkPhysicalDevice potentialPhysicalDevice) { VkPhysicalDeviceFeatures deviceFeatures; vkGetPhysicalDeviceFeatures(potentialPhysicalDevice, &deviceFeatures); - QueueFamilyIndices indices = findQueueFamilies(potentialPhysicalDevice, surface); + QueueFamilyIndices potentialIndices = findQueueFamilies(potentialPhysicalDevice, surface); bool extensionsSupported = checkDeviceExtensionSupport(potentialPhysicalDevice); @@ -235,7 +229,7 @@ bool Instance::isDeviceSuitable(VkPhysicalDevice 
potentialPhysicalDevice) { swapChainAdequate = !details.formats.empty() && !details.presentModes.empty(); } - return indices.isComplete() && extensionsSupported && swapChainAdequate; + return potentialIndices.isEnough() && extensionsSupported && swapChainAdequate; } bool Instance::checkDeviceExtensionSupport(VkPhysicalDevice device) { @@ -271,7 +265,8 @@ VkSurfaceKHR Instance::GetSurface() { } Instance::~Instance() { - delete commandPool; + delete renderingCommandPool; + delete computeCommandPool; vmaDestroyAllocator(allocator); vkDestroyDevice(device, nullptr); vkDestroySurfaceKHR(handle, surface, nullptr); diff --git a/src/vulkan/swapchain.cpp b/src/vulkan/swapchain.cpp index 8956981..dce6f3b 100644 --- a/src/vulkan/swapchain.cpp +++ b/src/vulkan/swapchain.cpp @@ -80,16 +80,7 @@ void Swapchain::createSwapchain() { createInfo.imageArrayLayers = 1; createInfo.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - Instance::QueueFamilyIndices indices = Instance::findQueueFamilies(Instance::GetPhysicalDevice(), Instance::GetSurface()); - uint32_t queueFamilyIndices[] = {indices.graphicsFamily.value(), indices.presentFamily.value()}; - - if (indices.graphicsFamily != indices.presentFamily){ - createInfo.imageSharingMode = VK_SHARING_MODE_CONCURRENT; - createInfo.queueFamilyIndexCount = 2; - createInfo.pQueueFamilyIndices = queueFamilyIndices; - } else { - createInfo.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; - } + createInfo.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; createInfo.preTransform = swapchainSupport.capabilities.currentTransform; createInfo.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; @@ -118,7 +109,7 @@ void Swapchain::cleanupSwapchain() { } void Swapchain::recreateSwapchain() { - vkDeviceWaitIdle(Instance::GetDevice()); + vkQueueWaitIdle(Instance::instance->graphicsAndPresentQueue); cleanupSwapchain();