From 296235288f4925849d1c64b03c33cc097250e090 Mon Sep 17 00:00:00 2001 From: Benjamin Kraft Date: Sat, 19 Oct 2024 10:47:38 +0200 Subject: [PATCH] abstract compute pipeline barrier --- imgui.ini | 8 +++ include/application.hpp | 9 +-- include/constraints.hpp | 3 + src/application.cpp | 147 ++++++++++++++++++++-------------------- src/soft_body.cpp | 12 ++-- 5 files changed, 96 insertions(+), 83 deletions(-) diff --git a/imgui.ini b/imgui.ini index 321faee..0278abe 100644 --- a/imgui.ini +++ b/imgui.ini @@ -32,6 +32,14 @@ Collapsed=0 Pos=1617,2 Size=302,1002 Collapsed=0 +DockId=0x00000001,0 + +[Window][Performance 2] +Pos=1617,2 +Size=302,1002 +Collapsed=0 +DockId=0x00000001,1 [Docking][Data] +DockNode ID=0x00000001 Pos=1617,2 Size=302,1002 Selected=0x60B79D0E diff --git a/include/application.hpp b/include/application.hpp index f743d8b..49042f0 100644 --- a/include/application.hpp +++ b/include/application.hpp @@ -91,12 +91,13 @@ private: unique_ptr normalPipeline; void updateCameraBuffer(); - void recordDrawCommands(VkCommandBuffer cmdBuffer); + void recordDrawCommands(VkCommandBuffer commandBuffer); void drawFrame(float dt); - void recordGrabCommands(VkCommandBuffer cmdBuffer); - void recordPBDCommands(VkCommandBuffer cmdBuffer); - void recordNormalCommands(VkCommandBuffer cmdBuffer); + void recordGrabCommands(VkCommandBuffer commandBuffer); + void recordPBDCommands(VkCommandBuffer commandBuffer); + void recordNormalCommands(VkCommandBuffer commandBuffer); + void computePipelineBarrier(VkCommandBuffer commandBuffer); void update(); diff --git a/include/constraints.hpp b/include/constraints.hpp index 8826082..134fd73 100644 --- a/include/constraints.hpp +++ b/include/constraints.hpp @@ -56,6 +56,9 @@ private: uint32_t prePartitionTetrahedronCount; }; + +// The following classes are only for the graph coloring in pre-processing + struct Constraint { virtual ~Constraint() {} virtual void writeData(ConstraintData& dataLists) const {}; diff --git a/src/application.cpp b/src/application.cpp index 33ab94c..529ab95 100644 --- a/src/application.cpp +++ b/src/application.cpp @@ -188,14 +188,14 @@ void Application::createMeshBuffers() { auto body = std::make_unique(&sphere, 1.f / 60); - for (size_t i = 0; i < 5; i++){ + for (size_t i = 0; i < 2; i++){ auto copy = std::make_unique(*body.get()); copy->applyVertexOffset({i * 2, 0, 0}); softBodies.push_back(std::move(copy)); } body = std::make_unique(&bunny, 1.f / 10); - for (size_t i = 0; i < 5; i++){ + for (size_t i = 0; i < 2; i++){ auto copy = std::make_unique(*body.get()); copy->applyVertexOffset({i * 2, 0, 2}); softBodies.push_back(std::move(copy)); @@ -481,17 +481,17 @@ void Application::drawFrame(float dt) { } } -void Application::recordDrawCommands(VkCommandBuffer cmdBuffer) { - vkCmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, graphicsPipeline->handle); +void Application::recordDrawCommands(VkCommandBuffer commandBuffer) { + vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, graphicsPipeline->handle); VkBuffer buffers[] = {vertexBuffers[currentDrawVertexBuffer]->handle}; VkDeviceSize offsets[] = {0}; - vkCmdBindVertexBuffers(cmdBuffer, 0, 1, buffers, offsets); - vkCmdBindIndexBuffer(cmdBuffer, faceBuffer->handle, 0, VK_INDEX_TYPE_UINT32); - vkCmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, graphicsPipeline->layout, 0, 1, &descriptorPool->sets[DescriptorSet::WORLD], 0, nullptr); + vkCmdBindVertexBuffers(commandBuffer, 0, 1, buffers, offsets); + vkCmdBindIndexBuffer(commandBuffer, faceBuffer->handle, 0, VK_INDEX_TYPE_UINT32); + vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, graphicsPipeline->layout, 0, 1, &descriptorPool->sets[DescriptorSet::WORLD], 0, nullptr); for (const auto &softBody : softBodies){ - vkCmdDrawIndexed(cmdBuffer, softBody->faces.size() * 3, 1, softBody->firstIndex, 0, 0); + vkCmdDrawIndexed(commandBuffer, softBody->faces.size() * 3, 1, softBody->firstIndex, 0, 0); } } @@ -567,73 +567,58 @@ uint32_t Application::GetGroupCount(uint32_t threads, uint32_t blockSize) { return (threads - 1) / blockSize + 1; } -void Application::recordGrabCommands(VkCommandBuffer cmdBuffer) { - VkMemoryBarrier barrier {}; - barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - +void Application::recordGrabCommands(VkCommandBuffer commandBuffer) { // TODO maybe add buffermemorybarrier for camera uniform, because it can be changed from main drawing thread - vkCmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, grabPipeline->handle); - vkCmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, grabPipeline->layout, 0, 1, &descriptorPool->sets[DescriptorSet::MESH], 0, nullptr); - vkCmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, grabPipeline->layout, 1, 1, &descriptorPool->sets[DescriptorSet::WORLD], 0, nullptr); - vkCmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, grabPipeline->layout, 2, 1, &descriptorPool->sets[DescriptorSet::SIMULATION], 0, nullptr); + vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, grabPipeline->handle); + vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, grabPipeline->layout, 0, 1, &descriptorPool->sets[DescriptorSet::MESH], 0, nullptr); + vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, grabPipeline->layout, 1, 1, &descriptorPool->sets[DescriptorSet::WORLD], 0, nullptr); + vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, grabPipeline->layout, 2, 1, &descriptorPool->sets[DescriptorSet::SIMULATION], 0, nullptr); GrabPushData pushConstants {}; if (grabber->started()){ pushConstants.state = 0; pushConstants.screenPosition = grabber->previousCursorPosition; - vkCmdPushConstants(cmdBuffer, grabPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(GrabPushData), &pushConstants); + vkCmdPushConstants(commandBuffer, grabPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(GrabPushData), &pushConstants); uint32_t faceInvocations = GetGroupCount(faceBuffer->size / sizeof(Face), BLOCK_SIZE_GRAB); - vkCmdDispatch(cmdBuffer, faceInvocations, 1, 1); - - vkCmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &barrier, - 0, nullptr, 0, nullptr); + vkCmdDispatch(commandBuffer, faceInvocations, 1, 1); + computePipelineBarrier(commandBuffer); pushConstants.state = 1; - vkCmdPushConstants(cmdBuffer, grabPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(GrabPushData), &pushConstants); - vkCmdDispatch(cmdBuffer, 1, 1, 1); + vkCmdPushConstants(commandBuffer, grabPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(GrabPushData), &pushConstants); + vkCmdDispatch(commandBuffer, 1, 1, 1); } - vkCmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &barrier, - 0, nullptr, 0, nullptr); + computePipelineBarrier(commandBuffer); glm::vec2 screenDelta; if (grabber->moved(screenDelta)){ pushConstants.state = 2; pushConstants.screenDelta = screenDelta; - vkCmdPushConstants(cmdBuffer, grabPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(GrabPushData), &pushConstants); - vkCmdDispatch(cmdBuffer, 1, 1, 1); + vkCmdPushConstants(commandBuffer, grabPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(GrabPushData), &pushConstants); + vkCmdDispatch(commandBuffer, 1, 1, 1); } - vkCmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &barrier, - 0, nullptr, 0, nullptr); + computePipelineBarrier(commandBuffer); if (grabber->stopped()){ pushConstants.state = 3; - vkCmdPushConstants(cmdBuffer, grabPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(GrabPushData), &pushConstants); - vkCmdDispatch(cmdBuffer, 1, 1, 1); + vkCmdPushConstants(commandBuffer, grabPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(GrabPushData), &pushConstants); + vkCmdDispatch(commandBuffer, 1, 1, 1); } - vkCmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &barrier, - 0, nullptr, 0, nullptr); + computePipelineBarrier(commandBuffer); } -void Application::recordPBDCommands(VkCommandBuffer cmdBuffer) { - VkMemoryBarrier barrier {}; - barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - +void Application::recordPBDCommands(VkCommandBuffer commandBuffer) { uint32_t vertexGroupCount = GetGroupCount(vertexBuffers[1 - currentDrawVertexBuffer]->size / sizeof(Vertex), BLOCK_SIZE_PBD); - vkCmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pbdPipeline->handle); - vkCmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pbdPipeline->layout, 0, 1, &descriptorPool->sets[DescriptorSet::MESH], 0, nullptr); - vkCmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pbdPipeline->layout, 1, 1, &descriptorPool->sets[DescriptorSet::SIMULATION], 0, nullptr); + vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pbdPipeline->handle); + vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pbdPipeline->layout, 0, 1, &descriptorPool->sets[DescriptorSet::MESH], 0, nullptr); + vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pbdPipeline->layout, 1, 1, &descriptorPool->sets[DescriptorSet::SIMULATION], 0, nullptr); uint32_t state; @@ -641,12 +626,12 @@ void Application::recordPBDCommands(VkCommandBuffer cmdBuffer) { for (size_t i = 0; i < k; i++){ state = 0; - vkCmdPushConstants(cmdBuffer, pbdPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t), &state); - vkCmdDispatch(cmdBuffer, vertexGroupCount, 1, 1); - vkCmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &barrier, 0, nullptr, 0, nullptr); + vkCmdPushConstants(commandBuffer, pbdPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t), &state); + vkCmdDispatch(commandBuffer, vertexGroupCount, 1, 1); + computePipelineBarrier(commandBuffer); state = 1; - vkCmdPushConstants(cmdBuffer, pbdPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t), &state); + vkCmdPushConstants(commandBuffer, pbdPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t), &state); for (uint32_t partition = 0; partition < constraintData.partitionCount; partition++){ auto edgePartition = constraintData.edgePartitions[partition]; @@ -657,51 +642,67 @@ void Application::recordPBDCommands(VkCommandBuffer cmdBuffer) { ConstraintData::Partition partitions[2] = {edgePartition, tetrahedronPartition}; - vkCmdPushConstants(cmdBuffer, pbdPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, + vkCmdPushConstants(commandBuffer, pbdPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, offsetof(PBDPushData, edgePartition), sizeof(partitions), partitions); uint32_t invocations = GetGroupCount(edgePartition.size + tetrahedronPartition.size, BLOCK_SIZE_PBD); - vkCmdDispatch(cmdBuffer, invocations, 1, 1); - - vkCmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &barrier, 0, nullptr, 0, nullptr); + vkCmdDispatch(commandBuffer, invocations, 1, 1); + computePipelineBarrier(commandBuffer); } state = 2; - vkCmdPushConstants(cmdBuffer, pbdPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t), &state); - vkCmdDispatch(cmdBuffer, vertexGroupCount, 1, 1); - vkCmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &barrier, 0, nullptr, 0, nullptr); + vkCmdPushConstants(commandBuffer, pbdPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t), &state); + vkCmdDispatch(commandBuffer, vertexGroupCount, 1, 1); + computePipelineBarrier(commandBuffer); } } -void Application::recordNormalCommands(VkCommandBuffer cmdBuffer) { - VkMemoryBarrier barrier {}; - barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - +void Application::recordNormalCommands(VkCommandBuffer commandBuffer) { uint32_t vertexGroupCount = GetGroupCount(vertexBuffers[1 - currentDrawVertexBuffer]->size / sizeof(Vertex), BLOCK_SIZE_NORMAL); uint32_t faceGroupCount = GetGroupCount(faceBuffer->size / sizeof(Face), BLOCK_SIZE_NORMAL); - vkCmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, normalPipeline->handle); - vkCmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, normalPipeline->layout, 0, 1, &descriptorPool->sets[DescriptorSet::MESH], 0, nullptr); + vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, normalPipeline->handle); + vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, normalPipeline->layout, 0, 1, &descriptorPool->sets[DescriptorSet::MESH], 0, nullptr); uint32_t state = 0; - vkCmdPushConstants(cmdBuffer, normalPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t), &state); - vkCmdDispatch(cmdBuffer, vertexGroupCount, 1, 1); - vkCmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &barrier, 0, nullptr, 0, nullptr); + vkCmdPushConstants(commandBuffer, normalPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t), &state); + vkCmdDispatch(commandBuffer, vertexGroupCount, 1, 1); + computePipelineBarrier(commandBuffer); state = 1; - vkCmdPushConstants(cmdBuffer, normalPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t), &state); - vkCmdDispatch(cmdBuffer, faceGroupCount, 1, 1); - vkCmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &barrier, 0, nullptr, 0, nullptr); + vkCmdPushConstants(commandBuffer, normalPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t), &state); + vkCmdDispatch(commandBuffer, faceGroupCount, 1, 1); + computePipelineBarrier(commandBuffer); state = 2; - vkCmdPushConstants(cmdBuffer, normalPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t), &state); - vkCmdDispatch(cmdBuffer, vertexGroupCount, 1, 1); + vkCmdPushConstants(commandBuffer, normalPipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t), &state); + vkCmdDispatch(commandBuffer, vertexGroupCount, 1, 1); + computePipelineBarrier(commandBuffer); +} - barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - vkCmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &barrier, 0, nullptr, 0, nullptr); +void Application::computePipelineBarrier(VkCommandBuffer commandBuffer) { + VkMemoryBarrier memoryBarrier {}; + memoryBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + memoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + memoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + + const Buffer& buffer = *vertexBuffers[1 - currentDrawVertexBuffer]; + VkBufferMemoryBarrier bufferMemoryBarrier {}; + bufferMemoryBarrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + bufferMemoryBarrier.buffer = buffer.handle; + bufferMemoryBarrier.size = buffer.size; + bufferMemoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + bufferMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + bufferMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufferMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + + vkCmdPipelineBarrier(commandBuffer, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, + 1, &memoryBarrier, + 1, &bufferMemoryBarrier, + 0, nullptr); } void Application::imGuiWindows() { diff --git a/src/soft_body.cpp b/src/soft_body.cpp index 16b7022..a365ce9 100644 --- a/src/soft_body.cpp +++ b/src/soft_body.cpp @@ -192,17 +192,17 @@ void SoftBody::splitConstraints() { unordered_map> pointToConstraints; - vector lengthConstraints; + vector distanceConstraints; vector volumeConstraints; - lengthConstraints.reserve(constraintData.edges.size()); + distanceConstraints.reserve(constraintData.edges.size()); volumeConstraints.reserve(constraintData.tetrahedra.size()); for (const Edge &edge : constraintData.edges) - lengthConstraints.push_back(DistanceConstraint(edge)); + distanceConstraints.push_back(DistanceConstraint(edge)); for (const Tetrahedron &tetrahedron : constraintData.tetrahedra) volumeConstraints.push_back(VolumeConstraint(tetrahedron)); - for (const DistanceConstraint &distanceConstraint : lengthConstraints){ + for (const DistanceConstraint &distanceConstraint : distanceConstraints){ pointToConstraints[distanceConstraint.a].push_back(&distanceConstraint); pointToConstraints[distanceConstraint.b].push_back(&distanceConstraint); } @@ -225,8 +225,8 @@ void SoftBody::splitConstraints() { graph[constraint].assign(neighbors.begin(), neighbors.end()); }; - #pragma omp parallel for default(none) shared(findAdjacent, lengthConstraints) - for (const DistanceConstraint &distanceConstraint : lengthConstraints){ + #pragma omp parallel for default(none) shared(findAdjacent, distanceConstraints) + for (const DistanceConstraint &distanceConstraint : distanceConstraints){ findAdjacent(&distanceConstraint, { distanceConstraint.a, distanceConstraint.b