diff --git a/shaders/compute.glsl b/shaders/compute.glsl new file mode 100644 index 0000000..1b7a7d7 --- /dev/null +++ b/shaders/compute.glsl @@ -0,0 +1,85 @@ +#version 430 +#extension GL_ARB_gpu_shader_fp64 : enable +layout (local_size_x=50, local_size_y=1, local_size_z=1) in; + +layout(std430, binding = 0) buffer PositionBuffer { + dvec2 X[]; +}; + +layout(std430, binding = 1) buffer VelocityBuffer { + dvec2 V[]; +}; + +layout(std430, binding = 2) buffer MassBuffer { + double M[]; +}; + +layout(std430, binding = 3) buffer LengthBuffer { + double L[]; +}; + +layout(std430, binding = 4) buffer IndexBuffer { + uint indices[]; +}; + +layout(std430, binding = 5) buffer SegmentCountBuffer { + uint segmentCounts[]; +}; + +uniform double h; +uniform double gravity; +uniform uint substeps; + +shared dvec2 P[50]; + +void solveConstraint(uint segmentId, uint gIndex){ + double w1 = segmentId == 0 ? 0.0 : 1.0 / M[gIndex - 1]; + double w2 = 1.0 / M[gIndex]; + + dvec2 s = (segmentId == 0 ? dvec2(0, 0) : P[segmentId - 1]) - P[segmentId]; + dvec2 n = normalize(s); + double l = length(s); + + dvec2 deltaP1 = n * -w1 / (w1 + w2) * (l - L[gIndex]); + dvec2 deltaP2 = n * w2 / (w1 + w2) * (l - L[gIndex]); + + if (segmentId > 0) + P[segmentId - 1] += deltaP1; + P[segmentId] += deltaP2; +} + +void main() { + uint pendulumId = gl_WorkGroupID.x; + uint i_off = indices[pendulumId]; + uint count = segmentCounts[pendulumId]; + uint localId = gl_LocalInvocationID.x; + uint index = i_off + localId; + + if (localId >= count) + return; + + for (int k = 0; k < substeps; k++){ + + // explicit integration with gravity as external force + V[index] = V[index] + dvec2(0, -gravity * h); + P[localId] = X[index] + V[index] * h; + + memoryBarrierShared(); + + if (localId % 2 == 0){ + solveConstraint(localId, i_off + localId); + if (localId + 1 < count){ + memoryBarrierShared(); + solveConstraint(localId + 1, i_off + localId + 1); + } + } + + barrier(); + + // update position and velocity + V[index] = (P[localId] - X[index]) / h; + X[index] = P[localId]; + + memoryBarrierBuffer(); + } +} diff --git a/shaders/shaders.qrc b/shaders/shaders.qrc index eb51953..cd4cc85 100644 --- a/shaders/shaders.qrc +++ b/shaders/shaders.qrc @@ -4,5 +4,6 @@ fragment_pendula.glsl vertex_quad.glsl fragment_quad.glsl + compute.glsl \ No newline at end of file diff --git a/src/GLWidget.cpp b/src/GLWidget.cpp index c46b66c..32a6f1b 100644 --- a/src/GLWidget.cpp +++ b/src/GLWidget.cpp @@ -22,7 +22,7 @@ GLWidget::GLWidget(Simulation * simulation) : simulation(simulation) { void GLWidget::initializeGL() { initializeOpenGLFunctions(); - std::cout << "OpenGL Version: " << format().majorVersion() << "." << format().minorVersion() << std::endl; + printf("OpenGL Version: %d.%d\n", format().majorVersion(), format().minorVersion()); pendulaProgram = new QOpenGLShaderProgram; pendulaProgram->addShaderFromSourceFile(QOpenGLShader::Vertex, ":/shaders/vertex_pendula.glsl"); diff --git a/src/MainWindow.cpp b/src/MainWindow.cpp index 3abe412..ab7b54c 100644 --- a/src/MainWindow.cpp +++ b/src/MainWindow.cpp @@ -21,10 +21,10 @@ MainWindow::MainWindow() { simulationThread = new QThread(this); + simulationThread->setObjectName("Simulation"); simulation = new Simulation; simulation->moveToThread(simulationThread); simulationThread->start(); - QMetaObject::invokeMethod(simulation, &Simulation::initialize); masses = std::vector(MaxSegments); lengths = std::vector(MaxSegments); @@ -35,6 +35,8 @@ MainWindow::MainWindow() { buildUI(); normalizeLengths(); + + QMetaObject::invokeMethod(simulation, &Simulation::initialize); } void MainWindow::buildUI() { diff --git a/src/Pendulum.h b/src/Pendulum.h index faf0cd7..c8fc3c5 100644 --- a/src/Pendulum.h +++ b/src/Pendulum.h @@ -4,7 +4,8 @@ #include #include #include "Vector.h" -#include "GLWidget.h" + +class GLWidget; class Pendulum { public: @@ -16,6 +17,7 @@ public: double kineticEnergy() const; private: friend class GLWidget; + friend class Simulation; std::vector X, V; std::vector M, L; QColor color; diff --git a/src/Simulation.cpp b/src/Simulation.cpp index 86ebab4..96a4f5e 100644 --- a/src/Simulation.cpp +++ b/src/Simulation.cpp @@ -31,6 +31,9 @@ void Simulation::initialize() { surface->create(); context->makeCurrent(surface); initializeOpenGLFunctions(); + if (!context->hasExtension("GL_ARB_gpu_shader_fp64")){ + printf("Double precision not supported by OpenGL!\n"); + } } // Read GPU Limits @@ -48,11 +51,67 @@ void Simulation::initialize() { // Shader { + program = new QOpenGLShaderProgram; + program->addShaderFromSourceFile(QOpenGLShader::Compute, ":/shaders/compute.glsl"); + program->link(); + + glGenBuffers(6, &SSBO.positions); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, SSBO.positions); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, SSBO.velocities); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, SSBO.masses); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, SSBO.lengths); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, SSBO.indices); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, SSBO.segmentCounts); + } +} + +// When the layout changes +void Simulation::updateGPUData() { + std::vector positions; + std::vector velocities; + std::vector masses; + std::vector lengths; + std::vector indices; + std::vector segmentCounts; + + GLuint currentIndex = 0; + for (const Pendulum * p : pendula){ + indices.push_back(currentIndex); + currentIndex += p->X.size(); + segmentCounts.push_back(p->X.size()); + for (size_t i = 0; i < p->X.size(); i++){ + positions.push_back(GLdouble(p->X[i].x)); + positions.push_back(GLdouble(p->X[i].y)); + velocities.push_back(GLdouble(p->V[i].x)); + velocities.push_back(GLdouble(p->V[i].y)); + masses.push_back(GLfloat(p->M[i])); + lengths.push_back(GLfloat(p->L[i])); + } } + auto posSize = GLsizeiptr(positions.size() * sizeof(GLdouble)); + auto velSize = GLsizeiptr(velocities.size() * sizeof(GLdouble)); + auto massSize = GLsizeiptr(masses.size() * sizeof(GLdouble)); + auto lengthSize = GLsizeiptr(lengths.size() * sizeof(GLdouble)); + auto indicesSize = GLsizeiptr(indices.size() * sizeof(GLuint)); + auto segmentCountsSize = GLsizeiptr(segmentCounts.size() * sizeof(GLuint)); + + glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.positions); + glBufferData(GL_SHADER_STORAGE_BUFFER, posSize, positions.data(), GL_DYNAMIC_DRAW); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.velocities); + glBufferData(GL_SHADER_STORAGE_BUFFER, velSize, velocities.data(), GL_DYNAMIC_DRAW); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.masses); + glBufferData(GL_SHADER_STORAGE_BUFFER, massSize, masses.data(), GL_STATIC_DRAW); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.lengths); + glBufferData(GL_SHADER_STORAGE_BUFFER, lengthSize, lengths.data(), GL_STATIC_DRAW); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.indices); + glBufferData(GL_SHADER_STORAGE_BUFFER, indicesSize, indices.data(), GL_STATIC_DRAW); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.segmentCounts); + glBufferData(GL_SHADER_STORAGE_BUFFER, segmentCountsSize, segmentCounts.data(), GL_STATIC_DRAW); } +// Every few milliseconds void Simulation::update() { if (!isPlaying) return; @@ -66,11 +125,11 @@ void Simulation::update() { double newPotentialEnergy = 0; double newKineticEnergy = 0; + auto t1 = system_clock::now(); - + /* // OLD with CPUs { - #pragma omp parallel for for (int i = 0; i < pendula.size(); i++){ // NOLINT(*-loop-convert) // not ranged based for msvc for (int k = 0; k < substeps; k++) @@ -82,13 +141,50 @@ void Simulation::update() { #pragma omp atomic newKineticEnergy += localKineticEnergy; } - } + }*/ + // NEW with GPUs { - + program->bind(); + glUniform1d(program->uniformLocation("h"), h); + glUniform1d(program->uniformLocation("gravity"), gravity); + glUniform1ui(program->uniformLocation("substeps"), substeps); + + + glDispatchCompute(pendula.size(), 1, 1); + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + + // Read updated positions + { + glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.positions); + auto * newPositions = (double*) glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_READ_ONLY); + int index = 0; + for (Pendulum * p : pendula){ + for (Vector &point : p->X){ + point.x = newPositions[index++]; + point.y = newPositions[index++]; + } + } + glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); + } + // Read updated velocities + { + glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.velocities); + auto * newVelocities = (double*) glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_READ_ONLY); + int index = 0; + for (Pendulum * p : pendula){ + for (Vector &velocity : p->V){ + velocity.x = newVelocities[index++]; + velocity.y = newVelocities[index++]; + } + } + glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); + } } + std::cout << duration_cast(system_clock::now() - t1).count() << std::endl; + potentialEnergy = newPotentialEnergy; kineticEnergy = newKineticEnergy; @@ -133,7 +229,3 @@ void Simulation::updateEnergy() { } } -void Simulation::updateGPUData() { - -} - diff --git a/src/Simulation.h b/src/Simulation.h index 726da3f..6222ad7 100644 --- a/src/Simulation.h +++ b/src/Simulation.h @@ -47,6 +47,14 @@ private: int maxWGInvocations; } gpuLimits; QOpenGLShaderProgram * program; + struct SSBO { + GLuint positions; + GLuint velocities; + GLuint masses; + GLuint lengths; + GLuint indices; + GLuint segmentCounts; + } SSBO; QTimer * timer; int updateInterval = 16; }; \ No newline at end of file