Compare commits

...

3 Commits

  1. 86
      shaders/compute.glsl
  2. 1
      shaders/shaders.qrc
  3. 5
      src/GLWidget.cpp
  4. 19
      src/MainWindow.cpp
  5. 7
      src/MainWindow.h
  6. 4
      src/Pendulum.h
  7. 166
      src/Simulation.cpp
  8. 22
      src/Simulation.h
  9. 2
      src/Slider.h

@ -0,0 +1,86 @@
#version 430
#extension GL_ARB_gpu_shader_fp64 : enable
#define MAX_SEGMENTS 32
layout (local_size_x=MAX_SEGMENTS, local_size_y=1, local_size_z=1) in;
layout(std430, binding = 0) buffer PositionBuffer {
dvec2 X[];
};
layout(std430, binding = 1) buffer VelocityBuffer {
dvec2 V[];
};
layout(std430, binding = 2) buffer InverseMassBuffer {
double W[];
};
layout(std430, binding = 3) buffer LengthBuffer {
double L[];
};
layout(std430, binding = 4) buffer IndexBuffer {
uint indices[];
};
layout(std430, binding = 5) buffer SegmentCountBuffer {
uint segmentCounts[];
};
uniform double h;
uniform double gravity;
uniform uint substeps;
shared dvec2 P[MAX_SEGMENTS];
// distance constraint
void solveConstraint(uint segmentId, uint gIndex){
double w1 = segmentId == 0 ? 0.0 : W[gIndex - 1];
double w2 = W[gIndex];
dvec2 s = (segmentId == 0 ? dvec2(0, 0) : P[segmentId - 1]) - P[segmentId];
dvec2 n = normalize(s);
double l = length(s);
dvec2 deltaP1 = n * -w1 / (w1 + w2) * (l - L[gIndex]);
dvec2 deltaP2 = n * w2 / (w1 + w2) * (l - L[gIndex]);
if (segmentId > 0)
P[segmentId - 1] += deltaP1;
P[segmentId] += deltaP2;
}
// No need for synchronization, because each pendulum gets its own warp
void main() {
uint pendulumId = gl_WorkGroupID.x;
uint i_off = indices[pendulumId];
uint segmentCount = segmentCounts[pendulumId];
uint localId = gl_LocalInvocationID.x;
uint gIndex = i_off + localId;
// discard unneeded local threads for segments
if (localId >= segmentCount)
return;
// substepping is not parallelizable
for (int k = 0; k < substeps; k++){
// explicit integration with gravity as external force
V[gIndex] = V[gIndex] + dvec2(0, -gravity * h);
P[localId] = X[gIndex] + V[gIndex] * h;
// every segment depends on its neighbour, so we can not parallelize all constraints
// only parallelize those, who are independent of each other, skipping every second segment
if (localId % 2 == 0){
solveConstraint(localId, i_off + localId);
if (localId + 1 < segmentCount)
solveConstraint(localId + 1, gIndex + 1);
}
// update position and velocity
V[gIndex] = (P[localId] - X[gIndex]) / h;
X[gIndex] = P[localId];
}
}

@ -4,5 +4,6 @@
<file>fragment_pendula.glsl</file>
<file>vertex_quad.glsl</file>
<file>fragment_quad.glsl</file>
<file>compute.glsl</file>
</qresource>
</RCC>

@ -9,6 +9,7 @@
#include "Overlay.h"
#include <QOpenGLShaderProgram>
#include "FPS.h"
#include "Pendulum.h"
GLWidget::GLWidget(Simulation * simulation) : simulation(simulation) {
startTimer(1000 / 144);
@ -21,7 +22,7 @@ GLWidget::GLWidget(Simulation * simulation) : simulation(simulation) {
void GLWidget::initializeGL() {
initializeOpenGLFunctions();
std::cout << "OpenGL Version: " << format().majorVersion() << "." << format().minorVersion() << std::endl;
printf("OpenGL Version: %d.%d\n", format().majorVersion(), format().minorVersion());
pendulaProgram = new QOpenGLShaderProgram;
pendulaProgram->addShaderFromSourceFile(QOpenGLShader::Vertex, ":/shaders/vertex_pendula.glsl");
@ -186,6 +187,7 @@ void GLWidget::uploadStaticDataToGPU() {
}
void GLWidget::changePosition() {
auto t1 = system_clock::now();
glBindBuffer(GL_ARRAY_BUFFER, positionVBO);
auto positions = (GLfloat*) glMapBufferRange(GL_ARRAY_BUFFER, 0, GLsizeiptr(positionCount * sizeof(float)), GL_MAP_WRITE_BIT);
size_t index = 0;
@ -198,6 +200,7 @@ void GLWidget::changePosition() {
}
}
glUnmapBuffer(GL_ARRAY_BUFFER);
// std::cout << duration_cast<microseconds>(system_clock::now() - t1).count() << std::endl;
simulation->semaphore.release();
}

@ -17,13 +17,14 @@
#include <QCloseEvent>
#include <QTimer>
#include "Overlay.h"
#include "Slider.h"
MainWindow::MainWindow() {
simulationThread = new QThread(this);
simulationThread->setObjectName("Simulation");
simulation = new Simulation;
simulation->moveToThread(simulationThread);
simulationThread->start();
QMetaObject::invokeMethod(simulation, &Simulation::initialize);
masses = std::vector<double>(MaxSegments);
lengths = std::vector<double>(MaxSegments);
@ -34,6 +35,8 @@ MainWindow::MainWindow() {
buildUI();
normalizeLengths();
QMetaObject::invokeMethod(simulation, &Simulation::initialize);
}
void MainWindow::buildUI() {
@ -377,6 +380,20 @@ QWidget * MainWindow::buildSimulationUI() {
lyt->addWidget(showMasses);
}
// GPU-Acceleration
{
auto useGPU = new QCheckBox("Use GPU-Acceleration");
connect(useGPU, &QCheckBox::stateChanged, simulation, &Simulation::useGPUChanged);
connect(simulation, &Simulation::gpuNotSupported, [useGPU](const std::string &message){
useGPU->setCheckState(Qt::Unchecked);
useGPU->setEnabled(false);
useGPU->setToolTip(QString::fromStdString(message));
});
useGPU->setCheckState(Qt::Unchecked);
lyt->addWidget(useGPU);
}
return w;
}

@ -1,11 +1,12 @@
#include <QWidget>
#include <vector>
#include "Slider.h"
#include "Pendulum.h"
class Simulation;
class QGridLayout;
class GLWidget;
template<typename T>
class Slider;
enum Property {Angle, Mass, Length};
@ -23,7 +24,7 @@ private:
GLWidget * glWidget = nullptr;
static const int MaxSegments = 50;
static const int MaxSegments = 32;
std::vector<double> masses, lengths;
int segments = 0;
@ -38,7 +39,7 @@ private:
QGridLayout * segmentGrid {};
Slider<double> * gravitySlider = nullptr, * timescaleSlider = nullptr;
Slider<> * substepsSlider = nullptr;
Slider<int> * substepsSlider = nullptr;
signals:
void pendulaCreated(const std::vector<Pendulum*> &add);

@ -4,7 +4,8 @@
#include <QVector2D>
#include <QColor>
#include "Vector.h"
#include "GLWidget.h"
class GLWidget;
class Pendulum {
public:
@ -16,6 +17,7 @@ public:
double kineticEnergy() const;
private:
friend class GLWidget;
friend class Simulation;
std::vector<Vector> X, V;
std::vector<double> M, L;
QColor color;

@ -5,6 +5,7 @@
#include <omp.h>
#include <iostream>
#include "FPS.h"
#include "Pendulum.h"
#include <QOpenGLContext>
#include <QOpenGLShaderProgram>
#include <QOffscreenSurface>
@ -12,7 +13,6 @@
Simulation::Simulation() {
ups = new FPS;
ups->setUpdateInterval(100);
timer = new QTimer(this);
QTimer::connect(timer, &QTimer::timeout, this, &Simulation::update);
timer->setInterval(updateInterval);
@ -31,24 +31,90 @@ void Simulation::initialize() {
surface->create();
context->makeCurrent(surface);
initializeOpenGLFunctions();
if (!context->hasExtension("GL_ARB_gpu_shader_fp64")){
std::string message = "Double precision not supported by OpenGL! Falling back to CPU-Multithreading.";
printf("%s\n", message.c_str());
useGPUAcceleration = false;
emit gpuNotSupported(message);
}
}
int maxWorkGroupCount[3];
int maxWorkGroupSize[3];
int maxWorkGroupInvocations;
for (int i = 0; i < 3; i++){
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, i, maxWorkGroupCount + i);
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, i, maxWorkGroupSize + i);
// Read GPU Limits
{
for (int i = 0; i < 3; i++){
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, i, gpuLimits.maxWGCount + i);
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, i, gpuLimits.maxWGSize + i);
}
glGetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &gpuLimits.maxWGInvocations);
printf("Max work group count: (%d, %d, %d)\n", gpuLimits.maxWGCount[0], gpuLimits.maxWGCount[1], gpuLimits.maxWGCount[2]);
printf("Max work group size: (%d, %d, %d)\n", gpuLimits.maxWGSize[0], gpuLimits.maxWGSize[1], gpuLimits.maxWGSize[2]);
printf("Max work group invocations (x * y * z): %d\n", gpuLimits.maxWGInvocations);
}
glGetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &maxWorkGroupInvocations);
// Shader
{
program = new QOpenGLShaderProgram;
program->addShaderFromSourceFile(QOpenGLShader::Compute, ":/shaders/compute.glsl");
program->link();
glGenBuffers(6, &SSBO.positions);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, SSBO.positions);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, SSBO.velocities);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, SSBO.invMasses);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, SSBO.lengths);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, SSBO.indices);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, SSBO.segmentCounts);
}
printf("Max work group count: (%d, %d, %d)\n", maxWorkGroupCount[0], maxWorkGroupCount[1], maxWorkGroupCount[2]);
printf("Max work group size: (%d, %d, %d)\n", maxWorkGroupSize[0], maxWorkGroupSize[1], maxWorkGroupSize[2]);
printf("Max work group invocations (x * y * z): %d\n", maxWorkGroupInvocations);
}
// When the layout changes
void Simulation::updateGPUData() {
std::vector<GLdouble> positions;
std::vector<GLdouble> velocities;
std::vector<GLdouble> invMasses;
std::vector<GLdouble> lengths;
std::vector<GLuint> indices;
std::vector<GLuint> segmentCounts;
GLuint currentIndex = 0;
for (const Pendulum * p : pendula){
indices.push_back(currentIndex);
currentIndex += p->X.size();
segmentCounts.push_back(p->X.size());
for (size_t i = 0; i < p->X.size(); i++){
positions.push_back(p->X[i].x);
positions.push_back(p->X[i].y);
velocities.push_back(p->V[i].x);
velocities.push_back(p->V[i].y);
invMasses.push_back(1.0 / p->M[i]);
lengths.push_back(p->L[i]);
}
}
auto posSize = GLsizeiptr(positions.size() * sizeof(GLdouble));
auto velSize = GLsizeiptr(velocities.size() * sizeof(GLdouble));
auto invMassSize = GLsizeiptr(invMasses.size() * sizeof(GLdouble));
auto lengthSize = GLsizeiptr(lengths.size() * sizeof(GLdouble));
auto indicesSize = GLsizeiptr(indices.size() * sizeof(GLuint));
auto segmentCountsSize = GLsizeiptr(segmentCounts.size() * sizeof(GLuint));
glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.positions);
glBufferData(GL_SHADER_STORAGE_BUFFER, posSize, positions.data(), GL_DYNAMIC_DRAW);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.velocities);
glBufferData(GL_SHADER_STORAGE_BUFFER, velSize, velocities.data(), GL_DYNAMIC_DRAW);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.invMasses);
glBufferData(GL_SHADER_STORAGE_BUFFER, invMassSize, invMasses.data(), GL_STATIC_DRAW);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.lengths);
glBufferData(GL_SHADER_STORAGE_BUFFER, lengthSize, lengths.data(), GL_STATIC_DRAW);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.indices);
glBufferData(GL_SHADER_STORAGE_BUFFER, indicesSize, indices.data(), GL_STATIC_DRAW);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.segmentCounts);
glBufferData(GL_SHADER_STORAGE_BUFFER, segmentCountsSize, segmentCounts.data(), GL_STATIC_DRAW);
}
// Every few milliseconds
void Simulation::update() {
if (!isPlaying)
return;
@ -62,16 +128,66 @@ void Simulation::update() {
double newPotentialEnergy = 0;
double newKineticEnergy = 0;
#pragma omp parallel for
for (int i = 0; i < pendula.size(); i++){ // NOLINT(*-loop-convert) // not ranged based for msvc
for (int k = 0; k < substeps; k++)
pendula[i]->update(h, gravity);
double localPotentialEnergy = pendula[i]->potentialEnergy(gravity);
double localKineticEnergy = pendula[i]->kineticEnergy();
#pragma omp atomic
newPotentialEnergy += localPotentialEnergy;
#pragma omp atomic
newKineticEnergy += localKineticEnergy;
if (useGPUAcceleration) {
program->bind();
glUniform1d(program->uniformLocation("h"), h);
glUniform1d(program->uniformLocation("gravity"), gravity);
glUniform1ui(program->uniformLocation("substeps"), substeps);
glDispatchCompute(pendula.size(), 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
// Read updated positions
{
glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.positions);
auto * newPositions = (double*) glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_READ_ONLY);
int index = 0;
for (Pendulum * p : pendula){
for (Vector &point : p->X){
point.x = newPositions[index++];
point.y = newPositions[index++];
}
}
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
}
// Read updated velocities
{
glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.velocities);
auto * newVelocities = (double*) glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_READ_ONLY);
int index = 0;
for (Pendulum * p : pendula){
for (Vector &velocity : p->V){
velocity.x = newVelocities[index++];
velocity.y = newVelocities[index++];
}
}
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
}
#pragma omp parallel for
for (int i = 0; i < pendula.size(); i++){ // NOLINT(*-loop-convert) not ranged based for msvc
double localPotentialEnergy = pendula[i]->potentialEnergy(gravity);
double localKineticEnergy = pendula[i]->kineticEnergy();
#pragma omp atomic
newPotentialEnergy += localPotentialEnergy;
#pragma omp atomic
newKineticEnergy += localKineticEnergy;
}
} else {
// CPU-Multithreading
#pragma omp parallel for
for (int i = 0; i < pendula.size(); i++){ // NOLINT(*-loop-convert) not ranged based for msvc
for (int k = 0; k < substeps; k++)
pendula[i]->update(h, gravity);
double localPotentialEnergy = pendula[i]->potentialEnergy(gravity);
double localKineticEnergy = pendula[i]->kineticEnergy();
#pragma omp atomic
newPotentialEnergy += localPotentialEnergy;
#pragma omp atomic
newKineticEnergy += localKineticEnergy;
}
}
potentialEnergy = newPotentialEnergy;
@ -92,6 +208,7 @@ void Simulation::clearPendula() {
pendula.shrink_to_fit();
updateEnergy();
updateGPUData();
emit layoutChanged();
}
@ -103,6 +220,7 @@ void Simulation::addPendula(const std::vector<Pendulum *> &add) {
pendula.push_back(p);
updateEnergy();
updateGPUData();
emit layoutChanged();
}
@ -115,3 +233,9 @@ void Simulation::updateEnergy() {
}
}
void Simulation::useGPUChanged(int state) {
useGPUAcceleration = state == Qt::Checked;
if (useGPUAcceleration)
updateGPUData();
}

@ -2,12 +2,12 @@
#include <cstdint>
#include <QObject>
#include <chrono>
#include "Pendulum.h"
#include <semaphore>
#include <QOpenGLFunctions_4_3_Core>
class QTimer;
class FPS;
class Pendulum;
class QOpenGLShaderProgram;
class Simulation : public QObject, protected QOpenGLFunctions_4_3_Core {
Q_OBJECT
@ -32,14 +32,32 @@ public:
void initialize();
void updateEnergy();
signals:
void gpuNotSupported(const std::string &message);
void layoutChanged();
void positionChanged();
public slots:
void clearPendula();
void addPendula(const std::vector<Pendulum *> &add);
void useGPUChanged(int);
private slots:
void update();
private:
void updateGPUData();
struct GPULimits {
int maxWGCount[3];
int maxWGSize[3];
int maxWGInvocations;
} gpuLimits;
QOpenGLShaderProgram * program;
struct SSBO {
GLuint positions;
GLuint velocities;
GLuint invMasses;
GLuint lengths;
GLuint indices;
GLuint segmentCounts;
} SSBO;
QTimer * timer;
int updateInterval = 16;
bool useGPUAcceleration = false;
};

@ -1,3 +1,5 @@
#pragma once
#include <QSlider>
#include <functional>
#include <QLabel>

Loading…
Cancel
Save