maybe fixed for windows

fixed version 3.3 and 4.3, support < 4.3
gpu acceleration more efficient
11 changed files with 352 additions and 23 deletions
--- a/shaders/compute.glsl
+++ b/shaders/compute.glsl
@ -0,0 +1,86 @@
+#version 430
+#extension GL_ARB_gpu_shader_fp64 : enable
+
+#define MAX_SEGMENTS 32
+
+layout (local_size_x=MAX_SEGMENTS, local_size_y=1, local_size_z=1) in;
+
+layout(std430, binding = 0) buffer PositionBuffer {
+    dvec2 X[];
+};
+
+layout(std430, binding = 1) buffer VelocityBuffer {
+    dvec2 V[];
+};
+
+layout(std430, binding = 2) buffer InverseMassBuffer {
+    double W[];
+};
+
+layout(std430, binding = 3) buffer LengthBuffer {
+    double L[];
+};
+
+layout(std430, binding = 4) buffer IndexBuffer {
+    uint indices[];
+};
+
+layout(std430, binding = 5) buffer SegmentCountBuffer {
+    uint segmentCounts[];
+};
+
+uniform double h;
+uniform double gravity;
+uniform uint substeps;
+
+shared dvec2 P[MAX_SEGMENTS];
+
+// distance constraint
+void solveConstraint(uint segmentId, uint gIndex){
+    double w1 = segmentId == 0 ? 0.0 : W[gIndex - 1];
+    double w2 = W[gIndex];
+
+    dvec2 s = (segmentId == 0 ? dvec2(0, 0) : P[segmentId - 1]) - P[segmentId];
+    dvec2 n = normalize(s);
+    double l = length(s);
+
+    dvec2 deltaP1 = n * -w1 / (w1 + w2) * (l - L[gIndex]);
+    dvec2 deltaP2 = n *  w2 / (w1 + w2) * (l - L[gIndex]);
+
+    if (segmentId > 0)
+        P[segmentId - 1] += deltaP1;
+    P[segmentId] += deltaP2;
+}
+
+// No need for synchronization, because each pendulum gets its own warp
+void main() {
+    uint pendulumId = gl_WorkGroupID.x;
+    uint i_off = indices[pendulumId];
+    uint segmentCount = segmentCounts[pendulumId];
+    uint localId = gl_LocalInvocationID.x;
+    uint gIndex = i_off + localId;
+
+    // discard unneeded local threads for segments
+    if (localId >= segmentCount)
+        return;
+
+    // substepping is not parallelizable
+    for (int k = 0; k < substeps; k++){
+
+        // explicit integration with gravity as external force
+        V[gIndex] = V[gIndex] + dvec2(0, -gravity * h);
+        P[localId] = X[gIndex] + V[gIndex] * h;
+
+        // every segment depends on its neighbour, so we can not parallelize all constraints
+        // only parallelize those, who are independent of each other, skipping every second segment
+        if (localId % 2 == 0){
+            solveConstraint(localId, i_off + localId);
+            if (localId + 1 < segmentCount)
+                solveConstraint(localId + 1, gIndex + 1);
+        }
+
+        // update position and velocity
+        V[gIndex] = (P[localId] - X[gIndex]) / h;
+        X[gIndex] = P[localId];
+    }
+}
--- a/shaders/shaders.qrc
+++ b/shaders/shaders.qrc
@ -4,5 +4,6 @@
        <file>fragment_pendula.glsl</file>
        <file>vertex_quad.glsl</file>
        <file>fragment_quad.glsl</file>
+        <file>compute.glsl</file>
    </qresource>
 </RCC>
--- a/src/GLWidget.cpp
+++ b/src/GLWidget.cpp
@ -9,6 +9,7 @@
 #include "Overlay.h"
 #include <QOpenGLShaderProgram>
 #include "FPS.h"
+#include "Pendulum.h"

 GLWidget::GLWidget(Simulation * simulation) : simulation(simulation) {
 	startTimer(1000 / 144);
@ -21,6 +22,8 @@ GLWidget::GLWidget(Simulation * simulation) : simulation(simulation) {
 void GLWidget::initializeGL() {
 	initializeOpenGLFunctions();

+	printf("OpenGL Version: %d.%d\n", format().majorVersion(), format().minorVersion());
+
 	pendulaProgram = new QOpenGLShaderProgram;
 	pendulaProgram->addShaderFromSourceFile(QOpenGLShader::Vertex, ":/shaders/vertex_pendula.glsl");
 	pendulaProgram->addShaderFromSourceFile(QOpenGLShader::Fragment, ":/shaders/fragment_pendula.glsl");
@ -43,6 +46,7 @@ void GLWidget::initializeGL() {
 	simulation->semaphore.acquire();
 	uploadStaticDataToGPU();

+	makeCurrent();
 	overlay->init();
 }

@ -99,9 +103,11 @@ bool GLWidget::AnyDialogOpen() {
 }

 void GLWidget::uploadStaticDataToGPU() {
+	makeCurrent();
+
 	auto pendula = &simulation->pendula;

-	int pointCount = std::transform_reduce(pendula->begin(), pendula->end(), 0, [](int prev, int curr){
+	size_t pointCount = std::transform_reduce(pendula->begin(), pendula->end(), 0, [](size_t prev, size_t curr){
 		return prev + curr + 1;
 	}, [](const Pendulum * p){
 		return p->X.size();
@ -180,10 +186,15 @@ void GLWidget::uploadStaticDataToGPU() {

 	glBindVertexArray(0);

+	doneCurrent();
+
 	simulation->semaphore.release();
 }

 void GLWidget::changePosition() {
+	makeCurrent();
+
+	auto t1 = system_clock::now();
 	glBindBuffer(GL_ARRAY_BUFFER, positionVBO);
 	auto positions = (GLfloat*) glMapBufferRange(GL_ARRAY_BUFFER, 0, GLsizeiptr(positionCount * sizeof(float)), GL_MAP_WRITE_BIT);
 	size_t index = 0;
@ -196,6 +207,9 @@ void GLWidget::changePosition() {
 		}
 	}
 	glUnmapBuffer(GL_ARRAY_BUFFER);
+	// std::cout << duration_cast<microseconds>(system_clock::now() - t1).count() << std::endl;
+
+	doneCurrent();
 	simulation->semaphore.release();
 }

--- a/src/GLWidget.h
+++ b/src/GLWidget.h
@ -1,7 +1,7 @@
 #pragma once

 #include <QOpenGLWidget>
-#include <QOpenGLExtraFunctions>
+#include <QOpenGLFunctions_3_3_Core>
 #include <QMatrix3x3>

 class Pendulum;
@ -9,7 +9,7 @@ class Simulation;
 class QOpenGLShaderProgram;
 class Overlay;

-class GLWidget : public QOpenGLWidget, protected QOpenGLExtraFunctions {
+class GLWidget : public QOpenGLWidget, protected QOpenGLFunctions_3_3_Core {
 public:
 	explicit GLWidget(Simulation *);
 	Overlay * overlay;
--- a/src/MainWindow.cpp
+++ b/src/MainWindow.cpp
@ -17,12 +17,15 @@
 #include <QCloseEvent>
 #include <QTimer>
 #include "Overlay.h"
+#include "Slider.h"
+#include <QOffscreenSurface>

 MainWindow::MainWindow() {
 	simulationThread = new QThread(this);
+	simulationThread->setObjectName("Simulation");
 	simulation = new Simulation;
 	simulation->moveToThread(simulationThread);
-	simulationThread->start();
+	connect(simulationThread, &QThread::started, simulation, &Simulation::initialize);

 	masses = std::vector<double>(MaxSegments);
 	lengths = std::vector<double>(MaxSegments);
@ -33,6 +36,10 @@ MainWindow::MainWindow() {
 	buildUI();

 	normalizeLengths();
+
+	auto offSurface = new QOffscreenSurface;
+	offSurface->create();
+	simulation->offSurface = offSurface;
 }

 void MainWindow::buildUI() {
@ -376,6 +383,20 @@ QWidget * MainWindow::buildSimulationUI() {
 		lyt->addWidget(showMasses);
 	}

+	// GPU-Acceleration
+	{
+		auto useGPU = new QCheckBox("Use GPU-Acceleration");
+        useGPU->setCheckState(Qt::Checked);
+		connect(useGPU, &QCheckBox::stateChanged, simulation, &Simulation::useGPUChanged);
+		connect(simulation, &Simulation::gpuNotSupported, [useGPU](const std::string &message){
+			useGPU->setCheckState(Qt::Unchecked);
+			useGPU->setEnabled(false);
+			useGPU->setToolTip(QString::fromStdString(message));
+		});
+
+		lyt->addWidget(useGPU);
+	}
+
 	return w;
 }

--- a/src/MainWindow.h
+++ b/src/MainWindow.h
@ -1,11 +1,12 @@
 #include <QWidget>
 #include <vector>
-#include "Slider.h"
 #include "Pendulum.h"

 class Simulation;
 class QGridLayout;
 class GLWidget;
+template<typename T>
+		class Slider;

 enum Property {Angle, Mass, Length};

@ -14,16 +15,16 @@ class MainWindow : public QWidget {
 public:
 	explicit MainWindow();
 	~MainWindow() override;
+	QThread * simulationThread;
 private:
 	void buildUI();
 	QWidget * buildAddUI();
 	QWidget * buildSimulationUI();
 	Simulation * simulation;
-	QThread * simulationThread;

 	GLWidget * glWidget = nullptr;

-	static const int MaxSegments = 50;
+	static const int MaxSegments = 32;

 	std::vector<double> masses, lengths;
 	int segments = 0;
@ -38,7 +39,7 @@ private:

 	QGridLayout * segmentGrid {};
 	Slider<double> * gravitySlider = nullptr, * timescaleSlider = nullptr;
-	Slider<> * substepsSlider = nullptr;
+	Slider<int> * substepsSlider = nullptr;

 signals:
 	void pendulaCreated(const std::vector<Pendulum*> &add);
--- a/src/Pendulum.h
+++ b/src/Pendulum.h
@ -4,7 +4,8 @@
 #include <QVector2D>
 #include <QColor>
 #include "Vector.h"
-#include "GLWidget.h"
+
+class GLWidget;

 class Pendulum {
 public:
@ -16,6 +17,7 @@ public:
 	double kineticEnergy() const;
 private:
 	friend class GLWidget;
+	friend class Simulation;
 	std::vector<Vector> X, V;
 	std::vector<double> M, L;
 	QColor color;
--- a/src/Simulation.cpp
+++ b/src/Simulation.cpp
@ -5,17 +5,131 @@
 #include <omp.h>
 #include <iostream>
 #include "FPS.h"
+#include "Pendulum.h"
+#include <QOpenGLContext>
+#include <QOpenGLShaderProgram>
+#include <QOffscreenSurface>

 Simulation::Simulation() {
 	ups = new FPS;
 	ups->setUpdateInterval(100);
-
 	timer = new QTimer(this);
 	QTimer::connect(timer, &QTimer::timeout, this, &Simulation::update);
 	timer->setInterval(updateInterval);
 	timer->start();
 };

+void Simulation::initialize() {
+
+	// OpenGL Offscreen functions
+	{
+		context = new QOpenGLContext;
+		auto format = QSurfaceFormat::defaultFormat();
+		format.setVersion(4, 3);
+		context->setFormat(format);
+		context->create();
+		context->makeCurrent(offSurface);
+		f = new QOpenGLFunctions_4_3_Core;
+        if (!f->initializeOpenGLFunctions()){
+            std::string message = "OpenGL 4.3 is required for Compute Shaders! Falling back to CPU-Multithreading.";
+            printf("%s\n", message.c_str());
+            useGPUAcceleration = false;
+            emit gpuNotSupported(message);
+        } else if (!context->hasExtension("GL_ARB_gpu_shader_fp64")){
+			std::string message = "Double precision not supported by OpenGL! Falling back to CPU-Multithreading.";
+			printf("%s\n", message.c_str());
+			useGPUAcceleration = false;
+			emit gpuNotSupported(message);
+		}
+	}
+
+    if (!useGPUAcceleration){
+		context->doneCurrent();
+		return;
+	}
+
+
+	// Read GPU Limits
+	{
+		for (int i = 0; i < 3; i++){
+			f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, i, gpuLimits.maxWGCount + i);
+			f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, i, gpuLimits.maxWGSize + i);
+		}
+		f->glGetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &gpuLimits.maxWGInvocations);
+
+		printf("Max work group count: (%d, %d, %d)\n", gpuLimits.maxWGCount[0], gpuLimits.maxWGCount[1], gpuLimits.maxWGCount[2]);
+		printf("Max work group size: (%d, %d, %d)\n", gpuLimits.maxWGSize[0], gpuLimits.maxWGSize[1], gpuLimits.maxWGSize[2]);
+		printf("Max work group invocations (x * y * z): %d\n", gpuLimits.maxWGInvocations);
+	}
+
+	// Shader
+	{
+		program = new QOpenGLShaderProgram;
+		program->addShaderFromSourceFile(QOpenGLShader::Compute, ":/shaders/compute.glsl");
+		program->link();
+
+		f->glGenBuffers(6, &SSBO.positions);
+		f->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, SSBO.positions);
+        f->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, SSBO.velocities);
+        f->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, SSBO.invMasses);
+        f->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, SSBO.lengths);
+        f->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, SSBO.indices);
+        f->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, SSBO.segmentCounts);
+	}
+
+	context->doneCurrent();
+}
+
+// When the layout changes
+void Simulation::updateGPUData() {
+	context->makeCurrent(offSurface);
+
+	std::vector<GLdouble> positions;
+	std::vector<GLdouble> velocities;
+	std::vector<GLdouble> invMasses;
+	std::vector<GLdouble> lengths;
+	std::vector<GLuint> indices;
+	std::vector<GLuint> segmentCounts;
+
+	GLuint currentIndex = 0;
+	for (const Pendulum * p : pendula){
+		indices.push_back(currentIndex);
+		currentIndex += p->X.size();
+		segmentCounts.push_back(p->X.size());
+		for (size_t i = 0; i < p->X.size(); i++){
+			positions.push_back(p->X[i].x);
+			positions.push_back(p->X[i].y);
+			velocities.push_back(p->V[i].x);
+			velocities.push_back(p->V[i].y);
+			invMasses.push_back(1.0 / p->M[i]);
+			lengths.push_back(p->L[i]);
+		}
+	}
+
+	auto posSize = GLsizeiptr(positions.size() * sizeof(GLdouble));
+	auto velSize = GLsizeiptr(velocities.size() * sizeof(GLdouble));
+	auto invMassSize = GLsizeiptr(invMasses.size() * sizeof(GLdouble));
+	auto lengthSize = GLsizeiptr(lengths.size() * sizeof(GLdouble));
+	auto indicesSize = GLsizeiptr(indices.size() * sizeof(GLuint));
+	auto segmentCountsSize = GLsizeiptr(segmentCounts.size() * sizeof(GLuint));
+
+    f->glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.positions);
+    f->glBufferData(GL_SHADER_STORAGE_BUFFER, posSize, positions.data(), GL_DYNAMIC_DRAW);
+    f->glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.velocities);
+    f->glBufferData(GL_SHADER_STORAGE_BUFFER, velSize, velocities.data(), GL_DYNAMIC_DRAW);
+    f->glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.invMasses);
+    f->glBufferData(GL_SHADER_STORAGE_BUFFER, invMassSize, invMasses.data(), GL_STATIC_DRAW);
+    f->glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.lengths);
+    f->glBufferData(GL_SHADER_STORAGE_BUFFER, lengthSize, lengths.data(), GL_STATIC_DRAW);
+    f->glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.indices);
+    f->glBufferData(GL_SHADER_STORAGE_BUFFER, indicesSize, indices.data(), GL_STATIC_DRAW);
+    f->glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.segmentCounts);
+    f->glBufferData(GL_SHADER_STORAGE_BUFFER, segmentCountsSize, segmentCounts.data(), GL_STATIC_DRAW);
+
+	context->doneCurrent();
+}
+
+// Every few milliseconds
 void Simulation::update() {
 	if (!isPlaying)
 		return;
@ -29,8 +143,58 @@ void Simulation::update() {
 	double newPotentialEnergy = 0;
 	double newKineticEnergy = 0;

+	if (useGPUAcceleration) {
+		context->makeCurrent(offSurface);
+		program->bind();
+        f->glUniform1d(program->uniformLocation("h"), h);
+        f->glUniform1d(program->uniformLocation("gravity"), gravity);
+        f->glUniform1ui(program->uniformLocation("substeps"), substeps);
+
+        f->glDispatchCompute(pendula.size(), 1, 1);
+        f->glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
+
+		// Read updated positions
+		{
+            f->glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.positions);
+			auto * newPositions = (double*) f->glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_READ_ONLY);
+			int index = 0;
+			for (Pendulum * p : pendula){
+				for (Vector &point : p->X){
+					point.x = newPositions[index++];
+					point.y = newPositions[index++];
+				}
+			}
+            f->glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
+		}
+
+		// Read updated velocities
+		{
+            f->glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO.velocities);
+			auto * newVelocities = (double*) f->glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_READ_ONLY);
+			int index = 0;
+			for (Pendulum * p : pendula){
+				for (Vector &velocity : p->V){
+					velocity.x = newVelocities[index++];
+					velocity.y = newVelocities[index++];
+				}
+			}
+            f->glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
+		}
+
+		#pragma omp parallel for
+		for (int i = 0; i < pendula.size(); i++){ // NOLINT(*-loop-convert) not ranged based for msvc
+			double localPotentialEnergy = pendula[i]->potentialEnergy(gravity);
+			double localKineticEnergy = pendula[i]->kineticEnergy();
+			#pragma omp atomic
+			newPotentialEnergy += localPotentialEnergy;
+			#pragma omp atomic
+			newKineticEnergy += localKineticEnergy;
+		}
+		context->doneCurrent();
+	} else {
+		// CPU-Multithreading
 		#pragma omp parallel for
-	for (int i = 0; i < pendula.size(); i++){ // NOLINT(*-loop-convert) // not ranged based for msvc
+		for (int i = 0; i < pendula.size(); i++){ // NOLINT(*-loop-convert) not ranged based for msvc
 			for (int k = 0; k < substeps; k++)
 				pendula[i]->update(h, gravity);
 			double localPotentialEnergy = pendula[i]->potentialEnergy(gravity);
@ -40,6 +204,7 @@ void Simulation::update() {
 			#pragma omp atomic
 			newKineticEnergy += localKineticEnergy;
 		}
+	}

 	potentialEnergy = newPotentialEnergy;
 	kineticEnergy = newKineticEnergy;
@ -59,6 +224,8 @@ void Simulation::clearPendula() {
 	pendula.shrink_to_fit();

 	updateEnergy();
+    if (useGPUAcceleration)
+        updateGPUData();

 	emit layoutChanged();
 }
@ -70,6 +237,8 @@ void Simulation::addPendula(const std::vector<Pendulum *> &add) {
 		pendula.push_back(p);

 	updateEnergy();
+    if (useGPUAcceleration)
+        updateGPUData();

 	emit layoutChanged();
 }
@ -82,3 +251,9 @@ void Simulation::updateEnergy() {
 	}
 }

+void Simulation::useGPUChanged(int state) {
+	useGPUAcceleration = state == Qt::Checked;
+	if (useGPUAcceleration)
+		updateGPUData();
+}
+
--- a/src/Simulation.h
+++ b/src/Simulation.h
@ -2,13 +2,14 @@
 #include <cstdint>
 #include <QObject>
 #include <chrono>
-#include "Pendulum.h"
 #include <semaphore>
+#include <QOpenGLFunctions_4_3_Core>

-using namespace std::chrono;
-
+class QOffscreenSurface;
 class QTimer;
 class FPS;
+class Pendulum;
+class QOpenGLShaderProgram;

 class Simulation : public QObject {
 	Q_OBJECT
@ -30,16 +31,39 @@ public:
 	std::binary_semaphore semaphore = std::binary_semaphore(1);
 	FPS * ups;

+	QOffscreenSurface* offSurface;
+	QOpenGLContext* context;
+
 	void updateEnergy();
 signals:
+	void gpuNotSupported(const std::string &message);
 	void layoutChanged();
 	void positionChanged();
 public slots:
+	void initialize();
 	void clearPendula();
 	void addPendula(const std::vector<Pendulum *> &add);
+	void useGPUChanged(int);
 private slots:
 	void update();
 private:
+    QOpenGLFunctions_4_3_Core * f;
+	void updateGPUData();
+	struct GPULimits {
+		int maxWGCount[3];
+		int maxWGSize[3];
+		int maxWGInvocations;
+	} gpuLimits;
+	QOpenGLShaderProgram * program;
+	struct SSBO {
+		GLuint positions;
+		GLuint velocities;
+		GLuint invMasses;
+		GLuint lengths;
+		GLuint indices;
+		GLuint segmentCounts;
+	} SSBO;
 	QTimer * timer;
 	int updateInterval = 16;
+	bool useGPUAcceleration = true;
 };
--- a/src/Slider.h
+++ b/src/Slider.h
@ -1,3 +1,5 @@
+#pragma once
+
 #include <QSlider>
 #include <functional>
 #include <QLabel>
--- a/src/main.cpp
+++ b/src/main.cpp
@ -2,6 +2,7 @@
 #include <QSurfaceFormat>
 #include "MainWindow.h"
 #include <QIcon>
+#include <QThread>

 int main(int argc, char* argv[]) {
 	QSurfaceFormat fmt = QSurfaceFormat::defaultFormat();
@ -19,5 +20,7 @@ int main(int argc, char* argv[]) {
 	w.setWindowIcon(QIcon(":/icons/app_icon.ico"));
 	w.show();

+	w.simulationThread->start();
+
 	return QApplication::exec();
 }
Author	SHA1	Message	Date
Benjamin Kraft	4ee7ee44cc	maybe fixed for windows	1 year ago
Benjamin Kraft	b9e00628f3	fixed version 3.3 and 4.3, support < 4.3	1 year ago
Benjamin Kraft	05baa0db8a	gpu acceleration more efficient	1 year ago
Benjamin Kraft	08cabd9d8e	added gpu simulation	1 year ago
Benjamin Kraft	df169be295	optimize includes	1 year ago
Benjamin Kraft	e2bf29bf75	start compute shader	1 year ago
Benjamin Kraft	50b63c59aa	higher opengl version (4.3)	1 year ago
Benjamin Kraft	24895e5cd4	higher opengl version (4.3)	1 year ago