GPU621/DPS921 | Participants | Groups and Projects | Resources | Glossary

Team Failure

Team Members

John Iannandrea, TBB Heat diffusion
Colin Campbell, OpenMP Heat diffusion
Mateya Lucic, Cilk Plus Heat diffusion

Assignment

Our assignment was to implement OpenMP, TBB, and Cilk Plus versions of a 2D diffusion algorithm.

Serial

This is the serial version of the code that we parallelized.

class SerialDiffuser : public IDiffuser {
protected:
	void evolveTimestep(){
		for (int row = 1; row < N - 1; row++) {
			for (int col = 1; col < N - 1; col++) {
				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;

				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
			}
		}
	}
public:
	SerialDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
	void init(){
		for (int row = 0; row < N; row++) {
			for (int col = 0; col < N; col++) {
				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
					ui[row * N + col] = 1.0;
			}
		}
	}
	void compute(){
		for (int m = 1; m < timeSteps; m++) {
			evolveTimestep();
			std::copy(u, u + N * N, ui);
		}
	}
};

Omp

class OMPDiffuser : public IDiffuser {
protected:
	void evolveTimestep(){
		#pragma omp parallel for
		for (int row = 1; row < N - 1; row++) {
			for (int col = 1; col < N - 1; col++) {
				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;

				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
			}
		}
	}
public:
	OMPDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
	void init(){
		#pragma omp parallel for
		for (int row = 0; row < N; row++) {
			for (int col = 0; col < N; col++) {
				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
					ui[row * N + col] = 1.0;
			}
		}
	}
	void compute(){
		for (int m = 1; m < timeSteps; m++) {
			evolveTimestep();
			std::copy(u, u + N * N, ui);
		}
	}
};

Cilk

class CilkDiffuser : public IDiffuser {
protected:
	void evolveTimestep(){
		cilk_for(int row = 1; row < N - 1; row++) {
			for (int col = 1; col < N - 1; col++) {
				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;

				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
			}
		}
	}
public:
	CilkDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
	void init(){
		cilk_for(int row = 0; row < N; row++) {
			for (int col = 0; col < N; col++) {
				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
					ui[row * N + col] = 1.0;
			}
		}
	}
	void compute(){
		cilk_for(int m = 1; m < timeSteps; m++) {
			evolveTimestep();
			u[0:N*N] = ui[0:N*N];
		}
	}
};

TBB

class TBBEvolve {
	float* u;
	float* ui;
	float delta, deltaT;
	const float diff = 0.5;
	int N;
public:
	TBBEvolve(float* _u, float* _ui, float d, float dt, float n) : u(_u), ui(_ui), delta(d), deltaT(dt), N(n) {}
	void operator()(tbb::blocked_range2d<int> r) const{
		for (int row = r.rows().begin(); row < r.rows().end(); row++) {
			#pragma simd
			for (int col = r.cols().begin(); col < r.cols().end(); col++) {
				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;

				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
			}
		}
	}
};

class TBBDiffuser : public IDiffuser {
protected:
	void evolveTimestep(){

	}
public:
	TBBDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
	void init(){
		for (int row = 0; row < N; row++) {
			for (int col = 0; col < N; col++) {
				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
					ui[row * N + col] = 1.0;
			}
		}
	}
	void compute(){
		for (int m = 1; m < timeSteps; m++) {
			tbb::blocked_range2d<int> r(1, N - 1, 1, N - 1);
			tbb::parallel_for(r, TBBEvolve(u, ui, delta, deltaT, N));
		}
	}
};

Results

We found that the parallelization methods were all very similar. We also tested this with CUDA and found CUDA to be the fastest.

CDOT Wiki ^β

Team failure

Team Failure

Team Members

Assignment

Serial

Omp

Cilk

TBB

Results

CDOT Wiki β

Team failure

Team Failure

Team Members

Assignment

Serial

Omp

Cilk

TBB

Results

CDOT Wiki ^β