Team failure

From CDOT Wiki
Revision as of 11:41, 14 April 2016 by John Iannandrea (talk | contribs) (Progress)
Jump to: navigation, search

GPU621/DPS921 | Participants | Groups and Projects | Resources | Glossary

Team Failure

Team Members

  1. John Iannandrea, TBB Heat diffusion
  2. Colin Campbell, OpenMP Heat diffusion
  3. Mateya Lucic, Cilk Plus Heat diffusion

Email All


Our assignment was to implement OpenMP, TBB, and Cilk Plus versions of a 2D heat diffusion algorithm.


This is the serial version of the code we have parallelized:

// Serial (single-threaded) version of the 2D diffusion solver; the baseline
// that the parallel variants on this page are derived from.
// NOTE(review): this listing appears to have lost lines in extraction (closing
// braces, access specifiers, possibly statements) — check it against the
// original source before compiling.
class SerialDiffuser : public IDiffuser {
	// Computes one time step: for every interior cell (rows and columns
	// 1 .. N-2) reads the previous field `ui` and writes the new value to `u`.
	void evolveTimestep(){
		for (int row = 1; row < N - 1; row++) {
			for (int col = 1; col < N - 1; col++) {
				// Second differences of ui across neighbouring rows (uxx) and
				// neighbouring columns (uyy), each divided by delta.
				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;

				// Explicit update: previous value plus deltaT * diff * (uxx + uyy).
				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
	// Forwards both arguments to the IDiffuser base constructor
	// (presumably grid size and number of time steps — confirm in IDiffuser).
	SerialDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
	// Sets ui to 1.0 for every cell whose scaled coordinates
	// (x, y) = (row * dir, col * dir) satisfy
	// 0.05 <= (x - 0.5)^2 + (y - 0.5)^2 <= 0.1.
	void init(){
		for (int row = 0; row < N; row++) {
			for (int col = 0; col < N; col++) {
				// Bitwise & on two bool comparisons: same truth value as &&,
				// but both sides are always evaluated.
				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
					ui[row * N + col] = 1.0;
	// Time-stepping loop; m starts at 1 and runs while m < timeSteps.
	void compute(){
		for (int m = 1; m < timeSteps; m++) {
			// Copies all N * N values of u into ui.
			std::copy(u, u + N * N, ui);


// OpenMP version of the 2D diffusion solver: same loops as the serial
// listing, with the outer row loops marked `#pragma omp parallel for`.
// NOTE(review): this listing appears to have lost lines in extraction (closing
// braces, access specifiers, possibly statements) — check it against the
// original source before compiling.
class OMPDiffuser : public IDiffuser {
	// Computes one time step over the interior cells (rows and columns
	// 1 .. N-2), reading `ui` and writing `u`.
	void evolveTimestep(){
		// Row iterations are shared among the OpenMP threads. Each iteration
		// writes only u[row * N + col] for its own row, so different
		// iterations write different elements.
		#pragma omp parallel for
		for (int row = 1; row < N - 1; row++) {
			for (int col = 1; col < N - 1; col++) {
				// Second differences of ui across neighbouring rows (uxx) and
				// neighbouring columns (uyy), each divided by delta.
				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;

				// Explicit update: previous value plus deltaT * diff * (uxx + uyy).
				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
	// Forwards both arguments to the IDiffuser base constructor
	// (presumably grid size and number of time steps — confirm in IDiffuser).
	OMPDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
	// Sets ui to 1.0 for every cell whose scaled coordinates
	// (x, y) = (row * dir, col * dir) satisfy
	// 0.05 <= (x - 0.5)^2 + (y - 0.5)^2 <= 0.1.
	void init(){
		// Rows are initialised in parallel; each iteration only writes
		// elements of its own row of ui.
		#pragma omp parallel for
		for (int row = 0; row < N; row++) {
			for (int col = 0; col < N; col++) {
				// Bitwise & on two bool comparisons: same truth value as &&,
				// but both sides are always evaluated.
				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
					ui[row * N + col] = 1.0;
	// Time-stepping loop; m starts at 1 and runs while m < timeSteps.
	// This loop carries no OpenMP pragma, so the steps run one after another.
	void compute(){
		for (int m = 1; m < timeSteps; m++) {
			// Copies all N * N values of u into ui.
			std::copy(u, u + N * N, ui);


// Cilk Plus version of the 2D diffusion solver: the outer loops use
// `cilk_for` and the array copy uses Cilk Plus array notation.
// NOTE(review): this listing appears to have lost lines in extraction (closing
// braces, access specifiers, possibly statements) — check it against the
// original source before compiling.
class CilkDiffuser : public IDiffuser {
	// Computes one time step over the interior cells (rows and columns
	// 1 .. N-2), reading `ui` and writing `u`.
	void evolveTimestep(){
		// Row iterations may run in parallel; each one writes only
		// u[row * N + col] for its own row.
		cilk_for(int row = 1; row < N - 1; row++) {
			for (int col = 1; col < N - 1; col++) {
				// Second differences of ui across neighbouring rows (uxx) and
				// neighbouring columns (uyy), each divided by delta.
				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;

				// Explicit update: previous value plus deltaT * diff * (uxx + uyy).
				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
	// Forwards both arguments to the IDiffuser base constructor
	// (presumably grid size and number of time steps — confirm in IDiffuser).
	CilkDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
	// Sets ui to 1.0 for every cell whose scaled coordinates
	// (x, y) = (row * dir, col * dir) satisfy
	// 0.05 <= (x - 0.5)^2 + (y - 0.5)^2 <= 0.1.
	void init(){
		// Rows are initialised in parallel; each iteration only writes
		// elements of its own row of ui.
		cilk_for(int row = 0; row < N; row++) {
			for (int col = 0; col < N; col++) {
				// Bitwise & on two bool comparisons: same truth value as &&,
				// but both sides are always evaluated.
				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
					ui[row * N + col] = 1.0;
	// Time-stepping loop; m starts at 1 and runs while m < timeSteps.
	void compute(){
		// NOTE(review): cilk_for allows these iterations to run concurrently,
		// yet every iteration writes the same u array. If each time step
		// depends on the previous one, this is a data race and the steps must
		// run sequentially — confirm that a plain `for` was not intended here.
		cilk_for(int m = 1; m < timeSteps; m++) {
			// Array-notation assignment: stores ui[0 .. N*N-1] into u.
			// NOTE(review): the serial and OpenMP listings on this page copy
			// the other way (u into ui) — confirm the intended direction.
			u[0:N*N] = ui[0:N*N];


// Function object that applies one diffusion time step to a 2D sub-range of
// the grid; TBBDiffuser::compute passes it to tbb::parallel_for.
// NOTE(review): this listing appears to have lost lines in extraction (closing
// braces and probably a `public:` specifier — as shown, every member is
// private) — check it against the original source before compiling.
class TBBEvolve {
	// Output field (written) and previous field (read). The pointers are
	// copied from the constructor arguments and are not owned here.
	float* u;
	float* ui;
	// Divisor of the second differences and multiplier of the update term.
	float delta, deltaT;
	// Coefficient multiplying deltaT in the update (presumably the
	// diffusivity — confirm against IDiffuser::diff).
	const float diff = 0.5;
	// Row stride used to index u and ui.
	int N;
	// NOTE(review): `n` is declared float but initialises the int member N,
	// so the value is converted float -> int — `int n` looks intended.
	TBBEvolve(float* _u, float* _ui, float d, float dt, float n) : u(_u), ui(_ui), delta(d), deltaT(dt), N(n) {}
	// Updates every cell in the sub-range r. ui is read at row +/- 1 and
	// col +/- 1, so r must not include the first or last row or column
	// (assuming u and ui hold N * N elements).
	void operator()(tbb::blocked_range2d<int> r) const{
		for (int row = r.rows().begin(); row < r.rows().end(); row++) {
			// Asks the compiler to vectorise the column loop.
			#pragma simd
			for (int col = r.cols().begin(); col < r.cols().end(); col++) {
				// Second differences of ui across neighbouring rows (uxx) and
				// neighbouring columns (uyy), each divided by delta.
				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;

				// Explicit update: previous value plus deltaT * diff * (uxx + uyy).
				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);

// TBB version of the 2D diffusion solver: each time step is run through
// tbb::parallel_for over a 2D blocked range, with TBBEvolve as the body.
// NOTE(review): this listing appears to have lost lines in extraction (closing
// braces, access specifiers, possibly statements) — check it against the
// original source before compiling.
class TBBDiffuser : public IDiffuser {
	// No statements are visible for this method in the listing; the stencil
	// update for this variant is in TBBEvolve::operator().
	void evolveTimestep(){

	// Forwards both arguments to the IDiffuser base constructor
	// (presumably grid size and number of time steps — confirm in IDiffuser).
	TBBDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
	// Sets ui to 1.0 for every cell whose scaled coordinates
	// (x, y) = (row * dir, col * dir) satisfy
	// 0.05 <= (x - 0.5)^2 + (y - 0.5)^2 <= 0.1. Runs serially.
	void init(){
		for (int row = 0; row < N; row++) {
			for (int col = 0; col < N; col++) {
				// Bitwise & on two bool comparisons: same truth value as &&,
				// but both sides are always evaluated.
				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
					ui[row * N + col] = 1.0;
	// Time-stepping loop; m starts at 1 and runs while m < timeSteps.
	// NOTE(review): no copy of u into ui is visible in this loop, unlike the
	// serial and OpenMP listings — it may have been lost from the excerpt.
	void compute(){
		for (int m = 1; m < timeSteps; m++) {
			// Interior cells only: rows [1, N-1) by columns [1, N-1).
			tbb::blocked_range2d<int> r(1, N - 1, 1, N - 1);
			// Splits r into sub-ranges and applies a TBBEvolve to each.
			tbb::parallel_for(r, TBBEvolve(u, ui, delta, deltaT, N));
