Changes

Jump to: navigation, search

Team failure

4,492 bytes added, 22:20, 14 April 2016
Team Members
# [mailto:jmiannandrea@senecacollege.ca?subject=gpu John Iannandrea], TBB Heat diffusion
# [mailto:@senecacollege.ca?subject=gpu Colin Campbell], OpenMP Heat diffusion
# [mailto:mlucic3@senecacollege.ca?subject=gpu Mateya Lucic], Cilk Plus Heat diffusion
[mailto:jmiannandrea@senecacollege.ca,mlucic3@senecacollege.ca?subject=GPU Email All]
== Progress Assignment ==Our assignment was to implement OMP, TBB, and Cilk Plus versions of a 2d diffusion algorithm. === Serial ===This is the serial version of the code we have parallelized  <pre>class SerialDiffuser : public IDiffuser {protected: void evolveTimestep(){ for (int row = 1; row < N - 1; row++) { for (int col = 1; col < N - 1; col++) { float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta; float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;  u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy); } } }public: SerialDiffuser(int _N, int _T) : IDiffuser(_N, _T) {} void init(){ for (int row = 0; row < N; row++) { for (int col = 0; col < N; col++) { if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1) & (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05)) ui[row * N + col] = 1.0; } } } void compute(){ for (int m = 1; m < timeSteps; m++) { evolveTimestep(); std::copy(u, u + N * N, ui); } }};</pre> ==== Omp ====  <pre>class OMPDiffuser : public IDiffuser {protected: void evolveTimestep(){ #pragma omp parallel for for (int row = 1; row < N - 1; row++) { for (int col = 1; col < N - 1; col++) { float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta; float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;  u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy); } } }public: OMPDiffuser(int _N, int _T) : IDiffuser(_N, _T) {} void init(){ #pragma omp parallel for for (int row =0; row < N; row++) { for (int col =0; col < N; col++) { if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <=0.1) & (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >=0.05)) ui[row * N + col] = Assignment 1 .0; } } } void compute(){ for (int m = 1; m < timeSteps; m++) { evolveTimestep(); std::copy(u, u + N * N, ui); } }};</pre> ==== Cilk ==== <pre>class CilkDiffuser : public IDiffuser {protected: void evolveTimestep(){ cilk_for(int row = 1; row < N - 1; row++) { for (int col = 1; col < N - 1; col++) { float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta; float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;  u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy); } } }public: CilkDiffuser(int _N, int _T) : IDiffuser(_N, _T) {} void init(){ cilk_for(int row = 0; row < N; row++) { for (int col = 0; col < N; col++) { if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1) & (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >=0.05)) ui[row * N + col] =1.0; } } } void compute(){ cilk_for(int m =1; m < timeSteps; m++) { evolveTimestep(); u[0:N*N] = ui[0:N*N]; } }};</pre> ==== TBB ==== Assignment   <pre>class TBBEvolve { float* u; float* ui; float delta, deltaT; const float diff = 0.5; int N;public: TBBEvolve(float* _u, float* _ui, float d, float dt, float n) : u(_u), ui(_ui), delta(d), deltaT(dt), N(n) {} void operator()(tbb::blocked_range2d<int> r) const{ for (int row = r.rows().begin(); row < r.rows().end(); row++) { #pragma simd for (int col = r.cols().begin(); col < r.cols().end(); col++) { float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta; float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;  u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy); } } }}; class TBBDiffuser : public IDiffuser {protected: void evolveTimestep(){  }public: TBBDiffuser(int _N, int _T) : IDiffuser(_N, _T) {} void init(){ for (int row = 0; row < N; row++) { for (int col = 0; col < N; col++) { if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1) & (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2 ) >=0.05)) ui[row * N + col] =1.0; } } } void compute(){ for (int m =1; m < timeSteps; m++) { tbb::blocked_range2d<int> r(1, N - 1, 1, N - 1); tbb::parallel_for(r, TBBEvolve(u, ui, delta, deltaT, N)); } }};</pre>  === Assignment 3 Results === What we found was that all the parallelization methods were all very similar. We also tested this with cuda and found cuda to be the fastest. [[Image:GeyIa97.png|640px]] [[Image:TP4107j.png|300px]]

Navigation menu