Changes

Ghost Cells

5,260 bytes added, 01:37, 7 April 2019

→‎Assignment 3

=== Assignment 3 ===

{| class="wikitable mw-collapsible mw-collapsed"

! PCIe Optimization

|-

|

/*

* Poisson Method using two arrays.

* Non-Ghost Cells Method

* Data crosses PCIe once each way: uploaded at construction, downloaded after the final iteration

* by Tony Sim

*/

#include <cstring>

#include <cstdlib>

#include <iomanip>

#include <iostream>

#include <string>

#include <cuda_runtime.h>

#include "poisson.cuh"

namespace DPS{

/*
 * Builds the grid from a text stream.
 *
 * The column count is taken as (number of ' ' characters on the first line) + 1
 * and the row count as the number of lines in the stream. Both host buffers are
 * allocated with a one-cell border on every side, i.e. (nColumns+2) x (nRowsTotal+2)
 * floats. h_data[0] receives the file values in its interior and 0 on the border;
 * h_data[1] is zero-filled. The buffers are then uploaded through devMemSet().
 *
 * Progress messages and the devMemSet() result are written to std::cout.
 *
 * Throws std::runtime_error("Failed to Allocate Memory") if a host buffer
 * cannot be allocated.
 */
Poisson::Poisson(std::ifstream& ifs) {
    std::string line;
    nColumns = 0;
    bufferSide = 0;
    nRowsTotal = 0;

    /* find number of columns: one more than the number of spaces on the first line */
    std::getline(ifs, line);
    for (size_t i = 0; i < line.size(); i++) {
        if (line[i] == ' ') nColumns++;
    }
    nColumns++;

    /* find number of rows */
    nRowsTotal++; /* already fetched one */
    while (std::getline(ifs, line))
        nRowsTotal++;
    ifs.clear(); /* reading to the end set eofbit/failbit; reset so seekg below works */

    /* null first so a partial allocation can be released before rethrowing:
     * the destructor does not run when a constructor throws */
    h_data[0] = nullptr;
    h_data[1] = nullptr;
    try {
        for (size_t i = 0; i < 2; i++)
            h_data[i] = new float[(nColumns+2) * (nRowsTotal+2)]; /* add edge buffers */
    }
    catch (...) {
        delete [] h_data[0];
        delete [] h_data[1];
        throw std::runtime_error("Failed to Allocate Memory");
    }

    /* readin data */
    std::cout <<"Reading in data"<<std::endl;
    ifs.seekg(0, ifs.beg);

    /* fill every cell: border cells (index 0 and max of each row and column)
     * get 0, interior cells take the next value from the stream */
    for (size_t i = 0; i < nRowsTotal+2; i++) {
        for (size_t j = 0; j < nColumns+2; j++) {
            float val = 0;
            if (!(i == 0 || i == nRowsTotal + 1 || j == 0 || j == nColumns + 1))
                ifs >> val;
            h_data[0][i*(nColumns+2)+j] = val;
        }
    }

    std::cout <<"Setting buffer"<<std::endl;
    std::memset(h_data[1], 0, (nRowsTotal+2)*(nColumns+2)*sizeof(float));

    bool state = devMemSet();
    /* DEBUG */ std::cout << state << std::endl;
}

/*
 * Builds the grid from an in-memory array.
 *
 * r and c are stored as the row and column counts. Both host buffers hold
 * (r+2)*(c+2) floats; exactly that many floats are copied from d into
 * h_data[0], so d must already contain the one-cell border. h_data[1] is
 * zero-filled and both buffers are uploaded through devMemSet(), whose
 * result is not inspected here.
 *
 * Throws std::runtime_error("Failed to Allocate Memory") if a host buffer
 * cannot be allocated.
 */
Poisson::Poisson(const size_t r, const size_t c, float* d) {
    const size_t paddedCells = (r + 2) * (c + 2);
    const size_t paddedBytes = paddedCells * sizeof(float);

    nColumns = c;
    nRowsTotal = r;
    bufferSide = 0;

    try {
        for (size_t side = 0; side < 2; ++side)
            h_data[side] = new float[paddedCells];
    }
    catch (...) {
        throw std::runtime_error("Failed to Allocate Memory");
    }

    /* buffer 1 starts blank, buffer 0 starts as the caller's data */
    std::memset(h_data[1], 0, paddedBytes);
    std::memcpy(h_data[0], d, paddedBytes);

    devMemSet();
}

/*
 * Releases both host buffers (delete[]) and both device buffers (cudaFree).
 * The cudaFree status is not checked: a destructor has no way to report it.
 */
Poisson::~Poisson(){
    for (size_t i = 0; i < 2; i++) {
        delete [] h_data[i];
        cudaFree(d_data[i]);
    }
}

/*
 * Allocates the two device buffers and copies the matching host buffers
 * into them. Each buffer holds (nColumns+2)*(nRowsTotal+2) floats.
 *
 * Failures are reported on std::cerr. A buffer whose allocation fails is
 * left as nullptr so later checks and cudaFree see a well-defined value.
 *
 * Returns true only when both buffers were allocated and both copies
 * succeeded.
 */
bool Poisson::devMemSet(){
    const size_t nBytes = (nColumns+2)*(nRowsTotal+2)*sizeof(float);
    bool ok = true;

    for (size_t i = 0; i < 2; i++) {
        /* do not rely on cudaMalloc to write the pointer when it fails */
        d_data[i] = nullptr;

        cudaError_t state = cudaMalloc(&d_data[i], nBytes);
        if (state != cudaSuccess) {
            std::cerr << "ERROR on devMemSet for : " << i <<" with : " << cudaGetErrorString(state)<< std::endl;
            d_data[i] = nullptr;
            ok = false;
            continue;
        }

        state = cudaMemcpy((void*)d_data[i], (const void*)h_data[i], nBytes, cudaMemcpyHostToDevice);
        if (state != cudaSuccess) {
            std::cerr << "ERROR on devMemSet for : " << i <<" with : " << cudaGetErrorString(state)<< std::endl;
            ok = false;
        }
    }

    return ok && d_data[0] && d_data[1];
}

/*
 * Runs nIterations update steps on the device, then copies the most recent
 * device buffer back to the host.
 *
 * Each step launches update() reading d_data[bufferSide] and writing
 * d_data[1-bufferSide], then calls bufferSwitch(). Only the final result
 * is transferred to the host, with one blocking cudaMemcpy.
 *
 * Launch layout: 32x32 threads per block; grid x covers the nColumns
 * interior columns and grid y covers the nRowsTotal interior rows, matching
 * the kernel where x indexes columns and y indexes rows.
 *
 * wx, wy are passed through to the kernel unchanged.
 *
 * Returns h_data[bufferSide]. Launch or copy failures are reported on
 * std::cerr; the pointer is still returned in that case.
 */
float* Poisson::operator()(const size_t nIterations, const float wx, const float wy){
    /* calculate the grid, block, where block has 1024 threads total */
    const unsigned int blockx = 32;
    const unsigned int blocky = 32;
    /* ceil-div over the interior: the kernel adds the +1 border offset itself */
    const unsigned int gridx = static_cast<unsigned int>((nColumns   + blockx - 1) / blockx);
    const unsigned int gridy = static_cast<unsigned int>((nRowsTotal + blocky - 1) / blocky);

    /* create dim3 */
    dim3 dBlock(blockx, blocky);
    dim3 dGrid(gridx, gridy);

    /* run iterations */
    cudaError_t state = cudaSuccess;
    for (size_t i = 0; i < nIterations; i++) {
        update<<<dGrid,dBlock>>>(d_data[1-bufferSide], d_data[bufferSide], nColumns, nRowsTotal, wx, wy);

        /* a bad launch configuration is only visible through cudaGetLastError */
        state = cudaGetLastError();
        if (state != cudaSuccess) {
            std::cerr << "ERROR on () when launching update kernel" <<" with : " << cudaGetErrorString(state)<< std::endl;
            break; /* nothing was written this step, so keep the current buffer */
        }
        bufferSwitch();
    }

    /* output results from device to host; the blocking copy also waits for the kernels */
    state = cudaMemcpy(h_data[bufferSide], d_data[bufferSide], (nColumns+2)*(nRowsTotal+2)*sizeof(float), cudaMemcpyDeviceToHost);
    if (state != cudaSuccess)
        std::cerr << "ERROR on () when copying data back to host" <<" with : " << cudaGetErrorString(state)<< std::endl;

    return h_data[bufferSide];
}

/*
 * Writes the interior of the current host buffer to ofs.
 *
 * Values are printed fixed-point with one decimal, width 8, each followed
 * by a comma. The outer loop walks columns and the inner loop walks rows,
 * so every output line holds one grid column (rows 1..nRowsTotal).
 * Border cells are not printed. The stream's fixed/precision settings are
 * left changed on return.
 */
void Poisson::show(std::ostream& ofs) const{
    ofs << std::fixed << std::setprecision(1);
    for (size_t j = 1; j <= nColumns ; j++) {
        for (size_t i = 1 ; i <= nRowsTotal ; i++)
            ofs << std::setw(8) << h_data[bufferSide][i * (nColumns+2) + j]<<",";
        ofs << std::endl;
    }
}

/*
 * One update step over the grid interior.
 *
 * newD  - destination buffer, (nRow+2) x (nCol+2) floats, row stride nCol+2
 * currD - source buffer with the same layout
 * nCol  - number of interior columns
 * nRow  - number of interior rows
 * wx    - weight applied to the (i+1, i-1) neighbour difference
 * wy    - weight applied to the (j+1, j-1) neighbour difference
 *
 * Launch layout: 2D grid of 2D blocks; x maps to column j, y maps to row i,
 * both offset by +1 to skip the border. Threads that fall outside the
 * interior exit without touching memory, so the grid may be rounded up to
 * whole blocks. Border cells of newD are never written. No shared memory
 * is used.
 */
__global__ void update (float* newD, const float* currD, int nCol, int nRow, const float wx, const float wy){
    const size_t j = blockDim.x * blockIdx.x + threadIdx.x + 1; /* for x axis */
    const size_t i = blockDim.y * blockIdx.y + threadIdx.y + 1; /* for y axis */

    /* interior is 1..nRow by 1..nCol; surplus threads from the ceil-div grid stop here */
    if (i > static_cast<size_t>(nRow) || j > static_cast<size_t>(nCol))
        return;

    const size_t stride = static_cast<size_t>(nCol) + 2;
    const size_t idx    = i * stride + j;

    const float curr = currD[idx];
    const float dir1 = currD[idx + stride]; /* row i+1 */
    const float dir2 = currD[idx - stride]; /* row i-1 */
    const float dir3 = currD[idx + 1];      /* column j+1 */
    const float dir4 = currD[idx - 1];      /* column j-1 */

    newD[idx] = curr + wx * (dir1+dir2-2.0f*curr) + wy * (dir3+dir4-2.0f*curr);
}

</source>

|}

Ysim2

70

edits

Changes

Ghost Cells

Navigation menu

Personal tools

Namespaces

Variants

Views

More

Search

Navigation

get involved with CDOT

courses

course projects

links

Tools