Open main menu

CDOT Wiki β

Changes

TeamDS

1,704 bytes added, 00:25, 12 April 2017
Launch Config GPU Optimized Phase 2
=== GPU Optimization Phase 2 ===
For every n, we are calculating the XYCoords n number of times which is a total of n^2 times. Since the XYCoord of pixels are fixed for every pixel, we can pre generate XYCoord arrays to be mapped by a single arrayindex. However, this will increase the GPU's access to global memory. We will need to benchmark and see if this will give better times or not.
=== GenerateXYCoord Kernel ===
}
</syntaxhighlight >
 
 
=== Launch Config GPU Optimized Phase 2 ===
 
<syntaxhighlight lang="cpp">
int main(int argc, char **argv)
{
if (argc != 2)
{
cout << "Incorrect number of arg";
return 1;
}
 
char* path = argv[1];
BinaryBitmap* bitmap = LoadBitmap(path);
if (bitmap == NULL)
return 1;
 
 
int d;
cudaDeviceProp prop;
cudaGetDevice(&d);
cudaGetDeviceProperties(&prop, d);
unsigned ntpb = prop.maxThreadsDim[0];
 
int size = bitmap->GetSize();
int width = bitmap->_width;
int height = bitmap->_height;
 
// Allocate GPU memory
float* d_src;
float * d_dst;
int* d_xCoord;
int* d_yCoord;
cudaMalloc((void**)&d_src, size * sizeof(float));
cudaMalloc((void**)&d_dst, size * sizeof(float));
cudaMalloc((void**)&d_xCoord, size * sizeof(int));
cudaMalloc((void**)&d_yCoord, size * sizeof(int));
 
 
// Copy src to device src
cudaMemcpy(d_src, bitmap->_pixels, size * sizeof(float), cudaMemcpyHostToDevice);
 
// Prepare kernal launch
// Calc how many blocks to launch
int numOfBlocks = ((size + ntpb - 1) / ntpb);
 
 
// Launch grid for pre-calculating XYCoords
GenerateXYCoord << <numOfBlocks, ntpb >> >(d_xCoord, d_yCoord, width, height);
 
// Launch grid for converting
SDFGenerateCuda << <numOfBlocks, ntpb >> >(d_src, d_dst, d_xCoord, d_yCoord, size, 64);
// Wait for kernel to finish before copying
cudaDeviceSynchronize();
 
// buffer array for SDF pixels
float* dst = new float[bitmap->GetSize()];
cudaMemcpy(dst, d_dst, size * sizeof(float), cudaMemcpyDeviceToHost);
 
 
SaveBitmap(path, dst);
 
Pause();
 
// Free memory back
cudaFree(d_src);
cudaFree(d_dst);
cudaFree(d_xCoord);
cudaFree(d_yCoord);
delete bitmap;
delete dst;
 
return 0;
 
}
</syntaxhighlight >
116
edits