Revision as of 12:50, 8 March 2013

Team Carlos

Team Members

Carlos Conejo

Progress

Assignment 1

For Assignment 1, I profiled a program that builds a summed-area table (SAT). Here is the code:

// sat.cpp

/*

Profiling Results for the summarizedAreaTable() function ------

Word Problem Seconds

250 1.50

500 25.87

750 173.99

1000 658.34

*/

#include <iostream>

#include <cstdlib>

using namespace std;

/* Allocates the rows of a matrix: each of the first `size` slots of `a`
   receives a freshly allocated array of `size` floats.
   The row contents are not initialized here. */
void createMatrice(float** a, int size){
    int row = 0;
    while(row < size){
        a[row] = new float[size];
        ++row;
    }
}

/* Fills every element of the size x size matrix `a` with a pseudo-random
   value in the range [0, 1], taken from rand(). */
void initializeMatrice(float** a, int size){
    const float scale = 1.0 / RAND_MAX; // maps rand()'s [0, RAND_MAX] onto [0, 1]
    for(int row = 0; row < size; ++row){
        float* line = a[row];
        for(int col = 0; col < size; ++col)
            line[col] = rand() * scale;
    }
}

/*
 * Builds the summed area table of the size x size matrix `a` into `b`.
 *
 * b[i][j] receives the sum of a[k][m] over the rows k = i .. size-1 and the
 * columns m = 0 .. j. `a` is only read; every element of `b` is overwritten.
 *
 * Each entry is recomputed from scratch with four nested loops, so the cost
 * grows with size^4. This brute-force form is kept deliberately: it is the
 * function the profiling figures at the top of this file refer to.
 */
void summarizedAreaTable(float** a, float** b, int size){
    for(int i = size-1; i >= 0; i--){
        for(int j = 0; j < size; j++){
            float sum = 0.0f; // accumulator restarted for every output cell
            for(int k = i; k < size; k++){
                for(int m = 0; m <= j; m++){
                    sum += a[k][m];
                }
            }
            b[i][j] = sum;
        }
    }
}

/*
 * Entry point of the serial summed-area-table program.
 *
 * Expects exactly one command-line argument: the dimension of the square
 * matrix. Builds a random matrix, computes its summed area table and prints
 * "Finished".
 *
 * Returns 0 after a successful run and after the usage messages, and 1 when
 * the supplied size is not a positive integer.
 */
int main(int argc, char* argv[]){
    if(argc == 2){ // only one argument (program name + one argument) allowed
        int size = atoi(argv[1]);

        // atoi yields 0 for non-numeric text; a zero or negative dimension
        // must not reach the array allocations below.
        if(size <= 0){
            cout << "Size must be a positive integer" << endl;
            return 1;
        }

        float **a = new float*[size];
        float **b = new float*[size];

        createMatrice(a,size); // creates the matrix a
        createMatrice(b,size); // creates the matrix b
        initializeMatrice(a,size); // initializes the matrix a
        summarizedAreaTable(a,b,size); // does the SAT on a and stores it in b

        cout << "Finished" << endl;

        // release the rows first, then the arrays of row pointers
        for(int i = 0; i < size; i++){
            delete [] a[i];
            delete [] b[i];
        }
        delete [] a;
        delete [] b;

        return 0;
    }
    else if(argc < 2)
        cout << "Please provide a size" << endl; // when no arguments are supplied
    else
        cout << "Only one size is allowed" << endl; // when more than one argument (the program name + one or more arguments) is supplied

    return 0;
}

/* To print the results

cout << "a is: " << endl;

for(int i = 0; i < size; i++){

for(int j = 0; j < size; j++)

cout << a[i][j] << " ";

cout << endl;

}

*/

Assignment 2

#include <iostream>

#include <cstdlib>

#include <cuda_runtime.h>

using namespace std;

/* Fills the size * size floats stored contiguously in `a` with
   pseudo-random values in the range [0, 1], taken from rand(). */
void initializeMatrix(float* a, int size){
    const float scale = 1.0 / RAND_MAX; // maps rand()'s [0, RAND_MAX] onto [0, 1]
    int cell = 0;
    while(cell < size * size){
        a[cell] = rand() * scale;
        ++cell;
    }
}

/*
 * Displays a square matrix on standard output.
 *
 * matrix - single character used as the matrix's label in the heading line
 * a      - the size * size elements, read as `size` consecutive rows of
 *          `size` values each
 * size   - dimension of the matrix; nothing but the heading is printed when
 *          it is zero or negative
 *
 * Each value is followed by a space and each row ends with a newline.
 */
void matrixDisplay(char matrix, float* a, int size){
    cout << matrix <<" is: " << endl;

    // Bounding the loop by rows (rather than by size*size) keeps a negative
    // size from looping forever: its square is positive but no row would
    // ever advance the element index.
    for(int row = 0; row < size; row++){
        for(int col = 0; col < size; col++)
            cout << a[row * size + col] << " ";
        cout << endl;
    }
}

/*
 * Creates the summed area table of the size x size matrix A in B.
 *
 * Launch layout: one-dimensional grid of one-dimensional blocks. Each thread
 * derives a flat index idx from blockIdx.x, blockDim.x and threadIdx.x and
 * writes B[idx]; threads whose idx is not below size*size do nothing.
 * No shared memory is used and threads do not communicate.
 *
 * Both matrices are addressed as flat arrays in which `size` consecutive
 * elements form one row. B[idx] is the sum of A over the rows from idx's row
 * to the last row and over the columns from 0 to idx's column.
 *
 * Precondition: size * size must fit in an int.
 */
__global__ void SummedAreaTable(float* A, float* B, int size){
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Also rejects size <= 0, which would otherwise reach the modulo below.
    if(size <= 0 || idx >= (size*size))
        return;

    int col = idx % size;     // column of this thread's element
    int rowStart = idx - col; // flat index of the first element in its row

    float sum = 0;

    // i walks column `col` from the last row up to this thread's row;
    // j then walks left from that element to column 0.
    for(int i = (size*size) - size + col; i >= rowStart; i -= size)
        for(int j = i, k = col; k >= 0; j--, k--)
            sum += A[j];

    B[idx] = sum;
}

/*
 * Entry point of the CUDA summed-area-table program.
 *
 * Expects exactly one command-line argument: the dimension of the square
 * matrix. Fills a host matrix with random values, copies it to the device,
 * runs the SummedAreaTable kernel with one thread per element, copies the
 * result back and prints "Finished".
 *
 * Returns:
 *   0 - success, or a usage message was printed
 *   1 - invalid size, device query failure, matrix too large for one launch,
 *       or a device allocation / copy failure
 *   3 - the kernel launch or its execution failed
 */
int main(int argc, char* argv[]){
    if(argc == 2){ // only one argument (program name + one argument) allowed
        int size = atoi(argv[1]);

        // atoi yields 0 for non-numeric text. 46340 is the largest value
        // whose square still fits in a 32-bit int, which the kernel uses
        // for its indices.
        if(size <= 0 || size > 46340){
            cout << "Size must be a positive integer no larger than 46340" << endl;
            return 1;
        }

        int tSize = size * size;
        size_t bytes = static_cast<size_t>(tSize) * sizeof(float);
        int d;
        cudaError_t error; // error handler

        /* Gets the maximum number of threads per block and blocks per grid */
        cudaDeviceProp prop;
        error = cudaGetDevice(&d);
        if (error == cudaSuccess)
            error = cudaGetDeviceProperties(&prop, d);
        if (error != cudaSuccess) {
            cout << cudaGetErrorString(error) << endl;
            return 1;
        }

        /* One thread per element: a single block when the matrix fits in
           one, otherwise as many full-size blocks as needed (rounded up) */
        int Threads = prop.maxThreadsPerBlock;
        if(tSize < Threads)
            Threads = tSize;
        long long blocksNeeded = (static_cast<long long>(tSize) + Threads - 1) / Threads;
        if(blocksNeeded > prop.maxGridSize[0]){
            cout << "Matrix is too large for a single kernel launch on this device" << endl;
            return 1;
        }
        int Blocks = static_cast<int>(blocksNeeded);

        int status = 0;
        float *a = new float[tSize];
        float *b = new float[tSize];
        float *A = NULL; // device copies; NULL so cudaFree is safe on every path
        float *B = NULL;

        initializeMatrix(a,size); // initializes the matrix a

        /* allocates both device matrices and copies a into A, stopping at the first failure */
        error = cudaMalloc((void**)&A, bytes);
        if (error == cudaSuccess)
            error = cudaMalloc((void**)&B, bytes);
        if (error == cudaSuccess)
            error = cudaMemcpy(A, a, bytes, cudaMemcpyHostToDevice);
        if (error != cudaSuccess) {
            cout << cudaGetErrorString(error) << endl;
            status = 1;
        }

        /* Performs the SAT on the device on A and stores it on B */
        if (status == 0) {
            SummedAreaTable<<<Blocks,Threads>>>(A,B,size);
            error = cudaGetLastError();            // launch-configuration errors
            if (error == cudaSuccess)
                error = cudaDeviceSynchronize();   // errors raised while the kernel ran
            if (error != cudaSuccess) {
                cout << cudaGetErrorString(error) << endl;
                status = 3;
            }
        }

        /* copies device matrix B into host matrix b */
        if (status == 0) {
            error = cudaMemcpy(b, B, bytes, cudaMemcpyDeviceToHost);
            if (error != cudaSuccess) {
                cout << cudaGetErrorString(error) << endl;
                status = 1;
            }
        }

        //matrixDisplay('a', a, size); //uncomment/comment to display/not display the matrix a
        //matrixDisplay('b', b, size); //uncomment/comment to display/not display the matrix b

        /* deallocates space in both host memory and device memory */
        cudaFree(A);
        cudaFree(B);
        delete [] a;
        delete [] b;
        cudaDeviceReset();

        if (status == 0)
            cout << "Finished" << endl;
        return status;
    }
    else if(argc < 2){
        cout << "Please provide a size" << endl; // when no arguments are supplied
        return 0;
    }
    else{
        cout << "Only one size is allowed" << endl; // when more than one argument(the program name + one or more arguments) is supplied
        return 0;
    }
}

Difference between revisions of "Carlos"

Revision as of 12:50, 8 March 2013

Contents

Team Carlos

Team Members

Progress

Assignment 1

Assignment 2

Navigation menu

Personal tools

Namespaces

Variants

Views

More

Search

Navigation

get involved with CDOT

courses

course projects

links

Tools

@@ Line 166: / Line 166: @@
 */
+=== Assignment 2 ===
+#include <iostream>
+#include <cstdlib>
+#include <cuda_runtime.h>
+using namespace std;
+/* Initializes the matrix to any random number between 0 and 1 */
+void initializeMatrix(float* a, int size){
+	float f = 1.0 / RAND_MAX;
+	for(int i = 0; i < size * size; i++)
+			a[i] = rand() * f;
+}
+/* Displays the matrix */
+void matrixDisplay(char matrix, float* a, int size){
+	int i = 0;
+	cout << matrix <<" is: " << endl;
+	while(i < size*size){
+		for(int j = 0; j < size; j++, i++)
+			cout << a[i] << " ";
+		cout << endl;
+	}
+}
+/* Creates the Summed area table */
+__global__ void SummedAreaTable(float* A, float* B, int size){
+	int idx = blockIdx.x * blockDim.x + threadIdx.x;
+	float sum = 0;
+	int rest;
+	if(idx < (size*size)){
+		if(idx < size)
+			rest = idx;
+		else{
+			rest = idx - size;
+			while(rest >= size)
+				rest = rest - size;
+		}
+		for(int i = (size*size) - size + rest; i >= idx - rest; i -= size)
+			for(int j = i, k = rest; k >= 0; j--, k--)
+				sum += A[j];
+		B[idx] = sum;
+	}
+}
+int main(int argc, char* argv[]){
+	if(argc == 2){ // only one argument (program name + one argument) allowed
+		int size = atoi(argv[1]);
+		int tSize = size * size;
+		int d;
+		int nThreads;
+		int mThreads;
+		int Blocks;
+		int Threads;
+		float *a = new float[tSize];
+		float *A;
+		float *b = new float[tSize];
+		float *B;
+		cudaError_t error; // error handler
+		/* Gets the maximum number of threads and blocks */
+		cudaDeviceProp prop;
+		cudaGetDevice(&d);
+		cudaGetDeviceProperties(&prop, d);
+		nThreads = prop.maxThreadsDim[0];
+		mThreads = nThreads * prop.maxGridSize[0];
+		/* Checks if the size of the matrix is less than the maximum number of threads */
+		if((tSize) < nThreads){
+			Blocks = 1;
+			Threads = tSize;
+		}
+		/* Checks if the size of the matrix is greater than the maximum number of threads */
+		else if((tSize) > nThreads){
+			Blocks = (tSize + nThreads - 1) / nThreads;
+			Threads = nThreads;
+		}
+		/* Checks if the size of the matrix is less than the maximum number of threads multipled by the maximum number of blocks */
+		else if((tSize) > mThreads){
+			tSize = mThreads;
+			Blocks = (tSize + nThreads - 1) / nThreads;
+			Threads = nThreads;
+		}
+		dim3 dGrid(Blocks, Blocks, 1); // sets the grids
+		dim3 dBlock(Threads, Threads, 1); // sets the blocks
+		initializeMatrix(a,size); // initializes the matrix a
+		error = cudaMalloc((void**)&A, tSize * sizeof(float)); // allocates memory on the device for matrix A;
+		if (error != cudaSuccess) {
+			cout << cudaGetErrorString(error) << endl;
+    }
+		error = cudaMalloc((void**)&B, tSize * sizeof(float)); // allocates memory on the device for matrix B;
+		if (error != cudaSuccess) {
+			cout << cudaGetErrorString(error) << endl;
+    }
+		error = cudaMemcpy(A, a, tSize * sizeof(float), cudaMemcpyHostToDevice); // copies the host matrix a into the device matrix A
+		if (error != cudaSuccess) {
+			cout << cudaGetErrorString(error) << endl;
+    }
+		/* Performs the SAT on the device on A and stores it on B */
+		SummedAreaTable<<<Blocks,Threads>>>(A,B,size); // Does the SAT on a and stores it on b
+		cudaDeviceSynchronize(); // synchronizes the host and the device
+		error = cudaGetLastError();
+		if (error != cudaSuccess) {
+			cout << cudaGetErrorString(error) << endl;
+			cudaFree(a);
+			cudaFree(b);
+			delete [] a;
+			delete [] b;
+			return 3;
+	  }
+		/* copies device matrix B into host matrix b */
+		error = cudaMemcpy(b, B, tSize * sizeof(float), cudaMemcpyDeviceToHost);
+		if (error != cudaSuccess) {
+			cout << cudaGetErrorString(error) << endl;
+    }
+		//matrixDisplay('a', a, size); //uncomment/comment to display/not display the matrix a
+		//matrixDisplay('b', b, size); //uncomment/comment to display/not display the matrix b
+		/* deallocates space in both host memory and device memory */
+		cudaFree(A);
+		cudaFree(B);
+		delete [] a;
+		delete [] b;
+		cudaDeviceReset();
+		cout << "Finished" << endl;
+		return 0;
+	}
+	else if(argc < 2){
+		cout << "Please provide a size" << endl; // when no arguments are supplied
+		return 0;
+	}
+	else{
+		cout << "Only one size is allowed" << endl; // when more than one argument(the program name + one or more arguments) is supplied
+		return 0;
+	}
+}