Changes

Jump to: navigation, search

Sirius

2,804 bytes added, 10:42, 9 April 2018
Flat Profile
The application stands to gain a substantial performance boost from parallel programming: most of the computation time is spent calculating the average for every pixel, and those averages can be calculated concurrently, with only a single synchronization required at the end before the image is displayed.
 
=== Source Code for Box Blur ===
<syntaxhighlight lang="cpp">
/**
 * Box-blur helper: averages the three channels of the pixels in the square
 * window centred on (i, j) and writes the means through b, g and r.
 *
 * The window reaches neighbour / 2 pixels (integer division) to each side of
 * the centre. Positions that fall outside the image are skipped, and each
 * mean is taken over the pixels that were actually sampled. Dividing by
 * neighbour * neighbour instead would under-weight pixels near the image
 * border, over-weight windows built from an even neighbour (which span
 * neighbour + 1 pixels per side), and divide by zero for neighbour == 0.
 *
 * @param img        image read via at<Vec3b>(row, col)
 *                   (assumes an 8-bit, 3-channel image - TODO confirm)
 * @param i          row of the centre pixel
 * @param j          column of the centre pixel
 * @param neighbour  nominal side length of the window
 * @param b          receives the mean of channel 0
 * @param g          receives the mean of channel 1
 * @param r          receives the mean of channel 2
 * @return always 1
 */
int findingNeighbors(Mat img, int i, int j, int neighbour,float * b, float * g, float * r) {
    const int half = neighbour / 2;

    // Clip the window to the image once instead of testing every coordinate.
    const int row_begin = (i - half < 0) ? 0 : i - half;
    const int row_end = (i + half >= img.rows) ? img.rows - 1 : i + half;
    const int col_begin = (j - half < 0) ? 0 : j - half;
    const int col_end = (j + half >= img.cols) ? img.cols - 1 : j + half;

    double blue = 0.0;
    double green = 0.0;
    double red = 0.0;
    int samples = 0;

    for (int x = row_begin; x <= row_end; ++x) {
        for (int y = col_begin; y <= col_end; ++y) {
            const Vec3b pixel = img.at<Vec3b>(x, y);
            blue += pixel[0];
            green += pixel[1];
            red += pixel[2];
            ++samples;
        }
    }

    if (samples > 0) {
        *b = static_cast<float>(blue / samples);
        *g = static_cast<float>(green / samples);
        *r = static_cast<float>(red / samples);
    }
    else {
        // The window lies entirely outside the image: nothing to average.
        *b = 0.0f;
        *g = 0.0f;
        *r = 0.0f;
    }
    return 1;
}
</syntaxhighlight>
=== Algorithms (Joseph Pildush)===
0.00 17.52 0.00 1 0.00 0.00 _GLOBAL__sub_I__Z10setRandArrRSt6vectorIiSaIiEEi
0.00 17.52 0.00 1 0.00 0.00 void std::__insertion_sort<__gnu_cxx::__normal_iterator<int*, std::vector<int, std::allocator<int> > >, __gnu_cxx::__ops::_Iter_less_iter>(__gnu_cxx::__normal_iterator<int*, std::vector<int, std::allocator<int> > >, __gnu_cxx::__normal_iterator<int*, std::vector<int, std::allocator<int> > >, __gnu_cxx::__ops::_Iter_less_iter)
</source>
 
==== Source Code ====
<source>
//std::sort Algorithm
/**
 * Reports the execution time of two phases through printTiming: filling
 * `array` via setRandArr, then sorting it in ascending order with std::sort.
 *
 * @param array    vector that is filled and then sorted in place
 * @param arrSize  size argument forwarded to setRandArr
 * @param ts       by-value time point, reused as the "start" stamp
 * @param te       by-value time point, reused as the "end" stamp
 */
void stdSort(vector<int>& array,int arrSize,steady_clock::time_point ts,steady_clock::time_point te){
    std::cout << "--==Execution Time of std::sort Alogirthm==--" << std::endl;

    // Phase 1: populate the vector with random values (timed).
    ts = std::chrono::steady_clock::now();
    setRandArr(array, arrSize);
    te = std::chrono::steady_clock::now();
    printTiming("std::sort Vector (1) Initialize", te - ts);

    // Phase 2: the sort itself (timed).
    ts = std::chrono::steady_clock::now();
    std::sort(array.begin(), array.end());
    te = std::chrono::steady_clock::now();
    printTiming("std::sort algorithm", te - ts);
}
 
//saxpy Algorithm
void saxpyAlg(int arrSize,steady_clock::time_point ts,steady_clock::time_point te){
cout << endl << "--==Execution Time of saxpy Alogirthm==--" << endl;
/*saxpy Algorithm*/
vector<int> saxpyX,saxpyY;
int saxpyA = 15;
//Time the fill of 2 vectors
ts = steady_clock::now();
setRandArr(saxpyX, arrSize);
setRandArr(saxpyY, arrSize);
te = steady_clock::now();
printTiming("saxpy Vectors (2) Initialize", te - ts);
//Start timing of saxpy
ts = steady_clock::now();
for (int i = 0;i < arrSize;++i)
saxpyY[i] = saxpyA*saxpyX[i] + saxpyY[i];
//End timing of saxpy
te = steady_clock::now();
printTiming("saxpy Algorithm", te - ts);
}
 
//Prefix Sum Algorithm
/**
 * Reports the execution time of two phases through printTiming: refilling
 * `array` via setRandArr, then building its inclusive prefix sum in a local
 * vector (element k holds array[0] + ... + array[k]).
 *
 * @param array    cleared, then refilled through setRandArr
 * @param arrSize  size argument forwarded to setRandArr and the number of
 *                 prefix sums produced (presumably setRandArr yields at least
 *                 arrSize elements - verify against its definition)
 * @param ts       by-value time point, reused as the "start" stamp
 * @param te       by-value time point, reused as the "end" stamp
 */
void prefixSum(vector<int>& array,int arrSize,steady_clock::time_point ts,steady_clock::time_point te){
    cout << endl << "--==Execution Time of Prefix-Sum Alogirthm==--" << endl;
    /*Prefix-Sum Algorithm*/
    vector<int> psSum;
    array.clear();

    //Time the fill of 1 vector
    ts = steady_clock::now();
    //Fill array with random numbers
    setRandArr(array, arrSize);
    te = steady_clock::now();
    printTiming("Prefix-Sum Vector (1) Initialize", te - ts);

    //Start timing of Prefix-Sum
    ts = steady_clock::now();
    // Reading array[0] from an empty vector is undefined behaviour, so the
    // scan only runs when there is at least one element to seed it with.
    if (arrSize > 0 && !array.empty()) {
        // One allocation up front instead of repeated growth in push_back.
        psSum.reserve(arrSize);
        psSum.push_back(array[0]);
        for (int i = 1; i < arrSize; ++i)
            psSum.push_back(psSum[i - 1] + array[i]);
    }
    //End timing of Prefix-Sum
    te = steady_clock::now();
    printTiming("Prefix-Sum Algorithm", te - ts);
}
</source>
<syntaxhighlight lang="cpp">
int iDevice; cudaDeviceProp prop; cudaGetDevice(&iDevice); cudaGetDeviceProperties(&prop, iDevice); int resident_threads = prop.maxThreadsPerMultiProcessor; int resident_blocks = 8; if (prop.major >= 3 && prop.major < 5) { resident_blocks = 16;
}
else if (prop.major >= 5 && prop.major <= 6) { resident_blocks = 32; } //determine threads/block dim3 blockDims(resident_threads/resident_blocks,1,1);
//Calculate grid size to cover the whole image dim3 gridDims(pixels/blockDims.x);
</syntaxhighlight>
81
edits

Navigation menu