Changes

Jump to: navigation, search

Algo holics

3,543 bytes added, 02:14, 31 March 2019
Assignment 2
=== Assignment 2 ===
We decided to choose the neural network code to paralellize. The reason being it gives us a good learning opportunity and more importantly that we can potentially speed up the execution time of the program by using CUDA.
 
 
=== Parallelize ===
 
{| class="wikitable mw-collapsible mw-collapsed"
! Flat profile
|-
|
 
#include <iostream>
#include <algorithm>
#include <vector>
#include <ctime>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
 
 
__global__ void merge(std::vector<int> arrayInput, unsigned int b, unsigned int c, unsigned int e) {
 
int idx = blockIdx.x * blockDim.x + threadIdx.x;
 
std::vector<int> C(arrayInput);
unsigned int i1 = b;
unsigned int i2 = c + 1; //start point in each piece
unsigned int n1 = c;
unsigned int n2 = e; //end point of each piece
 
if (idx < e) {
for (unsigned i = b; i <= e; ++i)
{
if (i1 > n1) //
{
C[i] = arrayInput[i2];
++i2;
}else if (i2 > n2){
C[i] = arrayInput[i1];
++i1;
}else if (arrayInput[i1] <= arrayInput[i2]) {
C[i] = arrayInput[i1];
++i1;
}else{
C[i] = arrayInput[i2];
++i2;
}
}
}
 
 
}
 
void check_sort(std::vector<int> array); //test sorted arrays
void merge_sort(std::vector<int> array); //recurrent merge sort
//std::vector<int> merge(std::vector<int> array, unsigned int b, unsigned int c, unsigned int e);
 
int main()
{
unsigned int size;
std::cout << "Please enter the size of your array:" << std::endl;
std::cin >> size;
 
std::vector<int> initial_array(size); //array of random elements
 
//prefill arrays
srand(time(0));
for (unsigned int i; i < initial_array.size(); ++i) {
initial_array[i] = rand();
}
 
merge_sort(initial_array);
 
/* DEBUGGER
std::cout << "initial array" << std::endl;
for (unsigned int i = 0; i < initial_array.size(); ++i) {
std::cout << initial_array[i] << " ";
}
std::cout << std::endl;
*/
 
return 0;
}
 
void merge_sort(std::vector<int> arrayInput)
{
std::cout << "merge sort" << std::endl;
check_sort(arrayInput);
/* DEBUGGER
std::cout << "initial array" << std::endl;
for (unsigned int i = 0; i < array.size(); ++i) {
std::cout << array[i] << " ";
} */
 
int d;
cudaDeviceProp prop;
cudaGetDevice(&d);
cudaGetDeviceProperties(&prop, d);
 
float* d_a;
 
float start_time = clock();
unsigned int n = arrayInput.size();
 
 
float* h_a = new float[n];
cudaMalloc((void**)&d_a, n * sizeof(float));
 
 
 
 
 
for (unsigned int s = 1; s < n; s *= 2)
{
for (unsigned int b = 0; b < n; b += s * 2)
{
unsigned int c = std::min(b + s - 1, n - 1);
unsigned int e = std::min(c + s, n - 1);
merge <<< 1,n >> > (arrayInput, b, c, e);
 
 
}
}
float end_time = clock() - start_time;
/* DEBUGGER
for (unsigned int i = 0; i < array.size(); ++i) {
std::cout << array[i] << " ";
} */
check_sort(arrayInput);
std::cout << "time: " << end_time / 1000 << std::endl;
 
 
cudaMemcpy(h_a, d_a, n * sizeof(float), cudaMemcpyDeviceToHost);
 
 
cudaFree(d_a);
delete[] h_a;
 
}
 
/*
std::vector<int> merge(std::vector<int> array, unsigned int b, unsigned int c, unsigned int e) {
std::vector<int> C(array);
 
 
 
unsigned int i1 = b;
unsigned int i2 = c + 1; //start point in each piece
unsigned int n1 = c;
unsigned int n2 = e; //end point of each piece
 
for (unsigned i = b; i <= e; ++i)
{
if (i1 > n1) //
{
C[i] = array[i2];
++i2;
}
else if (i2 > n2)
C[i] = array[i1];
++i1;
{
}else if (array[i1] <= array[i2]) {
C[i] = array[i1];
++i1;
}
else
{
C[i] = array[i2];
++i2;
}
}
return C;
 
}*/
 
 
void check_sort(std::vector<int> array)
{
for (unsigned int i = 0; i < (array.size() - 1); ++i)
{
if (array[i] >(array[i + 1]))
{
std::cout << "unsorted" << std::endl;
return;
}
}
std::cout << "sorted" << std::endl;
}
 
 
|}
 
=== Assignment 3 ===
8
edits

Navigation menu