//%%file merge.cu #include #include #include #include //Generates random numbers and assigns them to the array void fillArray(int* arr, int size) { for (int i = 0; i < size; i++) { arr[i] = rand() % size; } } __global__ void mergesort_kernal(int* arr,int *data,int begin,int end, int depth){ if((end-begin) < 2){ return; } int middle = (end+begin)/2; int left_index = begin; int middle_index = middle; int current; int n = end-begin; cudaStream_t s,s1; // Launches a new block to sort the left part. cudaStreamCreateWithFlags(&s,cudaStreamNonBlocking); mergesort_kernal<<< 1, 1, 0, s >>>(arr,data, begin, middle, depth+1); cudaStreamDestroy(s); // Launches a new block to sort the right part. cudaStreamCreateWithFlags(&s1,cudaStreamNonBlocking); mergesort_kernal<<< 1, 1, 0, s1 >>>(arr,data, middle, end, depth+1); cudaStreamDestroy(s1); // Waits for children's work. cudaDeviceSynchronize(); // Merges children's generated partition. for (current = begin; current < end; current++) { if (left_index < middle && (middle_index >= end || arr[left_index] <= arr[middle_index])){ data[current] = arr[left_index]; left_index++; }else{ data[current] = arr[middle_index]; middle_index++; } } // Copies from the auxiliary memory to the main memory. for(current = begin; current < end; current++){ arr[current] = data[current]; } } //Host function, also acts as a wrapper void merge_sort(int* a,int n){ int* d_a; int* d_aux; int left = 0; int right = n; //Sets the device depth, bigger the number bigger the depth limit? cudaDeviceSetLimit(cudaLimitDevRuntimeSyncDepth, 18); // Allocate GPU memory. cudaMalloc((void**)&d_a,n*sizeof(int)); cudaMalloc((void**)&d_aux,n*sizeof(int)); cudaMemcpy(d_a,a, n*sizeof(int), cudaMemcpyHostToDevice); // Launch on device mergesort_kernal<<< 1, 1 >>>(d_a,d_aux, left, right, 0); //Sync device cudaDeviceSynchronize(); // Copy back to CPU memory space cudaMemcpy(a,d_a, n*sizeof(int), cudaMemcpyDeviceToHost); // Free memory on the device cudaFree(d_aux); cudaFree(d_a); cudaDeviceReset(); } //print the array void printArray(int* arr, int n){ for(int i = 0; i < n; i++){ std::cout << arr[i] << std::endl; } } int main(int argc, char *argv[]) { //Get the size of the array int n = std::atoi(argv[1]); // Create 6 arrays of size n and allocate memory for them int *mergeArray = new int[n]; //Fill the array with randomly generated numbers between 0 & n fillArray(mergeArray, n); //Sort using bubble merge_sort(mergeArray, n); //Check //printArray(mergeArray, n); std::cout << "Sort is complete!" << std::endl; //Deallocate the arrays delete[] mergeArray; return 0; }