Changes

Jump to: navigation, search

Savy Cat

4,476 bytes added, 01:08, 3 April 2018
Assignment 2
==== Initial CUDA Code ====
This code uses the device to rotate the image given as a command-line argument by 90 degrees clockwise, once. Its only purpose is to verify that everything works as expected. We will then change the code to rotate the same images the same number of times as before.
;image.h
}
return img;
}</nowiki>
 
;rotate90.cu
 
<nowiki>
// Evan Marinzel - DPS915 Project
// Rotate.cu
 
#include <cstring>
#include <iomanip>
#include <iostream>
#include "image.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
 
 
// Rotates one colour plane of a planar image by 90 degrees clockwise.
//
// Launch layout: a 2D grid of 2D blocks that covers at least src_w threads in
// x and src_h threads in y. Thread (k, j) moves the source pixel at column k,
// row j; threads that fall outside the image do nothing. No shared memory.
//
// src    source planes; each plane holds src_w * src_h floats stored row by
//        row with a row length of src_w
// dst    destination planes of the same size; a rotated plane is stored with
//        a row length of src_h
// src_w  width of the source image in pixels
// src_h  height of the source image in pixels
// z      index of the plane to rotate; both buffers are offset by
//        src_w * src_h * z elements
__global__ void rot90(float* src, float* dst, int src_w, int src_h, int z) {
    const int k = blockIdx.x * blockDim.x + threadIdx.x;  // source column
    const int j = blockIdx.y * blockDim.y + threadIdx.y;  // source row
    if (k >= src_w || j >= src_h)
        return;

    // Computed in size_t so the offset cannot overflow int on large images.
    const size_t plane = (size_t)src_w * src_h * z;

    // The read must use the global coordinates (k, j). Indexing it with
    // threadIdx alone makes every block re-read the top-left tile of the image.
    // Source (column k, row j) lands at destination (column src_h - 1 - j, row k).
    dst[plane + (size_t)k * src_h + (src_h - 1 - j)] = src[plane + (size_t)j * src_w + k];
}
 
// Reports a failed CUDA runtime call on stderr.
// Returns true when status is cudaSuccess and false otherwise.
static bool cudaOk(cudaError_t status, const char* what) {
    if (status == cudaSuccess)
        return true;
    std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
    return false;
}

// Loads the image named on the command line, rotates it 90 degrees clockwise
// once on the device (one kernel launch per colour channel) and displays the
// source and the rotated result.
//
// Returns 0 on success and 1 on a usage error, an unusable image, or any CUDA
// failure.
int main(int argc, char** argv) {

    if (argc != 2) {
        std::cerr << argv[0] << ": invalid number of arguments\n";
        std::cerr << "Usage: " << argv[0] << " image.jpg\n";
        return 1;
    }

    // Retrieving cuda device properties
    int d = 0;
    cudaDeviceProp prop;
    if (!cudaOk(cudaGetDevice(&d), "cudaGetDevice") ||
        !cudaOk(cudaGetDeviceProperties(&prop, d), "cudaGetDeviceProperties"))
        return 1;

    // Blocks are ntpb x ntpb threads; refuse to run if the device cannot
    // launch a block of that size instead of failing silently at launch time.
    const unsigned ntpb = 32;
    if (ntpb * ntpb > (unsigned)prop.maxThreadsPerBlock) {
        std::cerr << argv[0] << ": device supports at most " << prop.maxThreadsPerBlock
                  << " threads per block\n";
        return 1;
    }

    // Host and device array of pixel values for original (src) and rotated (dst) image
    PX_TYPE* h_src = nullptr;
    PX_TYPE* h_dst = nullptr;
    PX_TYPE* d_src = nullptr;
    PX_TYPE* d_dst = nullptr;

    // Width and height of original image
    int w = 0, h = 0;

    // Initialize pixel value array from .jpg file, and retrieve width and height.
    std::cout << "Opening image ..." << std::endl;
    h_src = getImage(argv[1], w, h);
    if (h_src == nullptr || w <= 0 || h <= 0) {
        std::cerr << argv[0] << ": unable to load " << argv[1] << "\n";
        return 1;
    }
    std::cout << "Opening image complete." << std::endl;

    // Display h_src to verify .jpg loaded correctly
    std::cout << "Displaying h_src and printing color values to console ..." << std::endl;
    display(h_src, w, h);

    // Element and byte counts for the three colour planes. Sized with PX_TYPE
    // so the host and device buffers always agree.
    const size_t count = (size_t)w * h * 3;
    const size_t bytes = count * sizeof(PX_TYPE);

    // Allocate host memory for rotated version
    h_dst = new PX_TYPE[count];

    // Calculate grid dimensions (ceiling division so the grid covers the image)
    int nbx = (w + ntpb - 1) / ntpb;
    int nby = (h + ntpb - 1) / ntpb;

    // Define block and grid dimensions
    dim3 dGrid(nbx, nby, 1);
    dim3 dBlock(ntpb, ntpb, 1);

    // Print h_src dimensions and size to console
    std::cout << argv[1] << " Image Data" << std::endl;
    std::cout << std::setfill('=') << std::setw(strlen(argv[1]) + 11) << "=" << std::setfill(' ') << std::endl;
    std::cout << std::setw(17) << std::right << "Width: " << w << "px" << std::endl;
    std::cout << std::setw(17) << std::right << "Height: " << h << "px" << std::endl;
    std::cout << std::setw(17) << std::right << "Colour Channels: " << 3 << std::endl;
    std::cout << std::setw(17) << std::right << "Pixel Size: " << sizeof(PX_TYPE) << " bytes" << std::endl;
    std::cout << std::setw(17) << std::right << "Total Size: " << bytes << " bytes" << std::endl;
    std::cout << std::endl;

    // Print grid details and total number of threads
    std::cout << "Number of blocks (x): " << nbx << std::endl;
    std::cout << "Number of blocks (y): " << nby << std::endl;
    std::cout << "Number of threads per block (x): " << ntpb << std::endl;
    std::cout << "Number of threads per block (y): " << ntpb << std::endl;
    std::cout << "Operations required for one colour channel: " << w * h << std::endl;
    std::cout << "Total threads available: " << ntpb * ntpb * nby * nbx << std::endl;

    // Allocate device memory for src and dst
    std::cout << "Allocating device memory ..." << std::endl;
    bool ok = cudaOk(cudaMalloc((void**)&d_src, bytes), "cudaMalloc(d_src)") &&
              cudaOk(cudaMalloc((void**)&d_dst, bytes), "cudaMalloc(d_dst)");

    // Copy h_src to d_src
    if (ok) {
        std::cout << "Copying source image to device ..." << std::endl;
        ok = cudaOk(cudaMemcpy(d_src, h_src, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(host to device)");
    }

    // Launch grid 3 times (one grid per colour channel)
    if (ok) {
        std::cout << "Performing rotation ..." << std::endl;
        for (int i = 0; i < 3 && ok; i++) {
            rot90<<<dGrid, dBlock>>>(d_src, d_dst, w, h, i);
            // A launch returns no status; configuration errors surface here.
            ok = cudaOk(cudaGetLastError(), "rot90 launch");
        }
        // Errors raised while the kernels execute surface at this sync.
        ok = ok && cudaOk(cudaDeviceSynchronize(), "rot90 execution");
    }

    // Copy d_dst to h_dst
    if (ok) {
        std::cout << "Copying rotated image to host ..." << std::endl;
        ok = cudaOk(cudaMemcpy(h_dst, d_dst, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(device to host)");
    }

    // Deallocate device memory (cudaFree on a null pointer does nothing, so
    // this is safe after a failed allocation)
    std::cout << "Dealocating device memory ..." << std::endl;
    cudaFree(d_src);
    cudaFree(d_dst);

    // Display h_dst to verify rotation worked; width and height are swapped
    // because the image was rotated by 90 degrees
    if (ok) {
        std::cout << "Displaying h_dst and printing color values to console ..." << std::endl;
        display(h_dst, h, w);
    }

    delete[] h_dst;
    // NOTE(review): h_src is owned by getImage(), whose allocation method is
    // not visible here; release it with the matching deallocator once confirmed.

    return ok ? 0 : 1;

}</nowiki>
=== Assignment 3 ===
93
edits

Navigation menu