Changes

Jump to: navigation, search

Savy Cat

10 bytes added, 13:10, 10 April 2018
Profiling With Nsight
// Allocate device memory for src and dst
std::cout << "Allocating device memory ..." << std::endl;
cudaMalloc((void**)&d_src, w * h * sizeof(PX_TYPE) * 3);
cudaMalloc((void**)&d_dst, w * h * sizeof(PX_TYPE) * 3);
// Copy h_src to d_src
std::cout << "Copying source image to device ..." << std::endl;
cudaMemcpy(d_src, h_src, w * h * sizeof(PX_TYPE) * 3, cudaMemcpyHostToDevice);
// Rotate image 6 x 2 times, copying result back to host each time
// Copy d_dst to h_dst
std::cout << "Copying result to host ..." << std::endl;
cudaMemcpy(h_dst, d_dst, w * h * sizeof(PX_TYPE) * 3, cudaMemcpyDeviceToHost);
// Rotate again
// Copy d_src to h_src
cudaMemcpy(h_src, d_src, w * h * sizeof(PX_TYPE) * 3, cudaMemcpyDeviceToHost);
std::cout << "Copying result to host ..." << std::endl;
}
93
edits

Navigation menu