Changes

Jump to: navigation, search

Algo holics

11,533 bytes added, 01:19, 8 April 2019
COSINE TRANSFORMATION (A Discrete Cosine Transform for Real Data)
This [https://www.youtube.com/watch?v=tW3Hc0Wrgl0 Link] can be used for better understanding of the above formula.
----
Here is the [https://people.sc.fsu.edu/~jburkardt/cpp_src/cosine_transform/cosine_transform.html source code] used.
 
To increase the efficiency of the program we transformed the '''cosine_transform_data''' function into a kernel name '''cosTransformKernel''' which offload the compute intense calculation of the program to the GPU.
 
 
 
 
{| class="wikitable mw-collapsible mw-collapsed"
! Modified Code
|-
|
 
%%cu
# include <iostream>
# include <iomanip>
# include <ctime>
# include <chrono>
# include <cstdlib>
# include <cmath>
#include <cuda_runtime.h>
using namespace std;
using namespace std::chrono;
const double pi = 3.141592653589793;
const int ntpb = 1024;
void cosine_transform_test01 ( int size );
 
double *r8vec_uniform_01_new ( int n, int &seed ){
int i;
const int i4_huge = 2147483647;
int k;
double *r;
 
if ( seed == 0 )
{
cerr << "\n";
cerr << "R8VEC_UNIFORM_01_NEW - Fatal error!\n";
cerr << " Input value of SEED = 0.\n";
exit ( 1 );
}
 
r = new double[n];
 
for ( i = 0; i < n; i++ )
{
k = seed / 127773;
 
seed = 16807 * ( seed - k * 127773 ) - k * 2836;
 
if ( seed < 0 )
{
seed = seed + i4_huge;
}
r[i] = ( double ) ( seed ) * 4.656612875E-10;
}
return r;
}
 
double *cosine_transform_data ( int n, double d[] ){
double angle;
double *c;
int i;
int j;
 
c = new double[n];
 
for ( i = 0; i < n; i++ )
{
c[i] = 0.0;
for ( j = 0; j < n; j++ )
{
angle = pi * ( double ) ( i * ( 2 * j + 1 ) ) / ( double ) ( 2 * n );
c[i] = c[i] + cos ( angle ) * d[j];
}
c[i] = c[i] * sqrt ( 2.0 / ( double ) ( n ) );
}
return c;
}
 
void reportTime(const char* msg, steady_clock::duration span) {
auto ms = duration_cast<milliseconds>(span);
std::cout << msg << " - took - " <<
ms.count() << " millisecs" << std::endl;
}
__global__ void cosTransformKernel(double *a, double *b, int n){
double angle;
const double pi = 3.141592653589793;
int i = blockIdx.x * blockDim.x + threadIdx.x;
for(int j=0; j<n; j++){
angle = pi * (double) (i*(2*j+1)) / (double)(2*n);
b[i] += cos ( angle ) * a[j];
}
b[i] *= sqrt( 2.0 / (double) n );
}
 
int main (int argc, char* argv[] ){
if (argc != 2) {
std::cerr << argv[0] << ": invalid number of arguments\n";
std::cerr << "Usage: " << argv[0] << " size_of_vector\n";
return 1;
}
int n = std::atoi(argv[1]);
cosine_transform_test01 (n);
return 0;
}
 
void cosine_transform_test01 ( int size){
int n = size;
int seed;
double *r;
double *hs; //host side pointer to store the array returned from host side cosine_transform_data, for comparison purposes
double *s = new double[n];
double *d_a;
double *d_b;
//allocate memory on the device for the randomly generated array and for the array in which transform values will be stored
cudaMalloc((void**)&d_a,sizeof(double) * n);
cudaMalloc((void**)&d_b,sizeof(double) * n);
seed = 123456789;
r = r8vec_uniform_01_new ( n, seed );
//copy randomly generated values from host to device
cudaMemcpy(d_a,r,sizeof(double)*n,cudaMemcpyHostToDevice);
int nblks = (n + ntpb - 1) / ntpb;
steady_clock::time_point ts, te;
ts = steady_clock::now();
cosTransformKernel<<<nblks,ntpb>>>(d_a,d_b,size);
cudaDeviceSynchronize();
te = steady_clock::now();
reportTime("Cosine Transform on device",te-ts);
cudaMemcpy(s,d_b,sizeof(double)*n,cudaMemcpyDeviceToHost);
ts = steady_clock::now();
hs = cosine_transform_data ( n, r );
te = steady_clock::now();
reportTime("Cosine Transform on host",te-ts);
 
cudaFree(d_a);
cudaFree(d_b);
delete [] r;
delete [] s;
delete [] hs;
}
 
 
|}
 
 
'''Analysis'''
 
 
To analyze the flat profile, enter the following command:
 
> gprof -p -b myapp > myapp.flt
 
-p directs the profiler (gprof) to output a flat profile.
 
-b directs the profiler to omit detailed explanations of the column headings from the output.
 
 
The flat profile for the above execution looks like:
 
{| class="wikitable mw-collapsible mw-collapsed"
! Flat Profile
|-
|
 
 
1
2
3
4 granularity: each sample hit covers 2 byte(s) for 0.68% of 1.47 seconds
5
6 index % time self children called name
7 <spontaneous>
8 [1] 100.0 0.00 1.47 main [1]
9 0.00 1.47 1/1 cosine_transform_test01(int) [3]
10 -----------------------------------------------
11 1.47 0.00 1/1 cosine_transform_test01(int) [3]
12 [2] 100.0 1.47 0.00 1 cosine_transform_data(int, double*) [2]
13 -----------------------------------------------
14 0.00 1.47 1/1 main [1]
15 [3] 100.0 0.00 1.47 1 cosine_transform_test01(int) [3]
16 1.47 0.00 1/1 cosine_transform_data(int, double*) [2]
17 0.00 0.00 1/1 r8vec_uniform_01_new(int, int&) [14]
18 0.00 0.00 1/1 reportTime(char const*, std::chrono::duration<long, std::ratio<1l, 1000000000l> >) [13]
19 0.00 0.00 1/1 std::common_type<std::chrono::duration<long, std::ratio<1l, 1000000000l> >, std::chrono::duration<long, std::ratio<1l, 1000000000l> > >::type std::chrono::operator-<std::chrono::_V2::s teady_clock, std::chrono::duration<long, std::ratio<1l, 1000000000l> >, std::chrono::duration<long, std::ratio<1l, 1000000000l> > >(std::chrono::time_point<std::chrono::_V2::steady_clock, std::chrono::duration<long, std::ratio<1l, 10 00000000l> > > const&, std::chrono::time_point<std::chrono::_V2::steady_clock, std::chrono::duration<long, std::ratio<1l, 1000000000l> > > const&) [21]
20 -----------------------------------------------
21 0.00 0.00 1/3 std::chrono::duration<long, std::ratio<1l, 1000l> > std::chrono::__duration_cast_impl<std::chrono::duration<long, std::ratio<1l, 1000l> >, std::ratio<1l, 1000000l>, long, true, false>: :__cast<long, std::ratio<1l, 1000000000l> >(std::chrono::duration<long, std::ratio<1l, 1000000000l> > const&) [18]
22 0.00 0.00 2/3 std::common_type<std::chrono::duration<long, std::ratio<1l, 1000000000l> >, std::chrono::duration<long, std::ratio<1l, 1000000000l> > >::type std::chrono::operator-<long, std::ratio<1l , 1000000000l>, long, std::ratio<1l, 1000000000l> >(std::chrono::duration<long, std::ratio<1l, 1000000000l> > const&, std::chrono::duration<long, std::ratio<1l, 1000000000l> > const&) [22]
23 [10] 0.0 0.00 0.00 3 std::chrono::duration<long, std::ratio<1l, 1000000000l> >::count() const [10]
24 -----------------------------------------------
25 0.00 0.00 2/2 std::common_type<std::chrono::duration<long, std::ratio<1l, 1000000000l> >, std::chrono::duration<long, std::ratio<1l, 1000000000l> > >::type std::chrono::operator-<std::chrono::_V2::s teady_clock, std::chrono::duration<long, std::ratio<1l, 1000000000l> >, std::chrono::duration<long, std::ratio<1l, 1000000000l> > >(std::chrono::time_point<std::chrono::_V2::steady_clock, std::chrono::duration<long, std::ratio<1l, 10 00000000l> > > const&, std::chrono::time_point<std::chrono::_V2::steady_clock, std::chrono::duration<long, std::ratio<1l, 1000000000l> > > const&) [21]
26 [11] 0.0 0.00 0.00 2 std::chrono::time_point<std::chrono::_V2::steady_clock, std::chrono::duration<long, std::ratio<1l, 1000000000l> > >::time_since_epoch() const [11]
27 -----------------------------------------------
28 0.00 0.00 1/1 __libc_csu_init [28]
29 [12] 0.0 0.00 0.00 1 _GLOBAL__sub_I__Z20r8vec_uniform_01_newiRi [12]
30 0.00 0.00 1/1 __static_initialization_and_destruction_0(int, int) [15]
31 -----------------------------------------------
32 0.00 0.00 1/1 cosine_transform_test01(int) [3]
33 [13] 0.0 0.00 0.00 1 reportTime(char const*, std::chrono::duration<long, std::ratio<1l, 1000000000l> >) [13]
34 0.00 0.00 1/1 std::enable_if<std::chrono::__is_duration<std::chrono::duration<long, std::ratio<1l, 1000l> > >::value, std::chrono::duration<long, std::ratio<1l, 1000l> > >::type std::chrono::duratio n_cast<std::chrono::duration<long, std::ratio<1l, 1000l> >, long, std::ratio<1l, 1000000000l> >(std::chrono::duration<long, std::ratio<1l, 1000000000l> > const&) [17]
35 0.00 0.00 1/1 std::chrono::duration<long, std::ratio<1l, 1000l> >::count() const [16]
36 -----------------------------------------------
37 0.00 0.00 1/1 cosine_transform_test01(int) [3]
38 [14] 0.0 0.00 0.00 1 r8vec_uniform_01_new(int, int&) [14]
39 -----------------------------------------------
40 0.00 0.00 1/1 _GLOBAL__sub_I__Z20r8vec_uniform_01_newiRi [12]
41 [15] 0.0 0.00 0.00 1 __static_initialization_and_destruction_0(int, int) [15]
42 -----------------------------------------------
43 0.00 0.00 1/1 reportTime(char const*, std::chrono::duration<long, std::ratio<1l, 1000000000l> >) [13]
44 [16] 0.0 0.00 0.00 1 std::chrono::duration<long, std::ratio<1l, 1000l> >::count() const [16]
45 -----------------------------------------------
46 0.00 0.00 1/1 reportTime(char const*, std::chrono::duration<long, std::ratio<1l, 1000000000l> >) [13]
47 [17] 0.0 0.00 0.00 1 std::enable_if<std::chrono::__is_duration<std::chrono::duration<long, std::ratio<1l, 1000l> > >::value, std::chrono::duration<long, std::ratio<1l, 1000l> > >::type std::chrono::duration_ca st<std::chrono::duration<long, std::ratio<1l, 1000l> >, long, std::ratio<1l, 1000000000l> >(std::chrono::duration<long, std::ratio<1l, 1000000000l> > const&) [17]
48 0.00 0.00 1/1 std::chrono::duration<long, std::ratio<1l, 1000l> > std::chrono::__duration_cast_impl<std::chrono::duration<long, std::ratio<1l, 1000l> >, std::ratio<1l, 1000000l>, long, true, false>: :__cast<long, std::ratio<1l, 1000000000l> >(std::chrono::duration<long, std::ratio<1l, 1000000000l> > const&) [18]
49 -----------------------------------------------
50 0.00 0.00 1/1 std::enable_if<std::chrono::__is_duration<std::chrono::duration<long, std::ratio<1l, 1000l> > >::value, std::chrono::duration<long, std::ratio<1l, 1000l> > >::type std::chrono::duratio n_cast<std::chrono::duration<long, std::ratio<1l, 1000l> >, long, std::ratio<1l, 1000000000l> >(std::chrono::duration<long, std::ratio<1l, 1000000000l> > const&) [17]
51 [18] 0.0 0.00 0.00 1 std::chrono::duration<long, std::ratio<1l, 1000l> > std::chrono::__duration_cast_impl<std::chrono::duration<long, std::ratio<1l, 1000l> >, std::ratio<1l, 1000000l>, long, true, false>::__c ast<long, std::ratio<1l, 1000000000l> >(std::chrono::duration<long, std::ratio<1l, 1000000000l> > const&) [18]
52 0.00 0.00 1/3 std::chrono::duration<long, std::ratio<1l, 1000000000l> >::count() const [10]
 
 
|}
Here is the [https://people.sc.fsu.edu/~jburkardt/cpp_src/cosine_transform/cosine_transform.html source code] used.
=== Assignment 3 ===
85
edits

Navigation menu