Open main menu

CDOT Wiki β

Changes

GPU621/Intel oneMKL - Math Kernel Library

25 bytes added, 02:08, 1 December 2021
MKL version
#include <stdio.h>
#include <stdlib.h>
#include "mkl.h"
 
/*
 * LOOP_COUNT is the number of iterations of the timed loop in main(); the
 * measured elapsed time is divided by it to obtain a per-iteration average.
 *
 * Consider adjusting LOOP_COUNT based on the performance of your computer
 * to make sure that total run time is at least 1 second.
 */
#define LOOP_COUNT 220 // 220 for more accurate statistics
 
int main()
{
double alpha, beta;
double s_initial, s_elapsed;
 
printf("\n This example demonstrates threading impact on computing real matrix product \n"
" C=alpha*A*B+beta*C using Intel(R) MKL function dgemm, where A, B, and C are \n"
" matrices and alpha and beta are double precision scalars \n\n");
 
m = 2000, p = 200, n = 1000;
printf(" Initializing data for matrix multiplication C=A*B for matrix \n"
" A(%ix%i) and matrix B(%ix%i)\n\n", m, p, p, n);
alpha = 1.0; beta = 0.0;
 
printf(" Allocating memory for matrices aligned on 64-byte boundary for better \n"
" performance \n\n");
return 1;
}
 
printf(" Intializing matrix data \n\n");
for (i = 0; i < (m * p); i++) {
A[i] = (double)(i + 1);
}
 
for (i = 0; i < (p * n); i++) {
B[i] = (double)(-i - 1);
}
 
for (i = 0; i < (m * n); i++) {
C[i] = 0.0;
}
 
max_threads = mkl_get_max_threads();
printf(" Finding max number %d of threads Intel(R) MKL can use for parallel runs \n\n", max_threads);
 
printf(" Running Intel(R) MKL from 1 to %i threads \n\n", max_threads * 2);
for (i = 1; i <= max_threads * 2; i++) {
for (j = 0; j < (m * n); j++)
C[j] = 0.0;
 
mkl_set_num_threads(i);
 
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
m, n, p, alpha, A, p, B, n, beta, C, n);
 
s_initial = dsecnd();
for (r = 0; r < LOOP_COUNT; r++) {
}
s_elapsed = (dsecnd() - s_initial) / LOOP_COUNT;
 
printf(" == Matrix multiplication using Intel(R) MKL dgemm completed ==\n"
" == at %.5f milliseconds using %d thread(s) ==\n\n", (s_elapsed * 1000), i);
}
 
printf(" Deallocating memory \n\n");
mkl_free(A);
mkl_free(B);
mkl_free(C);
 
if (s_elapsed < 0.9 / LOOP_COUNT) {
s_elapsed = 1.0 / LOOP_COUNT / s_elapsed;
" of measurements\n\n", i);
}
 
printf(" Example completed. \n\n");
return 0;
}
 
</pre>
==References==
https://www.intel.com/content/www/us/en/developer/articles/technical/a-simple-example-to-measure-the-performance-of-an-intel-mkl-function.html
37
edits