Changes

Jump to: navigation, search

GPU621/Threadless Horsemen

3,851 bytes added, 17:27, 25 November 2018
Comparing Multi-threading in Julia vs OpenMP
* coroutines / green threads
== OpenMP vs Julia Code ==
* add Julia code from is more concise than C/C++ (also simpler to initialize arrays, don't need to manage memory)* Note that OpenMp code uses loop interchange (ikj) and Julia doesn't (ijk), which will be explained in the next section {| class="wikitable"|-! OpenMP ! Julia Multi-threading|-|<source>#include <omp.h>#include <iostream>#include <iomanip>#include <cstdlib>#include <chrono> #pragma once#define N 512typedef double array[N]; // matmul returns the product c = a * b//void matmul(const double a[][N], const double b[][N], double c[][N], int n) { #pragma omp parallel for for (int i = 0; i < n; i++) { for (int k = 0; k < n; k++) { for (int j = 0; j < n; j++) { c[i][j] += a[i][k] * b[k][j]; } } }} // checksum returns the sum of the coefficients in matrix x[N][N]//double checksum(const double x[][N]) { double sum = 0.0;  #pragma omp parallel for for (int i = 0; i < N; i++) { for (int j = 0; j < N; j++) { sum += x[i][j]; } } return sum;} // initialize initializes matrix a[N][N]//void initialize(double a[][N]) {  #pragma omp parallel for for (int i = 0; i < N; i++) { for (int j = 0; j < N; j++) { a[i][j] = static_cast<double>(i * j) / (N * N); } }} void reportTime(const char* msg, std::chrono::steady_clock::duration span) { auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(span); std::cout << msg << ": " << ms.count() << " milliseconds" << std::endl;} int main(int argc, char** argv) { if (argc != 1) { std::cerr << "*** Incorrect number of arguments ***\n"; std::cerr << "Usage: " << argv[0] << "\n"; return 1; } std::cout << std::fixed << std::setprecision(2);  // allocate memory for matrices double* a = new double[N*N]; double* b = new double[N*N]; double* c = new double[N*N]; array* aa = (array*)a; array* bb = (array*)b; array* cc = (array*)c; std::cout << "Matrix Multiplication " << N << " by " << N << std::endl;  // initialize a and b std::chrono::steady_clock::time_point ts, te; ts = std::chrono::steady_clock::now(); initialize(aa); initialize(bb); double* t = c; for (int i = 0; i < N * N; i++) *t++ = 0.0; te = std::chrono::steady_clock::now(); reportTime("initialization", te - ts);  // multiply matrices a and b ts = std::chrono::steady_clock::now(); matmul(aa, bb, cc, N); te = std::chrono::steady_clock::now(); reportTime("execution", te - ts); std::cout << "checksum = " << checksum(cc) << std::endl;  // deallocate memory delete[] a; delete[] b; delete[] c;  std::cout << "max threads: " << omp_get_max_threads() << std::endl;}</source>|<source>N = 512 function matmul(a, b, c, n) Threads.@threads for i = 1:n for j = 1:n for k = 1:n c[i,j] += a[i,k] * b[k,j] end end end  return cend  function checksum(x) sum = 0.0  Threads.@threads for i = 1:N for j = 1:N sum += x[i,j] end end  return sumend # creates and returns 2d array of zeroesfunction initialize() a = zeros(Float64, N, N)  Threads.@threads for i = 1:N for j = 1:N a[i,j] = (convert(Float64,((i-1) * (j-1))) / convert(Float64, (N * N))) end end  return aend a = initialize();b = initialize();c = initialize(); start_t = time() d = matmul(a, b, c, N) end_t = time()run_time = end_t - start_t sum = checksum(d) println("checksum = $sum") println("matmul func time (s):")println(run_time) println("num threads: ")nt = Threads.nthreads()println(nt)</source>|} More code here: https://github.com/tsarkarsc/parallel_prog 
== OpenMP vs Julia Results ==
* add graphs
93
edits

Navigation menu