# Comparing Multi-threading in Julia vs OpenMP

## Introduction: The Julia Programming Language

• used for scientific computing
• alternative to matlab
• faster than python, not as fast c, has a repl for quick edit-run debug cycles

## Julia's Forms of Parallelism

• multi-core / distributed processing (like mpi?)

## OpenMP vs Julia Code

• Julia code is more concise than C/C++ (also simpler to initialize arrays, don't need to manage memory)
• Note that OpenMp code uses loop interchange (ikj) and Julia doesn't (ijk), which will be explained in the next section
```#include <omp.h>
#include <iostream>
#include <iomanip>
#include <cstdlib>
#include <chrono>

#pragma once
#define N 512
typedef double array[N];

// matmul returns the product c = a * b
//
void matmul(const double a[][N], const double b[][N], double c[][N], int n) {

#pragma omp parallel for
for (int i = 0; i < n; i++) {
for (int k = 0; k < n; k++) {
for (int j = 0; j < n; j++) {
c[i][j] += a[i][k] * b[k][j];
}
}
}
}

// checksum returns the sum of the coefficients in matrix x[N][N]
//
double checksum(const double x[][N]) {
double sum = 0.0;

#pragma omp parallel for
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
sum += x[i][j];
}
}
return sum;
}

// initialize initializes matrix a[N][N]
//
void initialize(double a[][N]) {

#pragma omp parallel for
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
a[i][j] = static_cast<double>(i * j) / (N * N);
}
}
}

void reportTime(const char* msg, std::chrono::steady_clock::duration span) {
auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(span);
std::cout << msg << ": " <<
ms.count() << " milliseconds" << std::endl;
}

int main(int argc, char** argv) {
if (argc != 1) {
std::cerr << "*** Incorrect number of arguments ***\n";
std::cerr << "Usage: " << argv[0] << "\n";
return 1;
}
std::cout << std::fixed << std::setprecision(2);

// allocate memory for matrices
double* a = new double[N*N];
double* b = new double[N*N];
double* c = new double[N*N];
array* aa = (array*)a;
array* bb = (array*)b;
array* cc = (array*)c;
std::cout << "Matrix Multiplication " << N << " by " << N << std::endl;

// initialize a and b
initialize(aa);
initialize(bb);
double* t = c;
for (int i = 0; i < N * N; i++)
*t++ = 0.0;
reportTime("initialization", te - ts);

// multiply matrices a and b
matmul(aa, bb, cc, N);
reportTime("execution", te - ts);
std::cout << "checksum = " << checksum(cc) << std::endl;

// deallocate memory
delete[] a;
delete[] b;
delete[] c;

}```
```N = 512

function matmul(a, b, c, n)
for j = 1:n
for k = 1:n
c[i,j] += a[i,k] * b[k,j]
end
end
end

return c
end

function checksum(x)
sum = 0.0

for j = 1:N
sum += x[i,j]
end
end

return sum
end

# creates and returns 2d array of zeroes
function initialize()
a = zeros(Float64, N, N)

for j = 1:N
a[i,j] = (convert(Float64,((i-1) * (j-1))) / convert(Float64, (N * N)))
end
end

return a
end

a = initialize();
b = initialize();
c = initialize();

start_t = time()

d = matmul(a, b, c, N)

end_t = time()
run_time = end_t - start_t

sum = checksum(d)

println("checksum = \$sum")

println("matmul func time (s):")
println(run_time)

println(nt)```

More code here: https://github.com/tsarkarsc/parallel_prog