Open main menu

CDOT Wiki β

Changes

GPU621/False Sharing

1,956 bytes added, 02:09, 26 November 2021
Analyzing Workshop Example
=== Analyzing Workshop Example ===
<pre>
#include <iostream>
#include <iomanip>
#include <cstdlib>
#include <chrono>
#include <omp.h>
 
#define NUM_THREADS 8
 
using namespace std::chrono;
 
// Print the elapsed time of a measured phase to standard output.
//   msg  - label written in front of the measurement
//   span - elapsed interval; converted (truncating) to whole milliseconds
void reportTime(const char* msg, steady_clock::duration span)
{
    const auto elapsed_ms = duration_cast<milliseconds>(span).count();
    std::cout << msg << " - took - " << elapsed_ms << " milliseconds" << std::endl;
}
 
int main(int argc, char** argv)
{
if (argc != 2)
{
std::cerr << argv[0] << ": invalid number of arguments\n";
std::cerr << "Usage: " << argv[0] << " no_of_slices\n";
return 1;
}
int n = std::atoi(argv[1]);
steady_clock::time_point ts, te;
 
// calculate pi by integrating the area under 1/(1 + x^2) in n steps
ts = steady_clock::now();
 
int actual_thread_count;
double pi = 0.0f;
double sum[NUM_THREADS] = { 0.0f };
double step = 1.0 / (double)n;
 
omp_set_num_threads(NUM_THREADS);
#pragma omp parallel
{
int id, num_threads;
double x;
 
id = omp_get_thread_num();
num_threads = omp_get_num_threads();
 
// get master thread to return how many threads were actually created
if (id == 0)
{
actual_thread_count = num_threads;
}
 
// each thread is responsible for calculating the area of a specific set of sections underneath the curve
for (int i = id; i < n; i = i + num_threads)
{
x = ((double)i + 0.5f) * step;
sum[id] += 1.0f / (1.0f + x * x);
}
}
 
// sum up each calculation to get approximation of pi
for (int i = 0; i < actual_thread_count; i++)
{
pi += 4 * sum[i] * step;
}
 
te = steady_clock::now();
 
std::cout << "n = " << n <<
std::fixed << std::setprecision(15) <<
"\n pi(exact) = " << 3.141592653589793 <<
"\n pi(calcd) = " << pi << std::endl;
reportTime("Integration", te - ts);
}
</pre>
== Solutions to False Sharing ==
83
edits