Open main menu

CDOT Wiki β

Changes

Team False Sharing

341 bytes added, 21:56, 17 December 2017
Thread Local Variables
===Thread Local Variables===
<source lang="cpp">
#include <iostream>
#include <iomanip>
#include <cstdlib>
#include <chrono>
#include <algorithm>
#include <omp.h>
#include "timer.h"
#define NUM_THREADS 8
#define DIM 1000using namespace std::chrono; int main(int argc, const char ** argv) { int* matrix = new int[DIM*DIM]; int odds = 0; // Initialize matrix to random Values srand(200) {; struct sfor (int i = 0; i < DIM; i++) { float value for(int j = 0; j < DIM;++j){ }Array matrix[4i*DIM + j]= rand()%50; } Timer stopwatch;} int numThreadsUsed; const int SomeBigNumber = 100000000threads_used;
omp_set_num_threads(NUM_THREADS);
double start_time = omp_get_wtime();#pragma omp parallel { int count_odds = 0.0;#pragma omp for for(int i = 0; i < 4DIM;++i){ for(int j = 0; j < DIM; ++j){ if(i ==0 && j==0){numThreadsUsed threads_used = omp_get_num_threads();} float tmp = Array if( matrix[i*DIM + j].value; for(int j % 2 != 0;j < SomeBigNumber;j++){ tmp = tmp ++ (float)rand()count_odds;
}
}
#pragma omp critical
Array[i].value odds += tmpcount_odds; }
double time = omp_get_wtime() - start_time;
std::cout<<"Execution Time: "<<time<<std::endl; std::cout<<"Threads Used: "<<numThreadsUsedthreads_used<<std::endl; std::cout<<"Odds: "<<odds<<std::endl;
return 0;
}
</source>
Wasting memory to put your data on different cache lines is not an ideal solution to the False Sharing problem, even though it works. When each thread uses a local variable instead of contiguous array locations, the writes to memory are spread out across different cache lines. Another benefit of this approach is that you do not have multiple threads writing to the same cache line, invalidating the data and bottlenecking the processes.
 
= Intel VTune Amplifier =
96
edits