Open main menu

CDOT Wiki β

GPU621/Intel Advisor

Revision as of 09:17, 22 November 2018 by Jespiritu (talk | contribs) (Examples)

Intel Parallel Studio Advisor

Group Members

  1. Jeffrey Espiritu
  2. Thaharim Ahmed
  3. eMail All

Introduction

Vectorization

Register

Vector Register

Instruction Set Architecture

Examples

// Workshop 2 - Calculate PI by integrating 1/(1+x^2)
// w2.serial.cpp

#include <iostream>
#include <iomanip>
#include <cstdlib>
#include <chrono>
#include <omp.h>
using namespace std::chrono;

// report system time
//
void reportTime(const char* msg, steady_clock::duration span) {
    auto ms = duration_cast<milliseconds>(span);
    std::cout << msg << " - took - " <<
        ms.count() << " milliseconds" << std::endl;
}

int main(int argc, char** argv) {
    if (argc != 2) {
        std::cerr << argv[0] << ": invalid number of arguments\n";
        std::cerr << "Usage: " << argv[0] << "  no_of_slices\n";
        return 1;
    }
    int i;
    int nthreads;
    int n = std::atoi(argv[1]);
    int mnt = omp_get_max_threads();
    steady_clock::time_point ts, te;

    double sum = 0.0;  // scalar accumulator
    // calculate pi by integrating the area under 1/(1 + x^2) in n steps 
    double pi = 0.0;
    double stepSize = 1.0 / (double)n;

    ts = steady_clock::now();

    #pragma omp parallel
    {
        int i, tid, nt;
        double x, psum;
        tid = omp_get_thread_num();
        nt = omp_get_num_threads();
        if (tid == 0) nthreads = nt;

        for (i = tid, psum = 0.0; i < n; i += nt) {
            x = ((double)i + 0.5) * stepSize;
            psum += 1.0 / (1.0 + x * x);
        }
        
        #pragma omp critical
        sum += psum;
    }
    
    pi = 4.0 * sum * stepSize;

    te = steady_clock::now();

    std::cout << "n = " << n << "\n" <<
        mnt << " threads available\n" <<
        nthreads << " threads used.\nTime = " <<
        std::fixed << std::setprecision(15) <<
        "\n pi(exact) = " << 3.141592653589793 <<
        "\n pi(calcd) = " << pi << std::endl;
    reportTime("Integration", te - ts);
}

Intel Advisor Tutorial Example

Loop Unrolling

Pointer Alias

Memory Alignment

Dependencies

Summary