Changes

Jump to: navigation, search

TriForce

3,788 bytes added, 14:17, 8 April 2019
Kernel Optimization Attempts
[[File:Julia.jpg]]
|}
 
This problem would be fairly simple to parallelize. In the image created by a Julia set, each pixel is independent of the others. The problem involves complex numbers, but these can be represented simply by using two arrays, or pairs of floats.
==== Assignment 1: Selection for parallelizing ====
for (int row = 0; row < N; row++) {
for (int col = 0; col < N; col++)
printf("%3d", result[row][col]); printf("\n"); }
}
|}
Reduced superSolve runtime from 5.2 to 3.8ms
[[File:Unoptimized_vs_Optimized.png]]
These Kernels change a minor part of the Optimized Kernel or use a slightly different algorithm in an attempt to make it faster
  Change : Replaces the boolean array hasSeen with a single int and uses bitwise operators. Theory : Since local array variables of threads are stored in Global memory, this was an attempt to move that data into a register. Result : No speed up noticed, suggesting that more is happening beyond arrays being stored in Global memory, perhaps some type of paging; more testing would be needed on something less erratic than a Sudoku Solver.
{| class="wikitable mw-collapsible mw-collapsed"
! Using a int as a boolean array (Kernel)
|-
|
}
|}
Change : Remove the counters, and the logic which checks for a section needing a value in one place. Theory : The counting logic requires an additional nested loop each solve cycle and creates more thread divergence. Result : The algorithm is slower, probably because 'sections requiring a single value' adds more values early in the kernel, resulting in fewer passes overall. Also, this kernel is similar to one of my earlier builds, which was unable to solve the 9x9, getting stuck on every square having more than one possible value.
{| class="wikitable mw-collapsible mw-collapsed"
! Dropping Section Logic (Kernel)
|-
|
// Sudoku solver kernel variant with the per-section counting logic dropped.
// Each thread owns one square (row = threadIdx.x, col = threadIdx.y) and
// repeatedly rules out candidate values already present in its row, column
// or box; a square is filled as soon as exactly one candidate remains.
//
// d_a : grid of N * N ints indexed as col * N + row, updated in place.
//       UNASSIGNED marks an empty square; filled squares are indexed as
//       (value - 1), so values are presumably 1..N.
//       If the grid could not be completed, element 0 is overwritten with 0.
// NOTE(review): assumes a launch with a single N x N thread block - confirm
// against the host code.
__global__ void solve(int* d_a) {
    // Used to remember which row | col | box ( section ) have which values
    __shared__ bool rowHas[N][N];
    __shared__ bool colHas[N][N];
    __shared__ bool boxHas[N][N];

    // Used to ensure that the table has changed
    __shared__ bool changed;

    // Where the square is located in the Sudoku
    int row = threadIdx.x;
    int col = threadIdx.y;
    int box = row / BOX_W + (col / BOX_W) * BOX_W;

    // Unique identifier for each square in row, col, box
    // Corresponds to the generic Sudoku Solve
    // Using a Sudoku to solve a Sudoku !!!
    int offset = col + (row % BOX_W) * BOX_W + (box % BOX_W);

    // Square's location in the Sudoku
    int gridIdx = col * N + row;

    int at = d_a[gridIdx];

    // notSeen[v] stays true while value index v has not been ruled out here
    bool notSeen[N];
    for (int i = 0; i < N; ++i)
        notSeen[i] = true;

    // Each thread clears one cell of every shared table
    rowHas[col][row] = false;
    colHas[col][row] = false;
    boxHas[col][row] = false;
    __syncthreads();

    if (at != UNASSIGNED) {
        rowHas[row][at - 1] = true;
        colHas[col][at - 1] = true;
        boxHas[box][at - 1] = true;
    }

    // Keep making passes until a pass places no new value
    do {
        __syncthreads();

        if (gridIdx == 0) // forget previous change
            changed = false;

        int count = 0; // number of values which can fit in this square
        int guess = 0; // last value found which can fit in this square

        for (int idx = 0; idx < N; ++idx) {
            // Ensures that every square in each section is working on a different number in the section
            int num = (idx + offset) % N;
            if (at == UNASSIGNED && notSeen[num]) {
                if (rowHas[row][num] || boxHas[box][num] || colHas[col][num])
                    notSeen[num] = false;
                else {
                    ++count;
                    guess = num;
                }
            }
            __syncthreads();
        }

        if (count == 1) {
            // Only one candidate left: place it and publish it to the sections
            at = guess + 1;
            rowHas[row][guess] = true;
            colHas[col][guess] = true;
            boxHas[box][guess] = true;
            changed = true;
        }
        __syncthreads();
    } while (changed);

    // Every thread must have evaluated the loop condition before `changed`
    // is reused below; otherwise a late thread could observe the write from
    // the solved check and re-enter the loop alone.
    __syncthreads();

    // SOLVED CHECK
    // Here `row` is used as a section index and `col` as a value index, so
    // the block as a whole covers every (section, value) pair. The grid is
    // complete only if the row, the column AND the box all hold the value.
    if (!(rowHas[row][col] && colHas[row][col] && boxHas[row][col]))
        changed = true;
    __syncthreads();

    // Signal an unsolved grid by zeroing element 0
    if (changed && gridIdx == 0)
        at = 0;

    d_a[gridIdx] = at;
}
|}
Change : Quickly finds one section that requires a single value in one spot, by checking all sections at once and remembering a single section. Theory : Similar to the previous Kernel, trying to remove the second loop. Result : Surprisingly slow; gains little benefit from the section logic and shared memory, yet is still required to count all values.
{| class="wikitable mw-collapsible mw-collapsed"
! Notify (Kernel) - Determines a single section that has a limited value (removes section loop)
|-
|
// "Notify" Sudoku solver kernel variant.
// Each thread owns one square (row = threadIdx.x, col = threadIdx.y). Every
// pass it rules out candidates already present in its row, column or box and
// counts, per section and value, how many squares can still take that value.
// One value whose count is exactly 1 in some section is published through the
// shared `notify` slot so the matching square can claim it without a second
// loop over all values.
//
// d_a : grid of N * N ints indexed as col * N + row, updated in place.
//       UNASSIGNED marks an empty square; filled squares are indexed as
//       (value - 1), so values are presumably 1..N.
//       If the grid could not be completed, element 0 is overwritten with 0.
// NOTE(review): assumes a launch with a single N x N thread block - confirm
// against the host code.
__global__ void solve(int* d_a) {
    // Used to remember which row | col | box ( section ) have which values
    __shared__ bool rowHas[N][N];
    __shared__ bool colHas[N][N];
    __shared__ bool boxHas[N][N];

    // Used to ensure that the table has changed
    __shared__ bool changed;

    // Number of spaces which can place the number in each section
    __shared__ int rowCount[N][N];
    __shared__ int colCount[N][N];
    __shared__ int boxCount[N][N];

    // Value index with a section count of exactly 1 this pass, or -1 for none
    __shared__ int notify;

    // Where the square is located in the Sudoku
    int row = threadIdx.x;
    int col = threadIdx.y;
    int box = row / BOX_W + (col / BOX_W) * BOX_W;

    // Unique identifier for each square in row, col, box
    // Corresponds to the generic Sudoku Solve
    // Using a Sudoku to solve a Sudoku !!!
    int offset = col + (row % BOX_W) * BOX_W + (box % BOX_W);

    // Square's location in the Sudoku
    int gridIdx = col * N + row;

    int at = d_a[gridIdx];

    // notSeen[v] stays true while value index v has not been ruled out here
    bool notSeen[N];
    for (int i = 0; i < N; ++i)
        notSeen[i] = true;

    // Each thread clears one cell of every shared table
    rowHas[col][row] = false;
    colHas[col][row] = false;
    boxHas[col][row] = false;
    __syncthreads();

    if (at != UNASSIGNED) {
        rowHas[row][at - 1] = true;
        colHas[col][at - 1] = true;
        boxHas[box][at - 1] = true;
    }

    // Keep making passes until a pass places no new value
    do {
        // RESET counters
        rowCount[col][row] = 0;
        colCount[col][row] = 0;
        boxCount[col][row] = 0;
        __syncthreads();

        if (gridIdx == 0) { // forget previous change
            changed = false;
            notify = -1;
        }

        int count = 0; // number of values which can fit in this square
        int guess = 0; // last value found which can fit in this square

        for (int idx = 0; idx < N; ++idx) {
            // Ensures that every square in each section is working on a different number in the section
            int num = (idx + offset) % N;
            if (at == UNASSIGNED && notSeen[num]) {
                if (rowHas[row][num] || boxHas[box][num] || colHas[col][num])
                    notSeen[num] = false;
                else {
                    ++count;
                    guess = num;
                    rowCount[row][num]++;
                    colCount[col][num]++;
                    boxCount[box][num]++;
                }
            }
            __syncthreads();
        }

        // Here `row` is a section index and `col` a value index: publish any
        // value that fits exactly one square of some section.
        if (rowCount[row][col] == 1 || colCount[row][col] == 1 || boxCount[row][col] == 1)
            notify = col;
        __syncthreads();

        // Find values which can go in only one spot in the section.
        // The "nothing found" sentinel is -1, so value index 0 is a valid
        // notification and must be accepted as well.
        if (notify >= 0 && at == UNASSIGNED && notSeen[notify] &&
            (rowCount[row][notify] == 1 || boxCount[box][notify] == 1 || colCount[col][notify] == 1)) {
            // In this section this value can only appear in this square
            guess = notify;
            count = 1;
        }

        if (count == 1) {
            at = guess + 1;
            rowHas[row][guess] = true;
            colHas[col][guess] = true;
            boxHas[box][guess] = true;
            changed = true;
        }
        __syncthreads();
    } while (changed);

    // Every thread must have evaluated the loop condition before `changed`
    // is reused below; otherwise a late thread could observe the write from
    // the solved check and re-enter the loop alone.
    __syncthreads();

    // SOLVED CHECK
    // The grid is complete only if the row, the column AND the box all hold
    // the value being checked.
    if (!(rowHas[row][col] && colHas[row][col] && boxHas[row][col]))
        changed = true;
    __syncthreads();

    // Signal an unsolved grid by zeroing element 0
    if (changed && gridIdx == 0)
        at = 0;

    d_a[gridIdx] = at;
}
|}
Change : Refactors the algorithm to count the total numbers that can fit in a square or section, then counts down as values are added. Theory : Remove redundant counting logic that occurred during each pass of the Optimized Kernel. Result : Not faster; HOWEVER there is a slight error: by setting notSeen to 0, the section counters will rarely reach one.
{| class="wikitable mw-collapsible mw-collapsed"
! CountDown - Counts down instead of revisiting numbersusing Int as Boolean Array(EDITED now 4.28 seconds)
|-
|
__global__ void solve(int* d_a) { // Used to remember which row | col | box ( section ) have which values __shared__ bool rowHas[N][N]; __shared__ bool colHas[N][N]; __shared__ bool boxHas[N][N]; // Used to ensure that the table has changed __shared__ bool changed; // Number of spaces which can place the number in each section __shared__ int rowCount[N][N]; __shared__ int colCount[N][N]; __shared__ int boxCount[N][N]; // Where the square is located in the Sudoku int row = threadIdx.x; int col = threadIdx.y; int box = row / BOX_W + (col / BOX_W) * BOX_W; int gridIdx = col * N + row; int at = d_a[gridIdx]; // Unique identifier for each square in row, col, box // Corresponds to the generic Sudoku Solve // Using a Sudoku to solve a Sudoku !!! int offset = col + (row % BOX_W) * BOX_W + (box % BOX_W); // Square's location in the Sudoku int count = 0; //Number of values which can fit in this square int notSeen = 0; //Boolean Array as an Integer if (gridIdx == 0) changed = true; rowHas[col][row] = false; colHas[col][row] = false; boxHas[col][row] = false; rowCount[col][row] = 0; colCount[col][row] = 0; boxCount[col][row] = 0; __syncthreads(); if (at != UNASSIGNED) { rowHas[row][at - 1] = true; colHas[col][at - 1] = true; boxHas[box][at - 1] = true; } __syncthreads(); int guess; int b_shuttle = 1; for (int idx = 0; idx < N; ++idx) { int num = (idx + offset) % N; if (at == UNASSIGNED && !(rowHas[row][num] || boxHas[box][num] || colHas[col][num])) { notSeen |= b_shuttle; //this value can go here ++count; //how many values this square can have guess = num; //how many values this section can have rowCount[row][num]++; colCount[col][num]++; boxCount[box][num]++; } __syncthreads(); b_shuttle <<= 1; } if (at == UNASSIGNED && count == 0) //NOT POSSIBLE SUDOKU changed = false; __syncthreads(); if (count == 1) { at = guess + 1; notSeen = count = 0; rowHas[row][guess] = true; colHas[col][guess] = true; boxHas[box][guess] = true; } // Previous loop has not changed any values  while 
(changed) { __syncthreads(); if (gridIdx == 0) // forget previous change changed = false; bool inSection = true; int b_shuttle = 1; for (int idx = 0; idx < N; ++idx) { // Ensures that every square in each section is working on a different number in the section int num = (idx + offset) % N; if (b_shuttle & notSeen) {&& if (at != UNASSIGNED || rowHas[row][num] || boxHas[box][num] || colHas[col][num])) { notSeen ^= b_shuttle; --count; rowCount[row][num]--; colCount[col][num]--; boxCount[box][num]--; } else if (inSection) { guess notSeen ^= numb_shuttle; } --count; } __syncthreads(); if ((b_shuttle & notSeen) && (count == 1 || rowCount[row][num] == 1 || boxCount[box][num] == 1 || colCount[col][num] == 1)){ rowHas[row][num] = true; colHas[col][num] = true; boxHas[box][num] = true; changed = true; notSeen ^= b_shuttle; inSection at = falsenum + 1; count = 0; } b_shuttle <<= 1; } __syncthreads(); }; if (!(rowHas[row][col] && colHas[row][col] && boxHas[box][col])) changed = true; //HAVE NOT SOLVED the sudoku __syncthreads(); if (changed && gridIdx == 0) at = 0; d_a[gridIdx] = at; }|}
if (count == 1 || !inSection) { Change : uses countdown logic with a boolean array at = guess + 1; notSeen = count = 0; rowHas[row][guess] = true; colHas[col][guess] = true; boxHas[box][guess] = true; changed = true; } __syncthreads(); }; Result : Similar times to other Countdown kernel
if (!(rowHas[row][col] && colHas[row][col] && boxHas[box][col]))
changed = true; //HAVE NOT SOLVED the sudoku
__syncthreads();
if (changed && gridIdx == 0)
at = 0;
d_a[gridIdx] = at;
}
|}
{| class="wikitable mw-collapsible mw-collapsed"
! Countdown Boolean Array(EDITED - now 4.37ms)
|-
|
__global__ void solve(int* d_a) { // Used to remember which row | col | box ( section ) have which values __shared__ bool rowHas[N][N]; __shared__ bool colHas[N][N]; __shared__ bool boxHas[N][N]; // Used to ensure that the table has changed __shared__ bool changed; // Number of spaces which can place the number in each section __shared__ int rowCount[N][N]; __shared__ int colCount[N][N]; __shared__ int boxCount[N][N]; // Where the square is located in the Sudoku int row = threadIdx.x; int col = threadIdx.y; int box = row / BOX_W + (col / BOX_W) * BOX_W; int gridIdx = col * N + row; int at = d_a[gridIdx]; // Unique identifier for each square in row, col, box // Corresponds to the generic Sudoku Solve // Using a Sudoku to solve a Sudoku !!! int offset = col + (row % BOX_W) * BOX_W + (box % BOX_W); // Square's location in the Sudoku int count = 0; //Number of values which can fit in this square bool notSeen[N]; //Boolean Array as an Integer for (int i idx = 0; i idx < N; ++iidx) notSeen[iidx] = false; if (gridIdx == 0) changed = true; rowHas[col][row] = false; colHas[col][row] = false; boxHas[col][row] = false; rowCount[col][row] = 0; colCount[col][row] = 0; boxCount[col][row] = 0; __syncthreads(); if (at != UNASSIGNED) { rowHas[row][at - 1] = true; colHas[col][at - 1] = true; boxHas[box][at - 1] = true; } __syncthreads(); int guess; for (int idx = 0; idx < N; ++idx) { int num = (idx + offset) % N; if (at == UNASSIGNED && !(rowHas[row][num] || boxHas[box][num] || colHas[col][num])) { notSeen[num] = true; //this value can go here ++count; //how many values this square can have guess = num; //how many values this section can have rowCount[row][num]++; colCount[col][num]++; boxCount[box][num]++; } __syncthreads(); } if (at == UNASSIGNED && count == 0) //NOT POSSIBLE SUDOKU changed = false; __syncthreads(); if (count == 1) { at = guess + 1; count = 0; notSeen[guess] = false; rowHas[row][guess] = true; colHas[col][guess] = true; boxHas[box][guess] = true; } // Previous 
loop has not changed any values  while (changed) { __syncthreads(); if (gridIdx == 0) // forget previous change changed = false; bool inSection = true; for (int idx = 0; idx < N; ++idx) { // Ensures that every square in each section is working on a different number in the section int num = (idx + offset) % N; if (at == UNASSIGNED && notSeen[num]) {&& if (at != UNASSIGNED || rowHas[row][num] || boxHas[box][num] || colHas[col][num])) { notSeen[num] = false; --count; rowCount[row][num]--; colCount[col][num]--; boxCount[box][num]--; } else if (inSection) { guess notSeen[num] = numfalse; } --count; } __syncthreads(); if (at == UNASSIGNED && notSeen[num] && (count == 1 || rowCount[row][num] == 1 || boxCount[box][num] == 1 || colCount[col][num] == 1)) inSection = false;  }  if (count == 1 || !inSection) { at = guess + 1; count = 0; rowHas[row][guessnum] = true; colHas[col][guessnum] = true; boxHas[box][guessnum] = true; changed = true; notSeen[num] = false; at = num + 1; count = 0; } } __syncthreads(); }; if (!(rowHas[row][col] && colHas[row][col] && boxHas[box][col])) changed = true; //HAVE NOT SOLVED the sudoku __syncthreads(); if (changed && gridIdx == 0) at = 0; d_a[gridIdx] = at; }
|}
[[File:Kernel_Compare.png]]

Navigation menu