Changes

Jump to: navigation, search

TriForce

3,537 bytes added, 17:37, 21 March 2019
Progress
=== Assignment 3 ===
 
{| class="wikitable mw-collapsible mw-collapsed"
! Optimized kernel : Single Pass
|-
|
__global__ void superSolve(int * d_a) {
 
//Used to remember which row | col | box ( section ) have which values
__shared__ bool rowHas[N][N];
__shared__ bool colHas[N][N];
__shared__ bool boxHas[N][N];
 
//Used to ensure that the table has changed
__shared__ int added, past;
 
//Number of spaces which can place the number in each section
__shared__ int rowCount[N][N];
__shared__ int colCount[N][N];
__shared__ int boxCount[N][N];
//Use registry memory to remember what values were seen
bool notSeen[N];
//Where the square is located in the Sudoku
int row = threadIdx.x;
int col = threadIdx.y;
int box = row / BOXWIDTH + (col / BOXWIDTH) * BOXWIDTH;
 
//Unique identifier for each square in row, col, box
//Corresponds to the generic Sudoku Solve
//Using a Sudoku to solve a Sudoku !!!
int offset = col + (row % BOXWIDTH) * BOXWIDTH + (box % BOXWIDTH);
 
//Square's location in the Sudoku
int gridIdx = col * N + row;
 
int at = d_a[gridIdx];
 
if (gridIdx == 0)//Thread at 0,0 sets values
added = -1;
 
rowHas[row][col] = false;
colHas[row][col] = false;
boxHas[row][col] = false;
 
for (int i = 0; i < N; i++)
notSeen[i] = true;
 
__syncthreads();
 
if (at != UNASSIGNED) {
rowHas[row][at - 1] = true;
colHas[col][at - 1] = true;
boxHas[box][at - 1] = true;
}
 
//Previous loop has not changed any values
do {
//RESET counters
rowCount[row][col] = 0;
colCount[row][col] = 0;
boxCount[row][col] = 0;
__syncthreads();
 
if (gridIdx == 0) //forget previous change
past = added;
int count = 0; //number of values which can fit in this square
int guess = at; //last value found which can fit in this square
for (int idx = 0; idx < N; ++idx) {
//Ensures that every square in each section is working on a different number in the section
int num = (idx + offset) % N;
if (at == UNASSIGNED && notSeen[num]){
if (rowHas[row][num] || boxHas[box][num] || colHas[col][num]) {
notSeen[num] = false;
} else {
count++;
guess = num + 1;
rowCount[row][num] ++;
colCount[col][num] ++;
boxCount[box][num] ++;
}
}
__syncthreads();
}
//Find values which can go in only one spot in the section
if (count > 1) {
for (int idx = 0; idx < N; ++idx) {
if (notSeen[idx] &&
( rowCount[row][idx] == 1 || boxCount[box][idx] || colCount[col][idx] == 1)) {
//In this section this value can only appear in this square
count = 1;
guess = idx + 1;
}
}
}
//One value Must go here
if (count == 1) {
at = guess--;
rowHas[row][guess] = true;
colHas[col][guess] = true;
boxHas[box][guess] = true;
added = gridIdx;
}
__syncthreads();
} while (added != past);
 
d_a[gridIdx] = at;
}
|}
Reduced superSolve runtime from 5.1 to 1.2ms
 
Changes:
Used faster memory
-each thread now remembers which values it has seen in a boolean array
Reduced Thread Divergence
- values are only assigned to the grid after the kernel 'solves' the sudoku removing wait times for assigning to global memory
- at value in kernel and rowHas, colHas, boxHas, updated in a single place, reducing wait time for updating
Coalesced Memory
- change modifying _Has and _Count arrays from col->row to row->col as row(threadIdx.x) is our fastest moving dimension
- query in order of row->box->col for the same reason
Clarified Code
- use gridIdx == 0 rather then !gridIdx
- use a do-while loop rather then a while loop

Navigation menu