Changes

Jump to: navigation, search

The B-Team

8,895 bytes added, 06:37, 21 November 2012
no edit summary
{{GPU610/DPS915 Index | 20123}}
= The B-Team =
 
== Team Members ==
# [mailto:kmbarnhart@learn.senecac.on.ca?sujbect=DPS915 Kyle Barnhart]
= Assignment 2 =
== Mandelbrot Set ==
=== Repo ===
https://github.com/KyleBarnhart/Fractal/tree/cuda
 
=== Description ===
The new version of my fractal program is running on the GPU and has been highly parallelized. It is running more than ten times faster than the original version. I am sure this is highly GPU dependent and I am running it on an Nvidia Geforce GTA 660 Ti with 1344 cuda cores. However I think the comparison is fair because my CPU is also very fast. It is an Intel i5 2500k which is 4 cores at 3.3 Ghz stock and is running overclocked to 4.6 Ghz. A slower GPU compared to a slower CPU might be able to expect similar speed ups.
 
Parallelizing the main Mandelbrot function ended up requiring me to move a few other functions to the GPU and rewriting the anti-aliasing function. The anti-aliasing previously made many calls to the Mandelbrot function. This was not good for the GPU and I changed it so that it makes a list of points to run the anti-aliasing algorithm over. Another issue with the anti-aliasing was selection the right value to return. I previously found the best quality result is to return the median value. The anti-aliasing required its own Mandelbrot function as well as moving the median function to the device. I also move the function that converts Mandelbrot escape values to RGB values to the device. This was because I found that I could account for half the running time depending on settings.
 
=== Code ===
<big><pre>
__device__ void swap(ElementType* a, ElementType* b)
{
ElementType t = *a;
*a = *b;
*b = t;
}
</pre></big>
<big><pre>
__device__ ElementType getMedian(ElementType* arr, AlisingFactorSqType median, AlisingFactorSqType n)
{
AlisingFactorSqType low, high ;
AlisingFactorSqType middle, ll, hh;
 
low = 0 ; high = n-1 ;
for (;;) {
if (high <= low) /* One element only */
return arr[median] ;
 
if (high == low + 1) { /* Two elements only */
if (arr[low] > arr[high])
swap(&arr[low], &arr[high]) ;
return arr[median];
}
 
/* Find median of low, middle and high items; swap into position low */
middle = (low + high) / 2;
if (arr[middle] > arr[high]) swap(&arr[middle], &arr[high]) ;
if (arr[low] > arr[high]) swap(&arr[low], &arr[high]) ;
if (arr[middle] > arr[low]) swap(&arr[middle], &arr[low]) ;
 
/* Swap low item (now in position middle) into position (low+1) */
swap(&arr[middle], &arr[low+1]) ;
 
/* Nibble from each end towards middle, swapping items when stuck */
ll = low + 1;
hh = high;
for (;;) {
do ll++; while (arr[low] > arr[ll]) ;
do hh--; while (arr[hh] > arr[low]) ;
 
if (hh < ll)
break;
 
swap(&arr[ll], &arr[hh]) ;
}
 
/* Swap middle item (in position low) back into correct position */
swap(&arr[low], &arr[hh]) ;
 
/* Re-set active partition */
if (hh <= median)
low = ll;
if (hh >= median)
high = hh - 1;
}
}
</pre></big>
<big><pre>
__device__ ElementType mandelbrot(ElementType c_i, ElementType c_r, IterationType iterations)
{
ElementType z_r = c_r;
ElementType z_i = c_i;
ElementType z2_r = z_r * z_r;
ElementType z2_i = z_i * z_i;
IterationType n = 0;
while(n < iterations && z2_r + z2_i < 4.0)
{
z_i = 2.0 * z_r * z_i + c_i;
z_r = z2_r - z2_i + c_r;
z2_r = z_r * z_r;
z2_i = z_i * z_i;
n++;
}
 
z_i = 2.0 * z_r * z_i + c_i;
z_r = z2_r - z2_i + c_r;
z2_r = z_r * z_r;
z2_i = z_i * z_i;
 
z_i = 2.0 * z_r * z_i + c_i;
z_r = z2_r - z2_i + c_r;
z2_r = z_r * z_r;
z2_i = z_i * z_i;
n += 2;
 
if(n > iterations)
{
return (ElementType)iterations;
}
else
{
return (ElementType)n + 1.0 - log(log(sqrt(z2_r + z2_i)))/log(2.0);;
}
}
</pre></big>
<big><pre>
// Calculate if c is in Mandelbrot set.
// Return number of iterations.
__global__ void getFractal(ElementType* img, ElementType yMax, ElementType xMin, ElementType xScale, ElementType yScale, IterationType iterations, DimensionType width, DimensionType height)
{
DimensionType dx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
DimensionType dy = blockIdx.y * BLOCK_SIZE + threadIdx.y;
 
if(dx >= width || dy >= height)
return;
ElementType c_i = yMax - (ElementType)dy * yScale;
ElementType c_r = xMin + (ElementType)dx * xScale;
 
DimensionSqType c = (DimensionSqType)dy * (DimensionSqType)width + (DimensionSqType)dx;
img[c] = mandelbrot(c_i, c_r, iterations);
}
</pre></big>
<big><pre>
// Calculate if c is in Mandelbrot set.
// Return number of iterations.
__global__ void getFractalSSAA(ElementType* img, DimensionSqType* list, DimensionSqType length, ElementType yMax, ElementType xMin,
ElementType xScale, ElementType yScale, IterationType iterations,
DimensionType width, AlisingFactorType ssaafactor)
{
DimensionType curr = blockIdx.x * BLOCK_SIZE * BLOCK_SIZE + threadIdx.x;
 
if(curr >= length)
return;
 
DimensionType dx = list[curr] % width;
DimensionType dy = list[curr] / width;
 
ElementType c_i = yMax - (ElementType)dy * yScale;
ElementType c_r = xMin + (ElementType)dx * xScale;
 
// Get sub pixel width and height
ElementType xSubScale = xScale / ((ElementType)ssaafactor);
ElementType ySubScale = yScale / ((ElementType)ssaafactor);
// Get the centre of the top left subpixel
xMin = c_r - (xScale / 2.0) + (xSubScale / 2.0);
yMax = c_i + (yScale / 2.0) - (ySubScale / 2.0);
 
AlisingFactorSqType factor2 = (AlisingFactorSqType)ssaafactor * (AlisingFactorSqType)ssaafactor;
// Get the values for each pixel in fractal
ElementType subpixels[MAX_ALIASING_FACTOR * MAX_ALIASING_FACTOR];
for(AlisingFactorType x = 0; x < ssaafactor; x++)
{
for(AlisingFactorType y = 0; y < ssaafactor; y++)
{
subpixels[x * ssaafactor + y] = mandelbrot(yMax - ySubScale * y , xMin + xSubScale * x, iterations);
}
}
 
if(factor2 % 2 != 0)
{
img[list[curr]] = getMedian(subpixels, factor2 / 2, factor2);
}
else
{
img[list[curr]] = (getMedian(subpixels, factor2 / 2 - 1, factor2)
+ getMedian(subpixels, factor2 / 2, factor2))
/ 2.0;
}
}
</pre></big>
<big><pre>
__device__ void getRGB(ElementType value, BYTE* rgb)
{
short colourInt = (short)(value * 1791.0f);
BYTE bracket = colourInt / 256;
BYTE colour = (BYTE)(colourInt % 256);
 
switch (bracket)
{
case 0:
rgb[0] = colour;
rgb[1] = 0;
rgb[2] = 0;
break;
case 1:
rgb[0] = 255;
rgb[1] = colour;
rgb[2] = 0;
break;
case 2:
rgb[0] = 255 - colour;
rgb[1] = 255;
rgb[2] = 0;
break;
 
case 3:
rgb[0] = 0;
rgb[1] = 255;
rgb[2] = colour;
break;
 
case 4:
rgb[0] = 0;
rgb[1] = 255 - colour;
rgb[2] = 255;
break;
 
case 5:
rgb[0] = colour;
rgb[1] = 0;
rgb[2] = 255;
break;
 
case 6:
rgb[0] = 255 - colour;
rgb[1] = 0;
rgb[2] = 255 - colour;
break;
 
default:
rgb[0] = 0;
rgb[1] = 0;
rgb[2] = 0;
break;
}
}
</pre></big>
<big><pre>
__global__ void getBmpRGB(BYTE* image, ElementType* values, DimensionType width, DimensionType height, IterationType iterations)
{
DimensionType dy = blockIdx.y * BLOCK_SIZE + threadIdx.y;
DimensionType dx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
if(dx >= width || dy >= height)
return;
 
DimensionType c = dy * width + dx;
BYTE rgbValue[3];
 
getRGB(values[c]/(ElementType)iterations, rgbValue);
image[c*3] = rgbValue[2];
image[c*3 + 1] = rgbValue[1];
image[c*3 + 2] = rgbValue[0];
}
</pre></big>
<big><pre>
__global__ void getBmpRGBfromHistorgram(ElementType* map, BYTE* image, ElementType* values, DimensionType width, DimensionType height)
{
DimensionType dy = blockIdx.y * BLOCK_SIZE + threadIdx.y;
DimensionType dx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
if(dx >= width || dy >= height)
return;
 
DimensionType c = dy * width + dx;
 
IterationType ival = (IterationType)values[c];
 
ElementType colourVal = map[ival] + (values[c] - (ElementType)ival) * (map[ival + 1] - map[ival]);
BYTE rgbValue[3];
 
getRGB(colourVal, rgbValue);
image[c*3] = rgbValue[2];
image[c*3 + 1] = rgbValue[1];
image[c*3 + 2] = rgbValue[0];
}
</pre></big>
 
 
 
= Assignment 1 =
== Mandelbrot Set ==
=== Repo ===
1
edit

Navigation menu