GPU610/NullPointerException

From CDOT Wiki
Jump to: navigation, search


GPU610/DPS915 | Student List | Group and Project Index | Student Resources | Glossary

NullPointerException

Team Members

  1. Saad Mohammad

Email All

Progress

Assignment 1

Philip Aziz

  • Philip Aziz - Calculating Prime Numbers

Only the inner loop can be parallelized, and even then the program must still validate one candidate prime at a time, because each candidate has to be checked against the previously found prime numbers.

// Collect primes until `count` reaches `n`, testing one candidate per pass.
while (count < n){

               // Stored primes are tried only while they are below this bound.
               checkKey = int((sqrt (prime))+1);
               // Assume the candidate is prime until a stored prime divides it.
               flag = 1;
               // Trial division against the stored primes, starting at index 1.
               for (i = 1; primes[i] < checkKey and flag; i++){
                       if ((prime % primes[i])==0){
                               flag = 0;
                       }
               }
               // Record the candidate only when no divisor was found.
               if(flag){
                       primes[count++] = prime;
               }
               // Advance to the next candidate, two further on.
               prime +=2;
       }



Time by Level
n Elapsed Time
10,000 0
100,000 1
1,000,000 8
10,000,000 220
100,000,000 6200

Saad Mohammad

Project

Encrypts and decrypts images. Original project file can be found at: Image-Encrypt Github

Profiling Resource

Profiled the application using:

SYSTEM

Matrix

IMAGE

JPEG Format
324KB
1598 x 982

CUSTOM MAKEFILE

# Makefile for Assignment 1
# Saad Mohammad
#
# Builds image_encrypt with gprof instrumentation (-pg) on every compile and
# link step so the program can be profiled.

GCC_VERSION = 4.7.1
PREFIX = /usr/local/gcc/${GCC_VERSION}/bin/
CC = ${PREFIX}gcc
CPP = ${PREFIX}g++

# "clean" names an action, not a file; declare it phony so it still runs when
# a file called "clean" exists in the directory.
.PHONY: clean

# Link step. The object files are removed afterwards, so every invocation
# recompiles all four sources.
# NOTE(review): -ow1 looks like a stray flag (gcc presumably reads it as
# "-o w1") - confirm the intent before removing it.
image_encrypt: main.o encryptjpeg.o accessjpeg.o aesencrypt.o
	$(CPP) -pg -ow1 main.o encryptjpeg.o accessjpeg.o aesencrypt.o -o image_encrypt
	rm *.o
	chmod +x image_encrypt

# Compile steps: optimised (-O2), with debug symbols (-g) and profiling (-pg).
main.o: main.cpp
	$(CPP) -c -O2 -g -pg -std=c++0x main.cpp

encryptjpeg.o: encryptjpeg.cpp
	$(CPP) -c -O2 -g -pg -std=c++0x encryptjpeg.cpp

accessjpeg.o: accessjpeg.cpp
	$(CPP) -c -O2 -g -pg -std=c++0x accessjpeg.cpp

aesencrypt.o: aesencrypt.cpp
	$(CPP) -c -O2 -g -pg -std=c++0x aesencrypt.cpp

# Remove the executable and any leftover object files.
clean:
	rm -f image_encrypt *.o
Profiling Results

Encryption:

Each sample counts as 0.01 seconds.

 %   cumulative   self              self     total           
time   seconds   seconds    calls  us/call  us/call  name    
53.85      0.14     0.14  5979744     0.02     0.02  aesEncrypt::ffmul(unsigned char, unsigned char)
30.77      0.22     0.08   207630     0.39     0.39  aesEncrypt::subBytes(char*)
11.54      0.25     0.03   186867     0.16     0.91  aesEncrypt::mixColumns(char*)
 3.85      0.26     0.01    20763     0.48    12.52  aesEncrypt::encryptBlock(char*)
 0.00      0.26     0.00   207630     0.00     0.00  aesEncrypt::shiftRows(char*)
 0.00      0.26     0.00    20764     0.00     0.00  accessJpeg::hasMore()
 0.00      0.26     0.00    20763     0.00     0.00  accessJpeg::accessBlock()
 0.00      0.26     0.00       40     0.00     0.00  aesEncrypt::sboxify(unsigned char)
 0.00      0.26     0.00       10     0.00     0.00  aesEncrypt::roundify(unsigned char)
 0.00      0.26     0.00        1     0.00     0.00  _GLOBAL__sub_I__ZN10accessJpegC2Ev
 0.00      0.26     0.00        1     0.00     0.00  _GLOBAL__sub_I__ZN11encryptJpeg7processEc
 0.00      0.26     0.00        1     0.00     0.00  _GLOBAL__sub_I_main
 0.00      0.26     0.00        1     0.00     0.00  accessJpeg::readInFile(char*)
 0.00      0.26     0.00        1     0.00     0.00  accessJpeg::jumpToStart()
 0.00      0.26     0.00        1     0.00     0.00  accessJpeg::writeOutFile(char*)
 0.00      0.26     0.00        1     0.00     0.00  accessJpeg::~accessJpeg()
 0.00      0.26     0.00        1     0.00     0.00  aesEncrypt::setTextKey(std::string)
 0.00      0.26     0.00        1     0.00     0.00  aesEncrypt::expandKey()
 0.00      0.26     0.00        1     0.00     0.00  aesEncrypt::~aesEncrypt()

Decryption:

Each sample counts as 0.01 seconds.

 %   cumulative   self              self     total           
time   seconds   seconds    calls  us/call  us/call  name    
58.97      0.23     0.23 11959488     0.02     0.02  aesEncrypt::ffmul(unsigned char, unsigned char)
25.64      0.33     0.10   207630     0.48     0.48  aesEncrypt::invSubBytes(char*)
 7.69      0.36     0.03   186867     0.16     1.39  aesEncrypt::invMixColumns(char*)
 5.13      0.38     0.02    20763     0.96    18.78  aesEncrypt::decryptBlock(char*)
 2.56      0.39     0.01   207630     0.05     0.05  aesEncrypt::invShiftRows(char*)
 0.00      0.39     0.00    20764     0.00     0.00  accessJpeg::hasMore()
 0.00      0.39     0.00    20763     0.00     0.00  accessJpeg::accessBlock()
 0.00      0.39     0.00       40     0.00     0.00  aesEncrypt::sboxify(unsigned char)
 0.00      0.39     0.00       10     0.00     0.00  aesEncrypt::roundify(unsigned char)
 0.00      0.39     0.00        1     0.00     0.00  _GLOBAL__sub_I__ZN10accessJpegC2Ev
 0.00      0.39     0.00        1     0.00     0.00  _GLOBAL__sub_I__ZN11encryptJpeg7processEc
 0.00      0.39     0.00        1     0.00     0.00  _GLOBAL__sub_I_main
 0.00      0.39     0.00        1     0.00     0.00  accessJpeg::readInFile(char*)
 0.00      0.39     0.00        1     0.00     0.00  accessJpeg::jumpToStart()
 0.00      0.39     0.00        1     0.00     0.00  accessJpeg::writeOutFile(char*)
 0.00      0.39     0.00        1     0.00     0.00  accessJpeg::~accessJpeg()
 0.00      0.39     0.00        1     0.00     0.00  aesEncrypt::setTextKey(std::string)
 0.00      0.39     0.00        1     0.00     0.00  aesEncrypt::expandKey()
 0.00      0.39     0.00        1     0.00     0.00  aesEncrypt::~aesEncrypt()

Assignment 2

Changes

The following diff file shows the changes made to parallelize the code:


diff --git a/aesencrypt.cpp b/aesencrypt.cpp
deleted file mode 100755
index ab189d7..0000000
--- a/aesencrypt.cpp
+++ /dev/null
@@ -1,344 +0,0 @@
-/********************************************************
- Winter 2013
- GPU610 Assignment
- Author: Saad Mohammad, Phillip Aziz, Natesh Mayuranathan
- Team: NullPointerException
-
- Original Project: <https://github.com/markwatson/Image-Encrypt>
- Commit: c7b4ee3852a5fe3c9b80171a1e621f514f10ac33
- Original Author: Mark Watson
- ********************************************************/
-
-#include <string>
-#include <stdexcept>
-#include "aesencrypt.h"
-
-aesEncrypt::aesEncrypt()
-{
-    // set values
-    Nk = 4; // this is in words
-    Nb = BLOCK_SIZE; // !! important, this is in bytes, not words
-    Nr = 10; // extrapolated fom table for aes standard
-    fullkey = NULL;
-    expandedkey = NULL;
-}
-
-aesEncrypt::~aesEncrypt()
-{
-    if (fullkey != NULL)
-        delete[] fullkey;
-    if (expandedkey != NULL)
-        delete[] expandedkey;
-}
-
-
-bool aesEncrypt::encryptBlock(char* block)
-{
-    xorRoundKey(block, expandedkey);
-    for(int cnt = 1;cnt <= (Nr-1);cnt++)
-    {
-        subBytes(block);
-        shiftRows(block);
-        mixColumns(block);
-        xorRoundKey(block, expandedkey + (cnt * Nb));
-    }
-    subBytes(block);
-    shiftRows(block);
-    xorRoundKey(block, expandedkey + (Nr * Nb));
-   
-    return true; // could error check in future
-}
-
-bool aesEncrypt::decryptBlock(char* block)
-{
-    xorRoundKey(block, expandedkey + (Nr * Nb));
-
-    for(int cnt = Nr-1; cnt > 0;cnt--)
-    {
-        invShiftRows(block);
-        invSubBytes(block);
-        xorRoundKey(block, expandedkey + cnt * Nb);
-        invMixColumns(block);
-    }
-
-    invShiftRows(block);
-    invSubBytes(block);
-    xorRoundKey(block, expandedkey);
-   
-    return true;
-}
-
-void aesEncrypt::setTextKey(std::string key)
-{
-    // set the key
-    fullkey = new char [Nk * 4];
-   
-    // go through the text key copying till full key is full
-    int pos = 0; // start grabbing from string at 0
-    for (int cnt = 0;cnt < BLOCK_SIZE;cnt++)
-    {
-        try
-        {
-            fullkey[cnt] = key.at(pos);
-        }
-        catch (std::out_of_range&)
-        {
-            // recover by repeating key
-            pos = pos - (key.length()+1);
-            cnt--;
-        }
-
-        pos++; // incrmemnt string position
-    }
-   
-    expandKey();
-}
-
-void aesEncrypt::expandKey()
-{
-    // limit
-    int limit = (Nb/4)*(Nr+1);
-    // grab some fresh memory for the expanded key
-    expandedkey = new char [4*limit];
-    // main counter
-    int cnt;
-    // temp loaction
-    unsigned char temp[4];
-
-    // set first key
-    for(cnt = 0;cnt < Nk;cnt++)
-    {
-        for (int n = 0; n < 4; n++)
-        {
-            expandedkey[cnt*4+n] = fullkey[cnt*4+n];
-        }
-    }
-    // continue where we left off with other keys
-    for (cnt = Nk;cnt < limit;cnt++)
-    {
-        // copy key into temp location
-        for(int n=0;n<4;n++)
-        {
-            temp[n] = expandedkey[(cnt-1)*4+n];
-        }
-        if ((cnt % Nk) == 0)
-        {
-            // rotate and then sboxify
-            rotate_left(temp);
-            for (int n = 0;n<4;n++)
-                temp[n] = sboxify(temp[n]);
-            // round
-            temp[0] ^= roundify(cnt/Nk);
-        }
-        // xor temp
-        for (int n = 0; n < 4; n++)
-        {
-            expandedkey[cnt*4+n] = expandedkey[(cnt-Nk)*4+n] ^ temp[n];
-        }
-    }
-    // rearrange
-    unsigned char* temp2;
-    temp2 = new unsigned char [Nb];
-    for (int all = 0; all <= Nr;all++)
-    {
-        // copy into temp
-        for (int i = 0; i < Nb; i++)
-        {
-            temp2[i] = expandedkey[Nb*all+i];
-        }
-        for (cnt = 0;cnt < Nb/4;cnt++)
-        {
-            for (int i = 0; i < Nb/4;i++)
-            {
-                expandedkey[Nb*all+(cnt+i*4)] = temp2[i+cnt*4];
-            }
-        }
-    }
-    delete[] temp2; // cleanup
-}
-
-void aesEncrypt::subBytes(char * state)
-{
-    for (int cnt = 0;cnt < Nb;cnt++)
-    {
-        state[cnt] = sboxify(state[cnt]);
-    }
-}
-
-void aesEncrypt::invSubBytes(char * state)
-{
-    for (int cnt = 0;cnt < Nb;cnt++)
-    {
-        state[cnt] = invsboxify(state[cnt]);
-    }
-}
-
-void aesEncrypt::invShiftRows(char * state)
-{
-    for (int row = 1; row <4; row++)
-    {
-        // see below
-        for (int n = 1; n < (row+1); n++)
-        {
-            rotate_right((unsigned char*) state + (row*4));
-        }
-    }
-}
-
-void aesEncrypt::invMixColumns(char * state)
-{
-    unsigned char temp[4];
-
-    for (int col = 0; col < (Nb/4); col++)
-    {
-        for (int n = 0; n < 4; n++)
-        {
-            temp[n] = state[n*4+col];
-        }
-        for (int cnt = 0; cnt < 4; cnt++)
-        {
-            state[cnt*4+col] = ffmul(0x0e, temp[cnt]) ^
-                               ffmul(0x0b, temp[(cnt+1) % 4]) ^
-                               ffmul(0x0d, temp[(cnt+2) % 4]) ^
-                               ffmul(0x09, temp[(cnt+3) % 4]);
-        }
-    }
-}
-
-void aesEncrypt::shiftRows(char * state)
-{
-    for (int row = 1; row < 4;row++)
-    {
-        // shift it over needed amount of times
-        // 0 for row 1, 1, for row 2, etc.
-        for (int n = 1; n < (row+1);n++)
-        {
-            // pass in a pointer to the row
-            rotate_left((unsigned char*) state + (row*4));
-        }
-    }
-}
-
-void aesEncrypt::mixColumns(char * state)
-{
-    unsigned char temp[4];
-
-    for (int col = 0; col < (Nb/4); col++)
-    {
-        for (int n = 0; n < 4; n++)
-        {
-            temp[n] = state[n*4+col]; //state[col*4+n];
-        }
-        for (int cnt = 0; cnt < 4; cnt++)
-        {
-            state[cnt*4+col] = ffmul(0x02, temp[cnt]) ^
-                               ffmul(0x03, temp[(cnt+1) % 4]) ^
-                               temp[(cnt+2) % 4] ^
-                               temp[(cnt+3) % 4];
-
-        }
-    }
-}
-
-void aesEncrypt::xorRoundKey(char * state, char * key)
-{
-    for (int cnt = 0;cnt < Nb;cnt++)
-    {
-        state[cnt] ^= key[cnt];
-    }
-}
-
-
-// this function multiplies two values
-// ints are passed in to deal with overflow
-unsigned char aesEncrypt::ffmul(unsigned char x, unsigned char y) {
-    if (x == 0 && y == 0) return 0; // special case...
-    // bitsets
-    std::bitset<8> p (0);
-    std::bitset<8> a (x);
-    std::bitset<8> b (y);
-    // bool
-    bool left_a;
-    // run
-    for(int cnt = 0; cnt < 8;cnt++)
-    {
-        if (b[0]) p ^= a;
-        left_a = a[7];
-        a<<=1;
-        if (left_a) a^=0x1b;
-        b>>=1;
-    }
-    return (unsigned char) p.to_ulong();
-}
-
-unsigned char aesEncrypt::sboxify(unsigned char val)
-{
-    //printf("\n sbox: %i \n", val);
-    unsigned char sbox[256] = {
-    //0     1    2      3     4    5     6     7      8    9     A      B    C     D     E     F
-    0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, //0
-    0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, //1
-    0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, //2
-    0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, //3
-    0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, //4
-    0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, //5
-    0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, //6
-    0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, //7
-    0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, //8
-    0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, //9
-    0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, //A
-    0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, //B
-    0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, //C
-    0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, //D
-    0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, //E
-    0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16  //F
-    };
-    return sbox[val];
-}
-
-unsigned char aesEncrypt::invsboxify(unsigned char val)
-{
-    unsigned char sbox[256] = {
-    //0     1    2      3     4    5     6     7      8    9     A      B    C     D     E     F
-    0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
-    0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
-    0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
-    0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
-    0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
-    0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
-    0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
-    0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
-    0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
-    0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
-    0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
-    0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
-    0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
-    0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
-    0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
-    0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
-    };
-    return sbox[val];
-}
-
-unsigned char aesEncrypt::roundify(unsigned char index)
-{
-    unsigned char round[255] = { 
-    0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a,  
-    0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39,  
-    0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a,  
-    0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8,  
-    0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef,  
-    0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc,  
-    0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b,  
-    0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3,  
-    0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94,  
-    0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20,  
-    0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35,  
-    0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f,  
-    0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04,  
-    0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63,  
-    0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd,  
-    0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb
-    };
-    return round[index];
-}
diff --git a/aesencrypt.cu b/aesencrypt.cu
index ab189d7..4c89090 100755
--- a/aesencrypt.cu
+++ b/aesencrypt.cu
@@ -1,17 +1,23 @@
 /********************************************************
- Winter 2013
- GPU610 Assignment
- Author: Saad Mohammad, Phillip Aziz, Natesh Mayuranathan
- Team: NullPointerException
-
- Original Project: <https://github.com/markwatson/Image-Encrypt>
- Commit: c7b4ee3852a5fe3c9b80171a1e621f514f10ac33
- Original Author: Mark Watson
- ********************************************************/
+Winter 2013
+GPU610 Assignment
+Author: Saad Mohammad, Phillip Aziz, Natesh Mayuranathan
+Team: NullPointerException
+
+Original Project: <https://github.com/markwatson/Image-Encrypt>
+Commit: c7b4ee3852a5fe3c9b80171a1e621f514f10ac33
+Original Author: Mark Watson
+********************************************************/
 
 #include <string>
 #include <stdexcept>
-#include "aesencrypt.h"
+#include <cstdlib>
+#include <iostream>
+#include <device_launch_parameters.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include "aesencrypt.cuh"
+using namespace std;
 
 aesEncrypt::aesEncrypt()
 {
@@ -21,6 +27,38 @@ aesEncrypt::aesEncrypt()
     Nr = 10; // extrapolated fom table for aes standard
     fullkey = NULL;
     expandedkey = NULL;
+       
+    initalizeDeviceMemory();
+}
+
+void aesEncrypt::initalizeDeviceMemory(){
+        unsigned char sbox[256] = {
+        //0     1    2      3     4    5     6     7      8    9     A      B    C     D     E     F
+        0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, //0
+        0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, //1
+        0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, //2
+        0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, //3
+        0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, //4
+        0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, //5
+        0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, //6
+        0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, //7
+        0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, //8
+        0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, //9
+        0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, //A
+        0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, //B
+        0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, //C
+        0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, //D
+        0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, //E
+        0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16  //F
+    };
+    //unsigned char* deviceBoxify;
+    cudaMalloc((void**)&deviceBoxify, sizeof(char) * 256);
+    cudaMemcpy(deviceBoxify, sbox, sizeof( char) * 256, cudaMemcpyHostToDevice);
+}
+
+void aesEncrypt::cleanDeviceMemory(){
+    cudaFree(deviceBoxify);
+    cudaFree(deviceExpandedKey);
 }
 
 aesEncrypt::~aesEncrypt()
@@ -29,28 +67,113 @@ aesEncrypt::~aesEncrypt()
         delete[] fullkey;
     if (expandedkey != NULL)
         delete[] expandedkey;
+   
+    cleanDeviceMemory();
+}
+
+__global__ void xorRoundKeyGlobal(char * state, char * key)
+{
+    int idx = threadIdx.x;
+    state[idx] = (int)((int)state[idx] ^ (int)key[idx]);
+}
+
+char* aesEncrypt::copyChar(char* source, int size){
+
+
+    return NULL;
+}
+
+__global__ void subBytesCuda(char * state, unsigned char * sbBox)
+{
+    int idx = threadIdx.x;
+    int n = static_cast<unsigned char>(state[idx]);
+    state[idx] = sbBox[n];
 }
 
+__global__ void shiftRowsCuda(char* state)
+{
+    int row = threadIdx.x + 1;
+    int j = row *4;
+    unsigned char *tmp = (unsigned char*) state +j;
+   
+    for(int i=1; i < (row+1); i++)
+    {
+        unsigned char trimmings;
+        trimmings = tmp[0];
+        tmp[0] = tmp[1];
+        tmp[1] = tmp[2];
+        tmp[2] = tmp[3];
+        tmp[3] = trimmings;
+
+    }   
+}
 
 bool aesEncrypt::encryptBlock(char* block)
 {
-    xorRoundKey(block, expandedkey);
+    //Initalize device memory
+    char* deviceBlock;
+    char* deviceTempChar;
+    cudaMalloc((void**)&deviceBlock, sizeof(char) * (BLOCK_SIZE));
+   
+    //Copy memory to device
+    cudaMemcpy(deviceBlock, block,sizeof( char) * (BLOCK_SIZE), cudaMemcpyHostToDevice);
+   
+    //CUDA CALL for: xorRoundKey(block, expandedkey)
+    xorRoundKeyGlobal<<<1, Nb>>>(deviceBlock, deviceExpandedKey);
+    cudaDeviceSynchronize();
+
     for(int cnt = 1;cnt <= (Nr-1);cnt++)
     {
-        subBytes(block);
-        shiftRows(block);
-        mixColumns(block);
-        xorRoundKey(block, expandedkey + (cnt * Nb));
+        //CUDA CALL for: subBytes(block))
+        subBytesCuda<<<1,Nb>>>(deviceBlock, deviceBoxify);
+        cudaDeviceSynchronize();
+       
+        //CUDA CALL for: shiftRows(block);
+        shiftRowsCuda<<<1,4>>>(deviceBlock);
+        cudaDeviceSynchronize();
+       
+        //mixColumns(block); // Disabled
+   
+        //CUDA CALL for: xorRoundKey(block, expandedkey + (cnt * Nb)))
+        cudaMalloc((void**)&deviceTempChar, sizeof(char)*(Nb));
+        cudaMemcpy(deviceTempChar, expandedkey + (cnt * Nb), sizeof(char) *  Nb, cudaMemcpyHostToDevice);
+
+        xorRoundKeyGlobal<<<1, Nb>>>(deviceBlock, deviceTempChar); //xorRoundKey(block, expandedkey);
+        cudaDeviceSynchronize();
+       
+        cudaFree(deviceTempChar);
     }
-    subBytes(block);
-    shiftRows(block);
-    xorRoundKey(block, expandedkey + (Nr * Nb));
+    //CUDA CALL for: subBytes(block))
+    subBytesCuda<<<1,Nb>>>(deviceBlock, deviceBoxify);
+    cudaDeviceSynchronize();
+
+    //CUDA CALL for: shiftRows(block);
+    shiftRowsCuda<<<1,4>>>(deviceBlock);
+    cudaDeviceSynchronize();
+
+    //CUDA CALL for: xorRoundKey(block, expandedkey + (Nr * Nb))
+    cudaMalloc((void**)&deviceTempChar, sizeof(char)*(Nb+1));
+    cudaMemcpy(deviceTempChar, expandedkey + (Nr * Nb), sizeof(char) * (Nb +1), cudaMemcpyHostToDevice);
+
+    xorRoundKeyGlobal<<<1, Nb>>>(deviceBlock, deviceTempChar); //xorRoundKey(block, expandedkey); ////////////////////////////////////////////////////////IMPORTANT
+    cudaDeviceSynchronize();
+
+   
+    //END_FUNCTION
+    cudaMemcpy(block, deviceBlock, sizeof(char)* (BLOCK_SIZE), cudaMemcpyDeviceToHost);
+
+    //Free memory
+    cudaFree(deviceBlock);
+    cudaFree(deviceTempChar);
+   
    
     return true; // could error check in future
 }
 
+
 bool aesEncrypt::decryptBlock(char* block)
 {
+
     xorRoundKey(block, expandedkey + (Nr * Nb));
 
     for(int cnt = Nr-1; cnt > 0;cnt--)
@@ -58,13 +181,13 @@ bool aesEncrypt::decryptBlock(char* block)
         invShiftRows(block);
         invSubBytes(block);
         xorRoundKey(block, expandedkey + cnt * Nb);
-        invMixColumns(block);
+        //invMixColumns(block); // Disabled
     }
 
     invShiftRows(block);
     invSubBytes(block);
     xorRoundKey(block, expandedkey);
-   
+
     return true;
 }
 
@@ -72,7 +195,7 @@ void aesEncrypt::setTextKey(std::string key)
 {
     // set the key
     fullkey = new char [Nk * 4];
-   
+
     // go through the text key copying till full key is full
     int pos = 0; // start grabbing from string at 0
     for (int cnt = 0;cnt < BLOCK_SIZE;cnt++)
@@ -90,8 +213,12 @@ void aesEncrypt::setTextKey(std::string key)
 
         pos++; // incrmemnt string position
     }
-   
+
     expandKey();
+   
+    //Save host expanded key to device
+    cudaMalloc((void**)&deviceExpandedKey, sizeof(char) * (strlen(expandedkey) +1));
+    cudaMemcpy(deviceExpandedKey, expandedkey, sizeof(char) * (strlen(expandedkey) +1), cudaMemcpyHostToDevice);
 }
 
 void aesEncrypt::expandKey()
@@ -154,6 +281,7 @@ void aesEncrypt::expandKey()
             }
         }
     }
+
     delete[] temp2; // cleanup
 }
 
@@ -198,9 +326,9 @@ void aesEncrypt::invMixColumns(char * state)
         for (int cnt = 0; cnt < 4; cnt++)
         {
             state[cnt*4+col] = ffmul(0x0e, temp[cnt]) ^
-                               ffmul(0x0b, temp[(cnt+1) % 4]) ^
-                               ffmul(0x0d, temp[(cnt+2) % 4]) ^
-                               ffmul(0x09, temp[(cnt+3) % 4]);
+                ffmul(0x0b, temp[(cnt+1) % 4]) ^
+                ffmul(0x0d, temp[(cnt+2) % 4]) ^
+                ffmul(0x09, temp[(cnt+3) % 4]);
         }
     }
 }
@@ -232,9 +360,9 @@ void aesEncrypt::mixColumns(char * state)
         for (int cnt = 0; cnt < 4; cnt++)
         {
             state[cnt*4+col] = ffmul(0x02, temp[cnt]) ^
-                               ffmul(0x03, temp[(cnt+1) % 4]) ^
-                               temp[(cnt+2) % 4] ^
-                               temp[(cnt+3) % 4];
+                ffmul(0x03, temp[(cnt+1) % 4]) ^
+                temp[(cnt+2) % 4] ^
+                temp[(cnt+3) % 4];
 
         }
     }
@@ -242,13 +370,20 @@ void aesEncrypt::mixColumns(char * state)
 
 void aesEncrypt::xorRoundKey(char * state, char * key)
 {
+    //cout << "EQ: \t\t" ;
     for (int cnt = 0;cnt < Nb;cnt++)
     {
+        //cout << (int) state[cnt] << " ^ "
+        //cout << (int) key[cnt] << " ";//"= ";
         state[cnt] ^= key[cnt];
+        //cout << (int) state[cnt] << endl;
     }
+
+    //cout << endl;
 }
 
 
+
 // this function multiplies two values
 // ints are passed in to deal with overflow
 unsigned char aesEncrypt::ffmul(unsigned char x, unsigned char y) {
@@ -275,23 +410,23 @@ unsigned char aesEncrypt::sboxify(unsigned char val)
 {
     //printf("\n sbox: %i \n", val);
     unsigned char sbox[256] = {
-    //0     1    2      3     4    5     6     7      8    9     A      B    C     D     E     F
-    0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, //0
-    0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, //1
-    0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, //2
-    0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, //3
-    0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, //4
-    0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, //5
-    0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, //6
-    0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, //7
-    0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, //8
-    0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, //9
-    0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, //A
-    0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, //B
-    0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, //C
-    0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, //D
-    0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, //E
-    0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16  //F
+        //0     1    2      3     4    5     6     7      8    9     A      B    C     D     E     F
+        0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, //0
+        0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, //1
+        0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, //2
+        0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, //3
+        0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, //4
+        0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, //5
+        0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, //6
+        0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, //7
+        0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, //8
+        0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, //9
+        0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, //A
+        0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, //B
+        0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, //C
+        0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, //D
+        0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, //E
+        0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16  //F
     };
     return sbox[val];
 }
@@ -299,23 +434,23 @@ unsigned char aesEncrypt::sboxify(unsigned char val)
 unsigned char aesEncrypt::invsboxify(unsigned char val)
 {
     unsigned char sbox[256] = {
-    //0     1    2      3     4    5     6     7      8    9     A      B    C     D     E     F
-    0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
-    0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
-    0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
-    0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
-    0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
-    0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
-    0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
-    0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
-    0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
-    0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
-    0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
-    0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
-    0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
-    0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
-    0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
-    0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+        //0     1    2      3     4    5     6     7      8    9     A      B    C     D     E     F
+        0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+        0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+        0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+        0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+        0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+        0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+        0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+        0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+        0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+        0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+        0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+        0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+        0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+        0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+        0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+        0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
     };
     return sbox[val];
 }
@@ -323,22 +458,22 @@ unsigned char aesEncrypt::invsboxify(unsigned char val)
 unsigned char aesEncrypt::roundify(unsigned char index)
 {
     unsigned char round[255] = { 
-    0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a,  
-    0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39,  
-    0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a,  
-    0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8,  
-    0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef,  
-    0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc,  
-    0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b,  
-    0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3,  
-    0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94,  
-    0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20,  
-    0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35,  
-    0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f,  
-    0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04,  
-    0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63,  
-    0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd,  
-    0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb
+        0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a,  
+        0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39,  
+        0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a,  
+        0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8,  
+        0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef,  
+        0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc,  
+        0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b,  
+        0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3,  
+        0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94,  
+        0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20,  
+        0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35,  
+        0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f,  
+        0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04,  
+        0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63,  
+        0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd,  
+        0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb
     };
     return round[index];
 }
diff --git a/aesencrypt.cuh b/aesencrypt.cuh
index 7d7afc0..7de2276 100755
--- a/aesencrypt.cuh
+++ b/aesencrypt.cuh
@@ -24,6 +24,16 @@ private:
     std::string textkey;
     char* expandedkey;
     char* fullkey;
+
+    //Device memory
+    unsigned char* deviceBoxify;
+    char* deviceExpandedKey;
+   
+
+    char* copyChar(char* source, int size); //by me
+    void initalizeDeviceMemory(); //by me
+    void cleanDeviceMemory(); //by me
+   
 public:
     aesEncrypt();
     ~aesEncrypt();
@@ -45,7 +55,6 @@ public:
     unsigned char roundify(unsigned char index);
    
     unsigned char ffmul(unsigned char, unsigned char);
-   
     // assumes a word
     void rotate_left(unsigned char * tmp) {
         unsigned char trimmings;
diff --git a/aesencrypt.h b/aesencrypt.h
deleted file mode 100755
index 7d7afc0..0000000
--- a/aesencrypt.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/********************************************************
- Winter 2013
- GPU610 Assignment
- Author: Saad Mohammad, Phillip Aziz, Natesh Mayuranathan
- Team: NullPointerException
-
- Original Project: <https://github.com/markwatson/Image-Encrypt>
- Commit: c7b4ee3852a5fe3c9b80171a1e621f514f10ac33
- Original Author: Mark Watson
- ********************************************************/
-
-#ifndef AESENCRYPT_H
-#define AESENCRYPT_H
-#define BLOCK_SIZE 16
-
-#include <string>
-#include <bitset>
-#include "stdio.h"
-
-class aesEncrypt
-{
-private:
-    int Nk, Nb, Nr; // lengths.
-    std::string textkey;
-    char* expandedkey;
-    char* fullkey;
-public:
-    aesEncrypt();
-    ~aesEncrypt();
-    bool encryptBlock(char*);
-    bool decryptBlock(char*);
-    void setTextKey(std::string);
-    void expandKey();
-
-    void subBytes(char *);
-    void invSubBytes(char *);
-    void shiftRows(char *);
-    void invShiftRows(char *);
-    void mixColumns(char *);
-    void invMixColumns(char *);
-    void xorRoundKey(char *, char *);
-
-    unsigned char sboxify(unsigned char val);
-    unsigned char invsboxify(unsigned char val);
-    unsigned char roundify(unsigned char index);
-   
-    unsigned char ffmul(unsigned char, unsigned char);
-   
-    // assumes a word
-    void rotate_left(unsigned char * tmp) {
-        unsigned char trimmings;
-        trimmings = tmp[0];
-        tmp[0] = tmp[1];
-        tmp[1] = tmp[2];
-        tmp[2] = tmp[3];
-        tmp[3] = trimmings;
-    }
-    void rotate_right(unsigned char * tmp) {
-        unsigned char trimmings;
-        trimmings = tmp[3];
-        tmp[3] = tmp[2];
-        tmp[2] = tmp[1];
-        tmp[1] = tmp[0];
-        tmp[0] = trimmings;
-    }
-    unsigned char rcon(unsigned char in) {
-        unsigned char c=1;
-        if(in == 0) 
-                return 0;
-        while(in != 1) {
-            unsigned char b;
-            b = c & 0x80;
-            c <<= 1;
-            if(b == 0x80) {
-                c ^= 0x1b;
-            }
-                in--;
-        }
-        return c;
-    }
-
-};
-
-#endif // AESENCRYPT_H
diff --git a/encryptjpeg.cpp b/encryptjpeg.cpp
index 7e1710b..a722b0d 100755
--- a/encryptjpeg.cpp
+++ b/encryptjpeg.cpp
@@ -12,7 +12,7 @@
 #include <iostream>
 #include <iomanip>
 #include "accessjpeg.h"
-#include "aesencrypt.h"
+#include "aesencrypt.cuh"
 #include "encryptjpeg.h"
 
 bool encryptJpeg::process(char action)
diff --git a/encryptjpeg.h b/encryptjpeg.h
index ed97895..f412ec0 100755
--- a/encryptjpeg.h
+++ b/encryptjpeg.h
@@ -14,7 +14,7 @@
 
 #include <string>
 #include "accessjpeg.h"
-#include "aesencrypt.h"
+#include "aesencrypt.cuh"
 
 class encryptJpeg
 {
diff --git a/main.cpp b/main.cpp
deleted file mode 100755
index 4b08144..0000000
--- a/main.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-/********************************************************
- Winter 2013
- GPU610 Assignment
- Author: Saad Mohammad, Phillip Aziz, Natesh Mayuranathan
- Team: NullPointerException
-
- Original Project: <https://github.com/markwatson/Image-Encrypt>
- Commit: c7b4ee3852a5fe3c9b80171a1e621f514f10ac33
- Original Author: Mark Watson
- ********************************************************/
-
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <ctime>
-#include "encryptjpeg.h"
-#include "aesencrypt.h"
-using namespace std;
-
-int main(int argc, char *argv[])
-{
-    // validate command line arguments
-    if (argc != 4 || (argv[1][1] != 'e' && argv[1][1] != 'd'))
-    {
-        cout << "Invalid command line arguments." << endl
-             << "Please something like the following:" << endl
-             << "\t To encrypt: \t image_encrypt -e <input> <output>\n"
-             << "\t To decrypt: \t image_encrypt -d <input> <output>\n";
-        return 0;
-    }
-
-    // run encryption
-    try
-    {
-    clock_t cb, ce;
-
-        // variables
-        encryptJpeg enc;
-        string key;
-       
-        // set files
-        enc.setInFile(argv[2]);
-        enc.setOutFile(argv[3]);
-       
-        // set key
-        cout << "Please enter a key: ";
-        //cin >> key;
-        key = "gpu610";
-        //key = "123456789012345678";
-        cout << endl << "Default key will be used: " << key << endl;
-        enc.setPlainKey(key);
-
-        // do the actual encryption
-        cb = clock();
-
-        enc.process(argv[1][1]);
-        // stop timer
-        ce = clock();
-        cout << "Process took: " <<
-        double(ce - cb)/CLOCKS_PER_SEC << " secs" << endl;
-    }
-    catch(encryptJpeg::invalidInFile)
-    {
-        cout << "That input file is not valid" << endl;
-    }
-    catch(encryptJpeg::invalidOutFile)
-    {
-        cout << "Could not open output file for writing" << endl;
-    }
-    catch(encryptJpeg::invalidAction)
-    {
-        cout << "The program encountered an internal error..." << endl;
-    }
-
-   
-    return 0;
-}



Assignment 3


diff --git a/accessjpeg.cpp b/accessjpeg.cpp

index f967465..2bfb7eb 100644

--- a/accessjpeg.cpp

+++ b/accessjpeg.cpp

@@ -1,7 +1,7 @@

 /********************************************************

  Winter 2013 

  GPU610 Assignment

- Author: Saad Mohammad, Phillip Aziz, Natesh Mayuranathan

+ Author: Saad Mohammad

  Team: NullPointerException

  

  Original Project: <https://github.com/markwatson/Image-Encrypt>

@@ -93,3 +93,13 @@ char * accessJpeg::accessBlock()

 	cursor += BLOCK_SIZE;

 	return ret;

 }

+

+int accessJpeg::getNumberOfBlocks(){

+	return ((size - cursor)/BLOCK_SIZE) + 1;

+}

+

+void accessJpeg::goToStart()

+{

+	cursor = 0; // rewind to offset 0; the original `cursor += 0` left the cursor unchanged

+	jumpToStart(); // defined elsewhere; presumably positions the cursor at the first block -- confirm

+}

diff --git a/accessjpeg.h b/accessjpeg.h

index 5293bd9..fe87d0d 100644

--- a/accessjpeg.h

+++ b/accessjpeg.h

@@ -1,7 +1,7 @@

 /********************************************************

  Winter 2013 

  GPU610 Assignment

- Author: Saad Mohammad, Phillip Aziz, Natesh Mayuranathan

+ Author: Saad Mohammad

  Team: NullPointerException

  

  Original Project: <https://github.com/markwatson/Image-Encrypt>

@@ -29,6 +29,8 @@ public:

 	bool jumpToStart();

 	char * accessBlock();

 	bool hasMore();

+	int getNumberOfBlocks();

+	void goToStart();

 };

 

 #endif // ACCESSJPEG_H

diff --git a/aesencrypt.cu b/aesencrypt.cu

index 4c89090..fc4393b 100644

--- a/aesencrypt.cu

+++ b/aesencrypt.cu

@@ -1,7 +1,7 @@

 /********************************************************

 Winter 2013 

 GPU610 Assignment

-Author: Saad Mohammad, Phillip Aziz, Natesh Mayuranathan

+Author: Saad Mohammad

 Team: NullPointerException

 

 Original Project: <https://github.com/markwatson/Image-Encrypt>

@@ -19,6 +19,8 @@ Original Author: Mark Watson

 #include "aesencrypt.cuh"

 using namespace std;

 

+const int NTPB = 1024; //number of threads per block

+

 aesEncrypt::aesEncrypt()

 {

 	// set values

@@ -71,105 +73,135 @@ aesEncrypt::~aesEncrypt()

 	cleanDeviceMemory();

 }

 

-__global__ void xorRoundKeyGlobal(char * state, char * key)

-{

-	int idx = threadIdx.x;

-	state[idx] = (int)((int)state[idx] ^ (int)key[idx]);

-}

-

-char* aesEncrypt::copyChar(char* source, int size){

+__global__ void encryptKernel(char **state, char *key, unsigned char *sbBox, int Nb, int Nr, int size){

+	int position = 0;

+	int idx = blockIdx.x * blockDim.x + threadIdx.x;

 

+	if(idx < size){

+		char* block = state[idx];

+	

+		//CALL for: xorRoundKey(block, expandedkey)

+		for(int i=0;i<Nb;i++){

+			block[i] = (int)((int)block[i] ^ (int)key[i + position]);

+		}

+		

+		for(int cnt=1; cnt <= (Nr-1); cnt++){

+			//CALL for: subBytes(block))

+			for(int i=0;i<Nb;i++){

+				int n = static_cast<unsigned char>(block[i]);

+				block[i] = sbBox[n];

+			}

 

-	return NULL;

-}

+			//CALL for: shiftRows(block);

+			for(int i=0;i<3;i++){ // rows 1..3 only: row 4 starts at block[16], past the BLOCK_SIZE-byte block

+				int row = i + 1; // row r is rotated left by r positions; row 0 is left untouched

+				int j = row *4; // byte offset of this 4-byte row inside the block

+				unsigned char *tmp = (unsigned char*) block +j;

+	

+				for(int shift=1; shift < (row+1); shift++) // one left-rotation per pass, `row` passes in total

+				{

+					unsigned char trimmings;

+					trimmings = tmp[0];

+					tmp[0] = tmp[1];

+					tmp[1] = tmp[2];

+					tmp[2] = tmp[3];

+					tmp[3] = trimmings;

+				}	

+			}

 

-__global__ void subBytesCuda(char * state, unsigned char * sbBox)

-{

-	int idx = threadIdx.x;

-	int n = static_cast<unsigned char>(state[idx]);

-	state[idx] = sbBox[n];

-}

+			//CALL for: xorRoundKey(block, expandedkey)

+			position = cnt * Nb;

+			for(int i=0;i<Nb;i++){

+				block[i] = (int)((int)block[i] ^ (int)key[i + position]);

+			}

 

-__global__ void shiftRowsCuda(char* state)

-{

-	int row = threadIdx.x + 1;

-	int j = row *4;

-	unsigned char *tmp = (unsigned char*) state +j;

+			position = 0;

+		}

 	

-	for(int i=1; i < (row+1); i++)

-	{

-		unsigned char trimmings;

-		trimmings = tmp[0];

-		tmp[0] = tmp[1];

-		tmp[1] = tmp[2];

-		tmp[2] = tmp[3];

-		tmp[3] = trimmings;

-

-	}	

-}

+		//CALL for: subBytes(block))

+		for(int i=0;i<Nb;i++){

+			int n = static_cast<unsigned char>(block[i]);

+			block[i] = sbBox[n];

+		}

 

-bool aesEncrypt::encryptBlock(char* block)

-{

-	//Initalize device memory

-	char* deviceBlock;

-	char* deviceTempChar;

-	cudaMalloc((void**)&deviceBlock, sizeof(char) * (BLOCK_SIZE));

-	

-	//Copy memory to device

-	cudaMemcpy(deviceBlock, block,sizeof( char) * (BLOCK_SIZE), cudaMemcpyHostToDevice);

+		//CALL for: shiftRows(block);

+		for(int i=0;i<3;i++){ // rows 1..3 only: row 4 starts at block[16], past the BLOCK_SIZE-byte block

+			int row = i + 1; // row r is rotated left by r positions; row 0 is left untouched

+			int j = row *4; // byte offset of this 4-byte row inside the block

+			unsigned char *tmp = (unsigned char*) block +j;

 	

-	//CUDA CALL for: xorRoundKey(block, expandedkey)

-	xorRoundKeyGlobal<<<1, Nb>>>(deviceBlock, deviceExpandedKey); 

-	cudaDeviceSynchronize();

+			for(int shift=1; shift < (row+1); shift++) // one left-rotation per pass, `row` passes in total

+			{

+				unsigned char trimmings;

+				trimmings = tmp[0];

+				tmp[0] = tmp[1];

+				tmp[1] = tmp[2];

+				tmp[2] = tmp[3];

+				tmp[3] = trimmings;

+			}	

+		}

 

-	for(int cnt = 1;cnt <= (Nr-1);cnt++)

-	{

-		//CUDA CALL for: subBytes(block))

-		subBytesCuda<<<1,Nb>>>(deviceBlock, deviceBoxify);

-		cudaDeviceSynchronize();

-		

-		//CUDA CALL for: shiftRows(block);

-		shiftRowsCuda<<<1,4>>>(deviceBlock);

-		cudaDeviceSynchronize();

-		

-		//mixColumns(block); // Disabled

-	

-		//CUDA CALL for: xorRoundKey(block, expandedkey + (cnt * Nb)))

-		cudaMalloc((void**)&deviceTempChar, sizeof(char)*(Nb));

-		cudaMemcpy(deviceTempChar, expandedkey + (cnt * Nb), sizeof(char) *  Nb, cudaMemcpyHostToDevice);

+		//CALL for: xorRoundKey(block, expandedkey)

+		position = Nr * Nb;

+		for(int i=0;i<Nb;i++){

+			block[i] = (int)((int)block[i] ^ (int)key[i + position]);

+		}

+	}

+}

 

-		xorRoundKeyGlobal<<<1, Nb>>>(deviceBlock, deviceTempChar); //xorRoundKey(block, expandedkey);

-		cudaDeviceSynchronize();

-		

-		cudaFree(deviceTempChar);

+/*

+ * Assigns the location of eachBlock into the 2D pointer (block)

+ */

+__global__ void copyToBlocks(char** block, char* eachBlock, int index){

+	char *b = eachBlock;

+	block[index] = b;

+}

+

+/*

+ * Returns the value of block[index] into eachBlock

+ */

+__global__ void copyFromBlocks(char** block, char* eachBlock, int index){

+	for(int i=0; i < BLOCK_SIZE; i++){

+		eachBlock[i] = block[index][i];

 	}

-	//CUDA CALL for: subBytes(block))

-	subBytesCuda<<<1,Nb>>>(deviceBlock, deviceBoxify);

-	cudaDeviceSynchronize();

+}

 

-	//CUDA CALL for: shiftRows(block);

-	shiftRowsCuda<<<1,4>>>(deviceBlock);

-	cudaDeviceSynchronize();

+void aesEncrypt::encryptBlock(char** blocks, int size)

+{

+	int NBLKS = (size + NTPB - 1) / NTPB;

 

-	//CUDA CALL for: xorRoundKey(block, expandedkey + (Nr * Nb))

-	cudaMalloc((void**)&deviceTempChar, sizeof(char)*(Nb+1));

-	cudaMemcpy(deviceTempChar, expandedkey + (Nr * Nb), sizeof(char) * (Nb +1), cudaMemcpyHostToDevice);

+	//Initalize device memory

+	char** deviceBlock;

+	cudaMalloc((void**)&deviceBlock, size * sizeof(char*));

 

-	xorRoundKeyGlobal<<<1, Nb>>>(deviceBlock, deviceTempChar); //xorRoundKey(block, expandedkey); ////////////////////////////////////////////////////////IMPORTANT

-	cudaDeviceSynchronize();

+	//Copy host block to device block

+	for(int i=0; i < size; i++){

+		char* eachBlock;

 

+		cudaMalloc((void**)&eachBlock, BLOCK_SIZE * sizeof(char));

+		cudaMemcpy(eachBlock, blocks[i] ,sizeof(char) * (BLOCK_SIZE), cudaMemcpyHostToDevice);

+		copyToBlocks<<<1,1>>>(deviceBlock, eachBlock, i);

+	}

 	

-	//END_FUNCTION

-	cudaMemcpy(block, deviceBlock, sizeof(char)* (BLOCK_SIZE), cudaMemcpyDeviceToHost);

-

-	//Free memory

-	cudaFree(deviceBlock);

-	cudaFree(deviceTempChar);

+	//Encrypt blocks in parallel

+	encryptKernel<<<NBLKS, NTPB>>>(deviceBlock, deviceExpandedKey, deviceBoxify, Nb, Nr, size);

 	

+	//Copy back to host block

+	for(int i=0; i < size; i++){

+		char *hostBlock = new char [BLOCK_SIZE];

+		char* retBlock;

 	

-	return true; // could error check in future

-}

+		cudaMalloc((void**)&retBlock, BLOCK_SIZE * sizeof(char));

+		copyFromBlocks<<<1,1>>>(deviceBlock, retBlock, i);

+		cudaMemcpy(hostBlock, retBlock ,sizeof(char) * (BLOCK_SIZE), cudaMemcpyDeviceToHost);

+		

+		blocks[i] = hostBlock;

+

+		cudaFree(retBlock); //Clean from memory

+	}

 

+	cudaFree(deviceBlock); //Clean from memory

+}

 

 bool aesEncrypt::decryptBlock(char* block)

 {

@@ -181,7 +213,6 @@ bool aesEncrypt::decryptBlock(char* block)

 		invShiftRows(block);

 		invSubBytes(block);

 		xorRoundKey(block, expandedkey + cnt * Nb);

-		//invMixColumns(block); // Disabled

 	}

 

 	invShiftRows(block);

@@ -215,10 +246,7 @@ void aesEncrypt::setTextKey(std::string key)

 	}

 

 	expandKey();

-	

-	//Save host expanded key to device

-	cudaMalloc((void**)&deviceExpandedKey, sizeof(char) * (strlen(expandedkey) +1));

-	cudaMemcpy(deviceExpandedKey, expandedkey, sizeof(char) * (strlen(expandedkey) +1), cudaMemcpyHostToDevice);

+

 }

 

 void aesEncrypt::expandKey()

@@ -283,6 +311,10 @@ void aesEncrypt::expandKey()

 	}

 

 	delete[] temp2; // cleanup

+

+	//Save host expanded key to device

+	cudaMalloc((void**)&deviceExpandedKey, sizeof(char) * (4 * limit));

+	cudaMemcpy(deviceExpandedKey, expandedkey, sizeof(char) * (4 * limit), cudaMemcpyHostToDevice);

 }

 

 void aesEncrypt::subBytes(char * state)

@@ -370,13 +402,9 @@ void aesEncrypt::mixColumns(char * state)

 

 void aesEncrypt::xorRoundKey(char * state, char * key)

 {

-	//cout << "EQ: \t\t" ;

 	for (int cnt = 0;cnt < Nb;cnt++)

 	{

-		//cout << (int) state[cnt] << " ^ " 

-		//cout << (int) key[cnt] << " ";//"= ";

 		state[cnt] ^= key[cnt];

-		//cout << (int) state[cnt] << endl;

 	}

 

 	//cout << endl;

diff --git a/aesencrypt.cuh b/aesencrypt.cuh

index 7de2276..d340dcb 100644

--- a/aesencrypt.cuh

+++ b/aesencrypt.cuh

@@ -1,7 +1,7 @@

 /********************************************************

  Winter 2013 

  GPU610 Assignment

- Author: Saad Mohammad, Phillip Aziz, Natesh Mayuranathan

+ Author: Saad Mohammad

  Team: NullPointerException

  

  Original Project: <https://github.com/markwatson/Image-Encrypt>

@@ -11,10 +11,10 @@

 

 #ifndef AESENCRYPT_H

 #define AESENCRYPT_H

-#define BLOCK_SIZE 16

 

 #include <string>

 #include <bitset>

+#include "accessjpeg.h"

 #include "stdio.h"

 

 class aesEncrypt

@@ -37,7 +37,7 @@ private:

 public:

 	aesEncrypt();

 	~aesEncrypt();

-	bool encryptBlock(char*);

+	void encryptBlock(char**, int);

 	bool decryptBlock(char*);

 	void setTextKey(std::string);

 	void expandKey();

diff --git a/encryptjpeg.cpp b/encryptjpeg.cpp

index a722b0d..2692aa5 100644

--- a/encryptjpeg.cpp

+++ b/encryptjpeg.cpp

@@ -1,7 +1,7 @@

 /********************************************************

  Winter 2013 

  GPU610 Assignment

- Author: Saad Mohammad, Phillip Aziz, Natesh Mayuranathan

+ Author: Saad Mohammad

  Team: NullPointerException

  

  Original Project: <https://github.com/markwatson/Image-Encrypt>

@@ -14,6 +14,8 @@

 #include "accessjpeg.h"

 #include "aesencrypt.cuh"

 #include "encryptjpeg.h"

+#include "cuda_runtime.h"

+using namespace std;

 

 bool encryptJpeg::process(char action)

 {

@@ -31,26 +33,45 @@ bool encryptJpeg::process(char action)

 	

 	// set the key

 	encrypter.setTextKey(key);

-		

-	// do the encryption

-	while (file.hasMore())

+

+	if (action == 'e')

+	{

+		int size = file.getNumberOfBlocks();

+		char **state = new char*[size];

+		int count = 0;

+

+		for(int i=0; i <size; i++)

+		{

+			char *temp = new char[BLOCK_SIZE+8];

+			ptr = file.accessBlock();

+			memcpy(temp, ptr, BLOCK_SIZE);

+			state[i] = temp;

+		}

+

+		encrypter.encryptBlock(state, size); // run the encryption

+		

+		file.goToStart();

+		while (file.hasMore())

+		{

+			ptr = file.accessBlock();

+			memcpy(ptr, state[count], BLOCK_SIZE); // write the encrypted block back into the file buffer

+			delete[] state[count++]; // each block was allocated with new char[...], so release it with delete[]

+		}

+

+		delete[] state; // the pointer table came from new char*[size]

+

+	} else if (action == 'd')

+	{

+		while (file.hasMore()){

+			ptr = file.accessBlock();

+			encrypter.decryptBlock(ptr);

+		}

+	} else

 	{

-		ptr = file.accessBlock(); // get a block of the image

-		if (action == 'e')

-		{

-			encrypter.encryptBlock(ptr); // run the encryption

-		}

-		else if (action == 'd')

-		{

-			encrypter.decryptBlock(ptr);

-		}

-		else

-		{

-			throw invalidAction();

-			return false;

-		}

-	}

-	

+		throw invalidAction();

+		return false;

+	}

+

 	// write out the file

 	out_success = file.writeOutFile(out_file);

 	if (!out_success)

diff --git a/encryptjpeg.h b/encryptjpeg.h

index f412ec0..208b0d5 100644

--- a/encryptjpeg.h

+++ b/encryptjpeg.h

@@ -1,7 +1,7 @@

 /********************************************************

  Winter 2013 

  GPU610 Assignment

- Author: Saad Mohammad, Phillip Aziz, Natesh Mayuranathan

+ Author: Saad Mohammad

  Team: NullPointerException

  

  Original Project: <https://github.com/markwatson/Image-Encrypt>

diff --git a/main.cu b/main.cu

index 37707d2..33a9a54 100644

--- a/main.cu

+++ b/main.cu

@@ -1,7 +1,7 @@

 /********************************************************

  Winter 2013 

  GPU610 Assignment

- Author: Saad Mohammad, Phillip Aziz, Natesh Mayuranathan

+ Author: Saad Mohammad

  Team: NullPointerException

  

  Original Project: <https://github.com/markwatson/Image-Encrypt>

@@ -15,6 +15,7 @@

 #include <ctime>

 #include "encryptjpeg.h"

 #include "aesencrypt.cuh"

+#include "cuda_runtime.h"

 using namespace std;

 

 int main(int argc, char *argv[])

@@ -72,6 +73,7 @@ int main(int argc, char *argv[])

 		cout << "The program encountered an internal error..." << endl;

 	}

 

+	cudaDeviceReset();

 	

 	return 0;

 }

Final Results

Smohammad project outcome.jpg