diff --git a/MicroBenchmarks/harris/harris.h b/MicroBenchmarks/harris/harris.h --- a/MicroBenchmarks/harris/harris.h +++ b/MicroBenchmarks/harris/harris.h @@ -14,37 +14,32 @@ #define BOX_SIZE 10 /* Comment this to not use google benchmark library */ -#define BENCHMARK_LIB +#define BENCHMARK_LIB // Image Size -#define HEIGHT 2050 -#define WIDTH 2050 +#define HEIGHT 2048 +#define WIDTH 2048 // Parameters For harris kernel -#define THRESHOLD 0.1 +#define THRESHOLD 0.1 // ============================================================================ // Initialize a checkboard image -void initCheckboardImage(int height, int width); +void initCheckboardImage(int height, int width); // print image to output.txt -void printImage(int height, int width, float img[(2 + HEIGHT)][2 + WIDTH]); +void printImage(int height, int width, float img[HEIGHT][WIDTH]); // harris kernel from polymage_naive.cpp -void harrisKernel(int height - , int width - , float inputImg[2 + HEIGHT][2 + WIDTH] - , float outputImg[(2 + HEIGHT)][2 + WIDTH] - , float Ix [(2 + HEIGHT)][2 + WIDTH] - , float Iy [(2 + HEIGHT)][2 + WIDTH] - , float Ixx [(2 + HEIGHT)][2 + WIDTH] - , float Ixy [(2 + HEIGHT)][2 + WIDTH] - , float Iyy [(2 + HEIGHT)][2 + WIDTH] - , float Sxx [(2 + HEIGHT)][2 + WIDTH] - , float Sxy [(2 + HEIGHT)][2 + WIDTH] - , float Syy [(2 + HEIGHT)][2 + WIDTH] - , float det [(2 + HEIGHT)][2 + WIDTH] - , float trace [(2 + HEIGHT)][2 + WIDTH]); +void harrisKernel(int height, int width, float inputImg[4 + HEIGHT][4 + WIDTH], + float outputImg[HEIGHT][WIDTH], + float Ix[2 + HEIGHT][2 + WIDTH], + float Iy[2 + HEIGHT][2 + WIDTH], + float Ixx[2 + HEIGHT][2 + WIDTH], + float Ixy[2 + HEIGHT][2 + WIDTH], + float Iyy[2 + HEIGHT][2 + WIDTH], float Sxx[HEIGHT][WIDTH], + float Sxy[HEIGHT][WIDTH], float Syy[HEIGHT][WIDTH], + float det[HEIGHT][WIDTH], float trace[HEIGHT][WIDTH]); // ============================================================================ #endif diff --git 
a/MicroBenchmarks/harris/harris.reference_output b/MicroBenchmarks/harris/harris.reference_output --- a/MicroBenchmarks/harris/harris.reference_output +++ b/MicroBenchmarks/harris/harris.reference_output @@ -1 +1 @@ -8e390d93bb5942d09e958f5c791c94ad +36d1e6a1490ed3da2e3c323cb57852ba diff --git a/MicroBenchmarks/harris/harrisKernel.cpp b/MicroBenchmarks/harris/harrisKernel.cpp --- a/MicroBenchmarks/harris/harrisKernel.cpp +++ b/MicroBenchmarks/harris/harrisKernel.cpp @@ -1,112 +1,99 @@ #include "harris.h" // harris kernel from polymage_naive.cpp -void harrisKernel( - int height, int width, float inputImg[2 + HEIGHT][2 + WIDTH], - float outputImg[(2 + HEIGHT)][2 + WIDTH], float Ix[(2 + HEIGHT)][2 + WIDTH], - float Iy[(2 + HEIGHT)][2 + WIDTH], float Ixx[(2 + HEIGHT)][2 + WIDTH], - float Ixy[(2 + HEIGHT)][2 + WIDTH], float Iyy[(2 + HEIGHT)][2 + WIDTH], - float Sxx[(2 + HEIGHT)][2 + WIDTH], float Sxy[(2 + HEIGHT)][2 + WIDTH], - float Syy[(2 + HEIGHT)][2 + WIDTH], float det[(2 + HEIGHT)][2 + WIDTH], - float trace[(2 + HEIGHT)][2 + WIDTH]) { - for (int _i0 = 0; (_i0 - height < 0); _i0++) { - for (int _i1 = 0; (_i1 - width < 0); _i1++) { - Iy[_i0 + 1][_i1 + 1] = - (((((((inputImg[_i0][_i1]) * -0.0833333333333f) + - ((inputImg[_i0][_i1 + 2]) * 0.0833333333333f)) + - ((inputImg[_i0 + 1][_i1]) * -0.166666666667f)) + - ((inputImg[_i0 + 1][_i1 + 2]) * 0.166666666667f)) + - ((inputImg[_i0 + 2][_i1]) * -0.0833333333333f)) + - ((inputImg[_i0 + 2][_i1 + 2]) * 0.0833333333333f)); +void harrisKernel(int height, int width, float inputImg[4 + HEIGHT][4 + WIDTH], + float outputImg[HEIGHT][WIDTH], + float Ix[2 + HEIGHT][2 + WIDTH], + float Iy[2 + HEIGHT][2 + WIDTH], + float Ixx[2 + HEIGHT][2 + WIDTH], + float Ixy[2 + HEIGHT][2 + WIDTH], + float Iyy[2 + HEIGHT][2 + WIDTH], float Sxx[HEIGHT][WIDTH], + float Sxy[HEIGHT][WIDTH], float Syy[HEIGHT][WIDTH], + float det[HEIGHT][WIDTH], float trace[HEIGHT][WIDTH]) { + for (int _i0 = 0; _i0 < height + 2; _i0++) { + for (int _i1 = 0; _i1 < width + 
2; _i1++) { + Iy[_i0][_i1] = inputImg[_i0][_i1] * -0.0833333333333f + + inputImg[_i0][_i1 + 2] * 0.0833333333333f + + inputImg[_i0 + 1][_i1] * -0.166666666667f + + inputImg[_i0 + 1][_i1 + 2] * 0.166666666667f + + inputImg[_i0 + 2][_i1] * -0.0833333333333f + + inputImg[_i0 + 2][_i1 + 2] * 0.0833333333333f; } } - for (int _i0 = 0; (_i0 - height < 0); _i0++) { - for (int _i1 = 0; (_i1 - width < 0); _i1++) { - Ix[_i0 + 1][_i1 + 1] = - ((((((inputImg[_i0][_i1] * -0.0833333333333f) + - (inputImg[_i0 + 2][_i1] * 0.0833333333333f)) + - (inputImg[_i0][_i1 + 1] * -0.166666666667f)) + - (inputImg[_i0 + 2][_i1 + 1] * 0.166666666667f)) + - (inputImg[_i0][_i1 + 2] * -0.0833333333333f)) + - (inputImg[_i0 + 2][_i1 + 2] * 0.0833333333333f)); + for (int _i0 = 0; _i0 < height + 2; _i0++) { + for (int _i1 = 0; _i1 < width + 2; _i1++) { + Ix[_i0][_i1] = inputImg[_i0][_i1] * -0.0833333333333f + + inputImg[_i0 + 2][_i1] * 0.0833333333333f + + inputImg[_i0][_i1 + 1] * -0.166666666667f + + inputImg[_i0 + 2][_i1 + 1] * 0.166666666667f + + inputImg[_i0][_i1 + 2] * -0.0833333333333f + + inputImg[_i0 + 2][_i1 + 2] * 0.0833333333333f; } } - for (int _i0 = 1; (_i0 - height - 1 < 0); _i0++) { - for (int _i1 = 1; (_i1 - width - 1 < 0); _i1++) { + for (int _i0 = 0; _i0 < height + 2; _i0++) { + for (int _i1 = 0; _i1 < width + 2; _i1++) { Iyy[_i0][_i1] = Iy[_i0][_i1] * Iy[_i0][_i1]; } } - for (int _i0 = 1; (_i0 - height - 1 < 0); _i0++) { - for (int _i1 = 1; (_i1 - width - 1 < 0); _i1++) { + for (int _i0 = 0; _i0 < height + 2; _i0++) { + for (int _i1 = 0; _i1 < width + 2; _i1++) { Ixy[_i0][_i1] = Ix[_i0][_i1] * Iy[_i0][_i1]; } } - for (int _i0 = 1; (_i0 - height - 1 < 0); _i0++) { - for (int _i1 = 1; (_i1 - width - 1 < 0); _i1++) { + for (int _i0 = 0; _i0 < height + 2; _i0++) { + for (int _i1 = 0; _i1 < width + 2; _i1++) { Ixx[_i0][_i1] = Ix[_i0][_i1] * Ix[_i0][_i1]; } } - for (int _i0 = 2; (_i0 < height); _i0++) { - for (int _i1 = 2; (_i1 < width); _i1++) { - Syy[_i0][_i1] = ((((((((Iyy[-1 + _i0][-1 
+ _i1] + Iyy[-1 + _i0][_i1]) + - Iyy[-1 + _i0][1 + _i1]) + - Iyy[_i0][-1 + _i1]) + - Iyy[_i0][_i1]) + - Iyy[_i0][1 + _i1]) + - Iyy[1 + _i0][-1 + _i1]) + - Iyy[1 + _i0][_i1]) + - Iyy[1 + _i0][1 + _i1]); + for (int _i0 = 0; _i0 < height; _i0++) { + for (int _i1 = 0; _i1 < width; _i1++) { + Syy[_i0][_i1] = /* 3x3 window sum of Iyy: rows _i0.._i0+2, cols _i1.._i1+2 */ + Iyy[_i0][_i1] + Iyy[_i0][_i1 + 1] + Iyy[_i0][_i1 + 2] + + Iyy[_i0 + 1][_i1] + Iyy[_i0 + 1][_i1 + 1] + Iyy[_i0 + 1][_i1 + 2] + + Iyy[_i0 + 2][_i1] + Iyy[_i0 + 2][_i1 + 1] + Iyy[_i0 + 2][_i1 + 2]; } } - for (int _i0 = 2; (_i0 < height); _i0++) { - for (int _i1 = 2; (_i1 < width); _i1++) { - Sxy[_i0][_i1] = ((((((((Ixy[-1 + _i0][-1 + _i1] + Ixy[-1 + _i0][_i1]) + - Ixy[-1 + _i0][1 + _i1]) + - Ixy[_i0][-1 + _i1]) + - Ixy[_i0][_i1]) + - Ixy[_i0][1 + _i1]) + - Ixy[1 + _i0][-1 + _i1]) + - Ixy[1 + _i0][_i1]) + - Ixy[1 + _i0][1 + _i1]); + for (int _i0 = 0; _i0 < height; _i0++) { + for (int _i1 = 0; _i1 < width; _i1++) { + Sxy[_i0][_i1] = Ixy[_i0][_i1] + Ixy[_i0][_i1 + 1] + Ixy[_i0][_i1 + 2] + + Ixy[_i0 + 1][_i1] + Ixy[_i0 + 1][_i1 + 1] + + Ixy[_i0 + 1][_i1 + 2] + Ixy[_i0 + 2][_i1] + + Ixy[_i0 + 2][_i1 + 1] + Ixy[_i0 + 2][_i1 + 2]; } } - for (int _i0 = 2; (_i0 < height); _i0++) { - for (int _i1 = 2; (_i1 < width); _i1++) { - Sxx[_i0][_i1] = ((((((((Ixx[-1 + _i0][-1 + _i1] + Ixx[-1 + _i0][_i1]) + - Ixx[-1 + _i0][1 + _i1]) + - Ixx[_i0][-1 + _i1]) + - Ixx[_i0][_i1]) + - Ixx[_i0][1 + _i1]) + - Ixx[1 + _i0][-1 + _i1]) + - Ixx[1 + _i0][_i1]) + - Ixx[1 + _i0][1 + _i1]); + for (int _i0 = 0; _i0 < height; _i0++) { + for (int _i1 = 0; _i1 < width; _i1++) { + Sxx[_i0][_i1] = Ixx[_i0][_i1] + Ixx[_i0][_i1 + 1] + Ixx[_i0][_i1 + 2] + + Ixx[_i0 + 1][_i1] + Ixx[_i0 + 1][_i1 + 1] + + Ixx[_i0 + 1][_i1 + 2] + Ixx[_i0 + 2][_i1] + + Ixx[_i0 + 2][_i1 + 1] + Ixx[_i0 + 2][_i1 + 2]; } } - for (int _i0 = 2; (_i0 < height); _i0++) { - for (int _i1 = 2; (_i1 < width); _i1++) { - trace[_i0][_i1] = (Sxx[_i0][_i1] + Syy[_i0][_i1]); + for (int _i0 = 0; _i0 < height; _i0++) { + for (int _i1 = 0; _i1 < 
width; _i1++) { + trace[_i0][_i1] = Sxx[_i0][_i1] + Syy[_i0][_i1]; } } - for (int _i0 = 2; (_i0 < height); _i0++) { - for (int _i1 = 2; (_i1 < width); _i1++) { + for (int _i0 = 0; _i0 < height; _i0++) { + for (int _i1 = 0; _i1 < width; _i1++) { det[_i0][_i1] = - ((Sxx[_i0][_i1] * Syy[_i0][_i1]) - (Sxy[_i0][_i1] * Sxy[_i0][_i1])); + Sxx[_i0][_i1] * Syy[_i0][_i1] - Sxy[_i0][_i1] * Sxy[_i0][_i1]; } } - for (int _i0 = 2; (_i0 < height); _i0++) { - for (int _i1 = 2; (_i1 < width); _i1++) { + for (int _i0 = 0; _i0 < height; _i0++) { + for (int _i1 = 0; _i1 < width; _i1++) { outputImg[_i0][_i1] = - (det[_i0][_i1] - ((0.04f * trace[_i0][_i1]) * trace[_i0][_i1])); + det[_i0][_i1] - 0.04f * trace[_i0][_i1] * trace[_i0][_i1]; } } } diff --git a/MicroBenchmarks/harris/main.cpp b/MicroBenchmarks/harris/main.cpp --- a/MicroBenchmarks/harris/main.cpp +++ b/MicroBenchmarks/harris/main.cpp @@ -7,7 +7,7 @@ // This function initializes the input image to checkbox image void initCheckboardImage(int height, int width, - float image[(2 + HEIGHT)][2 + WIDTH]) { + float image[4 + HEIGHT][4 + WIDTH]) { int last_pixel_x = 0; int last_pixel_y = 0; for (int i = 0; i < height; i++) { @@ -29,12 +29,12 @@ } // Writes image matrix to a file. 
-void printImage(int height, int width, float arr[(2 + HEIGHT)][2 + WIDTH], +void printImage(int height, int width, float arr[HEIGHT][WIDTH], int dummy) { std::ofstream myfile; myfile.open("output.txt"); - for (int i = 0; i < height - 2; i++) { - for (int j = 0; j < width - 2; j++) { + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { if (arr[i][j] < 0) { myfile << 0; } else if (arr[i][j] > 255) { @@ -58,25 +58,25 @@ int height = state.range(0); int width = state.range(1); - float(*image)[HEIGHT + 2][WIDTH + 2]; - image = (float(*)[2 + HEIGHT][2 + WIDTH]) - malloc(sizeof(float) * (2 + HEIGHT) * (2 + WIDTH)); - initCheckboardImage((HEIGHT + 2), (WIDTH + 2), *image); + float(*image)[HEIGHT + 4][WIDTH + 4]; + image = (float(*)[HEIGHT + 4][WIDTH + 4]) + malloc(sizeof(float) * (4 + HEIGHT) * (4 + WIDTH)); + initCheckboardImage(HEIGHT + 4, WIDTH + 4, *image); - float(*imageOutput)[2 + HEIGHT][2 + WIDTH]; - imageOutput = (float(*)[2 + HEIGHT][2 + WIDTH]) - malloc(sizeof(float) * (2 + HEIGHT) * (2 + WIDTH)); + float(*imageOutput)[HEIGHT][WIDTH]; + imageOutput = (float(*)[HEIGHT][WIDTH]) + malloc(sizeof(float) * HEIGHT * WIDTH); float(*Ix)[2 + HEIGHT][2 + WIDTH]; float(*Iy)[2 + HEIGHT][2 + WIDTH]; float(*Ixx)[2 + HEIGHT][2 + WIDTH]; float(*Ixy)[2 + HEIGHT][2 + WIDTH]; float(*Iyy)[2 + HEIGHT][2 + WIDTH]; - float(*Sxx)[2 + HEIGHT][2 + WIDTH]; - float(*Sxy)[2 + HEIGHT][2 + WIDTH]; - float(*Syy)[2 + HEIGHT][2 + WIDTH]; - float(*det)[2 + HEIGHT][2 + WIDTH]; - float(*trace)[2 + HEIGHT][2 + WIDTH]; + float(*Sxx)[HEIGHT][WIDTH]; + float(*Sxy)[HEIGHT][WIDTH]; + float(*Syy)[HEIGHT][WIDTH]; + float(*det)[HEIGHT][WIDTH]; + float(*trace)[HEIGHT][WIDTH]; Ix = (float(*)[2 + HEIGHT][2 + WIDTH]) malloc(sizeof(float) * (2 + HEIGHT) * (2 + WIDTH)); @@ -88,16 +88,11 @@ malloc(sizeof(float) * (2 + HEIGHT) * (2 + WIDTH)); Iyy = (float(*)[2 + HEIGHT][2 + WIDTH]) malloc(sizeof(float) * (2 + HEIGHT) * (2 + WIDTH)); - Sxx = (float(*)[2 + HEIGHT][2 + WIDTH]) - 
malloc(sizeof(float) * (2 + HEIGHT) * (2 + WIDTH)); - Sxy = (float(*)[2 + HEIGHT][2 + WIDTH]) - malloc(sizeof(float) * (2 + HEIGHT) * (2 + WIDTH)); - Syy = (float(*)[2 + HEIGHT][2 + WIDTH]) - malloc(sizeof(float) * (2 + HEIGHT) * (2 + WIDTH)); - det = (float(*)[2 + HEIGHT][2 + WIDTH]) - malloc(sizeof(float) * (2 + HEIGHT) * (2 + WIDTH)); - trace = (float(*)[2 + HEIGHT][2 + WIDTH]) - malloc(sizeof(float) * (2 + HEIGHT) * (2 + WIDTH)); + Sxx = (float(*)[HEIGHT][WIDTH])malloc(sizeof(float) * HEIGHT * WIDTH); + Sxy = (float(*)[HEIGHT][WIDTH])malloc(sizeof(float) * HEIGHT * WIDTH); + Syy = (float(*)[HEIGHT][WIDTH])malloc(sizeof(float) * HEIGHT * WIDTH); + det = (float(*)[HEIGHT][WIDTH])malloc(sizeof(float) * HEIGHT * WIDTH); + trace = (float(*)[HEIGHT][WIDTH])malloc(sizeof(float) * HEIGHT * WIDTH); harrisKernel(height, width, *image, *imageOutput, *Ix, *Iy, *Ixx, *Ixy, *Iyy, *Sxx, *Sxy, *Syy, *det, *trace); @@ -118,14 +113,13 @@ free((void *)det); free((void *)trace); - for (int i = 0; i < height + 2; i++) { - for (int j = 0; j < width + 2; j++) { + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { sum = (sum + 1) & (int)(*imageOutput)[i][j]; } } - state.SetBytesProcessed(sizeof(float) * (height + 2) * (width + 2) * - state.iterations()); + state.SetBytesProcessed(sizeof(float) * height * width * state.iterations()); free((void *)imageOutput); free((void *)image); @@ -149,25 +143,25 @@ #endif // Extra Call to verify output of kernel - float(*image)[HEIGHT + 2][WIDTH + 2]; - image = (float(*)[2 + HEIGHT][2 + WIDTH]) - malloc(sizeof(float) * (2 + HEIGHT) * (2 + WIDTH)); - initCheckboardImage((HEIGHT + 2), (WIDTH + 2), *image); + float(*image)[HEIGHT + 4][WIDTH + 4]; + image = (float(*)[HEIGHT + 4][WIDTH + 4]) + malloc(sizeof(float) * (HEIGHT + 4) * (WIDTH + 4)); + initCheckboardImage(HEIGHT + 4, WIDTH + 4, *image); - float(*imageOutput)[2 + HEIGHT][2 + WIDTH]; - imageOutput = (float(*)[2 + HEIGHT][2 + WIDTH]) - malloc(sizeof(float) * (2 + HEIGHT) * (2 
+ WIDTH)); + float(*imageOutput)[HEIGHT][WIDTH]; + imageOutput = (float(*)[HEIGHT][WIDTH]) + malloc(sizeof(float) * HEIGHT * WIDTH); float(*Ix)[2 + HEIGHT][2 + WIDTH]; float(*Iy)[2 + HEIGHT][2 + WIDTH]; float(*Ixx)[2 + HEIGHT][2 + WIDTH]; float(*Ixy)[2 + HEIGHT][2 + WIDTH]; float(*Iyy)[2 + HEIGHT][2 + WIDTH]; - float(*Sxx)[2 + HEIGHT][2 + WIDTH]; - float(*Sxy)[2 + HEIGHT][2 + WIDTH]; - float(*Syy)[2 + HEIGHT][2 + WIDTH]; - float(*det)[2 + HEIGHT][2 + WIDTH]; - float(*trace)[2 + HEIGHT][2 + WIDTH]; + float(*Sxx)[HEIGHT][WIDTH]; + float(*Sxy)[HEIGHT][WIDTH]; + float(*Syy)[HEIGHT][WIDTH]; + float(*det)[HEIGHT][WIDTH]; + float(*trace)[HEIGHT][WIDTH]; Ix = (float(*)[2 + HEIGHT][2 + WIDTH]) malloc(sizeof(float) * (2 + HEIGHT) * (2 + WIDTH)); @@ -179,16 +173,11 @@ malloc(sizeof(float) * (2 + HEIGHT) * (2 + WIDTH)); Iyy = (float(*)[2 + HEIGHT][2 + WIDTH]) malloc(sizeof(float) * (2 + HEIGHT) * (2 + WIDTH)); - Sxx = (float(*)[2 + HEIGHT][2 + WIDTH]) - malloc(sizeof(float) * (2 + HEIGHT) * (2 + WIDTH)); - Sxy = (float(*)[2 + HEIGHT][2 + WIDTH]) - malloc(sizeof(float) * (2 + HEIGHT) * (2 + WIDTH)); - Syy = (float(*)[2 + HEIGHT][2 + WIDTH]) - malloc(sizeof(float) * (2 + HEIGHT) * (2 + WIDTH)); - det = (float(*)[2 + HEIGHT][2 + WIDTH]) - malloc(sizeof(float) * (2 + HEIGHT) * (2 + WIDTH)); - trace = (float(*)[2 + HEIGHT][2 + WIDTH]) - malloc(sizeof(float) * (2 + HEIGHT) * (2 + WIDTH)); + Sxx = (float(*)[HEIGHT][WIDTH])malloc(sizeof(float) * HEIGHT * WIDTH); + Sxy = (float(*)[HEIGHT][WIDTH])malloc(sizeof(float) * HEIGHT * WIDTH); + Syy = (float(*)[HEIGHT][WIDTH])malloc(sizeof(float) * HEIGHT * WIDTH); + det = (float(*)[HEIGHT][WIDTH])malloc(sizeof(float) * HEIGHT * WIDTH); + trace = (float(*)[HEIGHT][WIDTH])malloc(sizeof(float) * HEIGHT * WIDTH); harrisKernel(HEIGHT, WIDTH, *image, *imageOutput, *Ix, *Iy, *Ixx, *Ixy, *Iyy, *Sxx, *Sxy, *Syy, *det, *trace); @@ -205,9 +194,9 @@ free((void *)trace); if (argc == 2) { - printImage(HEIGHT + 2, WIDTH + 2, *imageOutput, sum); + 
printImage(HEIGHT, WIDTH, *imageOutput, sum); } else { - printImage(HEIGHT + 2, WIDTH + 2, *imageOutput, -1); + printImage(HEIGHT, WIDTH, *imageOutput, -1); } free((void *)image);