Changeset View
Changeset View
Standalone View
Standalone View
MicroBenchmarks/ImageProcessing/Blur/main.cpp
/**
  Pankaj Kukreja
  github.com/proton0001
  Indian Institute of Technology Hyderabad
*/
#include "ImageHelper.h"
#include "blur.h"
#include <cstdlib>
#include <iostream>
#define BENCHMARK_LIB
#ifdef BENCHMARK_LIB
#include "benchmark/benchmark.h"
#endif
// Blur kernels with C linkage; their definitions are not part of this file.
// Each takes the image height and width plus an input and an output buffer.
extern "C" {
void boxBlurKernel(int height, int width, int *inpImage, int *outpImage);
void gaussianBlurKernel(int height, int width, int *inpImage, int *outpImage);
}

// Source image shared by main() and the benchmark functions. main() allocates
// and fills it before the benchmarks run, and frees it before returning.
int *inputImage;
int main(int argc, char *argv[]) { | |||||
#ifdef BENCHMARK_LIB | |||||
::benchmark::Initialize(&argc, argv); | |||||
#endif | |||||
const char *boxBlurOutputFileName = (const char *)"./boxBlurOutput.txt"; | |||||
const char *gaussianBlurOutputFileName = | |||||
(const char *)"./gaussianBlurOutput.txt"; | |||||
inputImage = (int *)malloc(sizeof(int) * (HEIGHT) * (WIDTH)); | |||||
if (inputImage == NULL) { | |||||
std::cerr << "Insufficient memory\n"; | |||||
exit(1); | |||||
} | |||||
initializeRandomImage(inputImage, HEIGHT, WIDTH); | |||||
// Run Kernels Using Benchmark Library | |||||
#ifdef BENCHMARK_LIB | |||||
::benchmark::RunSpecifiedBenchmarks(); | |||||
#endif | |||||
// Run Kernels once more and save output in a file for Verification | |||||
int *outputImage; | |||||
outputImage = (int *)malloc(sizeof(int) * (HEIGHT) * (WIDTH)); | |||||
if (outputImage == NULL) { | |||||
std::cerr << "Insufficient memory\n"; | |||||
exit(1); | |||||
} | |||||
boxBlurKernel(HEIGHT, WIDTH, inputImage, outputImage); | |||||
// Blur not applied on edges so we add a black border | |||||
// Otherwise we may get garbage value which may create problem in output | |||||
// verification | |||||
int offset = (BOX_SIZE - 1) / 2; | |||||
// Top Edge | |||||
for (int i = 0; i < offset; i++) { | |||||
for (int j = 0; j < WIDTH; j++) { | |||||
outputImage[i * WIDTH + j] = 0; | |||||
} | |||||
} | |||||
// Bottom Edge | |||||
for (int i = HEIGHT - offset; i < HEIGHT; i++) { | |||||
for (int j = 0; j < WIDTH; j++) { | |||||
outputImage[i * WIDTH + j] = 0; | |||||
} | |||||
} | |||||
// Left Edge | |||||
for (int i = 0; i < HEIGHT; i++) { | |||||
for (int j = 0; j < offset; j++) { | |||||
outputImage[i * WIDTH + j] = 0; | |||||
} | |||||
} | |||||
// Right Edge | |||||
for (int i = 0; i < HEIGHT; i++) { | |||||
for (int j = WIDTH - offset; j < WIDTH; j++) { | |||||
outputImage[i * WIDTH + j] = 0; | |||||
} | |||||
} | |||||
saveImage(outputImage, boxBlurOutputFileName, HEIGHT, WIDTH); | |||||
gaussianBlurKernel(HEIGHT, WIDTH, inputImage, outputImage); | |||||
// Top Edge | |||||
for (int i = 0; i < offset; i++) { | |||||
for (int j = 0; j < WIDTH; j++) { | |||||
outputImage[i * WIDTH + j] = 0; | |||||
} | |||||
} | |||||
// Bottom Edge | |||||
for (int i = HEIGHT - offset; i < HEIGHT; i++) { | |||||
for (int j = 0; j < WIDTH; j++) { | |||||
outputImage[i * WIDTH + j] = 0; | |||||
} | |||||
} | |||||
// Left Edge | |||||
for (int i = 0; i < HEIGHT; i++) { | |||||
for (int j = 0; j < offset; j++) { | |||||
outputImage[i * WIDTH + j] = 0; | |||||
} | |||||
} | |||||
// Right Edge | |||||
for (int i = 0; i < HEIGHT; i++) { | |||||
for (int j = WIDTH - offset; j < WIDTH; j++) { | |||||
outputImage[i * WIDTH + j] = 0; | |||||
} | |||||
} | |||||
saveImage(outputImage, gaussianBlurOutputFileName, HEIGHT, WIDTH); | |||||
free(outputImage); | |||||
free(inputImage); | |||||
return EXIT_SUCCESS; | |||||
} | |||||
#ifdef BENCHMARK_LIB | |||||
void BENCHMARK_boxBlurKernel(benchmark::State &state) { | |||||
int height = state.range(0); | |||||
int width = state.range(1); | |||||
int *outputImage = (int *)malloc(sizeof(int) * (height) * (width)); | |||||
if (outputImage == NULL) { | |||||
std::cerr << "Insufficient memory\n"; | |||||
exit(1); | |||||
} | |||||
/* This call is to warm up the cache */ | |||||
boxBlurKernel(height, width, inputImage, outputImage); | |||||
for (auto _ : state) { | |||||
boxBlurKernel(height, width, inputImage, outputImage); | |||||
} | |||||
/* Since we are not passing state.range as 20 this if case will always be | |||||
* false. This if case is to prevent above kernel calls from getting optimized | |||||
* out */ | |||||
if (state.range(0) == 20) { | |||||
saveImage(outputImage, (const char *)"testFailed.txt", height, width); | |||||
} | |||||
free(outputImage); | |||||
} | |||||
BENCHMARK(BENCHMARK_boxBlurKernel) | |||||
->Args({128, 128}) | |||||
->Args({256, 256}) | |||||
->Args({512, 512}) | |||||
->Args({1024, 1024}) | |||||
->Unit(benchmark::kMicrosecond); | |||||
Meinersbur: [nit] empty lines | |||||
void BENCHMARK_GAUSSIAN_BLUR(benchmark::State &state) { | |||||
int height = state.range(0); | |||||
int width = state.range(1); | |||||
int *outputImage = (int *)malloc(sizeof(int) * (height) * (width)); | |||||
if (outputImage == NULL) { | |||||
std::cerr << "Insufficient memory\n"; | |||||
exit(1); | |||||
Not Done ReplyInline ActionsYou probably want a ::benchmark::DoNotOptimize(...) around this and other calls to the kernel. dberris: You probably want a `::benchmark::DoNotOptimize(...)` around this and other calls to the kernel. | |||||
Not Done ReplyInline ActionsI don't think I can use "::benchmark::DoNotOptimize(...)" here as the functions return void and DoNotOptimize forces the compiler to flush pending writes to memory. proton: I don't think I can use "::benchmark::DoNotOptimize(...)" here as the functions return void and… | |||||
} | |||||
/* This call is to warm up the cache */ | |||||
gaussianBlurKernel(height, width, inputImage, outputImage); | |||||
for (auto _ : state) { | |||||
gaussianBlurKernel(height, width, inputImage, outputImage); | |||||
} | |||||
/* Since we are not passing state.range as 20 this if case will always be | |||||
* false. This if case is to prevent above kernel calls from getting optimized | |||||
* out */ | |||||
Not Done ReplyInline Actions@dberris Would ::benchmark::ClobberMemory() be a viable alternative? Meinersbur: @dberris Would `::benchmark::ClobberMemory()` be a viable alternative? | |||||
Not Done ReplyInline ActionsIt may be. dberris: It may be. | |||||
Not Done ReplyInline ActionsI suggest to leave it as is for the moment. If we found a canonical approach, we can change this and the other benchmarks afterwards. Could you add a comment that this is supposed keep the compiler to optimize the computation away? Meinersbur: I suggest to leave it as is for the moment. If we found a canonical approach, we can change… | |||||
Not Done ReplyInline Actions@proton Please don't forget to add a comment about that this code is meant to inhibit too-aggressive compiler optimizations. Meinersbur: @proton Please don't forget to add a comment about that this code is meant to inhibit too… | |||||
if (state.range(0) == 20) { | |||||
saveImage(outputImage, (const char *)"testFailed.txt", height, width); | |||||
} | |||||
free(outputImage); | |||||
} | |||||
BENCHMARK(BENCHMARK_GAUSSIAN_BLUR) | |||||
->Args({128, 128}) | |||||
->Args({256, 256}) | |||||
->Args({512, 512}) | |||||
->Args({1024, 1024}) | |||||
->Unit(benchmark::kMicrosecond); | |||||
#endif |
[nit] empty lines