Index: test-suite/trunk/Bitcode/CMakeLists.txt
===================================================================
--- test-suite/trunk/Bitcode/CMakeLists.txt
+++ test-suite/trunk/Bitcode/CMakeLists.txt
@@ -0,0 +1,7 @@
+if(NOT TEST_SUITE_BENCHMARKING_ONLY)
+  llvm_add_subdirectories(Regression)
+  if(ARCH STREQUAL "x86")
+    llvm_add_subdirectories(simd_ops)
+  endif()
+endif()
+
Index: test-suite/trunk/Bitcode/README_HalideCodegen
===================================================================
--- test-suite/trunk/Bitcode/README_HalideCodegen
+++ test-suite/trunk/Bitcode/README_HalideCodegen
@@ -0,0 +1,28 @@
+Intro to Halide code generation pattern (based on Halide documentation)
+===========================================================================
+
+
+functionName:
+============
+Functions generated by Halide are regular functions with a user-chosen name and arguments.
+These functions are considered to be the public methods called by external programs.
+The body checks the correctness of its arguments, then calls the function
+that contains the actual computation. This function is called "__functionName".
+
+__functionName:
+=============
+Actual function as defined by the Halide programmer.
+These are considered to be private functions, called through the above public function.
+
+functionName_argv:
+=================
+If the function is externally visible, Halide also creates an argv wrapper.
+This is useful for calling from JIT and other machine interfaces.
+The wrapper calls the function with an array of pointer arguments.
+This is easier for the JIT to call than a function with an unknown
+(at compile time) argument list.
+
+
+E.g. The unit tests "simd_ops" contain functions called "unopt_test_op" and "unopt_scalar_test_op"
+
+
Index: test-suite/trunk/Bitcode/Regression/CMakeLists.txt
===================================================================
--- test-suite/trunk/Bitcode/Regression/CMakeLists.txt
+++ test-suite/trunk/Bitcode/Regression/CMakeLists.txt
@@ -0,0 +1,3 @@
+if(ARCH STREQUAL "x86")
+  add_subdirectory(vector_widen)
+endif()
Index: test-suite/trunk/Bitcode/Regression/vector_widen/CMakeLists.txt
===================================================================
--- test-suite/trunk/Bitcode/Regression/vector_widen/CMakeLists.txt
+++ test-suite/trunk/Bitcode/Regression/vector_widen/CMakeLists.txt
@@ -0,0 +1,10 @@
+SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/halide_runtime.bc PROPERTIES LANGUAGE CXX)
+SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/vector_widen.bc PROPERTIES LANGUAGE CXX)
+
+list(APPEND LDFLAGS -lpthread -ldl)
+
+set(Source ${CMAKE_CURRENT_SOURCE_DIR}/driver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/halide_runtime.bc ${CMAKE_CURRENT_SOURCE_DIR}/vector_widen.bc)
+set(PROG widen_bug)
+llvm_multisource()
+
+
Index: test-suite/trunk/Bitcode/Regression/vector_widen/driver.cpp
===================================================================
--- test-suite/trunk/Bitcode/Regression/vector_widen/driver.cpp
+++ test-suite/trunk/Bitcode/Regression/vector_widen/driver.cpp
@@ -0,0 +1,69 @@
+#include "halide_buffer.h"
+
+// Returns rand() scaled by 0.125, converted to T, and shifted down by 100.
+template<typename T>
+T rand_value() {
+    return (T)(rand() * 0.125) - 100;
+}
+
+// Allocates a w x h array of T filled with rand_value<T>() and wraps it in a
+// buffer_t. The storage is owned by the caller and released via buf.host.
+template<typename T>
+buffer_t make_buffer(int w, int h) {
+    T *mem = new T[w*h];
+    buffer_t buf = {0};
+    buf.host = (uint8_t *)mem;
+    buf.extent[0] = w;
+    buf.extent[1] = h;
+    buf.elem_size = sizeof(T);
+    buf.stride[0] = 1;
+    buf.stride[1] = w;
+
+    for (int i = 0; i < w*h; i++) {
+        mem[i] = rand_value<T>();
+    }
+
+    return buf;
+}
+
+
+// Runs the vector_widen filter once on random input. An optional first
+// argument supplies the random seed; otherwise the current time is used.
+int main(int argc, char **argv) {
+    unsigned int err_code = 0;
+#if (!__has_builtin(__builtin_cpu_supports) && (defined(__i386__) || defined(__x86_64__)))
+    return err_code;
+#endif
+#if defined(__i386__) || defined(__x86_64__)
+    if (!__builtin_cpu_supports("avx")) {
+        return err_code;
+    }
+#endif
+    time_t seed;
+    if (argc > 1) {
+        seed = atoi(argv[1]);
+    }
+    else {
+        seed = time(NULL);
+    }
+    // Seed unconditionally so a seed given on the command line is honoured.
+    srand (seed);
+    const int W = 4096, H = 512;
+    // Make some input buffers
+    buffer_t bufs[] = {
+        make_buffer<uint8_t>(W, H),
+        make_buffer<uint32_t>(W, H)
+    };
+
+    for (int i = 0; i < 1; i++) {
+        filter f = filters[i];
+        f.fn(bufs + 0,
+             bufs + 1);
+    }
+
+    for (size_t i = 0; i < sizeof(bufs)/sizeof(buffer_t); i++) {
+        delete[] bufs[i].host;
+    }
+
+    return err_code;
+}
Index: test-suite/trunk/Bitcode/Regression/vector_widen/halide_buffer.h
===================================================================
--- test-suite/trunk/Bitcode/Regression/vector_widen/halide_buffer.h
+++ test-suite/trunk/Bitcode/Regression/vector_widen/halide_buffer.h
@@ -0,0 +1,54 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#ifndef HALIDE_ATTRIBUTE_ALIGN
+  #ifdef _MSC_VER
+    #define HALIDE_ATTRIBUTE_ALIGN(x) __declspec(align(x))
+  #else
+    #define HALIDE_ATTRIBUTE_ALIGN(x) __attribute__((aligned(x)))
+  #endif
+#endif
+#ifndef BUFFER_T_DEFINED
+#define BUFFER_T_DEFINED
+#include <stdbool.h>
+#include <stdint.h>
+typedef struct buffer_t {
+    uint64_t dev;
+    uint8_t* host;
+    int32_t extent[4];
+    int32_t stride[4];
+    int32_t min[4];
+    int32_t elem_size;
+    HALIDE_ATTRIBUTE_ALIGN(1) bool host_dirty;
+    HALIDE_ATTRIBUTE_ALIGN(1) bool dev_dirty;
+    HALIDE_ATTRIBUTE_ALIGN(1) uint8_t _padding[10 - sizeof(void *)];
+} buffer_t;
+#endif
+struct halide_filter_metadata_t;
+#ifndef HALIDE_FUNCTION_ATTRS
+#define HALIDE_FUNCTION_ATTRS
+#endif
+#ifndef FILTER_T_DEFINED
+#define FILTER_T_DEFINED
+typedef struct filter {
+    const char *name;
+    int (*fn)(buffer_t *, // uint8
+              buffer_t *); // uint32
+} filter;
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+int vector_widen(buffer_t *_in_u8_buffer, buffer_t *widen_buffer) HALIDE_FUNCTION_ATTRS;
+int vector_widen_argv(void **args) HALIDE_FUNCTION_ATTRS;
+extern const struct halide_filter_metadata_t vector_widen_metadata;
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+filter filters[] = {
+{"vector_widen", &vector_widen},
+{NULL, NULL}};
+
+
Index: test-suite/trunk/Bitcode/simd_ops/CMakeLists.txt
===================================================================
--- test-suite/trunk/Bitcode/simd_ops/CMakeLists.txt
+++ test-suite/trunk/Bitcode/simd_ops/CMakeLists.txt
@@ -0,0 +1,18 @@
+file(GLOB uosources ${CMAKE_CURRENT_SOURCE_DIR}/${ARCH}_tests/*.bc)
+SET_SOURCE_FILES_PROPERTIES(${uosources} PROPERTIES LANGUAGE CXX)
+file(GLOB scalar_sources ${CMAKE_CURRENT_SOURCE_DIR}/${ARCH}_scalar_tests/*.bc)
+SET_SOURCE_FILES_PROPERTIES(${scalar_sources} PROPERTIES LANGUAGE CXX)
+SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/simd_op_check_runtime.bc PROPERTIES LANGUAGE CXX)
+
+list(APPEND LDFLAGS -lpthread -ldl)
+
+foreach(sourcebc ${uosources})
+  string(REGEX REPLACE ".[cp]+$" "" pathbc ${sourcebc})
+  string(REGEX REPLACE ".*/" "" namebc ${pathbc})
+  string(REPLACE "." "" namebc ${namebc})
+  set(Source ${CMAKE_CURRENT_SOURCE_DIR}/simd_ops.cpp ${CMAKE_CURRENT_SOURCE_DIR}/simd_op_check_runtime.bc ${CMAKE_CURRENT_SOURCE_DIR}/${ARCH}_tests/${namebc}.bc ${CMAKE_CURRENT_SOURCE_DIR}/${ARCH}_scalar_tests/scalar_${namebc}.bc)
+  set(PROG simd_ops_${namebc})
+  llvm_multisource()
+endforeach()
+
+
Index: test-suite/trunk/Bitcode/simd_ops/filter_test_op.h
===================================================================
--- test-suite/trunk/Bitcode/simd_ops/filter_test_op.h
+++ test-suite/trunk/Bitcode/simd_ops/filter_test_op.h
@@ -0,0 +1,37 @@
+#include "halide_buffer.h"
+#ifndef FILTER_T_DEFINED
+#define FILTER_T_DEFINED
+typedef struct filter {
+    const char *name;
+    int (*fn)(buffer_t *, // float32
+              buffer_t *, // float64
+              buffer_t *, // int8
+              buffer_t *, // uint8
+              buffer_t *, // int16
+              buffer_t *, // uint16
+              buffer_t *, // int32
+              buffer_t *, // uint32
+              buffer_t *, // int64
+              buffer_t *, // uint64
+              buffer_t *); // output
+} filter;
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+int test_op(buffer_t *_in_f32_buffer, buffer_t *_in_f64_buffer, buffer_t *_in_i8_buffer, buffer_t *_in_u8_buffer, buffer_t *_in_i16_buffer, buffer_t *_in_u16_buffer, buffer_t *_in_i32_buffer, buffer_t *_in_u32_buffer, buffer_t *_in_i64_buffer, buffer_t *_in_u64_buffer, buffer_t *_out_op_buffer) HALIDE_FUNCTION_ATTRS;
+int test_op_argv(void **args) HALIDE_FUNCTION_ATTRS;
+extern const struct halide_filter_metadata_t test_op_metadata;
+
+int scalar_test_op(buffer_t *_in_f32_buffer, buffer_t *_in_f64_buffer, buffer_t *_in_i8_buffer, buffer_t *_in_u8_buffer, buffer_t *_in_i16_buffer, buffer_t *_in_u16_buffer, buffer_t *_in_i32_buffer, buffer_t *_in_u32_buffer, buffer_t *_in_i64_buffer, buffer_t *_in_u64_buffer, buffer_t *_out_op_buffer) HALIDE_FUNCTION_ATTRS;
+int scalar_test_op_argv(void **args) HALIDE_FUNCTION_ATTRS;
+extern const struct halide_filter_metadata_t scalar_test_op_metadata;
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+filter filters[] = {
+{"test_op", &test_op},
+{"scalar_test_op", &scalar_test_op},
+{NULL, NULL}};
+
Index: test-suite/trunk/Bitcode/simd_ops/halide_buffer.h
===================================================================
--- test-suite/trunk/Bitcode/simd_ops/halide_buffer.h
+++ test-suite/trunk/Bitcode/simd_ops/halide_buffer.h
@@ -0,0 +1,31 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#ifndef HALIDE_ATTRIBUTE_ALIGN
+  #ifdef _MSC_VER
+    #define HALIDE_ATTRIBUTE_ALIGN(x) __declspec(align(x))
+  #else
+    #define HALIDE_ATTRIBUTE_ALIGN(x) __attribute__((aligned(x)))
+  #endif
+#endif
+#ifndef BUFFER_T_DEFINED
+#define BUFFER_T_DEFINED
+#include <stdbool.h>
+#include <stdint.h>
+typedef struct buffer_t {
+    uint64_t dev;
+    uint8_t* host;
+    int32_t extent[4];
+    int32_t stride[4];
+    int32_t min[4];
+    int32_t elem_size;
+    HALIDE_ATTRIBUTE_ALIGN(1) bool host_dirty;
+    HALIDE_ATTRIBUTE_ALIGN(1) bool dev_dirty;
+    HALIDE_ATTRIBUTE_ALIGN(1) uint8_t _padding[10 - sizeof(void *)];
+} buffer_t;
+#endif
+struct halide_filter_metadata_t;
+#ifndef HALIDE_FUNCTION_ATTRS
+#define HALIDE_FUNCTION_ATTRS
+#endif
+
Index: test-suite/trunk/Bitcode/simd_ops/simd_ops.cpp
===================================================================
--- test-suite/trunk/Bitcode/simd_ops/simd_ops.cpp
+++ test-suite/trunk/Bitcode/simd_ops/simd_ops.cpp
@@ -0,0 +1,115 @@
+#include "filter_test_op.h"
+
+// Returns rand() scaled by 0.125, converted to T, and shifted down by 100.
+template<typename T>
+T rand_value() {
+    return (T)(rand() * 0.125) - 100;
+}
+
+// Even on android, we want errors to stdout
+extern "C" void halide_print(void *, const char *msg) {
+    printf("%s\n", msg);
+}
+
+// Allocates a w x h array of T filled with rand_value<T>() and wraps it in a
+// buffer_t. The storage is owned by the caller and released via buf.host.
+template<typename T>
+buffer_t make_buffer(int w, int h) {
+    T *mem = new T[w*h];
+    buffer_t buf = {0};
+    buf.host = (uint8_t *)mem;
+    buf.extent[0] = w;
+    buf.extent[1] = h;
+    buf.elem_size = sizeof(T);
+    buf.stride[0] = 1;
+    buf.stride[1] = w;
+
+    for (int i = 0; i < w*h; i++) {
+        mem[i] = rand_value<T>();
+    }
+
+    return buf;
+}
+
+
+// Runs test_op and scalar_test_op on the same random inputs and compares their
+// outputs element by element. Returns 1 on the first difference larger than
+// 1e-4, otherwise 0. An optional first argument supplies the random seed.
+int main(int argc, char **argv) {
+    unsigned int err_code = 0;
+#if (!__has_builtin(__builtin_cpu_supports) && (defined(__i386__) || defined(__x86_64__)))
+    return err_code;
+#endif
+#if defined(__i386__) || defined(__x86_64__)
+    if (!__builtin_cpu_supports("avx")) {
+        return err_code;
+    }
+#endif
+    time_t seed;
+    if (argc > 1) {
+        seed = atoi(argv[1]);
+    }
+    else {
+        seed = time(NULL);
+    }
+    // Seed unconditionally so a seed given on the command line is honoured.
+    srand (seed);
+    int W = 256, H = 100;
+    // Make some input buffers
+    buffer_t bufs[] = {
+        make_buffer<float>(W, H),
+        make_buffer<double>(W, H),
+        make_buffer<int8_t>(W, H),
+        make_buffer<uint8_t>(W, H),
+        make_buffer<int16_t>(W, H),
+        make_buffer<uint16_t>(W, H),
+        make_buffer<int32_t>(W, H),
+        make_buffer<uint32_t>(W, H),
+        make_buffer<int64_t>(W, H),
+        make_buffer<uint64_t>(W, H)
+    };
+
+    const int NO = 2;
+    buffer_t out[] = {
+        make_buffer<double>(W, H),
+        make_buffer<double>(W, H)
+    };
+    double *out_value[NO];
+
+    for (int i = 0; i < NO; i++) {
+        filter f = filters[i];
+        f.fn(bufs + 0,
+             bufs + 1,
+             bufs + 2,
+             bufs + 3,
+             bufs + 4,
+             bufs + 5,
+             bufs + 6,
+             bufs + 7,
+             bufs + 8,
+             bufs + 9,
+             &(out[i]));
+        out_value[i] = (double *)(out[i].host);
+    }
+
+    // Keep the difference as a double and test both signs; an int would truncate it.
+    double err;
+    for (int i = 0; i < W*H; i++) {
+        if ((err = out_value[0][i] - out_value[1][i]) > 0.0001 || err < -0.0001) {
+            fprintf(stderr, "Code generation error (%d): %f. Seed used %ld\n", i, err, (long)seed);
+            err_code = 1;
+            break;
+        }
+    }
+
+    for (size_t i = 0; i < sizeof(bufs)/sizeof(buffer_t); i++) {
+        delete[] bufs[i].host;
+    }
+
+    for (int i = 0; i < NO; i++) {
+        delete[] out[i].host;
+    }
+
+    return err_code;
+}
+