Index: test-suite/trunk/LICENSE.TXT =================================================================== --- test-suite/trunk/LICENSE.TXT +++ test-suite/trunk/LICENSE.TXT @@ -87,6 +87,7 @@ Pathfinder: llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C/Pathfinder XSBench: llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C/XSBench miniGMG: llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG +RSBench: llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench CLAMR: llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR HPCCG: llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C++/HPCCG PENNANT: llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/CMakeLists.txt =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/CMakeLists.txt +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/CMakeLists.txt @@ -2,3 +2,4 @@ add_subdirectory(Pathfinder) add_subdirectory(miniAMR) add_subdirectory(miniGMG) +add_subdirectory(RSBench) Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/Makefile =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/Makefile +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/Makefile @@ -4,6 +4,7 @@ PARALLEL_DIRS = XSBench \ miniAMR \ Pathfinder \ - miniGMG + miniGMG \ + RSBench include $(LEVEL)/Makefile.programs Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeLists.txt =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeLists.txt +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeLists.txt @@ -0,0 +1,5 @@ +set(PROG rsbench) +list(APPEND LDFLAGS -lm) +list(APPEND CFLAGS -std=gnu99) +set(RUN_OPTIONS -s small -l 100000 -p 1000 -w 1000) +llvm_multisource() Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/LICENSE =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/LICENSE +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/LICENSE @@ -0,0 +1,20 @@ +The MIT License (MIT) + +Copyright (c) 2013 Argonne National Laboratory + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/Makefile =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/Makefile +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/Makefile @@ -0,0 +1,7 @@ +LEVEL = ../../../.. + +PROG = rsbench +LDFLAGS = -lm +CFLAGS = -std=gnu99 +RUN_OPTIONS = -s small -l 100000 -p 1000 -w 1000 +include $(LEVEL)/MultiSource/Makefile.multisrc Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/glibc_compat_rand.h =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/glibc_compat_rand.h +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/glibc_compat_rand.h @@ -0,0 +1,16 @@ +/*===------------- glibc_compat_rand.h- glibc rand emulation --------------===*\ + * |* + * |* The LLVM Compiler Infrastructure + * |* + * |* This file is distributed under the University of Illinois Open Source + * |* License. See LICENSE.TXT for details. + * |* + * \*===----------------------------------------------------------------------===*/ + +#ifndef GLIBC_COMPAT_RAND_H +#define GLIBC_COMPAT_RAND_H + +int glibc_compat_rand(void); +void glibc_compat_srand(unsigned int seed); + +#endif /* GLIBC_COMPAT_RAND_H */ Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/glibc_compat_rand.c =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/glibc_compat_rand.c +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/glibc_compat_rand.c @@ -0,0 +1,60 @@ +/*===------------ glibc_compat_rand.c - glibc rand emulation --------------===*\ + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * +\*===----------------------------------------------------------------------===*/ + +#include "glibc_compat_rand.h" + +/** + * This rand implementation is designed to emulate the implementation of + * rand/srand in recent versions of glibc. This is used for programs which + * require this specific rand implementation in order to pass verification + * tests. + * + * For more information, see: http://www.mathstat.dal.ca/~selinger/random/ + **/ + +#define TABLE_SIZE 344 +static unsigned int table[TABLE_SIZE]; +static int next; + +int glibc_compat_rand(void) { + /* Calculate the indices i-3 and i-31 in the circular vector. */ + int i3 = (next < 3) ? (TABLE_SIZE + next - 3) : (next - 3); + int i31 = (next < 31) ? (TABLE_SIZE + next - 31) : (next - 31); + + table[next] = table[i3] + table[i31]; + unsigned int r = table[next] >> 1; + + ++next; + if (next >= TABLE_SIZE) + next = 0; + + return r; +} + +void glibc_compat_srand(unsigned int seed) { + if (seed == 0) + seed = 1; + + table[0] = seed; + + for (int i = 1; i < 31; i++) { + int r = (16807ll * table[i - 1]) % 2147483647; + if (r < 0) + r += 2147483647; + + table[i] = r; + } + + for (int i = 31; i < 34; i++) + table[i] = table[i - 31]; + for (int i = 34; i < TABLE_SIZE; i++) + table[i] = table[i - 31] + table[i - 3]; + + next = 0; +} Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/init.c =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/init.c +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/init.c @@ -0,0 +1,139 @@ +#include "rsbench.h" + +// JRT +int * generate_n_poles( Input input ) +{ + int total_resonances = input.avg_n_poles * input.n_nuclides; + + int * R = (int *) calloc( input.n_nuclides, sizeof(int)); + + for( int i = 0; i < total_resonances; i++ ) + R[rand() % input.n_nuclides]++; + + // Ensure all nuclides have at least 1 resonance + for( int i = 0; i < input.n_nuclides; i++ ) + if( R[i] == 0 ) + R[i] = 1; + + /* Debug + for( int i = 0; i < input.n_nuclides; i++ ) + printf("R[%d] = %d\n", i, R[i]); + */ + + return R; +} + +int * generate_n_windows( Input input ) +{ + int total_resonances = input.avg_n_windows * input.n_nuclides; + + int * R = (int *) calloc( input.n_nuclides, sizeof(int)); + + for( int i = 0; i < total_resonances; i++ ) + R[rand() % input.n_nuclides]++; + + // Ensure all nuclides have at least 1 resonance + for( int i = 0; i < input.n_nuclides; i++ ) + if( R[i] == 0 ) + R[i] = 1; + + /* Debug + for( int i = 0; i < input.n_nuclides; i++ ) + printf("R[%d] = %d\n", i, R[i]); + */ + + return R; +} + +// +Pole ** generate_poles( Input input, int * n_poles ) +{ + // Pole Scaling Factor -- Used to bias hitting of the fast Faddeeva + // region to approximately 99.5% (i.e., only 0.5% of lookups should + // require the full eval). + double f = 152.5; + // Allocating 2D contiguous matrix + Pole ** R = (Pole **) malloc( input.n_nuclides * sizeof( Pole *)); + Pole * contiguous = (Pole *) malloc( input.n_nuclides * input.avg_n_poles * sizeof(Pole)); + + int k = 0; + for( int i = 0; i < input.n_nuclides; i++ ) + { + R[i] = &contiguous[k]; + k += n_poles[i]; + } + + // fill with data + for( int i = 0; i < input.n_nuclides; i++ ) + for( int j = 0; j < n_poles[i]; j++ ) + { + R[i][j].MP_EA = f*((double) rand() / RAND_MAX + (double) rand() / RAND_MAX * I); + R[i][j].MP_RT = f*(double) rand() / RAND_MAX + (double) rand() / RAND_MAX * I; + R[i][j].MP_RA = f*(double) rand() / RAND_MAX + (double) rand() / RAND_MAX * I; + R[i][j].MP_RF = f*(double) rand() / RAND_MAX + (double) rand() / RAND_MAX * I; + R[i][j].l_value = rand() % input.numL; + } + + /* Debug + for( int i = 0; i < input.n_nuclides; i++ ) + for( int j = 0; j < n_poles[i]; j++ ) + printf("R[%d][%d]: Eo = %lf lambda_o = %lf Tn = %lf Tg = %lf Tf = %lf\n", i, j, R[i][j].Eo, R[i][j].lambda_o, R[i][j].Tn, R[i][j].Tg, R[i][j].Tf); + */ + + return R; +} + +Window ** generate_window_params( Input input, int * n_windows, int * n_poles ) +{ + // Allocating 2D contiguous matrix + Window ** R = (Window **) malloc( input.n_nuclides * sizeof( Window *)); + Window * contiguous = (Window *) malloc( input.n_nuclides * input.avg_n_windows * sizeof(Window)); + + int k = 0; + for( int i = 0; i < input.n_nuclides; i++ ) + { + R[i] = &contiguous[k]; + k += n_windows[i]; + } + + // fill with data + for( int i = 0; i < input.n_nuclides; i++ ) + { + int space = n_poles[i] / n_windows[i]; + int remainder = n_poles[i] - space * n_windows[i]; + int ctr = 0; + for( int j = 0; j < n_windows[i]; j++ ) + { + R[i][j].T = (double) rand() / RAND_MAX; + R[i][j].A = (double) rand() / RAND_MAX; + R[i][j].F = (double) rand() / RAND_MAX; + R[i][j].start = ctr; + R[i][j].end = ctr + space - 1; + + ctr += space; + + if ( j < remainder ) + { + ctr++; + R[i][j].end++; + } + } + } + + return R; +} + +double ** generate_pseudo_K0RS( Input input ) +{ + double ** R = (double **) malloc( input.n_nuclides * sizeof( double * )); + double * contiguous = (double *) malloc( input.n_nuclides * input.numL * sizeof(double)); + + for( int i = 0; i < input.n_nuclides; i++ ) + R[i] = &contiguous[i*input.numL]; + + for( int i = 0; i < input.n_nuclides; i++) + for( int j = 0; j < input.numL; j++ ) + R[i][j] = (double) rand() / RAND_MAX; + + return R; +} Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/io.c =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/io.c +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/io.c @@ -0,0 +1,228 @@ +#include "rsbench.h" + +// Prints program logo +void logo(int version) +{ + border_print(); + printf( +" _____ _____ ____ _ \n" +" | __ \\ / ____| _ \\ | | \n" +" | |__) | (___ | |_) | ___ _ __ ___| |__ \n" +" | _ / \\___ \\| _ < / _ \\ '_ \\ / __| '_ \\ \n" +" | | \\ \\ ____) | |_) | __/ | | | (__| | | |\n" +" |_| \\_\\_____/|____/ \\___|_| |_|\\___|_| |_|\n" + ); + border_print(); + center_print("Developed at Argonne National Laboratory", 79); + char v[100]; + sprintf(v, "Version: %d", version); + center_print(v, 79); + border_print(); +} + +// Prints Section titles in center of 80 char terminal +void center_print(const char *s, int width) +{ + int length = strlen(s); + int i; + for (i=0; i<=(width-length)/2; i++) { + fputs(" ", stdout); + } + fputs(s, stdout); + fputs("\n", stdout); +} + +void border_print(void) +{ + printf( + "===================================================================" + "=============\n"); +} + +// Prints comma separated integers - for ease of reading +void fancy_int( int a ) +{ + if( a < 1000 ) + printf("%d\n",a); + + else if( a >= 1000 && a < 1000000 ) + printf("%d,%03d\n", a / 1000, a % 1000); + + else if( a >= 1000000 && a < 1000000000 ) + printf("%d,%03d,%03d\n", a / 1000000, (a % 1000000) / 1000, a % 1000 ); + + else if( a >= 1000000000 ) + printf("%d,%03d,%03d,%03d\n", + a / 1000000000, + (a % 1000000000) / 1000000, + (a % 1000000) / 1000, + a % 1000 ); + else + printf("%d\n",a); +} + +Input read_CLI( int argc, char * argv[] ) +{ + Input input; + + // defaults to max threads on the system + #ifdef OPENMP + input.nthreads = omp_get_num_procs(); + #else + input.nthreads = 1; + #endif + // defaults to 355 (corresponding to H-M Large benchmark) + input.n_nuclides = 355; + // defaults to 10,000,000 + input.lookups = 10000000; + // defaults to H-M Large benchmark + input.HM = LARGE; + // defaults to 3000 resonancs (avg) per nuclide + input.avg_n_poles = 1000; + // defaults to 100 + input.avg_n_windows = 100; + // defaults to 4; + input.numL = 4; + // defaults to no temperature dependence (Doppler broadening) + input.doppler = 1; + + // Collect Raw Input + for( int i = 1; i < argc; i++ ) + { + char * arg = argv[i]; + + // nthreads (-t) + if( strcmp(arg, "-t") == 0 ) + { + if( ++i < argc ) + input.nthreads = atoi(argv[i]); + else + print_CLI_error(); + } + // lookups (-l) + else if( strcmp(arg, "-l") == 0 ) + { + if( ++i < argc ) + input.lookups = atoi(argv[i]); + else + print_CLI_error(); + } + // nuclides (-n) + else if( strcmp(arg, "-n") == 0 ) + { + if( ++i < argc ) + input.n_nuclides = atoi(argv[i]); + else + print_CLI_error(); + } + // HM (-s) + else if( strcmp(arg, "-s") == 0 ) + { + if( ++i < argc ) + { + if( strcmp(argv[i], "small") == 0 ) + input.HM = SMALL; + else if ( strcmp(argv[i], "large") == 0 ) + input.HM = LARGE; + else + print_CLI_error(); + } + else + print_CLI_error(); + } + // Doppler Broadening (Temperature Dependence) + else if( strcmp(arg, "-d") == 0 ) + { + input.doppler = 0; + } + // Avg number of windows per nuclide (-w) + else if( strcmp(arg, "-w") == 0 ) + { + if( ++i < argc ) + input.avg_n_windows = atoi(argv[i]); + else + print_CLI_error(); + } + // Avg number of poles per nuclide (-p) + else if( strcmp(arg, "-p") == 0 ) + { + if( ++i < argc ) + input.avg_n_poles = atoi(argv[i]); + else + print_CLI_error(); + } + else + print_CLI_error(); + } + + // Validate Input + + // Validate nthreads + if( input.nthreads < 1 ) + print_CLI_error(); + + // Validate n_isotopes + if( input.n_nuclides < 1 ) + print_CLI_error(); + + // Validate lookups + if( input.lookups < 1 ) + print_CLI_error(); + + // Validate lookups + if( input.avg_n_poles < 1 ) + print_CLI_error(); + + // Validate lookups + if( input.avg_n_windows < 1 ) + print_CLI_error(); + + // Set HM size specific parameters + // (defaults to large) + if( input.HM == SMALL ) + input.n_nuclides = 68; + + // Return input struct + return input; +} + +void print_CLI_error(void) +{ + printf("Usage: ./multibench \n"); + printf("Options include:\n"); + printf(" -t Number of OpenMP threads to run\n"); + printf(" -s Size of H-M Benchmark to run (small, large)\n"); + printf(" -l Number of Cross-section (XS) lookups\n"); + printf(" -p Average Number of Poles per Nuclide\n"); + printf(" -w Average Number of Windows per Nuclide\n"); + printf(" -d Disables Temperature Dependence (Doppler Broadening)\n"); + printf("Default is equivalent to: -s large -l 10000000 -p 1000 -w 100\n"); + printf("See readme for full description of default run values\n"); + exit(4); +} + +void print_input_summary(Input input) +{ + // Calculate Estimate of Memory Usage + size_t mem = get_mem_estimate(input); + + printf("Materials: 12\n"); + printf("H-M Benchmark Size: "); + if( input.HM == 0 ) + printf("Small\n"); + else + printf("Large\n"); + if( input.doppler == 1 ) + printf("Temperature Dependence: ON\n"); + else + printf("Temperature Dependence: OFF\n"); + printf("Total Nuclides: %d\n", input.n_nuclides); + printf("Avg Poles per Nuclide: "); fancy_int(input.avg_n_poles); + printf("Avg Windows per Nuclide: "); fancy_int(input.avg_n_windows); + printf("XS Lookups: "); fancy_int(input.lookups); + printf("Threads: %d\n", input.nthreads); + printf("Est. Memory Usage (MB): %.1lf\n", mem / 1024.0 / 1024.0); + #ifdef PAPI + printf("PAPI Performance Counters: ON\n"); + #endif +} Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/main.c =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/main.c +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/main.c @@ -0,0 +1,200 @@ +#include "rsbench.h" + +int main(int argc, char * argv[]) +{ + // ===================================================================== + // Initialization & Command Line Read-In + // ===================================================================== + + int version = 9; + #ifdef OPENMP + int max_procs = omp_get_num_procs(); + #else + int max_procs = 1; + #endif + double start = 0.0, stop = 0.0; + + srand(time(NULL)); + + // Process CLI Fields + Input input = read_CLI( argc, argv ); + + #ifdef OPENMP + // Set number of OpenMP Threads + omp_set_num_threads(input.nthreads); + #endif + // ===================================================================== + // Print-out of Input Summary + // ===================================================================== + logo(version); + center_print("INPUT SUMMARY", 79); + border_print(); + print_input_summary(input); + + // ===================================================================== + // Prepare Pole Paremeter Grids + // ===================================================================== + border_print(); + center_print("INITIALIZATION", 79); + border_print(); + + #if defined(TIMING) && defined(OPENMP) + start = omp_get_wtime(); + #endif + + // Allocate & fill energy grids + printf("Generating resonance distributions...\n"); + int * n_poles = generate_n_poles( input ); + + // Allocate & fill Window grids + printf("Generating window distributions...\n"); + int * n_windows = generate_n_windows( input ); + + // Get material data + printf("Loading Hoogenboom-Martin material data...\n"); + Materials materials = get_materials( input ); + + // Prepare full resonance grid + printf("Generating resonance parameter grid...\n"); + Pole ** poles = generate_poles( input, n_poles ); + + // Prepare full Window grid + printf("Generating window parameter grid...\n"); + Window ** windows = generate_window_params( input, n_windows, n_poles); + + // Prepare 0K Resonances + printf("Generating 0K l_value data...\n"); + double ** pseudo_K0RS = generate_pseudo_K0RS( input ); + + CalcDataPtrs data; + data.n_poles = n_poles; + data.n_windows = n_windows; + data.materials = materials; + data.poles = poles; + data.windows = windows; + data.pseudo_K0RS = pseudo_K0RS; + #if defined(TIMING) && defined(OPENMP) + stop = omp_get_wtime(); + #endif + printf("Initialization Complete. (%.2lf seconds)\n", stop-start); + + // ===================================================================== + // Cross Section (XS) Parallel Lookup Simulation Begins + // ===================================================================== + border_print(); + center_print("SIMULATION", 79); + border_print(); + + printf("Beginning Simulation.\n"); + #ifndef STATUS + printf("Calculating XS's...\n"); + #endif + + #ifdef PAPI + /* initialize papi with one thread here */ + if ( PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT){ + fprintf(stderr, "PAPI library init error!\n"); + exit(1); + } + #endif + + #if defined(TIMING) && defined(OPENMP) + start = omp_get_wtime(); + #endif + + long g_abrarov = 0; + long g_alls = 0; + #pragma omp parallel default(none) \ + shared(input, data) \ + reduction(+:g_abrarov, g_alls) + { + unsigned long seed = time(NULL)+1; + double macro_xs[4]; + double * xs = (double *) calloc(4, sizeof(double)); + int thread = 0; + #ifdef OPENMP + thread = omp_get_thread_num(); + #endif + seed += thread; + int mat; + double E; + long abrarov = 0; + long alls = 0; + + #ifdef PAPI + int eventset = PAPI_NULL; + int num_papi_events; + #pragma omp critical + { + counter_init(&eventset, &num_papi_events); + } + #endif + complex double * sigTfactors = + (complex double *) malloc( input.numL * sizeof(complex double) ); + + #pragma omp for schedule(dynamic) + for( int i = 0; i < input.lookups; i++ ) + { + #ifdef STATUS + if( thread == 0 && i % 1000 == 0 ) + printf("\rCalculating XS's... (%.0lf%% completed)", + (i / ( (double)input.lookups / + (double) input.nthreads )) / + (double) input.nthreads * 100.0); + #endif + mat = pick_mat( &seed ); + E = rn( &seed ); + calculate_macro_xs( macro_xs, mat, E, input, data, sigTfactors, &abrarov, &alls ); + // Results are copied onto heap to avoid some compiler + // flags (-flto) from optimizing out function call + memcpy(xs, macro_xs, 4*sizeof(double)); + } + + free(sigTfactors); + + // Accumulate global counters + g_abrarov = abrarov; + g_alls = alls; + + #ifdef PAPI + if( thread == 0 ) + { + printf("\n"); + border_print(); + center_print("PAPI COUNTER RESULTS", 79); + border_print(); + printf("Count \tSmybol \tDescription\n"); + } + { + #pragma omp barrier + } + counter_stop(&eventset, num_papi_events); + #endif + } + + #if defined(TIMING) && defined(OPENMP) + stop = omp_get_wtime(); + #endif + + #ifndef PAPI + printf("\nSimulation Complete.\n"); + #endif + // ===================================================================== + // Print / Save Results and Exit + // ===================================================================== + #ifdef RESULTS + border_print(); + center_print("RESULTS", 79); + border_print(); + + printf("Threads: %d\n", input.nthreads); + if( input.doppler) + printf("Slow Faddeeva: %.2lf%%\n", (double) g_abrarov/g_alls * 100.f); + printf("Runtime: %.3lf seconds\n", stop-start); + printf("Lookups: "); fancy_int(input.lookups); + printf("Lookups/s: "); fancy_int((double) input.lookups / (stop-start)); + + border_print(); + #endif + return 0; +} Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/material.c =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/material.c +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/material.c @@ -0,0 +1,169 @@ +#include "rsbench.h" + +// Handles all material creation tasks - returns Material struct +Materials get_materials(Input input) +{ + Materials M; + M.num_nucs = load_num_nucs(input); + M.mats = load_mats(input, M.num_nucs); + M.concs = load_concs(M.num_nucs); + + return M; +} + +// num_nucs represents the number of nuclides that each material contains +int * load_num_nucs(Input input) +{ + int * num_nucs = (int*)malloc(12*sizeof(int)); + + // Material 0 is a special case (fuel). The H-M small reactor uses + // 34 nuclides, while H-M larges uses 300. + if( input.n_nuclides == 68 ) + num_nucs[0] = 34; // HM Small is 34, H-M Large is 321 + else + num_nucs[0] = 321; // HM Small is 34, H-M Large is 321 + + num_nucs[1] = 5; + num_nucs[2] = 4; + num_nucs[3] = 4; + num_nucs[4] = 27; + num_nucs[5] = 21; + num_nucs[6] = 21; + num_nucs[7] = 21; + num_nucs[8] = 21; + num_nucs[9] = 21; + num_nucs[10] = 9; + num_nucs[11] = 9; + + return num_nucs; +} + +// Assigns an array of nuclide ID's to each material +int ** load_mats( Input input, int * num_nucs ) +{ + int ** mats = (int **) malloc( 12 * sizeof(int *) ); + for( int i = 0; i < 12; i++ ) + mats[i] = (int *) malloc(num_nucs[i] * sizeof(int) ); + + // Small H-M has 34 fuel nuclides + int mats0_Sml[] = { 58, 59, 60, 61, 40, 42, 43, 44, 45, 46, 1, 2, 3, 7, + 8, 9, 10, 29, 57, 47, 48, 0, 62, 15, 33, 34, 52, 53, + 54, 55, 56, 18, 23, 41 }; //fuel + // Large H-M has 300 fuel nuclides + int mats0_Lrg[321] = { 58, 59, 60, 61, 40, 42, 43, 44, 45, 46, 1, 2, 3, 7, + 8, 9, 10, 29, 57, 47, 48, 0, 62, 15, 33, 34, 52, 53, + 54, 55, 56, 18, 23, 41 }; //fuel + for( int i = 0; i < 321-34; i++ ) + mats0_Lrg[34+i] = 68 + i; // H-M large adds nuclides to fuel only + + // These are the non-fuel materials + int mats1[] = { 63, 64, 65, 66, 67 }; // cladding + int mats2[] = { 24, 41, 4, 5 }; // cold borated water + int mats3[] = { 24, 41, 4, 5 }; // hot borated water + int mats4[] = { 19, 20, 21, 22, 35, 36, 37, 38, 39, 25, 27, 28, 29, + 30, 31, 32, 26, 49, 50, 51, 11, 12, 13, 14, 6, 16, + 17 }; // RPV + int mats5[] = { 24, 41, 4, 5, 19, 20, 21, 22, 35, 36, 37, 38, 39, 25, + 49, 50, 51, 11, 12, 13, 14 }; // lower radial reflector + int mats6[] = { 24, 41, 4, 5, 19, 20, 21, 22, 35, 36, 37, 38, 39, 25, + 49, 50, 51, 11, 12, 13, 14 }; // top reflector / plate + int mats7[] = { 24, 41, 4, 5, 19, 20, 21, 22, 35, 36, 37, 38, 39, 25, + 49, 50, 51, 11, 12, 13, 14 }; // bottom plate + int mats8[] = { 24, 41, 4, 5, 19, 20, 21, 22, 35, 36, 37, 38, 39, 25, + 49, 50, 51, 11, 12, 13, 14 }; // bottom nozzle + int mats9[] = { 24, 41, 4, 5, 19, 20, 21, 22, 35, 36, 37, 38, 39, 25, + 49, 50, 51, 11, 12, 13, 14 }; // top nozzle + int mats10[] = { 24, 41, 4, 5, 63, 64, 65, 66, 67 }; // top of FA's + int mats11[] = { 24, 41, 4, 5, 63, 64, 65, 66, 67 }; // bottom FA's + + // H-M large v small dependency + if( input.n_nuclides == 68 ) + memcpy( mats[0], mats0_Sml, num_nucs[0] * sizeof(int) ); + else + memcpy( mats[0], mats0_Lrg, num_nucs[0] * sizeof(int) ); + + // Copy other materials + memcpy( mats[1], mats1, num_nucs[1] * sizeof(int) ); + memcpy( mats[2], mats2, num_nucs[2] * sizeof(int) ); + memcpy( mats[3], mats3, num_nucs[3] * sizeof(int) ); + memcpy( mats[4], mats4, num_nucs[4] * sizeof(int) ); + memcpy( mats[5], mats5, num_nucs[5] * sizeof(int) ); + memcpy( mats[6], mats6, num_nucs[6] * sizeof(int) ); + memcpy( mats[7], mats7, num_nucs[7] * sizeof(int) ); + memcpy( mats[8], mats8, num_nucs[8] * sizeof(int) ); + memcpy( mats[9], mats9, num_nucs[9] * sizeof(int) ); + memcpy( mats[10], mats10, num_nucs[10] * sizeof(int) ); + memcpy( mats[11], mats11, num_nucs[11] * sizeof(int) ); + + // test + /* + for( int i = 0; i < 12; i++ ) + for( int j = 0; j < num_nucs[i]; j++ ) + printf("material %d - Nuclide %d: %d\n", + i, j, mats[i][j]); + */ + + return mats; +} + +// Creates a randomized array of 'concentrations' of nuclides in each mat +double ** load_concs( int * num_nucs ) +{ + double ** concs = (double **)malloc( 12 * sizeof( double *) ); + + for( int i = 0; i < 12; i++ ) + concs[i] = (double *)malloc( num_nucs[i] * sizeof(double) ); + + for( int i = 0; i < 12; i++ ) + for( int j = 0; j < num_nucs[i]; j++ ) + concs[i][j] = (double) rand() / (double) RAND_MAX; + + // test + /* + for( int i = 0; i < 12; i++ ) + for( int j = 0; j < num_nucs[i]; j++ ) + printf("concs[%d][%d] = %lf\n", i, j, concs[i][j] ); + */ + + return concs; +} + +// picks a material based on a probabilistic distribution +int pick_mat( unsigned long * seed ) +{ + // I have a nice spreadsheet supporting these numbers. They are + // the fractions (by volume) of material in the core. Not a + // *perfect* approximation of where XS lookups are going to occur, + // but this will do a good job of biasing the system nonetheless. + + // Also could be argued that doing fractions by weight would be + // a better approximation, but volume does a good enough job for now. + + double dist[12]; + dist[0] = 0.140; // fuel + dist[1] = 0.052; // cladding + dist[2] = 0.275; // cold, borated water + dist[3] = 0.134; // hot, borated water + dist[4] = 0.154; // RPV + dist[5] = 0.064; // Lower, radial reflector + dist[6] = 0.066; // Upper reflector / top plate + dist[7] = 0.055; // bottom plate + dist[8] = 0.008; // bottom nozzle + dist[9] = 0.015; // top nozzle + dist[10] = 0.025; // top of fuel assemblies + dist[11] = 0.013; // bottom of fuel assemblies + + double roll = rn(seed); + + // makes a pick based on the distro + for( int i = 0; i < 12; i++ ) + { + double running = 0; + for( int j = i; j > 0; j-- ) + running += dist[j]; + if( roll < running ) + return i; + } + + return 0; +} Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/rsbench.h =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/rsbench.h +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/rsbench.h @@ -0,0 +1,105 @@ +#include +#ifdef OPENMP +#include +#endif +#include +#include +#include +#include +#include + +#ifdef PAPI +#include "papi.h" +#endif + +#define PI 3.14159265359 + +#include "glibc_compat_rand.h" +#ifndef NO_GLIBC_COMPAT_RAND +#define rand glibc_compat_rand +#define srand glibc_compat_srand +#endif + +// typedefs +typedef enum __hm{SMALL, LARGE, XL, XXL} HM_size; + +typedef struct{ + int nthreads; + int n_nuclides; + int lookups; + HM_size HM; + int avg_n_poles; + int avg_n_windows; + int numL; + int doppler; +} Input; + +typedef struct{ + int * num_nucs; + int ** mats; + double ** concs; +} Materials; + +typedef struct{ + complex double MP_EA; + complex double MP_RT; + complex double MP_RA; + complex double MP_RF; + short int l_value; +} Pole; + +typedef struct{ + double T; + double A; + double F; + int start; + int end; +} Window; + +typedef struct{ + int * n_poles; + int * n_windows; + Materials materials; + Pole ** poles; + Window ** windows; + double ** pseudo_K0RS; +} CalcDataPtrs; + + +// io.c +void logo(int version); +void center_print(const char *s, int width); +void border_print(void); +void fancy_int( int a ); +Input read_CLI( int argc, char * argv[] ); +void print_CLI_error(void); +void print_input_summary(Input input); + +// init.c +int * generate_n_poles( Input input ); +int * generate_n_windows( Input input ); +Pole ** generate_poles( Input input, int * n_poles ); +Window ** generate_window_params( Input input, int * n_windows, int * n_poles ); +double ** generate_pseudo_K0RS( Input input ); + +// material.c +int * load_num_nucs(Input input); +int ** load_mats( Input input, int * num_nucs ); +double ** load_concs( int * num_nucs ); +int pick_mat( unsigned long * seed ); +Materials get_materials(Input input); + +// utils.c +double rn(unsigned long * seed); +size_t get_mem_estimate( Input input ); + +// xs_kernel.c +double complex fast_nuclear_W( double complex Z ); +void calculate_macro_xs( double * macro_xs, int mat, double E, Input input, CalcDataPtrs data, complex double * sigTfactors, long * abrarov, long * alls ); +void calculate_micro_xs( double * micro_xs, int nuc, double E, Input input, CalcDataPtrs data, complex double * sigTfactors); +void calculate_micro_xs_doppler( double * micro_xs, int nuc, double E, Input input, CalcDataPtrs data, complex double * sigTfactors, long * abrarov, long * alls); +void calculate_sig_T( int nuc, double E, Input input, CalcDataPtrs data, complex double * sigTfactors ); + +// papi.c +void counter_init( int *eventset, int *num_papi_events ); +void counter_stop( int * eventset, int num_papi_events ); Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/rsbench.reference_output =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/rsbench.reference_output +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/rsbench.reference_output @@ -0,0 +1,40 @@ +================================================================================ + _____ _____ ____ _ + | __ \ / ____| _ \ | | + | |__) | (___ | |_) | ___ _ __ ___| |__ + | _ / \___ \| _ < / _ \ '_ \ / __| '_ \ + | | \ \ ____) | |_) | __/ | | | (__| | | | + |_| \_\_____/|____/ \___|_| |_|\___|_| |_| +================================================================================ + Developed at Argonne National Laboratory + Version: 9 +================================================================================ + INPUT SUMMARY +================================================================================ +Materials: 12 +H-M Benchmark Size: Small +Temperature Dependence: ON +Total Nuclides: 68 +Avg Poles per Nuclide: 1,000 +Avg Windows per Nuclide: 1,000 +XS Lookups: 100,000 +Threads: 1 +Est. Memory Usage (MB): 6.7 +================================================================================ + INITIALIZATION +================================================================================ +Generating resonance distributions... +Generating window distributions... +Loading Hoogenboom-Martin material data... +Generating resonance parameter grid... +Generating window parameter grid... +Generating 0K l_value data... +Initialization Complete. (0.00 seconds) +================================================================================ + SIMULATION +================================================================================ +Beginning Simulation. +Calculating XS's... + +Simulation Complete. +exit 0 Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/utils.c =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/utils.c +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/utils.c @@ -0,0 +1,27 @@ +#include "rsbench.h" + +// Park & Miller Multiplicative Conguential Algorithm +// From "Numerical Recipes" Second Edition +double rn(unsigned long * seed) +{ + double ret; + unsigned long n1; + unsigned long a = 16807; + unsigned long m = 2147483647; + n1 = ( a * (*seed) ) % m; + *seed = n1; + ret = (double) n1 / m; + return ret; +} + +size_t get_mem_estimate( Input input ) +{ + size_t poles = input.n_nuclides * input.avg_n_poles * sizeof(Pole) + input.n_nuclides * sizeof(Pole *); + size_t windows = input.n_nuclides * input.avg_n_windows * sizeof(Window) + input.n_nuclides * sizeof(Window *); + size_t pseudo_K0RS = input.n_nuclides * input.numL * sizeof( double ) + input.n_nuclides * sizeof(double); + size_t other = input.n_nuclides * 2 * sizeof(int); + + size_t total = poles + windows + pseudo_K0RS + other; + + return total; +} Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/xs_kernel.c =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/xs_kernel.c +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/xs_kernel.c @@ -0,0 +1,227 @@ +#include "rsbench.h" + +// This function uses a combination of the Abrarov Approximation +// and the QUICK_W three term asymptotic expansion. +// Only expected to use Abrarov ~0.5% of the time. +double complex fast_nuclear_W( double complex Z ) +{ + // Abrarov + if( cabs(Z) < 6.0 ) + { + // Precomputed parts for speeding things up + // (N = 10, Tm = 12.0) + double complex prefactor = 8.124330e+01 * I; + double an[10] = { + 2.758402e-01, + 2.245740e-01, + 1.594149e-01, + 9.866577e-02, + 5.324414e-02, + 2.505215e-02, + 1.027747e-02, + 3.676164e-03, + 1.146494e-03, + 3.117570e-04 + }; + double neg_1n[10] = { + -1.0, + 1.0, + -1.0, + 1.0, + -1.0, + 1.0, + -1.0, + 1.0, + -1.0, + 1.0 + }; + + double denominator_left[10] = { + 9.869604e+00, + 3.947842e+01, + 8.882644e+01, + 1.579137e+02, + 2.467401e+02, + 3.553058e+02, + 4.836106e+02, + 6.316547e+02, + 7.994380e+02, + 9.869604e+02 + }; + + double complex W = I * ( 1 - cexp(I*12.*Z) ) / (12. * Z ); + double complex sum = 0; + for( int n = 0; n < 10; n++ ) + { + complex double top = neg_1n[n] * cexp(I*12.*Z) - 1.; + complex double bot = denominator_left[n] - 144.*Z*Z; + sum += an[n] * (top/bot); + } + W += prefactor * Z * sum; + return W; + } + + // QUICK_2 3 Term Asymptotic Expansion (Accurate to O(1e-6)). + // Pre-computed parameters + double a = 0.512424224754768462984202823134979415014943561548661637413182; + double b = 0.275255128608410950901357962647054304017026259671664935783653; + double c = 0.051765358792987823963876628425793170829107067780337219430904; + double d = 2.724744871391589049098642037352945695982973740328335064216346; + + // Three Term Asymptotic Expansion + double complex W = I * Z * (a/(Z*Z - b) + c/(Z*Z - d)); + + return W; +} + +void calculate_macro_xs( double * macro_xs, int mat, double E, Input input, CalcDataPtrs data, complex double * sigTfactors, long * abrarov, long * alls ) +{ + // zero out macro vector + for( int i = 0; i < 4; i++ ) + macro_xs[i] = 0; + + // for nuclide in mat + for( int i = 0; i < (data.materials).num_nucs[mat]; i++ ) + { + double micro_xs[4]; + int nuc = (data.materials).mats[mat][i]; + + if( input.doppler == 1 ) + calculate_micro_xs_doppler( micro_xs, nuc, E, input, data, sigTfactors, abrarov, alls); + else + calculate_micro_xs( micro_xs, nuc, E, input, data, sigTfactors); + + for( int j = 0; j < 4; j++ ) + { + macro_xs[j] += micro_xs[j] * data.materials.concs[mat][i]; + } + } + + /* Debug + printf("E = %.2lf, mat = %d, macro_xs[0] = %.2lf, macro_xs[1] = %.2lf, macro_xs[2] = %.2lf, macro_xs[3] = %.2lf\n", + E, mat, macro_xs[0], macro_xs[1], macro_xs[2], macro_xs[3] ); + */ + +} + +// No Temperature dependence (i.e., 0K evaluation) +void calculate_micro_xs( double * micro_xs, int nuc, double E, Input input, CalcDataPtrs data, complex double * sigTfactors) +{ + // MicroScopic XS's to Calculate + double sigT; + double sigA; + double sigF; + double sigE; + + // Calculate Window Index + double spacing = 1.0 / data.n_windows[nuc]; + int window = (int) ( E / spacing ); + if( window == data.n_windows[nuc] ) + window--; + + // Calculate sigTfactors + calculate_sig_T(nuc, E, input, data, sigTfactors ); + + // Calculate contributions from window "background" (i.e., poles outside window (pre-calculated) + Window w = data.windows[nuc][window]; + sigT = E * w.T; + sigA = E * w.A; + sigF = E * w.F; + + // Loop over Poles within window, add contributions + for( int i = w.start; i < w.end; i++ ) + { + complex double PSIIKI; + complex double CDUM; + Pole pole = data.poles[nuc][i]; + PSIIKI = -(0.0 - 1.0 * _Complex_I ) / ( pole.MP_EA - sqrt(E) ); + CDUM = PSIIKI / E; + sigT += creal( pole.MP_RT * CDUM * sigTfactors[pole.l_value] ); + sigA += creal( pole.MP_RA * CDUM); + sigF += creal( pole.MP_RF * CDUM); + } + + sigE = sigT - sigA; + + micro_xs[0] = sigT; + micro_xs[1] = sigA; + micro_xs[2] = sigF; + micro_xs[3] = sigE; +} + +// Temperature Dependent Variation of Kernel +// (This involves using the Complex Faddeeva function to +// Doppler broaden the poles within the window) +void calculate_micro_xs_doppler( double * micro_xs, int nuc, double E, Input input, CalcDataPtrs data, complex double * sigTfactors, long * abrarov, long * alls) +{ + // MicroScopic XS's to Calculate + double sigT; + double sigA; + double sigF; + double sigE; + + // Calculate Window Index + double spacing = 1.0 / data.n_windows[nuc]; + int window = (int) ( E / spacing ); + if( window == data.n_windows[nuc] ) + window--; + + // Calculate sigTfactors + calculate_sig_T(nuc, E, input, data, sigTfactors ); + + // Calculate contributions from window "background" (i.e., poles outside window (pre-calculated) + Window w = data.windows[nuc][window]; + sigT = E * w.T; + sigA = E * w.A; + sigF = E * w.F; + + double dopp = 0.5; + + // Loop over Poles within window, add contributions + for( int i = w.start; i < w.end; i++ ) + { + Pole pole = data.poles[nuc][i]; + + // Prep Z + double complex Z = (E - pole.MP_EA) * dopp; + if( cabs(Z) < 6.0 ) + (*abrarov)++; + (*alls)++; + + // Evaluate Fadeeva Function + complex double faddeeva = fast_nuclear_W( Z ); + + // Update W + sigT += creal( pole.MP_RT * faddeeva * sigTfactors[pole.l_value] ); + sigA += creal( pole.MP_RA * faddeeva); + sigF += creal( pole.MP_RF * faddeeva); + } + + sigE = sigT - sigA; + + micro_xs[0] = sigT; + micro_xs[1] = sigA; + micro_xs[2] = sigF; + micro_xs[3] = sigE; +} + +void calculate_sig_T( int nuc, double E, Input input, CalcDataPtrs data, complex double * sigTfactors ) +{ + double phi; + + for( int i = 0; i < input.numL; i++ ) + { + phi = data.pseudo_K0RS[nuc][i] * sqrt(E); + + if( i == 1 ) + phi -= - atan( phi ); + else if( i == 2 ) + phi -= atan( 3.0 * phi / (3.0 - phi*phi)); + else if( i == 3 ) + phi -= atan(phi*(15.0-phi*phi)/(15.0-6.0*phi*phi)); + + phi *= 2.0; + + sigTfactors[i] = cos(phi) - sin(phi) * _Complex_I; + } +}