Index: LICENSE.TXT =================================================================== --- LICENSE.TXT +++ LICENSE.TXT @@ -84,6 +84,7 @@ llvm-test/MultiSource/Benchmarks/ASC_Sequoia/sphot smg2000: llvm-test/MultiSource/Benchmarks/ASCI_Purple/SMG2000 XSBench: llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C/XSBench +CLAMR: llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR Fhourstones: llvm-test/MultiSource/Benchmarks/Fhourstones Fhourstones-3.1: llvm-test/MultiSource/Benchmarks/Fhourstones-3.1 McCat: llvm-test/MultiSource/Benchmarks/McCat Index: MultiSource/Benchmarks/CMakeLists.txt =================================================================== --- MultiSource/Benchmarks/CMakeLists.txt +++ MultiSource/Benchmarks/CMakeLists.txt @@ -19,6 +19,7 @@ add_subdirectory(nbench) add_subdirectory(sim) add_subdirectory(DOE-ProxyApps-C) +add_subdirectory(DOE-ProxyApps-C++) if((NOT "${TARGET_OS}" STREQUAL "Darwin") OR (NOT "${ARCH}" STREQUAL "ARM")) add_subdirectory(TSVC) Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Bounds.h =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Bounds.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2011-2012, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. 
If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * Neal Davis davis68@lanl.gov, davis68@illinois.edu + * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com + * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com + * Other LANL authors + * + */ + +#include + +#ifndef _Bounds_ +#define _Bounds_ + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include "Globals.h" + +typedef struct { + TVector min, max; +} TBounds; + +extern void Bounds_Copy(TBounds* src, TBounds* dest); +extern void Bounds_Infinite(TBounds* b); +extern void Bounds_AddBounds(TBounds* b, TBounds* add); +extern void Bounds_AddEpsilon(TBounds* b, double add); +extern bool Bounds_IsOverlappingBounds(TBounds* b, TBounds* tst); +extern double Bounds_WidthAxis(TBounds* b, unsigned int axis); +extern double Bounds_CenterAxis(TBounds* b, unsigned int axis); + +#ifdef __cplusplus +} +#endif + +#endif Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Bounds.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Bounds.c @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2011-2012, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. 
Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * Neal Davis davis68@lanl.gov, davis68@illinois.edu + * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com + * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com + * Other LANL authors + * + */ +#include "Bounds.h" + +#define MEMCPY(s,d,n,t) {memcpy((void*)d, (void*)s, n * sizeof(t)); } +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#define MAX(a,b) ((a) > (b) ? 
(a) : (b)) + +void Bounds_Copy(TBounds* src, TBounds* dest) { + assert(src && dest); + MEMCPY(src, dest, 1, TBounds); +} + +void Bounds_Infinite(TBounds* b){ + assert(b); + b->min.x = POSITIVE_INFINITY; + b->min.y = POSITIVE_INFINITY; + b->max.x = NEGATIVE_INFINITY; + b->max.y = NEGATIVE_INFINITY; +} + +void Bounds_AddBounds(TBounds* b, TBounds* add) { + assert(b && add); + b->min.x = MIN(b->min.x, add->min.x); + b->min.y = MIN(b->min.y, add->min.y); + b->max.x = MAX(b->max.x, add->max.x); + b->max.y = MAX(b->max.y, add->max.y); +} + +void Bounds_AddEpsilon(TBounds* b, double add) { + assert(b); + b->min.x = b->min.x - add; + b->min.y = b->min.y - add; + b->max.x = b->max.x + add; + b->max.y = b->max.y + add; +} + +bool Bounds_IsOverlappingBounds(TBounds* b, TBounds* tst) { + assert(b && tst); + if((tst->max.x < b->min.x) || (tst->min.x > b->max.x)) + return(false); + if((tst->max.y < b->min.y) || (tst->min.y > b->max.y)) + return(false); + return(true); +} + +double Bounds_WidthAxis(TBounds* b, unsigned int axis) +{ + double width; + + assert(b); + if(axis == XAXIS) + width = b->max.x - b->min.x; + else if(axis == YAXIS) + width = b->max.y - b->min.y; + else + assert(NULL); + return(width); +} + +double Bounds_CenterAxis(TBounds* b, unsigned int axis) +{ + double center; + + assert(b); + if(axis == XAXIS) + center = (b->min.x + b->max.x) * 0.5; + else if(axis == YAXIS) + center = (b->min.y + b->max.y) * 0.5; + else + assert(NULL); + return(center); +} Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CLAMR.reference_output =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CLAMR.reference_output @@ -0,0 +1,19 @@ +Mass of initialized cells equal to 31290.8709635 +Iteration 0 timestep n/a Sim Time 0.0 cells 4412 Mass Sum 31290.8709635 +Iteration 100 timestep 0.000349 Sim Time 0.045244 cells 4652 Mass Sum 31290.8709635 Mass Change 0 +Iteration 200 timestep 0.000386 Sim Time 0.080903 
cells 4760 Mass Sum 31290.8709635 Mass Change 0 +Iteration 300 timestep 0.000442 Sim Time 0.121853 cells 4892 Mass Sum 31290.8709635 Mass Change 0 +Iteration 400 timestep 0.000502 Sim Time 0.169292 cells 4976 Mass Sum 31290.8709635 Mass Change -3.63798e-12 +Iteration 500 timestep 0.000614 Sim Time 0.224092 cells 5096 Mass Sum 31290.8709635 Mass Change -3.63798e-12 +Iteration 600 timestep 0.000701 Sim Time 0.288037 cells 5372 Mass Sum 31290.8709635 Mass Change -3.63798e-12 +Iteration 700 timestep 0.000787 Sim Time 0.362393 cells 5780 Mass Sum 31290.8709635 Mass Change -7.27596e-12 +Iteration 800 timestep 0.000922 Sim Time 0.449275 cells 6152 Mass Sum 31290.8709635 Mass Change -1.09139e-11 +Iteration 900 timestep 0.001131 Sim Time 0.551298 cells 6704 Mass Sum 31290.8709635 Mass Change -1.09139e-11 +Iteration 1000 timestep 0.001318 Sim Time 0.672188 cells 7208 Mass Sum 31290.8709635 Mass Change -1.09139e-11 +Using hash tables to calculate neighbors +hash table size bytes 278784 +Initial order is Hilbert sort. No cycle reorder. Local Stencil is on. +CPU: rezone frequency 17.1000 percent +CPU: calc neigh frequency 17.2000 percent +CPU: refine_smooth_iter per rezone 0.0000 +exit 0 Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeLists.txt =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeLists.txt @@ -0,0 +1,3 @@ +set(PROG CLAMR) +set(RUN_OPTIONS -n 64 -t 1000) +llvm_multisource() Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Cmd.hh =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Cmd.hh @@ -0,0 +1,306 @@ +/* Copyright 2015. Los Alamos National Security, LLC. This material was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. 
Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy + * of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * Under this license, it is required to include a reference to this work. We + * request that each derivative work contain a reference to LANL Copyright + * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly + * measured. + * + * This is LANL Copyright Disclosure C15076/LA-CC-15-054 + */ + +/* + * PowerParser is a general purpose input file parser for software applications. + * + * Authors: Chuck Wingate XCP-2 caw@lanl.gov + * Robert Robey XCP-2 brobey@lanl.gov + */ + +#ifndef CMDHHINCLUDE +#define CMDHHINCLUDE + +// *************************************************************************** +// *************************************************************************** +// Generalized command class. 
+// *************************************************************************** +// *************************************************************************** + +#include +#include +#include +#include +#include +#include "Variable.hh" +#include "Function.hh" + +namespace PP +{ +using std::string; +using std::deque; +using std::vector; +using std::map; +using std::stringstream; + +//class Variable; +//class Function; + +class Cmd +{ +public: + Cmd(); + Cmd(string s, map *v, map *f, + deque *lstr, int lnum, int file_lnum, string fname, + stringstream &serr, int &ierr); + + void set_index_base(int base); + void set_case_sensitive(bool case_sensitive_in); + + void add_word(string str, int lnum, int file_lnum, string fname); + void erase_word(int iw); + void erase_last_word(); + void reset_name_type(); + void delete_words(int i1, int i2); + void check_ppmm(stringstream &serr, int &ierr); + void remove_commas(); + void handle_two_words(); + bool check_input_end(bool kill_run, stringstream &serr, int &ierr); + void print_duplicate_line(int iw, stringstream &ss, int fn_width, + int lnum_width, string after_lnum); + void get_duplicate_sizes(int iw, int &fn_width, int &lnum_width); + + string get_cmd_filename(stringstream &ssfiles); + void handle_quotes(stringstream &serr, int &ierr); + void handle_exe_args(string &sout); + void deprecated_input01(string action, stringstream &serr, int &ierr); + void fatal_error(int iw, stringstream &serr, int &ierr); + void warning(int iw, stringstream &serr, int &ierr); + + void get_bool_int(string &cname, int *array_vals, const vector &size, + vector &dup_cmd1, vector &dup_wdex1, + int dup_fatal, vector &dup_vals, + bool skip, stringstream &serr, int &ierr); + + void get_bool(string &cname, bool *array_vals, const vector &size, + vector &dup_cmd1, vector &dup_wdex1, + int dup_fatal, vector &dup_vals, + bool skip, stringstream &serr, int &ierr); + + void get_int(string &cname, int *array_vals, const vector &size, + vector &dup_cmd1, 
vector &dup_wdex1, + int dup_fatal, vector &dup_vals, + bool skip, stringstream &serr, int &ierr); + + void get_int(string &cname, int64_t *array_vals, const vector &size, + vector &dup_cmd1, vector &dup_wdex1, + int dup_fatal, vector &dup_vals, + bool skip, stringstream &serr, int &ierr); + + void get_real(string &cname, double *array_vals, const vector &size, + vector &dup_cmd1, vector &dup_wdex1, + int dup_fatal, vector &dup_vals, + bool skip, stringstream &serr, int &ierr); + + void get_char(string &cname, vector &vstr, const vector &size, + bool single_char, vector &dup_cmd1, + vector &dup_wdex1, int dup_fatal, + vector &dup_vals, bool skip, + stringstream &serr, int &ierr); + + void get_size(vector &size, stringstream &serr, int &ierr); + void get_sizeb(vector &size, stringstream &serr, int &ierr); + + // Handle unary minus and plus in command lines. + void handle_cmd_unary_minus(stringstream &serr, int &ierr); + void handle_cmd_unary_plus(stringstream &serr, int &ierr); + + // Handle multiplicity in command lines, i.e. a(1)=15*3.0 + void handle_cmd_multiplicity(stringstream &serr, int &ierr); + + // Handle variables. + bool check_for_dimension(stringstream &serr, int &ierr); + bool check_for_var_description(stringstream &serr, int &ierr); + void substitute_variables(stringstream &serr, int &ierr); + void set_variables(stringstream &serr, int &ierr); + + // Math evaluation. + void math_eval(stringstream &serr, int &ierr); + void check_misplaced_math(stringstream &serr, int &ierr); + + // Handle comments. + void single_line_comments(); + void multi_line_comments(int &level); + + // Handle processed flags. + void clear_processed(); + void set_processed(bool ip); + void check_processed(bool &good, stringstream &serr, int &ierr); + + // If statements. + void handle_if(bool &skip, deque &skip_level, + deque &satisfied, + stringstream &serr, int &ierr); + + // Do loops. 
+ void handle_do(bool &skip, deque &do_start, int &cdex, + bool &end_do_loop, stringstream &serr, int &ierr); + bool find_matching_enddo(int &dlev, bool &stop_checking); + + // Subroutines + void handle_subroutines(bool &skip, + bool &go_to_sub, string &sub_name, + bool &go_to_call, + stringstream &serr, int &ierr); + bool find_subroutine(string &sub_name); + void get_call_args(vector &sargs, vector &sargs_isvar, + stringstream &serr, int &ierr); + void get_sub_args(vector &sargs, vector &sargs_isvar); + void copy_call_args(vector &sargs, vector &sargs_isvar); + void copy_sub_args(vector &sargs, vector &sargs_isvar); + + // Accessor functions. + string get_cmd_name() { return cmd_name; } + string get_cmd_type() { return cmd_type; } + int get_nwords() { return words.size(); } + string get_string(int iw) { + if ((int)words.size() <= iw) return ""; + return words[iw].get_string(); + } + string get_original_str() { return original_str; } + + int get_line_number(int iw) { return words[iw].get_line_number(); } + int get_file_line_number(int iw) { return words[iw].get_file_line_number(); } + string get_filename(int iw) { return words[iw].get_filename(); } + string get_filename() { return filename; } + deque *get_lines() { return lines; } + + void set_filename(string fn) { + filename = fn; + for (int iw=0; iw<(int)words.size(); iw++) { + words[iw].set_filename(fn); + } + } + + bool is_include() { if(words[0].get_string() == "include") return true; + return false; } + + // Debug + void print_all_words(); + void print_all_words(stringstream &ss); + void print_using_words(stringstream &ss); + void print_using_words_fm(stringstream &ss); + void print_original_string(stringstream &ss); + + +private: + // Initialization method for this class. 
+ void init(); + + void process_string(string in_str, stringstream &serr, int &ierr); + bool extract_next_word(int &istart, string &str, string &word, + stringstream &serr, int &ierr); + int find_closing_symbol(string opensym, string closesym, int i1); + bool handle_innermost_parens(int &i1, int &i2, int &iwres, int &nargs, + bool remp, stringstream &serr, int &ierr); + void evaluate_function(int iw1, int &i2, int &nargs, + stringstream &serr, int &ierr); + void seval(int &i1, int &i2, stringstream &serr, int &ierr); + void handle_unary_op(int i1, int &i2, string utype, + stringstream &serr, int &ierr); + void do_unary_op(int ip, string utype); + void handle_star_star(); // ** exponentiation + void handle_ops(); + + void subvar_w0(int i1, int &i2, stringstream &serr, int &ierr); + void subvar0(int vardex, string &varname, int increment, + stringstream &serr, int &ierr); + bool evaluate_variable(int iw1, int &i2, int &nargs, + stringstream &serr, int &ierr); + + int find(int i1, int i2, string s); + int find_last(string s, int i1, int i2); + int find_any_char(int i1, int i2, string s); + void replace_words(int i1, int i2, Word &w); + void replace_words(int i1, int i2, vector &vw); + void merge_words(int i1, int i2); + bool separate_str(string &subs, string &fstr, vector &vs); + int find_equals(); + + bool check_syntax(vector &istart, stringstream &serr, int &ierr); + bool get_nvals(vector &istart, const vector &size, + int &nvals, stringstream &serr, int &ierr); + void debug_print_words(string s); + + void fatal_error2(stringstream &serr, int &ierr); + void error_dup_line(string &cname, int wdex, int cdex, + vector &dup_wdex1, vector &dup_cmd1, + vector &dup_vals, const vector &size, + int dup_fatal, stringstream &serr, int &ierr); + + + // This is needed for telling the user what line in the input + // file or include file the error occurred on. 
+ // + // line_number The line_number corresponding to this command, this is + // an index into lines and starts from 1, not 0. + // lines Pointer to the deque of original lines. This contains all + // the lines from the input file and any include files. + // file_line_number The line number in the input file or include file. + // filename The name of the input file or include file. + // + // file_line_number and filename are needed to that the user can open + // the file and go to the line in error. + int line_number, file_line_number; + string filename; + deque *lines; + + // index base, generally 1 for Fortran style and 0 for C/C++, default 1 + // int index_base; -- using static variable instead + + // The original string before processing. + string original_str; + + // Pointer to the map of variables. + map *vmap; + + // Pointer to the map of functions. + map *fmap; + + // Definitions of white space, delimiters, etc. + string white_space; + string delims; + + // Storage for all the words on the line. + deque words; + + // The name and type of the command. + string cmd_name; + string cmd_type; + + // Used for subroutines. + vector call_args, sub_args; + vector call_args_isvar, sub_args_isvar; +}; + + +} // end of PP namespace + +#endif + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Cmd.cc =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Cmd.cc @@ -0,0 +1,3972 @@ +/* Copyright 2015. Los Alamos National Security, LLC. This material was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. 
NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy + * of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * Under this license, it is required to include a reference to this work. We + * request that each derivative work contain a reference to LANL Copyright + * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly + * measured. + * + * This is LANL Copyright Disclosure C15076/LA-CC-15-054 + */ + +/* + * PowerParser is a general purpose input file parser for software applications. + * + * Authors: Chuck Wingate XCP-2 caw@lanl.gov + * Robert Robey XCP-2 brobey@lanl.gov + */ + +// *************************************************************************** +// *************************************************************************** +// This class holds command lines broken up into words. +// The term command is used in a general sense, it includes variable +// assignments, do loops, usual commands, etc. 
+// *************************************************************************** +// *************************************************************************** +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Parser_utils.hh" +#include "Variable.hh" +#include "Function.hh" +#include "Word.hh" +#include "Parser_math.hh" +#include "Cmd.hh" + +namespace PP +{ +using std::cout; +using std::endl; +using std::string; +using std::deque; +using std::vector; +using std::stringstream; +using std::pair; +using std::ifstream; +using std::ios; +using std::setw; + +// index base, generally 1 for Fortran style and 0 for C/C++, default 1 +static int index_base = 1; +static bool case_sensitive = false; + +// =========================================================================== +// Default constructor. +// =========================================================================== +Cmd::Cmd() +{ + init(); +} + + +// =========================================================================== +// Constructor including map of variables. +// =========================================================================== +Cmd::Cmd(string s, map *v, map *f, + deque *lstr, int lnum, int file_lnum, string fname, + stringstream &serr, int &ierr) +{ + init(); + vmap = v; + fmap = f; + original_str = s; + line_number = lnum; + file_line_number = file_lnum; + filename = fname; + lines = lstr; + process_string(s, serr, ierr); +} + + +// =========================================================================== +// Add a word to this cmd. +// =========================================================================== +void Cmd::add_word(string str, int lnum, int file_lnum, string fname) +{ + Word w(str, lnum, file_lnum, fname, lines); + words.push_back(w); +} + + +// =========================================================================== +// Erase a word from this cmd. 
+// ===========================================================================
+void Cmd::erase_word(int iw)
+{
+    words.erase(words.begin()+iw);
+}
+void Cmd::erase_last_word()
+{
+    if (!words.empty()) words.pop_back();  // erasing from an empty deque is undefined
+}
+
+
+// ===========================================================================
+// Remove words that are commas.
+// ===========================================================================
+void Cmd::remove_commas()
+{
+    for (int i=0; i<(int)words.size(); i++) {
+        if (words[i].is_comma()) {
+            words.erase(words.begin()+i);
+            i -= 1;  // re-examine the word that just shifted into slot i
+        }
+    }
+}
+
+
+// ===========================================================================
+// Initialize various private data.
+// ===========================================================================
+void Cmd::init()
+{
+    original_str = "";
+    lines = NULL;  // otherwise indeterminate after Cmd(), yet add_word() hands it to Word
+    white_space = " \t";
+    delims = " \t()[],*/+-=!#";
+    vmap = NULL;
+    fmap = NULL;
+    line_number = 0;
+    file_line_number = 0;
+    filename = "";
+}
+
+
+// ===========================================================================
+// Set index base for input file indexing. 1 -- Fortran like, 0 -- Other
+// languages
+// ===========================================================================
+void Cmd::set_index_base(int base)
+{
+    //cout << "Info:: Setting index base to " << base << endl;
+    index_base = base;
+}
+
+// ===========================================================================
+// Set case sensitivity for input file commands.
+// ===========================================================================
+void Cmd::set_case_sensitive(bool case_sensitive_in)
+{
+    case_sensitive = case_sensitive_in;
+}
+
+// ===========================================================================
+// Process a string.
+// Break the string into words and copy each word to a double ended queue. 
+// ===========================================================================
+void Cmd::process_string(string in_str, stringstream &serr, int &ierr)
+{
+    //cout << "&&&&& Original line = " << endl;
+    //cout << in_str << endl;
+    //cout << "01234567890123456789012345678901234567890123456789" << endl;
+    //cout << "0 1 2 3 4 " << endl;
+
+    string s;
+    int istart = 0;
+    bool found = false;
+    //int plevel = 0;
+    for (;;) {
+        delims = " \t()[],*/+-=!#";
+
+        // Extract the next word from the line.
+        found = extract_next_word(istart, in_str, s, serr, ierr);
+        if (!found) break;
+
+        // Create a new word using the word that was found.
+        // This removes quotes if there are any and types the word.
+        Word w(s, line_number, file_line_number, filename, lines);
+
+        // Copy the word to the end of the queue.
+        words.push_back(w);
+    }
+
+    // Set the command name and type.
+    reset_name_type();
+}
+
+
+// ===========================================================================
+// Extract the next word of str, starting the search at istart, into word.
+// On success istart is moved past the word and true is returned; false
+// means istart was out of range or only white space remained.
+// ===========================================================================
+bool Cmd::extract_next_word(int &istart, string &str, string &word,
+                            stringstream &serr, int &ierr)
+{
+    // serr and ierr are only used by the disabled quotes check below.
+    (void)serr;
+    (void)ierr;
+
+    // If istart is out of bounds then there is nothing to do.
+    if (istart < 0) return false;
+    if (istart >= (int)str.size()) return false;
+
+    // Find the next non blank character.
+    int i1 = str.find_first_not_of(white_space, istart);
+
+    // If a non whitespace character was not found then there are no more
+    // words to extract.
+    if (i1 == (int)string::npos) return false;
+
+    // If the non blank character that was found is a delimiter, like
+    // ()[]+-/* ... then it needs to be a word by itself.
+    if (delims.find(str[i1], 0) != string::npos) {
+        word = str[i1];
+        istart = i1+1;
+        return true;
+    }
+
+    // At this point we have found the start of a word. The end of the
+    // word will be one of the delimiters like ()[]+=-*/spacetab ...
+    string wend = delims;
+
+    // A word delimited by quotes is handled differently. If the i1
+    // position in the string is a beginning quotes then we need to search
+    // for an ending quotes. Anything between quotes is part of the word
+    // including delimiters.
+    bool quotes = false;
+    if (str[i1] == '"') {
+        quotes = true;
+        wend = "\"";
+    }
+    if (str[i1] == '\'') {
+        quotes = true;
+        wend = "\'";
+    }
+
+    // Search for the end of the word by finding the next delimiter. The
+    // delimiter is one index past the end of the word.
+    // But if the next delimiter is + or - then we have to consider that
+    // this could be a floating point number in which case we continue
+    // past the + or - to find the next delimiter.
+    int i2;
+    int i1_start = i1+1;
+    for(;;) {
+        i2 = str.find_first_of(wend, i1_start);
+
+        // If a delimiter was not found then the word extends to the end
+        // of the line.
+        if (i2 == (int)string::npos) {
+            i2 = str.size();
+            break;
+        }
+        else {
+            // Check for a floating point number (fpn). For example
+            // 1.34e+14 or -3.8E-19
+            // i2 might point to the + or - in e+14 or E-19, so we check
+            // for that case. Note that if the + or - is not found, then it
+            // could be a number like 1.e14 but then i2 would point to
+            // something after e14 and we would be ok.
+            // If we do find +e or -e, then everything in front of it needs
+            // to be a digit, if not then this is not a number.
+            bool fpn = false;
+            if (str[i2] == '+' || str[i2] == '-') {
+                if (str[i2-1] == 'e' || str[i2-1] == 'E' ||
+                    str[i2-1] == 'd' || str[i2-1] == 'D') {
+                    fpn = true;
+                    for (int j=i1; j<=i2-2; j++) {
+                        if (!isdigit(static_cast<unsigned char>(str[j])) && str[j] != '.') {
+                            fpn = false;
+                            break;
+                        }
+                    }
+                }
+            }
+
+            if (!fpn) break;
+            i1_start = i2+1;
+        }
+    }
+
+    // If the word is quoted then it should end in quotes.
+    // We do not check for quotes matching here because at this
+    // point we might be in a comment region where quotes mismatch
+    // is allowed. We check for quotes mismatch later.
+    if (quotes) {
+        if (i2 >= (int)str.size()) i2 = (int)str.size() - 1;
+    }
+    /*
+    if (quotes) {
+        bool missing = false;
+        if (i2 >= (int)str.size()) missing = true;
+        else if (str[i1] == '\"' && str[i2] != '\"') missing = true;
+        else if (str[i1] == '\'' && str[i2] != '\'') missing = true;
+        else if (str[i1] == '\"' && str[i2] == '\'') missing = true;
+        else if (str[i1] == '\'' && str[i2] == '\"') missing = true;
+        if (missing) {
+            fatal_error2(serr, ierr);
+            serr << "Quotes mismatch found." << endl;
+            serr << "A starting quotes must have a closing quotes." << endl;
+            serr << "Double quotes, \", must be matched with double quotes." << endl;
+            serr << "Single quotes, \', must be matched with single quotes." << endl;
+            ierr = 2;
+            return false;
+        }
+    }
+    */
+
+    // We include the quotes symbols in the word. The quote symbols will
+    // be removed elsewhere.
+    if (quotes) i2 += 1;
+
+    // The word is now delimited by i1 and i2-1, return it in word.
+    word = str.substr(i1, i2 - i1);
+
+    // Update the starting point for finding the next word.
+    istart = i2;
+
+    // A word was successfully found so return true.
+    return true;
+}
+
+
+// ===========================================================================
+// Reset the command name and type. 
Consider the following command: +// * lasdkj */ cmd = 5.0 +// The original command name is "*", but after the multi-line comment is +// removed, the command name should be "cmd". +// =========================================================================== +void Cmd::reset_name_type() +{ + if ((int)words.size() == 0) { + cmd_name = " "; + cmd_type = " "; + return; + } + cmd_name = words[0].get_string(); + if (! case_sensitive) { + transform(cmd_name.begin(), cmd_name.end(), cmd_name.begin(), tolower); + } + cmd_type = "command"; + if (words[0].is_variable()) cmd_type = "assignment"; + if (cmd_name == "parser_list_variables") cmd_type = "debug"; + if (cmd_name == "parser_list_functions") cmd_type = "debug"; + if (cmd_name == "parser_print_fbuffer") cmd_type = "debug"; + if (cmd_name == "if") cmd_type = "internal_cmd"; + if (cmd_name == "elseif") cmd_type = "internal_cmd"; + if (cmd_name == "endif") cmd_type = "internal_cmd"; + if (cmd_name == "do") cmd_type = "internal_cmd"; + if (cmd_name == "return") cmd_type = "internal_cmd"; + if (cmd_name == "enddo") cmd_type = "internal_cmd"; + if (cmd_name == "stop") cmd_type = "internal_cmd"; + if (cmd_name == "when") cmd_type = "internal_cmd"; + if (cmd_name == "endwhen") cmd_type = "internal_cmd"; +} + + +// =========================================================================== +// Given a line like +// include filename1 filename2 filename3 ... +// Find the first filename that exists and return that. +// This should only be called on the io processor. +// =========================================================================== +string Cmd::get_cmd_filename(stringstream &ssfiles) +{ + for (int i=1; i<(int)words.size(); i++) { + string fn = words[i].get_string(); + + // The quotes may still be on the word, strip them off if they are + // present. 
+ int len = (int)fn.size(); + if ((fn[len-1] == '\"') || (fn[len-1] == '\'')) { + fn.erase(fn.end() - 1); + } + if ((fn[0] == '\"') || (fn[0] == '\'')) { + fn.erase(fn.begin()); + } + + ssfiles << " " << fn << endl; + + // Open the file to test if it exists. + ifstream instm(fn.c_str(), ios::in); + instm.close(); + if( instm.fail() ) continue; + return fn; + } + return ""; +} + + +// =========================================================================== +// Handle unary minus in a command line (not in math(..)) +// =========================================================================== +void Cmd::handle_cmd_unary_minus(stringstream &serr, int &ierr) +{ + int ipstart = 0; + for (;;) { + int ip = find(ipstart, (int)words.size()-1, "-"); + + // If we do not find any more minus signs then we are done. + if (ip == -1) return; + + // The word after the minus sign must be a number. + if (!words[ip+1].is_number()) { + words[ip+1].fatal_error(serr, ierr); + serr << "Expected the object following the unary - to" + " be a number." << endl; + serr << "Instead, it was " << words[ip+1].get_string() << endl; + ierr = 2; + return; + } + + // Actually do the negate operation. + do_unary_op(ip, "-"); + ipstart = ip+1; + continue; + } +} + + +// =========================================================================== +// Handle unary plus in a command line (not in math(..)) +// =========================================================================== +void Cmd::handle_cmd_unary_plus(stringstream &serr, int &ierr) +{ + int ipstart = 0; + for (;;) { + int ip = find(ipstart, (int)words.size()-1, "+"); + + // If we do not find any more minus signs then we are done. + if (ip == -1) return; + + // The word after the plus sign must be a number. + if (!words[ip+1].is_number()) { + words[ip+1].fatal_error(serr, ierr); + serr << "Expected the object following the unary + to" + " be a number." 
<< endl; + serr << "Instead, it was " << words[ip+1].get_string() << endl; + ierr = 2; + return; + } + + // The + sign is not needed. + delete_words(ip, ip); + ipstart = ip+1; + continue; + } +} + + +// =========================================================================== +// The following type of command is allowed: +// a(1) = 15*3.0 +// meaning that 3.0 is to be replicated 15 times and thus a(1)-a(15) is set +// by this command. +// =========================================================================== +void Cmd::handle_cmd_multiplicity(stringstream &serr, int &ierr) +{ + int ipstart = 0; + for (;;) { + int ip = find(ipstart, (int)words.size()-1, "*"); + + // If we do not find any more asterisks then we are done. + if (ip == -1) return; + + if (ip==0) { + fatal_error2(serr, ierr); + serr << "Asterisk cannot be at the start of a line." << endl; + ierr = 2; + return; + } + + if (ip == (int)words.size()-1) { + words[ip].fatal_error(serr, ierr); + serr << "Asterisk cannot be at the end of a line." << endl; + ierr = 2; + return; + } + + // The word after the asterisk must be a number or a boolean. + // Wait, why is this? We actually allow strings also, really + // we allow anything. + //if (!words[ip+1].is_number() && !words[ip+1].is_bool()) { + // words[ip+1].fatal_error(serr, ierr); + // serr << "Expected the object following the * to" + // " be a number or a logical." << endl; + // serr << "Instead, it was " << words[ip+1].get_string() << endl; + // ierr = 2; + // return; + //} + + // The word before the asterisk must be a number. + if (!words[ip-1].is_number()) { + words[ip-1].fatal_error(serr, ierr); + serr << "Expected the object before the * to" + " be a number." << endl; + serr << "Instead, it was " << words[ip-1].get_string() << endl; + ierr = 2; + return; + } + + // Set the multiplicity. 
+ int imult = words[ip-1].get_int(serr, ierr); + words[ip+1].set_multiplicity(imult); + Word w = words[ip+1]; + replace_words(ip-1, ip+1, w); + ipstart = ip; + } +} + + + +// *************************************************************************** +// *************************************************************************** +// Functions for getting values from the commands. +// *************************************************************************** +// *************************************************************************** + +// =========================================================================== +// Get boolean values. This gets all the words past the = sign, +// converts them to bool (and then to int), and puts them in the output arrays. +// +// The expected commands are: +// cmdname = .true. 0d +// cmdname(5) = false true false 1d +// cmdname(5,9) = true false true 2d +// etc. +// +// We also allow +// cmdname = false true false +// and we will supply the starting indices of (1) or (1,1), etc. +// +// But note that the , is gone at this point, so the 2d command is +// cmdname ( 5 9 ) = true false true 2d +// +// This function works for any dimension, 0,1,2,3,... +// +// We pass the result back as an int because of the incompatibility between +// fortran logical and c++ bool. +// =========================================================================== +void Cmd::get_bool_int(string &cname, int *array_vals, const vector &size, + vector &dup_cmd1, vector &dup_wdex1, + int dup_fatal, vector &dup_vals, + bool skip, stringstream &serr, int &ierr) +{ + // Get the dimension of the array, 0,1,2,3,... + int dim = (int)size.size(); + + // Check syntax, for example an equals sign must be present, and set istart. + // istart Position in array_vals where we start filling it. 
+ // Note that istart starts from index base (default 1, Fortran style) + // Use set_index_base_zero for C/C++ index convention + vector istart(dim,0); + if (!check_syntax(istart, serr, ierr)) return; + + // If skipping, we don't need to get array values. + if (skip) { + set_processed(true); + return; + } + + // Get the number of values past the = sign. + // Also mark the words up to and including the = sign as processed. + int nvals = 0; + if (!get_nvals(istart, size, nvals, serr, ierr)) return; + + // 0d is a special case. + if (dim == 0) { + bool b = words[2].get_bool(serr, ierr); + int cvalue = 0; + if (b) cvalue = 1; + *array_vals = cvalue; + return; + } + + // Get the values and return. + //int ieqp1 = 5 + dim - 1; + int ieqp1 = find_equals() + 1; + Parser_utils putils(index_base); + int k = putils.start_dex(istart, size); + for (int i=ieqp1; i<(int)words.size(); i++) { + bool b = words[i].get_bool(serr, ierr); + int cvalue = 0; + if (b) cvalue = 1; + int imult = words[i].get_multiplicity(); + for (int j=1; j<=imult; j++) { + error_dup_line(cname, i, k, dup_wdex1, dup_cmd1, dup_vals, + size, dup_fatal, serr, ierr); + array_vals[k++] = cvalue; + } + } +} + +// =========================================================================== +// Get boolean values. This gets all the words past the = sign, +// converts them to bool and puts them in the output arrays. +// +// The expected commands are: +// cmdname = .true. 0d +// cmdname(5) = false true false 1d +// cmdname(5,9) = true false true 2d +// etc. +// +// We also allow +// cmdname = false true false +// and we will supply the starting indices of (1) or (1,1), etc. +// +// But note that the , is gone at this point, so the 2d command is +// cmdname ( 5 9 ) = true false true 2d +// +// This function works for any dimension, 0,1,2,3,... 
+// ===========================================================================
+void Cmd::get_bool(string &cname, bool *array_vals, const vector &size,
+                   vector &dup_cmd1, vector &dup_wdex1,
+                   int dup_fatal, vector &dup_vals,
+                   bool skip, stringstream &serr, int &ierr)
+{
+    // NOTE(review): every `vector` in this function (signature and istart)
+    // has no template argument list in this copy of the patch; it looks
+    // stripped by text extraction. Confirm the element types against the
+    // original Cmd.cc before applying.
+
+    // Get the dimension of the array, 0,1,2,3,...
+    int dim = (int)size.size();
+
+    // Check syntax, for example an equals sign must be present, and set istart.
+    // istart   Position in array_vals where we start filling it.
+    // Note that istart starts from index base (default 1, Fortran style)
+    // Use set_index_base_zero for C/C++ index convention
+    vector istart(dim,0);
+    if (!check_syntax(istart, serr, ierr)) return;
+
+    // If skipping, we don't need to get array values.
+    if (skip) {
+        set_processed(true);
+        return;
+    }
+
+    // Get the number of values past the = sign.
+    // Also mark the words up to and including the = sign as processed.
+    // nvals itself is not used further in this function.
+    int nvals = 0;
+    if (!get_nvals(istart, size, nvals, serr, ierr)) return;
+
+    // 0d is a special case.
+    // The scalar value is read from the fixed word position 2.
+    if (dim == 0) {
+        bool b = words[2].get_bool(serr, ierr);
+        *array_vals = b;
+        return;
+    }
+
+    // Get the values and return.
+    // k is the position in array_vals computed by start_dex from istart
+    // and size; it advances by one for every value stored.
+    //int ieqp1 = 5 + dim - 1;
+    int ieqp1 = find_equals() + 1;
+    Parser_utils putils(index_base);
+    int k = putils.start_dex(istart, size);
+    for (int i=ieqp1; i<(int)words.size(); i++) {
+        bool b = words[i].get_bool(serr, ierr);
+        int imult = words[i].get_multiplicity();
+        // A word with multiplicity imult is stored imult times;
+        // error_dup_line is called before each store.
+        for (int j=1; j<=imult; j++) {
+            error_dup_line(cname, i, k, dup_wdex1, dup_cmd1, dup_vals,
+                           size, dup_fatal, serr, ierr);
+            array_vals[k++] = b;
+        }
+    }
+}
+
+
+// ===========================================================================
+// Get integer values. This gets all the words past the = sign,
+// converts them to int, and puts them in the output arrays.
+//
+// The expected commands are:
+//     cmdname = some_int                      0d
+//     cmdname(5) = 3, 5, -15, 10              1d
+//     cmdname(5,9) = 3, 7, -20, 154           2d
+//     etc.
+//
+// We also allow
+//     cmdname = 3, 5, -15, 10
+// and we will supply the starting indices of (1) or (1,1), etc.
+//
+// But note that the , is gone at this point, so the 2d command is
+//     cmdname ( 5 9 ) = 3 7 -20 154 ...
+//
+// This function works for any dimension, 0,1,2,3,...
+// ===========================================================================
+void Cmd::get_int(string &cname, int *array_vals, const vector &size,
+                  vector &dup_cmd1, vector &dup_wdex1,
+                  int dup_fatal, vector &dup_vals,
+                  bool skip, stringstream &serr, int &ierr)
+{
+    // NOTE(review): every `vector` in this function (signature and istart)
+    // has no template argument list in this copy of the patch; it looks
+    // stripped by text extraction. Confirm the element types against the
+    // original Cmd.cc before applying.
+
+    // Get the dimension of the array, 0,1,2,3,...
+    int dim = (int)size.size();
+
+    // Check syntax, for example an equals sign must be present, and set istart.
+    // istart   Position in array_vals where we start filling it.
+    // Note that istart starts from index base (default 1, Fortran style)
+    // Use set_index_base_zero for C/C++ index convention
+    vector istart(dim,0);
+    if (!check_syntax(istart, serr, ierr)) return;
+
+    // If skipping, we don't need to get array values.
+    if (skip) {
+        set_processed(true);
+        return;
+    }
+
+    // Get the number of values past the = sign.
+    // Also mark the words up to and including the = sign as processed.
+    // nvals itself is not used further in this function.
+    int nvals = 0;
+    if (!get_nvals(istart, size, nvals, serr, ierr)) return;
+
+    // 0d is a special case.
+    // The scalar value is read from the fixed word position 2.
+    if (dim == 0) {
+        *array_vals = words[2].get_int(serr, ierr);
+        return;
+    }
+
+    // Get the values and return.
+    // k is the position in array_vals computed by start_dex from istart
+    // and size; it advances by one for every value stored.
+    //int ieqp1 = 5 + dim - 1;
+    int ieqp1 = find_equals() + 1;
+    Parser_utils putils(index_base);
+    int k = putils.start_dex(istart, size);
+    for (int i=ieqp1; i<(int)words.size(); i++) {
+        int iw = words[i].get_int(serr, ierr);
+        int imult = words[i].get_multiplicity();
+        // A word with multiplicity imult is stored imult times;
+        // error_dup_line is called before each store.
+        for (int j=1; j<=imult; j++) {
+            error_dup_line(cname, i, k, dup_wdex1, dup_cmd1, dup_vals,
+                           size, dup_fatal, serr, ierr);
+            array_vals[k++] = iw;
+        }
+    }
+}
+
+
+// ===========================================================================
+// Get int64_t values.
+// This gets all the words past the = sign,
+// converts them to int64_t, and puts them in the output arrays.
+//
+// The expected commands are:
+//     cmdname = some_int                      0d
+//     cmdname(5) = 3, 5, -15, 10              1d
+//     cmdname(5,9) = 3, 7, -20, 154           2d
+//     etc.
+//
+// We also allow
+//     cmdname = 3, 5, -15, 10
+// and we will supply the starting indices of (1) or (1,1), etc.
+//
+// But note that the , is gone at this point, so the 2d command is
+//     cmdname ( 5 9 ) = 3 7 -20 154 ...
+//
+// This function works for any dimension, 0,1,2,3,...
+// ===========================================================================
+void Cmd::get_int(string &cname, int64_t *array_vals, const vector &size,
+                  vector &dup_cmd1, vector &dup_wdex1,
+                  int dup_fatal, vector &dup_vals,
+                  bool skip, stringstream &serr, int &ierr)
+{
+    // NOTE(review): every `vector` in this function (signature and istart)
+    // has no template argument list in this copy of the patch; it looks
+    // stripped by text extraction. Confirm the element types against the
+    // original Cmd.cc before applying.
+
+    // Get the dimension of the array, 0,1,2,3,...
+    int dim = (int)size.size();
+
+    // Check syntax, for example an equals sign must be present, and set istart.
+    // istart   Position in array_vals where we start filling it.
+    // Note that istart starts from index base (default 1, Fortran style)
+    // Use set_index_base_zero for C/C++ index convention
+    vector istart(dim,0);
+    if (!check_syntax(istart, serr, ierr)) return;
+
+    // If skipping, we don't need to get array values.
+    if (skip) {
+        set_processed(true);
+        return;
+    }
+
+    // Get the number of values past the = sign.
+    // Also mark the words up to and including the = sign as processed.
+    // nvals itself is not used further in this function.
+    int nvals = 0;
+    if (!get_nvals(istart, size, nvals, serr, ierr)) return;
+
+    // 0d is a special case.
+    // The scalar value is read from the fixed word position 2.
+    if (dim == 0) {
+        *array_vals = words[2].get_int64_t(serr, ierr);
+        return;
+    }
+
+    // Get the values and return.
+    // k is the position in array_vals computed by start_dex from istart
+    // and size; it advances by one for every value stored.
+    //int ieqp1 = 5 + dim - 1;
+    int ieqp1 = find_equals() + 1;
+    Parser_utils putils(index_base);
+    int k = putils.start_dex(istart, size);
+    for (int i=ieqp1; i<(int)words.size(); i++) {
+        int64_t iw = words[i].get_int64_t(serr, ierr);
+        int imult = words[i].get_multiplicity();
+        // A word with multiplicity imult is stored imult times;
+        // error_dup_line is called before each store.
+        for (int j=1; j<=imult; j++) {
+            error_dup_line(cname, i, k, dup_wdex1, dup_cmd1, dup_vals,
+                           size, dup_fatal, serr, ierr);
+            array_vals[k++] = iw;
+        }
+    }
+}
+
+// ===========================================================================
+// Get the real (double) values. This gets all the words past the = sign,
+// converts them to doubles, and puts them in the output arrays.
+//
+// The expected commands are:
+//     cmdname = some_double                       0d
+//     cmdname(5) = 3.0, 35, -15e20, 10.154        1d
+//     cmdname(5,9) = 3.0, 35, -15e20, 10.154      2d
+//     etc.
+//
+// We also allow
+//     cmdname = 3.0, 35, -15e20, 10.154
+// and we will supply the starting indices of (1) or (1,1), etc.
+//
+// But note that the , is gone at this point, so the 2d command is
+//     cmdname ( 5 9 ) = 3.0 35 -15e20 10.154 ...
+//
+// This function works for any dimension, 0,1,2,3,...
+// ===========================================================================
+void Cmd::get_real(string &cname, double *array_vals, const vector &size,
+                   vector &dup_cmd1, vector &dup_wdex1,
+                   int dup_fatal, vector &dup_vals,
+                   bool skip, stringstream &serr, int &ierr)
+{
+    // NOTE(review): every `vector` in this function (signature and istart)
+    // has no template argument list in this copy of the patch; it looks
+    // stripped by text extraction. Confirm the element types against the
+    // original Cmd.cc before applying.
+
+    // Get the dimension of the array, 0,1,2,3,...
+    int dim = (int)size.size();
+
+    // Check syntax, for example an equals sign must be present, and set istart.
+    // istart   Position in array_vals where we start filling it.
+    // Note that istart starts from index base (default 1, Fortran style)
+    // Use set_index_base_zero for C/C++ index convention
+    vector istart(dim,0);
+    if (!check_syntax(istart, serr, ierr)) return;
+
+    // If skipping, we don't need to get array values.
+    if (skip) {
+        set_processed(true);
+        return;
+    }
+
+    // Get the number of values past the = sign.
+    // Also mark the words up to and including the = sign as processed.
+    // nvals itself is not used further in this function.
+    int nvals = 0;
+    if (!get_nvals(istart, size, nvals, serr, ierr)) return;
+
+    // 0d is a special case.
+    // Note that we do not increment dup_vals for 0d because duplicate scalar
+    // commands are handled differently from array commands.
+    if (dim == 0) {
+        *array_vals = words[2].get_double(serr, ierr);
+        return;
+    }
+
+    // All other dimensions.
+    // k is the position in array_vals computed by start_dex from istart
+    // and size; it advances by one for every value stored.
+    //int ieqp1 = 5 + dim - 1;
+    int ieqp1 = find_equals() + 1;
+    Parser_utils putils(index_base);
+    int k = putils.start_dex(istart, size);
+    for (int i=ieqp1; i<(int)words.size(); i++) {
+        double d = words[i].get_double(serr, ierr);
+        int imult = words[i].get_multiplicity();
+        // A word with multiplicity imult is stored imult times;
+        // error_dup_line is called before each store.
+        for (int j=1; j<=imult; j++) {
+            error_dup_line(cname, i, k, dup_wdex1, dup_cmd1, dup_vals,
+                           size, dup_fatal, serr, ierr);
+            array_vals[k++] = d;
+        }
+    }
+}
+
+
+// ===========================================================================
+// Get the character values. This gets all the words past the = sign,
+// converts them to chars, and puts them in the output arrays.
+//
+// The expected commands are:
+//     cmdname = q                             0d single character
+//     cmdname = char_string                   0d character string
+//     cmdname(3) = "May" "the", "force", "be" 1d array of strings
+//     cmdname(5,9) = "11" "21" "31"           2d
+//     etc.
+//
+// We also allow
+//     cmdname = "May" "the", "force", "be"
+// and we will supply the starting indices of (1) or (1,1), etc.
+//
+// But note that the , is gone at this point, so the 2d command is
+//     cmdname ( 5 9 ) = "11" "21" "31"
+//
+// This function works for any dimension, 0,1,2,3,...
+// For 0d, it has an extra flag to distinguish between single characters
+// and a character string.
+// ===========================================================================
+void Cmd::get_char(string &cname, vector &vstr, const vector &size,
+                   bool single_char, vector &dup_cmd1,
+                   vector &dup_wdex1, int dup_fatal,
+                   vector &dup_vals, bool skip,
+                   stringstream &serr, int &ierr)
+{
+    // NOTE(review): every `vector` in this function (signature and istart)
+    // has no template argument list in this copy of the patch; it looks
+    // stripped by text extraction. Confirm the element types against the
+    // original Cmd.cc before applying.
+
+    // Get the dimension of the array, 0,1,2,3,...
+    int dim = (int)size.size();
+
+    // Check syntax, for example an equals sign must be present, and set istart.
+    // istart   Position in array_vals where we start filling it.
+    // Note that istart starts from index base (default 1, Fortran style)
+    // Use set_index_base_zero for C/C++ index convention
+    vector istart(dim,0);
+    if (!check_syntax(istart, serr, ierr)) return;
+
+    // If skipping, we don't need to get array values.
+    if (skip) {
+        set_processed(true);
+        return;
+    }
+
+    // Get the number of values past the = sign.
+    // Also mark the words up to and including the = sign as processed.
+    // nvals itself is not used further in this function.
+    int nvals = 0;
+    if (!get_nvals(istart, size, nvals, serr, ierr)) return;
+
+    // 0d is a special case - get a single char
+    // The result is written to vstr[0], so vstr must already hold at
+    // least one element.
+    if (dim == 0 && single_char) {
+        vstr[0] = words[2].get_single_char(serr, ierr);
+        return;
+    }
+
+    // 0d is a special case - get a single string
+    if (dim == 0) {
+        vstr[0] = words[2].get_stringp();
+        return;
+    }
+
+    // Get the value and return, dim > 0.
+    // get_stringp is the same as get_string except get_stringp also marks
+    // the word as being processed.
+    // k is the position in vstr computed by start_dex from istart and
+    // size; vstr is indexed, not appended to, so it must already be sized.
+    //int ieqp1 = 5 + dim - 1;
+    int ieqp1 = find_equals() + 1;
+    Parser_utils putils(index_base);
+    int k = putils.start_dex(istart, size);
+    for (int i=ieqp1; i<(int)words.size(); i++) {
+        string s = words[i].get_stringp();
+        int imult = words[i].get_multiplicity();
+        // A word with multiplicity imult is stored imult times;
+        // error_dup_line is called before each store.
+        for (int j=1; j<=imult; j++) {
+            error_dup_line(cname, i, k, dup_wdex1, dup_cmd1, dup_vals,
+                           size, dup_fatal, serr, ierr);
+            vstr[k++] = s;
+        }
+    }
+
+}
+
+
+// ===========================================================================
+// Get sizes of arrays, this works for dimensions 1,2,3,...
+//
+// The size vector contains the sizes (or bounds) of each array dimensions.
+// It is assumed in this routine that all but the last size is known (this
+// is input) and that this routine will determine the last size. See the
+// get_sizeb function below where a different assumption is made.
+//
+// Suppose, for example, we have a 3d array called a3d which is dimensioned
+// a3d(5,3,:). The first two dimensions are known, 5 and 3, the last dimension
+// is unknown and will be determined by this routine.
+//
+// size is a vector of ints of size 3, with elements 5,3,? where the ? is
+// to be determined.
+//
+// Note that this routine is called in a loop over all the lines in the
+// input which is why we set the size using maximum.
+// ===========================================================================
+void Cmd::get_size(vector &size, stringstream &serr, int &ierr)
+{
+    // NOTE(review): every `vector` in this function has no template
+    // argument list in this copy of the patch; it looks stripped by text
+    // extraction. Confirm against the original Cmd.cc before applying.
+
+    // Get the dimension of the array, 0,1,2,3,...
+    int dim = (int)size.size();
+
+    // Check syntax, also sets istart.
+    // istart   Position in array where we start filling it.
+    //          Example command might be a3d(3,2,2) = ... In this case istart
+    //          would be a vector of length 3 containing 3,2,2
+    // Note that istart starts from index base (default 1, Fortran style)
+    // Use set_index_base_zero for C/C++ index convention
+    vector istart(dim,0);
+    if (!check_syntax(istart, serr, ierr)) return;
+
+    // Get the number of values past the = sign.
+    // size0 is all zeros, which makes get_nvals return right after
+    // counting (it returns early when size[0] == 0).
+    int nvals = 0;
+    vector size0(dim,0);
+    if (!get_nvals(istart, size0, nvals, serr, ierr)) return;
+
+    int sm = 1;
+    // NOTE(review): the next line is damaged in this copy of the patch.
+    // The text between the loop condition's `<` and the `>` of the
+    // comparison against size[dim-1] is missing, including the loop body
+    // and the declaration of maxval. Restore it from the original Cmd.cc.
+    for (int i=0; i size[dim-1]) {
+        size[dim-1] = maxval;
+    }
+}
+
+
+// ===========================================================================
+// This is a special purpose routine to get sizes for certain 2d arrays.
+//
+// Suppose we have the following input
+//     mults(1,1) = 0. 0. 1. 5. 6. 9.
+//     mults(1,2) = 3. 5. 8. 9. 10. 11. 20. 10
+//     mults(1,3) = 30. 5. 38. 3.
+// In this case we don't know the size of either of the array dimensions, and
+// of course the user does not know the size either and thus cannot somehow
+// merge the above lines.
+//
+// The purpose of this function is to obtain sizes for both the array
+// dimensions so memory allocation of the array can be done.
+//
+// The size vector contains the sizes (or bounds) of each array dimensions.
+// For the above example, this function would determine size[0] to be 8 and
+// size[1] to be 3. 8 is just the max of the number of values put in per
+// entry and 3 is just the max of the second index.
+//
+// Note that this routine is called in a loop over all the lines in the
+// input which is why we set the size using maximum.
+//
+// This routine only works for 2d arrays.
+// ===========================================================================
+void Cmd::get_sizeb(vector &size, stringstream &serr, int &ierr)
+{
+    // NOTE(review): every `vector` in this function has no template
+    // argument list in this copy of the patch; it looks stripped by text
+    // extraction. Confirm against the original Cmd.cc before applying.
+
+    // Get the dimension of the array, 0,1,2,3,...
+    int dim = (int)size.size();
+
+    // This is a special purpose routine, dim must be 2.
+    if (dim != 2) {
+        fatal_error2(serr, ierr);
+        serr << "Cmd.cc, get_sizeb, internal error." << endl;
+        serr << "dim != 2, dim=" << dim << endl << endl;
+        ierr = 2;
+        return;
+    }
+
+    // Check syntax, also sets istart.
+    // istart   Position in array where we start filling it.
+    //          Example command might be a3d(3,2,2) = ... In this case istart
+    //          would be a vector of length 3 containing 3,2,2
+    // Note that istart starts from index base (default 1, Fortran style)
+    // Use set_index_base_zero for C/C++ index convention
+    vector istart(dim,0);
+    if (!check_syntax(istart, serr, ierr)) return;
+
+    // Get the number of values past the = sign.
+    // size0 is all zeros, which makes get_nvals return right after
+    // counting (it returns early when size[0] == 0).
+    int nvals = 0;
+    vector size0(dim,0);
+    if (!get_nvals(istart, size0, nvals, serr, ierr)) return;
+
+    // Set the size vector
+    // size[0] grows to the last first-index position this line fills,
+    // size[1] grows to the largest second index seen.
+    int maxval = istart[0] + nvals - 1;
+    if (maxval > size[0]) {
+        size[0] = maxval;
+    }
+
+    maxval = istart[1];
+    if (maxval > size[1]) {
+        size[1] = maxval;
+    }
+}
+
+
+// ===========================================================================
+// Check command syntax for any dimension array. The expected command is:
+//     cmdname = .true.                        0d
+//     cmdname(5) = 1, 3, -4                   1d
+//     cmdname(3,4) = 1.e19, 23., -45.         2d
+//     etc.
+//
+// We also allow
+//     cmdname = "May" "the", "force", "be"
+// and we will supply the starting indices of (1) or (1,1), etc.
+//
+// Note that at this point, the commas have been removed so the 2d command
+// is actually
+//     cmdname ( 3 4 ) = 1.e19 23. -45.
+// ===========================================================================
+bool Cmd::check_syntax(vector &istart, stringstream &serr, int &ierr)
+{
+    // NOTE(review): the `vector` parameter has no template argument list
+    // in this copy of the patch, and the end of this function is missing
+    // (see the damaged line further down). Restore from the original
+    // Cmd.cc before applying.
+
+    // Get the dimension of the array, 0,1,2,3,...
+    int dim = (int)istart.size();
+
+    // An array command whose second word is the = sign carries no explicit
+    // index list; the parenthesis checks are skipped for it.
+    bool skip_check = false;
+    if (dim > 0) {
+        int ieqt = find_equals();
+        if (ieqt == 1) skip_check = true;
+    }
+
+    // Must be at least a certain number of words on the line.
+    // The *_wc variants are only used in the error messages.
+    int nw_min = 3;
+    int nw_min_wc = 3;
+    if (dim > 0 && (!skip_check)) {
+        nw_min = 6 + dim - 1;
+        nw_min_wc = nw_min + dim - 1;
+    }
+    if ((int)words.size() < nw_min) {
+        fatal_error2(serr, ierr);
+        serr << "Expected number words in this line >= " << nw_min_wc << endl;
+        serr << "Actual number words = " << words.size() << endl << endl;
+        ierr = 2;
+        // If there aren't enough words on the line, then it is hopeless.
+        return false;
+    }
+
+
+    // Word at index ieq must be an = sign.
+    int ieq = 1;
+    int ieq_wc = 2;
+    if (dim > 0 && (!skip_check)) {
+        ieq = 4 + dim -1;
+        ieq_wc = ieq + 1 + dim - 1;
+    }
+    if (words[ieq].get_string() != "=") {
+        words[ieq].fatal_error(serr, ierr);
+        serr << "Expected an equals sign for symbol " << ieq_wc << endl;
+        serr << "Instead symbol " << ieq_wc << " is: " <<
+            words[ieq].get_string() << endl << endl;
+        ierr = 2;
+    }
+
+    // The value must not have any multiplicity, i.e. be just a single value.
+    // This only applies to 0d, values for arrays can have multiplicity.
+    if (dim == 0) {
+        if (words[2].get_multiplicity() != 1) {
+            words[2].fatal_error(serr, ierr);
+            serr << "Multiplicity not equal 1 for " << words[2].get_string() << endl;
+            serr << "Multiplicity is: " << words[2].get_multiplicity() << endl << endl;
+            ierr = 2;
+        }
+    }
+
+    // Nothing more to check for 0d.
+    // Note that true is returned here even when ierr was set to 2 above.
+    if (dim == 0) return true;
+
+    if (!skip_check) {
+        // Word at index 1 must be a "(".
+        if (words[1].get_string() != "(") {
+            words[1].fatal_error(serr, ierr);
+            serr << "Expected an open parenthesis ,(, following the command name"
+                " in this line," << endl;
+            serr << "For example: " << cmd_name << "(...) = ..." << endl;
+            serr << "Instead found: " << words[1].get_string() << endl << endl;
+            ierr = 2;
+        }
+
+        // There must be a closing parenthesis.
+        int irp = 3 + dim - 1;
+        if (words[irp].get_string() != ")") {
+            words[irp].fatal_error(serr, ierr);
+            serr << "Expected a close parenthesis ,), following the array indices"
+                " in this line," << endl;
+            serr << "For example: " << cmd_name << "(...) = ..." << endl;
+            serr << "Instead found: " << words[irp].get_string() << endl << endl;
+            ierr = 2;
+        }
+    }
+
+
+    // istart   Position in array_vals where we start filling it.
+    // Note that istart starts from index base (default 1, Fortran style)
+    // Use set_index_base_zero for C/C++ index convention
+    if (skip_check) {
+        // NOTE(review): the next line is damaged in this copy of the patch.
+        // The text from the loop condition's `<` up to the `>` of a `>=`
+        // inside an error message is missing (the default-index loop, the
+        // else branch that reads the indices, and the start of the index
+        // error message). Restore it from the original Cmd.cc.
+        for (int i=0; i= " << index_base << endl;
+                serr << "Integer includes numbers like 3, 3., 3.0, but not 3.5" << endl;
+                serr << "The index input is: " << istart[i] << endl << endl;
+                ierr2 = 2;
+            }
+        }
+        if (ierr2 == 2) ierr = 2;
+    }
+
+    // NOTE(review): the next line is damaged in this copy of the patch.
+    // The remainder of check_syntax and the beginning of the get_nvals
+    // definition (header comment, return type, name and the start of its
+    // parameter list) are missing. Restore from the original Cmd.cc.
+    for (int i=0; i &istart, const vector &size,
+                    int &nvals, stringstream &serr, int &ierr)
+{
+    int nvals_cur;
+    // Get the array dimension, 0,1,2,3,...
+    int dim = (int)istart.size();
+
+    // 0d is a special case.
+    if (dim == 0) {
+        nvals = 1;
+        //if (size[0] == 0) return true;
+        words[0].set_processed(true);
+        words[1].set_processed(true);
+        return true;
+    }
+
+    // Index of word after equals sign.
+    //int ieqp1 = 5 + dim - 1;
+    int ieqp1 = find_equals() + 1;
+
+    // nvals    Number of values after the = sign.
+    // Each word contributes its multiplicity; a non-positive multiplicity
+    // is reported but still added to the total.
+    nvals = 0;
+    for (int i=ieqp1; i<(int)words.size(); i++) {
+        nvals_cur = words[i].get_multiplicity();
+        if( nvals_cur <= 0 ){
+            fatal_error2(serr, ierr);
+            serr << "Count must be positive [" << nvals_cur << "]" << endl;
+            ierr = 2;
+        }
+        nvals += nvals_cur;
+    }
+
+    // This is for the get size function. We just want nvals and do not want
+    // to do the check or marking as processed.
+    if (size[0] == 0) return true;
+
+    // Get the max size of the array.
+    //int maxvals = size1*size2;
+    int maxvals = size[0];
+    for (int i=2; i<=dim; i++) {
+        maxvals *= size[i-1];
+    }
+
+    //int ip = istart[0]-1;
+    //if (dim == 2) {
+    //    ip = istart[0]-1 + (istart[1]-1)*size[0];
+    //}
+    //if (dim == 3) {
+    //    ip = istart[0]-1 + (istart[1]-1)*size[0] + (istart[2]-1)*size[0]*size[1];
+    //}
+    //if (dim == 4) {
+    //    ip = istart[0]-1 + (istart[1]-1)*size[0] + (istart[2]-1)*size[0]*size[1] +
+    //        (istart[3]-1)*size[0]*size[1]*size[2];
+    //}
+    //ip += nvals - maxvals;
+
+    // Find the excess, i.e. the max array position the user is trying to
+    // fill compared with the max size allowed.
+    Parser_utils putils(index_base);
+    int ix = putils.start_dex(istart, size);
+    int excess = ix + nvals - maxvals;
+
+    //cout << "&&&&&cw ip = " << ip << " excess = " << excess << endl;
+
+    // Check that the number of values input by the user does not exceed
+    // the array size.
+    //int excess = istart1 - 1 + (istart2-1)*size1 + nvals - maxvals;
+    if (excess > 0) {
+        fatal_error2(serr, ierr);
+        serr << "Maximum number of values allowed = " << maxvals << endl;
+        serr << "(for multi-dimension arrays this max number is" << endl;
+        serr << " max_dim1 * max_dim2 * ...)" << endl;
+        serr << "This command exceeds that value by " <<
+            excess << endl << endl;
+        ierr = 2;
+    }
+
+    // If fatal errors, then do not attempt further processing.
+    if (ierr == 2) return false;
+
+    // Mark as processed.
+    // NOTE(review): the next line is damaged in this copy of the patch.
+    // The end of get_nvals and the beginning of the following function
+    // (its header and the code that defines varname) are missing; what
+    // remains below is the tail of that following function, which looks up
+    // or creates a variable in vmap and sets its bounds. The template
+    // arguments of `pair` and of the `bounds` vector are missing as well.
+    // Restore from the original Cmd.cc.
+    for (int i=0; i::iterator p;
+    p = vmap->find(varname);
+    if (p == vmap->end()) {
+        Variable v(varname);
+        vmap->insert(pair(v.get_varname(), v));
+    }
+    p = vmap->find(varname);
+
+    // Extract the bounds from the line.
+    // Scanning starts at word 2 and stops at the first ")"; parentheses,
+    // commas and ":" are skipped.
+    vector bounds;
+    for (int i=2; i<(int)words.size(); i++) {
+        if (words[i].get_string() == "(") continue;
+        if (words[i].is_comma()) continue;
+        if (words[i].get_string() == ":") continue;
+        if (words[i].get_string() == ")") break;
+
+        // Get the bounds, note that this also makes sure it is an integer.
+        bounds.push_back(words[i].get_int(serr, ierr));
+    }
+
+    // Actually set the bounds for the variable.
+    int lnum = words[0].get_line_number();
+    int file_lnum = words[0].get_file_line_number();
+    string fname = words[0].get_filename();
+    p->second.set_bounds(bounds, lnum, file_lnum, fname,
+                         lines, serr, ierr);
+
+    return true;
+}
+
+
+// ===========================================================================
+// Check for the command
+//     variable_description variable_name description
+// If found, then set the description for the variable.
+// Create the variable if necessary.
+// =========================================================================== +bool Cmd::check_for_var_description(stringstream &serr, int &ierr) +{ + if (words[0].get_string() != "variable_description") return false; + + // Must be 3 words in the line. + if (words.size() != 3) { + words[0].fatal_error(serr, ierr); + serr << "The variable_description command must have 3 words on the" + " line" << endl; + serr << "First word = variable_description" << endl; + serr << "Second word = name of the variable" << endl; + serr << "Third word = description (usually some phrase in quotes)" << endl; + serr << "This command has " << words.size() << + " words instead of 3 words." << endl; + ierr = 2; + return true; + } + + // The variable name is word 1. + string varname = words[1].get_string(); + if (!words[1].is_variable()) { + words[0].fatal_error(serr, ierr); + serr << "Expected a variable name as word 2" << endl; + serr << "Variable names must begin with the $ character." << endl; + serr << "This variable name does not begin with a $ character." << endl; + serr << "Note that putting quotes around a variable name makes it" << endl; + serr << "a string, not a variable." << endl; + serr << "Variable name = " << varname << endl; + ierr = 2; + return true; + } + + // Get the description. + string vardes = words[2].get_string(); + + // Find the variable, if not found, then create it. + map::iterator p; + p = vmap->find(varname); + if (p == vmap->end()) { + Variable v(varname); + vmap->insert(pair(v.get_varname(), v)); + } + p = vmap->find(varname); + + // Cannot change pre-defined variables. + if (p->second.is_pre_defined()) { + words[0].fatal_error(serr, ierr); + serr << "Cannot change the description for a pre-defined" + " variable" << endl; + serr << "Variable name = " << varname << " is pre-defined." << endl; + ierr = 2; + return true; + } + + // Actually set the description. 
+ p->second.set_description(vardes); + + return true; +} + + +// =========================================================================== +// Go through each word on the line (starting after the equals sign if +// present), and replace each variable with its value. +// This is for scalar variables only. +// =========================================================================== +void Cmd::substitute_variables(stringstream &serr, int &ierr) +{ + int irstart = 0; + for (int i=0; i<(int)words.size(); i++) { + if (words[i].get_string() == "=") { + irstart = 1; + break; + } + } + int nw1 = (int)words.size()-1; + subvar_w0(irstart, nw1, serr, ierr); +} + + +// =========================================================================== +// Scan words i1 through i2 inclusive, replace any variables found with +// their value. +// If the variable is followed by ++ or --, handle that also. +// This is for scalar variables only. +// =========================================================================== +void Cmd::subvar_w0(int i1, int &i2, stringstream &serr, int &ierr) +{ + for (int i=i1; i<=i2; i++) { + string s = words[i].get_string(); + if (words[i].is_variable()) { + int increment = 0; + if (i < i2) { + string ppmm = words[i+1].get_string(); + if (ppmm == "++") increment = 1; + if (ppmm == "--") increment = -1; + } + subvar0(i, s, increment, serr, ierr); + if (increment != 0) { + delete_words(i+1,i+1); + i2 -= 1; + } + } + } +} + + +// =========================================================================== +// Given a variable name, varname, and its index in the words array, vardex, +// replace it with its value. +// This is for scalar variables only. 
+// =========================================================================== +void Cmd::subvar0(int vardex, string &varname, int increment, + stringstream &serr, int &ierr) +{ + vector adex; + + map::iterator p; + p = vmap->find(varname); + if (p != vmap->end()) { + int lnum = words[vardex].get_line_number(); + int file_lnum = words[vardex].get_file_line_number(); + string fname = words[vardex].get_filename(); + string svalue = p->second.get_var_value(adex, words[vardex].get_string(), + lnum, file_lnum, fname, + lines, serr, ierr); + //int increment = words[vardex].get_increment(); + if (increment != 0) p->second.bump_var(adex, increment, + lnum, file_lnum, fname, + lines, serr, ierr); + //words[vardex].set_increment(0); + words[vardex].set_value(svalue); + } + else { + // The variable has not been defined yet. + words[vardex].fatal_error(serr, ierr); + serr << "Attempted to use a variable before it was defined." + << endl; + serr << "Undefined variable = " << varname << endl; + ierr = 2; + } +} + + +// =========================================================================== +// Store the variable value(s), define if needed. +// Examples: +// $radius = 3.0 0d +// $radius(1) = 3.0 4. 5.6e19 1d +// $radius(3,4) = 3.0 4. 5.6e19 4 5 9 2d +// ... +// +// This function works for any dimension, 0,1,2,3,... +// =========================================================================== +void Cmd::set_variables(stringstream &serr, int &ierr) +{ + //cout << "&&&&&cw Enter set_variables, words[0] = " << words[0].get_string() << endl; + int ieq = -1; + for (int i=0; i<(int)words.size(); i++) { + if (words[i].get_string() == "=") { + ieq = i; + break; + } + } + + // If an equals sign was not found on the line, then this is not a + // variable assignment. + if (ieq == -1) return; + + // If the first character of the first word is not a $, then this is not + // a variable assignment. 
+ string vname = words[0].get_string(); + if (!words[0].is_variable()) return; + + // Define a few common things. + int lnum = words[0].get_line_number(); + int file_lnum = words[0].get_file_line_number(); + string fname = words[0].get_filename(); + vector valvec; + + int dim = 0; + if (ieq >= 4) dim = ieq - 3; + //cout << "&&&&&cw vname=" << vname << " ieq=" << ieq << " dim=" << dim << endl; + + // Do some checking. + if (dim == 0) { + // Must be 3 words in the line (for example: $radius = 3.0) + if (words.size() != 3) { + fatal_error2(serr, ierr); + serr << "Expected number words in this line = 3" << endl; + serr << "Actual number words = " << words.size() << endl << endl; + ierr = 2; + // If there aren't enough words on the line, then it is hopeless. + if (words.size() < 3) return; + } + + // The value must not have any multiplicity, i.e. be just a single value. + // This only applies to 0d, values for arrays can have multiplicity. + if (words[2].get_multiplicity() != 1) { + words[2].fatal_error(serr, ierr); + serr << "Multiplicity not equal 1 for " << words[2].get_string() << endl; + serr << "Multiplicity is: " << words[2].get_multiplicity() << endl << endl; + ierr = 2; + } + } + + if (dim > 0) { + int nw_min = dim + 5; + int nw_min_wc = nw_min + dim - 1; + if ((int)words.size() < nw_min) { + fatal_error2(serr, ierr); + serr << "Expected number of symbols in this line >= " << nw_min_wc << endl; + serr << "Actual number of symbols is less than expected." << endl << endl; + ierr = 2; + // If there aren't enough words on the line, then it is hopeless. + return; + } + + // Word at index 1 must be a "(". + if (words[1].get_string() != "(") { + words[1].fatal_error(serr, ierr); + serr << "Expected an open parenthesis ,(, following the variable name" + " in this line," << endl; + serr << "For example: " << vname << "(...) = ..." 
<< endl; + serr << "Instead found: " << words[1].get_string() << endl << endl; + ierr = 2; + return; + } + + // There must be a closing parenthses. + int irp = 3 + dim - 1; + if (words[irp].get_string() != ")") { + words[irp].fatal_error(serr, ierr); + serr << "Expected a close parenthesis ,), following the array indices" + " in this line," << endl; + serr << "For example: " << vname << "(...) = ..." << endl; + serr << "Instead found: " << words[irp].get_string() << endl << endl; + ierr = 2; + return; + } + } + + // Store the values in a vector. + for (int i=ieq+1; i<(int)words.size(); i++) { + int imult = words[i].get_multiplicity(); + string s = words[i].get_string(); + for (int j=1; j<=imult; j++) { + valvec.push_back(s); + } + } + + // Store the array indices in a vector. + vector istart(dim,0); + int ierr2 = 0; + for (int d=0; d= " << index_base << endl; + serr << "Integer includes numbers like 3, 3., 3.0, but not 3.5" << endl; + serr << "The index input is: " << istart[d] << endl << endl; + ierr2 = 2; + } + } + if (ierr2 == 2) { + ierr = 2; + return; + } + + // Find the variable name in the variable map. + map::iterator p; + p = vmap->find(vname); + + // If the variable is found in the variable map, then replace + // its value with the new value. If the variable is not found + // in the variable map, then add it as a new variable. + if (p != vmap->end()) { + p->second.set_var_value(istart, valvec, lnum, file_lnum, fname, + lines, serr, ierr); + } + else { + Variable v(vname, istart, valvec, lnum, file_lnum, fname, lines, + serr, ierr); + vmap->insert(pair(v.get_varname(), v)); + } +} + + + +// =========================================================================== +// Evaluate a variable. +// We have a word followed by multiple arguments. Find out if the word is +// a variable, use the arguments to get the variable value, replace the +// variable and arguments with its value. +// +// This function works for any dimension, 0,1,2,3,... 
+// =========================================================================== +bool Cmd::evaluate_variable(int iw1, int &i2, int &nargs, + stringstream &serr, int &ierr) +{ + // If there is no map of variables, then we do nothing. + if (vmap == NULL) return false; + + // Do nothing if the word is not a variable (begins with $). + if (!words[iw1].is_variable()) return false; + + // Get the variable name. + string varname = words[iw1].get_string(); + + // Find the variable. + map::iterator p; + p = vmap->find(varname); + + // The variable was not found. + if (p == vmap->end()) { + words[iw1].fatal_error(serr, ierr); + serr << "Trying to use a variable before it is defined." << endl; + serr << "Undefined variable = " << varname << endl; + serr << "The list of defined variables (at this point) is:" << endl; + for (p=vmap->begin(); p!=vmap->end(); p++) { + serr << p->second.get_varname() << endl; + } + ierr = 2; + return true; + } + + // The variable was found, do the evaluation and replace the words. + + // Check to see if all the variable arguments have a value. + bool has_value = true; + for (int i=0; i vdex; + for (int i=0; isecond.get_var_value(vdex, varname, ln, file_ln, + fname, lines, serr, ierr); + //int increment = words[iw1].get_increment(); + if (increment != 0) p->second.bump_var(vdex, increment, ln, file_ln, + fname, lines, serr, ierr); + //words[iw1].set_increment(0); + Word w(result, ln, file_ln, fname, lines); + replace_words(iw1, iw1+nargs, w); + i2 -= nargs; + + // If the variable was followed by a ++ or --, then remove the + // ++ or -- since it has been used. + if (increment != 0) { + delete_words(iw1+1, iw1+1); + i2 -= 1; + } + + return true; +} + + + + + + +// *************************************************************************** +// *************************************************************************** +// Math evaluation. 
+// *************************************************************************** +// *************************************************************************** + +// =========================================================================== +// Math evaluation driver. +// =========================================================================== +void Cmd::math_eval(stringstream &serr, int &ierr) +{ + // Combine * * into **, i.e. form the exponentiation operator. + handle_star_star(); + + // Ops like .and., .eq., ... can at this point be part of larger words, + // they need to be extracted as individual words. + handle_ops(); + + //cout << "&&&&&cw Cmd.cc, Enter math_eval" << endl; + //for (int i=0; i<(int)words.size(); i++) { + // cout << words[i].get_string() << endl; + //} + + int ieq = find_any_char(0, (int)words.size()-1, "="); + + bool ifcmd = false; + if (words[0].get_string() == "if") { + ifcmd = true; + ieq = -1; + } + + for (int i=0; i<(int)words.size(); i++) { + if (words[i].get_string() == "(" /*&& i>ieq*/) { + if (ifcmd && i>1) continue; + if (i > (int)words.size()-2) { + words[i].fatal_error(serr, ierr); + serr << "Expected (...)" << endl; + serr << "Found " << words[(int)words.size()-2].get_string() << + words[(int)words.size()-1].get_string() << endl; + ierr = 2; + return; + } + + // The starting index of the math expression. + int istart = i+1; + + // Find the ending index of the math expression. + int iclose = find_closing_symbol("(", ")", istart); + if (iclose == -1) { + words[i+1].fatal_error(serr, ierr); + serr << "Did not find a closing parenthesis, ), in" + " math expression" << endl; + serr << "Check for unbalanced parentheses in math expression." 
<< endl; + ierr = 2; + return; + } + int iend = iclose - 1; + + int iwres = 0; + int nargs = 0; + for (;;) { + if (!handle_innermost_parens(istart, iend, iwres, nargs, true, + serr, ierr)) break; + bool isvar = false; + bool doit = true; + if (iwres <= 0) doit = false; + if (cmd_type == "assignment" && iwres==1) doit = false; + if (doit) { + isvar = evaluate_variable(iwres-1, iend, nargs, serr, ierr); + } + if ((iwres > 0) && (!isvar)) { + evaluate_function(iwres-1, iend, nargs, serr, ierr); + } + } + + // Now set ihip1,2 to the original set of parens and handle those. + // This will do a math eval inside the parens, handle multiple arguments + // and possibly remove the parens. + // We do not remove the parens if this is a command line or an assignment + // line and we are to the left of the equals because a lot of checks + // depend on the parens being there. + int ihip1 = istart-1; + int ihip2 = iend + 1; + bool remp = true; + if (ieq>ihip2 && ihip1==1) remp = false; + handle_innermost_parens(ihip1, ihip2, iwres, nargs, remp, + serr, ierr); + + // Handle the case of a variable array, i.e. evaluate and replace + // the variable array reference. We of course do not do this for + // an assignment statement where we are to the left of the equals. + //bool isvar = false; + bool doit = true; + if (iwres <= 0) doit = false; + if (cmd_type == "assignment" && iwres==1) doit = false; + if (doit) { + // We use ihip3 to account for ++ or -- following a + // variable array reference. + int ihip3 = ihip2 + 1; + //isvar = evaluate_variable(iwres-1, ihip3, nargs, serr, ierr); + evaluate_variable(iwres-1, ihip3, nargs, serr, ierr); + } + + // Do not allow a function outside of parens. We could do this + // but for now all math is inside parens. + //if ((iwres > 0) && (!isvar)) { + // evaluate_function(iwres-1, iend, nargs, serr, ierr); + //} + + } // if find ( + } // End of loop through all words on the line. 
+ + //cout << "&&&&&cw Cmd.cc, Exit math_eval" << endl; +} + + +// =========================================================================== +// Handle the innermost set of parentheses. +// The return value is false if parens were not found or if they were +// unbalanced. If parens were found and handled then true is returned. +// iwres is an output quantity and is the location of the resultant word. +// =========================================================================== +bool Cmd::handle_innermost_parens(int &i1, int &i2, int &iwres, int &nargs, + bool remp, stringstream &serr, int &ierr) +{ + // If no innermost parens are found then iwres is meaningless. + iwres = -1; + + // Search for the innermost left parens. It is ok if there is no left + // parens, this line just does not have parens. + int ip1 = find_last("(", i1, i2); + if (ip1 == -1) return false; + + // After evaluation, the resultant word will be at ip1. + iwres = ip1; + + int ipstart = ip1 + 1; + bool done = false; + nargs = 1; + for (;;) { + int ip2 = find_any_char(ipstart, i2, ",)"); + if (ip2 == -1) { + words[ipstart].fatal_error(serr, ierr); + serr << "Did not find a closing parenthesis, ), in" + " math expression" << endl; + serr << "Check for unbalanced parentheses in math expression." << endl; + ierr = 2; + return false; + } + + if (words[ip2].get_string() == ")") done = true; + if (words[ip2].is_comma()) nargs += 1; + + if (remp) { + delete_words(ip2, ip2); + i2 -= 1; + } + int ip21a = ip2 - 1; + int ip21 = ip2 - 1; + seval(ipstart, ip21, serr, ierr); + i2 -= ip21a - ip21; + + if (done) break; + + ipstart += 1; + } + + // Delete the leading paren. + if (remp) { + delete_words(ip1, ip1); + i2 -= 1; + } + + return true; +} + + +// =========================================================================== +// Simple evaluation of a series of words i1 to i2 inclusive. 
+// =========================================================================== +void Cmd::seval(int &i1, int &i2, stringstream &serr, int &ierr) +{ + Parser_math pmath; + + subvar_w0(i1, i2, serr, ierr); + handle_unary_op(i1, i2, "-", serr, ierr); + handle_unary_op(i1, i2, "+", serr, ierr); + + // Level Operators + // ----- ----------------------- + // 8 () + // 7 ++ -- + // 6 ** + // 5 * / + // 4 + - + // 3 .gt. .ge. .lt. .le. .eq. .ne. + // 2 .not. + // 1 .and. + // 0 .or. + for (int level=6; level>=0; level--) { + for (int i=i1; i<=i2; i+=1) { + if (words[i].is_operator(level)) { + int ln = words[i].get_line_number(); + int file_ln = words[i].get_file_line_number(); + string fname = words[i].get_filename(); + Word w("", ln, file_ln, fname, lines); + + string op_type = words[i].get_op_type(); + if (op_type == "arithmetic") + pmath.do_op(i-1, i, i+1, words, w, serr, ierr); + if (op_type == "relational") + pmath.do_op_relational(i-1, i, i+1, words, w, serr, ierr); + if (op_type == "logical" && level == 2) // .not. is unary + pmath.do_op_not(i, i+1, words, w, serr, ierr); + if (op_type == "logical" && level != 2) + pmath.do_op_logical(i-1, i, i+1, words, w, serr, ierr); + + // level 2, .not., is unary and is handled differently. + if (level == 2) { + replace_words(i, i+1, w); + i2 -= 1; + } + else { + replace_words(i-1, i+1, w); + i2 -= 2; + i -= 1; + } + continue; + } + } + } + //cout << "&&&&&cw Leave seval" << endl; +} + + +// =========================================================================== +// Handle unary plus and minus. +// utype is either "+" or "-". +// =========================================================================== +void Cmd::handle_unary_op(int i1, int &ipend, string utype, + stringstream &serr, int &ierr) +{ + int ipstart = i1; + for (;;) { + int ip = find(ipstart, ipend, utype); + + // If we do not find any more plus/minus signs then we are done. 
+ if (ip == -1) return; + + // Fatal error is the plus/minus sign is the last word on the line. + if (ip >= ((int)words.size()-1)) { + words[ip].fatal_error(serr, ierr); + serr << "Found a " << utype << " sign at the end of a line." << endl; + serr << "Expected something to the right of the " << utype + << " sign to operate on." << endl; + ierr = 2; + return; + } + + + // If the plus/minus sign is the first word on the line, then it has + // to be a unary op. The word after the plus/minus sign must be a + // number or variable. + // This should never happen because we are always inside (...) + // and will never be word 0, still we should be general and take care + // of this case. + if (ip == 0) { + if (!words[ip+1].is_number()) { + words[ip+1].fatal_error(serr, ierr); + serr << "Expected the object following the unary " << utype + << " to be a number." << endl; + serr << "Instead, it was " << words[ip+1].get_string() << endl; + ierr = 2; + return; + } + + // Actually do the negate operation. + do_unary_op(ip, utype); + ipstart = ip+1; + ipend -= 1; + continue; + } + + // Check to see if the +/- is a binary op. If so, then nothing needs + // to be done with this +/- sign, binary ops are handled elsewhere. + if (words[ip-1].is_number() && + (words[ip+1].is_number() || words[ip+1].get_string() == "-" || + words[ip+1].get_string() == "+") + ) { + ipstart = ip+1; + continue; + } + + // Check to see if the +/- is a unary op. + if (!words[ip-1].is_number() && words[ip+1].is_number()) { + do_unary_op(ip, utype); + ipstart = ip+1; + ipend -= 1; + continue; + } + + // Check for an error. + if (!words[ip-1].is_number() && + !words[ip+1].is_number()) { + words[ip-1].fatal_error(serr, ierr); + serr << "Expected the object following the unary " << utype << " to" + " be a number." 
<< endl; + serr << "Instead, it was " << words[ip+1].get_string() << endl; + ierr = 2; + return; + } + + words[ip].fatal_error(serr, ierr); + serr << "Unknown error with unary " << utype << endl; + serr << "Error with words: " << endl; + serr << words[ip].get_string() << words[ip+1].get_string() << endl; + ierr = 2; + return; + } +} + + + +// =========================================================================== +// Do a unary operation. +// The minus sign is at word ip and the word to be negated is at word ip+1. +// After negation, both words get replaced by the new negated word. +// If the unary op is plus then all we need to do is get rid of the + sign. +// =========================================================================== +void Cmd::do_unary_op(int ip, string utype) +{ + if (utype == "+") { + delete_words(ip,ip); + return; + } + + if (utype == "-") { + if (words[ip+1].is_number()) { + Word w = words[ip+1]; + w.negate_value(); + replace_words(ip, ip+1, w); + return; + } + } +} + + + +// =========================================================================== +// Check to see that all ++ and -- have been handled and removed. +// =========================================================================== +void Cmd::check_ppmm(stringstream &serr, int &ierr) +{ + for (int i=0; i<(int)words.size(); i++) { + string s = words[i].get_string(); + if (s == "++" || s == "--") { + words[i].fatal_error(serr, ierr); + serr << "Misplaced " << s << " operator." << endl; + serr << "++ and -- operators must follow a variable or " << endl; + serr << "an element of an array variable. " << endl; + ierr = 2; + } + } +} + + +// =========================================================================== +// Evaluate a function. +// =========================================================================== +void Cmd::evaluate_function(int iw1, int &i2, int &nargs, + stringstream &serr, int &ierr) +{ + // If there is no map of functions, then we do nothing. 
+ if (fmap == NULL) return; + + // If the word at iw1 is not a string then it will not be a function. + if (!words[iw1].is_string()) return; + + // Find the function. + string s = words[iw1].get_string(); + map::iterator p; + p = fmap->find(s); + + // The function was not found. + if (p == fmap->end()) { + words[iw1].fatal_error(serr, ierr); + serr << "Expected a function" << endl; + serr << "Instead found: " << words[iw1].get_string() << endl; + serr << "The list of known functions is:" << endl; + for (p=fmap->begin(); p!=fmap->end(); p++) { + serr << p->second.get_name() << endl; + } + ierr = 2; + return; + } + + // The function was found, do the evaluation and replace the words. + + // Common items needed for all types of functions. + int ln = words[iw1].get_line_number(); + int file_ln = words[iw1].get_file_line_number(); + string fname = words[iw1].get_filename(); + + + // Is a variable defined or not. + if (s == "defined") { + string varname = words[iw1+1].get_string(); + string result = "true"; + map::iterator p; + p = vmap->find(varname); + if (p == vmap->end()) result = "false"; + Word w(result, ln, file_ln, fname, lines); + replace_words(iw1, iw1+nargs, w); + i2 -= nargs; + return; + } + + + // String functions - string arguments, string results. + if (p->second.get_type() == "string") { + // Load all the arguments into a vector of strings. + vector vs; + for (int i=0; isecond.evaluate(vs, serr, ierr, ln, file_ln, + fname, lines); + Word w(result, ln, file_ln, fname, lines); + replace_words(iw1, iw1+nargs, w); + i2 -= nargs; + } + + + // Real functions - double arguments, double results. + if (p->second.get_type() == "real") { + // Check to see if all the function arguments have a value. 
+ bool has_value = true; + for (int i=0; i vd; + for (int i=0; isecond.evaluate(vd, serr, ierr, ln, file_ln, + fname, lines); + Word w(result, ln, file_ln, fname, lines); + replace_words(iw1, iw1+nargs, w); + i2 -= nargs; + } +} + + + +// =========================================================================== +// When two "*" characters are together, assume that is the exponentiation +// operator, "**", and replace both "*"'s with "**". +// =========================================================================== +void Cmd::handle_star_star() +{ + for (int i=0; i<(int)words.size()-1; i++) { + if (words[i].get_string() == "*" && words[i+1].get_string() == "*") { + int lnum = words[i].get_line_number(); + int file_ln = words[i].get_file_line_number(); + string fname = words[i].get_filename(); + string s = "**"; + Word w(s, lnum, file_ln, fname, lines); + replace_words(i, i+1, w); + } + } +} + + +// =========================================================================== +// The parser does not automatically separate operators like .eq., .ne., etc. +// For example, the phrase a.eq.b will be one word when it should be 3 words. +// This routine finds those cases and splits the one word into multiple words. 
+// =========================================================================== +void Cmd::handle_ops() +{ + vector subs; + subs.push_back(".eq."); + subs.push_back(".ne."); + subs.push_back(".gt."); + subs.push_back(".ge."); + subs.push_back(".lt."); + subs.push_back(".le."); + subs.push_back(".hgeq."); + subs.push_back(".hgne."); + subs.push_back(".hggt."); + subs.push_back(".hgge."); + subs.push_back(".hglt."); + subs.push_back(".hgle."); + subs.push_back(".not."); + subs.push_back(".and."); + subs.push_back(".or."); + + for (int i=0; i<(int)words.size(); i++) { + string fstr = words[i].get_string(); + for (int j=0; j<(int)subs.size(); j++) { + vector vs; + bool b = separate_str(subs[j], fstr, vs); + if (b) { + vector vw; + for (int k=0; k<(int)vs.size(); k++) { + int lnum = words[i].get_line_number(); + int file_lnum = words[i].get_file_line_number(); + string fname = words[i].get_filename(); + Word w(vs[k], lnum, file_lnum, fname, lines); + vw.push_back(w); + //cout << vs[k] << endl; + } + replace_words(i, i, vw); + i--; + break; + } + } + + + //int lnum = words[i].get_line_number(); + //string s = "**"; + //Word w(s, lnum, lines); + //replace_words(i, i+1, w); + } +} + + +// =========================================================================== +// After the line has mostly been processed, check for any misplaced math +// operations. For example, the following line +// xcenter = 1.0 + 2.0 +// has a misplaced math op in it, i.e. it should be in parentheses +// xcenter = (1.0 + 2.0) +// =========================================================================== +void Cmd::check_misplaced_math(stringstream &serr, int &ierr) +{ + for (int i=0; i<(int)words.size(); i++) { + if (words[i].is_operator()) { + words[i].fatal_error(serr, ierr); + serr << "Misplaced math operation." << endl; + serr << "All math operations must be inside parentheses." 
<< endl; + ierr = 2; + } + } +} + + +// *************************************************************************** +// *************************************************************************** +// Handle if/elseif/else/endif +// *************************************************************************** +// *************************************************************************** + +// =========================================================================== +// Handle if/elseif/else/endif statements. +// =========================================================================== +void Cmd::handle_if(bool &skip, deque &skip_level, + deque &satisfied, stringstream &serr, int &ierr) +{ + // If's can be nested to any level, the number of levels is determined + // by the size of skip_level, the size of satisfied would also work here. + int nlevels = (int)skip_level.size(); + + // The endif statement ends a block if. + if (words[0].get_string() == "endif") { + if ((int)words.size() > 1) { + words[1].fatal_error(serr, ierr); + serr << "The endif (or end if) statement should not have " + "anything else on the line." << endl; + serr << "Found other words on the line." << endl; + ierr = 2; + } + + // The if level has ended, just erase it. + skip_level.erase(skip_level.begin()+nlevels-1); + satisfied.erase(satisfied.begin()+nlevels-1); + skip = true; + return; + } + + // Else statment. + if (words[0].get_string() == "else") { + if ((int)words.size() > 1) { + words[1].fatal_error(serr, ierr); + serr << "The else statement should not have " + "anything else on the line." << endl; + serr << "Found other words on the line." << endl; + ierr = 2; + } + + // If the if has been satisfied before this else, then just + // skip the else block. Otherwise the if will be satisfied and + // we do not skip the else block. 
+ if (satisfied[nlevels-1]) { + skip_level[nlevels-1] = true; + } + else { + satisfied[nlevels-1] = true; + skip_level[nlevels-1] = false; + } + skip = true; + return; + } + + // If any level is in skip mode, then we will skip this line. + // This is mostly for non if related lines, but the skip flag is + // used below. + skip = false; + for (int n=0; n 1) { + if (words[1].get_string() != "(") { + words[1].fatal_error(serr, ierr); + serr << "Expected an open parentheses, (, following " << + words[0].get_string() << endl; + serr << "Instead found: " << words[1].get_string() << endl; + ierr = 2; + } + } + + int nw = wsize-2; + if (nw >= 0) { + if (words[nw].get_string() != ")") { + words[nw].fatal_error(serr, ierr); + serr << "Expected a close parentheses, ), as the next to last " + "symbol on the line." << endl; + serr << "Instead found: " << words[nw].get_string() << endl; + ierr = 2; + } + } + + nw = wsize-1; + if (nw >= 0) { + if (words[nw].get_string() != "then") { + words[nw].fatal_error(serr, ierr); + serr << "Expected then as the last word on the line." << endl; + serr << "Instead found: " << words[nw].get_string() << endl; + ierr = 2; + } + } + + // Evaluate the conditional. + math_eval(serr, ierr); + + if (words[1].get_bool(serr, ierr)) { + satisfied[nlevels-1] = true; + skip_level[nlevels-1] = false; + } + else { + skip_level[nlevels-1] = true; + } + + } + + // Set skip to skip the elseif statement. + skip = true; + return; + } + + + + + if (words[0].get_string() == "if") { + //cout << "&&&&&cw Cmd.cc, if statment encountered" << endl; + + // If we are in skip mode at a higher level, then we can ignore this + // if. + if (skip) { + skip_level.push_back(true); + satisfied.push_back(true); + return; + } + + // Do some syntax checking. 
+ int wsize = (int)words.size(); + + if (wsize > 1) { + if (words[1].get_string() != "(") { + words[1].fatal_error(serr, ierr); + serr << "Expected an open parentheses, (, following " << + words[0].get_string() << endl; + serr << "Instead found: " << words[1].get_string() << endl; + ierr = 2; + } + } + + // Evaluate the conditional. + math_eval(serr, ierr); + + //for (int i=0; i<(int)words.size(); i++) { + // cout << words[i].get_string() << endl; + //} + + // Single line if + if (words[2].get_string() != "then") { + if (words[1].get_bool(serr, ierr)) { + delete_words(0,1); + reset_name_type(); + skip = false; + } + else { + skip = true; + } + return; + } + + // Multi-block if + if (words[2].get_string() == "then") { + if (words[1].get_bool(serr, ierr)) { + skip_level.push_back(false); + satisfied.push_back(true); + skip = true; + } + else { + skip_level.push_back(true); + satisfied.push_back(false); + skip = true; + } + return; + } + } + + + //for (int i=0; i<(int)words.size(); i++) { + // if (words[i].get_string() == "*" && words[i+1].get_string() == "*") { + // } + //} +} + + +// *************************************************************************** +// *************************************************************************** +// Handle do loops +// *************************************************************************** +// *************************************************************************** + +// =========================================================================== +// Handle do loops. +// =========================================================================== +void Cmd::handle_do(bool &skip, deque &do_start, int &cdex, + bool &end_do_loop, stringstream &serr, int &ierr) +{ + // Do's can be nested to any level, the number of levels is determined + // by the size of do_start. + int nlevels = (int)do_start.size(); + + // End of do loop, go back to do line. 
+ if (words[0].get_string() == "enddo") { + //cout << "&&&&&cw Cmd, handle_do, start of enddo, cdex=" << cdex << endl; + if ((int)words.size() > 1) { + words[1].fatal_error(serr, ierr); + serr << "The enddo (or end do) statement should not have " + "anything else on the line." << endl; + serr << "Found other words on the line." << endl; + ierr = 2; + } + + cdex = do_start[nlevels-1] - 1; + skip = true; + return; + } + + + // Cycle command encountered. + if (words[0].get_string() == "cycle") { + if ((int)words.size() > 1) { + words[1].fatal_error(serr, ierr); + serr << "The cycle statement should not have " + "anything else on the line." << endl; + serr << "Found other words on the line." << endl; + ierr = 2; + } + + cdex = do_start[nlevels-1] - 1; + skip = true; + return; + } + + // Break out of the do loop. + if (words[0].get_string() == "exit") { + if ((int)words.size() > 1) { + words[1].fatal_error(serr, ierr); + serr << "The exit statement should not have " + "anything else on the line." << endl; + serr << "Found other words on the line." << endl; + ierr = 2; + } + + end_do_loop = true; + return; + } + + if (words[0].get_string() == "do") { + + //for (int i=0; i<(int)words.size(); i++) { + // cout << words[i].get_string() << endl; + //} + + // Evaluate any math expressions on the do line. + math_eval(serr, ierr); + + // Replace any simple variables on the line with their values. + // Of course, do not replace the loop variable. + int ieq = -1; + for (int i=0; i<(int)words.size(); i++) { + if (words[i].get_string() == "=") { + ieq = i; + break; + } + } + if (ieq >= 0) { + int nw1 = (int)words.size()-1; + subvar_w0(ieq+1, nw1, serr, ierr); + } + + // Handle unary minus + handle_cmd_unary_minus(serr, ierr); + + // Number of words on the line after math evaluation. + int nwords = (int)words.size(); + + // Get the loop variable name. 
+ string do_varname = "$i"; + bool isvar = true; + if (nwords>1) { + do_varname = words[1].get_string(); + if (!words[1].is_variable()) isvar = false; + } + + // Expecting 6 or 8 words, i.e. "do $i = 1 , 10" + if (nwords < 6) { + words[0].fatal_error(serr, ierr); + serr << "Expected at least 6 words on this line after any math evaluations." + << endl; + serr << "For example, " << endl + << " do " << do_varname << " = 1 , 10" << endl; + serr << "Instead found " << nwords << " words on the line." << endl; + serr << "The line (after any math evaluations have been done) is:" << endl; + serr << " "; + for (int iw=0; iw 7) istep = words[7].get_int(serr, ierr); + + string s1 = words[3].get_string(); + string s2 = words[5].get_string(); + + bool do_continue = false; + if (nlevels > 0) { + if (do_start[nlevels-1] == cdex) do_continue = true; + } + + if (do_continue) { // This do has already been encountered. + //cout << "&&&&&cw Cmd, handle_do, do:continue" << endl; + + // Find the variable in the list of variables, increment it, test for + // ending the loop, and store the incremented value. 
+ map::iterator p; + p = vmap->find(do_varname); + if (p != vmap->end()) { + string do_var_value = p->second.get_var_value(); + int lnum = words[0].get_line_number(); + int file_lnum = words[0].get_file_line_number(); + string fname = words[0].get_filename(); + Word w(do_var_value, lnum, file_lnum, filename, lines); + int ival = w.get_int(serr, ierr); + ival += istep; + //cout << "&&&&&cw Cmd, handle_do, do:continue, do_var_value=" << ival << endl; + if (istep >= 0 && ival > i2) { + end_do_loop = true; + return; + } + if (istep < 0 && ival < i2) { + end_do_loop = true; + return; + } + stringstream ss; + ss << ival; + string sval = ss.str(); + vector valvec; + valvec.push_back(sval); + vector istart(0,0); + p->second.set_var_value(istart, valvec, lnum, file_lnum, fname, + lines, serr, ierr); + } + else { + words[1].fatal_error(serr, ierr); + serr << "The loop variable, " << do_varname << + " was not found in the variable list." << endl; + serr << "This should not happen, possible code bug?" << endl; + ierr = 2; + return; + } + } + else { // A new do loop has been encountered. + do_start.push_back(cdex); + + // It is possible that we don't execute the do loop at all. + if (istep >= 0 && i1 > i2) { + end_do_loop = true; + return; + } + if (istep < 0 && i1 < i2) { + end_do_loop = true; + return; + } + + //cout << "&&&&&cw Cmd, handle_do, do:start, cdex=" << cdex << endl; + //cout << "&&&&&cw Cmd, handle_do, do:start, s1=" << s1 << endl; + //cout << "&&&&&cw Cmd, handle_do, do:start, s2=" << s2 << endl; + + // Store the loop variable, create it if necessary. 
+ vector istart(0,0); + int lnum = words[0].get_line_number(); + int file_lnum = words[0].get_file_line_number(); + string fname = words[0].get_filename(); + vector valvec; + valvec.push_back(s1); + map::iterator p; + p = vmap->find(do_varname); + if (p != vmap->end()) { + p->second.set_var_value(istart, valvec, lnum, file_lnum, fname, + lines, serr, ierr); + } + else { + Variable v(do_varname, istart, valvec, lnum, file_lnum, fname, + lines, serr, ierr); + vmap->insert(pair(v.get_varname(), v)); + } + } + skip = true; + return; + } +} + + + +// =========================================================================== +// Starting at a do statement, find the matching enddo. +// =========================================================================== +bool Cmd::find_matching_enddo(int &dlev, bool &stop_checking) +{ + if (words[0].get_string() == "enddo") { + if (dlev == 1) return true; + dlev -= 1; + return false; + } + if (words[0].get_string() == "do") { + dlev += 1; + return false; + } + + // If we are in main and hit a subroutine statement then that is the + // end of main and we need to stop checking. + if (words[0].get_string() == "subroutine") { + stop_checking = true; + return false; + } + + // If we are in a subroutine and hit an endsubroutine statement then + // we need to stop checking. + if (words[0].get_string() == "endsubroutine") { + stop_checking = true; + return false; + } + + return false; +} + + + +// *************************************************************************** +// *************************************************************************** +// Subroutines +// *************************************************************************** +// *************************************************************************** + +// =========================================================================== +// Handle subroutines. 
+// ===========================================================================
+// Sets go_to_sub and sub_name for a "call" line, and go_to_call for an
+// "endsubroutine" or "return" line.  Any other line leaves the outputs alone.
+// A "call" with no subroutine name is reported as a fatal error (ierr = 2)
+// instead of reading past the end of the word list.
+void Cmd::handle_subroutines(bool &skip, bool &go_to_sub, string &sub_name,
+                             bool &go_to_call, stringstream &serr, int &ierr)
+{
+    // To suppress compiler warnings of unused parameters
+    assert(skip == skip);
+
+    if (words.empty()) return;
+
+    const string cmd = words[0].get_string();
+
+    if (cmd == "call") {
+        if ((int)words.size() < 2) {
+            words[0].fatal_error(serr, ierr);
+            serr << "Expected a subroutine name following call." << endl;
+            ierr = 2;
+            return;
+        }
+        sub_name = words[1].get_string();
+        go_to_sub = true;
+        return;
+    }
+
+    if (cmd == "endsubroutine" || cmd == "return") {
+        go_to_call = true;
+        return;
+    }
+}
+
+
+// ===========================================================================
+// Searching for subroutine sub_name.
+// Returns true only when this line is "subroutine <sub_name> ...".
+// ===========================================================================
+bool Cmd::find_subroutine(string &sub_name)
+{
+    if ((int)words.size() < 2) return false;
+    return words[0].get_string() == "subroutine" &&
+           words[1].get_string() == sub_name;
+}
+
+
+// ===========================================================================
+// A call statement has been encountered, get the arguments, if any.
+// The call is expected to be
+//     call subname ( arg1, arg2, ...)
+//
+// Each argument (commas are dropped) is appended to sargs, and whether it is
+// a variable is appended to sargs_isvar.  The same values replace the
+// contents of the class members call_args and call_args_isvar.  The words
+// on the line are left exactly as they were on entry.
+// ===========================================================================
+void Cmd::get_call_args(vector<string> &sargs, vector<bool> &sargs_isvar,
+                        stringstream &serr, int &ierr)
+{
+    // The math evaluation below rewrites the words in place, so keep a
+    // pristine copy of the line to put back before returning.
+    deque<Word> words_store(words);
+
+    // Erase the call and the subroutine name.
+    erase_word(0);
+    erase_word(0);
+
+    // Erase the opening and closing parens.
+    if (!words.empty() && words.front().get_string() == "(") erase_word(0);
+    if (!words.empty() && words.back().get_string() == ")") erase_last_word();
+
+    // Do a math eval to get one word arguments.  If the arguments are
+    // variables they will not be evaluated, so we will end up with a mix
+    // of variables and numbers.
+    if (!words.empty()) {
+        math_eval(serr, ierr);
+        int wlen = (int)words.size() - 1;
+        handle_unary_op(0, wlen, "-", serr, ierr);
+        // The previous call may have merged words, so recompute the end.
+        wlen = (int)words.size() - 1;
+        handle_unary_op(0, wlen, "+", serr, ierr);
+    }
+
+    // Store the arguments in the vector of strings, sargs, to be returned
+    // to the calling code and also store in the class, call_args.
+    call_args.clear();
+    call_args_isvar.clear();
+    for (int i=0; i<(int)words.size(); i++) {
+        if (words[i].is_comma()) continue;
+        const string arg = words[i].get_string();
+        const bool isvar = words[i].is_variable();
+        sargs.push_back(arg);
+        sargs_isvar.push_back(isvar);
+        call_args.push_back(arg);
+        call_args_isvar.push_back(isvar);
+    }
+
+    // Restore the words before leaving this function.
+    words.swap(words_store);
+}
+
+
+// ===========================================================================
+// A subroutine statement has been encountered, get the arguments, if any.
+// The subroutine statement is expected to be
+// subroutine subname ( arg1, arg2, ...)
+// ===========================================================================
+// Arguments sit at word 3, 5, 7, ... (after "subroutine", the name and the
+// opening paren, with a comma between each pair).  They are appended to
+// sargs/sargs_isvar and replace the contents of sub_args/sub_args_isvar.
+void Cmd::get_sub_args(vector<string> &sargs, vector<bool> &sargs_isvar)
+{
+    sub_args.clear();
+    sub_args_isvar.clear();
+    for (int i=3; i<(int)words.size(); i+=2) {
+        const string arg = words[i].get_string();
+
+        // An empty argument list, "subroutine name ( )", puts the closing
+        // paren where the first argument would be; it is not an argument.
+        if (arg == ")") break;
+
+        const bool isvar = words[i].is_variable();
+        sargs.push_back(arg);
+        sargs_isvar.push_back(isvar);
+        sub_args.push_back(arg);
+        sub_args_isvar.push_back(isvar);
+    }
+}
+
+
+// ===========================================================================
+// Accessor functions for the calling and subroutine arguments.
+// Both append to the output vectors; neither clears them first.
+// ===========================================================================
+void Cmd::copy_call_args(vector<string> &sargs, vector<bool> &sargs_isvar)
+{
+    for (int i=0; i<(int)call_args.size(); i++) {
+        sargs.push_back(call_args[i]);
+        sargs_isvar.push_back(call_args_isvar[i]);
+    }
+}
+
+void Cmd::copy_sub_args(vector<string> &sargs, vector<bool> &sargs_isvar)
+{
+    for (int i=0; i<(int)sub_args.size(); i++) {
+        sargs.push_back(sub_args[i]);
+        sargs_isvar.push_back(sub_args_isvar[i]);
+    }
+}
+
+
+
+
+// ***************************************************************************
+// ***************************************************************************
+// Handle comments.
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Process single line comments.
+// Everything from the first "!", "#" or "/ /" to the end of the line is
+// removed.
+// ===========================================================================
+void Cmd::single_line_comments()
+{
+    // The last word is left out of this scan because the "/ /" test looks
+    // one word ahead; it is checked separately below.
+    const int nwords = (int)words.size();
+    for (int i=0; i<nwords-1; i++) {
+        const string w = words[i].get_string();
+        if (w == "!" || w == "#" ||
+            (w == "/" && words[i+1].get_string() == "/")) {
+            words.erase(words.begin()+i, words.end());
+            break;
+        }
+    }
+
+    // Handle the case where the last word might be an ! or a #
+    if (words.empty()) return;
+    const string last = words.back().get_string();
+    if (last == "!" || last == "#") words.pop_back();
+}
+
+
+// ===========================================================================
+// Process multi-line comments.
+//
+// level is the comment nesting depth carried from line to line: it is
+// raised by every "/ *" and lowered by every "* /".  All words inside a
+// comment, including the delimiters, are removed from the line.  If the
+// line ends with a comment still open, the rest of the line is removed and
+// level stays positive for the next line.
+//
+// An unmatched "* /" is reported on cout and otherwise left in place.
+// ===========================================================================
+void Cmd::multi_line_comments(int &level)
+{
+    // Index of the first word of the comment being removed.  A comment
+    // carried over from a previous line starts at word 0.
+    int istart = (level > 0) ? 0 : -1;
+
+    for (int i=0; i<(int)words.size()-1; i++) {
+        const string w0 = words[i].get_string();
+        const string w1 = words[i+1].get_string();
+
+        if (w0 == "/" && w1 == "*") {
+            if (level == 0) istart = i;
+            level += 1;
+            // Step over the "*" so it cannot also be read as the start
+            // of a closing delimiter.
+            i += 1;
+            continue;
+        }
+
+        if (w0 == "*" && w1 == "/") {
+            if (level == 0) {
+                // No comment is open, so there is no start position to
+                // erase from and the depth must not go negative.
+                cout << "Error in line " << line_number << " umatched */" << endl;
+                i += 1;
+                continue;
+            }
+
+            words.erase(words.begin()+istart, words.begin()+i+2);
+            level -= 1;
+
+            // The word that followed the comment now sits at istart.
+            // Back up one so the loop increment lands on it; otherwise a
+            // delimiter directly after the comment would be missed.
+            i = istart - 1;
+            continue;
+        }
+    }
+
+    // Still inside a comment at the end of the line.
+    if (level > 0) {
+        words.erase(words.begin()+istart, words.end());
+    }
+}
+
+
+// ***************************************************************************
+// ***************************************************************************
+// Miscellaneous
+// ***************************************************************************
+// ***************************************************************************
+
+
+// ===========================================================================
+// Go through every word on the line, check for matching quotes, then remove
+// them.
+// ===========================================================================
+void Cmd::handle_quotes(stringstream &serr, int &ierr)
+{
+    for (int i=0; i<(int)words.size(); i++) {
+        words[i].handle_quotes(serr, ierr);
+    }
+}
+
+
+
+// ===========================================================================
+// The execution line arguments are expected to be of the form
+//     -v r1=5 -v somevar = 14
+// The -v is a keyword indicating that a variable setting follows. There is
+// no $ because the shell does not allow that.
+// At this point the execution line arguments have been parsed into words.
+// This function extracts the variable definitions, inserts the $, and turns
+// them into separate lines, and returns that in string sout. Then the
+// parser can simply add that to the top of the user input file.
+//
+// A -l keyword is handled the same way except that no $ is inserted.
+// ===========================================================================
+void Cmd::handle_exe_args(string &sout)
+{
+    stringstream ss;
+    const int nwords = (int)words.size();
+
+    for (int i=0; i<nwords-1; i++) {
+        // Look for "- v" or "- l".
+        if (words[i].get_string() != "-") continue;
+        const string flag = words[i+1].get_string();
+        if (flag != "v" && flag != "l") continue;
+
+        // Only the -v form gets a $ in front of its first word.
+        const bool is_variable = (flag == "v");
+
+        // Copy the words after the keyword until the next keyword.
+        const int istart = i+2;
+        for (int j=istart; j<nwords; j++) {
+            if (j < nwords-1 &&
+                words[j].get_string() == "-" &&
+                (words[j+1].get_string() == "v" ||
+                 words[j+1].get_string() == "l")) {
+                break;
+            }
+            string sj = words[j].get_string();
+            if (j == istart && is_variable) {
+                sj.insert(sj.begin(), '$');
+            }
+            ss << sj << " ";
+        }
+        ss << endl;
+    }
+    sout = ss.str();
+}
+
+
+// ===========================================================================
+// Consider the following input
+//     2.0, 3.0 e15, -7.0
+// The issue is with the middle two words, "3.0 e15", the old parser ignored
+// the space and treated this as one word, 3.0e15. The new parser treats it
+// as two words.
+//
+// This should have been treated as an input error by the old parser but was
+// not, so now we have to deal with it.
+//
+// This routine detects this situation and allows the calling code to deal
+// with it according to the action input, allowed action values are:
+//
+//     ignore   Treat it as two words and silently continue.
+//     fix      Merge the two words into one word, as the old parser did.
+//     error    Generate a fatal error, force the user to fix it.
+//
+// Only lines whose cmd_type is "command" are examined.
+// ===========================================================================
+void Cmd::deprecated_input01(string action, stringstream &serr, int &ierr)
+{
+    if (cmd_type != "command") return;
+
+    for (int i=0; i<(int)words.size()-2; i++) {
+        // A comma must be found first.
+        if (!words[i].is_comma()) continue;
+
+        // There could be a unary plus or minus on the next number, if so
+        // then skip it.  At this point, the unary plus and minus have not
+        // been merged with their number.
+        int in1 = i+1;
+        string s1;
+        const string spm = words[in1].get_string();
+        if (spm == "+" || spm == "-") {
+            s1 = spm;
+            in1 += 1;
+        }
+        s1 += words[in1].get_string();
+
+        // in1 is where the first number is, it needs to be a number.
+        if (!words[in1].is_number()) continue;
+
+        // in2 is where the second number is, it needs to be a number.
+        int in2 = in1 + 1;
+        if (in2 > (int)words.size() - 1) break;
+        if (!words[in2].is_number()) continue;
+
+        // The first character of the second number should be an e or E.
+        // But at this point we have already detected this and prepended
+        // a 1 on to the word.  So the first character should be 1 and the
+        // second character should be e or E.
+        const string w2 = words[in2].get_string();
+        if (w2.size() < 2 || w2[0] != '1') continue;
+        if (w2[1] != 'e' && w2[1] != 'E') continue;
+
+        // The next word, if present should be a comma.
+        int ic2 = in2 + 1;
+        if (ic2 <= (int)words.size()-1 && !words[ic2].is_comma()) continue;
+
+        // A deprecated input has been found, ignore it, fix it, or
+        // generate a fatal error.
+        if (action == "ignore") continue;
+        if (action == "fix") {
+            words[in2].erase_char(0);
+            merge_words(in1, in2);
+            continue;
+        }
+        if (action == "error") {
+            // The exponent as the user wrote it, without the prepended 1.
+            const string s2 = w2.substr(1);
+
+            words[in1].fatal_error(serr, ierr);
+            serr << "Possible error, detected the following" << endl;
+            serr << " comma digits space exponent comma" << endl;
+            serr << "The digits and exponent are separated by one or more spaces,"
+                 << endl;
+            serr << "this is not allowed for a single number." << endl;
+            serr << "The digits are: " << s1 << " and the exponent is: "
+                 << s2 << endl;
+            serr << "If this is one number, then remove the space." << endl;
+            serr << "If this is two numbers, then put a comma between the"
+                 << " digits and exponent."<< endl;
+            serr << "This error (and the same error in subsequent lines) can"
+                 << endl;
+            serr << "be controlled with the following input file command"
+                 << endl;
+            serr << "and arguments (put before the lines with errors)" << endl;
+            serr << " depcmd_dse argument" << endl;
+            serr << "where argument has one of the following values:" << endl;
+            serr << " fix Silently remove the space, merge into one number" << endl;
+            serr << " ignore Silently treat as two numbers" << endl;
+            serr << " error Generate fatal error (default)" << endl;
+            serr << "While the default is to generate an error, if the" << endl;
+            serr << "command name is matdef, then the default is fix." << endl;
+            ierr = 2;
+            continue;
+        }
+    }
+}
+
+
+
+// ===========================================================================
+// Fatal error
+// This is mainly meant to be called from some other class that does not
+// know about words.
+// Writes the error banner for word iw to serr; ierr is not modified.
+// ===========================================================================
+void Cmd::fatal_error(int iw, stringstream &serr, int &ierr)
+{
+    // To suppress compiler warnings of unused parameters
+    assert(ierr == ierr);
+
+    int lnum = words[iw].get_line_number();
+    int file_lnum = words[iw].get_file_line_number();
+    string fname = words[iw].get_filename();
+    serr << endl;
+    serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl;
+    serr << " " << (*lines)[lnum-1] << endl;
+    serr << "in file: " << fname << endl;
+}
+
+// ===========================================================================
+// This is meant to be called from within this class.
+// ===========================================================================
+// Writes the fatal error banner for this command's own line (file line
+// number, source text and file name) to serr.  ierr is not modified.
+void Cmd::fatal_error2(stringstream &serr, int &ierr)
+{
+    // To suppress compiler warnings of unused parameters
+    assert(ierr == ierr);
+
+    serr << endl
+         << "*** FATAL ERROR in line " << file_line_number << ":" << endl
+         << " " << (*lines)[line_number-1] << endl
+         << "in file: " << filename << endl;
+}
+
+
+// ===========================================================================
+// Warning
+// Same layout as the fatal error banner, but for word iw and labelled as a
+// warning.  ierr is not modified.
+// ===========================================================================
+void Cmd::warning(int iw, stringstream &serr, int &ierr)
+{
+    // To suppress compiler warnings of unused parameters
+    assert(ierr == ierr);
+
+    const int src_index = words[iw].get_line_number() - 1;
+    const int shown_lnum = words[iw].get_file_line_number();
+    const string source_file = words[iw].get_filename();
+
+    serr << endl
+         << "*** WARNING in line " << shown_lnum << ":" << endl
+         << " " << (*lines)[src_index] << endl
+         << "in file: " << source_file << endl;
+}
+
+
+// ===========================================================================
+// This is used when printing duplicate lines warnings.
+// Writes the file name and file line number of word iw in columns of the
+// given widths, then after_lnum, then the source line.  No newline is added.
+// ===========================================================================
+void Cmd::print_duplicate_line(int iw, stringstream &ss, int fn_width,
+                               int lnum_width, string after_lnum)
+{
+    const int src_index = words[iw].get_line_number() - 1;
+    const int shown_lnum = words[iw].get_file_line_number();
+    const string source_file = words[iw].get_filename();
+
+    ss << setw(fn_width) << source_file
+       << setw(lnum_width) << shown_lnum << after_lnum
+       << (*lines)[src_index];
+}
+
+
+// ===========================================================================
+// Get the filename size and line number size for formatting purposes
+// when printing duplicate lines warnings.
+// ===========================================================================
+// fn_width receives the length of the file name of word iw.  lnum_width
+// receives the number of digits in that word's file line number, from 1 up
+// to a maximum of 7 (any value of 1000000 or more reports 7).
+void Cmd::get_duplicate_sizes(int iw, int &fn_width, int &lnum_width)
+{
+    int file_lnum = words[iw].get_file_line_number();
+    string fname = words[iw].get_filename();
+    fn_width = (int)fname.size();
+    // Each test overrides the previous one, so the last threshold that is
+    // met decides the width.
+    lnum_width = 1;
+    if (file_lnum >= 10) lnum_width = 2;
+    if (file_lnum >= 100) lnum_width = 3;
+    if (file_lnum >= 1000) lnum_width = 4;
+    if (file_lnum >= 10000) lnum_width = 5;
+    if (file_lnum >= 100000) lnum_width = 6;
+    if (file_lnum >= 1000000) lnum_width = 7;
+}
+
+
+// ===========================================================================
+// Look at the places where this function is called to understand the
+// following indices.
+//     wdex = i     Index into the words array.
+//     cdex = k     C index in the output array.
+//
+// Counts how many times output element cdex has been given a value.  The
+// first time, this command and wdex are remembered in dup_cmd1/dup_wdex1.
+// On any later time, unless dup_fatal is 0, a message naming both lines is
+// written to serr: as a fatal error when dup_fatal is 2, as a warning when
+// it is 1.  ierr is then set to 3 for the fatal case and 1 otherwise.
+//
+// NOTE(review): the element types of the vector parameters are missing from
+// this text.  From their use below they look like int, Cmd*, int and int --
+// confirm against the class declaration.
+// ===========================================================================
+void Cmd::error_dup_line(string &cname, int wdex, int cdex,
+                         vector &dup_wdex1, vector &dup_cmd1,
+                         vector &dup_vals, const vector &size,
+                         int dup_fatal, stringstream &serr, int &ierr)
+{
+    // First value for this element: remember where it came from.
+    if (dup_vals[cdex] == 0) {
+        dup_cmd1[cdex] = this;
+        dup_wdex1[cdex] = wdex;
+    }
+    dup_vals[cdex] += 1;
+
+    // Duplicate checking is switched off.
+    if (dup_fatal == 0) return;
+
+    if (dup_vals[cdex] > 1) {
+        // Where the first value for this element was set.
+        int wdex1 = dup_wdex1[cdex];
+        Cmd *cmd = dup_cmd1[cdex];
+
+        // Get the dimension of the array, 0,1,2,3,...
+        int dim = (int)size.size();
+
+        if (dup_fatal == 2) words[wdex].fatal_error(serr, ierr);
+        if (dup_fatal == 1) words[wdex].warning(serr, ierr);
+        int tot_size = 1;
+        // NOTE(review): part of the next statement is missing from this
+        // text (the loop over ts and the declaration of irdices).
+        for (int ts=0; ts irdices(dim, 0);
+        Parser_utils putils(index_base);
+        putils.reverse_dex(cdex, tot_size, irdices, size);
+        serr << "A duplicate value has been specified for: " << cname << "(";
+        // NOTE(review): part of the next statement is missing from this
+        // text (the loop over irdex and the start of the output that
+        // reports the first line).
+        for (int irdex=0; irdexget_file_line_number(wdex1) << endl;
+        string fname = cmd->get_filename(wdex1);
+        serr << " " << (*lines)[cmd->get_line_number(wdex1)-1] <<
+            endl;
+        serr << "in file: " << fname << endl;
+
+        // Tell the user how to change the severity of this check.
+        if (dup_fatal == 2) {
+            serr << "This fatal error can be turned into a warning with the command " <<
+                endl << " duplicate_array_values = warning" << endl;
+        }
+        if (dup_fatal == 1) {
+            serr << "This warning can be turned into a fatal error with the command " <<
+                endl << " duplicate_array_values = fatal" << endl;
+        }
+        serr << "Duplicate array value checking can be turned off totally with" <<
+            endl << " duplicate_array_values = none" << endl;
+        serr << "This is not recommended since you will lose the opportunity" <<
+            endl << "to check for legimate errors in your input." << endl;
+
+        // 1 marks a warning; it is raised to 3 when duplicates are fatal.
+        ierr = 1;
+        if (dup_fatal == 2) ierr = 3;
+    }
+}
+
+
+// ===========================================================================
+// There are some commands that can be written as two words such as "end if",
+// "else if", and "end do". Find these and combine them into one word.
+// ===========================================================================
+// Two passes are made.  The first joins "+ +" into "++" and "- -" into
+// "--" anywhere on the line.  The second joins a two word keyword at the
+// start of the line.
+void Cmd::handle_two_words()
+{
+    // Doubled operators, all of the "+" pairs first and then the "-" pairs.
+    // A pair is joined only when it ends the line or when the word after it
+    // is not a number or variable.
+    const char *doubled_ops[] = { "+", "-" };
+    for (int k=0; k<2; k++) {
+        const string op = doubled_ops[k];
+
+        for (int i=0; i<(int)words.size()-1; i++) {
+            if (words[i].get_string() != op) continue;
+            if (words[i+1].get_string() != op) continue;
+
+            const int after = i + 2;
+            if (after < (int)words.size() && words[after].is_numvar()) continue;
+
+            int lnum = words[i].get_line_number();
+            int file_lnum = words[i].get_file_line_number();
+            string fname = words[i].get_filename();
+            string s = op + op;
+            Word w(s, lnum, file_lnum, fname, lines);
+            replace_words(i, i+1, w);
+        }
+    }
+
+    // The rest of these have at least two words on the line,
+    // like "end subroutine".
+    if ((int)words.size() < 2) return;
+
+    // Common settings.
+    int lnum = words[0].get_line_number();
+    int file_lnum = words[0].get_file_line_number();
+    string fname = words[0].get_filename();
+
+    // First word, second word, and the single word that replaces them.
+    // No joined word is itself a first word in this table, so at most one
+    // entry can ever apply to a line.
+    static const char *keyword_pairs[][3] = {
+        { "end",  "do",         "enddo"         },
+        { "end",  "subroutine", "endsubroutine" },
+        { "end",  "when",       "endwhen"       },
+        { "end",  "if",         "endif"         },
+        { "else", "if",         "elseif"        }
+    };
+    const int npairs = (int)(sizeof(keyword_pairs) / sizeof(keyword_pairs[0]));
+
+    for (int k=0; k<npairs; k++) {
+        if (words[0].get_string() != keyword_pairs[k][0]) continue;
+        if (words[1].get_string() != keyword_pairs[k][1]) continue;
+
+        string s = keyword_pairs[k][2];
+        Word w(s, lnum, file_lnum, fname, lines);
+        replace_words(0, 1, w);
+        break;
+    }
+}
+
+
+// ===========================================================================
+// Check for end of input. There are several ways user input ends:
+// End of file
+// Encounter a stop command
+// Encounter a fatal error command.
+// ===========================================================================
+// Returns true when the first word on the line is "stop" or "fatal_error",
+// false otherwise.  For "fatal_error" the line and the rest of its text are
+// written to serr and ierr is set to 2.
+bool Cmd::check_input_end(bool kill_run, stringstream &serr, int &ierr)
+{
+    // To suppress compiler warnings of unused parameters
+    assert(kill_run == kill_run);
+
+    const string cmd = words[0].get_string();
+    if (cmd == "stop") return true;
+    if (cmd != "fatal_error") return false;
+
+    const int src_index = words[0].get_line_number() - 1;
+    const int shown_lnum = words[0].get_file_line_number();
+    const string source_file = words[0].get_filename();
+
+    serr << endl
+         << "*** User has issued a fatal_error command in line "
+         << shown_lnum << ":" << endl
+         << " " << (*lines)[src_index] << endl
+         << "in file: " << source_file << endl
+         << endl << "The user supplied fatal_error message is: " << endl
+         << " ";
+
+    // The message is whatever follows the command on the source line.  It
+    // is taken to start 12 characters after the first "f", i.e. past
+    // "fatal_error" and the one character that follows it.
+    string text = (*lines)[src_index];
+    int first_f = text.find("f", 0);
+    int msg_start = first_f + 12;
+    if (msg_start < (int)text.size()) serr << text.substr(msg_start);
+    serr << endl;
+
+    ierr = 2;
+    return true;
+}
+
+
+
+
+// ***************************************************************************
+// ***************************************************************************
+// Operations on the deque of words.
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Find the first occurrence of string s in part of the line.
+// Words i1 through i2 inclusive are searched; -1 means not found.
+// ===========================================================================
+int Cmd::find(int i1, int i2, string s)
+{
+    int pos = i1;
+    while (pos <= i2 && words[pos].get_string() != s) {
+        pos++;
+    }
+    return (pos <= i2) ? pos : -1;
+}
+
+
+// ===========================================================================
+// Find the first occurrence of any character in string s in part of
+// the line.
+// ===========================================================================
+// Words i1 through i2 inclusive are searched.  A word matches when it is
+// exactly one character long and that character appears in s.  Returns the
+// index of the first matching word, or -1 if there is none.
+int Cmd::find_any_char(int i1, int i2, string s)
+{
+    for (int i=i1; i<=i2; i++) {
+        const string w = words[i].get_string();
+        if (w.size() == 1 && s.find(w[0]) != string::npos) {
+            return i;
+        }
+    }
+    return -1;
+}
+
+
+// ===========================================================================
+// Find the last occurrence of string s in the line.
+// Words i1 through i2 inclusive are searched; -1 means not found.
+// ===========================================================================
+int Cmd::find_last(string s, int i1, int i2)
+{
+    // Scan backwards so the first hit is the last occurrence.
+    for (int i=i2; i>=i1; i--) {
+        if (words[i].get_string() == s) return i;
+    }
+    return -1;
+}
+
+
+// ===========================================================================
+// Given an input string, fstr, and a sub string, subs, find the first
+// occurrence of subs in fstr. Return in the string vector, vs, the string
+// to the left of subs, if any, the sub string itself, subs, and the string
+// to the right of subs, if any.
+//
+// Returns false, with vs empty, when subs is not found or when fstr is
+// exactly subs; returns true otherwise.
+// ===========================================================================
+bool Cmd::separate_str(string &subs, string &fstr, vector<string> &vs)
+{
+    // Make sure the return vector is empty.
+    vs.clear();
+
+    // Find the sub string, if not found, then nothing more to do.
+    const string::size_type loc = fstr.find(subs);
+    if (loc == string::npos) return false;
+
+    // If fstr only contains subs and nothing more, then there is nothing
+    // to do.
+    if (subs == fstr) return false;
+
+    // Anything to the left of the sub string is the first string returned.
+    if (loc > 0) {
+        vs.push_back(fstr.substr(0, loc));
+    }
+
+    // The sub string itself is the second string returned.
+    vs.push_back(subs);
+
+    // Anything to the right of the sub string is the third string returned.
+    const string::size_type iend = loc + subs.size();
+    if (iend < fstr.size()) {
+        vs.push_back(fstr.substr(iend));
+    }
+
+    return true;
+}
+
+
+// ===========================================================================
+// Find the location of the closing symbol that matches the opening symbol.
+// Symbol examples are (), [], {}
+// We assume that the opening symbol has been found and we are starting the
+// search after the opening symbol location.
+// Nesting is handled, for example, (...(...(...)...)...)
+// Returns -1 when the line ends before the matching symbol is found.
+// ===========================================================================
+int Cmd::find_closing_symbol(string opensym, string closesym, int i1)
+{
+    // Number of nested opening symbols seen that are still unclosed.
+    int level = 0;
+    for (int i=i1; i<(int)words.size(); i++) {
+        const string w = words[i].get_string();
+        if (w == opensym) {
+            level += 1;
+        }
+        else if (w == closesym) {
+            if (level == 0) return i;
+            level -= 1;
+        }
+    }
+    return -1;
+}
+
+
+// ===========================================================================
+// Delete words i1 through i2 inclusive from the deque.
+// ===========================================================================
+void Cmd::delete_words(int i1, int i2)
+{
+    deque<Word>::iterator p = words.begin();
+    words.erase(p + i1, p + i2 + 1);
+}
+
+
+// ===========================================================================
+// Replace words i1 through i2 inclusive with word w.
+// ===========================================================================
+void Cmd::replace_words(int i1, int i2, Word &w)
+{
+    delete_words(i1, i2);
+    // The erase invalidated any earlier iterator, so take a fresh one.
+    deque<Word>::iterator p = words.begin();
+    words.insert(p + i1, w);
+}
+
+
+// ===========================================================================
+// Replace words i1 through i2 inclusive with all the words in vector vw.
+// ===========================================================================
+void Cmd::replace_words(int i1, int i2, vector<Word> &vw)
+{
+    delete_words(i1, i2);
+    // One range insert keeps the words of vw in order at position i1 and
+    // avoids shifting the tail of the deque once per inserted word.
+    words.insert(words.begin() + i1, vw.begin(), vw.end());
+}
+
+
+// ===========================================================================
+// Merge words i1 through i2 inclusive into one word located at i1, remove
+// words i1+1 through i2 inclusive.
+// The merged word takes its line number, file line number and file name
+// from word i1.
+// ===========================================================================
+void Cmd::merge_words(int i1, int i2)
+{
+    int lnum = words[i1].get_line_number();
+    int file_lnum = words[i1].get_file_line_number();
+    string fname = words[i1].get_filename();
+
+    // Concatenate the text of every word in the range, in order.
+    string merged;
+    for (int i = i1; i <= i2; i++) {
+        merged += words[i].get_string();
+    }
+
+    Word w(merged, lnum, file_lnum, fname, lines);
+    replace_words(i1, i2, w);
+}
+
+
+// ===========================================================================
+// Find the equals sign on the line.
+// Returns the index of the first word that is exactly "=", or -1 if none.
+// ===========================================================================
+int Cmd::find_equals()
+{
+    for (int i = 0; i < (int)words.size(); i++) {
+        if (words[i].get_string() == "=") return i;
+    }
+    return -1;
+}
+
+
+
+// ***************************************************************************
+// ***************************************************************************
+// Handle processed flags.
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Set all the processed flags in the line to be false.
+// ===========================================================================
+void Cmd::clear_processed()
+{
+    for (int i = 0; i < (int)words.size(); i++) {
+        words[i].set_processed(false);
+    }
+}
+
+// Set the processed flag of every word on the line to ip.
+void Cmd::set_processed(bool ip)
+{
+    for (int i = 0; i < (int)words.size(); i++) {
+        words[i].set_processed(ip);
+    }
+}
+
+
+
+// ===========================================================================
+// Check processed flags for each word.
+// On any unprocessed word, good is set to false, a message is appended to
+// serr and ierr is set to 2. good is left untouched when all is well.
+// ===========================================================================
+void Cmd::check_processed(bool &good, stringstream &serr, int &ierr)
+{
+    // A line with no words has nothing left unprocessed. Returning here
+    // also keeps the words[0] access below off an empty deque.
+    if (words.empty()) return;
+
+    // First we check to see if any of the words on the line have been
+    // processed. If none of the words have been processed, then we print
+    // the entire line as an error. This saves the user from having to
+    // wade through an error print for every word on the line.
+    bool any_processed = false;
+    for (int i = 0; i < (int)words.size(); i++) {
+        if (words[i].get_processed()) {
+            any_processed = true;
+            break;
+        }
+    }
+
+    if (!any_processed) {
+        good = false;
+        words[0].fatal_error(serr, ierr);
+        serr << "This line has not been processed." << endl;
+        ierr = 2;
+        return;
+    }
+
+    // At least one word on the line has been processed.
+    // Check all the words on the line, throw an error for any word not
+    // processed.
+    for (int i = 0; i < (int)words.size(); i++) {
+        if (words[i].get_processed()) continue;
+
+        good = false;
+        words[i].fatal_error(serr, ierr);
+        serr << "A word on this line has not been processed." << endl;
+        serr << "Not proccessed word = " << words[i].get_string() << endl;
+        ierr = 2;
+    }
+}
+
+
+// ***************************************************************************
+// ***************************************************************************
+// Debug
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Print all the words on the line mainly for debugging.
+// Output is to a stringstream so the calling method can decide what to do
+// with the output - send it to the screen, use it for testing, whatever.
+// The no-argument overload writes the same text to cout.
+// ===========================================================================
+void Cmd::print_all_words()
+{
+    stringstream ss;
+    print_all_words(ss);
+    cout << ss.str();
+}
+
+void Cmd::print_all_words(stringstream &ss)
+{
+    ss << "*** Command name = " << cmd_name << endl;
+    for (int i = 0; i < (int)words.size(); i++) {
+        // print_type writes into a stream, so capture it per word.
+        stringstream type_text;
+        words[i].print_type(type_text);
+        ss << words[i].get_string() << " " << type_text.str() << endl;
+    }
+    ss << endl;
+}
+
+
+// ===========================================================================
+// Print all the words on the line mainly for debugging.
+// Output is to a stringstream so the calling method can decide what to do
+// with the output - send it to the screen, use it for testing, whatever.
+// ===========================================================================
+void Cmd::print_using_words(stringstream &ss)
+{
+    for (int i = 0; i < (int)words.size(); i++) {
+        // The first word is passed enc_quote == false, all others true.
+        const bool enc_quote = (i != 0);
+        ss << words[i].get_print_string(enc_quote) << " ";
+    }
+}
+
+// ===========================================================================
+// Another version of printing all the words on the line.
+// This version is mainly for printing out the final cmds buffer.
+//
+// The output is formatted, commas are put back in, spaces are handled better,
+// if a line is too long (see nctot_max), it is split into more than one line.
+//
+// For example, suppose the words on a line were
+// a4d ( 1 1 1 2 ) = -3.4 4.7 5.2 4.6e19
+// spaces are used to delimit the words, but it is not very readable. This
+// routine will print the above line as
+// a4d(1, 1, 1, 2) = -3.4, 4.7, 5.2, 4.6e19
+// This has the added advantage that spaces can be eliminated and a compact
+// form can be achieved.
+//
+// Another example is the line
+// strinsert_cmd01 = Use The Force
+// The string "Use The Force" is actually one word, even though it appears to
+// be three words. This routine prints this correctly as
+// strinsert_cmd01 = "Use The Force"
+//
+// A command with no words produces no output.
+// ===========================================================================
+void Cmd::print_using_words_fm(stringstream &ss)
+{
+    // With no words the inner loop below never runs, so "done" would never
+    // be set and the outer loop would emit newlines forever.
+    if (words.empty()) return;
+
+    //debug_print_words("print_using_words_fm");
+    int nctot_max = 75;     // Wrap once an output line would exceed this.
+    int istart = 0;         // First word of the output line being built.
+    int ieq = -1;           // Index of the last "=" seen, -1 if none yet.
+    int ip1 = -1;           // Index of the last "(" seen, -1 if none yet.
+    int ip2 = -1;           // Index of the last ")" seen, -1 if none yet.
+    for (;;) {
+        // Continuation lines get a leading space.
+        if (istart > 0) ss << " ";
+        int nctot = 0;
+        bool done = false;
+        for (int i=istart; i<(int)words.size(); i++) {
+            bool enc_quote = true;
+            if (i == 0) enc_quote = false;
+            string s = words[i].get_print_string(enc_quote);
+            if (s == "=") ieq = i;
+            if (s == "(") ip1 = i;
+            if (s == ")") ip2 = i;
+
+            // Look one word ahead so a ")" is known before it is printed.
+            string sp = "";
+            if (i<(int)words.size()-1) {
+                sp = words[i+1].get_print_string(enc_quote);
+            }
+            if (sp == ")") ip2 = i+1;
+
+            // Start a new output line at this word if it does not fit.
+            // i > istart guarantees at least one word per output line.
+            int nc = (int)s.size();
+            if ((i>istart) && (nc+1+nctot > nctot_max)) {
+                istart = i;
+                break;
+            }
+
+            if (i >= (int)words.size()-1) {
+                // Last word: no separator follows it.
+                done = true;
+                ss << s;
+            }
+            else {
+                // Choose the separator that follows this word.
+                string endstr = " ";
+                int endinc = 1;
+
+                // Words after the equals sign are comma separated.
+                if (ieq > -1) {
+                    if (i > ieq) {
+                        endstr = ", ";
+                        endinc = 2;
+                    }
+                }
+
+                // Words inside parentheses are comma separated, except
+                // the one directly before the closing parenthesis.
+                if (ip1 > -1 && (ip2 == -1 || i < ip2)) {
+                    if (i > ip1 && (ip2 == -1 || i < ip2-1)) {
+                        endstr = ", ";
+                        endinc = 2;
+                    }
+                }
+
+                // No separator after "(", before ")", or between the
+                // first word and a "(" that follows it.
+                if (endstr == " ") {
+                    if (s == "(") {
+                        endstr = "";
+                        endinc = 0;
+                    }
+                    if (i == ip2-1) {
+                        endstr = "";
+                        endinc = 0;
+                    }
+                    if (i == 0 && sp == "(") {
+                        endstr = "";
+                        endinc = 0;
+                    }
+                }
+                ss << s << endstr;
+                nctot += nc + endinc;
+            }
+        }
+        if (done) break;
+        ss << endl;
+    }
+}
+
+
+// ===========================================================================
+// Print the original command before processing, mainly for debugging.
+// Output is to a stringstream so the calling method can decide what to do
+// with the output - send it to the screen, use it for testing, whatever.
+// ===========================================================================
+void Cmd::print_original_string(stringstream &ss)
+{
+    ss << original_str;
+}
+
+
+// ===========================================================================
+// This is mainly for debugging this class. It prints on all procs.
+// Writes the label s on one line, then every word on the next line.
+// ===========================================================================
+void Cmd::debug_print_words(string s)
+{
+    cout << s << endl;
+    cout << " ";
+    for (int i=0; i<(int)words.size(); i++) {
+        // The first word is passed enc_quote == false, all others true.
+        const bool enc_quote = (i != 0);
+        cout << words[i].get_print_string(enc_quote) << " ";
+    }
+    cout << endl;
+}
+
+
+
+
+
+
+} // End of the PP namespace
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Comm.hh
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Comm.hh
@@ -0,0 +1,93 @@
+/* Copyright 2015. Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. 
If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy + * of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * Under this license, it is required to include a reference to this work. We + * request that each derivative work contain a reference to LANL Copyright + * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly + * measured. + * + * This is LANL Copyright Disclosure C15076/LA-CC-15-054 + */ + +/* + * PowerParser is a general purpose input file parser for software applications. 
+ *
+ * Authors: Chuck Wingate XCP-2 caw@lanl.gov
+ * Robert Robey XCP-2 brobey@lanl.gov
+ */
+
+#ifndef COMMHHINCLUDE
+#define COMMHHINCLUDE
+
+#ifdef __MPI__
+#define HAVE_MPI
+#endif
+
+#ifdef HAVE_MPI
+#define MPI_NO_CPPBIND
+#include "mpi.h"
+#endif
+
+namespace PP {
+
+// Holds the process layout used by the parser (number of processes, this
+// process' rank, and the rank that does I/O) and declares the broadcast
+// and abort operations implemented in Comm.cc.
+class Comm
+{
+    public:
+
+    Comm();
+    ~Comm();
+
+    // Broadcast count chars held in buffer.
+    void broadcast(char *buffer, int count);
+
+    // Abort the run.
+    void global_abort_parser();
+
+    // Rank of this process.
+    int getProcRank() const { return mype; }
+
+    // Total number of processes.
+    int getNumProcs() const { return npes; }
+
+    // Rank of the process that does I/O.
+    int getIORank() const { return iope; }
+
+    // True when this process is the I/O process.
+    bool isIOProc() const { return mype == iope; }
+
+
+    private:
+    // Flag owned by the constructor and destructor in Comm.cc.
+    int init_flag;
+
+    // Number of processes, this process' rank, and the I/O rank.
+    int npes, mype, iope;
+
+    //Comm(const Comm&);
+    //Comm& operator=(const Comm&);
+};
+
+
+} // End of PP namespace
+
+#endif
+
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Comm.cc
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Comm.cc
@@ -0,0 +1,117 @@
+/* Copyright 2015. Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. 
You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ * PowerParser is a general purpose input file parser for software applications.
+ *
+ * Authors: Chuck Wingate XCP-2 caw@lanl.gov
+ * Robert Robey XCP-2 brobey@lanl.gov
+ */
+
+#include
+#include
+#include "Comm.hh"
+
+namespace PP {
+
+
+// ===========================================================================
+// Constructor
+// Starts as a single-process layout (npes = 1, mype = 0, iope = 0). With
+// MPI, initializes MPI if nobody has yet and reads the real size and rank.
+// ===========================================================================
+Comm::Comm()
+    : init_flag(0), npes(1), mype(0), iope(0)
+{
+    // init_flag is initialized above on every build, not only under MPI,
+    // so the member never holds an indeterminate value.
+#ifdef HAVE_MPI
+    int init_check;
+    MPI_Initialized(&init_check);
+
+    if (! init_check) {
+        // Only way for init_flag to be true is here; must be false otherwise
+        init_flag = 1;
+        // No command line is available here. MPI permits null arguments,
+        // which avoids claiming argc == 1 while passing a NULL argv.
+        MPI_Init(NULL, NULL);
+    }
+
+    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
+    MPI_Comm_size(MPI_COMM_WORLD, &npes );
+    MPI_Comm_rank(MPI_COMM_WORLD, &mype );
+#endif
+}
+
+// ===========================================================================
+// Destructor
+// Finalizes MPI only if this object was the one that initialized it.
+// ===========================================================================
+Comm::~Comm()
+{
+#ifdef HAVE_MPI
+    if (init_flag) {
+        init_flag = 0;
+        MPI_Finalize();
+    }
+#endif
+}
+
+// ===========================================================================
+// Broadcast
+// Sends count chars in buffer from rank 0 to all ranks. Does nothing when
+// there is only one process or when built without MPI.
+// ===========================================================================
+void Comm::broadcast(char *buffer, int count)
+{
+    // Both parameters are unused without MPI. A void cast silences the
+    // warning even when NDEBUG compiles assert() away.
+    (void)buffer;
+    (void)count;
+
+    if (npes == 1) return;
+#ifdef HAVE_MPI
+    MPI_Bcast(buffer, count, MPI_CHAR, 0, MPI_COMM_WORLD);
+#endif
+}
+
+// ===========================================================================
+// Error handling
+// Aborts all ranks under MPI, then exits this process with status 1.
+// ===========================================================================
+void Comm::global_abort_parser()
+{
+#ifdef HAVE_MPI
+    MPI_Abort(MPI_COMM_WORLD, 1);
+#endif
+    exit(1);
+}
+// ===========================================================================
+} // End of PP namespace
+
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Function.hh
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Function.hh
@@ -0,0 +1,120 @@
+/* Copyright 2015. Los Alamos National Security, LLC. This material was produced
+ * under U.S. 
Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy + * of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * Under this license, it is required to include a reference to this work. We + * request that each derivative work contain a reference to LANL Copyright + * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly + * measured. + * + * This is LANL Copyright Disclosure C15076/LA-CC-15-054 + */ + +/* + * PowerParser is a general purpose input file parser for software applications. + * + * Authors: Chuck Wingate XCP-2 caw@lanl.gov + * Robert Robey XCP-2 brobey@lanl.gov + */ + +#ifndef FUNCTIONHHINCLUDE +#define FUNCTIONHHINCLUDE + +// *************************************************************************** +// *************************************************************************** +// This class holds information about a function. It is mostly for use with +// the parser. 
+// ***************************************************************************
+// ***************************************************************************
+
+#include
+#include
+#include
+#include
+
+namespace PP
+{
+using std::string;
+using std::deque;
+using std::stringstream;
+using std::vector;
+
+enum FuncType {FUNC_};
+
+//class ErrorState;
+
+// Describes one function known to the parser (name, argument count, type,
+// description) and evaluates it for double or string arguments.
+class Function
+{
+
+public:
+    Function();
+    Function(string nme, bool ext, int na, string ftype, string fdes);
+
+    // Evaluate the function for double arguments, vd, giving a double.
+    // On error a message is appended to serr, ierr is set to 2 and 0. is
+    // returned. line_number is the 1-based index into *lines of the input
+    // line being processed; file_line_number and filename are only used
+    // in the error text.
+    double evaluate(vector<double> &vd, stringstream &serr, int &ierr,
+                    int line_number, int file_line_number,
+                    string filename, deque<string> *lines);
+
+    // Evaluate the function for string arguments, vs, giving a string.
+    // On error a message is appended to serr, ierr is set to 2 and an
+    // empty string is returned.
+    string evaluate(vector<string> &vs, stringstream &serr, int &ierr,
+                    int line_number, int file_line_number,
+                    string filename, deque<string> *lines);
+
+    // Accessor methods.
+    string get_name() const { return name; }
+    int get_num_args() const { return nargs; }
+    string get_description() const { return description; }
+    string get_type() const { return type; }
+
+private:
+
+    // Report that name is not a recognized function; sets ierr to 2.
+    void name_err(stringstream &serr, int &ierr,
+                  int line_number, int file_line_number,
+                  string filename, deque<string> *lines);
+
+    // Report an argument count mismatch; sets ierr to 2.
+    void args_mismatch_err(int nargs_found, int nargs_expected,
+                           stringstream &serr, int &ierr,
+                           int line_number, int file_line_number,
+                           string filename, deque<string> *lines);
+
+    // The name of the function.
+    string name;
+
+    // Whether the function is external or internal. External functions
+    // are C++ functions like sin(), log(), ... Internal functions
+    // are defined within the input to the parser (this feature is not
+    // implemented yet).
+    bool external;
+
+    // The number of arguments for the function.
+    int nargs;
+
+    // A text description of the function.
+    string description;
+
+    // The type of function. Allowed types are:
+    // real double arguments, double result (cos, sin, log, ...)
+    // string string arguments, string results (strlen, strcat, ...)
+    string type;
+};
+
+
+} // End of the PP namespace
+
+#endif
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Function.cc
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Function.cc
@@ -0,0 +1,324 @@
+/* Copyright 2015. Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ * PowerParser is a general purpose input file parser for software applications. 
+ *
+ * Authors: Chuck Wingate XCP-2 caw@lanl.gov
+ * Robert Robey XCP-2 brobey@lanl.gov
+ */
+
+// ***************************************************************************
+// ***************************************************************************
+// This class holds information about a function. It is mostly for use with
+// the parser.
+// ***************************************************************************
+// ***************************************************************************
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "stdio.h"
+#include "stdlib.h"
+
+#include "Function.hh"
+
+namespace PP
+{
+using std:: string;
+using std::cout;
+using std::endl;
+using std::deque;
+using std::stringstream;
+using std::setprecision;
+using std::vector;
+
+
+// ===========================================================================
+// Default constructor.
+// Builds a placeholder: an external, one-argument "real" function with a
+// sentinel name and a one-space description.
+// ===========================================================================
+Function::Function()
+    : name("__NO_NAME_GIVEN__"),
+      external(true),
+      nargs(1),
+      description(" "),
+      type("real")
+{
+}
+
+
+// ===========================================================================
+// Most used constructor for functions.
+// Stores each argument in the matching member: nme is the name, ext the
+// external flag, na the argument count, ftype the type, fdes the
+// description.
+// ===========================================================================
+Function::Function(string nme, bool ext, int na, string ftype, string fdes)
+    : name(nme),
+      external(ext),
+      nargs(na),
+      description(fdes),
+      type(ftype)
+{
+}
+
+
+// ===========================================================================
+// Evaluate the function. This is for the case that the arguments all have
+// values (double type values) and the function can be evaluated to a double. 
+// On error a message is appended to serr, ierr is set to 2 and 0. is
+// returned.
+// ===========================================================================
+double Function::evaluate(vector<double> &vd, stringstream &serr, int &ierr,
+                          int line_number, int file_line_number,
+                          string filename, deque<string> *lines)
+{
+    // Verify that the number of args needed is equal to the number of args
+    // supplied.
+    int nvd = (int)vd.size();
+    if (nvd != nargs) {
+        args_mismatch_err(nvd, nargs, serr, ierr, line_number,
+                          file_line_number, filename, lines);
+        return 0.;
+    }
+
+    // Functions with one argument.
+    if (nargs == 1) {
+        double d = vd[0];
+
+        // acos and asin are only defined on [-1, 1]; report anything else
+        // instead of handing it to the math library.
+        if (name == "acos") {
+            if (d < -1. || d > 1.) {
+                serr << endl;
+                serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl;
+                serr << " " << (*lines)[line_number-1] << endl;
+                serr << "in file: " << filename << endl;
+                serr << "Argument to acos is out of bounds." << endl;
+                serr << "Argument = " << d << endl;
+                serr << "This must be between -1. and 1." << endl;
+                ierr = 2;
+                return 0.;
+            }
+            return acos(d);
+        }
+
+        if (name == "asin") {
+            if (d < -1. || d > 1.) {
+                serr << endl;
+                serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl;
+                serr << " " << (*lines)[line_number-1] << endl;
+                serr << "in file: " << filename << endl;
+                serr << "Argument to asin is out of bounds." << endl;
+                serr << "Argument = " << d << endl;
+                serr << "This must be between -1. and 1." << endl;
+                ierr = 2;
+                return 0.;
+            }
+            return asin(d);
+        }
+
+        if (name == "atan") return atan(d);
+        if (name == "ceil") return ceil(d);
+        if (name == "cos") return cos(d);
+        if (name == "cosh") return cosh(d);
+        if (name == "exp") return exp(d);
+        if (name == "fabs") return fabs(d);
+        if (name == "floor") return floor(d);
+        if (name == "log") return log(d);
+        if (name == "log10") return log10(d);
+        if (name == "sin") return sin(d);
+        if (name == "sinh") return sinh(d);
+        if (name == "sqrt") return sqrt(d);
+        if (name == "tan") return tan(d);
+        if (name == "tanh") return tanh(d);
+    }
+
+    // Functions with two arguments.
+    if (nargs == 2) {
+        double d1 = vd[0];
+        double d2 = vd[1];
+
+        if (name == "atan2") return atan2(d1, d2);
+        if (name == "fmod") return fmod(d1, d2);
+
+        // Ties, and comparisons that are false, return the second argument.
+        if (name == "max") return (d1 > d2) ? d1 : d2;
+        if (name == "min") return (d1 < d2) ? d1 : d2;
+
+        if (name == "pow") {
+            // Only strictly positive bases are accepted.
+            if (d1 <= 0.) {
+                serr << endl;
+                serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl;
+                serr << " " << (*lines)[line_number-1] << endl;
+                serr << "in file: " << filename << endl;
+                serr << "First argument (base) to pow is out of bounds." << endl;
+                serr << "Argument = " << d1 << endl;
+                serr << "This must be greater than 0." << endl;
+                ierr = 2;
+                return 0.;
+            }
+            return pow(d1, d2);
+        }
+    }
+
+
+    // If we get down to this point, then the name supplied at
+    // construction was not recognized as a function name.
+    // This should never happen because we check for a valid function
+    // name before entering this function.
+    name_err(serr, ierr, line_number, file_line_number, filename, lines);
+    return 0.;
+}
+
+
+// ===========================================================================
+// Evaluate the function. This is for string functions.
+// Positions passed to strerase, strinsert and strsubstr are 1-based.
+// On error a message is appended to serr, ierr is set to 2 and an empty
+// string is returned.
+// ===========================================================================
+string Function::evaluate(vector<string> &vs, stringstream &serr, int &ierr,
+                          int line_number, int file_line_number,
+                          string filename, deque<string> *lines)
+{
+    // Verify that the number of args needed is equal to the number of args
+    // supplied.
+    int nvs = (int)vs.size();
+    if (nvs != nargs) {
+        args_mismatch_err(nvs, nargs, serr, ierr, line_number,
+                          file_line_number, filename, lines);
+        return "";
+    }
+
+    // Functions with one argument.
+    if (nargs == 1) {
+        string s1 = vs[0];
+        if (name == "strlen") {
+            stringstream ss;
+            ss << (int)s1.size();
+            return ss.str();
+        }
+
+        if (name == "strtrim") {
+            // Remove trailing blanks and tabs. A string that is empty or
+            // made only of blanks and tabs is returned unchanged.
+            const string::size_type iend = s1.find_last_not_of(" \t");
+            if (iend == string::npos) return s1;
+            s1.erase(iend + 1);
+            return s1;
+        }
+    }
+
+    // Functions with two arguments.
+    if (nargs == 2) {
+        string s1 = vs[0];
+        string s2 = vs[1];
+        if (name == "strcat") {
+            return s1+s2;
+        }
+    }
+
+    // Functions with three arguments.
+    if (nargs == 3) {
+        string s1 = vs[0];
+        string s2 = vs[1];
+        string s3 = vs[2];
+
+        if (name == "strerase" || name == "strinsert" || name == "strsubstr") {
+            // All three take a 1-based position in their second argument.
+            int i1 = atoi(s2.c_str()) - 1; // minus 1 to get c index
+
+            // std::string erase/insert/substr throw std::out_of_range
+            // when the position is past the end, and a negative int
+            // converts to a huge unsigned position. The position comes
+            // from user input, so report it rather than let an exception
+            // escape the parser.
+            if (i1 < 0 || i1 > (int)s1.size()) {
+                serr << endl;
+                serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl;
+                serr << " " << (*lines)[line_number-1] << endl;
+                serr << "in file: " << filename << endl;
+                serr << "Position argument to " << name << " is out of bounds." << endl;
+                serr << "Position = " << s2 << endl;
+                serr << "This must be between 1 and " << (s1.size() + 1) << "." << endl;
+                ierr = 2;
+                return "";
+            }
+
+            if (name == "strerase") {
+                // Erase positions i1 through i2 inclusive. As before, a
+                // count that is negative or runs past the end erases to
+                // the end of the string.
+                int i2 = atoi(s3.c_str()) - 1;
+                s1.erase(i1, i2-i1+1);
+                return s1;
+            }
+
+            if (name == "strinsert") {
+                s1.insert(i1, s3);
+                return s1;
+            }
+
+            // strsubstr: nchar characters starting at i1.
+            int nchar = atoi(s3.c_str());
+            return s1.substr(i1, nchar);
+        }
+    }
+
+    // If we get down to this point, then the name supplied at
+    // construction was not recognized as a function name.
+    // This should never happen because we check for a valid function
+    // name before entering this function.
+    name_err(serr, ierr, line_number, file_line_number, filename, lines);
+    return "";
+}
+
+
+// ===========================================================================
+// Name not recognized error.
+// Appends the message to serr and sets ierr to 2.
+// ===========================================================================
+void Function::name_err(stringstream &serr, int &ierr,
+                        int line_number, int file_line_number,
+                        string filename, deque<string> *lines)
+{
+    serr << endl;
+    serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl;
+    // Echo the offending input line only when line_number (1-based) really
+    // indexes *lines; an error report must not itself read out of bounds.
+    serr << " ";
+    if (lines != NULL && line_number >= 1 &&
+        line_number <= (int)lines->size()) {
+        serr << (*lines)[line_number-1];
+    }
+    serr << endl;
+    serr << "in file: " << filename << endl;
+    serr << "** Math function fatal error **" << endl;
+    serr << "Name not recognized as a function." << endl;
+    serr << "Name = " << name << endl;
+    ierr = 2;
+}
+
+
+// ===========================================================================
+// Number of args mismatch error.
+// Appends the message to serr and sets ierr to 2.
+// ===========================================================================
+void Function::args_mismatch_err(int nargs_found, int nargs_expected,
+                                 stringstream &serr, int &ierr,
+                                 int line_number, int file_line_number,
+                                 string filename, deque<string> *lines)
+{
+    serr << endl;
+    serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl;
+    // Echo the offending input line only when line_number (1-based) really
+    // indexes *lines.
+    serr << " ";
+    if (lines != NULL && line_number >= 1 &&
+        line_number <= (int)lines->size()) {
+        serr << (*lines)[line_number-1];
+    }
+    serr << endl;
+    serr << "in file: " << filename << endl;
+    serr << "For function " << name << endl;
+    serr << "Number of args expected = " << nargs_expected << endl;
+    serr << "Number of args found = " << nargs_found << endl;
+    ierr = 2;
+}
+
+} // End of the PP namespace
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Globals.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Globals.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ * All rights Reserved.
+ *
+ * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. 
Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * Neal Davis davis68@lanl.gov, davis68@illinois.edu + * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com + * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com + * Other LANL authors + * + */ +#ifndef _Globals_ +#define _Globals_ + + +#ifdef __cplusplus +extern "C" +{ +#endif + +//#define NDEBUG 1 +#include +#include +#include +#include + +#define ENTITY_COINCIDENCE_TOLERANCE ((double)1.0E-5) + +#define KDTREE_ELEMENT_BLOCKING_SIZE ((long)1024) + +#define POSITIVE_INFINITY (+1.0E+64) +#define NEGATIVE_INFINITY (-1.0E+64) + +#define XAXIS ((unsigned long)0) +#define YAXIS ((unsigned long)1) + +typedef struct { + double x, y; +} TVector; + +//#ifndef _BOOL +//typedef unsigned char boolean; +//#define true ((boolean)1) +//#define false ((boolean)0) +//#endif + +#ifndef SWAP +#define SWAP(a,b,t) {t h; h = a; a = b; b = h; } +#endif + +#ifdef __cplusplus +} +#endif + +#endif Index: 
MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/KDTree.h =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/KDTree.h @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2011-2012, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * Neal Davis davis68@lanl.gov, davis68@illinois.edu + * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com + * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com + * Other LANL authors + * + * + * Implements a 2-dimensional k-D tree. One begins to use the k-D tree by + * adding the bounding box of geometric "elements" to the tree structure + * through a call to "KDTreeAddElement". Every element should be of the same + * type, but could be a single point, a line segment, triangles, etc. Once + * all the element bounding boxes have been added, the user of the structure + * may make queries against the tree. 
The actual tree is constructed lazily + * when an actual query occurs on the structure. + * + * This version only has one query -- intersection of a box with the elements + * and a set of "candidate" elements are returned. The candidates are identified + * by an index number (0, ...) signifying the order in which the element was + * added to the tree. It is up to the calling code to do additional processing + * based on the type of element being used to determine "real" intersections. + * + * The process of actually building the tree takes "n log n" time. Queries + * take "log n" time. + * + */ + +#ifndef _KDTree_ +#define _KDTree_ + +#ifdef __cplusplus +extern "C" +{ +#endif + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "Globals.h" +#include "Bounds.h" + +#define LEFT_HALF 0 +#define RIGHT_HALF 1 +#define BOTTOM_HALF 0 +#define TOP_HALF 1 + +typedef struct { + TBounds extent; + int elements_num, elements_allocated; + TBounds* elements; + bool tree_built; + int tree_size; + TBounds* tree_safety_boxes; + int * tree_link; +} TKDTree; + +extern void KDTree_Initialize(TKDTree *t); +extern void KDTree_Finalize(TKDTree *t); +extern void KDTree_Destroy(TKDTree* t); +extern void KDTree_AddElement(TKDTree* t, TBounds* add); +extern void KDTree_CreateTree(TKDTree* t); +extern void KDTree_QueryBoxIntersect(TKDTree* t, + int* result_num, int* result_indicies, + TBounds* box); + +void KDTree_QueryCircleIntersect_Double(TKDTree* t, + int* result_num, int* result_indicies, + double radius, int ncells, + double *x, double *dx, double *y, double *dy); +void KDTree_QueryCircleIntersect_Float(TKDTree* t, + int* result_num, int* result_indicies, + double radius, int ncells, + float *x, float *dx, float *y, float *dy); + +void KDTree_QueryCircleIntersectWeighted_Double(TKDTree* t, + int* result_num, int* result_indicies, double *weight, + double circ_radius, int ncells, + double *x, double *dx, double *y, double *dy); +void 
KDTree_QueryCircleIntersectWeighted_Float(TKDTree* t, + int* result_num, int* result_indicies, double *weight, + double circ_radius, int ncells, + float *x, float *dx, float *y, float *dy); + +void KDTree_QueryCircleInterior_Double(TKDTree* t, + int* result_num, int* result_indicies, + double circ_radius, int ncells, + double *x, double *dx, double *y, double *dy); +void KDTree_QueryCircleInterior_Float(TKDTree* t, + int* result_num, int* result_indicies, + double circ_radius, int ncells, + float *x, float *dx, float *y, float *dy); + +#ifdef __cplusplus +} +#endif + +#endif Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/KDTree.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/KDTree.c @@ -0,0 +1,712 @@ +/* + * Copyright (c) 2011-2012, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * Neal Davis davis68@lanl.gov, davis68@illinois.edu + * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com + * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com + * Other LANL authors + * + */ +#include +#include "KDTree.h" + +#define MALLOC(n,t) ((t*)(malloc(n * sizeof(t)))) +#define REALLOC(p,n,t) ((t*)(realloc((void*)p, n * sizeof(t)))) +#define FREE(p) { if (p) free(p); } + +static void median_sort(TKDTree* t, + unsigned int cut_direction, int k, int num, int* idx) +{ + int left, mid, right, a, i, j; + + for (left = 0, right = num - 1; (right - left) > 1;) { + mid = (left + right) / 2; + SWAP(idx[mid], idx[left + 1], int); + if(Bounds_CenterAxis(&(t->elements[idx[left + 1]]), cut_direction) > + Bounds_CenterAxis(&(t->elements[idx[right]]), cut_direction)) + SWAP(idx[left + 1], idx[right], int); + if(Bounds_CenterAxis(&(t->elements[idx[left]]), cut_direction) > + Bounds_CenterAxis(&(t->elements[idx[right]]), cut_direction)) + SWAP(idx[left], idx[right], int); + if(Bounds_CenterAxis(&(t->elements[idx[left + 1]]), cut_direction) > + Bounds_CenterAxis(&(t->elements[idx[left]]), cut_direction)) + SWAP(idx[left + 1], idx[left], int); + a = idx[left]; + i = left + 1; + j = right; + while (1) { + for (i++; + Bounds_CenterAxis(&(t->elements[idx[i]]), cut_direction) < + Bounds_CenterAxis(&(t->elements[a]), cut_direction); + i++); + for (j--; + Bounds_CenterAxis(&(t->elements[idx[j]]), cut_direction) > + Bounds_CenterAxis(&(t->elements[a]), 
cut_direction); + j--); + if(j < i) + break; + SWAP(idx[i], idx[j], int); + } + idx[left] = idx[j]; + idx[j] = a; + if(j >= k) + right = j - 1; + if(j <= k) + left = i; + } + if(((right - left) ==1) && + (Bounds_CenterAxis(&(t->elements[idx[right]]), cut_direction) < + Bounds_CenterAxis(&(t->elements[idx[left]]), cut_direction))) + SWAP(idx[right], idx[left], int); +} + +void KDTree_Initialize(TKDTree* t) +{ + assert(t); + /* Flush the overall tree extent */ + Bounds_Infinite(&(t->extent)); + /* Allocate the initial memory for tree elements */ + t->elements_num = 0; + t->elements_allocated = KDTREE_ELEMENT_BLOCKING_SIZE; + t->elements = MALLOC(t->elements_allocated, TBounds); + assert(t->elements); + /* Start without a built tree */ + t->tree_built = false; + t->tree_size = 0; + t->tree_safety_boxes = NULL; + t->tree_link = NULL; +} + +void KDTree_Finalize(TKDTree* t) +{ + free(t->elements); +} + +void KDTree_Destroy(TKDTree* t) +{ + assert(t); + /* Flush the overall tree extent */ + Bounds_Infinite(&(t->extent)); + /* Destroy the element list */ + t->elements_num = 0; + t->elements_allocated = 0; + FREE(t->elements); + t->elements = NULL; + /* Destroy the actual tree */ + t->tree_built = false; + t->tree_size = 0; + FREE(t->tree_safety_boxes); + t->tree_safety_boxes = NULL; + FREE(t->tree_link); + t->tree_link = NULL; +} + + + +void KDTree_AddElement(TKDTree* t, TBounds* add) +{ + assert(t && add); + /* Destroy the current tree if it is built */ + if(t->tree_built) { + t->tree_built = false; + t->tree_size = 0; + FREE(t->tree_safety_boxes); + t->tree_safety_boxes = NULL; + FREE(t->tree_link); + t->tree_link = NULL; + } + /* Expand the element array if necessary */ + if(t->elements_num == t->elements_allocated) { + t->elements_allocated += KDTREE_ELEMENT_BLOCKING_SIZE; + t->elements = REALLOC(t->elements, t->elements_allocated, TBounds); + assert(t->elements); + } + /* Add the new element to the overall extent and the element list */ + 
Bounds_AddBounds(&(t->extent), add); + Bounds_Copy(add, &(t->elements[t->elements_num])); + t->elements_num++; +} + +void KDTree_CreateTree(TKDTree* t) +{ + unsigned int i; + int next_node, stack_ptr, min, mid, max, parent, cut_direction; + double width, max_width; + int* stack; + int* idx; + + assert(t); + /* If the tree is already built, we don't have to do anything */ + if(t->tree_built) + return; + /* If there are no elements in the tree, we don't have to do anything */ + if(t->elements_num > 0) { + /* Allocate the k-D tree memory */ + t->tree_size = 2 * t->elements_num; + t->tree_safety_boxes = MALLOC(t->tree_size, TBounds); + t->tree_link = MALLOC(t->tree_size, int); + /* Create and initialize temporary arrays */ + next_node = 0; + stack_ptr = 0; + stack = MALLOC(3 * t->tree_size, int); + idx = MALLOC(t->elements_num, int); + for (i = 0; (int)i < t->elements_num; i++) { + idx[i] = i; + } + /* Setup the root node of the tree and put it on the stack */ + stack[stack_ptr++] = 0; /* Node Number in the Tree */ + stack[stack_ptr++] = 0; /* Element Span Minumum */ + stack[stack_ptr++] = t->elements_num - 1; /* Element Span Maximum */ + Bounds_Copy(&(t->extent), &(t->tree_safety_boxes[0])); + next_node++; + /* Construct k-D tree by setting up each pair of child nodes */ + while (stack_ptr) { + /* Pop the top entry off the stack */ + max = stack[--stack_ptr]; + min = stack[--stack_ptr]; + parent = stack[--stack_ptr]; + /* If the current node should be a leaf node, make it one */ + if ((max - min) == 0) { + Bounds_Copy(&(t->elements[idx[min]]), &(t->tree_safety_boxes[parent])); + t->tree_link[parent] = - idx[min]; + continue; + } + /* Select optimum cutting direction for the parent node's safety box */ + cut_direction = -1; + max_width = NEGATIVE_INFINITY; + for (i = 0; i < 2; i++) { + width = Bounds_WidthAxis(&(t->tree_safety_boxes[parent]), i); + if(width > max_width) { + max_width = width; + cut_direction = i; + } + } + assert(cut_direction >= 0); + /* Do a median 
sort of the elements under the parent node. The sort key + is the center point of the element bounding boxes along the selected + cutting direction. */ + mid = (min + max) /2; + median_sort(t, (unsigned int)cut_direction, mid - min, max - min + 1, &(idx[min])); + /* Give the parent a reference to its two children */ + t->tree_link[parent] = next_node; + /* Add the "left" child to the tree and the stack */ + stack[stack_ptr++] = next_node; /* Node Number in the Tree */ + stack[stack_ptr++] = min; /* Element Span Minimum */ + stack[stack_ptr++] = mid; /* Element Span Maximum */ + Bounds_Infinite(&(t->tree_safety_boxes[next_node])); + for (i = min; (int)i <= mid; i++) { + Bounds_AddBounds(&(t->tree_safety_boxes[next_node]), + &(t->elements[idx[i]])); + } + next_node++; + /* Add the "right" child to the tree and the stack */ + stack[stack_ptr++] = next_node; /* Node Number in the Tree */ + stack[stack_ptr++] = mid + 1; /* Element Span Minimum */ + stack[stack_ptr++] = max; /* Element Span Maximum */ + Bounds_Infinite(&(t->tree_safety_boxes[next_node])); + for (i = min + 1; (int)i <= max; i++) { + Bounds_AddBounds(&(t->tree_safety_boxes[next_node]), + &(t->elements[idx[i]])); + } + next_node++; + } + /* Destroy the temporary arrays */ + FREE(stack); + FREE(idx); + } + /* Mark the tree "built" */ + t->tree_built = true; +} + +void KDTree_QueryBoxIntersect(TKDTree* t, + int* result_num, int* result_indicies, + TBounds* box) +{ + int stack_ptr, node; + TBounds sb; + int* stack; + + assert(t && result_num && result_indicies && box); + /* Build the k-D tree if necessary */ + if(!t->tree_built){ + //printf("BUILDING TREE... 
\n"); + //fflush(stdout); + KDTree_CreateTree(t); + } + /* Allocate the results array */ + *result_num = 0; + /* Create the temporary stack array */ + stack_ptr = 0; + stack = MALLOC(t->tree_size, int); + + /* Put the root node of the tree onto the stack */ + stack[stack_ptr++] = 0; + /* Search the k-D tree until the stack is empty */ + + while (stack_ptr) { + /* Pop the top entry off the stack */ + node = stack[--stack_ptr]; + /* Check if the query box intersects an epsilon-expanded safety box for + the current node. */ + Bounds_Copy(&(t->tree_safety_boxes[node]), &sb); + //Bounds_AddEpsilon(&sb, ENTITY_COINCIDENCE_TOLERANCE); + /* If the query box doesn't intersect this node's safety box, we are done + visiting the node and should continue with the next node */ + if(!Bounds_IsOverlappingBounds(&sb, box)) + continue; + /* If the current node is a leaf node, add it to the collision list. If + the current node is an interior node, add its children to the stack. */ + if(t->tree_link[node] <= 0) { + result_indicies[*result_num] = - t->tree_link[node]; + (*result_num)++; + } + else { + stack[stack_ptr++] = t->tree_link[node]; + stack[stack_ptr++] = t->tree_link[node] + 1; + } + } + /* Destroy the temporary stack array */ + FREE(stack); +} + +void KDTree_QueryCircleIntersect_Double(TKDTree* t, + int* result_num, int* result_indicies, + double circ_radius, int ncells, + double *x, double *dx, double *y, double *dy) +{ + assert(t && result_num && result_indicies && circ_radius); + /* Build the k-D tree if necessary */ + if(!t->tree_built){ + //printf("BUILDING TREE... 
\n"); + //fflush(stdout); + KDTree_CreateTree(t); + } + + int nez; + int *ind=(int *)malloc(ncells*sizeof(int)); + + TBounds box; + box.min.x = -circ_radius; + box.max.x = circ_radius; + box.min.y = -circ_radius; + box.max.y = circ_radius; + KDTree_QueryBoxIntersect(t, &nez, ind, &box); + + //for (int ic=0; ic rad2 ) || + (circ_radius > rad1 && circ_radius < rad2 ) ) { + result_indicies[*result_num] = ind[i]; + (*result_num)++; + } else if ((circ_radius < rad2 && circ_radius > rad3 ) || + (circ_radius > rad2 && circ_radius < rad3 ) ) { + result_indicies[*result_num] = ind[i]; + (*result_num)++; + } else if ((circ_radius < rad3 && circ_radius > rad4 ) || + (circ_radius > rad3 && circ_radius < rad4 ) ) { + result_indicies[*result_num] = ind[i]; + (*result_num)++; + } else if ((circ_radius < rad4 && circ_radius > rad1 ) || + (circ_radius > rad4 && circ_radius < rad1 ) ) { + result_indicies[*result_num] = ind[i]; + (*result_num)++; + } + } // for + free(ind); +} + +void KDTree_QueryCircleIntersect_Float(TKDTree* t, + int* result_num, int* result_indicies, + double circ_radius, int ncells, + float *x, float *dx, float *y, float *dy) +{ + assert(t && result_num && result_indicies && circ_radius); + /* Build the k-D tree if necessary */ + if(!t->tree_built){ + //printf("BUILDING TREE... 
\n"); + //fflush(stdout); + KDTree_CreateTree(t); + } + + int nez; + int *ind=(int *)malloc(ncells*sizeof(int)); + + TBounds box; + box.min.x = -circ_radius; + box.max.x = circ_radius; + box.min.y = -circ_radius; + box.max.y = circ_radius; + KDTree_QueryBoxIntersect(t, &nez, ind, &box); + + //for (int ic=0; ic rad2 ) || + (circ_radius > rad1 && circ_radius < rad2 ) ) { + result_indicies[*result_num] = ind[i]; + (*result_num)++; + } else if ((circ_radius < rad2 && circ_radius > rad3 ) || + (circ_radius > rad2 && circ_radius < rad3 ) ) { + result_indicies[*result_num] = ind[i]; + (*result_num)++; + } else if ((circ_radius < rad3 && circ_radius > rad4 ) || + (circ_radius > rad3 && circ_radius < rad4 ) ) { + result_indicies[*result_num] = ind[i]; + (*result_num)++; + } else if ((circ_radius < rad4 && circ_radius > rad1 ) || + (circ_radius > rad4 && circ_radius < rad1 ) ) { + result_indicies[*result_num] = ind[i]; + (*result_num)++; + } + } // for + free(ind); +} + +void KDTree_QueryCircleIntersectWeighted_Double(TKDTree* t, + int* result_num, int* result_indicies, double *weight, + double circ_radius, int ncells, + double *x, double *dx, double *y, double *dy) +{ + assert(t && result_num && result_indicies && circ_radius); + /* Build the k-D tree if necessary */ + if(!t->tree_built){ + //printf("BUILDING TREE... 
\n"); + //fflush(stdout); + KDTree_CreateTree(t); + } + + int nez; + int *ind=(int *)malloc(ncells*sizeof(int)); + + TBounds box; + box.min.x = -circ_radius; + box.max.x = circ_radius; + box.min.y = -circ_radius; + box.max.y = circ_radius; + KDTree_QueryBoxIntersect(t, &nez, ind, &box); + + //for (int ic=0; ic rad2 ) || + (circ_radius > rad1 && circ_radius < rad2 ) ) { + cuts_bottom=1; + } + if ((circ_radius < rad2 && circ_radius > rad3 ) || + (circ_radius > rad2 && circ_radius < rad3 ) ) { + cuts_right=1; + } + if ((circ_radius < rad3 && circ_radius > rad4 ) || + (circ_radius > rad3 && circ_radius < rad4 ) ) { + cuts_top=1; + } + if ((circ_radius < rad4 && circ_radius > rad1 ) || + (circ_radius > rad4 && circ_radius < rad1 ) ) { + cuts_left=1; + } + + horizontal_half=0; + vertical_half=0; + if (x[ii]+0.5*dx[ii] > 0.0) horizontal_half = RIGHT_HALF; + if (y[ii]+0.5*dy[ii] > 0.0) vertical_half = TOP_HALF; + + + if (horizontal_half == RIGHT_HALF && vertical_half == TOP_HALF) { /* quadrant 1 */ + weight[*result_num] = (circ_radius - rad1)/(rad3-rad1); + } else if (horizontal_half == LEFT_HALF && vertical_half == TOP_HALF) { /* quadrant 2 */ + weight[*result_num] = (circ_radius - rad2)/(rad4-rad2); + } else if (horizontal_half == LEFT_HALF && vertical_half == BOTTOM_HALF) { /* quadrant 3 */ + weight[*result_num] = (circ_radius - rad3)/(rad1-rad3); + } else if (horizontal_half == RIGHT_HALF && vertical_half == BOTTOM_HALF) { /* quadrant 4 */ + weight[*result_num] = (circ_radius - rad4)/(rad2-rad4); + } else { + weight[*result_num] = 0.5; + } + if (cuts_bottom || cuts_top || cuts_left || cuts_right) { + result_indicies[*result_num] = ind[i]; + (*result_num)++; + } + + } // for + free(ind); +} + +void KDTree_QueryCircleIntersectWeighted_Float(TKDTree* t, + int* result_num, int* result_indicies, double *weight, + double circ_radius, int ncells, + float *x, float *dx, float *y, float *dy) +{ + assert(t && result_num && result_indicies && circ_radius); + /* Build the k-D tree 
if necessary */ + if(!t->tree_built){ + //printf("BUILDING TREE... \n"); + //fflush(stdout); + KDTree_CreateTree(t); + } + + int nez; + int *ind=(int *)malloc(ncells*sizeof(int)); + + TBounds box; + box.min.x = -circ_radius; + box.max.x = circ_radius; + box.min.y = -circ_radius; + box.max.y = circ_radius; + KDTree_QueryBoxIntersect(t, &nez, ind, &box); + + //for (int ic=0; ic rad2 ) || + (circ_radius > rad1 && circ_radius < rad2 ) ) { + cuts_bottom=1; + } + if ((circ_radius < rad2 && circ_radius > rad3 ) || + (circ_radius > rad2 && circ_radius < rad3 ) ) { + cuts_right=1; + } + if ((circ_radius < rad3 && circ_radius > rad4 ) || + (circ_radius > rad3 && circ_radius < rad4 ) ) { + cuts_top=1; + } + if ((circ_radius < rad4 && circ_radius > rad1 ) || + (circ_radius > rad4 && circ_radius < rad1 ) ) { + cuts_left=1; + } + + horizontal_half=0; + vertical_half=0; + if (x[ii]+0.5*dx[ii] > 0.0) horizontal_half = RIGHT_HALF; + if (y[ii]+0.5*dy[ii] > 0.0) vertical_half = TOP_HALF; + + + if (horizontal_half == RIGHT_HALF && vertical_half == TOP_HALF) { /* quadrant 1 */ + weight[*result_num] = (circ_radius - rad1)/(rad3-rad1); + } else if (horizontal_half == LEFT_HALF && vertical_half == TOP_HALF) { /* quadrant 2 */ + weight[*result_num] = (circ_radius - rad2)/(rad4-rad2); + } else if (horizontal_half == LEFT_HALF && vertical_half == BOTTOM_HALF) { /* quadrant 3 */ + weight[*result_num] = (circ_radius - rad3)/(rad1-rad3); + } else if (horizontal_half == RIGHT_HALF && vertical_half == BOTTOM_HALF) { /* quadrant 4 */ + weight[*result_num] = (circ_radius - rad4)/(rad2-rad4); + } else { + weight[*result_num] = 0.5; + } + if (cuts_bottom || cuts_top || cuts_left || cuts_right) { + result_indicies[*result_num] = ind[i]; + (*result_num)++; + } + + } // for + free(ind); +} + +void KDTree_QueryCircleInterior_Double(TKDTree* t, + int* result_num, int* result_indicies, + double circ_radius, int ncells, + double *x, double *dx, double *y, double *dy) +{ + assert(t && result_num && 
result_indicies && circ_radius); + /* Build the k-D tree if necessary */ + if(!t->tree_built){ + //printf("BUILDING TREE... \n"); + //fflush(stdout); + KDTree_CreateTree(t); + } + + int nez; + int *ind=(int *)malloc(ncells*sizeof(int)); + + TBounds box; + box.min.x = -circ_radius; + box.max.x = circ_radius; + box.min.y = -circ_radius; + box.max.y = circ_radius; + KDTree_QueryBoxIntersect(t, &nez, ind, &box); + + //for (int ic=0; ic rad1 || circ_radius > rad2 ) || + (circ_radius > rad3 || circ_radius > rad4 ) ) { + result_indicies[*result_num] = ind[i]; + (*result_num)++; + } + } // for + free(ind); +} + +void KDTree_QueryCircleInterior_Float(TKDTree* t, + int* result_num, int* result_indicies, + double circ_radius, int ncells, + float *x, float *dx, float *y, float *dy) +{ + assert(t && result_num && result_indicies && circ_radius); + /* Build the k-D tree if necessary */ + if(!t->tree_built){ + //printf("BUILDING TREE... \n"); + //fflush(stdout); + KDTree_CreateTree(t); + } + + int nez; + int *ind=(int *)malloc(ncells*sizeof(int)); + + TBounds box; + box.min.x = -circ_radius; + box.max.x = circ_radius; + box.min.y = -circ_radius; + box.max.y = circ_radius; + KDTree_QueryBoxIntersect(t, &nez, ind, &box); + + //for (int ic=0; ic rad1 || circ_radius > rad2 ) || + (circ_radius > rad3 || circ_radius > rad4 ) ) { + result_indicies[*result_num] = ind[i]; + (*result_num)++; + } + } // for + free(ind); +} + + + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/LICENSE =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/LICENSE @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2011-2012, Los Alamos National Security, LLC. + * All rights Reserved. + * + * CLAMR -- LA-CC-11-094 + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. 
Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/MallocPlus.h =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/MallocPlus.h @@ -0,0 +1,812 @@ +/* + * Copyright (c) 2011-2013, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * + */ +#ifndef MALLOCPLUS_H_ +#define MALLOCPLUS_H_ + +#include +#include +#include + +#ifdef HAVE_CONFIG_H +#include +#endif + +#define HOST_REGULAR_MEMORY 0x00000 +#define HOST_MANAGED_MEMORY 0x00001 +#define DEVICE_REGULAR_MEMORY 0x00002 +#define INDEX_ARRAY_MEMORY 0x00004 +#define LOAD_BALANCE_MEMORY 0x00008 +#define RESTART_DATA 0x00010 +#define REPLICATED_DATA 0x00020 +#define DISTRIBUTED_INT_DATA 0x00040 +#define DISTRIBUTED_DOUBLE_DATA 0x00080 + +#if defined(HAVE_MPI) +#include "mpi.h" +#if defined(HAVE_J7) +#include "j7/j7.h" +#endif +#endif + +using namespace std; + +/****************************************************************//** + * \brief + * Memory entry with fields for each entry in database + *******************************************************************/ +struct malloc_plus_memory_entry { + void *mem_ptr; //!< memory pointer for entry + size_t mem_capacity; //!< allocated capacity for memory buffer + //!< this may be larger than the number of + //!< elements and is used to internally handle + //!< memory resizing + size_t *mem_nelem; //!< number of elements + size_t mem_ndims; //!< number of dimensions + size_t mem_elsize; //!< element size for type of data + int mem_flags; //!< flags for special handling + char *mem_name; //!< name of memory entry +}; + +/****************************************************************//** + * \brief + * Ordering functor that compares C strings by content (strcmp) rather + * than by pointer value. The call operator is const so that it can be + * invoked through a const comparison object, as ordered associative + * containers require of their comparator. + *******************************************************************/ +struct cmp_str +{ + bool operator()(char const *a, char const *b) const + { + return strcmp(a, b) < 0; + } +}; + 
+/****************************************************************//** + * MallocPlus class + * Provide an enhanced memory allocation package with database + * of allocations, sizes and contiguous memory allocations for + * multi-dimensional arrays + *******************************************************************/ +class MallocPlus { +//protected: +public: + map memory_name_dict; //!< Dictionary entries by name + map memory_ptr_dict; //!< Dictionary entries by pointer + +#if defined(HAVE_MPI) && defined(HAVE_J7) +private: + J7 *j7; +#endif + +public: + // if we have MPI support enable these routines. they only really do anything + // if we also have j7 support, but that's okay; we don't want ifdefs all + // over. +#if defined(HAVE_MPI) + // parallel initialization routine + void pinit(MPI_Comm smComm, std::size_t memPoolSize); + // parallel finalization routine + void pfini(void); +#endif + +/****************************************************************//** + * \brief + * Allocates memory for a 1D array and puts an entry in the database + * + * **Parameters** + * * size_t nelem -- number of elements in array + * * size_t elsize -- element size in bytes + * * const char *name -- name of array + * * int flags -- flags for special handling, default is 0 + * + * Typical Usage + * + * double *density = (double *)my_mem->memory_malloc(ncells, sizeof(double), + * "Density"); + *******************************************************************/ + void *memory_malloc(size_t nelem, size_t elsize, const char *name, int flags=0); + +/****************************************************************//** + * \brief + * Duplicate memory and add new entry into database + * + * **Parameters** + * * void *malloc_mem_ptr -- memory pointer to duplicate + * * const char *addname -- new name for variable + * + * Typical Usage + * + * double *new_density = (double *)my_mem->memory_duplicate(density, "Density_new"); + *******************************************************************/ + void 
*memory_duplicate(void *malloc_mem_ptr, const char *addname); + +/****************************************************************//** + * \brief + * Reallocates memory for memory pointer in database + * + * **Parameters** + * * size_t nelem -- number of elements for new allocation + * * void *malloc_mem_ptr -- memory pointer to reallocate + * + * Typical Usage + * + * double *density = (double *)my_mem->memory_realloc(new_ncells, density); + *******************************************************************/ + void *memory_realloc(size_t nelem, void *malloc_mem_ptr); + +/****************************************************************//** + * \brief + * Reallocates memory for named entry in database + * + * **Parameters** + * * size_t nelem -- number of elements for new allocation + * * const char *name -- named entry to reallocate + * + * Typical Usage + * + * double *density = (double *)my_mem->memory_realloc(new_ncells, "Density"); + *******************************************************************/ + void *memory_realloc(size_t nelem, const char *name); + +/****************************************************************//** + * \brief + * Request memory buffer capacity reallocation for memory pointer in database. + * This only changes the capacity for managed memory and does not change + * the current number of elements registered for the array. + * + * **Parameters** + * * size_t new_capacity -- capacity in number of elements for reallocation + * * void *malloc_mem_ptr -- memory pointer to reallocate more capacity + * + * Typical Usage + * + * double *density = (double *)my_mem->memory_request(new_capacity, density); + *******************************************************************/ + void *memory_request(size_t new_capacity, void *malloc_mem_ptr); + +/****************************************************************//** + * \brief + * Request memory buffer capacity reallocation for named entry in database. 
+ * This only changes the capacity for managed memory and does not change + * the current number of elements registered for the array. + * + * **Parameters** + * * size_t new_capacity -- capacity in number of elements for reallocation + * * const char *name -- named entry in database + * + * Typical Usage + * + * double *density = (double *)my_mem->memory_request(new_capacity, "Density"); + *******************************************************************/ + void *memory_request(size_t new_capacity, const char *name); + +/****************************************************************//** + * \brief + * Reallocates memory for all arrays in the database. Element size stays + * the same. + * + * **Parameters** + * * size_t nelem -- number of elements for new allocation + * + * Typical Usage + * + * my_mem->memory_realloc_all(new_ncells); + *******************************************************************/ + void memory_realloc_all(size_t nelem); + +/****************************************************************//** + * \brief + * Requests capacity reallocation for all arrays in the database. Element + * size and number of elements stay the same. The maximum memory + * capacity is increased. + * + * **Parameters** + * * size_t new_capacity -- capacity in number of elements for new allocation + * + * Typical Usage + * + * my_mem->memory_request_all(new_capacity); + *******************************************************************/ + void memory_request_all(size_t new_capacity); + +/****************************************************************//** + * \brief + * Replaces a database entry with another database entry, effectively + * renaming the entry and deallocating the memory for the old entry + * and removing the other database entry. The new memory location is + * returned; the old pointer is passed by value, so the caller assigns + * the return value to it as shown in the usage below. 
+ * + * **Parameters** + * * void *malloc_mem_ptr_old -- memory pointer to entry to replace + * * void * const malloc_mem_ptr_new -- memory pointer to entry to + * put in place of old memory + * + * Typical Usage + * + * double *density_new = (double *)my_mem->memory_malloc(new_ncells, + * sizeof(double), "Density_new"); + * ... lots of calculations of density_new from density (old) ... + * density = (double *)my_mem->memory_replace(density, density_new); + *******************************************************************/ + void *memory_replace(void *malloc_mem_ptr_old, void * const malloc_mem_ptr_new); + +/****************************************************************//** + * \brief + * Swaps a database entry with another database entry, effectively + * renaming both entries. The new pointers are returned in the + * two arguments. + * + * **Parameters** + * * int **malloc_mem_ptr_old -- address of memory pointer to swap + * * int **malloc_mem_ptr_new -- address of memory pointer to swap + * + * Typical Usage + * + * int *level_old = (int *)my_mem->memory_malloc(new_ncells, + * sizeof(int), "level_old"); + * my_mem->memory_swap(&level, &level_old); + * ... lots of calculations of level from level_old ... + * my_mem->memory_delete(level_old); + *******************************************************************/ + void memory_swap(int **malloc_mem_ptr_old, int **malloc_mem_ptr_new); + +/****************************************************************//** + * \brief + * Swaps a database entry with another database entry, effectively + * renaming both entries. The new pointers are returned in the + * two arguments. + * + * **Parameters** + * * float **malloc_mem_ptr_old -- address of memory pointer to swap + * * float **malloc_mem_ptr_new -- address of memory pointer to swap + * + * Typical Usage + * + * float *density_old = (float *)my_mem->memory_malloc(new_ncells, + * sizeof(float), "Density_old"); + * my_mem->memory_swap(&density, &density_old); + * ... 
lots of calculations of density from density_old ... + * my_mem->memory_delete(density_old); + *******************************************************************/ + void memory_swap(float **malloc_mem_ptr_old, float **malloc_mem_ptr_new); + +/****************************************************************//** + * \brief + * Swaps a database entry with another database entry, effectively + * renaming both entries. The new pointers are returned in the + * two arguments. + * + * **Parameters** + * * double **malloc_mem_ptr_old -- address of memory pointer to swap + * * double **malloc_mem_ptr_new -- address of memory pointer to swap + * + * Typical Usage + * + * double *density_old = (double *)my_mem->memory_malloc(new_ncells, + * sizeof(double), "Density_old"); + * my_mem->memory_swap(&density, &density_old); + * ... lots of calculations of density from density_old ... + * my_mem->memory_delete(density_old); + *******************************************************************/ + void memory_swap(double **malloc_mem_ptr_old, double **malloc_mem_ptr_new); + +/****************************************************************//** + * \brief + * Adds an entry for an already allocated array into the database + * + * **Parameters** + * * void *malloc_mem_ptr -- memory pointer to add + * * size_t nelem -- number of elements in array + * * size_t elsize -- element size in bytes + * * const char *name -- name of array + * * int flags -- flags for special handling, default is 0 + * + * Typical Usage + * + * double *density = (double *)my_mem->memory_add(density, ncells, sizeof(double), + * "Density"); + *******************************************************************/ + void *memory_add(void *malloc_mem_ptr, size_t nelem, size_t elsize, + const char *name, int flags=0); + +/****************************************************************//** + * \brief + * Overload of memory_add that takes a dimension count and a pointer to + * element counts instead of a single element count. 
+ * NOTE(review): presumably nelem points to ndim entries, one per + * dimension -- confirm against the implementation. + *******************************************************************/ + void *memory_add(void *malloc_mem_ptr, int ndim, size_t *nelem, size_t elsize, + const char *name, int flags=0); + +/****************************************************************//** + * \brief + * Reorders one of the 
arrays in the database by the indices in the + * iorder array. The reorder does the following: + * tmp[ic] = density[iorder[ic]]; + * SWAP_PTR(tmp, density); + * Note that the pointer value will change during the operation and + * will be returned in the return value. + * + * **Parameters** + * * double *malloc_mem_ptr -- memory pointer to entry to reorder + * * int *iorder -- index array for reordering + * + * Typical Usage + * + * double *density = my_mem->memory_reorder(density, iorder); + *******************************************************************/ + double *memory_reorder(double *malloc_mem_ptr, int *iorder); + +/****************************************************************//** + * \brief + * Reorders one of the arrays in the database by the indices in the + * iorder array. The reorder does the following: + * tmp[ic] = density[iorder[ic]]; + * SWAP_PTR(tmp, density); + * Note that the pointer value will change during the operation and + * will be returned in the return value. + * + * **Parameters** + * * float *malloc_mem_ptr -- memory pointer to entry to reorder + * * int *iorder -- index array for reordering + * + * Typical Usage + * + * float *density = my_mem->memory_reorder(density, iorder); + *******************************************************************/ + float *memory_reorder(float *malloc_mem_ptr, int *iorder); + +/****************************************************************//** + * \brief + * Reorders one of the arrays in the database by the indices in the + * iorder array. The reorder does the following: + * tmp[ic] = level[iorder[ic]]; + * SWAP_PTR(tmp, level); + * Note that the pointer value will change during the operation and + * will be returned in the return value. 
+ * + * **Parameters** + * * int *malloc_mem_ptr -- memory pointer to entry to reorder + * * int *iorder -- index array for reordering + * + * Typical Usage + * + * int *level = my_mem->memory_reorder(level, iorder); + *******************************************************************/ + int *memory_reorder(int *malloc_mem_ptr, int *iorder); + +/****************************************************************//** + * \brief + * Reorders an index array in the database by the indices in the + * iorder array and reindexes the array by the inverse order given + * by inv_iorder. The reorder does the following: + * tmp[ic] = inv_iorder[level[iorder[ic]]]; + * SWAP_PTR(tmp, level); + * Note that the pointer value will change during the operation and + * will be returned in the return value. + * + * **Parameters** + * * int *malloc_mem_ptr -- memory pointer to entry to reorder + * * int *iorder -- index array for reordering + * * int *inv_iorder -- inverse index array for reordering + * + * Typical Usage + * + * int *level = my_mem->memory_reorder_indexarray(level, iorder, inv_iorder); + *******************************************************************/ + int *memory_reorder_indexarray(int *malloc_mem_ptr, int *iorder, int *inv_iorder); + +/****************************************************************//** + * \brief + * Reorders all of the arrays in the database by the indices in the + * iorder array. The reorder does the following: + * tmp[ic] = density[iorder[ic]]; + * SWAP_PTR(tmp, density); + * Note that the pointer value will change during the operation and + * must be retrieved from the database. 
+ * + * **Parameters** + * * int *iorder -- index array for reordering + * + * Typical Usage + * + * my_mem->memory_reorder_all(iorder); + *******************************************************************/ + void memory_reorder_all(int *iorder); + +/****************************************************************//** + * \brief + * Prints out a report of all the arrays in the database. + * + * Typical Usage + * + * my_mem->memory_report(); + *******************************************************************/ + void memory_report(void); + +/****************************************************************//** + * \brief + * Deallocates memory for a memory pointer in the database and removes the + * entry from the database. + * + * Typical Usage + * + * my_mem->memory_delete(density); + *******************************************************************/ + void *memory_delete(void *malloc_mem_ptr); + +/****************************************************************//** + * \brief + * Deallocates memory for a named entry in the database and removes the + * entry from the database. + * + * Typical Usage + * + * my_mem->memory_delete("Density"); + *******************************************************************/ + void *memory_delete(const char *name); + +/****************************************************************//** + * \brief + * Deallocates memory for all arrays in the database. + * + * Typical Usage + * + * my_mem->memory_delete_all(); + *******************************************************************/ + void memory_delete_all(void); + +/****************************************************************//** + * \brief + * Removes the entry for a memory pointer from the database. (This does + * not delete the memory). 
+ * + * Typical Usage + * + * my_mem->memory_remove(density); + *******************************************************************/ + void memory_remove(void *malloc_mem_ptr); + +/****************************************************************//** + * \brief + * Removes the entry for a named entry from the database. (This does + * not delete the memory). + * + * Typical Usage + * + * my_mem->memory_remove("Density"); + *******************************************************************/ + void memory_remove(const char *name); + +/****************************************************************//** + * \brief + * Gets initial memory pointer from database for iterating over the + * entries and processing each. + * + * Typical Usage + * + * for (void *mem_ptr = my_mem.memory_begin(); mem_ptr!=NULL; + * mem_ptr = my_mem.memory_next() ){ + * ... process entries ... + * } + *******************************************************************/ + void *memory_begin(void); + +/****************************************************************//** + * \brief + * Gets next memory pointer from database for iterating over the + * entries and processing each. Note that there is an implied caching + * of the current memory pointer in MallocPlus. + * + * Typical Usage + * + * for (void *mem_ptr = my_mem.memory_begin(); mem_ptr!=NULL; + * mem_ptr = my_mem.memory_next() ){ + * ... process entries ... + * } + *******************************************************************/ + void *memory_next(void); + +/****************************************************************//** + * \brief + * Gets initial memory pointer from the name database for iterating over + * the entries and processing each. + * + * Typical Usage + * + * for (void *mem_ptr = my_mem.memory_by_name_begin(); mem_ptr!=NULL; + * mem_ptr = my_mem.memory_by_name_next() ){ + * ... process entries ... 
+ * } + *******************************************************************/ + void *memory_by_name_begin(void); + +/****************************************************************//** + * \brief + * Gets next memory pointer from the name database for iterating over the + * entries and processing each. Note that there is an implied caching + * of the current memory pointer in MallocPlus. + * + * Typical Usage + * + * for (void *mem_ptr = my_mem.memory_by_name_begin(); mem_ptr!=NULL; + * mem_ptr = my_mem.memory_by_name_next() ){ + * ... process entries ... + * } + *******************************************************************/ + void *memory_by_name_next(void); + +/****************************************************************//** + * \brief + * Gets initial memory entry from database for iterating over the + * entries and processing each. + * + * Typical Usage + * + * malloc_plus_memory_entry *memory_item; + * for (memory_item = my_mem.memory_entry_begin(); + * memory_item != my_mem.memory_entry_end(); + * memory_item = my_mem.memory_entry_next() ){ + * ... process entries ... + * } + *******************************************************************/ + malloc_plus_memory_entry *memory_entry_begin(void); + +/****************************************************************//** + * \brief + * Gets next memory entry from database for iterating over the + * entries and processing each. + * + * Typical Usage + * + * malloc_plus_memory_entry *memory_item; + * for (memory_item = my_mem.memory_entry_begin(); + * memory_item != my_mem.memory_entry_end(); + * memory_item = my_mem.memory_entry_next() ){ + * ... process entries ... + * } + *******************************************************************/ + malloc_plus_memory_entry *memory_entry_next(void); + +/****************************************************************//** + * \brief + * Gets the end-of-iteration value that the entry iteration is compared + * against to terminate the loop over the database entries. 
+ * + * Typical Usage + * + * malloc_plus_memory_entry *memory_item; + * for (memory_item = my_mem.memory_entry_begin(); + * memory_item != my_mem.memory_entry_end(); + * memory_item = my_mem.memory_entry_next() ){ + * ... process entries ... + * } + *******************************************************************/ + malloc_plus_memory_entry *memory_entry_end(void); + +/****************************************************************//** + * \brief + * Gets initial memory entry from name database for iterating over the + * entries and processing each. + * + * Typical Usage + * + * malloc_plus_memory_entry *memory_item; + * for (memory_item = my_mem.memory_entry_by_name_begin(); + * memory_item != my_mem.memory_entry_by_name_end(); + * memory_item = my_mem.memory_entry_by_name_next() ){ + * ... process entries ... + * } + *******************************************************************/ + malloc_plus_memory_entry *memory_entry_by_name_begin(void); + +/****************************************************************//** + * \brief + * Gets next memory entry from name database for iterating over the + * entries and processing each. + * + * Typical Usage + * + * malloc_plus_memory_entry *memory_item; + * for (memory_item = my_mem.memory_entry_by_name_begin(); + * memory_item != my_mem.memory_entry_by_name_end(); + * memory_item = my_mem.memory_entry_by_name_next() ){ + * ... process entries ... + * } + *******************************************************************/ + malloc_plus_memory_entry *memory_entry_by_name_next(void); + +/****************************************************************//** + * \brief + * Gets the end-of-iteration value that the by-name entry iteration is + * compared against to terminate the loop over the database entries. + * + * Typical Usage + * + * malloc_plus_memory_entry *memory_item; + * for (memory_item = my_mem.memory_entry_by_name_begin(); + * memory_item != my_mem.memory_entry_by_name_end(); + * memory_item = my_mem.memory_entry_by_name_next() ){ + * ... 
process entries ... + * } + *******************************************************************/ + malloc_plus_memory_entry *memory_entry_by_name_end(void); + +/****************************************************************//** + * \brief + * Get number of elements for a memory pointer in the database. + * + * **Parameters** + * * void *malloc_mem_ptr -- memory pointer for entry in the database + * + * Typical Usage + * + * size_t nsize = my_mem->get_memory_size(density); + *******************************************************************/ + size_t get_memory_size(void *malloc_mem_ptr); + +/****************************************************************//** + * \brief + * Get the memory capacity in number of elements for a memory pointer + * in the database. Memory capacity is the overallocated size of the + * array in schemes where memory is managed internally to reduce + * the number of reallocations. + * + * **Parameters** + * * void *malloc_mem_ptr -- memory pointer for entry in the database + * + * Typical Usage + * + * size_t var_capacity = my_mem->get_memory_capacity(density); + *******************************************************************/ + size_t get_memory_capacity(void *malloc_mem_ptr); + +/****************************************************************//** + * \brief + * Get the element size for a memory pointer in the database. + * + * **Parameters** + * * void *malloc_mem_ptr -- memory pointer for entry in the database + * + * Typical Usage + * + * int elsize = my_mem->get_memory_elemsize(density); + *******************************************************************/ + int get_memory_elemsize(void *malloc_mem_ptr); + +/****************************************************************//** + * \brief + * Get name for a memory pointer in the database. 
+ * + * **Parameters** + * * void *malloc_mem_ptr -- memory pointer for entry in the database + * + * Typical Usage + * + * const char *var_name = my_mem->get_memory_name(density); + *******************************************************************/ + const char *get_memory_name(void *malloc_mem_ptr); + +/****************************************************************//** + * \brief + * Get memory pointer for a named entry from the database. + * + * **Parameters** + * * const char *name -- name of entry in the database + * + * Typical Usage + * + * density = (double *)my_mem->get_memory_ptr("Density"); + *******************************************************************/ + void *get_memory_ptr(const char *name); + +/****************************************************************//** + * \brief + * Set a memory attribute for a memory pointer in the database. + * + * **Parameters** + * * void *malloc_mem_ptr -- memory pointer of entry in the database + * * int attribute -- flag to set for entry + * + * Typical Usage + * + * my_mem->set_memory_attribute(density, HOST_MANAGED_MEMORY); + *******************************************************************/ + void set_memory_attribute(void *malloc_mem_ptr, int attribute); + +/****************************************************************//** + * \brief + * Clear memory attribute for a memory pointer in the database. + * + * **Parameters** + * * void *malloc_mem_ptr -- memory pointer of entry in the database + * * int attribute -- flag to clear for entry + * + * Typical Usage + * + * my_mem->clear_memory_attribute(density, HOST_MANAGED_MEMORY); + *******************************************************************/ + void clear_memory_attribute(void *malloc_mem_ptr, int attribute); + +/****************************************************************//** + * \brief + * Get memory attributes for a memory pointer in the database. Returns + * the flag field. 
+ * + * **Parameters** + * * void *malloc_mem_ptr -- memory pointer of entry in the database + * + * Typical Usage + * + * int flag = my_mem->get_memory_flags(density); + *******************************************************************/ + int get_memory_flags(void *malloc_mem_ptr); + +/****************************************************************//** + * \brief + * Checks the setting for a memory attribute for a memory pointer in + * the database. Returns true for set and false for unset. + * + * **Parameters** + * * void *malloc_mem_ptr -- memory pointer of entry in the database + * * int attribute -- flag to check setting for entry + * + * Typical Usage + * + * bool flag = my_mem->check_memory_attribute(density, HOST_MANAGED_MEMORY); + *******************************************************************/ + bool check_memory_attribute(void *malloc_mem_ptr, int attribute); +}; + +extern "C" { + MallocPlus *MallocPlus_new(); + + void MallocPlus_memory_report(MallocPlus *mem_object); + + void MallocPlus_memory_add(MallocPlus *mem_object, void *dbleptr, + size_t nelem, size_t elsize, char *name, unsigned long long flags); + void MallocPlus_memory_add_nD(MallocPlus *mem_object, void *dbleptr, + int ndim, size_t *nelem, size_t elsize, char *name, unsigned long long flags); +} + +#endif // ifndef MALLOCPLUS_H_ + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/MallocPlus.cpp =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/MallocPlus.cpp @@ -0,0 +1,1227 @@ +/* + * Copyright (c) 2011-2014, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. 
Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * Neal Davis davis68@lanl.gov, davis68@illinois.edu + * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com + * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com + * + */ + +// SKG TODO op realloc (similar to managed) + +#undef HAVE_OPENCL + +#include "MallocPlus.h" +#include +#include +#include +#include +#include +#include +#ifdef HAVE_OPENCL +#include "ezcl/ezcl.h" +#endif + +#ifndef DEBUG +#define DEBUG 0 +#endif +#define WARNING_SUPPRESSION 0 + +#ifdef HAVE_CL_DOUBLE +#ifdef HAVE_OPENCL +typedef cl_double2 cl_real2; +#endif +#else +#ifdef HAVE_OPENCL +typedef cl_float2 cl_real2; +#endif +#endif + +#ifndef MIN +#define MIN(a,b) ((a) < (b) ? 
(a) : (b)) +#endif + +#ifndef SWAP_PTR +#define SWAP_PTR(xnew,xold,xtmp) (xtmp=xnew, xnew=xold, xold=xtmp) +#endif + +typedef unsigned int uint; +map::iterator it_save, it_end; +map::iterator it_save_by_name, it_end_by_name; + +#if defined(HAVE_MPI) +void +MallocPlus::pinit(MPI_Comm smComm, std::size_t memPoolSize) +{ +#if defined(HAVE_J7) + try { + j7 = new J7(smComm, memPoolSize); + } + catch(...) { + std::cerr << "*** pinit failure ***" << std::endl; + throw; + } +#else + // Just to suppress compiler warnings + if (WARNING_SUPPRESSION) printf("DEBUG memPoolSize = %lu smComm = %p\n",memPoolSize,smComm); +#endif +} + +void +MallocPlus::pfini(void) +{ +#if defined(HAVE_J7) + try { + delete j7; + j7 = NULL; + } + catch(...) { + std::cerr << "*** pfini failure ***" << std::endl; + throw; + } +#endif +} +#endif // if defined(HAVE_MPI) + +void *MallocPlus::memory_malloc(size_t nelem, size_t elsize, const char *name, int flags){ + malloc_plus_memory_entry *memory_item = (malloc_plus_memory_entry *)malloc(sizeof(malloc_plus_memory_entry)); + + memory_item->mem_nelem = (size_t *)malloc(1*sizeof(size_t)); + memory_item->mem_nelem[0] = nelem; + memory_item->mem_ndims = 1; + memory_item->mem_elsize = elsize; + memory_item->mem_flags = flags; + + // allocate memory on the accelerator if flag is set + if ((flags & DEVICE_REGULAR_MEMORY) != 0){ +#ifdef HAVE_OPENCL + cl_context context = ezcl_get_context(); + memory_item->mem_capacity = nelem; + memory_item->mem_ptr = ezcl_device_memory_malloc(context, NULL, name, nelem, elsize, CL_MEM_READ_WRITE, 0); +#endif + } + // Managed memory allocates extra space and expands as necessary to reduce allocations + else if ((flags & HOST_MANAGED_MEMORY) != 0){ + memory_item->mem_capacity = 2 * nelem; + memory_item->mem_ptr = malloc(2* nelem*elsize); + } +#ifdef HAVE_J7 + // experimental shared memory allocation + else if (flags & LOAD_BALANCE_MEMORY) { + memory_item->mem_capacity = nelem; + memory_item->mem_ptr = j7->memAlloc(nelem * 
elsize); + } +#endif + // Just regular memory allocation + else { + memory_item->mem_capacity = nelem; + memory_item->mem_ptr = malloc(nelem*elsize); + } + + memory_item->mem_name = strdup(name); // Mallocs memory for copy + + //printf("MALLOC_PLUS_MEMORY_MALLOC: DEBUG -- malloc plus memory pointer for :%s: is %p nelements %ld elsize is %ld flags %d\n",memory_item->mem_name,memory_item->mem_ptr,memory_item->mem_nelem[0],memory_item->mem_elsize,memory_item->mem_flags); + + // Insert entry into dictionary -- two versions, one by name and another by pointer address + memory_name_dict.insert(std::pair(name, memory_item) ); + memory_ptr_dict.insert(std::pair(memory_item->mem_ptr, memory_item) ); + + if (DEBUG) printf("MALLOC_PLUS_MEMORY_MALLOC: DEBUG -- malloc plus memory pointer for :%s: is %p nelements %ld elsize is %ld\n",memory_item->mem_name,memory_item->mem_ptr,memory_item->mem_nelem[0],memory_item->mem_elsize); + + // return the pointer for use by the calling routine + return(memory_item->mem_ptr); +} + +void *MallocPlus::memory_realloc(size_t nelem, void *malloc_mem_ptr){ + map ::iterator it = memory_ptr_dict.find(malloc_mem_ptr); + void *mem_ptr=NULL; + + if (it != memory_ptr_dict.end() ){ + // "second" will be the pointer to the memory entry data structure -- the value + // associated with the key + malloc_plus_memory_entry *memory_item = it->second; + + if (DEBUG) printf("MALLOC_PLUS_MEMORY_REALLOC: DEBUG -- reallocated memory pointer %p\n",memory_item->mem_ptr); + + // memory pointer will probably change, so delete the dictionary entry + // named dictionary entry does not need to change; the pointer in the data structure + // will just be updated, but the pointer to the memory entry data structure + // will be the same + memory_ptr_dict.erase(it); + + if (memory_item->mem_flags & HOST_MANAGED_MEMORY){ + // Check to see if memory needs to be expanded + if (nelem > memory_item->mem_capacity) { + // Need to realloc memory. Allocate extra for growth of array. 
+ mem_ptr=realloc(memory_item->mem_ptr, 2*nelem*memory_item->mem_elsize); + memory_item->mem_capacity = 2*nelem; + memory_item->mem_nelem[0] = nelem; + memory_item->mem_ptr = mem_ptr; + } else { + // Just move size to use more of memory buffer + memory_item->mem_nelem[0] = nelem; + } + } +#ifdef HAVE_J7 + else if (memory_item->mem_flags & LOAD_BALANCE_MEMORY) { + mem_ptr = j7->memRealloc(memory_item->mem_ptr, nelem * memory_item->mem_elsize); + memory_item->mem_capacity = nelem; + memory_item->mem_nelem[0] = nelem; + memory_item->mem_ptr = mem_ptr; + } +#endif + else { + mem_ptr=realloc(memory_item->mem_ptr, nelem*memory_item->mem_elsize); + memory_item->mem_capacity = nelem; + memory_item->mem_nelem[0] = nelem; + memory_item->mem_ptr = mem_ptr; + } + + // Put the pointer entry back into the dictionary + memory_ptr_dict.insert(std::pair(memory_item->mem_ptr, memory_item) ); + } else { + if (DEBUG) printf("Warning -- memory pointer %p not found\n",malloc_mem_ptr); + } + + return(mem_ptr); +} + +void *MallocPlus::memory_realloc(size_t nelem, const char *name){ + map ::iterator it = memory_name_dict.find(name); + void *mem_ptr=NULL; + + if (it != memory_name_dict.end() ){ + malloc_plus_memory_entry *memory_item = it->second; + + if (DEBUG) printf("MALLOC_PLUS_MEMORY_REALLOC: DEBUG -- " + "reallocated memory pointer %p\n",memory_item->mem_ptr); + + // Need to get the iterator for the pointer entry; the one above is for the name entry + map ::iterator it = memory_ptr_dict.find(memory_item->mem_ptr); + memory_ptr_dict.erase(it); + + if (memory_item->mem_flags & HOST_MANAGED_MEMORY) { + // Check to see if memory needs to be expanded + if (nelem > memory_item->mem_capacity) { + // Need to realloc memory. Allocate extra for growth of array. 
+ mem_ptr=realloc(memory_item->mem_ptr, 2*nelem*memory_item->mem_elsize); + memory_item->mem_capacity = 2*nelem; + memory_item->mem_nelem[0] = nelem; + memory_item->mem_ptr = mem_ptr; + } else { + // Just move size to use more of memory buffer + memory_item->mem_nelem[0] = nelem; + } + } +#ifdef HAVE_J7 + else if (memory_item->mem_flags & LOAD_BALANCE_MEMORY) { + mem_ptr = j7->memRealloc(memory_item->mem_ptr, nelem * memory_item->mem_elsize); + memory_item->mem_capacity = nelem; + memory_item->mem_nelem[0] = nelem; + memory_item->mem_ptr = mem_ptr; + } +#endif + else { + //memory_name_dict.erase(it); + mem_ptr=realloc(memory_item->mem_ptr, nelem*memory_item->mem_elsize); + memory_item->mem_capacity = nelem; + memory_item->mem_nelem[0] = nelem; + memory_item->mem_ptr = mem_ptr; + //memory_name_dict.insert(std::pair(name, memory_item) ); + } + + memory_ptr_dict.insert(std::pair(memory_item->mem_ptr, memory_item) ); + } else { + if (DEBUG) printf("Warning -- memory named %s not found\n",name); + } + + return(mem_ptr); +} + +void *MallocPlus::memory_request(size_t new_capacity, void *malloc_mem_ptr){ + map ::iterator it = memory_ptr_dict.find(malloc_mem_ptr); + void *mem_ptr=NULL; + + if (it != memory_ptr_dict.end() ){ + malloc_plus_memory_entry *memory_item = it->second; + + if (DEBUG) printf("MALLOC_PLUS_MEMORY_REQUEST: DEBUG -- reallocated memory pointer %p\n",memory_item->mem_ptr); + memory_ptr_dict.erase(it); + mem_ptr=realloc(memory_item->mem_ptr, new_capacity*memory_item->mem_elsize); + memory_item->mem_capacity = new_capacity; + memory_item->mem_ptr = mem_ptr; + memory_ptr_dict.insert(std::pair(mem_ptr, memory_item) ); + } else { + if (DEBUG) printf("Warning -- memory pointer %p not found\n",malloc_mem_ptr); + } + + return(mem_ptr); +} + +// Increases the capacity of the allocated memory, primarily for the managed memory functionality +void *MallocPlus::memory_request(size_t new_capacity, const char *name){ + map ::iterator it = memory_name_dict.find(name); + 
void *mem_ptr=NULL; + + if (it != memory_name_dict.end() ){ + malloc_plus_memory_entry *memory_item = it->second; + + if (DEBUG) printf("MALLOC_PLUS_MEMORY_REQUEST: DEBUG -- reallocated memory pointer %p\n",memory_item->mem_ptr); + map ::iterator it = memory_ptr_dict.find(memory_item->mem_ptr); + memory_ptr_dict.erase(it); + mem_ptr=realloc(memory_item->mem_ptr, new_capacity*memory_item->mem_elsize); + memory_item->mem_capacity = new_capacity; + memory_item->mem_ptr = mem_ptr; + memory_ptr_dict.insert(std::pair(mem_ptr, memory_item) ); + } else { + if (DEBUG) printf("Warning -- memory named %s not found\n",name); + } + + return(mem_ptr); +} + +void MallocPlus::memory_realloc_all(size_t nelem){ + // Need a copy of the dictionary since we will be modifying while being used + map memory_ptr_dict_old = memory_ptr_dict; + + // Need iterators to both new and old; new will be modified during the loop + map::iterator it_old; + map::iterator it_new; + void *mem_ptr=NULL; + + for ( it_old=memory_ptr_dict_old.begin(); it_old != memory_ptr_dict_old.end(); it_old++){ + // Get the memory entry for the old dictionary + malloc_plus_memory_entry *memory_item = it_old->second; + + // Get the iterator to the new dictionary by memory pointer and delete it + // since it will probably change + // The dictionary by name does not need to be updated + it_new = memory_ptr_dict.find(memory_item->mem_ptr); + memory_ptr_dict.erase(it_new); + + if (memory_item->mem_flags & HOST_MANAGED_MEMORY) { + if (nelem > memory_item->mem_capacity) { + mem_ptr=realloc(memory_item->mem_ptr, nelem*memory_item->mem_elsize); + if (DEBUG) printf("MALLOC_PLUS_MEMORY_REALLOC_ALL: DEBUG -- reallocated memory pointer %p new pointer %p\n",memory_item->mem_ptr,mem_ptr); + memory_item->mem_capacity = nelem; + memory_item->mem_nelem[0] = nelem; + memory_item->mem_ptr = mem_ptr; + } else { + memory_item->mem_nelem[0] = nelem; + } + } +#ifdef HAVE_J7 + else if (it->mem_flags & LOAD_BALANCE_MEMORY) { + mem_ptr = 
j7->memRealloc(memory_item->mem_ptr, nelem * memory_item->mem_elsize); + memory_item->mem_capacity = nelem; + memory_item->mem_nelem[0] = nelem; + memory_item->mem_ptr = mem_ptr; + } +#endif + else { + mem_ptr=realloc(memory_item->mem_ptr, nelem*memory_item->mem_elsize); + if (DEBUG) printf("MALLOC_PLUS_MEMORY_REALLOC_ALL: DEBUG -- reallocated memory pointer %p new pointer %p\n",memory_item->mem_ptr,mem_ptr); + memory_item->mem_capacity = nelem; + memory_item->mem_nelem[0] = nelem; + memory_item->mem_ptr = mem_ptr; + } + + //Insert the entry back into the dictionary + memory_ptr_dict.insert(std::pair(mem_ptr, memory_item) ); + } +} + +void MallocPlus::memory_request_all(size_t new_capacity){ + map memory_ptr_dict_old = memory_ptr_dict; + + map::iterator it_old; + map::iterator it_new; + + for ( it_old=memory_ptr_dict_old.begin(); it_old != memory_ptr_dict_old.end(); it_old++){ + malloc_plus_memory_entry *memory_item = it_old->second; + + it_new = memory_ptr_dict.find(memory_item->mem_ptr); + memory_ptr_dict.erase(it_new); + + void *mem_ptr=realloc(memory_item->mem_ptr, new_capacity*memory_item->mem_elsize); + if (DEBUG) printf("MALLOC_PLUS_MEMORY_REQUEST_ALL: DEBUG -- reallocated memory pointer %p new pointer %p\n",memory_item->mem_ptr,mem_ptr); + memory_item->mem_capacity = new_capacity; + memory_item->mem_ptr = mem_ptr; + + memory_ptr_dict.insert(std::pair(mem_ptr, memory_item) ); + } +} + +// This routine is for memory allocated by the host program and added to the database +void *MallocPlus::memory_add(void *malloc_mem_ptr, size_t nelem, size_t elsize, const char *name, int flags){ + malloc_plus_memory_entry *memory_item = (malloc_plus_memory_entry *)malloc(sizeof(malloc_plus_memory_entry)); + + memory_item->mem_nelem = (size_t *)malloc(1*sizeof(size_t)); + memory_item->mem_nelem[0] = nelem; + memory_item->mem_ndims = 1; + memory_item->mem_capacity = nelem; + memory_item->mem_elsize = elsize; + memory_item->mem_flags = flags; + memory_item->mem_ptr = 
malloc_mem_ptr; + memory_item->mem_name = strdup(name); // mallocs memory + memory_ptr_dict.insert(std::pair(malloc_mem_ptr, memory_item) ); + memory_name_dict.insert(std::pair(name, memory_item) ); + if (DEBUG) printf("MALLOC_PLUS_MEMORY_ADD: DEBUG -- added memory pointer for %s is %p\n",name,malloc_mem_ptr); + + return(malloc_mem_ptr); +} + +// This routine is for memory allocated by the host program and added to the database +void *MallocPlus::memory_add(void *malloc_mem_ptr, int ndim, size_t *nelem, size_t elsize, const char *name, int flags){ + malloc_plus_memory_entry *memory_item = (malloc_plus_memory_entry *)malloc(sizeof(malloc_plus_memory_entry)); + + memory_item->mem_nelem = (size_t *)malloc(ndim*sizeof(size_t)); + for (int i=0; imem_nelem[i] = nelem[i]; + } + memory_item->mem_ndims = ndim; + memory_item->mem_capacity = 0; + memory_item->mem_elsize = elsize; + memory_item->mem_flags = flags; + memory_item->mem_ptr = malloc_mem_ptr; + memory_item->mem_name = strdup(name); // mallocs memory + memory_ptr_dict.insert(std::pair(malloc_mem_ptr, memory_item) ); + memory_name_dict.insert(std::pair(name, memory_item) ); + if (DEBUG) printf("MALLOC_PLUS_MEMORY_ADD: DEBUG -- added memory pointer for %s is %p\n",name,malloc_mem_ptr); + + return(malloc_mem_ptr); +} + +double *MallocPlus::memory_reorder(double *malloc_mem_ptr, int *iorder){ + map ::iterator it = memory_ptr_dict.find(malloc_mem_ptr); + + if (it != memory_ptr_dict.end() ){ + malloc_plus_memory_entry *memory_item = it->second; + double *ptr; + + memory_ptr_dict.erase(it); + + if (DEBUG) printf("Found memory item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name); + double *tmp = (double *)malloc(memory_item->mem_nelem[0]*memory_item->mem_elsize); +#ifdef _OPENMP +#pragma omp parallel for +#endif + for (uint ic = 0; ic < memory_item->mem_nelem[0]; ic++){ + tmp[ic] = malloc_mem_ptr[iorder[ic]]; + } + SWAP_PTR(malloc_mem_ptr, tmp, ptr); + free(tmp); + memory_item->mem_ptr = malloc_mem_ptr; + + 
memory_ptr_dict.insert(std::pair(malloc_mem_ptr, memory_item) ); + } else { + if (DEBUG) printf("Warning -- memory pointer %p not found\n",malloc_mem_ptr); + } + + return(malloc_mem_ptr); +} + +float *MallocPlus::memory_reorder(float *malloc_mem_ptr, int *iorder){ + map ::iterator it = memory_ptr_dict.find(malloc_mem_ptr); + + if (it != memory_ptr_dict.end() ){ + malloc_plus_memory_entry *memory_item = it->second; + float *ptr; + + memory_ptr_dict.erase(it); + + if (DEBUG) printf("Found memory item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name); + float *tmp = (float *)malloc(memory_item->mem_nelem[0]*memory_item->mem_elsize); +#ifdef _OPENMP +#pragma omp parallel for +#endif + for (uint ic = 0; ic < memory_item->mem_nelem[0]; ic++){ + tmp[ic] = malloc_mem_ptr[iorder[ic]]; + } + SWAP_PTR(malloc_mem_ptr, tmp, ptr); + free(tmp); + memory_item->mem_ptr = malloc_mem_ptr; + + memory_ptr_dict.insert(std::pair(malloc_mem_ptr, memory_item) ); + } else { + if (DEBUG) printf("Warning -- memory pointer %p not found\n",malloc_mem_ptr); + } + + return(malloc_mem_ptr); +} + +int *MallocPlus::memory_reorder(int *malloc_mem_ptr, int *iorder){ + map ::iterator it = memory_ptr_dict.find(malloc_mem_ptr); + + if (it != memory_ptr_dict.end() ){ + malloc_plus_memory_entry *memory_item = it->second; + int *ptr; + + memory_ptr_dict.erase(it); + + if (DEBUG) printf("Found memory item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name); + int *tmp = (int *)malloc(memory_item->mem_nelem[0]*memory_item->mem_elsize); +#ifdef _OPENMP +#pragma omp parallel for +#endif + for (uint ic = 0; ic < memory_item->mem_nelem[0]; ic++){ + tmp[ic] = malloc_mem_ptr[iorder[ic]]; + } + SWAP_PTR(malloc_mem_ptr, tmp, ptr); + free(tmp); + memory_item->mem_ptr = malloc_mem_ptr; + + memory_ptr_dict.insert(std::pair(malloc_mem_ptr, memory_item) ); + } else { + if (DEBUG) printf("Warning -- memory pointer %p not found\n",malloc_mem_ptr); + } + + return(malloc_mem_ptr); +} + +int 
*MallocPlus::memory_reorder_indexarray(int *malloc_mem_ptr, int *iorder, int *inv_iorder){ + map ::iterator it = memory_ptr_dict.find(malloc_mem_ptr); + + if (it != memory_ptr_dict.end() ){ + malloc_plus_memory_entry *memory_item = it->second; + int *ptr; + + if (DEBUG) printf("Found memory_item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name); + memory_ptr_dict.erase(it); + int *tmp = (int *)malloc(memory_item->mem_nelem[0]*memory_item->mem_elsize); + for (uint ic = 0; ic < memory_item->mem_nelem[0]; ic++){ + tmp[ic] = inv_iorder[malloc_mem_ptr[iorder[ic]]]; + } + SWAP_PTR(malloc_mem_ptr, tmp, ptr); + free(tmp); + memory_item->mem_ptr = malloc_mem_ptr; + memory_ptr_dict.insert(std::pair(malloc_mem_ptr, memory_item) ); + } else { + if (DEBUG) printf("Warning -- memory pointer %p not found\n",malloc_mem_ptr); + } + + return(malloc_mem_ptr); +} + +void MallocPlus::memory_reorder_all(int *iorder){ + map memory_ptr_dict_old = memory_ptr_dict; + map ::iterator it_old; + vector inv_iorder; + + for ( it_old=memory_ptr_dict_old.begin(); it_old != memory_ptr_dict_old.end(); it_old++){ + malloc_plus_memory_entry *memory_item_old = it_old->second; + + map ::iterator it = memory_ptr_dict.find(memory_item_old->mem_ptr); + malloc_plus_memory_entry *memory_item = it_old->second; + memory_ptr_dict.erase(it); + + if (memory_item_old->mem_flags & 0x100) { + if (inv_iorder.size() < memory_item_old->mem_nelem[0]) { + inv_iorder.resize(memory_item_old->mem_nelem[0]); + for (int ic = 0; ic < (int)memory_item_old->mem_nelem[0]; ic++){ + inv_iorder[iorder[ic]] = ic; + } + } + int *ptr; + int *malloc_mem_ptr = (int *)memory_item_old->mem_ptr; + int *tmp = (int *)malloc(memory_item_old->mem_nelem[0]*memory_item_old->mem_elsize); + for (uint ic = 0; ic < memory_item_old->mem_nelem[0]; ic++){ + tmp[ic] = inv_iorder[malloc_mem_ptr[iorder[ic]]]; + } + memory_replace(malloc_mem_ptr, tmp); + SWAP_PTR(malloc_mem_ptr, tmp, ptr); + free(tmp); + memory_item->mem_ptr = malloc_mem_ptr; + 
memory_ptr_dict.insert(std::pair(malloc_mem_ptr, memory_item) ); + } else if (memory_item_old->mem_elsize == 8){ + double *ptr; + double *malloc_mem_ptr = (double *)memory_item_old->mem_ptr; + double *tmp = (double *)malloc(memory_item_old->mem_nelem[0]*memory_item_old->mem_elsize); + + for (uint ic = 0; ic < memory_item_old->mem_nelem[0]; ic++){ + tmp[ic] = malloc_mem_ptr[iorder[ic]]; + } + + SWAP_PTR(malloc_mem_ptr, tmp, ptr); + free(tmp); + memory_item->mem_ptr = malloc_mem_ptr; + memory_ptr_dict.insert(std::pair(malloc_mem_ptr, memory_item) ); + } else { + float *ptr; + float *malloc_mem_ptr = (float *)memory_item_old->mem_ptr; + float *tmp = (float *)malloc(memory_item_old->mem_nelem[0]*memory_item_old->mem_elsize); + for (uint ic = 0; ic < memory_item_old->mem_nelem[0]; ic++){ + tmp[ic] = malloc_mem_ptr[iorder[ic]]; + } + memory_replace(malloc_mem_ptr, tmp); + SWAP_PTR(malloc_mem_ptr, tmp, ptr); + free(tmp); + memory_item->mem_ptr = malloc_mem_ptr; + memory_ptr_dict.insert(std::pair(malloc_mem_ptr, memory_item) ); + } + + } + + inv_iorder.clear(); +} + +void MallocPlus::memory_report(void){ + map::iterator it_ptr; + + for ( it_ptr=memory_ptr_dict.begin(); it_ptr != memory_ptr_dict.end(); it_ptr++){ + malloc_plus_memory_entry *memory_item = it_ptr->second; + + printf("MallocPlus ptr %p: name %10s ptr %p dims %lu nelem (", + it_ptr->first,memory_item->mem_name,memory_item->mem_ptr,memory_item->mem_ndims); + + char nelemstring[80]; + char *str_ptr = nelemstring; + str_ptr += sprintf(str_ptr,"%lu", memory_item->mem_nelem[0]); + for (uint i = 1; i < memory_item->mem_ndims; i++){ + str_ptr += sprintf(str_ptr,", %lu", memory_item->mem_nelem[i]); + } + printf("%12s",nelemstring); + + printf(") elsize %lu flags %d capacity %lu\n", + memory_item->mem_elsize,memory_item->mem_flags,memory_item->mem_capacity); + } + + map::iterator it_name; + + for ( it_name=memory_name_dict.begin(); it_name != memory_name_dict.end(); it_name++){ + malloc_plus_memory_entry *memory_item = 
it_name->second; + + printf("MallocPlus name %14s: name %10s ptr %p dims %lu nelem (", + it_name->first.c_str(),memory_item->mem_name,memory_item->mem_ptr,memory_item->mem_ndims); + + char nelemstring[80]; + char *str_ptr = nelemstring; + str_ptr += sprintf(str_ptr,"%lu", memory_item->mem_nelem[0]); + for (uint i = 1; i < memory_item->mem_ndims; i++){ + str_ptr += sprintf(str_ptr,", %lu", memory_item->mem_nelem[i]); + } + printf("%12s",nelemstring); + + printf(") elsize %lu flags %d capacity %lu\n", + memory_item->mem_elsize,memory_item->mem_flags,memory_item->mem_capacity); + } +} + +void *MallocPlus::memory_delete(void *malloc_mem_ptr){ + map ::iterator it = memory_ptr_dict.find(malloc_mem_ptr); + + if (it != memory_ptr_dict.end()){ + malloc_plus_memory_entry *memory_item = it->second; + + if (DEBUG) printf("MALLOC_PLUS_MEMORY_REMOVE: DEBUG -- removed memory pointer %p\n",memory_item->mem_ptr); + + if ((memory_item->mem_flags & DEVICE_REGULAR_MEMORY) != 0){ +#ifdef HAVE_OPENCL + //printf("MALLOC_PLUS_MEMORY_REMOVE: DEBUG -- removed memory pointer %p\n",memory_item->mem_ptr); + ezcl_device_memory_delete(memory_item->mem_ptr); +#endif + } +#ifdef HAVE_J7 + else if (memory_item->mem_flags & LOAD_BALANCE_MEMORY) { + j7->memFree(memory_item->mem_ptr); + } +#endif + else { + free(memory_item->mem_ptr); + } + + memory_ptr_dict.erase(it); + // Need to delete the entry in the name dictionary. 
This is done in a separate scope + // so the iterator "it" is isolated for this use + { + map ::iterator it = memory_name_dict.find(memory_item->mem_name); + memory_name_dict.erase(it); + } + + free(memory_item->mem_nelem); + free(memory_item->mem_name); + free(memory_item); + } else { + if (DEBUG) printf("Warning -- memory pointer %p not found\n",malloc_mem_ptr); + } + + return(NULL); +} + +void *MallocPlus::memory_delete(const char *name){ + map ::iterator it = memory_name_dict.find(name); + + if (it != memory_name_dict.end()){ + malloc_plus_memory_entry *memory_item = it->second; + + if (DEBUG) printf("MALLOC_PLUS_MEMORY_REMOVE: DEBUG -- removed memory pointer %p\n",memory_item->mem_ptr); + if ((memory_item->mem_flags & DEVICE_REGULAR_MEMORY) != 0){ +#ifdef HAVE_OPENCL + ezcl_device_memory_delete(memory_item->mem_ptr); +#endif + } +#ifdef HAVE_J7 + else if (memory_item->mem_flags & LOAD_BALANCE_MEMORY) { + j7->memFree(memory_item->mem_ptr); + } +#endif + else { + free(memory_item->mem_ptr); + } + + memory_name_dict.erase(it); + { + map ::iterator it = memory_ptr_dict.find(memory_item->mem_ptr); + memory_ptr_dict.erase(it); + } + + free(memory_item->mem_nelem); + free(memory_item->mem_name); + free(memory_item); + } else { + if (DEBUG) printf("Warning -- memory named %s not found\n",name); + } + + return(NULL); +} + +void MallocPlus::memory_delete_all(void){ + map memory_ptr_dict_old = memory_ptr_dict; + map ::iterator it; + + for ( it=memory_ptr_dict_old.begin(); it != memory_ptr_dict_old.end(); it++){ + malloc_plus_memory_entry *memory_item = it->second; + + if (DEBUG) printf("MALLOC_PLUS_MEMORY_REMOVE: DEBUG -- removed memory pointer %p name %s\n",memory_item->mem_ptr,memory_item->mem_name); + + if ((memory_item->mem_flags & DEVICE_REGULAR_MEMORY) != 0){ +#ifdef HAVE_OPENCL + ezcl_device_memory_delete(memory_item->mem_ptr); +#endif + } else { + free(memory_item->mem_ptr); + } + + free(memory_item->mem_nelem); + free(memory_item->mem_name); + free(memory_item); 
+ } + + memory_ptr_dict.clear(); + memory_name_dict.clear(); +} + +// For memory that was allocated by the host and added to the database with the +// memory_add function. This is the corresponding routine to delete the dictionary entry. +// The memory itself is not freed. +void MallocPlus::memory_remove(void *malloc_mem_ptr){ + map ::iterator it = memory_ptr_dict.find(malloc_mem_ptr); + + if (it != memory_ptr_dict.end()){ + malloc_plus_memory_entry *memory_item = it->second; + + if (DEBUG) printf("MALLOC_PLUS_MEMORY_REMOVE: DEBUG -- removed memory pointer %p\n",memory_item->mem_ptr); + memory_ptr_dict.erase(it); + { + map ::iterator it = memory_name_dict.find(memory_item->mem_name); + memory_name_dict.erase(it); + } + free(memory_item->mem_nelem); + free(memory_item->mem_name); + free(memory_item); + } else { + if (DEBUG) printf("Warning -- memory pointer %p not found\n",malloc_mem_ptr); + } +} + +void MallocPlus::memory_remove(const char *name){ + map ::iterator it = memory_name_dict.find(name); + + if (it != memory_name_dict.end()){ + malloc_plus_memory_entry *memory_item = it->second; + + if (DEBUG) printf("MALLOC_PLUS_MEMORY_REMOVE: DEBUG -- removed memory pointer %p\n",memory_item->mem_ptr); + memory_name_dict.erase(it); + { + map ::iterator it = memory_ptr_dict.find(memory_item->mem_ptr); + memory_ptr_dict.erase(it); + } + free(memory_item->mem_nelem); + free(memory_item->mem_name); + free(memory_item); + } else { + if (DEBUG) printf("Warning -- memory named %s not found\n",name); + } +} + +void *MallocPlus::memory_begin(void){ + it_save = memory_ptr_dict.begin(); + malloc_plus_memory_entry *memory_item = it_save->second; + return(memory_item->mem_ptr); +} + +void *MallocPlus::memory_next(void){ + map ::iterator it; + + it_save++; + it = it_save; + + if (it != memory_ptr_dict.end()){ + malloc_plus_memory_entry *memory_item = it->second; + + if (DEBUG) printf("Found memory item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name); + 
return(memory_item->mem_ptr); + } else { + if (DEBUG) printf("Warning -- memory not found\n"); + return(NULL); + } +} + +void *MallocPlus::memory_by_name_begin(void){ + it_save_by_name = memory_name_dict.begin(); + malloc_plus_memory_entry *memory_item = it_save->second; + return(memory_item->mem_ptr); +} + +void *MallocPlus::memory_by_name_next(void){ + map::iterator it_by_name; + + it_save_by_name++; + it_by_name = it_save_by_name; + + if (it_by_name != memory_name_dict.end()){ + malloc_plus_memory_entry *memory_item = it_by_name->second; + + if (DEBUG) printf("Found memory item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name); + return(memory_item->mem_ptr); + } else { + if (DEBUG) printf("Warning -- memory not found\n"); + return(NULL); + } +} + +malloc_plus_memory_entry* MallocPlus::memory_entry_begin(void){ + it_save = memory_ptr_dict.begin(); + malloc_plus_memory_entry *memory_item = it_save->second; + return(memory_item); +} + +malloc_plus_memory_entry* MallocPlus::memory_entry_next(void){ + it_save++; + if (it_save == memory_ptr_dict.end()) return(NULL); + malloc_plus_memory_entry *memory_item = it_save->second; + if (DEBUG) printf("Found memory item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name); + return(memory_item); +} + +malloc_plus_memory_entry* MallocPlus::memory_entry_end(void){ + return(NULL); +} + +malloc_plus_memory_entry* MallocPlus::memory_entry_by_name_begin(void){ + it_save_by_name = memory_name_dict.begin(); + malloc_plus_memory_entry *memory_item = it_save_by_name->second; + return(memory_item); +} + +malloc_plus_memory_entry* MallocPlus::memory_entry_by_name_next(void){ + it_save_by_name++; + if (it_save_by_name == memory_name_dict.end()) return(NULL); + malloc_plus_memory_entry *memory_item = it_save_by_name->second; + if (DEBUG) printf("Found memory item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name); + return(memory_item); +} + +malloc_plus_memory_entry* 
MallocPlus::memory_entry_by_name_end(void){ + return(NULL); +} + +size_t MallocPlus::get_memory_size(void *malloc_mem_ptr){ + map ::iterator it = memory_ptr_dict.find(malloc_mem_ptr); + + if (it != memory_ptr_dict.end()){ + malloc_plus_memory_entry *memory_item = it->second; + + if (DEBUG) printf("Found memory item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name); + return(memory_item->mem_nelem[0]); + } else { + if (DEBUG) printf("Warning -- memory not found\n"); + } + return(0); +} + +int MallocPlus::get_memory_elemsize(void *malloc_mem_ptr){ + map ::iterator it = memory_ptr_dict.find(malloc_mem_ptr); + + if (it != memory_ptr_dict.end()){ + malloc_plus_memory_entry *memory_item = it->second; + + if (DEBUG) printf("Found memory item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name); + return(memory_item->mem_elsize); + } else { + if (DEBUG) printf("Warning -- memory not found\n"); + } + return(0); +} + +int MallocPlus::get_memory_flags(void *malloc_mem_ptr){ + map ::iterator it = memory_ptr_dict.find(malloc_mem_ptr); + + if (it != memory_ptr_dict.end()){ + malloc_plus_memory_entry *memory_item = it->second; + + if (DEBUG) printf("Found memory item ptr %p name %s attribute %d\n",memory_item->mem_ptr,memory_item->mem_name,memory_item->mem_flags); + return(memory_item->mem_flags); + } else { + if (DEBUG) printf("Warning -- memory not found\n"); + } + return(0); +} + +size_t MallocPlus::get_memory_capacity(void *malloc_mem_ptr){ + map ::iterator it = memory_ptr_dict.find(malloc_mem_ptr); + + if (it != memory_ptr_dict.end()){ + malloc_plus_memory_entry *memory_item = it->second; + + if (DEBUG) printf("Found memory item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name); + return(memory_item->mem_capacity); + } else { + if (DEBUG) printf("Warning -- memory not found\n"); + } + return(0); +} + +const char * MallocPlus::get_memory_name(void *malloc_mem_ptr){ + map ::iterator it = memory_ptr_dict.find(malloc_mem_ptr); + + if (it != 
memory_ptr_dict.end()){ + malloc_plus_memory_entry *memory_item = it->second; + + if (DEBUG) printf("Found memory item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name); + return(memory_item->mem_name); + } else { + if (DEBUG) printf("Warning -- memory not found\n"); + } + return(NULL); +} + +void *MallocPlus::memory_replace(void *malloc_mem_ptr_old, void * const malloc_mem_ptr_new){ + map ::iterator it_old = memory_ptr_dict.find(malloc_mem_ptr_old); + map ::iterator it_new = memory_ptr_dict.find(malloc_mem_ptr_new); + + if (it_old != memory_ptr_dict.end() && it_new != memory_ptr_dict.end() ){ + malloc_plus_memory_entry *memory_item_old = it_old->second; + malloc_plus_memory_entry *memory_item_new = it_new->second; + + // erase the entries in the pointer dictionary + memory_ptr_dict.erase(it_new); + memory_ptr_dict.erase(it_old); + // get the iterators for the named dictionary + map ::iterator it_old = memory_name_dict.find(memory_item_old->mem_name); + map ::iterator it_new = memory_name_dict.find(memory_item_new->mem_name); + memory_name_dict.erase(it_new); + memory_name_dict.erase(it_old); + + if (DEBUG) printf("Found memory item ptr_old %p name %s ptr_new %p name %s\n",memory_item_old->mem_ptr,memory_item_old->mem_name,memory_item_new->mem_ptr,memory_item_new->mem_name); + + if ((memory_item_old->mem_flags & DEVICE_REGULAR_MEMORY) != 0){ +#ifdef HAVE_OPENCL + if (DEBUG) printf("Deleting device memory name %s pointer %p\n",memory_item_old->mem_name,memory_item_old->mem_ptr); + ezcl_device_memory_replace(&memory_item_old->mem_ptr, &memory_item_new->mem_ptr); +#endif + } +#ifdef HAVE_J7 + else if (memory_item->mem_flags & LOAD_BALANCE_MEMORY) { + j7->memFree(memory_item_old->mem_ptr); + memory_item_old->mem_ptr = memory_item_new->mem_ptr; + } +#endif + else { + free(memory_item_old->mem_ptr); + memory_item_old->mem_ptr = memory_item_new->mem_ptr; + } + + memory_item_old->mem_nelem[0] = memory_item_new->mem_nelem[0]; + memory_item_old->mem_capacity = 
memory_item_new->mem_capacity; + memory_item_old->mem_elsize = memory_item_new->mem_elsize; + memory_item_old->mem_flags = memory_item_new->mem_flags; + malloc_mem_ptr_old = (void *)malloc_mem_ptr_new; + free(memory_item_new->mem_nelem); + free(memory_item_new->mem_name); + free(memory_item_new); + + memory_ptr_dict.insert(std::pair(malloc_mem_ptr_old, memory_item_old) ); + memory_name_dict.insert(std::pair(memory_item_old->mem_name, memory_item_old) ); + + return(memory_item_old->mem_ptr); + } else { + if (DEBUG) printf("Warning -- memory not found\n"); + } + return(NULL); +} + +void MallocPlus::memory_swap(int **malloc_mem_ptr_old, int **malloc_mem_ptr_new){ + map ::iterator it_old = memory_ptr_dict.find(*malloc_mem_ptr_old); + map ::iterator it_new = memory_ptr_dict.find(*malloc_mem_ptr_new); + + if (it_old != memory_ptr_dict.end() && it_new != memory_ptr_dict.end() ){ + // Swap the memory entries during the retrieval + malloc_plus_memory_entry *memory_item_new = it_old->second; + malloc_plus_memory_entry *memory_item_old = it_new->second; + + if (DEBUG) printf("Found memory item ptr_old %p name %s ptr_new %p name %s\n",memory_item_old->mem_ptr,memory_item_old->mem_name,memory_item_new->mem_ptr,memory_item_new->mem_name); + + const char *mem_name_tmp; + mem_name_tmp = memory_item_old->mem_name; + memory_item_old->mem_name = memory_item_new->mem_name; + memory_item_new->mem_name = (char *)mem_name_tmp; + + // Delete the ptr entries + memory_ptr_dict.erase(it_old); + memory_ptr_dict.erase(it_new); + + memory_ptr_dict.insert(std::pair(memory_item_old->mem_ptr, memory_item_old) ); + memory_ptr_dict.insert(std::pair(memory_item_new->mem_ptr, memory_item_new) ); + + // Delete the named entries + map ::iterator it_name_old = memory_name_dict.find(memory_item_old->mem_name); + map ::iterator it_name_new = memory_name_dict.find(memory_item_new->mem_name); + memory_name_dict.erase(it_name_old); + memory_name_dict.erase(it_name_new); + + 
memory_name_dict.insert(std::pair(memory_item_old->mem_name, memory_item_old) ); + memory_name_dict.insert(std::pair(memory_item_new->mem_name, memory_item_new) ); + + // memory items have been swapped, so return the new pointers + *malloc_mem_ptr_old = (int *)memory_item_old->mem_ptr; + *malloc_mem_ptr_new = (int *)memory_item_new->mem_ptr; + } else { + if (DEBUG) printf("Warning -- memory not found\n"); + } +} + +void MallocPlus::memory_swap(float **malloc_mem_ptr_old, float **malloc_mem_ptr_new){ + map ::iterator it_old = memory_ptr_dict.find(*malloc_mem_ptr_old); + map ::iterator it_new = memory_ptr_dict.find(*malloc_mem_ptr_new); + + if (it_old != memory_ptr_dict.end() && it_new != memory_ptr_dict.end() ){ + // Swap the memory entries during the retrieval + malloc_plus_memory_entry *memory_item_new = it_old->second; + malloc_plus_memory_entry *memory_item_old = it_new->second; + + if (DEBUG) printf("Found memory item ptr_old %p name %s ptr_new %p name %s\n",memory_item_old->mem_ptr,memory_item_old->mem_name,memory_item_new->mem_ptr,memory_item_new->mem_name); + + const char *mem_name_tmp; + mem_name_tmp = memory_item_old->mem_name; + memory_item_old->mem_name = memory_item_new->mem_name; + memory_item_new->mem_name = (char *)mem_name_tmp; + + // Delete the ptr entries + memory_ptr_dict.erase(it_old); + memory_ptr_dict.erase(it_new); + + memory_ptr_dict.insert(std::pair(memory_item_old->mem_ptr, memory_item_old) ); + memory_ptr_dict.insert(std::pair(memory_item_new->mem_ptr, memory_item_new) ); + + // Delete the named entries + map ::iterator it_old = memory_name_dict.find(memory_item_old->mem_name); + map ::iterator it_new = memory_name_dict.find(memory_item_new->mem_name); + memory_name_dict.erase(it_old); + memory_name_dict.erase(it_new); + + memory_name_dict.insert(std::pair(memory_item_old->mem_name, memory_item_old) ); + memory_name_dict.insert(std::pair(memory_item_new->mem_name, memory_item_new) ); + + // memory items have been swapped, so return the 
new pointers + *malloc_mem_ptr_old = (float *)memory_item_old->mem_ptr; + *malloc_mem_ptr_new = (float *)memory_item_new->mem_ptr; + } else { + if (DEBUG) printf("Warning -- memory not found\n"); + } +} + +void MallocPlus::memory_swap(double **malloc_mem_ptr_old, double **malloc_mem_ptr_new){ + map ::iterator it_old = memory_ptr_dict.find(*malloc_mem_ptr_old); + map ::iterator it_new = memory_ptr_dict.find(*malloc_mem_ptr_new); + + if (it_old != memory_ptr_dict.end() && it_new != memory_ptr_dict.end() ){ + // Swap the memory entries during the retrieval + malloc_plus_memory_entry *memory_item_new = it_old->second; + malloc_plus_memory_entry *memory_item_old = it_new->second; + + if (DEBUG) printf("Found memory item ptr_old %p name %s ptr_new %p name %s\n",memory_item_old->mem_ptr,memory_item_old->mem_name,memory_item_new->mem_ptr,memory_item_new->mem_name); + + const char *mem_name_tmp; + mem_name_tmp = memory_item_old->mem_name; + memory_item_old->mem_name = memory_item_new->mem_name; + memory_item_new->mem_name = (char *)mem_name_tmp; + + // Delete the ptr entries + memory_ptr_dict.erase(it_old); + memory_ptr_dict.erase(it_new); + + memory_ptr_dict.insert(std::pair(memory_item_old->mem_ptr, memory_item_old) ); + memory_ptr_dict.insert(std::pair(memory_item_new->mem_ptr, memory_item_new) ); + + // Delete the named entries + map ::iterator it_old = memory_name_dict.find(memory_item_old->mem_name); + map ::iterator it_new = memory_name_dict.find(memory_item_new->mem_name); + memory_name_dict.erase(it_old); + memory_name_dict.erase(it_new); + + memory_name_dict.insert(std::pair(memory_item_old->mem_name, memory_item_old) ); + memory_name_dict.insert(std::pair(memory_item_new->mem_name, memory_item_new) ); + + // memory items have been swapped, so return the new pointers + *malloc_mem_ptr_old = (double *)memory_item_old->mem_ptr; + *malloc_mem_ptr_new = (double *)memory_item_new->mem_ptr; + } else { + if (DEBUG) printf("Warning -- memory not found\n"); + } +} + +// 
This duplicates memory for a variable and makes a new dictionary entry for the new variable +void *MallocPlus::memory_duplicate(void *malloc_mem_ptr, const char *addname){ + map ::iterator it = memory_ptr_dict.find(malloc_mem_ptr); + void *mem_ptr_dup; + + if (it != memory_ptr_dict.end()){ + malloc_plus_memory_entry *memory_item = it->second; + + // The memory_malloc will add the database entry + mem_ptr_dup = memory_malloc(memory_item->mem_nelem[0], memory_item->mem_elsize, addname, memory_item->mem_flags); + return(mem_ptr_dup); + } else { + if (DEBUG) printf("Warning -- memory not found\n"); + } + return(NULL); +} + +void *MallocPlus::get_memory_ptr(const char *name){ + map ::iterator it = memory_name_dict.find(name); + + if (it != memory_name_dict.end()){ + malloc_plus_memory_entry *memory_item = it->second; + + if (DEBUG) printf("Found memory item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name); + return(memory_item->mem_ptr); + } else { + if (DEBUG) printf("Warning -- memory not found\n"); + } + return(NULL); +} + +bool MallocPlus::check_memory_attribute(void *malloc_mem_ptr, int attribute){ + map ::iterator it = memory_ptr_dict.find(malloc_mem_ptr); + + if (it != memory_ptr_dict.end()){ + malloc_plus_memory_entry *memory_item = it->second; + + if (DEBUG) printf("Found memory item ptr %p name %s attribute %d\n",memory_item->mem_ptr,memory_item->mem_name,memory_item->mem_flags); + bool bvalue = false; + if (memory_item->mem_flags & attribute) bvalue = true; + + return bvalue; + } else { + printf("Error -- memory not found\n"); + exit(1); + } +} + +void MallocPlus::set_memory_attribute(void *malloc_mem_ptr, int attribute){ + map ::iterator it = memory_ptr_dict.find(malloc_mem_ptr); + + if (it != memory_ptr_dict.end()){ + malloc_plus_memory_entry *memory_item = it->second; + + if (DEBUG) printf("Found memory item ptr %p name %s attribute %d\n",memory_item->mem_ptr,memory_item->mem_name,memory_item->mem_flags); + memory_item->mem_flags |= attribute; 
+ } else { + if (DEBUG) printf("Warning -- memory not found\n"); + } +} + +void MallocPlus::clear_memory_attribute(void *malloc_mem_ptr, int attribute){ + map ::iterator it = memory_ptr_dict.find(malloc_mem_ptr); + + if (it != memory_ptr_dict.end()){ + malloc_plus_memory_entry *memory_item = it->second; + + if (DEBUG) printf("Found memory item ptr %p name %s attribute %d\n",memory_item->mem_ptr,memory_item->mem_name,memory_item->mem_flags); + memory_item->mem_flags &= ~attribute; + if (DEBUG) printf("Found memory item ptr %p name %s attribute %d\n",memory_item->mem_ptr,memory_item->mem_name,memory_item->mem_flags); + } else { + if (DEBUG) printf("Warning -- memory not found\n"); + } +} + +extern "C" { + MallocPlus *MallocPlus_new(){ + return new MallocPlus; + } + + void MallocPlus_memory_report(MallocPlus *mem_object) { + mem_object->memory_report(); + } + + void MallocPlus_memory_add(MallocPlus *mem_object, void *dbleptr, size_t nelem, + size_t elsize, char *name, unsigned long long flags){ +// printf("DEBUG -- nelem %lu elsize %lu\n", nelem, elsize); + mem_object->memory_add(dbleptr, nelem, elsize, name, + (unsigned long long)flags); + } + void MallocPlus_memory_add_nD(MallocPlus *mem_object, void *dbleptr, int ndim, size_t *nelem, + size_t elsize, char *name, unsigned long long flags){ +// printf("DEBUG -- ndim %d nelem[0] %lu elsize %lu\n",ndim, nelem[0], elsize); + mem_object->memory_add(dbleptr, ndim, nelem, elsize, name, + (unsigned long long)flags); + } +} Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Parser_math.hh =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Parser_math.hh @@ -0,0 +1,85 @@ +/* Copyright 2015. Los Alamos National Security, LLC. This material was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. 
The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy + * of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * Under this license, it is required to include a reference to this work. We + * request that each derivative work contain a reference to LANL Copyright + * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly + * measured. + * + * This is LANL Copyright Disclosure C15076/LA-CC-15-054 + */ + +/* + * PowerParser is a general purpose input file parser for software applications. + * + * Authors: Chuck Wingate XCP-2 caw@lanl.gov + * Robert Robey XCP-2 brobey@lanl.gov + */ + +#ifndef PARSERMATHHHINCLUDE +#define PARSERMATHHHINCLUDE + +// *************************************************************************** +// *************************************************************************** +// This class collects various parser math functions. +// There are two reasons to have this class: +// 1. To keep the command processing class from getting too big. +// 2. Some of these functions are used in more than one class. 
+// *************************************************************************** +// *************************************************************************** + +#include +#include +#include +#include + +namespace PP +{ +using std::string; +using std::stringstream; +using std::vector; +using std::deque; + + +class Parser_math +{ + +public: + Parser_math(); + + void do_op(int i1, int i2, int i3, deque &wq, Word &wres, + stringstream &serr, int &ierr); + void do_op_relational(int i1, int i2, int i3, deque &wq, + Word &wres, stringstream &serr, int &ierr); + void do_op_logical(int i1, int i2, int i3, deque &wq, + Word &wres, stringstream &serr, int &ierr); + void do_op_not(int i2, int i3, deque &wq, + Word &wres, stringstream &serr, int &ierr); + + +private: + +}; + + +} // End of the PP namespace + +#endif Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Parser_math.cc =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Parser_math.cc @@ -0,0 +1,326 @@ +/* Copyright 2015. Los Alamos National Security, LLC. This material was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy + * of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * Under this license, it is required to include a reference to this work. We + * request that each derivative work contain a reference to LANL Copyright + * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly + * measured. + * + * This is LANL Copyright Disclosure C15076/LA-CC-15-054 + */ + +/* + * PowerParser is a general purpose input file parser for software applications. + * + * Authors: Chuck Wingate XCP-2 caw@lanl.gov + * Robert Robey XCP-2 brobey@lanl.gov + */ + +// *************************************************************************** +// *************************************************************************** +// This class collects various parser math functions. +// +// There are two reasons to have this class: +// 1. To keep the command processing class from getting too big. +// 2. Some of these functions are used in more than one class. +// *************************************************************************** +// *************************************************************************** + +#include +#include +#include +#include +#include +#include +#include + +#include "Word.hh" +#include "Parser_math.hh" + +namespace PP +{ +using std:: string; +using std::cout; +using std::endl; +using std::stringstream; +using std::setprecision; +using std::vector; +using std::deque; + + +// =========================================================================== +// Default constructor. 
+// =========================================================================== +Parser_math::Parser_math() +{ +} + + +// =========================================================================== +// Do a single arithmetic binary operation involving 3 words. i1, on the left, +// is supposed to be a number, i2 is the operator, and i3, on the right, is +// supposed to be a number. +// The result is put in word wres. +// =========================================================================== +void Parser_math::do_op(int i1, int i2, int i3,deque &wq, Word &wres, + stringstream &serr, int &ierr) +{ + // The words to the left and right of the operator have to be a number. + if ((!wq[i1].is_number()) || (!wq[i3].is_number())) { + wq[i2].fatal_error(serr, ierr); + serr << "Expected some number " << wq[i2].get_string() << + " some number" << endl; + serr << "But did not find a number, instead found" << endl; + serr << wq[i1].get_string() << wq[i2].get_string() << + wq[i3].get_string() << endl; + ierr = 2; + wres.set_value(0.); + return; + } + + + double d1 = wq[i1].get_double(); + string op = wq[i2].get_string(); + double d2 = wq[i3].get_double(); + + double result = 0.; + + if (op == "+") result = d1 + d2; + if (op == "-") result = d1 - d2; + if (op == "*") result = d1 * d2; + + if (op == "**") { + if (d1 == 0. && d2 >= 0.) { + wres.set_value(0.); + return; + } + + if (d1 == 0. && d2 < 0.) { + wq[i2].fatal_error(serr, ierr); + serr << "Trying to exponentiate 0 to a negative power." << endl; + serr << "Base = " << d1 << " Exponent = " << d2 << endl; + ierr = 2; + wres.set_value(0.); + return; + } + + if (d1 < 0. && !wq[i3].is_integer()) { + wq[i2].fatal_error(serr, ierr); + serr << "Trying to exponentiate a negative number to a non-integer power." << endl; + serr << "Base = " << d1 << " Exponent = " << d2 << endl; + ierr = 2; + wres.set_value(0.); + return; + } + + result = pow(d1,d2); + } + + if (op == "/") { + if (d2 == 0.) { + if (d1 == 0.) 
result = 0.; + else result = 1.e30; + wq[i2].fatal_error(serr, ierr); + serr << "Divide by 0." << endl; + serr << "Numerator = " << d1 << " Denominator = " << d2 << endl; + ierr = 2; + wres.set_value(result); + return; + } + result = d1 / d2; + } + + // Do not implement the % operator, it is too much like the fortran % + // operator which is for referencing components of a fortran structure. + /* + if (op == "%") { + if (d2 == 0.) { + result = 0.; + wq[i2].fatal_error(serr, ierr); + serr << "Modulus (%) second argument is 0." << endl; + serr << "First arg = " << d1 << " second arg = " << d2 << endl; + ierr = 2; + wres.set_value(result); + return; + } + result = ((int)d1) % ((int)d2); + } + */ + + wres.set_value(result); +} + + +// =========================================================================== +// Do a single relational binary operation involving 3 words. +// Relational operators include .eq., .ne., .le., ... +// The result is either true or false and is put in word wres. +// =========================================================================== +void Parser_math::do_op_relational(int i1, int i2, int i3, deque &wq, + Word &wres, stringstream &serr, int &ierr) +{ + string s1 = wq[i1].get_string(); + string op = wq[i2].get_string(); + string s3 = wq[i3].get_string(); + bool result = false; + + //cout << "&&&&&cw op = " << s1 << op << s3 << endl; + + if ((wq[i1].is_bool()) && (wq[i3].is_bool())) { + if (op == ".gt." || op == ".ge." || op == ".lt." 
|| op == ".le.") { + wq[i2].fatal_error(serr, ierr); + serr << "Does not make sense to compare logical values" << endl; + serr << " with greater than or less than" << endl; + serr << " " << s1 << " " << op << " " << s3 << endl; + ierr = 2; + wres.set_value(false); + return; + } + } + + if ( ((wq[i1].is_bool()) && (!wq[i3].is_bool())) || + ((!wq[i1].is_bool()) && (wq[i3].is_bool())) + ) { + wq[i2].fatal_error(serr, ierr); + serr << "Does not make sense to compare logical and" << endl; + serr << " non-logical values" << endl; + serr << " " << s1 << " " << op << " " << s3 << endl; + ierr = 2; + wres.set_value(false); + return; + } + + if ( ((wq[i1].is_number()) && (!wq[i3].is_number())) || + ((!wq[i1].is_number()) && (wq[i3].is_number())) + ) { + wq[i2].fatal_error(serr, ierr); + serr << "Does not make sense to compare numerical and" << endl; + serr << " non-numerical values" << endl; + serr << " " << s1 << " " << op << " " << s3 << endl; + ierr = 2; + wres.set_value(false); + return; + } + + // Compare two numbers. + if ( (wq[i1].is_number()) && (wq[i3].is_number()) ) { + double d1 = wq[i1].get_double(); + double d3 = wq[i3].get_double(); + if (op == ".gt.") result = d1 > d3; + if (op == ".ge.") result = d1 >= d3; + if (op == ".lt.") result = d1 < d3; + if (op == ".le.") result = d1 <= d3; + if (op == ".eq.") result = d1 == d3; + if (op == ".ne.") result = d1 != d3; + //cout << "&&&&&cw relational result = " << result << endl; + wres.set_value(result); + return; + } + + if ( (wq[i1].is_bool()) && (wq[i3].is_bool()) ) { + bool b1 = wq[i1].get_bool(serr, ierr); + bool b3 = wq[i3].get_bool(serr, ierr); + if (op == ".eq.") result = b1 == b3; + if (op == ".ne.") result = b1 != b3; + //cout << "&&&&&cw relational result = " << result << endl; + wres.set_value(result); + return; + } + + // Compare two strings. 
+ if (op == ".gt.") result = s1 > s3; + if (op == ".ge.") result = s1 >= s3; + if (op == ".lt.") result = s1 < s3; + if (op == ".le.") result = s1 <= s3; + if (op == ".eq.") result = s1 == s3; + if (op == ".ne.") result = s1 != s3; + wres.set_value(result); + return; +} + +// =========================================================================== +// Do the .not. operation, this is different from all the others in that +// .not. is a unary operator, the others are binary ops. +// The result is either true or false and is put in word wres. +// =========================================================================== +void Parser_math::do_op_not(int i2, int i3, deque &wq, + Word &wres, stringstream &serr, int &ierr) +{ + string op = wq[i2].get_string(); + string s3 = wq[i3].get_string(); + bool result = false; + + if (!wq[i3].is_bool()) { + wq[i2].fatal_error(serr, ierr); + serr << "The word following the .not. operator must be" + " true or false." << endl; + serr << "Instead the word following .not. is " << s3 << endl; + ierr = 2; + wres.set_value(false); + return; + } + + result = true; + if (wq[i3].get_bool(serr, ierr) == true) result = false; + wres.set_value(result); + return; +} + + +// =========================================================================== +// Do a single logical binary operation involving 3 words. +// The binary logical operators are .and. and .or. +// The result is either true or false and is put in word wres. +// =========================================================================== +void Parser_math::do_op_logical(int i1, int i2, int i3, deque &wq, + Word &wres, stringstream &serr, int &ierr) +{ + string s1 = wq[i1].get_string(); + string op = wq[i2].get_string(); + string s3 = wq[i3].get_string(); + bool result = false; + + //cout << "&&&&&cw logical = " << s1 << op << s3 << endl; + + // For .and. and .or., both operands must be boolean. 
+ if ((!wq[i1].is_bool()) || (!wq[i3].is_bool())) { + wq[i2].fatal_error(serr, ierr); + serr << "The operator is " << op << endl; + serr << "The two operands (on the left and right of the operator)" << endl; + serr << "must be logical values (true or false)." << endl; + serr << " " << s1 << " " << op << " " << s3 << endl; + ierr = 2; + wres.set_value(false); + return; + } + + bool b1 = wq[i1].get_bool(serr, ierr); + bool b3 = wq[i3].get_bool(serr, ierr); + if (op == ".and.") result = b1 && b3; + if (op == ".or.") result = b1 || b3; + //cout << "&&&&&cw logical result = " << result << endl; + wres.set_value(result); + return; +} + + + +} // End of the PP namespace Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Parser_utils.hh =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Parser_utils.hh @@ -0,0 +1,80 @@ +/* Copyright 2015. Los Alamos National Security, LLC. This material was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy + * of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * Under this license, it is required to include a reference to this work. We + * request that each derivative work contain a reference to LANL Copyright + * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly + * measured. + * + * This is LANL Copyright Disclosure C15076/LA-CC-15-054 + */ + +/* + * PowerParser is a general purpose input file parser for software applications. + * + * Authors: Chuck Wingate XCP-2 caw@lanl.gov + * Robert Robey XCP-2 brobey@lanl.gov + */ + +#ifndef PARSERUTILSHHINCLUDE +#define PARSERUTILSHHINCLUDE + +// *************************************************************************** +// *************************************************************************** +// This class collects various low level utilities for the parser. 
+// *************************************************************************** +// *************************************************************************** + +#include +#include +#include +#include + +namespace PP +{ +using std::string; +using std::stringstream; +using std::vector; +using std::deque; + + +class Parser_utils +{ + +public: + Parser_utils(int base); + + int start_dex(vector &istart, const vector &size); + void reverse_dex(int icdex, int nvals, vector &istart, + const vector &size); + + void print_strings(vector< vector > rows, int n_header_rows, + int offset, int col_spacing, int line_len_max, + stringstream &ss); + +private: + +}; + + +} // End of the PP namespace + +#endif Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Parser_utils.cc =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Parser_utils.cc @@ -0,0 +1,329 @@ +/* Copyright 2015. Los Alamos National Security, LLC. This material was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy + * of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * Under this license, it is required to include a reference to this work. We + * request that each derivative work contain a reference to LANL Copyright + * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly + * measured. + * + * This is LANL Copyright Disclosure C15076/LA-CC-15-054 + */ + +/* + * PowerParser is a general purpose input file parser for software applications. + * + * Authors: Chuck Wingate XCP-2 caw@lanl.gov + * Robert Robey XCP-2 brobey@lanl.gov + */ + +// *************************************************************************** +// *************************************************************************** +// This class collects various low level utilities for the parser. +// *************************************************************************** +// *************************************************************************** + +#include +#include +#include +#include +#include +#include + +#include "Parser_utils.hh" + +namespace PP +{ +using std:: string; +using std::cout; +using std::endl; +using std::stringstream; +using std::setprecision; +using std::vector; +using std::deque; +using std::setw; + +static int index_base = 1; // For Fortran, 0 for C/C++style + + +// =========================================================================== +// Default constructor. 
+// =========================================================================== +Parser_utils::Parser_utils(int base) +{ + index_base = base; +} + + +// =========================================================================== +// Given an array command like +// cmd(5,3) = 1.0, 3.0, -5.0 +// find the starting position in a 1d array. +// +// The indices in cmd are referenced from 1 (i.e. fortran) while the 1d array +// is referenced from 0 (C++). +// +// This function works for any dimension array, 0,1,2,3,... +// +// Example 1: Consider a 1d command +// cmd(5) = 1.0, 3.4 +// We start filling the 1d fortran array at position 5 and put in two values. +// Subtract 1 to reference from 0, so this function returns 4. +// +// Example 2: Consider the 2d command above: +// cmd(5,3) = 1.0, 3.0, -5.0 +// We also need to know that the max size of the first dimension is say 7. +// Since in fortran, the first index varies fastest, the fortran 1d index +// would be +// 5 + (3-1)*7 = 19 +// Subtract 1 to reference from 0, thus the return value is 18. +// +// The istart vector contains the indices, for example 2 this would be 5 and +// 3. The size vector contains the max size of each dimension, for example 2 +// this would be 7 and whatever for the second dimension. +// =========================================================================== +int Parser_utils::start_dex(vector &istart, const vector &size) +{ + // Get the array dimension, 0,1,2,3,... + int dim = (int)istart.size(); + + // 0d is a special case. + if (dim == 0) return 0; + + // Find the index. + // Adjustment for base 1 + int ix = istart[0]-index_base; + for (int i=1; i &istart, + const vector &size) +{ + // Get the dimension. + int dim = (int)istart.size(); + + // Nothing to do for scalars. + if (dim == 0) return; + + // Start at 1,1,1,1,1,1,... + for (int i=0; i > rows, int n_header_rows, + int offset, int col_spacing, int line_len_max, + stringstream &ss) +{ + // Get the number of columns. 
+ int ncol = (int)rows[0].size(); + + // Find the max number of characters in each column for all the rows. + vector maxc(ncol, 0); + for (int row=0; row<(int)rows.size(); row++) { + for (int c=0; c maxc[c]) maxc[c] = (int)s.size(); + } + } + + // Find the column widths. + vector col_width(ncol,0); + for (int c=0; c col_width[c]) col_width[c] = maxc[c]; + } + + // Spacing between columns. + vector cspace(ncol, col_spacing); + cspace[0] = offset; + + // Limit the lines to a max length. + if (line_len_max > 0) { + int line_len = 0; + for (int c=0; c 0) { + col_width[ncol-1] -= excess_c; + for (int row=0; row<(int)rows.size(); row++) { + int len = 0; + for (int c=0; c (int)s.size()) nc = (int)s.size(); + s.erase(start, nc); + rows[row][c] = s + " ..."; + } + } + } + + + // Write the rows. + for (int row=0; row<(int)rows.size(); row++) { + + // Insert the line of dashes after the header rows. + if (row == n_header_rows) { + for (int c=0; c 0) { + nsp_left = dsp/2; + nsp_right = col_width[c] - nsp_left - nc; + } + for (int i=0; i= n_header_rows) { + if (c < ncol-1) ss << setw(maxc[c]) << rows[row][c]; + if (c == ncol-1) ss << rows[row][c]; + } + else { + ss << rows[row][c]; + } + + for (int i=0; i +// declaration did not work with just doing "class Cmd;", we need to fully +// include Cmd.hh. +#include "Comm.hh" +#include +#include +#include "Word.hh" +#include "Cmd.hh" +#include "Restartblock.hh" +#include "Whenthen.hh" + +/****************************************************************//** + * PP is the namespace for PowerParser. Example: + * + * using namespace PP; + *******************************************************************/ +namespace PP +{ +using std::ofstream; +using std::streambuf; + +/****************************************************************//** + * PowerParser class + * Provide a class that parses text files into lines and words. 
+ *******************************************************************/ +class PowerParser +{ + +public: + + ofstream fileout; + streambuf *coutbuf; + + // Constructors, destructors and drivers. +/****************************************************************//** + * \brief + * Constructor with no arguments + * + * Typical Usage + * + * PowerParser parse; + * or + * PowerParser *parse = new PowerParser(); + *******************************************************************/ + PowerParser(void); + +/****************************************************************//** + * \brief + * Constructor -- with input filename in string format + * + * **Parameters** + * * string filename[in] -- the input file. The file will be + * read in, broadcast, and then parsed + * + * Typical Usage + * + * string fin("simfile.in"); + * PowerParser parse(fin); + * or + * string fin("simfile.in"); + * PowerParser *parse = new PowerParser(fin); + *******************************************************************/ + PowerParser(string filename); + +/****************************************************************//** + * \brief + * Constructor -- with input filename in char array format + * + * **Parameters** + * * const char *filename[in] -- the input file. 
The file will be
+ *     read in, broadcast, and then parsed
+ *
+ * Typical Usage
+ *
+ *     PowerParser parse("simfile.in");
+ * or
+ *     PowerParser *parse = new PowerParser("simfile.in");
+ *******************************************************************/
+    PowerParser(const char *filename);
+
+/****************************************************************//**
+ * \brief
+ * Destructor with no arguments
+ *
+ * Typical Usage
+ *
+ *     delete parse;
+ *******************************************************************/
+    ~PowerParser(void);
+
+    void dictionary_add(char *name, double value, bool pred, char *vdesc);
+    void dictionary_env_add(char *name, bool pred);
+
+/****************************************************************//**
+ * \brief
+ * Reads the file in on the IO processor, broadcasts the string
+ * to all the other processors, then parses the string.
+ *
+ * **Parameters**
+ * * string filename
+ *
+ * Typical Usage
+ *
+ *     string fin("simfile.in");
+ *     PowerParser parse;   // no parentheses: "parse()" would declare a function
+ *     parse.parse_file(fin);
+ *******************************************************************/
+    void parse_file(string filename);
+
+/****************************************************************//**
+ * \brief
+ * Reads the file in on the IO processor, broadcasts the string
+ * to all the other processors, then parses the string.
+ *
+ * **Parameters**
+ * * const char *filename
+ *
+ * Typical Usage
+ *
+ *     PowerParser parse;   // no parentheses: "parse()" would declare a function
+ *     parse.parse_file("simfile.in");
+ *******************************************************************/
+    void parse_file(const char *filename);
+
+/****************************************************************//**
+ * \brief
+ * Given a multi-line string on every processor, parse it into cmds
+ * and words. After calling this function, the parser is ready for use.
+ *******************************************************************/ + void parse_string(string filename, string s_in); + +/****************************************************************//** + * \brief + * The input file(s) has been read and put into commands. Now do the + * compilation phase. + *******************************************************************/ + void compile_buffer(int &return_value); + +/****************************************************************//** + * \brief + * Handle the execution line arguments + *******************************************************************/ + void handle_exe_args(string other_argggs); + +/****************************************************************//** + * \brief + * Clear out the parser and re-init + *******************************************************************/ + void clear_and_init(); + +/****************************************************************//** + *******************************************************************/ + void store_exe_args(string &oargs, string &fname) { + other_args = oargs; + file_name = fname; + } + +/****************************************************************//** + *******************************************************************/ + void get_exe_args(string &oargs, string &fname) { + oargs = other_args; + fname = file_name; + } + +/****************************************************************//** + * \brief + * String version of the driver for getting boolean values as integers. + * This works for arrays of any dimension, 0,1,2,3,... + * + * **Parameters** + * * string &cname -- key word in input file + * * int *cvalue -- variable to set in simulation code + * * const vector &size = vector() -- sizes of array, + * (default is null for a scalar). 
+ * * bool skip = false -- skip setting variable, (default is false) + * + * Typical Usage + * + * for scalars + * string InputName("OutputGraphics"); + * int iflag = 0; + * parse.get_bool_int(InputName, &iflag); + * or for arrays + * string InputName("OutputGraphicsTypes"); + * vector iflags[2] = {0, 0}; + * vector size = {2}; + * parse.get_bool_int(InputName, &iflags[0], size); + *******************************************************************/ + void get_bool_int(string &cname, + int *cvalue, + const vector &size = vector(), // optional argument + bool skip = false); // optional argument + +/****************************************************************//** + * \brief + * String version of the driver for getting boolean values. + * This works for arrays of any dimension, 0,1,2,3,... + * + * **Parameters** + * * string &cname -- key word in input file + * * bool *cvalue -- variable to set in simulation code + * * const vector &size = vector() -- sizes of array, + * (default is null for a scalar). + * * bool skip = false -- skip setting variable, (default is false) + * + * Typical Usage + * + * for scalars + * string InputName("OutputGraphics"); + * bool iflag = 0; + * parse.get_bool(InputName, &iflag); + * or for arrays + * string InputName("OutputGraphicsTypes"); + * vector iflags[2] = {0, 0}; + * vector size = {2}; + * parse.get_bool(InputName, &iflags[0], size); + *******************************************************************/ + void get_bool(string &cname, + bool *cvalue, + const vector &size = vector(), // optional argument + bool skip = false); // optional argument + +/****************************************************************//** + * \brief + * String version of the driver for getting integer values. + * This works for arrays of any dimension, 0,1,2,3,... + * + * **Parameters** + * * const char *cname -- key word in input file + * * int *cvalue -- variable to set in simulation code. 
Int can be + * either standard int or long long int + * * const vector &size = vector() -- sizes of array, + * (default is null for a scalar). + * * bool skip = false -- skip setting variable, (default is false) + * + * Typical Usage + * + * for scalars + * int ivalue = 0; + * parse.get_int("Num_Cycles", &ivalue); + * or for arrays + * vector ivalue[2] = {0, 0}; + * vector size = {2}; + * parse.get_int("Dimensions", &ivalue[0], size); + *******************************************************************/ + template< typename T > + void get_int(string &cname, + T *cvalue, + const vector &size = vector(), // optional argument + bool skip = false); // optional argument + +/****************************************************************//** + * \brief + * String version of the driver for getting real values. + * This works for arrays of any dimension, 0,1,2,3,... + * + * **Parameters** + * * const char *cname -- key word in input file + * * double *cvalue -- variable to set in simulation code. + * * const vector &size = vector() -- sizes of array, + * (default is null for a scalar). 
+ * * bool skip = false -- skip setting variable, (default is false) + * + * Typical Usage + * + * for scalars + * double rvalue = 0; + * parse.get_real("TimeStop", &rvalue); + * or for arrays + * vector rvalues[2] = {0.0, 0.0}; + * vector size = {2}; + * parse.get_real("DumpTimes", &rvalues[0], size); + *******************************************************************/ + void get_real(string &cname, + double *cvalue, + const vector &size = vector(), // optional argument + bool skip = false); // optional argument + +/****************************************************************//** + *******************************************************************/ + void get_char(string &cname, + vector &vstr, + const vector &size = vector(), // optional argument + bool single_char = false, // optional argument + bool skip = false); // optional argument + + // These are just convenience function to allow char arrays for get variable so + // the calls are simpler. They convert the cname to a string and call the + // string versions above + +/****************************************************************//** + * \brief + * Char array version of the driver for getting boolean values as integers. + * This works for arrays of any dimension, 0,1,2,3,... + * + * **Parameters** + * * const char *cname -- key word in input file + * * int *cvalue -- variable to set in simulation code + * * const vector &size = vector() -- sizes of array, + * (default is null for a scalar). 
+ * * bool skip = false -- skip setting variable, (default is false) + * + * Typical Usage + * + * for scalars + * int iflag = 0; + * parse.get_bool_int("OutputGraphics", &iflag); + * or for arrays + * vector iflags[2] = {0, 0}; + * vector size = {2}; + * parse.get_bool_int("OutputGraphicsTypes", &iflags[0], size); + *******************************************************************/ + void get_bool_int(const char *cname, + int *cvalue, + const vector &size = vector(), // optional argument + bool skip = false); // optional argument + +/****************************************************************//** + * \brief + * Char array version of the driver for getting boolean values. + * This works for arrays of any dimension, 0,1,2,3,... + * + * **Parameters** + * * const char *cname -- key word in input file + * * bool *cvalue -- variable to set in simulation code + * * const vector &size = vector() -- sizes of array, + * (default is null for a scalar). + * * bool skip = false -- skip setting variable, (default is false) + * + * Typical Usage + * + * for scalars + * bool iflag = 0; + * parse.get_bool("OutputGraphics", &iflag); + * or for arrays + * vector iflags[2] = {0, 0}; + * vector size = {2}; + * parse.get_bool("OutputGraphicsTypes", &iflags[0], size); + *******************************************************************/ + void get_bool(const char *cname, + bool *cvalue, + const vector &size = vector(), // optional argument + bool skip = false); // optional argument + +/****************************************************************//** + * \brief + * Char array version of the driver for getting integer values. + * This works for arrays of any dimension, 0,1,2,3,... + * + * **Parameters** + * * const char *cname -- key word in input file + * * int *cvalue -- variable to set in simulation code. Int can be + * either standard int or long long int + * * const vector &size = vector() -- sizes of array, + * (default is null for a scalar). 
+ * * bool skip = false -- skip setting variable, (default is false) + * + * Typical Usage + * + * for scalars + * int ivalue = 0; + * parse.get_int("Num_Cycles", &ivalue); + * or for arrays + * vector ivalue[2] = {0, 0}; + * vector size = {2}; + * parse.get_int("Dimensions", &ivalue[0], size); + *******************************************************************/ + template< typename T > + void get_int(const char *cname, + T *cvalue, + const vector &size = vector(), // optional argument + bool skip = false); // optional argument + +/****************************************************************//** + * \brief + * Char array version of the driver for getting real values. + * This works for arrays of any dimension, 0,1,2,3,... + * + * **Parameters** + * * const char *cname -- key word in input file + * * double *cvalue -- variable to set in simulation code. + * * const vector &size = vector() -- sizes of array, + * (default is null for a scalar). + * * bool skip = false -- skip setting variable, (default is false) + * + * Typical Usage + * + * for scalars + * double rvalue = 0; + * parse.get_real("TimeStop", &rvalue); + * or for arrays + * vector rvalue[2] = {0.0, 0.0}; + * vector size = {2}; + * parse.get_real("DumpTimes", &rvalue[0], size); + *******************************************************************/ + void get_real(const char *cname, + double *cvalue, + const vector &size = vector(), // optional argument + bool skip = false); // optional argument + +/****************************************************************//** + *******************************************************************/ + void get_char(const char *cname, + vector &vstr, + const vector &size = vector(), // optional argument + bool single_char = false, // optional argument + bool skip = false); // optional argument + + +/****************************************************************//** + * \brief + * Driver for getting array sizes. 
+ *******************************************************************/ + void get_size(string &cname, vector &size); + +/****************************************************************//** + * \brief + * Driver for getting array sizes. Version to get all sizes + *******************************************************************/ + void get_sizeb(string &cname, vector &size); + + +/****************************************************************//** + * \brief + * Check if the input command, cname, appears in the final, parsed user input. + * + * The two outputs are in_input and in_whenthen, + * in_input command is in (or not) the main part of the input, i.e. + * everything except the when...then statements. + * in_whenthen command is in (or not) at least one when...then statement. + *******************************************************************/ + void cmd_in_input(string &cname, bool &in_input, bool &in_whenthen); + +/****************************************************************//** + * \brief + * Set the processed flag for all words for all commands that match cname. + * The value to set the processed flag to is bval. + * This sets the processed flag for commands in the final buffer and in the + * when...then final buffers. + *******************************************************************/ + void cmd_set_processed(string &cname, bool bval); + +/****************************************************************//** + * \brief + * Check all processed flags on every command. If any word on any command + * has not been processed, then that is a fatal error. + *******************************************************************/ + void check_processed(bool &good); + +/****************************************************************//** + * \brief + * If commands appear more than once in the input file(s), print a warning + * to the user. 
+ *******************************************************************/ + void check_duplicates(); + + +/****************************************************************//** + * \brief + * Echo user input to a stringstream. + *******************************************************************/ + void echo_input_start(); + +/****************************************************************//** + * \brief + * Echo user input to a stringstream. + *******************************************************************/ + void echo_input_ss(stringstream &ssinp); + +/****************************************************************//** + * Get a line from the ssfout stringstream. (low-level function) + *******************************************************************/ + bool get_ssfout_line(string &sline); + + // Communications object from the infrastructure. +/****************************************************************//** + * \brief + * Holds internal comm class for PowerParser. Comm is initialized + * automatically and will use an already initialized MPI or + * initialize it itself. 
This is meant to be for use internal to + * the package, but developers can get the number of processors + * and rank with + * + * int mype = parse->comm->getProcRank(); + * int npes = parse->comm->getNumProcs(); + *******************************************************************/ + Comm *comm; + +/****************************************************************//** + *******************************************************************/ + void list_funcs_start(); + +/****************************************************************//** + *******************************************************************/ + void list_vars_start(); + +/****************************************************************//** + *******************************************************************/ + void list_cmdsf_start(); + +/****************************************************************//** + *******************************************************************/ + void list_wt_cmdsf_start(); + + void process_error_global(int &return_value); + + + void rb_check(vector &code_varnames, + vector &code_values, + vector &vv_active, int *rbci, + int *rb_ntriggered, int *rb_triggered_indices); + int get_rb_num_varnames(); + void get_rb_varnames(vector &rb_varnames_vstr); + void get_num_rb(int *rbnum) { *rbnum = (int)restartblocks.size(); } + void set_num_rb(int rbnum) { nrb_on_dump = rbnum; } + void get_rb_names(vector &rb_names_vstr); + void set_rb_names(vector &rb_names_vstr); + void get_rb_aflags(int *rb_aflags); + void set_rb_aflags(int *rb_aflags, int rb_num); + void get_rb_satsize(int *rb_satsize); + void set_rb_satsize(int rb_satsize); + void get_rb_satprb(int *rb_satprb); + void set_rb_satprb(int *rb_satprb, int rb_num); + void get_rb_sat(int *rb_sat); + void set_rb_sat(int *rb_sat, int rb_satsize); + void list_rb(); + void list_rb_start(); + void list_rb_ss(stringstream &ssc); + void list_rb1_start(int *rb); + void list_rb1_ss(stringstream &ssc, int *rbp); + void 
list_one_rb_ss(stringstream &ssc, int rb); + + + void get_num_whenthen(int *wtnum) { *wtnum = (int)whenthens.size(); } + void wt_check(int wtn, vector &code_varnames, + vector &code_values, + vector &vv_active, int *wtci); + void wt_set_cmdsfp(int wtn); + void wt_reset(); + void wt_casize(int wtn, int *wt_casize); + void wt_carray(int wtn, char *wt_ca, int wt_casize); + + void wt_satsize(int wtn, int *wt_satsize); + void wt_getsat(int wtn, int *wt_sat, int wt_satsize); + void wt_setsat(int wtn, int *wt_sat, int wt_satsize); + void wt_getprocessed(int wtn, int *wtp); + void wt_setprocessed(int wtn, int wtp); + void wt_getseq(int wtn, int *wtseq); + void wt_setseq(int wtn, int wtseq); + + void chars_to_vstr(char *chars_1d, vector &vstr, + int nv, int nchar); + void vstr_to_chars(char *chars_1d, vector &vstr, + int nv, int nchar); + + void ListIncludeFiles(); + int NumIncludeFiles(); + string GetIncludeFile(int); + + + +private: + + void init(); + int process_dav_cmd(); + void check_dup_scalar(int wtn, bool &found_any); + void set_dup_row(vector &row, Cmd &cmdi, int iw); + void remove_dup_scalar(int wtn); + void read_into_string(string filename, string &s_in); + void broadcast_buffer(string &s_in); + bool get_line_from_string(string &strn, string &sout, int ¤t_pos); + bool get_sc_line_from_string(string &strn, string &sout, int ¤t_pos); + void store_line_strings(string &s_in); + void eliminate_white_space(string &sline); + void cmd_set_reprocessed(bool bval); + int process_error_return_int(stringstream &serr, int &ierr); + void process_error(stringstream &serr, int &ierr); + + void list_vars(string lv1, string lv2, string var_to_list); + void list_vars_ss(string lv1, string lv2, string var_to_list, + stringstream &ssvars); + + void list_funcs(string lf1, string lf2); + void list_funcs_ss(string lf1, string lf2, stringstream &ssfunc); + + void list_cmdsf(string lc1, string lc2); + void list_cmdsf_ss(string lc1, string lc2, + stringstream &ssc); + void list_wt_cmdsf(); 
+ void list_wt_cmdsf_ss(stringstream &ssc); + + void print_strings(vector< vector > rows, int n_header_rows, + int offset, int col_spacing, int line_len_max, + stringstream &ss); + bool end_do_loop(int &i, deque &do_start, + stringstream &serr, int &ierr); + void end_do_ret(int &i, deque &do_start, + stringstream &serr, int &ierr); + void check_enddo(deque &do_start, stringstream &serr, int &ierr); + int jump_to_call(int &i, deque &icall, deque &isub, + stringstream &serr, int &ierr); + int jump_to_sub(int &i, string &sub_name, + stringstream &serr, int &ierr); + void print_line(int i); + void print_line(Cmd &cmd); + + // Store exe line arguments. + string other_args, file_name; + + // A double ended queue for storing the original lines. This is + // before the lines get turned into Cmds. + // line_number is an index into cmd_strings, note that it starts + // from 1, not 0. + deque cmd_strings; + int line_number; + + // Define a map for a set of variables. + map vmap; + + // Maintain a list of included files + + std::map IncludeFiles; + + // Define a map for the functions. + map fmap; + + // A double ended queue for storing the commands. + deque cmds; + deque cmdsf; + deque *cmdsfp; + + // Store cmd names that have been processed, used for clearing and + // recreating the parser. + deque processed_cmd_names; + + // Related to writing output to a fortran file. + int ssfout_current_pos; + stringstream ssfout; + + // Used for storing the list of pre-defined variables to be printed + // out later. + stringstream pre_defined_varss; + + // Used for storing multiple errors and processing them later. + stringstream serr_global; + int ierr_global; + + // The execution line arguments are put in this string. + string exe_args_str; + + // The when ... then objects. + deque whenthens; + + // Restart blocks. 
+ deque restartblocks; + int nrb_on_dump; + deque bnames_on_dump; + deque baflags_on_dump; + int satsize_on_dump; + deque rbsat_on_dump; + deque rbsatprb_on_dump; + + // Flag for whether duplicate array values will be none, fatal, or + // a warning, determined by the duplicate_array_values command. + // dup_fatal = 0 Turn off duplicate array value checking + // dup_fatal = 1 Duplicate array value checking is a warning + // dup_fatal = 2 Duplicate array value checking is a fatal error + int dup_fatal; +}; + +} // end of PP namespace + +#endif Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/PowerParser.cc =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/PowerParser.cc @@ -0,0 +1,3269 @@ +/* Copyright 2015. Los Alamos National Security, LLC. This material was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy + * of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations under the License. + * + * Under this license, it is required to include a reference to this work. We + * request that each derivative work contain a reference to LANL Copyright + * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly + * measured. + * + * This is LANL Copyright Disclosure C15076/LA-CC-15-054 + */ + +/* + * PowerParser is a general purpose input file parser for software applications. + * + * Authors: Chuck Wingate XCP-2 caw@lanl.gov + * Robert Robey XCP-2 brobey@lanl.gov + */ + +// *************************************************************************** +// *************************************************************************** +// Provide a class that parses text files into lines and words. +// *************************************************************************** +// *************************************************************************** + +#include "PowerParser.hh" +#include "Parser_utils.hh" +#include "Variable.hh" +#include "Function.hh" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace PP +{ +using std::cout; +using std::endl; +using std::string; +using std::ifstream; +using std::ios; +using std::deque; +using std::map; +using std::pair; +using std::vector; +using std::stringstream; +using std::setw; +using std::setprecision; +using std::numeric_limits; + +static int index_base = 1; +static bool case_sensitive = false; + +// =========================================================================== +// Various constructors. +// =========================================================================== +PowerParser::PowerParser() +{ + comm = new Comm(); + + init(); // Init vars, setup functions, ... 
+    nrb_on_dump = 0;
+    coutbuf = NULL;   // NULL means cout has not been redirected (see destructor).
+}
+
+// Construct a parser and immediately parse the input file named by filename.
+PowerParser::PowerParser(string filename)
+{
+    comm = new Comm();
+
+    init();                 // Init vars, setup functions, ...
+    nrb_on_dump = 0;
+    parse_file(filename);   // Parse the file.
+    coutbuf = NULL;         // NULL means cout has not been redirected (see destructor).
+}
+
+// Same as the string constructor, but takes the file name as a C string.
+PowerParser::PowerParser(const char *filename)
+{
+    comm = new Comm();
+
+    string fstring(filename);
+
+    init();                 // Init vars, setup functions, ...
+    nrb_on_dump = 0;
+    parse_file(fstring);    // Parse the file.
+    coutbuf = NULL;         // NULL means cout has not been redirected (see destructor).
+}
+
+// ===========================================================================
+// Destructor
+// Closes the redirect file, gives cout back its original stream buffer if
+// one was saved in coutbuf, frees the Comm object created by the
+// constructor, and empties the parser's containers.
+// ===========================================================================
+PowerParser::~PowerParser()
+{
+    fileout.close();
+    if (coutbuf != NULL) cout.rdbuf(coutbuf); // restore cout's original streambuf
+    delete comm;
+
+    cmd_strings.clear();
+    vmap.clear();
+    fmap.clear();
+    cmds.clear();
+    cmdsf.clear();
+    whenthens.clear();
+    restartblocks.clear();
+    pre_defined_varss.str("");
+}
+
+// ===========================================================================
+// Parse a file. The basic strategy is to read the file into a string on the
+// io processor, broadcast the string to all the other processors, then parse
+// the string.
+// ===========================================================================
+void PowerParser::parse_file(string filename)
+{
+    // Read the file into a string. This simply copies every character in
+    // the file to the string including end of line characters.
+    // Note that only the io processor reads the file into the string.
+    string s_in = "";
+    read_into_string(filename, s_in);
+
+    // Broadcast the buffer string to all the other processors. After this
+    // broadcast, all the processors should have the same buffer string.
+    broadcast_buffer(s_in);
+
+    // Parse the string. After this is done, the parser is ready to be used
+    // by the application code.
+    parse_string(filename, s_in);
+}
+
+// Convenience overload: wrap the C string in a string and forward to
+// parse_file(string).
+void PowerParser::parse_file(const char *filename)
+{
+    string fstring(filename);
+    parse_file(fstring);
+}
+
+// Return the number of include files recorded in IncludeFiles.
+int PowerParser::NumIncludeFiles()
+{
+    return (int)IncludeFiles.size();
+}
+
+// Return the name of the i-th recorded include file, or an empty string
+// when i is negative or not less than NumIncludeFiles().
+string PowerParser::GetIncludeFile(int i)
+{
+    // Range-check with int on both sides (no signed/unsigned comparison).
+    if (i < 0 || i >= NumIncludeFiles()) return string("");
+    return IncludeFiles[i];
+}
+
+// Print the number of include files, then one "Include file N = name" line
+// per file, to std::cerr.
+void PowerParser::ListIncludeFiles()
+{
+    int num_include = NumIncludeFiles();
+    std::cerr << "Number of include files = " << num_include << "\n";
+    for (int i = 0; i < num_include; ++i)
+    {
+        std::cerr << "Include file " << i << " = " << GetIncludeFile(i) << "\n";
+    }
+}
+
+// ===========================================================================
+// Given a multi-line string on every processor, parse it into cmds and words.
+// After calling this function, the parser is ready for use.
+// ===========================================================================
+void PowerParser::parse_string(string filename, string buffer)
+{
+    // Get command lines from the buffer and store them as strings.
+    int current_pos = 0;
+    string sline1 = "";
+    string sline = "";
+    int file_line_number = 0;
+    bool exe_args_inserted = false;
+    for (;;) {
+        // Get the next line from the buffer. No processing is done, just
+        // get each line. This does, however, remove the end of line
+        // characters (either \r\n or only \n) from the string.
+        if (!get_line_from_string(buffer, sline1, current_pos)) break;
+        line_number += 1;
+        file_line_number += 1;
+
+        // Store the line without any processing. This is done so that a
+        // fortran routine can get each original line and echo it to an
+        // output file.
+        cmd_strings.push_back(sline1);
+
+        // The line, sline1, may be composed of sub-lines separated by
+        // semicolons. Loop through the line extracting each semicolon
+        // separated sub-line and process it.
+ int current_sc_pos = 0; + for (;;) { + if (!get_sc_line_from_string(sline1, sline, current_sc_pos)) break; + + // Flag for making the command or not. + bool make_cmd = true; + + // Get rid of leading and trailing blanks and tabs. + eliminate_white_space(sline); + + // If after removing white space, the resulting line string is empty, + // then do not turn it into a command. + if ((int)sline.size() == 0) make_cmd = false; + + // Turn the line into a command. This creates the words. Empty lines + // can be skipped. + if (make_cmd) { + stringstream serr; + int ierr = 0; + Cmd cmd(sline, &vmap, &fmap, &cmd_strings, + line_number, file_line_number, filename, serr, ierr); + process_error(serr, ierr); + if (cmd.get_string(0) == "set_index_base_zero") { + // C/C++ index convention + cmd.set_index_base(0); + Variable v(0); + index_base = 0; + } + if (cmd.get_string(0) == "set_index_base_one") { + // Fortran index convention + cmd.set_index_base(1); + Variable v(1); + index_base = 1; + } + if (cmd.get_string(0) == "set_case_sensitive") { + cmd.set_case_sensitive(true); + case_sensitive = true; + } + if (cmd.get_string(0) == "set_case_insensitive") { + cmd.set_case_sensitive(false); + case_sensitive = false; + } + if (cmd.get_string(0) == "put_exe_args_here") { + if (exe_args_str != "") { + parse_string("execution line arguments", exe_args_str); + exe_args_inserted = true; + } + } + else if (cmd.is_include()) { + string fname = ""; + stringstream ssfiles; + if(comm->isIOProc()) { + fname = cmd.get_cmd_filename(ssfiles); + } + broadcast_buffer(fname); + map::iterator ifp; + int isize = IncludeFiles.size(); + IncludeFiles[isize] = fname; + if (fname == "") { + stringstream serr; + serr << endl; + serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl; + serr << " " << cmd_strings[line_number-1] << endl; + serr << "in file: " << filename << endl; + serr << "Could not open include file." 
<< endl; + serr << "The name of the file and any alternates are:" << endl; + serr << ssfiles.str() << endl; + int ierr = 2; + process_error(serr, ierr); + return; + } + parse_file(fname); + } + else { + cmds.push_back(cmd); + } + } + } + } + // process inserted command line args if not parsing just the args + if (filename != "execution line arguments" && + filename != "exe_args_tmp" && + exe_args_str != "") { + // if inserting manually, remove the tmp insertion at the beginning + if (exe_args_inserted) { + for (int i=0; i<(int)cmds.size(); i++) { + if (cmds[i].get_filename() == "exe_args_tmp") { + cmds.erase(cmds.begin()+i); + i -= 1; + } + } + } + // change the file name to the real name for args + else { + for (int i=0; i<(int)cmds.size(); i++) { + if (cmds[i].get_filename() == "exe_args_tmp") { + cmds[i].set_filename("execution line arguments"); + } + } + } + } +} + + +// =========================================================================== +// Handle the execution line arguments. +// =========================================================================== +void PowerParser::handle_exe_args(string other_args) +{ + if ((int)other_args.size() == 0) return; + + stringstream serr; + int ierr = 0; + Cmd cmd(other_args, &vmap, &fmap, &cmd_strings, + 1, 1, "", serr, ierr); + process_error(serr, ierr); + //print_line(cmd); + exe_args_str = ""; + cmd.handle_exe_args(exe_args_str); + if (cmd.get_nwords() == 0) { + exe_args_str = ""; + return; + } + parse_string("exe_args_tmp", exe_args_str); +} + + +// =========================================================================== +// Clear out the parser and re-init. 
+// ===========================================================================
+void PowerParser::clear_and_init()
+{
+    // comm does not need to be reset
+    cmd_strings.clear();
+    vmap.clear();
+    fmap.clear();
+    cmds.clear();
+    cmdsf.clear();
+    whenthens.clear();
+    restartblocks.clear();
+    pre_defined_varss.str("");
+
+    // Do not clear out the restart block info from the dump since the whole
+    // point of doing this function is to be able to reset the parser with
+    // the restart block info from the dump.
+
+    //for (int i=0; i<(int)bnames_on_dump.size(); i++) {
+    //    cout << "&&&&&cw PowerParser.cc, clear_and_init, bnames_on_dump = " <<
+    //        bnames_on_dump[i] << endl;
+    //    cout << "&&&&&cw PowerParser.cc, clear_and_init, baflags_on_dump = " <<
+    //        baflags_on_dump[i] << endl;
+    //}
+
+    // Do the initialization again.
+    init();
+}
+
+// ===========================================================================
+// Echo user input to a stringstream.
+// echo_input_start empties ssfout, refills it with the echoed input and
+// sets the ssfout read position back to 0.
+// echo_input_ss writes every stored original input line to ssinp, one per
+// line. Only the IO processor writes; all others return immediately.
+// ===========================================================================
+void PowerParser::echo_input_start()
+{
+    ssfout.str("");
+    echo_input_ss(ssfout);
+    ssfout_current_pos = 0;
+}
+void PowerParser::echo_input_ss(stringstream &ssinp)
+{
+    if (!comm->isIOProc()) return;
+    for (int i=0; i<(int)cmd_strings.size(); i++) {
+        ssinp << cmd_strings[i] << endl;
+    }
+}
+
+
+// ===========================================================================
+// The input file(s) has been read and put into commands. Now do the
+// compilation phase.
+// ===========================================================================
+void PowerParser::compile_buffer(int &return_value)
+{
+    // At this point, the list of variables only contains the pre-defined
+    // parser variables, thus if we list the variables at this point we will
+    // have a list of only the pre-defined variables. This is stored in a
+    // stringstream to be printed later.
+ string lv1 = "********** List of pre-defined parser variables"; + string lv2 = "********** End list of pre-defined parser variables"; + list_vars_ss(lv1, lv2, "", pre_defined_varss); + + int return_local; + + return_local =-1; + return_value = 0; + + // Handle single line (! and //) comments and multi line + // comments (/* ... */) + // The level variable is used for nested multi line comments. + int level = 0; + for (int i=0; i<(int)cmds.size(); i++) { + cmds[i].single_line_comments(); + cmds[i].multi_line_comments(level); + } + + // Check for matching quotes and remove them. + int ierr = 0; + stringstream serr; + for (int i=0; i<(int)cmds.size(); i++) { + cmds[i].handle_quotes(serr, ierr); + } + return_local = process_error_return_int(serr, ierr); + return_value = return_local; + + if (return_local > 0) { + cout << "handle quotes gave error " << ierr << endl; + if (return_local > 1) return; + } + + // Remove empty lines. + for (int i=0; i<(int)cmds.size(); i++) { + if (cmds[i].get_nwords() == 0) { + cmds.erase(cmds.begin()+i); + i -= 1; + continue; + } + } + + // Handle continuation lines (ending in & or ,). + // Continuation lines are merged into one long (possibly very long) + // line. + for (int i=(int)cmds.size()-1; i>=0; i--) { + int nw1 = cmds[i].get_nwords(); + if (cmds[i].get_string(nw1-1) == "&" || + cmds[i].get_string(nw1-1) == ",") { + if (cmds[i].get_string(nw1-1) == "&") + cmds[i].erase_word(nw1-1); + int nw2 = cmds[i+1].get_nwords(); + for (int j=0; j 0) { + cout << "handle variable dimension statement has error " << ierr << endl; + if (return_local > 1) return; + } + + // Combine things like "end if" into one word, i.e. "endif". + for (int i=0; i<(int)cmds.size(); i++) { + cmds[i].handle_two_words(); + } + + // Handle the case of a space between digits and the e for reals. + // For example, in the following, + // 1.0, 2.3 e14, -5.6 + // there is a space between 2.3 and e14 which should most likely + // be treated as a single number, 2.3e14. 
+ ierr = 0; + serr.str(""); + string action = "error"; + bool action_set = false; + for (int i=0; i<(int)cmds.size(); i++) { + if (cmds[i].get_cmd_name() == "depcmd_dse") { + action = cmds[i].get_string(1); + action_set = true; + cmds.erase(cmds.begin()+i); + i--; + continue; + } + if (cmds[i].get_cmd_name() == "matdef") { + if (!action_set) action = "fix"; + } + cmds[i].deprecated_input01(action, serr, ierr); + } + + return_local = process_error_return_int(serr, ierr); + return_value = return_local; + if (return_local > 0) { + cout << "handle space between digits has error " << ierr << endl; + if (return_local > 1) return; + } + + + // This is the main loop where most everything is done. + bool print_final_buffer = false; + deque skip_level; + deque satisfied; + deque do_start; + string sub_name = ""; + deque icall, isub; + bool skip_sub = false; + int nwhen = 0; + int when_level = 0; + bool single_line_when = false; + int nrb = 0; // Number of restart blocks + bool single_line_rb = false; // Flag for single line restart blocks + bool skiprb = false; // Flag for skipping cmds in restart block + for (int i=0; i<(int)cmds.size(); i++) { + // Work with cmdi, so that cmds will be available for do loops. + Cmd cmdi = cmds[i]; + //print_line(cmdi); + + if (cmdi.get_cmd_name() == "parser_redirect_to_file") { + string fname; + int nw = cmdi.get_nwords(); + if (nw > 1) { + fname = cmdi.get_string(1); + } else { + fname = "parser.out"; + } + if (comm->isIOProc()) { + //cout << "DEBUG fname is " << fname << endl; + //cout << "Redirecting output to file" << endl; + cout.flush(); + coutbuf = cout.rdbuf(); + fileout.open(fname.c_str()); + cout.rdbuf(fileout.rdbuf()); + //cout << "Start of output to file" << endl; + } + + continue; + } + + // Handle restart_block commands. 
+ if (cmdi.get_string(0) == "restart_block") { + Restartblock rb(nrb, cmdi, skiprb, single_line_rb, + bnames_on_dump, baflags_on_dump, + rbsatprb_on_dump, rbsat_on_dump, + serr, ierr); + restartblocks.push_back(rb); + + for (int rbi=0; rbi<(int)restartblocks.size(); rbi++) { + string rbi_name = restartblocks[rbi].get_name(); + for (int rbj=rbi+1; rbj<(int)restartblocks.size(); rbj++) { + if (rbi_name == restartblocks[rbj].get_name()) { + cmdi.fatal_error(0, serr, ierr); + serr << "Restart block names must be unique." << endl; + serr << "Non unique name = " << rbi_name << endl; + ierr = 2; + } + } + } + + if (single_line_rb && skiprb) { + skiprb = false; + continue; + } + bool cflag = true; + if (single_line_rb && !skiprb) cflag = false; + if (cflag) continue; + } + if (cmdi.get_string(0) == "end_restart_block") { + skiprb = false; + continue; + } + if (skiprb) continue; + + + if (skip_sub) { + if (cmdi.get_string(0) == "endsubroutine") { + skip_sub = false; + //cout << "endsubroutine found, turning skip_sub to false" << endl; + } + continue; + } + + return_local = ierr; + return_value = return_local; + if (return_local > 0) { + if (return_local > 1) return; + } + + + + // List variables, functions, etc. 
+ if (cmdi.get_cmd_name() == "parser_list_variables") { + string lv1 = "********** Debugging: list variable names and values " + "available in input file."; + string lv2 = "********** Debugging END: list variable names and values"; + string var_to_list = ""; + int nw = cmdi.get_nwords(); + if (nw > 1) var_to_list = cmdi.get_string(1); + if (comm->isIOProc()) cout << endl; + list_vars(lv1, lv2, var_to_list); + if (comm->isIOProc()) cout << endl; + continue; + } + if (cmdi.get_cmd_name() == "parser_list_functions") { + string lf1 = "********** Debugging: list function names available in input file."; + string lf2 = "********** Debugging END: list function names."; + if (comm->isIOProc()) cout << endl; + list_funcs(lf1, lf2); + if (comm->isIOProc()) cout << endl; + continue; + } + if (cmdi.get_cmd_name() == "parser_print_fbuffer") { + print_final_buffer = true; + continue; + } + + // Handle when ... then commands. + string wtcmd = cmdi.get_string(0); + if (wtcmd == "when" || wtcmd == "whenever") { + bool skipwhen = true; + bool ever_flag = false; + if (wtcmd == "whenever") ever_flag = true; + Whenthen wt(nwhen, cmdi, skipwhen, single_line_when, ever_flag, serr, ierr); + when_level += 1; + whenthens.push_back(wt); + if (skipwhen) continue; + } + if (cmdi.get_string(0) == "endwhen") { + when_level -= 1; + if (when_level < 0) { + cmdi.fatal_error(0, serr, ierr); + serr << "Extra endwhen (or end when) found with no matching " + "when command." << endl; + serr << "Make sure every when command has one and only one" + << endl << "matching endwhen command." << endl; + ierr = 2; + } + return_local = ierr; + if (return_local > return_value) { + return_value = return_local; + } + if (return_local > 0) { + cout << "handle endwhen " << ierr << endl; + return; + } + continue; + } + + + // Handle if/elseif/else/endif statements. + bool skip = false; + cmdi.handle_if(skip, skip_level, satisfied, serr, ierr); + if (skip) continue; + + // Handle do loops. 
Note that we terminate if there is an error to + // avoid the possibility of an infinite loop. + int cdex = i; + bool end_do = false; + int ierr2 = 0; + cmdi.handle_do(skip, do_start, cdex, end_do, serr, ierr2); + if (ierr2 == 2) { + ierr = 2; + return_local = ierr; + if (return_local > return_value) { + return_value = return_local; + } + if (return_local > 0) { + cout << "handle endwhen " << ierr << endl; + } + break; + } + if (end_do) { + if (!end_do_loop(i, do_start, serr, ierr)) break; + continue; + } + i = cdex; + if (skip) continue; + + // Handle call/subroutines. + bool go_to_sub = false; + bool go_to_call = false; + cmdi.handle_subroutines(skip, go_to_sub, sub_name, go_to_call, + serr, ierr); + if (go_to_call) { + end_do_ret(i, do_start, serr, ierr); + return_local = jump_to_call(i, icall, isub, serr, ierr); + if (return_local > return_value) { + return_value = return_local; + } + if (return_local > 0) { + cout << "jump_to_call error " << ierr << endl; + } + continue; + } + + if (go_to_sub) { + icall.push_back(i); + return_local = jump_to_sub(i, sub_name, serr, ierr); + isub.push_back(i); + if (return_local > return_value) { + return_value = return_local; + } + if (return_local > 0) { + cout << "jump_to_sub error " << ierr << endl; + } + continue; + } + + if (cmdi.get_string(0) == "subroutine") { + //cout << "subroutine found!!!, turning skip_sub to true" << endl; + skip_sub = true; + continue; + } + + // Check for a variable description command. If found we set the description, + // then go to the next line. + if (cmdi.check_for_var_description(serr, ierr)) continue; + + // Stop if we hit a stop command or a fatal_error command + bool kill_run = false; + if (cmdi.check_input_end(kill_run, serr, ierr)) { + // Killing the calculation will be done, for example, if the user + // issues a fatal_error command. 
+ if (kill_run) { + return_local = process_error_return_int(serr, ierr); + if (return_local > return_value) { + return_value = return_local; + } + if (return_local > 0) { + cout << "handle endwhen " << ierr << endl; + if (return_local > 1) return; + } + } + + // Clear out all do's so we don't get an error about unmatched + // do/enddo. + do_start.clear(); + break; + } + + cmdi.math_eval(serr, ierr); + cmdi.substitute_variables(serr, ierr); // Sub vars not in math expressions. + cmdi.check_ppmm(serr, ierr); // All ++, -- should be gone. + cmdi.remove_commas(); + cmdi.handle_cmd_unary_minus(serr, ierr); + cmdi.handle_cmd_unary_plus(serr, ierr); + cmdi.handle_cmd_multiplicity(serr, ierr); + cmdi.check_misplaced_math(serr, ierr); + cmdi.set_variables(serr, ierr); + + // Copy the command to the final commands deque. + if (cmdi.get_cmd_type() == "command") { + if (when_level > 0) { + whenthens[nwhen-1].add_cmdf(cmdi); + if (single_line_when) { + when_level -= 1; + single_line_when = false; + } + } + else { + cmdsf.push_back(cmdi); + } + } + } + // Print error messages and terminate if fatal. + return_local = process_error_return_int(serr, ierr); + if (return_local > return_value) { + return_value = return_local; + } + if (return_local > 0) { + return_value = return_local; + if (return_local > 1) return; + } + + // Check that an enddo was found for every do. + check_enddo(do_start, serr, ierr); + + // Print error messages and terminate if fatal. + return_local = process_error_return_int(serr, ierr); + if (return_local > return_value) { + return_value = return_local; + } + if (return_local > 0) { + return_value = return_local; + if (comm->isIOProc()) { + cout << "handle enddo is wrong with err " << ierr << endl; + } + if (return_local > 1) return; + } + + // Set the processed flag in every word in every command to be false. + // At the end of parsing, if any word has not been processed in some way, + // then that is a fatal error. 
+ for (int i=0; i<(int)cmdsf.size(); i++) { + cmdsf[i].clear_processed(); + } + + // If this is a recreate of the parser, then there might be some commands + // that have already been processed, set these. + cmd_set_reprocessed(true); + + // Check and print duplicate scalar commands. + // Remove duplicate scalar commands. + // Process the duplicate_array_values command. + return_local = process_dav_cmd(); + if (return_local > return_value) { + return_value = return_local; + } + if (return_local > 0) { + if (comm->isIOProc()) { + cout << "Checked for duplicate arrays and error is " << return_local << endl; + } + } + //check_duplicates(); + + // Debug: print each of the final commands to the screen. + if (print_final_buffer) { + if (comm->isIOProc()) { + cout << "********************************************************************************\n" + << "********** Echo final parser buffer, this is what the code uses to set internal \n" + << "********** code variables." << endl; + list_cmdsf("", ""); + cout << "********** End of echo final parser buffer.\n" + << "********************************************************************************\n\n" + << endl; + + cout << "********************************************************************************\n" + << "********** Echo final when...then parser buffers, this is what the code uses to \n" + << "********** set internal code variables when processing when...then commands." << endl; + list_wt_cmdsf(); + cout << "********** End of echo final when...then parser buffers.\n" + << "********************************************************************************\n\n" + << endl; + + cout << "********************************************************************************\n" + << "********** Echo restart block information." 
<< endl; + list_rb(); + cout << "********** End of echo restart block information.\n" + << "********************************************************************************\n\n" + << endl; + } + } + + // Return the to the calling program + + return; +} + + +// =========================================================================== +// A "endsubroutine" or "return" command has been found. Jump back to the call +// statement. This sets the loop index i so that we end up on the line after +// the call. +// =========================================================================== +int PowerParser::jump_to_call(int &i, deque &icall, deque &isub, + stringstream &serr, int &ierr) +{ + int return_value; + int return_local; + return_value = 0; + return_local = 0; + + int icsize = (int)icall.size(); + if (icsize == 0) { + cmds[i].fatal_error(0, serr, ierr); + serr << "icall size = 0, this should never happen." << endl; + ierr = 2; + return_value = process_error_return_int(serr, ierr); + if (return_local > return_value) { + return_value = return_local; + } + if (return_local > 0) { + cout << "jump_to_call icall error " << ierr << endl; + if (return_local > 1) return return_value; + } + } + i = icall[icsize-1]; + icall.erase(icall.begin()+icsize-1); + + vector call_args; + vector call_args_isvar; + cmds[i].copy_call_args(call_args, call_args_isvar); + int idex_sub = isub[(int)isub.size()-1]; + vector sub_args; + vector sub_args_isvar; + cmds[idex_sub].copy_sub_args(sub_args, sub_args_isvar); + //cout << "jump_to_call, sub args, then call args" << endl; + //for (int j=0; j<(int)sub_args.size(); j++) { + // cout << sub_args[j] << endl; + //} + //for (int j=0; j<(int)call_args.size(); j++) { + // cout << call_args[j] << endl; + //} + //cout << "--------------------------------" << endl; + + for (int j=0; j<(int)sub_args.size(); j++) { + string sub_var = sub_args[j]; + string call_var = call_args[j]; + + map::iterator psub; + psub = vmap.find(sub_var); + string sub_value = 
""; + if (psub != vmap.end()) { + sub_value = psub->second.get_var_value(); + } + else { + cmds[i].fatal_error(0, serr, ierr); + serr << "Subroutine argument not found." << endl; + serr << "This should not happen." << endl; + ierr = 2; + return_value = process_error_return_int(serr, ierr); + if (return_local > return_value) { + return_value = return_local; + } + if (return_local > 0) { + cout << "jump_to_call Subroutine argument not found " << endl; + if (return_local > 1) return return_value; + } + } + + if (!call_args_isvar[j]) { + if (sub_value != call_var) { + cmds[i].fatal_error(0, serr, ierr); + cmds[isub[(int)isub.size()-1]].fatal_error(0, serr, ierr); + serr << "The calling argument, argument number " << j+1 + << ", (after any math eval) is " << call_var << endl; + serr << "The corrseponding subroutine dummy argument, " + << sub_var << ", has" + << " the value of " << sub_value << endl; + serr << "These are different and should not be." << endl; + serr << "The calling argument is not a variable and thus" + " is fixed and cannot be changed." << endl; + serr << "The dummy argument was changed in the subroutine," << endl; + serr << "thus you are trying to change a fixed quantity which" + " is not allowed." << endl; + ierr = 2; + return_value = process_error_return_int(serr, ierr); + if (return_local > return_value) { + return_value = return_local; + } + if (return_local > 0) { + cout << "jump_to_call subroutine arguments errors " << endl; + if (return_local > 1) return return_value; + } + } + } + else { + map::iterator pcall; + pcall = vmap.find(call_var); + if (pcall != vmap.end()) { + pcall->second = psub->second; + pcall->second.set_temporary(false); + pcall->second.set_varname(call_var); + } + else { + cmds[i].fatal_error(0, serr, ierr); + serr << "Calling argument not found." << endl; + serr << "This should not happen." 
<< endl; + ierr = 2; + return_value = process_error_return_int(serr, ierr); + if (return_local > return_value) { + return_value = return_local; + } + if (return_local > 0) { + cout << "jump_to_call calling argument not found " << endl; + if (return_local > 1) return return_value; + } + } + } + + } + + // Erase temporary variables. + // There should be a better way to do this. + map::iterator p; + for (;;) { + bool erase_done = false; + for(p = vmap.begin(); p != vmap.end(); p++) { + if (p->second.is_temporary()) { + vmap.erase(p); + erase_done = true; + break; + } + } + if (!erase_done) break; + } + + // Remove the index to the subroutine line. + isub.erase(isub.begin()+(int)isub.size()-1); + + return return_value; + +} // End of jump_to_call + + +// =========================================================================== +// A "call" command has been found. Find the subroutine it is trying to call +// and set the loop index, i, to the subroutine line so we will end up on the +// line after the subroutine. +// =========================================================================== +int PowerParser::jump_to_sub(int &i, string &sub_name, + stringstream &serr, int &ierr) +{ + int return_value; + int return_local; + return_value = 0; + return_local = 0; + + // At this point, i is the index for the call line. + //cout << "&&&&&cw PowerParser loop, jump_to_sub, i=" << i << endl; + + // Find the line index, cdex, for the subroutine. + int cdex = -1; + for (int j=0; j<(int)cmds.size(); j++) { + if (cmds[j].find_subroutine(sub_name)) { + cdex = j; + break; + } + } + + if (cdex == -1) { + cmds[i].fatal_error(0, serr, ierr); + serr << "Subroutine " << sub_name << " not found." 
<< endl; + ierr = 2; + return_value = process_error_return_int(serr, ierr); + if (return_local > return_value) { + return_value = return_local; + } + if (return_local > 0) { + cout << "Subroutine name not found " << ierr << endl; + if (return_local > 1) return return_value; + } + } + + // Get the calling arguments. This will potentially be a mix + // of variable names and numbers. This does not modify the words + // in cmds[i], but it does store the call arguments in cmds[i]. + vector call_args; + vector call_args_isvar; + cmds[i].get_call_args(call_args, call_args_isvar, serr, ierr); + //for (int j=0; j<(int)call_args.size(); j++) { + // cout << call_args[j] << endl; + //} + + // Get the subroutine arguments. + // This does not modify the words in cmds[cdex], but it does store + // the subroutine arguments in cmds[cdex] + //print_line(cdex); + vector sub_args; + vector sub_args_isvar; + cmds[cdex].get_sub_args(sub_args, sub_args_isvar); + //for (int j=0; j<(int)sub_args.size(); j++) { + // cout << sub_args[j] << endl; + //} + + // Error checking. + int ncall_args = (int)call_args.size(); + int nsub_args = (int)sub_args.size(); + if (ncall_args != nsub_args) { + cmds[i].fatal_error(0, serr, ierr); + cmds[cdex].fatal_error(0, serr, ierr); + serr << "Number of calling arguments = " << ncall_args << endl; + serr << "Number of subroutine arguments = " << nsub_args << endl; + serr << "These must be the same." << endl; + ierr = 2; + return_local = process_error_return_int(serr, ierr); + if (return_local > return_value) { + return_value = return_local; + } + if (return_local > 0) { + cout << "Arguments in subroutine and in calling are different " << ierr << endl; + if (return_local > 1) return return_value; + } + } + + for (int j=0; j<(int)sub_args.size(); j++) { + if (!sub_args_isvar[j]) { + cmds[cdex].fatal_error(0, serr, ierr); + serr << "Subroutine dummy arguments must always be variables." 
<< endl; + serr << "Argument " << j+1 << ", " << sub_args[j] << ", " + << "is not a variable." << endl; + serr << "Remember that variables always begin with a" + " dollar sign, $" << endl; + serr << "Note that putting quotes around a variable name makes it" << endl; + serr << "a string, not a variable." << endl; + ierr = 2; + return_local = process_error_return_int(serr, ierr); + if (return_local > return_value) { + return_value = return_local; + } + if (return_local > 0) { + cout << "Dummy arguments must be variables " << ierr << endl; + if (return_local > 1) return return_value; + } + } + } + + + // Define new, temporary variables for the subroutine dummy arguments. + // Set their values to the call values. + for (int j=0; j<(int)sub_args.size(); j++) { + string sub_vname = sub_args[j]; + string call_vname = call_args[j]; + + // Find the subroutine variable name in the variable map. + map::iterator psub; + psub = vmap.find(sub_vname); + + if (psub != vmap.end()) { + cmds[cdex].fatal_error(0, serr, ierr); + serr << "Argument " << j+1 << ", " << sub_args[j] << ", " + << "is both a global variable and a dummy subroutine argument." << endl; + serr << "This is not allowed, dummy subroutine arguments " + "cannot also be" << endl; + serr << "global variables." << endl; + ierr = 2; + return_local = process_error_return_int(serr, ierr); + if (return_local > return_value) { + return_value = return_local; + } + if (return_local > 0) { + cout << "Dummy argument cannot be global variable " << ierr << endl; + if (return_local > 1) return return_value; + } + } + else { + // If the calling argument is a variable, then we set the + // temporary variable equal to the calling variable. This passes + // in the correct value, but it also passes in arrays, and + // whatever characteristics the calling variable has. + // + // If the calling argument is not a variable, then we just + // create the new, temporary variable and give it the calling + // argument as its value. 
+ if (call_args_isvar[j]) { + map::iterator pcall; + pcall = vmap.find(call_vname); + if (pcall != vmap.end()) { + Variable v = pcall->second; + v.set_varname(sub_vname); + v.set_temporary(true); + vmap.insert(pair(v.get_varname(), v)); + } + else { + // FATAL ERROR + // calling argument variable not defined. + } + } + else { + vector istart(0,0); + vector valvec; + valvec.push_back(call_vname); + int lnum = cmds[cdex].get_line_number(0); + int file_lnum = cmds[cdex].get_file_line_number(0); + string fname = cmds[cdex].get_filename(0); + Variable v(sub_vname, istart, valvec, lnum, file_lnum, + fname, &cmd_strings, serr, ierr); + v.set_temporary(true); + vmap.insert(pair(v.get_varname(), v)); + } + + } + } + + // Set the loop index to the index of the subroutine so we + // will end up at the line after the subroutine line. + i = cdex; + return return_value; +} // End of jump_to_sub + + + + +// =========================================================================== +// End a do loop. This happens when a do loop has gone through all its +// iterations or when an exit statment is encountered. +// Basically, find the matching enddo and continue after that statement. +// =========================================================================== +bool PowerParser::end_do_loop(int &i, deque &do_start, + stringstream &serr, int &ierr) +{ + int rtvl = 0; + // Find the matching enddo. + // Stop checking will be true if we are in main and hit a subroutine + // statement or if we are in a subroutine and hit an endsubroutine + // statement. + int cdex = -1; + int dlev = 1; + for (int j=i+1; j<(int)cmds.size(); j++) { + bool stop_checking = false; + if (cmds[j].find_matching_enddo(dlev, stop_checking)) { + cdex = j; + break; + } + if (stop_checking) break; + } + + // If the matching enddo was not found then that is a fatal error. 
+ int nlevels = (int)do_start.size(); + if (cdex == -1) { + if (nlevels > 0) { + int ido = do_start[nlevels-1]; + cmds[ido].fatal_error(0, serr, ierr); + } + serr << "No enddo found for do statement." << endl; + ierr = 2; + rtvl = process_error_return_int(serr, ierr); + if (rtvl > 0) cout << "Enddo not found " << endl; + return false; + } + + // We are done with this do loop, so we can get rid of the reference + // to it. + if (nlevels > 0) { + do_start.erase(do_start.begin()+nlevels-1); + } + + // Set the loop index to the enddo statement so that we will start + // processing immediately after the enddo. + i = cdex; + return true; +} // End end_do_loop + + +// =========================================================================== +// A return statement in a subroutine has been encountered. We need to handle +// the do loops before returning to the call statement, otherwise the code +// will complain about do loops without enddo statements. +// This routine searches from the return statement to the endsubroutine +// statement, finds any free enddo's and erases the corresponding references +// to the do statements. +// =========================================================================== +void PowerParser::end_do_ret(int &i, deque &do_start, + stringstream &serr, int &ierr) +{ + // To suppress compiler warnings of unused parameters + //assert(serr == serr); + assert(ierr == ierr); + + int istart = i; + for (;;) { + + // Find an enddo. + // Stop checking will be true if we are in main and hit a subroutine + // statement or if we are in a subroutine and hit an endsubroutine + // statement. + int cdex = -1; + int dlev = 1; + bool stop_checking = false; + for (int j=istart; j<(int)cmds.size(); j++) { + if (cmds[j].find_matching_enddo(dlev, stop_checking)) { + cdex = j; + istart = j+1; + break; + } + if (stop_checking) break; + } + + // If we don't find an enddo, then we are done. 
+ if (cdex == -1) break; + if (stop_checking) break; + + // We are done with this do loop, so we can get rid of the reference + // to it. + int nlevels = (int)do_start.size(); + if (nlevels > 0) { + do_start.erase(do_start.begin()+nlevels-1); + } + } +} // End end_do_ret + + + + +// =========================================================================== +// Check that an enddo was found for every do. +// =========================================================================== +void PowerParser::check_enddo(deque &do_start, stringstream &serr, int &ierr) +{ + for (int i=0; i<(int)do_start.size(); i++) { + int ido = do_start[i]; + cmds[ido].fatal_error(0, serr, ierr); + serr << "No enddo found for do statement." << endl; + ierr = 2; + } +} + + +// =========================================================================== +// Check all processed flags on every command. If any word on any command +// has not been processed, then that is a fatal error. +// =========================================================================== +void PowerParser::check_processed(bool &good) +{ + int rtvl = 0; + int ierr = 0; + stringstream serr; + + for (int i=0; i<(int)cmdsfp->size(); i++) { + (*cmdsfp)[i].check_processed(good, serr, ierr); + } + + process_error(serr, ierr); +} + + +// =========================================================================== +// Process the duplicate array values command. +// =========================================================================== +int PowerParser::process_dav_cmd() +{ + int rtvl = 0; + int return_value = 0; + + // Process the duplicate_array_values command. + // Note that duplicate array values are processed when the calls are made + // from the host code to actually extract information from the final + // buffer. 
+ for (int i=0; i<(int)cmdsfp->size(); i++) { + string cmdi = (*cmdsfp)[i].get_string(0); + if (cmdi != "duplicate_array_values") continue; + (*cmdsfp)[i].set_processed(true); + string vali = (*cmdsfp)[i].get_string(2); + if (vali == "warning") dup_fatal = 1; + else if (vali == "fatal") dup_fatal = 2; + else if (vali == "none") dup_fatal = 0; + else { + int ierr = 0; + stringstream serr; + (*cmdsfp)[i].fatal_error(0, serr, ierr); + serr << "The value for the duplicate_array_values command must" << endl << + "be either none, warning, or fatal" << endl; + ierr = 2; + cout << "The value for the duplicate_array_values command must" << endl; + cout << "be either none, warning, or fatal" << endl; + rtvl = process_error_return_int(serr, ierr); + if (rtvl > return_value) { + return_value = rtvl; + } + + if (rtvl > 0) { + cout << "Duplicate array values not recognized " << ierr << endl; + } + return return_value; + } + if (dup_fatal > 0 ) return dup_fatal; + } + return 0; +} + + +// =========================================================================== +// If commands appear more than once in the input file(s), print a warning +// to the user. +// =========================================================================== +void PowerParser::check_duplicates() +{ + // Check for and print and duplicate scalar commands in the input file. + //if (comm->isIOProc()) { + // cout << "********************************************************************************" << endl; + //} + //bool found_any = false; + bool fany; + check_dup_scalar(-1, fany); + //if (fany) found_any = true; + for (int wtn=0; wtn<(int)whenthens.size(); wtn++) { + check_dup_scalar(wtn, fany); + //if (fany) found_any = true; + } + wt_reset(); + + // If duplicate scalar commands are not found, we do not really need to + // pollute the output telling the user that. 
+ //if (!found_any) { + // if (comm->isIOProc()) { + // cout << "********** No Duplicate Scalar Commands Found in User Input File" << endl; + // } + //} + //if (comm->isIOProc()) { + // cout << "********************************************************************************" << endl; + // cout << endl << endl; + //} + + + // Remove and duplicate scalar commands from the final buffer. + remove_dup_scalar(-1); + for (int wtn=0; wtn<(int)whenthens.size(); wtn++) { + remove_dup_scalar(wtn); + } + wt_reset(); +} + +// =========================================================================== +// Check for duplicate scalar commands in the user input file. +// Print a table of any duplicate scalar commands to stdout. +// =========================================================================== +void PowerParser::check_dup_scalar(int wtn, bool &found_any) +{ + vector< vector > rows; + + vector row1; + row1.push_back(" "); + row1.push_back("Line"); + row1.push_back(" "); + rows.push_back(row1); + + vector row2; + row2.push_back("Filename"); + row2.push_back("Number"); + row2.push_back("Command"); + rows.push_back(row2); + + int n_header_rows = (int)rows.size(); + + if (wtn < 0) cmdsfp = &cmdsf; + else cmdsfp = whenthens[wtn].get_cmdsf_ptr(); + + vector cmds_done; + found_any = false; + for (int i=0; i<(int)cmdsfp->size(); i++) { + bool already_printed_i = false; + string cmdi = (*cmdsfp)[i].get_string(0); + string w1i = (*cmdsfp)[i].get_string(1); + if (w1i == "(") continue; + + bool already_done = false; + for (int j=0; j<(int)cmds_done.size(); j++) { + if (cmdi == cmds_done[j]) { + already_done = true; + break; + } + } + if (already_done) continue; + + bool found = false; + for (int j=i+1; j<(int)cmdsfp->size(); j++) { + string cmdj = (*cmdsfp)[j].get_string(0); + if (cmdi == cmdj) { + stringstream ss; + if (!already_printed_i) { + vector row; + set_dup_row(row, (*cmdsfp)[i], 0); + rows.push_back(row); + already_printed_i = true; + } + vector row; + set_dup_row(row, 
(*cmdsfp)[j], 0); + rows.push_back(row); + found = true; + found_any = true; + } + } + if (found) { + cmds_done.push_back(cmdi); + vector row; + row.push_back(" "); row.push_back(" "); row.push_back(" "); + rows.push_back(row); + } + } + + if (found_any) { + if (comm->isIOProc()) { + cout << endl; + if (wtn < 0) { + cout << "********** WARNING: Duplicate Scalar Commands Found in User Input File" << endl; + cout << " The following commands appear more than once in the user input file." << endl; + } + else { + cout << "********** WARNING: Duplicate Scalar Commands Found in when...then" << endl; + cout << " The following commands appear more than once in the when...then." << endl; + } + cout << " The last instance of the command will be used." << endl; + cout << " Is this what you want??" << endl << endl; + stringstream ssout; + Parser_utils putils(index_base); + putils.print_strings(rows, n_header_rows, 4, 3, 80, ssout); + cout << ssout.str() << endl; + } + } +} + + +// =========================================================================== +// Helper function for check_dup_scalar. +// The duplicate scalar commands are printed as rows with each row containing +// the file name the duplicate command was found in, the line number, and the +// command line itself. +// Given the duplicate command, this function generates that row of +// information and adds it to the row vector. +// =========================================================================== +void PowerParser::set_dup_row(vector &row, Cmd &cmdi, int iw) +{ + int lnum = cmdi.get_line_number(iw); + int file_lnum = cmdi.get_file_line_number(iw); + string fname = cmdi.get_filename(iw); + + row.push_back(fname); + stringstream ss; + ss << file_lnum; + row.push_back(ss.str()); + row.push_back(cmd_strings[lnum-1]); +} + + + +// =========================================================================== +// Remove duplicate scalar commands in the user input file. 
+// Keep only the last instance of the command. +// =========================================================================== +void PowerParser::remove_dup_scalar(int wtn) +{ + if (wtn < 0) cmdsfp = &cmdsf; + else cmdsfp = whenthens[wtn].get_cmdsf_ptr(); + + for (int i=(int)cmdsfp->size()-1; i>=0; i--) { + string cmdi = (*cmdsfp)[i].get_string(0); + string w1i = (*cmdsfp)[i].get_string(1); + if (w1i == "(") continue; + + for (int j=i-1; j>=0; j--) { + string cmdj = (*cmdsfp)[j].get_string(0); + if (cmdi == cmdj) { + cmdsfp->erase(cmdsfp->begin()+j); + i--; + } + } + } +} + +// =========================================================================== +// Helper function to convert doubles to strings. +// =========================================================================== +std::string const to_string( double const x ) +{ + std::ostringstream tmp; + tmp << std::setprecision(16) << x; + return tmp.str(); +} + +// =========================================================================== +// Initialize the parser. This will typically be called by the +// constructors. 
+// =========================================================================== +void PowerParser::init() +{ + line_number = 0; + cmdsfp = &cmdsf; + dup_fatal = 1; + ierr_global = 0; + + // make a little smaller (2.0) to avoid floating point excepting on some + // compilers + double huge_double = numeric_limits::max( )/2.0; + Word whuge_double(huge_double, 1, 1, "", NULL); + Variable vhuge_double("$huge_double", whuge_double.get_string(), true, "largest double/2.0"); + vmap.insert(pair(vhuge_double.get_varname(), vhuge_double)); + + float huge_float = numeric_limits::max( ); + Word whuge_float(huge_float, 1, 1, "", NULL); + Variable vhuge_float("$huge_float", whuge_float.get_string(), true, "largest float"); + vmap.insert(pair(vhuge_float.get_varname(), vhuge_float)); + + int huge_int = numeric_limits::max( ); + Word whuge_int(huge_int, 1, 1, "", NULL); + Variable vhuge_int("$huge_int", whuge_int.get_string(), true, "largest integer"); + vmap.insert(pair(vhuge_int.get_varname(), vhuge_int)); + + double tiny_double = numeric_limits::min( ); + Word wtiny_double(tiny_double, 1, 1, "", NULL); + Variable vtiny_double("$tiny_double", wtiny_double.get_string(), true, "tiniest double"); + vmap.insert(pair(vtiny_double.get_varname(), vtiny_double)); + + float tiny_float = numeric_limits::min( ); + Word wtiny_float(tiny_float, 1, 1, "", NULL); + Variable vtiny_float("$tiny_float", wtiny_float.get_string(), true, "tiniest float"); + vmap.insert(pair(vtiny_float.get_varname(), vtiny_float)); + + int tiny_int = numeric_limits::min( ); + Word wtiny_int(tiny_int, 1, 1, "", NULL); + Variable vtiny_int("$tiny_int", wtiny_int.get_string(), true, "tiniest integer"); + vmap.insert(pair(vtiny_int.get_varname(), vtiny_int)); + + int ncores_tot = comm->getNumProcs(); + Word wncores_tot(ncores_tot, 1, 1, "", NULL); + Variable vncores_tot("$ncores_tot", wncores_tot.get_string(), true, "total number of cores"); + vmap.insert(pair(vncores_tot.get_varname(), vncores_tot)); + + + // 
*********************************************************************** + // Define the default functions. + Function facos("acos", true, 1, "real", + "arccosine, radians, arg -1 to 1"); + fmap.insert(pair(facos.get_name(), facos)); + + Function fasin("asin", true, 1, "real", + "arcsine, radians, arg -1 to 1"); + fmap.insert(pair(fasin.get_name(), fasin)); + + Function fatan("atan", true, 1, "real", "arctangent, returns radians"); + fmap.insert(pair(fatan.get_name(), fatan)); + + Function fceil("ceil", true, 1, "real", "round up (smallest int >= arg)"); + fmap.insert(pair(fceil.get_name(), fceil)); + + Function fcos("cos", true, 1, "real", "cosine, arg in radians"); + fmap.insert(pair(fcos.get_name(), fcos)); + + Function fcosh("cosh", true, 1, "real", "hyperbolic cosine"); + fmap.insert(pair(fcosh.get_name(), fcosh)); + + Function fexp("exp", true, 1, "real", "exponential"); + fmap.insert(pair(fexp.get_name(), fexp)); + + Function ffabs("fabs", true, 1, "real", "absolute value of a real"); + fmap.insert(pair(ffabs.get_name(), ffabs)); + + Function ffloor("floor", true, 1, "real", + "round down (largest int <= arg)"); + fmap.insert(pair(ffloor.get_name(), ffloor)); + + Function flog("log", true, 1, "real", "log to base e, arg>0"); + fmap.insert(pair(flog.get_name(), flog)); + + Function flog10("log10", true, 1, "real", "log to base 10, arg>0"); + fmap.insert(pair(flog10.get_name(), flog10)); + + Function fsin("sin", true, 1, "real", "sine, arg in radians"); + fmap.insert(pair(fsin.get_name(), fsin)); + + Function fsinh("sinh", true, 1, "real", "hyperbolic sine"); + fmap.insert(pair(fsinh.get_name(), fsinh)); + + Function fsqrt("sqrt", true, 1, "real", "square root (arg >= 0)"); + fmap.insert(pair(fsqrt.get_name(), fsqrt)); + + Function ftan("tan", true, 1, "real", "tangent, arg in radians"); + fmap.insert(pair(ftan.get_name(), ftan)); + + Function ftanh("tanh", true, 1, "real", "hyperbolic tangent"); + fmap.insert(pair(ftanh.get_name(), ftanh)); + + Function 
fatan2("atan2", true, 2, "real", "arctangent, 2 args"); + fmap.insert(pair(fatan2.get_name(), fatan2)); + + Function ffmod("fmod", true, 2, "real", "remainder of arg1/arg2"); + fmap.insert(pair(ffmod.get_name(), ffmod)); + + Function fpow("pow", true, 2, "real", "arg1 raised to arg2 power"); + fmap.insert(pair(fpow.get_name(), fpow)); + + Function ffmax("max", true, 2, "real", "return the greater of two args"); + fmap.insert(pair(ffmax.get_name(), ffmax)); + + Function ffmin("min", true, 2, "real", "return the lesser of two args"); + fmap.insert(pair(ffmin.get_name(), ffmin)); + + Function fstrlen("strlen", true, 1, "string", "number of chars in arg"); + fmap.insert(pair(fstrlen.get_name(), fstrlen)); + + Function fstrcat("strcat", true, 2, "string", "concatenate two strings"); + fmap.insert(pair(fstrcat.get_name(), fstrcat)); + + Function fstrerase("strerase", true, 3, "string", "erase chars from string"); + fmap.insert(pair(fstrerase.get_name(), fstrerase)); + + Function fstrinsert("strinsert", true, 3, "string", "insert chars into string"); + fmap.insert(pair(fstrinsert.get_name(), fstrinsert)); + + Function fstrsubstr("strsubstr", true, 3, "string", "get sub string"); + fmap.insert(pair(fstrsubstr.get_name(), fstrsubstr)); + + Function fstrtrim("strtrim", true, 1, "string", "remove trailing whitespace"); + fmap.insert(pair(fstrtrim.get_name(), fstrtrim)); + + Function fdefined("defined", true, 1, "logical", "is a variable defined or not"); + fmap.insert(pair(fdefined.get_name(), fdefined)); +} + +void PowerParser::dictionary_add(char *name, double value, bool pred, char *vdesc) +{ + Variable *Var_entry = new Variable(name, to_string(value), pred, vdesc); + vmap.insert(pair(Var_entry->get_varname(), *Var_entry)); +} + +void PowerParser::dictionary_env_add(char *name, bool pred) +{ + const char *getenv_p; + const char *getenv_p_not_defined = ""; + + getenv_p = getenv(name); + if( getenv_p == NULL ){ + getenv_p = getenv_p_not_defined; + } + + int len_name = 
strlen(name); + + // One extra character for $ and another for null termination + char *varname = (char *)malloc(sizeof(char)*(len_name+2)); + + varname[0] = '$'; + strncpy(varname+1, name, len_name+1); + + Variable *Var_entry = new Variable(varname, getenv_p, pred, name); + vmap.insert(pair(Var_entry->get_varname(), *Var_entry)); + + free(varname); +} + + +// =========================================================================== +// Read a file into a string. +// This is only done on the io processor. +// =========================================================================== +void PowerParser::read_into_string(string filename, string &s_in) +{ + if(comm != NULL) { + if(!comm->isIOProc()) return; + } + + // Its OK if an input file is not specified. + if (filename == " ") { + s_in = " "; + return; + } + + // Open the input data file. + ifstream in_stream(filename.c_str(), ios::in); + if( !in_stream ) { + stringstream serr; + serr << endl << "*** FATAL ERROR" << endl; + serr << "Could not open input (or include) file." << endl; + serr << "The name of the file is " << filename << endl; + if (filename == "") { + serr << "(The file name is blank.)" << endl; + } + int ierr = 2; + process_error(serr, ierr); + return; + } + + // Read each character and store in a string. We use a string so we + // don't have to fiddle with memory allocation and reallocation. + // There are more efficient ways to do this, but whatever way is + // used has to handle arbitrarily long files. + char c; + while (in_stream) { + in_stream.get(c); + if (!in_stream) break; + s_in += c; + } + + // Check for a 0 size input file, this might be an indication of a + // full file system. + if( (int)s_in.size() == 0 ) { + stringstream serr; + serr << endl << "*** FATAL ERROR" << endl; + serr << "The name of the input file is " << filename << endl; + serr << "This file exists, but its size is 0 bytes, (empty file)." << endl; + serr << "Perhaps the file system is full??" 
<< endl; + serr << "Use a unix command like \"df -k .\" to find out how full the" + " file system is." << endl; + int ierr = 2; + process_error(serr, ierr); + return; + } + + // Now that the file contents are transferred to a string we do not need + // the data file anymore and can close it. + in_stream.close(); +} + + +//+*************************************************************************** +// *************************************************************************** +// Driver functions for getting values from the commands. +// *************************************************************************** +// *************************************************************************** + +// =========================================================================== +// Driver for getting boolean values as integers. +// This works for arrays of any dimension, 0,1,2,3,... +// =========================================================================== +void PowerParser::get_bool_int(string &cname, + int *cvalue, + const vector &size, + bool skip) +{ + // Note that we do not default cvalue. Its value only changes if the + // command is found. + + // Used in checking for duplicate array values + int dim = (int)size.size(); + int tot_size = 1; + for (int i=0; i dup_vals(tot_size, 0); + vector dup_cmd1(tot_size); + vector dup_wdex1(tot_size, -1); + + int ierr = 0; + stringstream serr; + if (! 
case_sensitive) { + transform(cname.begin(), cname.end(), cname.begin(), tolower); + } + for (int i=0; i<(int)cmdsfp->size(); i++) { + if ((*cmdsfp)[i].get_cmd_name() == cname) { + (*cmdsfp)[i].get_bool_int(cname, cvalue, size, dup_cmd1, dup_wdex1, + dup_fatal, dup_vals, skip, serr, ierr); + processed_cmd_names.push_back(cname); + } + } + + // Process errors, global abort if ierr==2 + process_error(serr, ierr); +} + +void PowerParser::get_bool_int(const char *cname, + int *cvalue, + const vector &size, + bool skip) +{ + string cstring(cname); + get_bool_int( cstring, cvalue, size, skip); +} + +void PowerParser::get_bool(string &cname, + bool *cvalue, + const vector &size, + bool skip) +{ + // Note that we do not default cvalue. Its value only changes if the + // command is found. + + // Used in checking for duplicate array values + int dim = (int)size.size(); + int tot_size = 1; + for (int i=0; i dup_vals(tot_size, 0); + vector dup_cmd1(tot_size); + vector dup_wdex1(tot_size, -1); + + int ierr = 0; + stringstream serr; + for (int i=0; i<(int)cmdsfp->size(); i++) { + if ((*cmdsfp)[i].get_cmd_name() == cname) { + (*cmdsfp)[i].get_bool(cname, cvalue, size, dup_cmd1, dup_wdex1, + dup_fatal, dup_vals, skip, serr, ierr); + processed_cmd_names.push_back(cname); + } + } + + // Process errors, global abort if ierr==2 + process_error(serr, ierr); +} + +void PowerParser::get_bool(const char *cname, + bool *cvalue, + const vector &size, + bool skip) +{ + string cstring(cname); + get_bool( cstring, cvalue, size, skip); +} + + +// =========================================================================== +// Driver for getting integer values. +// This works for arrays of any dimension, 0,1,2,3,... +// =========================================================================== +template< typename T > +void PowerParser::get_int(string &cname, T *cvalue, const vector &size, bool skip) +{ + // Note that we do not default cvalue. Its value only changes if the + // command is found. 
+ + // Used in checking for duplicate array values + int dim = (int)size.size(); + int tot_size = 1; + for (int i=0; i dup_vals(tot_size, 0); + vector dup_cmd1(tot_size); + vector dup_wdex1(tot_size, -1); + + int ierr = 0; + stringstream serr; + if (! case_sensitive) { + transform(cname.begin(), cname.end(), cname.begin(), tolower); + } + for (int i=0; i<(int)cmdsfp->size(); i++) { + if ((*cmdsfp)[i].get_cmd_name() == cname) { + (*cmdsfp)[i].get_int(cname, cvalue, size, dup_cmd1, dup_wdex1, + dup_fatal, dup_vals, skip, serr, ierr); + processed_cmd_names.push_back(cname); + } + } + + // Process errors, global abort if ierr==2 + process_error(serr, ierr); +} + +//! Explicit instantiation of supported template types. If more types are +//! needed those explicit versions must be listed here. We are not using +//! automatic inclusion (we would need to move the function definition into +//! the header file for that). The listed versions below are the only ones +//! that will be included in the library. +template void PowerParser::get_int( + string &cname, int *cvalue, const vector &size, bool skip); +template void PowerParser::get_int( + string &cname, int64_t *cvalue, const vector &size, bool skip); + +template< typename T > +void PowerParser::get_int(const char *cname, + T *cvalue, + const vector &size, + bool skip) +{ + string cstring(cname); + get_int( cstring, cvalue, size, skip); +} + +template void PowerParser::get_int( + const char *cname, int *cvalue, const vector &size, bool skip); +template void PowerParser::get_int( + const char *cname, int64_t *cvalue, const vector &size, bool skip); + +// =========================================================================== +// Driver for getting real values. +// This works for arrays of any dimension, 0,1,2,3,... 
+// =========================================================================== +void PowerParser::get_real(string &cname, + double *cvalue, + const vector &size, + bool skip) +{ + // Note that we do not default cvalue. Its values only change if the + // command is found. + + // Used in checking for duplicate array values + int dim = (int)size.size(); + int tot_size = 1; + for (int i=0; i dup_vals(tot_size, 0); + vector dup_cmd1(tot_size); + vector dup_wdex1(tot_size, -1); + + int ierr = 0; + stringstream serr; + if (! case_sensitive) { + transform(cname.begin(), cname.end(), cname.begin(), tolower); + } + for (int i=0; i<(int)cmdsfp->size(); i++) { + if ((*cmdsfp)[i].get_cmd_name() == cname) { + (*cmdsfp)[i].get_real(cname, cvalue, size, dup_cmd1, dup_wdex1, + dup_fatal, dup_vals, skip, serr, ierr); + processed_cmd_names.push_back(cname); + } + } + + // Process errors, global abort if ierr==2 + process_error(serr, ierr); +} + +void PowerParser::get_real(const char *cname, + double *cvalue, + const vector &size, + bool skip) +{ + string cstring(cname); + get_real( cstring, cvalue, size, skip); +} + +// =========================================================================== +// Driver for getting character strings. +// This works for arrays of any dimension, 0,1,2,3,... +// =========================================================================== +void PowerParser::get_char(string &cname, + vector &vstr, + const vector &size, + bool single_char, + bool skip) +{ + // Note that we do not default cvalue. Its value only changes if the + // command is found. + + // Used in checking for duplicate array values + int dim = (int)size.size(); + int tot_size = 1; + for (int i=0; i dup_vals(tot_size, 0); + vector dup_cmd1(tot_size); + vector dup_wdex1(tot_size, -1); + + int ierr = 0; + stringstream serr; + if (! 
case_sensitive) { + transform(cname.begin(), cname.end(), cname.begin(), tolower); + } + for (int i=0; i<(int)cmdsfp->size(); i++) { + if ((*cmdsfp)[i].get_cmd_name() == cname) { + (*cmdsfp)[i].get_char(cname, vstr, size, single_char, dup_cmd1, + dup_wdex1, dup_fatal, dup_vals, + skip, serr, ierr); + processed_cmd_names.push_back(cname); + } + } + + // Process errors, global abort if ierr==2 + process_error(serr, ierr); +} + +void PowerParser::get_char(const char *cname, + vector &vstr, + const vector &size, + bool single_char, + bool skip) +{ + string cstring(cname); + get_char( cstring, vstr, size, single_char, skip); +} + +// =========================================================================== +// Driver for getting array sizes. +// =========================================================================== +void PowerParser::get_size(string &cname, vector &size) +{ + int ierr = 0; + stringstream serr; + if (! case_sensitive) { + transform(cname.begin(), cname.end(), cname.begin(), tolower); + } + for (int i=0; i<(int)cmdsfp->size(); i++) { + if ((*cmdsfp)[i].get_cmd_name() == cname) { + (*cmdsfp)[i].get_size(size, serr, ierr); + } + } + + // Process errors, global abort if ierr==2 + process_error(serr, ierr); +} + + +// =========================================================================== +// Driver for getting array sizes. Version to get all sizes +// =========================================================================== +void PowerParser::get_sizeb(string &cname, vector &size) +{ + int ierr = 0; + stringstream serr; + if (! 
case_sensitive) { + transform(cname.begin(), cname.end(), cname.begin(), tolower); + } + for (int i=0; i<(int)cmdsfp->size(); i++) { + if ((*cmdsfp)[i].get_cmd_name() == cname) { + (*cmdsfp)[i].get_sizeb(size, serr, ierr); + } + } + + // Process errors, global abort if ierr==2 + process_error(serr, ierr); +} + + +// =========================================================================== +// Check if the input command, cname, appears in the final, parsed user input. +// +// The two outputs are in_input and in_whenthen, +// in_input command is in (or not) the main part of the input, i.e. +// everything except the when...then statements. +// in_whenthen command is in (or not) at least one when...then statement. +// =========================================================================== +void PowerParser::cmd_in_input(string &cname, bool &in_input, bool &in_whenthen) +{ + in_input = false; + in_whenthen = false; + + if (! case_sensitive) { + transform(cname.begin(), cname.end(), cname.begin(), tolower); + } + for (int i=0; i<(int)cmdsfp->size(); i++) { + if ((*cmdsfp)[i].get_cmd_name() == cname) { + in_input = true; + break; + } + } + + for (int wtn=0; wtn<(int)whenthens.size(); wtn++) { + cmdsfp = whenthens[wtn].get_cmdsf_ptr(); + for (int i=0; i<(int)cmdsfp->size(); i++) { + if ((*cmdsfp)[i].get_cmd_name() == cname) { + in_whenthen = true; + break; + } + } + if (in_whenthen) break; + } + + wt_reset(); +} + + +// =========================================================================== +// Set the processed flag for all words for all commands that match cname. +// The value to set the processed flag to is bval. +// This sets the processed flag for commands in the final buffer and in the +// when...then final buffers. +// =========================================================================== +void PowerParser::cmd_set_processed(string &cname, bool bval) +{ + if (! 
case_sensitive) { + transform(cname.begin(), cname.end(), cname.begin(), tolower); + } + for (int i=0; i<(int)cmdsfp->size(); i++) { + if ((*cmdsfp)[i].get_cmd_name() == cname) { + (*cmdsfp)[i].set_processed(bval); + } + } + for (int wtn=0; wtn<(int)whenthens.size(); wtn++) { + cmdsfp = whenthens[wtn].get_cmdsf_ptr(); + for (int i=0; i<(int)cmdsfp->size(); i++) { + if ((*cmdsfp)[i].get_cmd_name() == cname) { + (*cmdsfp)[i].set_processed(bval); + } + } + } + wt_reset(); +} + + +// =========================================================================== +// =========================================================================== +void PowerParser::cmd_set_reprocessed(bool bval) +{ + for (int c=0; c<(int)processed_cmd_names.size(); c++) { + string cname = processed_cmd_names[c]; + cmd_set_processed(cname, bval); + } +} + + +// =========================================================================== +// Process errors. +// =========================================================================== +void PowerParser::process_error_global(int &return_value) +{ + int return_val_local; + int ierr = ierr_global; + + return_val_local = 0; + if (ierr == 0) { + return_value = 0; + return; + } + + return_val_local = process_error_return_int(serr_global, ierr); + return_value = return_val_local; +} + +// =========================================================================== +// + +void PowerParser::process_error(stringstream &serr, int &ierr) +{ + if (ierr == 0) return; + + if (ierr == 3) { + serr_global << serr.str(); + ierr_global = ierr; + return; + } + + string err_type = "Warnings"; + if (ierr == 2) err_type = "Fatal errors"; + + if (comm->isIOProc()) { + cout << endl; + cout << err_type << " have been encountered while parsing the user" + " input file." << endl; + cout << "Note that often fixing the first error will also fix the" + " other errors." << endl; + cout << serr.str() << endl; + fflush(NULL); + } + if (ierr == 2) { + // Force all processors to quit. 
+ // We have the problem that the non-IO procs may kill the calculation + // before the IO proc can finish printing the error messages, thus + // force the IO proc to do the global abort but still allow the + // possibility that the IO proc may not have aborted, some other + // proc might have. + if (comm->isIOProc()) { + comm->global_abort_parser(); + } + else { + sleep(2); + comm->global_abort_parser(); + } + } + + + // A possible sleep function if the library sleep function is not portable. + // #include + // void sleep(unsigned int mseconds) + // { + // clock_t goal = mseconds + clock(); + // while (goal > clock()); + // } + + + // A better function is the following since it uses CLOCKS_PER_SEC and + // thus does not assume its value. + // + //#include + //void wait ( int seconds ) + //{ + // clock_t endwait; + // endwait = clock () + seconds * CLOCKS_PER_SEC ; + // while (clock() < endwait) {} + //} + + // We might want to put this in Comm, i.e. modify global_abort. +} + + +// =========================================================================== +int PowerParser::process_error_return_int(stringstream &serr, int &ierr) +{ + int return_value; + + return_value = ierr; + + if (ierr == 0) return(return_value); + + return_value = ierr; + + if (ierr == 3) { + serr_global << serr.str(); + ierr_global = ierr; + cout << "Error encountered in process_error_return_int -- err code is " << ierr << endl; + fflush(NULL); + } + + if (comm->isIOProc()) { + cout << endl; + cout << "Error encountered while parsing the user input file -- err code is " + << ierr << endl; + cout << "Note that often fixing the first error will also fix the" + " other errors." << endl; + cout << serr.str() << endl; + cout.flush(); + fflush(NULL); + } + + return(return_value); + + + // A possible sleep function if the library sleep function is not portable. 
+ // #include + // void sleep(unsigned int mseconds) + // { + // clock_t goal = mseconds + clock(); + // while (goal > clock()); + // } + + + // A better function is the following since it uses CLOCKS_PER_SEC and + // thus does not assume its value. + // + //#include + //void wait ( int seconds ) + //{ + // clock_t endwait; + // endwait = clock () + seconds * CLOCKS_PER_SEC ; + // while (clock() < endwait) {} + //} + +} + + +//+*************************************************************************** +// *************************************************************************** +// When...then commands +// *************************************************************************** +// *************************************************************************** + +// =========================================================================== +// Check if a when...then condition is satisfied. +// =========================================================================== +void PowerParser::wt_check(int wtn, vector &code_varnames, + vector &code_values, + vector &vv_active, int *wtci) +{ + stringstream serr; + int ierr = 0; + whenthens[wtn-1].check_wt(code_varnames, code_values, vv_active, wtci, + serr, ierr); + process_error(serr, ierr); + if ( (*wtci) == 1) { + cmdsfp = whenthens[wtn-1].get_cmdsf_ptr(); + } +} + + +// =========================================================================== +// Set the commands final buffer pointer. +// This is also done in the check routine. +// =========================================================================== +void PowerParser::wt_set_cmdsfp(int wtn) +{ + cmdsfp = whenthens[wtn-1].get_cmdsf_ptr(); +} + + +// =========================================================================== +// Reset the commands final buffer pointer. 
+// =========================================================================== +void PowerParser::wt_reset() +{ + cmdsfp = &cmdsf; +} + + +// =========================================================================== +// =========================================================================== +void PowerParser::wt_casize(int wtn, int *wt_casize) +{ + // To suppress compiler warnings of unused parameters + assert(wt_casize == wt_casize); + + whenthens[wtn-1].get_char_array_size(wt_casize); +} + + +// =========================================================================== +// =========================================================================== +void PowerParser::wt_carray(int wtn, char *wt_ca, int wt_casize) +{ + // To suppress compiler warnings of unused parameters + assert(wt_casize == wt_casize); + + string sc; + whenthens[wtn-1].get_char_array(sc); + for (int i=0; i<(int)sc.size(); i++) { + wt_ca[i] = sc[i]; + } +} + + +// =========================================================================== +// =========================================================================== +void PowerParser::wt_satsize(int wtn, int *wt_satsize) +{ + // To suppress compiler warnings of unused parameters + assert(wt_satsize == wt_satsize); + + whenthens[wtn-1].get_satsize(wt_satsize); +} + + +// =========================================================================== +// =========================================================================== +void PowerParser::wt_getsat(int wtn, int *wt_sat, int wt_satsize) +{ + // To suppress compiler warnings of unused parameters + assert(wt_satsize == wt_satsize); + + whenthens[wtn-1].getsat(wt_sat); +} + + +// =========================================================================== +// =========================================================================== +void PowerParser::wt_setsat(int wtn, int *wt_sat, int wt_satsize) +{ + // To suppress compiler warnings of unused parameters + assert(wt_satsize == 
wt_satsize); + + whenthens[wtn-1].setsat(wt_sat); +} + + +// =========================================================================== +// Get and Set the processed flag for a whenthen. +// =========================================================================== +void PowerParser::wt_getprocessed(int wtn, int *wtp) +{ + whenthens[wtn-1].getprocessed(wtp); +} + +void PowerParser::wt_setprocessed(int wtn, int wtp) +{ + whenthens[wtn-1].setprocessed(wtp); +} + + +// =========================================================================== +// Get and Set the sequence index for a whenthen. +// =========================================================================== +void PowerParser::wt_getseq(int wtn, int *wtseq) +{ + whenthens[wtn-1].getseq(wtseq); +} + +void PowerParser::wt_setseq(int wtn, int wtseq) +{ + whenthens[wtn-1].setseq(wtseq); +} + + + + +//+*************************************************************************** +// *************************************************************************** +// restart_block commands +// *************************************************************************** +// *************************************************************************** + +// =========================================================================== +// Check if a restart block condition is satisfied. 
+// =========================================================================== +void PowerParser::rb_check(vector &code_varnames, + vector &code_values, + vector &vv_active, int *rbci, + int *rb_ntriggered, int *rb_triggered_indices) +{ + stringstream serr; + int ierr = 0; + *rbci = 0; + *rb_ntriggered = 0; + for (int i=0; i<(int)restartblocks.size(); i++) { + int ri = 0; + restartblocks[i].check_rb(code_varnames, code_values, vv_active, &ri, + serr, ierr); + if (ri == 1) { + *rbci = 1; + rb_triggered_indices[*rb_ntriggered] = i; + *rb_ntriggered += 1; + } + } + process_error(serr, ierr); +} + + +// =========================================================================== +// Get/set the restart block names +// =========================================================================== +void PowerParser::get_rb_names(vector &rb_names_vstr) +{ + rb_names_vstr.clear(); + for (int i=0; i<(int)restartblocks.size(); i++) { + rb_names_vstr.push_back(restartblocks[i].get_name()); + } +} +void PowerParser::set_rb_names(vector &rb_names_vstr) +{ + bnames_on_dump.clear(); + for (int i=0; i<(int)rb_names_vstr.size(); i++) { + bnames_on_dump.push_back(rb_names_vstr[i]); + } +} + + +// =========================================================================== +// Get/set the restart block activity flags. 
+// =========================================================================== +void PowerParser::get_rb_aflags(int *rb_aflags) +{ + for (int i=0; i<(int)restartblocks.size(); i++) { + rb_aflags[i] = restartblocks[i].get_aflag(); + } +} +void PowerParser::set_rb_aflags(int *rb_aflags, int rb_num) +{ + baflags_on_dump.clear(); + for (int j=0; j &rb_varnames_vstr) +{ + rb_varnames_vstr.clear(); + for (int i=0; i<(int)restartblocks.size(); i++) { + int numv = restartblocks[i].get_num_varnames(); + for (int j=0; jisIOProc()) { + cout << ssc.str(); + } +} + +void PowerParser::list_rb_start() +{ + ssfout.str(""); + list_rb_ss(ssfout); + ssfout_current_pos = 0; +} + +void PowerParser::list_rb_ss(stringstream &ssc) +{ + int rblen = (int)restartblocks.size(); + if (rblen <= 0) { + ssc << endl << "No restart blocks have been specified." + << endl << endl; + return; + } + + for (int rb=0; rb= rblen) { + ssc << endl << "List restart block error: rb>=rblen" + << endl << endl; + return; + } + + list_one_rb_ss(ssc, rb); +} + + +// =========================================================================== +// List info for one restart block, index=rb +// =========================================================================== +void PowerParser::list_one_rb_ss(stringstream &ssc, int rb) +{ + ssc << endl; + ssc << "** Echo restart block info, restart block name = " + << restartblocks[rb].get_name() << endl; + string s = "false"; + if (restartblocks[rb].get_aflag() == 1) s = "true"; + ssc << " Active flag = " << s << endl; + ssc << " Condition for this restart block =" << endl; + restartblocks[rb].list_condition(" ", " ", ssc); + ssc << endl; + ssc << " Number of sub-conditions = " << + restartblocks[rb].get_satsize() << endl; + for (int i=0; iisIOProc()) { + cout << ssv.str(); + } +} + +void PowerParser::list_vars_start() +{ + ssfout.str(""); + ssfout << pre_defined_varss.str() << endl; + list_vars_ss("", "", "", ssfout); + ssfout_current_pos = 0; +} + +void 
PowerParser::list_vars_ss(string lv1, string lv2, string var_to_list, + stringstream &ssvars) +{ + if (!comm->isIOProc()) return; + ssvars << lv1 << endl; + + map::iterator p; + + // Holds the various header and data rows to be printed. + vector< vector > rows; + + // Construct the header row. + vector header_row; + header_row.push_back("Variable name"); + header_row.push_back("Value"); + header_row.push_back("Description"); + rows.push_back(header_row); + int n_header_rows = (int)rows.size(); + + // Set the number of columns. + int ncol = (int)header_row.size(); + + // Construct the data rows. + for(p = vmap.begin(); p != vmap.end(); p++) { + string vname = p->first; + if (var_to_list != "") { + if (vname != var_to_list) continue; + } + int nvalues = p->second.get_nvalues(); + string description = p->second.get_description(); + + int ndim = p->second.get_ndim(); + vector istart(ndim,0); + + for (int n=0; n 1) { + p->second.get_indices(n, istart); + stringstream ss; + ss << vname << "("; + for (int d=0; dsecond.get_var_value(n); + vector sv; + for (int c=0; cisIOProc()) { + cout << ssf.str(); + } + + // Alternate method. + //list_funcs_start(); + //for (;;) { + // string sline; + // if (!get_ssfout_line(sline)) break; + // if (comm->isIOProc()) { + // cout << sline << endl; + // } + //} +} + +void PowerParser::list_funcs_start() +{ + ssfout.str(""); + list_funcs_ss("", "", ssfout); + ssfout_current_pos = 0; +} + +void PowerParser::list_funcs_ss(string lf1, string lf2, stringstream &ssfunc) +{ + if (!comm->isIOProc()) return; + ssfunc << lf1 << endl; + + map::iterator p; + + // Holds the various header and data rows to be printed. + vector< vector > rows; + + // Construct the header row. + vector header_row; + header_row.push_back("Function name"); + header_row.push_back("nargs"); + header_row.push_back("type"); + header_row.push_back("Description"); + rows.push_back(header_row); + int n_header_rows = (int)rows.size(); + + // Set the number of columns. 
+ int ncol = (int)header_row.size(); + + // Construct the data rows. + for(p = fmap.begin(); p != fmap.end(); p++) { + string fname = p->first; + vector sv; + int nargs = p->second.get_num_args(); + stringstream ss; + ss << nargs; + string type = p->second.get_type(); + string fdes = p->second.get_description(); + for (int c=0; cisIOProc()) { + cout << ssc.str(); + } +} + +void PowerParser::list_cmdsf_start() +{ + ssfout.str(""); + list_cmdsf_ss("", "", ssfout); + ssfout_current_pos = 0; +} + +void PowerParser::list_cmdsf_ss(string lc1, string lc2, + stringstream &ssc) +{ + if (!comm->isIOProc()) return; + ssc << lc1; + + for (int i=0; i<(int)cmdsfp->size(); i++) { + (*cmdsfp)[i].print_using_words_fm(ssc); + //(*cmdsfp)[i].print_all_words(ssc); + //(*cmdsfp)[i].print_original_string(ssc); + ssc << endl; + } + + for (int wt=0; wt<(int)whenthens.size(); wt++) { + ssc << "when ("; + whenthens[wt].list_condition("", " ", ssc); + ssc << ") then " << endl; + whenthens[wt].list_cmdsf_ss(ssc); + ssc << "endwhen" << endl; + } + + ssc << lc2; +} + + +void PowerParser::list_wt_cmdsf() +{ + stringstream ssc; + list_wt_cmdsf_ss(ssc); + if (comm->isIOProc()) { + cout << ssc.str(); + } +} + +void PowerParser::list_wt_cmdsf_start() +{ + ssfout.str(""); + list_wt_cmdsf_ss(ssfout); + ssfout_current_pos = 0; +} + +void PowerParser::list_wt_cmdsf_ss(stringstream &ssc) +{ + int wtlen = (int)whenthens.size(); + if (wtlen <= 0) { + ssc << endl << "No when...then commands have been specified." + << endl << endl; + return; + } + + for (int wt=0; wt= strn_len) return false; + + for (int i = current_pos; i= strn_len) return false; + + bool ignore_sc = false; + for (int i = current_pos; i &vstr, + int nv, int nchar) +{ + // Temporary storage for each string in the array of characters. + char *cnchar = new char[nchar]; + + // Loop through all the strings in the array of characters. 
+ for (int i=0; i= 0; c--) { + if (cnchar[c] != ' ') { + cnchar_len = c+1; + break; + } + } + + // Create the C++ string. + string s(cnchar,cnchar_len); + + // Remove leading spaces. + int i2=0; + for (int c=0; c<(int)s.size(); c++) { + if (s[c] != ' ') { + i2=c; + break; + } + } + if (i2 != 0) s.erase(s.begin(), s.begin()+i2); + + // Add the string to the vector of strings. + vstr.push_back(s); + } + delete [] cnchar; +} + + +// =========================================================================== +// Convert a vector of C++ strings into a packed array of characters. +// +// chars_1d The array of characters (output). This is composed of a sequence +// of strings, each one nchar long. The number of strings is nv. +// vstr Vector of C++ strings (input). There will be nv number of +// C++ strings in this vector. The length of each C++ string will +// vary, whitespace is added to each C++ string to make its length +// nchar. +// nv Number of strings in chars_1d (input). +// nchar Number of characters in each string in chars_1d (input). +// +// Why would anyone want to do this? +// When passing strings between Fortran and C++ it is cleaner and easier to +// pass a packed array of single characters. This routine takes the vector +// of strings and converts that to a packed character array. +// =========================================================================== +void PowerParser::vstr_to_chars(char *chars_1d, vector &vstr, + int nv, int nchar) +{ + // To suppress compiler warnings of unused parameters + assert(nv == nv); + + // Loop through each string in the vector of strings. + for (int strdex=0; strdex<(int)vstr.size(); strdex++) { + + // Starting location in the 1d array of characters for each string. + int i1d = strdex * nchar; + + // Number of characters in the C++ string. Should be smaller or + // equal to nchar, but we handle the case where it is larger + // than nchar. 
+ int nc = (int)vstr[strdex].size(); + if (nc > nchar) nc = nchar; + + // Copy the string into the 1d character array. + for (int c=i1d; c +#include +#include +#include +#include + +#include "Word.hh" + +namespace PP +{ +using std::string; +using std::deque; +using std::vector; +using std::map; +using std::stringstream; + +class Restartblock +{ + +public: + Restartblock(); + Restartblock(int &nrb, Cmd &cmdi, bool &skiprb, + bool &single_line_rb, + deque &bnames_on_dump, + deque &baflags_on_dump, + stringstream &serr, int &ierr); + + Restartblock(int &nrb, Cmd &cmdi, bool &skiprb, + bool &single_line_rb, + deque &bnames_on_dump, + deque &baflags_on_dump, + deque &rbsatprb_on_dump, + deque &rbsat_on_dump, + stringstream &serr, int &ierr); + + + void check_rb(vector &code_varnames, + vector &code_values, + vector &vv_active, int *rbci, + stringstream &serr, int &ierr); + + void list_condition(string offset1, string offset2, + stringstream &ssc); + + string get_name() { return name; } + int get_aflag() { return active; } + void set_aflag(int af) { active = af; } + int get_satsize() { return (int)satisfied.size(); } + int get_sat(int j) { if (satisfied[j] == "true") return 1; return 0; } + int get_num_varnames() { return (int)varname.size(); } + string get_varname(int i) { return varname[i].get_string(); } + + +private: + + void add_word(Cmd &cmdi, int idex, deque &wq); + void add_word(Cmd &cmdi, int idex, deque &wq, string sadd); + void process_words(deque &words, vector &code_varnames, + vector &code_values, vector &vv_active, + stringstream &serr, int &ierr); + void delete_words(int i1, int i2, deque &words); + void replace_words(int i1, int i2, deque &words, Word &w); + + + // The condition: varname relation value logical varname relation value etc. + // Example: time .gt. 3.0 .and. ncycle .ge. 50 + // The condition is thought of as a sequence of subconditions connected by + // logical operators. The above example has two subconditions connected by the + // .and. 
logical operator. + deque varname; // Host code variable name to be replaced by host code value. + deque relation; // Relation between varname and value, like .gt., .hglt., ... + deque value; // Value to compare with host code value. + deque logop; // Logical operator connecting subconditions. + deque satisfied; // Satisfied flag for each subcondition. + deque has_got; // Has got flag for the relation. This is true if + // the relation is .hggt., .hglt., ..., false otherwise. + + // The restart_block commands are processed if active is true (== 1) + int active; + + // The name of this restart block. + string name; +}; + + +} // end of PP namespace + +#endif + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Restartblock.cc =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Restartblock.cc @@ -0,0 +1,598 @@ +/* Copyright 2015. Los Alamos National Security, LLC. This material was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy + * of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * Under this license, it is required to include a reference to this work. We + * request that each derivative work contain a reference to LANL Copyright + * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly + * measured. + * + * This is LANL Copyright Disclosure C15076/LA-CC-15-054 + */ + +/* + * PowerParser is a general purpose input file parser for software applications. + * + * Authors: Chuck Wingate XCP-2 caw@lanl.gov + * Robert Robey XCP-2 brobey@lanl.gov + */ + +// *************************************************************************** +// *************************************************************************** +// Restart Blocks +// Run the code until a restart block condition is satisfied. Set the restart +// block as active, write a restart dump, stop the code, and restart. +// *************************************************************************** +// *************************************************************************** +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Variable.hh" +#include "Function.hh" +#include "Word.hh" +#include "Parser_math.hh" +#include "Cmd.hh" +#include "Restartblock.hh" + +namespace PP +{ +using std::cout; +using std::endl; +using std::string; +using std::deque; +using std::vector; +using std::stringstream; +using std::pair; +using std::ifstream; +using std::ios; + + +// =========================================================================== +// Default constructor. 
+// =========================================================================== +Restartblock::Restartblock() +{ + active = -1; +} + +// =========================================================================== +// Usual constructor. +// restart_block name (time .eq. 50) then +// =========================================================================== +Restartblock::Restartblock(int &nrb, Cmd &cmdi, bool &skiprb, + bool &single_line_rb, + deque &bnames_on_dump, + deque &baflags_on_dump, + deque &rbsatprb_on_dump, + deque &rbsat_on_dump, + stringstream &serr, int &ierr) +{ + //cout << "&&&&&cw ********** Restartblock.cc, Enter Constructor **********" << endl; + active = -1; + nrb += 1; + skiprb = true; + single_line_rb = false; + int nwords = cmdi.get_nwords(); + + // &&&&&cw + //stringstream ssprint; + //cmdi.print_using_words(ssprint); + //cout << ssprint.str() << endl; + + if (nwords < 8) { + cmdi.fatal_error(0, serr, ierr); + serr << "A restart block line must have at least 8 words on it (the " + << endl + << "opening and closing parentheses each count as a word)" + << endl; + serr << "This restart block command only has " << nwords << + " words on it." << endl; + serr << "Expected something like (this has 8 words):" << endl; + serr << " restart_block after5 (time .gt. 5) then" << endl; + serr << "Or perhaps a single line restart block like (this has 13 words):" + << endl; + serr << " restart_block after5 (time .gt. 5) sizemat(2) = 0.005" << endl; + ierr = 2; + return; + } + + string p2 = cmdi.get_string(2); + if (p2 != "(") { + cmdi.fatal_error(2, serr, ierr); + serr << "Expected an open parentheses following the restart block name.." + << endl; + serr << "Instead found " << p2 << " following the restart block name." + << endl; + serr << "The restart block command should be something like:" << endl; + serr << " restart_block t_is_gt_5 (time .gt. 
5) then" << endl; + serr << "Or perhaps a single line restart block like:" << endl; + serr << " restart_block t_is_gt_5 (time .gt. 5) sizemat(2) = 0.005" << endl; + ierr = 2; + return; + } + + + for (int i=1; i &wq) +{ + int ln = cmdi.get_line_number(idex); + int file_ln = cmdi.get_file_line_number(idex); + string fname = cmdi.get_filename(idex); + deque *lines = cmdi.get_lines(); + Word w(cmdi.get_string(idex), ln, file_ln, fname, lines); + wq.push_back(w); +} + +void Restartblock::add_word(Cmd &cmdi, int idex, deque &wq, string sadd) +{ + int ln = cmdi.get_line_number(idex); + int file_ln = cmdi.get_file_line_number(idex); + string fname = cmdi.get_filename(idex); + deque *lines = cmdi.get_lines(); + Word w(sadd, ln, file_ln, fname, lines); + wq.push_back(w); +} + + +// =========================================================================== +// This is the check for when the condition is satisfied. +// =========================================================================== +void Restartblock::check_rb(vector &code_varnames, + vector &code_values, + vector &vv_active, int *rbci, + stringstream &serr, int &ierr) +{ + *rbci = 0; + //if (active==1) return; + + Parser_math pmath; + + deque wordsf; + + bool skip_sat = false; + int num_sub_cond = (int)varname.size(); + for (int n=0; n words; + + if (satisfied[n] == "true") { + int ln = varname[n].get_line_number(); + int file_ln = varname[n].get_file_line_number(); + string fname = varname[n].get_filename(); + deque *lines = varname[n].get_lines(); + Word w("true", ln, file_ln, fname, lines); + words.push_back(w); + } + else { + words.push_back(varname[n]); + words.push_back(relation[n]); + words.push_back(value[n]); + + process_words(words, code_varnames, code_values, vv_active, + serr, ierr); + + if (has_got[n]) { + if (words[0].get_bool(serr, ierr)) { + bool doit = true; + if (n > 0) { + if (logop[n-1].get_string() == ".andthen." 
&& skip_sat) { + doit = false; + } + } + if (doit) satisfied[n] = "true"; + } + else { + skip_sat = true; + } + } + } + + wordsf.push_back(words[0]); + if (logop[n].get_string() == "none") break; + else wordsf.push_back(logop[n]); + } + + process_words(wordsf, code_varnames, code_values, vv_active, + serr, ierr); + + // rbci is an output flag telling the code to write a dump and end + // the calculation or not. rbci=0 means do not end the calc, + // rbci=1 tells the code to end the calc. + // Basically, if the condition changes from its previous value, then + // set rbci to 1. + + // This is the current value of the condition that was calculated above. + bool b = wordsf[0].get_bool(serr, ierr); + + // *rcbi is the key output result from this function + // *rbci = 0 Calling code does nothing + // *rbci = 1 Calling code stops calculation, normally does restart + *rbci = 0; + + // Here we check to see if the condition has changed, i.e. is b different + // from the active flag. If so, then we end the calculation. + // When the restart block is first created, the active flag is set to -1, + // this is for runs from scratch. + // If this is a restart, then the active flag will come from the dump and + // be either 0 or 1. + // So if active is -1 and the condition is true, then we end the calculation + // right away (this should not be common, but could happen). + // + // Changed on 7/2/10 - The original idea for restart blocks was that they + // would trigger when the condition changed from false to true. But they + // would also trigger when the condition changed back from true to false. + // This causes problems for the users when the restart block would + // repeatedly trigger because the condition oscillates between true and + // false. Therefore, change the restart blocks so they trigger once and + // only once (which happens when the condition first becomes true). 
If + // the users ever need a restart block that also triggers when the + // condition changes from true to false, then some + // sort of option could be put in to allow this. + if (b && active == -1) { *rbci = 1; active = 1; return; } // Trigger + if (b && active == 0) { *rbci = 1; active = 1; return; } // Trigger + if (b && active == 1) { *rbci = 0; return; } // Do nothing + if (!b && active == -1) { *rbci = 0; active = 0; return; } // Do nothing + if (!b && active == 0) { *rbci = 0; return; } // Do nothing + + // This is the true to false trigger that causes problems. + //if (!b && active == 1) { *rbci = 1; active = 0; return; } // Trigger +} + + + +// =========================================================================== +// Given a deque of words, go through them evaluating relational and logical +// operators. The words should evaluate to one final word. +// =========================================================================== +void Restartblock::process_words(deque &words, vector &code_varnames, + vector &code_values, + vector &vv_active, + stringstream &serr, int &ierr) +{ + Parser_math pmath; + + // Replace any code vars with their values. 
+ int i2 = (int)words.size(); + for (int i=0; i *lines = words[i].get_lines(); + if (vv_active[j] == 0) { + Word wj("false", ln, file_ln, fname, lines); + replace_words(i, i+2, words, wj); + i2 -= 2; + break; + } + else { + Word wj(code_values[j], ln, file_ln, fname, lines); + words[i] = wj; + } + } + } + } + + int i1 = 0; + i2 = (int)words.size() - 1; + for (int level=6; level>=0; level--) { + for (int i=i1; i<=i2; i+=1) { + if (words[i].is_operator(level)) { + int ln = words[i].get_line_number(); + int file_ln = words[i].get_file_line_number(); + string fname = words[i].get_filename(); + deque *lines = words[i].get_lines(); + Word w("", ln, file_ln, fname, lines); + + string op_type = words[i].get_op_type(); + + if (op_type == "relational") { + pmath.do_op_relational(i-1, i, i+1, words, w, serr, ierr); + } + + if (op_type == "logical" && level == 2) // .not. is unary + pmath.do_op_not(i, i+1, words, w, serr, ierr); + + if (op_type == "logical" && level != 2) + pmath.do_op_logical(i-1, i, i+1, words, w, serr, ierr); + + // level 2, .not., is unary and is handled differently. + if (level == 2) { + replace_words(i, i+1, words, w); + i2 -= 1; + } + else { + replace_words(i-1, i+1, words, w); + i2 -= 2; + i -= 1; + } + continue; + } + } + } + + // The condition has to evaluate to a single boolean value. + if ((int)words.size() != 1) { + words[0].fatal_error(serr, ierr); + serr << "restart_block condition did not evaluate to a single boolean value." + << endl; + serr << "Fix the restart_block condition" << endl; + ierr = 2; + } +} + + +// =========================================================================== +// List the condition for this restart block to a stringstream. +// This is done to let the user indentify this restart block. It is +// also useful for debugging. 
+// =========================================================================== +void Restartblock::list_condition(string offset1, string offset2, + stringstream &ssc) +{ + for (int n=0; n<(int)varname.size(); n++) { + + string relstr = relation[n].get_string(); + string rstr = relstr; + if (has_got[n]) { + if (relstr == ".lt.") rstr = ".hglt."; + if (relstr == ".le.") rstr = ".hgle."; + if (relstr == ".eq.") rstr = ".hgeq."; + if (relstr == ".ne.") rstr = ".hgne."; + if (relstr == ".gt.") rstr = ".hggt."; + if (relstr == ".ge.") rstr = ".hgge."; + } + relstr = rstr; + + string offset = offset1; + if (n > 0) offset = offset2; + + ssc << offset << varname[n].get_string() << " " + << relstr << " " << value[n].get_string(); + + if (logop[n].get_string() == "none") break; + ssc << " " << logop[n].get_string(); + ssc << endl; + } +} + + +// =========================================================================== +// Delete words i1 through i2 inclusive from the deque. +// =========================================================================== +void Restartblock::delete_words(int i1, int i2, deque &words) +{ + deque::iterator p = words.begin(); + words.erase(p + i1, p + i2 + 1); +} + + +// =========================================================================== +// Replace words i1 through i2 inclusive with word w. +// =========================================================================== +void Restartblock::replace_words(int i1, int i2, deque &words, Word &w) +{ + delete_words(i1, i2, words); + deque::iterator p = words.begin(); + words.insert(p + i1, w); +} + + + +} // End of the PP namespace + + + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Variable.hh =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Variable.hh @@ -0,0 +1,130 @@ +/* Copyright 2015. Los Alamos National Security, LLC. This material was produced + * under U.S. 
Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy + * of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * Under this license, it is required to include a reference to this work. We + * request that each derivative work contain a reference to LANL Copyright + * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly + * measured. + * + * This is LANL Copyright Disclosure C15076/LA-CC-15-054 + */ + +/* + * PowerParser is a general purpose input file parser for software applications. + * + * Authors: Chuck Wingate XCP-2 caw@lanl.gov + * Robert Robey XCP-2 brobey@lanl.gov + */ + +#ifndef VARIABLEHHINCLUDE +#define VARIABLEHHINCLUDE + +// *************************************************************************** +// *************************************************************************** +// This class holds information about a variable. 
+// *************************************************************************** +// *************************************************************************** + +#include +#include +#include +#include + +namespace PP +{ +using std::string; +using std::stringstream; +using std::vector; +using std::deque; + + + +class Variable +{ + +public: + Variable(); + Variable(int base); + Variable(string nme, string v, bool pred, string tdes); + Variable(string nme); + Variable(string nme, vector &istart, vector &vvec, + int lnum, int file_lnum, string fname, deque *lines, + stringstream &serr, int &ierr); + + // Accessor methods. + string get_varname() { return name; } + void set_varname(string s) { name = s; } + int get_ndim() { return ndim; } + + int get_nvalues() { return (int)value.size(); } + + string get_var_value() { return value[0]; } + string get_var_value(int idex) { return value[idex]; } + string get_var_value(vector &adex, string vname, int lnum, + int file_lnum, string fname, deque *lines, + stringstream &serr, int &ierr); + + + void set_var_value(vector &istart, vector &valvec, + int lnum, int file_lnum, string fname, + deque *lines, stringstream &serr, int &ierr); + void bump_var(vector &istart, int increment, + int lnum, int file_lnum, string fname, + deque *lines, stringstream &serr, int &ierr); + + + void set_bounds(vector &bounds, int lnum, int file_lnum, + string fname, deque *lines, + stringstream &serr, int &ierr); + + void get_indices(int icdex, vector &adex); + + string get_description() { return description; } + void set_description(string vardes) { description = vardes; } + + bool is_pre_defined() { return pre_defined; } + + void set_temporary(bool b) { temporary = b; } + bool is_temporary() { return temporary; } + +private: + + // name The name of the variable. + // value Vector containing the values of the variable. + // ndim Number of dimensions, for example var(9,3) has ndim=2 + // maxdim Max num for each dimension except the last. 
+ // lnum_ndim The line number where ndim was set. + // lnum_bounds The line number where maxdim was set. + // pre_defined Pre-defined vars cannot be redefined. + // description Text description of the variable. + // temporary A temporary variable. + string name; + vector value; + int ndim, lnum_bounds, lnum_ndim; + vector maxdim; + bool pre_defined, temporary; + string description; +}; + + +} // End of the PP namespace + +#endif Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Variable.cc =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Variable.cc @@ -0,0 +1,491 @@ +/* Copyright 2015. Los Alamos National Security, LLC. This material was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy + * of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ * + * Under this license, it is required to include a reference to this work. We + * request that each derivative work contain a reference to LANL Copyright + * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly + * measured. + * + * This is LANL Copyright Disclosure C15076/LA-CC-15-054 + */ + +/* + * PowerParser is a general purpose input file parser for software applications. + * + * Authors: Chuck Wingate XCP-2 caw@lanl.gov + * Robert Robey XCP-2 brobey@lanl.gov + */ + +// *************************************************************************** +// *************************************************************************** +// This class holds information about a variable. It is mostly for use with +// the parser. +// *************************************************************************** +// *************************************************************************** + +#include +#include +#include +#include +#include +#include + +#include "Parser_utils.hh" +#include "Word.hh" +#include "Variable.hh" + +namespace PP +{ +using std:: string; +using std::cout; +using std::endl; +using std::stringstream; +using std::setprecision; +using std::vector; +using std::deque; + +static int index_base = 1; + + +// =========================================================================== +// Default constructor. 
+// ===========================================================================
+Variable::Variable()
+{
+    // Placeholder variable: sentinel name and a single sentinel value.
+    name = "__NO_NAME_GIVEN__";
+    value.push_back("__NO_VALUE_GIVEN__");
+
+    // A negative ndim means the dimensionality has not been fixed yet;
+    // set_var_value() and set_bounds() assign it the first time they run.
+    ndim = -1;
+    lnum_ndim = 0;
+    lnum_bounds = 0;
+    pre_defined = false;
+    description = "";
+    temporary = false;
+}
+
+// ===========================================================================
+// Constructor to reset index base
+//
+// Stores base in the file-scope index_base, which set_var_value() and
+// bump_var() hand to Parser_utils when turning array indices into a 1d
+// position. The object itself carries no name and no values.
+//
+// The scalar members get the same "nothing set yet" defaults as in the
+// default constructor. Leaving them unassigned made every accessor, and
+// every copy of the object, read indeterminate data.
+// ===========================================================================
+Variable::Variable(int base)
+{
+    index_base = base;
+
+    ndim = -1;
+    lnum_ndim = 0;
+    lnum_bounds = 0;
+    pre_defined = false;
+    temporary = false;
+}
+
+// ===========================================================================
+// Constructor given a string as input. This constructs a scalar variable.
+//
+// nme   Name of the variable.
+// v     The single value, stored as value[0].
+// pred  True for a pre-defined variable; set_var_value() and set_bounds()
+//       refuse to modify those.
+// tdes  Text description of the variable.
+// ===========================================================================
+Variable::Variable(string nme, string v, bool pred, string tdes)
+{
+    name = nme;
+    value.push_back(v);
+    ndim = 0;                // Zero dimensions, i.e. a scalar.
+    lnum_ndim = 0;
+    lnum_bounds = 0;
+    pre_defined = pred;
+    description = tdes;
+    temporary = false;
+}
+
+
+// ===========================================================================
+// Constructor for variables with no value.
+//
+// nme   Name of the variable.
+//
+// The value vector holds one sentinel string and ndim stays negative, so
+// the dimensionality is fixed later by set_var_value() or set_bounds().
+// ===========================================================================
+Variable::Variable(string nme)
+{
+    name = nme;
+    value.push_back("__NO_VALUE_GIVEN__");
+    ndim = -1;
+    lnum_ndim = 0;
+    lnum_bounds = 0;
+    pre_defined = false;
+    description = "";
+    temporary = false;
+}
+
+
+// ===========================================================================
+// Constructor given a vector of strings as input.
+// =========================================================================== +Variable::Variable(string nme, vector &istart, vector &valvec, + int lnum, int file_lnum, string fname, deque *lines, + stringstream &serr, int &ierr) +{ + name = nme; + ndim = -1; + lnum_ndim = 0; + lnum_bounds = 0; + pre_defined = false; + description = ""; + temporary = false; + set_var_value(istart, valvec, lnum, file_lnum, fname, lines, serr, ierr); +} + + +// =========================================================================== +// istart gives the starting location in the array for setting values. +// The istart indices start from 1 (fortran based). +// +// This function works for any dimension, 0,1,2,3,... +// =========================================================================== +void Variable::set_var_value(vector &istart, vector &valvec, + int lnum, int file_lnum, string fname, + deque *lines, stringstream &serr, int &ierr) +{ + // Cannot redefine a pre-defined variable. + if (pre_defined) { + serr << endl; + serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl; + serr << " " << (*lines)[lnum-1] << endl; + serr << "in file: " << fname << endl; + serr << "Cannot redefine a pre-defined variable." << endl; + ierr = 2; + return; + } + + // Set the array dimension and make sure the user is not trying to + // change it. + int ndim_new = (int)istart.size(); + if (ndim >= 0) { + if (ndim != ndim_new) { + // Throw an error + serr << endl; + serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl; + serr << " " << (*lines)[lnum-1] << endl; + serr << "in file: " << fname << endl; + serr << "Cannot redefine the dimensionality of a variable." 
<< endl; + serr << "Original number of dimensions = " << ndim << endl; + serr << "New number of dimensions = " << ndim_new << endl; + if (lnum_ndim > 0) { + serr << "Previously set in line " << lnum_ndim << ":" << endl; + serr << " " << (*lines)[lnum_ndim-1] << endl; + } + ierr = 2; + return; + } + } + else { + ndim = ndim_new; + lnum_ndim = lnum; + } + + int bsize = (int)maxdim.size(); + + if (ndim == 0 && bsize > 0) { + serr << endl; + serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl; + serr << " " << (*lines)[lnum-1] << endl; + serr << "in file: " << fname << endl; + serr << "Array boundaries not allowed for scalar variable." << endl; + if (lnum_bounds > 0) { + serr << "Array boundaries were set in line " << lnum_bounds << ":" << endl; + serr << " " << (*lines)[lnum_bounds-1] << endl; + } + ierr = 2; + return; + } + + if (ndim > 0) { + if (ndim != bsize+1) { + serr << endl; + serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl; + serr << " " << (*lines)[lnum-1] << endl; + serr << "in file: " << fname << endl; + serr << "Number of dimensions = " << ndim << endl; + serr << "Number of array boundaries + 1 = " << bsize+1 << endl; + serr << "These should match but don't. " << endl; + ierr = 2; + return; + } + } + + // Find the 1d starting position given multiple array indices. + Parser_utils putils(index_base); + int i1 = putils.start_dex(istart, maxdim); + + // nvals Number of values after the = sign. + // Note that multiplicity is already handled, i.e. valvec has already + // been expanded to include multiplicites. + int nvals = (int)valvec.size(); + + // Get memory that we need. + if (i1+nvals > (int)value.size()) { + value.resize(i1+nvals, ""); + } + + // Store the array values. + for (int i=i1; i &istart, int increment, + int lnum, int file_lnum, string fname, + deque *lines, stringstream &serr, int &ierr) +{ + // Find the 1d starting position given multiple array indices. 
+ Parser_utils putils(index_base); + int i1 = putils.start_dex(istart, maxdim); + + // We are incrementing an existing variable, so i1 should be valid. + if (i1 >= (int)value.size()) { + // Fatal Error + } + + Word w1(value[i1], lnum, file_lnum, fname, lines); + if (!w1.is_number()) { } // FATAL ERROR + + stringstream ss; + if (w1.is_integer()) { + int ia1 = w1.get_int(serr, ierr); + int ia = ia1 + increment; + ss << ia; + } + else { + double d1 = w1.get_double(serr, ierr); + double d = d1 + increment; + ss << setprecision(15) << d; + } + value[i1] = ss.str(); +} + + + +// =========================================================================== +// The problem with multi-dimensional variable arrays is that the user has +// to tell us the bounds on every dimension except the last one. This info +// is input in the bounds vector and stored. +// +// This function works for any dimension, 0,1,2,3,... +// =========================================================================== +void Variable::set_bounds(vector &bounds, int lnum, int file_lnum, + string fname, deque *lines, + stringstream &serr, int &ierr) + +{ + // Cannot redefine a pre-defined variable. + if (pre_defined) { + serr << endl; + serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl; + serr << " " << (*lines)[lnum-1] << endl; + serr << "in file: " << fname << endl; + serr << "Cannot redefine a pre-defined variable." << endl; + ierr = 2; + return; + } + + // Set the array dimension and make sure the user is not trying to + // change it. + int ndim_new = (int)bounds.size() + 1; + if (ndim >= 0) { + if (ndim != ndim_new) { + serr << endl; + serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl; + serr << " " << (*lines)[lnum-1] << endl; + serr << "in file: " << fname << endl; + serr << "Cannot redefine the dimensionality of a variable " + "(set_bounds)." 
<< endl; + serr << "Original number of dimensions = " << ndim << endl; + serr << "New number of dimensions = " << ndim_new << endl; + if (lnum_ndim > 0) { + serr << "Previously set in line " << lnum_ndim << ":" << endl; + serr << " " << (*lines)[lnum_ndim-1] << endl; + } + ierr = 2; + return; + } + } + else { + ndim = ndim_new; + lnum_ndim = lnum; // Store line num for better err messages. + } + + // Check to make sure the user is not reseting the bounds. + if ((int)maxdim.size() > 0) { + serr << endl; + serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl; + serr << " " << (*lines)[lnum-1] << endl; + serr << "in file: " << fname << endl; + serr << "The bounds on this array has already been set," << endl; + serr << "cannot reset them." << endl; + if (lnum_bounds > 0) { + serr << "Previously set in line " << lnum_bounds << ":" << endl; + serr << " " << (*lines)[lnum_bounds-1] << endl; + } + ierr = 2; + return; + } + + // Store the line num where bounds were set for better err messages. + lnum_bounds = lnum; + + // Store the bounds. + maxdim.clear(); + for (int i=0; i<(int)bounds.size(); i++) { + maxdim.push_back(bounds[i]); + } +} + + +// =========================================================================== +// Given indices, in adex, get the value of the variable. +// For example, suppose you want the value of +// $var2d(3,5) +// The adex vector contains 2 numbers, 3 and 5, for the fortran indices. +// The start_dex function is used to get the 1d index into the value array. +// +// This function works for any dimension, 0,1,2,3,... +// =========================================================================== +string Variable::get_var_value(vector &adex, string vname, int lnum, + int file_lnum, string fname, deque *lines, + stringstream &serr, int &ierr) +{ + int adex_size = (int)adex.size(); + + // Special case for scalar variables. + if (ndim == 0 || adex_size == 0) return value[0]; + + // The adex indices and bounds indices must match. 
+ if (adex_size - 1 != (int)maxdim.size()) { + serr << endl; + serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl; + serr << " " << (*lines)[lnum-1] << endl; + serr << "in file: " << fname << endl; + serr << "The dimensionality of variable " << vname << endl; + serr << "does not match what was previously set." << endl; + if (lnum_bounds > 0) { + serr << "Previous dimensionality set in line " << lnum_bounds << ":" << endl; + serr << " " << (*lines)[lnum_bounds-1] << endl; + } + ierr = 2; + return ""; + } + + // Indices cannot exceed max allowed. + // Remember that adex is referenced from 1 (fortran index). + for (int d=0; d<(int)maxdim.size(); d++) { + if (adex[d] > maxdim[d]) { + serr << endl; + serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl; + serr << " " << (*lines)[lnum-1] << endl; + serr << "in file: " << fname << endl; + serr << "Variable name = " << vname << endl; + serr << "The value for dimension " << d+1 << " = " << adex[d] << endl; + serr << "This exceeds the max dimension of " << maxdim[d] << endl; + if (lnum_bounds > 0) { + serr << "The array bounds were set in line " << lnum_bounds << ":" << endl; + serr << " " << (*lines)[lnum_bounds-1] << endl; + } + ierr = 2; + } + } + + if (ierr == 2) return ""; + + // Indices cannot be < 1. + for (int d=0; d<(int)adex.size(); d++) { + if (adex[d] < 1) { + serr << endl; + serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl; + serr << " " << (*lines)[lnum-1] << endl; + serr << "in file: " << fname << endl; + serr << "Variable name = " << vname << endl; + serr << "Expected index greater than or equal to 1 " << endl; + serr << "Instead, index = " << adex[d] << endl; + ierr = 2; + } + } + + if (ierr == 2) return ""; + + + // Using the indices in adex and the bounds for multi-d arrays, maxdim, + // get the 1d index into the value array. + Parser_utils putils(index_base); + int i1 = putils.start_dex(adex, maxdim); + + // Check that the value array size has not been exceeded.
+ if (i1 >= (int)value.size()) { + serr << endl; + serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl; + serr << " " << (*lines)[lnum-1] << endl; + serr << "in file: " << fname << endl; + serr << "Variable name = " << vname << endl; + serr << "Exceeded array bounds. Check to make sure you are not" << endl; + serr << "requesting an array element you have not yet set." << endl; + + vector maxdex((int)adex.size(), 0); + get_indices((int)value.size()-1, maxdex); + for (int d=0; d<(int)adex.size(); d++) { + string s = ""; + if (adex[d] > maxdex[d]) s = " ERROR, max exceeded"; + serr << " Requested index = " << adex[d] + << " Max index = " << maxdex[d] << s << endl; + } + + ierr = 2; + return ""; + } + + // Return the value. + return value[i1]; +} + + + +// =========================================================================== +// Given the 1d index, icdex (starting from 0), find the corresponding +// multi dimensional fortran indices (each starting from 1). +// +// Example 1: Consider a 1d array +// var1d(1) = 1 3 5 9 -4 -5 6 +// Suppose icdex=3, corresponding to array value 9. +// This 1d case is very simple, all we do is add 1 to icdex to get a reference +// from 1, thus returning 4. +// +// Example 2: Consider a 2d array +// $var2d(1,1) = 11. 21. 31. 12. 22. 32. 13. 23. 33. +// Where the max of the first dimension is 3. Suppose the user specifies +// icdex = 5, this corresponds to array value 32. The two indices returned +// would be 3,2 (referenced from 1). +// +// The adex vector contains the output indices, for example 2 this would be 3 +// and 2. +// +// This function works for any dimension, 0,1,2,3,... +// =========================================================================== +void Variable::get_indices(int icdex, vector &adex) +{ + // Nothing to do for scalar variables. + if (ndim == 0) return; + + // Given icdex, get the indices.
+ int nvalues = (int)value.size(); + Parser_utils putils(index_base); + putils.reverse_dex(icdex, nvalues, adex, maxdim); +} + + + + +} // End of the PP namespace Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Whenthen.hh =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Whenthen.hh @@ -0,0 +1,139 @@ +/* Copyright 2015. Los Alamos National Security, LLC. This material was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy + * of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * Under this license, it is required to include a reference to this work. We + * request that each derivative work contain a reference to LANL Copyright + * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly + * measured. 
+ * + * This is LANL Copyright Disclosure C15076/LA-CC-15-054 + */ + +/* + * PowerParser is a general purpose input file parser for software applications. + * + * Authors: Chuck Wingate XCP-2 caw@lanl.gov + * Robert Robey XCP-2 brobey@lanl.gov + */ + +#ifndef WHENTHENHHINCLUDE +#define WHENTHENHHINCLUDE + +// *************************************************************************** +// *************************************************************************** + +// *************************************************************************** +// *************************************************************************** + +#include +#include +#include +#include +#include + +#include "Word.hh" + +namespace PP +{ +using std::string; +using std::deque; +using std::vector; +using std::map; +using std::stringstream; + +class Whenthen +{ + +public: + Whenthen(); + Whenthen(int &nwhen, Cmd &cmdi, bool &skipwhen, bool &single_line_when, + bool eflag, stringstream &serr, int &ierr); + void add_cmdf(Cmd &cmdi); + void list_condition(string offset1, string offset2, + stringstream &ssc); + void list_cmdsf_ss(stringstream &ssc); + + void check_wt(vector &code_varnames, + vector &code_values, + vector &vv_active, + int *wtci, stringstream &serr, int &ierr); + + deque *get_cmdsf_ptr() { return &cmdsf; } + + void get_char_array_size(int *ca_size); + void get_char_array(string &sc); + + void get_satsize(int *sat_size); + void getsat(int *sat); + void setsat(int *sat); + void getprocessed(int *wtp); + void setprocessed(int wtp); + void getseq(int *wtseq); + void setseq(int wtseq); + int get_num_varnames() { return (int)varname.size(); } + string get_varname(int i) { return varname[i].get_string(); } + + +private: + + void add_word(Cmd &cmdi, int idex, deque &wq); + void add_word(Cmd &cmdi, int idex, deque &wq, string sadd); + void process_words(deque &words, vector &code_varnames, + vector &code_values, + vector &vv_active, + stringstream &serr, int &ierr); + void delete_words(int 
i1, int i2, deque &words); + void replace_words(int i1, int i2, deque &words, Word &w); + + // The condition: varname relation value logical varname relation value etc. + // Example: time .gt. 3.0 .and. ncycle .ge. 50 + // The condition is thought of as a sequence of subconditions connected by + // logical operators. The above example has two subconditions connected by the + // .and. logical operator. + deque varname; // Host code variable name to be replaced by host code value. + deque relation; // Relation between varname and value, like .gt., .hglt., ... + deque value; // Value to compare with host code value. + deque logop; // Logical operator connecting subconditions. + deque satisfied; // Satisfied flag for each subcondition. + deque has_got; // Has got flag for the relation. This is true if + // the relation is .hggt., .hglt., ..., false otherwise. + + // Commands to be done when the condition is satisfied. + deque cmdsf; + + // The whenthen is only done once when the condition is satisfied. + // This flag keeps it from being done again. + bool processed; + + // This flag is used to distinguish between the when command and the + // whenever command. + bool ever_flag; + + // This is a sequence index to keep track of what order the whenthen's + // have been processed in. + int seqdex; +}; + + +} // end of PP namespace + +#endif + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Whenthen.cc =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Whenthen.cc @@ -0,0 +1,633 @@ +/* Copyright 2015. Los Alamos National Security, LLC. This material was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. 
NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy + * of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * Under this license, it is required to include a reference to this work. We + * request that each derivative work contain a reference to LANL Copyright + * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly + * measured. + * + * This is LANL Copyright Disclosure C15076/LA-CC-15-054 + */ + +/* + * PowerParser is a general purpose input file parser for software applications. 
+ * + * Authors: Chuck Wingate XCP-2 caw@lanl.gov + * Robert Robey XCP-2 brobey@lanl.gov + */ + +// *************************************************************************** +// *************************************************************************** +// *************************************************************************** +// *************************************************************************** +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Variable.hh" +#include "Function.hh" +#include "Word.hh" +#include "Parser_math.hh" +#include "Cmd.hh" +#include "Whenthen.hh" + +namespace PP +{ +using std::cout; +using std::endl; +using std::string; +using std::deque; +using std::vector; +using std::stringstream; +using std::pair; +using std::ifstream; +using std::ios; + + +// =========================================================================== +// Default constructor. +// =========================================================================== +Whenthen::Whenthen() +{ + processed = false; + seqdex = -1; + ever_flag = false; +} + + +// =========================================================================== +// Usual constructor. +// =========================================================================== +Whenthen::Whenthen(int &nwhen, Cmd &cmdi, bool &skipwhen, + bool &single_line_when, bool eflag, + stringstream &serr, int &ierr) +{ + processed = false; + seqdex = -1; + ever_flag = eflag; + nwhen += 1; + skipwhen = true; + single_line_when = false; + int nwords = cmdi.get_nwords(); + + // &&&&&cw + //stringstream ssprint; + //cmdi.print_using_words(ssprint); + //cout << ssprint.str() << endl; + + if (nwords < 7) { + cmdi.fatal_error(0, serr, ierr); + serr << "A when command line must have at least 7 words on it (the " + << endl + << "opening and closing parenthses each count as a word)" + << endl; + serr << "This when command only has " << nwords << " words on it." 
<< endl; + serr << "Expected something like (this has 7 words):" << endl; + serr << " when (time .gt. 5) then" << endl; + serr << "Or perhaps a single line when like (this has 9 words):" << endl; + serr << " when (time .gt. 5) shortmodcyc = 5" << endl; + ierr = 2; + return; + } + + string p1 = cmdi.get_string(1); + if (p1 != "(") { + cmdi.fatal_error(1, serr, ierr); + serr << "Expected an open parentheses following the when keyword." + << endl; + serr << "Instead found " << p1 << " following the when keyword." + << endl; + serr << "The when command should be something like:" << endl; + serr << " when (time .gt. 5) then" << endl; + serr << "Or perhaps a single line when like:" << endl; + serr << " when (time .gt. 5) shortmodcyc = 5" << endl; + ierr = 2; + return; + } + + + for (int i=1; i &wq) +{ + int ln = cmdi.get_line_number(idex); + int file_ln = cmdi.get_file_line_number(idex); + string fname = cmdi.get_filename(idex); + deque *lines = cmdi.get_lines(); + Word w(cmdi.get_string(idex), ln, file_ln, fname, lines); + wq.push_back(w); +} + +void Whenthen::add_word(Cmd &cmdi, int idex, deque &wq, string sadd) +{ + int ln = cmdi.get_line_number(idex); + int file_ln = cmdi.get_file_line_number(idex); + string fname = cmdi.get_filename(idex); + deque *lines = cmdi.get_lines(); + Word w(sadd, ln, file_ln, fname, lines); + wq.push_back(w); +} + + + +// =========================================================================== +// Add a command to the deque of commands for this whenthen. +// =========================================================================== +void Whenthen::add_cmdf(Cmd &cmdi) +{ + cmdsf.push_back(cmdi); +} + + +// =========================================================================== +// This is the check for when the condition is satisfied. 
+// =========================================================================== +void Whenthen::check_wt(vector &code_varnames, + vector &code_values, + vector &vv_active, + int *wtci, stringstream &serr, int &ierr) +{ + *wtci = 0; + if (processed) return; + + Parser_math pmath; + + deque wordsf; + + bool skip_sat = false; + int num_sub_cond = (int)varname.size(); + for (int n=0; n words; + + if (satisfied[n] == "true") { + int ln = varname[n].get_line_number(); + int file_ln = varname[n].get_file_line_number(); + string fname = varname[n].get_filename(); + deque *lines = varname[n].get_lines(); + Word w("true", ln, file_ln, fname, lines); + words.push_back(w); + } + else { + words.push_back(varname[n]); + words.push_back(relation[n]); + words.push_back(value[n]); + + process_words(words, code_varnames, code_values, vv_active, + serr, ierr); + + if (has_got[n]) { + if (words[0].get_bool(serr, ierr)) { + bool doit = true; + if (n > 0) { + if (logop[n-1].get_string() == ".andthen." && skip_sat) { + doit = false; + } + } + if (doit) satisfied[n] = "true"; + } + else { + skip_sat = true; + } + } + } + + wordsf.push_back(words[0]); + if (logop[n].get_string() == "none") break; + else wordsf.push_back(logop[n]); + } + + process_words(wordsf, code_varnames, code_values, vv_active, + serr, ierr); + + // The output value, wtci, defaults to false (0). If the condition + // is satisfied then the output is true (1). + if (wordsf[0].get_bool(serr, ierr)) { + *wtci = 1; + if (!ever_flag) processed = true; + return; + } +} + + + +// =========================================================================== +// Given a deque of words, go through them evaluating relational and logical +// operators. The words should evaluate to one final word. 
+// =========================================================================== +void Whenthen::process_words(deque &words, vector &code_varnames, + vector &code_values, + vector &vv_active, + stringstream &serr, int &ierr) +{ + Parser_math pmath; + + // Replace any code vars with their values. + int i2 = (int)words.size(); + for (int i=0; i *lines = words[i].get_lines(); + if (vv_active[j] == 0) { + Word wj("false", ln, file_ln, fname, lines); + replace_words(i, i+2, words, wj); + i2 -= 2; + break; + } + else { + Word wj(code_values[j], ln, file_ln, fname, lines); + words[i] = wj; + } + } + } + } + + int i1 = 0; + i2 = (int)words.size() - 1; + for (int level=6; level>=0; level--) { + for (int i=i1; i<=i2; i+=1) { + if (words[i].is_operator(level)) { + int ln = words[i].get_line_number(); + int file_ln = words[i].get_file_line_number(); + string fname = words[i].get_filename(); + deque *lines = words[i].get_lines(); + Word w("", ln, file_ln, fname, lines); + + string op_type = words[i].get_op_type(); + + if (op_type == "relational") { + pmath.do_op_relational(i-1, i, i+1, words, w, serr, ierr); + } + + if (op_type == "logical" && level == 2) // .not. is unary + pmath.do_op_not(i, i+1, words, w, serr, ierr); + + if (op_type == "logical" && level != 2) + pmath.do_op_logical(i-1, i, i+1, words, w, serr, ierr); + + // level 2, .not., is unary and is handled differently. + if (level == 2) { + replace_words(i, i+1, words, w); + i2 -= 1; + } + else { + replace_words(i-1, i+1, words, w); + i2 -= 2; + i -= 1; + } + continue; + } + } + } + + // The condition has to evaluate to a single boolean value. + if ((int)words.size() != 1) { + words[0].fatal_error(serr, ierr); + serr << "When...then condition did not evaluate to a single boolean value." 
+ << endl; + serr << "Fix the when...then condition" << endl; + ierr = 2; + } +} + + +// =========================================================================== +// =========================================================================== +void Whenthen::get_char_array_size(int *ca_size) +{ + string sc; + get_char_array(sc); + (*ca_size) = (int)sc.size(); +} + + +// =========================================================================== +// =========================================================================== +void Whenthen::get_char_array(string &sc) +{ + for (int n=0; n<(int)varname.size(); n++) { + sc += varname[n].get_string(); + sc += relation[n].get_string(); + sc += value[n].get_string(); + sc += logop[n].get_string(); + if (has_got[n]) sc += "hasgot"; + } + for (int n=0; n<(int)cmdsf.size(); n++) { + int nw = cmdsf[n].get_nwords(); + for (int i=0; i 0) offset = offset2; + + ssc << offset << varname[n].get_string() << " " + << relstr << " " << value[n].get_string(); + + if (logop[n].get_string() == "none") break; + ssc << " " << logop[n].get_string(); + ssc << endl; + } +} + + +// =========================================================================== +// Delete words i1 through i2 inclusive from the deque. +// =========================================================================== +void Whenthen::delete_words(int i1, int i2, deque &words) +{ + deque::iterator p = words.begin(); + words.erase(p + i1, p + i2 + 1); +} + + +// =========================================================================== +// Replace words i1 through i2 inclusive with word w. 
+// =========================================================================== +void Whenthen::replace_words(int i1, int i2, deque &words, Word &w) +{ + delete_words(i1, i2, words); + deque::iterator p = words.begin(); + words.insert(p + i1, w); +} + + + + + +} // End of the PP namespace + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Word.hh =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Word.hh @@ -0,0 +1,244 @@ +/* Copyright 2015. Los Alamos National Security, LLC. This material was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy + * of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * Under this license, it is required to include a reference to this work. 
We + * request that each derivative work contain a reference to LANL Copyright + * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly + * measured. + * + * This is LANL Copyright Disclosure C15076/LA-CC-15-054 + */ + +/* + * PowerParser is a general purpose input file parser for software applications. + * + * Authors: Chuck Wingate XCP-2 caw@lanl.gov + * Robert Robey XCP-2 brobey@lanl.gov + */ + +#ifndef WORDHHINCLUDE +#define WORDHHINCLUDE + +// *************************************************************************** +// *************************************************************************** +// This class holds each word from the line. +// *************************************************************************** +// *************************************************************************** + +#include +#include +#include +#include +#include +#include + +namespace PP +{ +using std::string; +using std::stringstream; +using std::vector; +using std::map; +using std::deque; + +enum WordType {WUNKNOWN, WSTRING, INTEGER, DOUBLE, EQUALS, OPERATOR, + OPEN_PARENS, CLOSED_PARENS, + OPEN_SQUARE_BRACKET, CLOSED_SQUARE_BRACKET, + OPEN_BRACE, CLOSED_BRACE, + COMMA, VARIABLE}; + +class Word +{ + +public: + Word(); + Word(string s); + Word(string s, int lnum, int file_lnum, string fname, + deque *lstr); + Word(double d, int lnum, int file_lnum, string fname, + deque *lstr); + Word(int ia, int lnum, int file_lnum, string fname, + deque *lstr); + Word operator=(const Word &); + Word(const Word &); + ~Word(); + + // Given a word, change its value. + void set_value(double d); + void set_value(string s); + void set_value(const char *s); + void set_value(bool b); + + // Print the type of word to a stringstream. + void print_type(stringstream &ss); + + // Set the type of word. + void set_type(); + + // Get the operator type, arithmetic, relational, ... 
+ string get_op_type() {return op_type;} + + bool is_operator() { if (type == OPERATOR) return true; return false; } + + bool is_operator(int level) { + if (type != OPERATOR) return false; + if (level == op_level) return true; + return false; + } + + bool is_bool(); + + bool is_string() { if (type == WSTRING) return true; return false; } + + bool is_integer() { if (type == INTEGER) return true; return false; } + + bool is_number() { if (type == INTEGER || type == DOUBLE) return true; + return false; } + + bool is_numvar() { if (type == INTEGER || type == DOUBLE || + type == VARIABLE) return true; + return false; } + + bool has_value() { if (type == INTEGER || type == DOUBLE) return true; + return false; } + + bool is_variable() { if (type == VARIABLE) return true; return false; } + + bool is_comma() { if (type == COMMA) return true; return false; } + + /*! \brief Get the word as a string. */ + string get_string() { return wstr; } + string get_print_string(bool enc_quotes); + string get_stringp() { processed=true; return wstr; } + char get_single_char(stringstream &serr, int &ierr); + + /*! \brief Get the word as a float. */ + float get_float(); + + /*! \brief Get the word as a double. */ + double get_double(); + double get_double(stringstream &serr, int &ierr); + + // Get the word as an int, without and with error processing. + int get_int(); + int get_int(stringstream &serr, int &ierr); + + int64_t get_int64_t(); + int64_t get_int64_t(stringstream &serr, int &ierr); + + /*! \brief Get the word as a boolean. */ + bool get_bool(stringstream &serr, int &ierr); + + /*! \brief Templated get method for get_double, get_int, etc. */ + template< class T > + T get_val( T &dummy ); + + /*! + * \brief Convert string s to the type of the first argument. Function + * returns the converted value as a reference and as the function + * result. + * + * These overloaded functions are the base for the templated accessor + * functions "get_val()". 
+ * + * \param rtti - Convert the string s into the type of rtti and return it. + */ + int convertFromString ( const int &rtti, const string &s ) const; + int64_t convertFromString ( const int64_t &rtti, const string &s ) const; + string convertFromString ( const string &rtti, const string &s ) const; + float convertFromString ( const float &rtti, const string &s ) const; + double convertFromString ( const double &rtti, const string &s ) const; + //bool convertFromString ( const bool &rtti, const string &s ) const; + + /*! \brief Negate the word or set a flag to negate it later. */ + void negate_value(); + + // Handle errors. + void fatal_error(stringstream &serr, int &ierr); + void warning(stringstream &serr, int &ierr); + + // Miscellaneous functions. + void handle_quotes(stringstream &serr, int &ierr); + void erase_char(int ic); + + // Accessor methods. + void set_word(string str) { wstr = str; } + void set_processed(bool p) { processed = p; } + bool get_processed() { return processed; } + int get_line_number() { return line_number; } + int get_file_line_number() { return file_line_number; } + string get_filename() { return filename; } + deque *get_lines() { return lines; } + void set_filename(string fn) { filename = fn; } + int get_multiplicity() { return multiplicity; } + void set_multiplicity(int m) { multiplicity = m; } + +private: + void init(); + bool check_before_e(string s, int i1, int i2); + bool check_after_e(string s, int i1, int i2); + + // This is the basic storage for the word. + string wstr; + + // The type of word, like operator, string, variable, etc. + WordType type; + + // Flag for testing whether this word was processed or not. + bool processed; + + // Flag to negate a variable. + bool negate; + + // This word is repeated multiplicity times. + int multiplicity; + + // If the word is an operator, then this is its level, i.e. "**" has + // the highest level, then "*","/", etc. 
+ int op_level; + + // The operator type, arithmetic, relational, ... + string op_type; + + // Convert the input string to lower case. + void string_to_lower( string &s ) const; + + // This is needed for telling the user what line in the input + // file or include file the error occurred on. + // + // line_number The line_number corresponding to this command, this is + // an index into lines and starts from 1, not 0. + // lines Pointer to the deque of original lines. This contains all + // the lines from the input file and any include files. + // file_line_number The line number in the input file or include file. + // filename The name of the input file or include file. + // + // file_line_number and filename are needed so that the user can open + // the file and go to the line in error. + int line_number, file_line_number; + string filename; + deque *lines; + +}; + + +} // end of PP namespace + +#endif Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Word.cc =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Word.cc @@ -0,0 +1,1193 @@ +/* Copyright 2015. Los Alamos National Security, LLC. This material was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License.
You may obtain a copy + * of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * Under this license, it is required to include a reference to this work. We + * request that each derivative work contain a reference to LANL Copyright + * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly + * measured. + * + * This is LANL Copyright Disclosure C15076/LA-CC-15-054 + */ + +/* + * PowerParser is a general purpose input file parser for software applications. + * + * Authors: Chuck Wingate XCP-2 caw@lanl.gov + * Robert Robey XCP-2 brobey@lanl.gov + */ + +// *************************************************************************** +// *************************************************************************** +// This class holds each word from the line. +// *************************************************************************** +// *************************************************************************** + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#ifdef SGI +#else +#include +#endif + +#include "Word.hh" + +namespace PP +{ +using std::string; +using std::cout; +using std::endl; +//using std::isdigit; +using std::stringstream; +using std::setprecision; +using std::vector; +using std::map; +using std::deque; +using std::pair; + + +// =========================================================================== +// Default constructor. 
+// =========================================================================== +Word::Word() +{ + wstr = ""; + init(); +} + + +// =========================================================================== +// Construct given a string. +// =========================================================================== +Word::Word(string s) +{ + wstr = s; + init(); + set_type(); +} + + +// =========================================================================== +// Construct given a string. Also set the map of variables. +// =========================================================================== +Word::Word(string s, int lnum, int file_lnum, string fname, + deque *lstr) +{ + wstr = s; + init(); + line_number = lnum; + file_line_number = file_lnum; + filename = fname; + lines = lstr; + set_type(); +} + + +// =========================================================================== +// Construct given a double +// =========================================================================== +Word::Word(double d, int lnum, int file_lnum, string fname, + deque *lstr) +{ + stringstream ss; + ss << setprecision(15) << d; + wstr = ss.str(); + init(); + type = DOUBLE; + line_number = lnum; + file_line_number = file_lnum; + filename = fname; + lines = lstr; +} + + +// =========================================================================== +// Construct given an integer. +// =========================================================================== +Word::Word(int ia, int lnum, int file_lnum, string fname, + deque *lstr) +{ + stringstream ss; + ss << ia; + wstr = ss.str(); + init(); + type = DOUBLE; + line_number = lnum; + file_line_number = file_lnum; + filename = fname; + lines = lstr; +} + + +// =========================================================================== +/*! = operator. 
*/ +// =========================================================================== +Word Word::operator=(const Word &ws) +{ + if (&ws == this) return *this; + wstr = ws.wstr; + processed = ws.processed; + type = ws.type; + negate = ws.negate; + line_number = ws.line_number; + file_line_number = ws.file_line_number; + filename = ws.filename; + lines = ws.lines; + multiplicity = ws.multiplicity; + op_level = ws.op_level; + op_type = ws.op_type; + return *this; +} + + +// =========================================================================== +/*! Copy constructor. */ +// =========================================================================== +Word::Word(const Word &ws) +{ + wstr = ws.wstr; + processed= ws.processed; + type = ws.type; + negate = ws.negate; + line_number = ws.line_number; + file_line_number = ws.file_line_number; + filename = ws.filename; + lines = ws.lines; + multiplicity = ws.multiplicity; + op_level = ws.op_level; + op_type = ws.op_type; +} + + +// =========================================================================== +// Common initialization routine called from constructors. +// =========================================================================== +void Word::init() +{ + processed = false; + type = WUNKNOWN; + negate = false; + lines = NULL; + line_number = 0; + file_line_number = 0; + filename = ""; + multiplicity = 1; + op_level = -1; + op_type = ""; +} + + + +// =========================================================================== +/*! Destructor */ +// =========================================================================== +Word::~Word() +{ +} + + +// *************************************************************************** +// *************************************************************************** +// *************************************************************************** +// Change the value of a word. 
+// *************************************************************************** +// *************************************************************************** +// *************************************************************************** + +// =========================================================================== +// Set the word to a double. +// =========================================================================== +void Word::set_value(double d) +{ + stringstream ss; + ss << setprecision(15) << d; + wstr = ss.str(); + set_type(); +} + + +// =========================================================================== +// Set the word to a string. +// =========================================================================== +void Word::set_value(string s) +{ + wstr = s; + set_type(); +} + + +// =========================================================================== +// Set the word to a string. +// Use this when you want to do set_value("lasjdf"), otherwise c++ cannot +// get which set_type to use (and it does not tell you it is having trouble). +// =========================================================================== +void Word::set_value(const char *s) +{ + wstr = s; + set_type(); +} + + +// =========================================================================== +// Set the word to a boolean. +// =========================================================================== +void Word::set_value(bool b) +{ + if (!b) wstr = "false"; + if (b) wstr = "true"; + set_type(); +} + + + + +// *************************************************************************** +// *************************************************************************** +// *************************************************************************** +// This section handles the type of the word, whether it is an operator, +// a function, a string, etc. 
+// *************************************************************************** +// *************************************************************************** +// *************************************************************************** + + +// =========================================================================== +/*! Set the type of word. */ +// =========================================================================== +void Word::set_type() +{ + // Make sure the type is initialized. If the word is not anything else, + // then it is a string. + type = WSTRING; + + // Just for convenience. + int len = (int)wstr.size(); + + // First determine if the word starts and ends with quotes. If it does, + // then it is a string. We do not strip off the quote symbols at this + // point because we might be in a comment region where the quotes donot + // matter. Later after the comments are stripped out, we check for + // matching quotes and remove them. + if ((wstr[0] == '\"') || (wstr[0] == '\'') || + (wstr[len-1] == '\"') || (wstr[len-1] == '\'')) { + type = WSTRING; + //wstr.erase(wstr.end() - 1); + //wstr.erase(wstr.begin()); + return; + } + + // Check for a delimiter. + if (wstr == "(") { type = OPEN_PARENS; return; } + if (wstr == ")") { type = CLOSED_PARENS; return; } + if (wstr == "[") { type = OPEN_SQUARE_BRACKET; return; } + if (wstr == "]") { type = CLOSED_SQUARE_BRACKET; return; } + if (wstr == "{") { type = OPEN_BRACE; return; } + if (wstr == "}") { type = CLOSED_BRACE; return; } + + // Comma is used for a couple of things. + if (wstr == ",") { type = COMMA; return; } + + // Variables always begin with $. Of course, if the word is in quotes it is + // not a variable even if it does begin with $. + if (wstr[0] == '$') { type = VARIABLE; return; } + + // Check for an operator. 
+ if (wstr == "++") { type=OPERATOR; op_level=7; op_type="arithmetic"; return; } + if (wstr == "--") { type=OPERATOR; op_level=7; op_type="arithmetic"; return; } + + if (wstr == "**") { type=OPERATOR; op_level=6; op_type="arithmetic"; return; } + + // Do not implement the % operator, it is too much like the fortran % + // operator which is for referencing components of a fortran structure. + //if (wstr == "%") { type=OPERATOR; op_level=5; op_type="arithmetic"; return; } + if (wstr == "*") { type=OPERATOR; op_level=5; op_type="arithmetic"; return; } + if (wstr == "/") { type=OPERATOR; op_level=5; op_type="arithmetic"; return; } + + if (wstr == "+") { type=OPERATOR; op_level=4; op_type="arithmetic"; return; } + if (wstr == "-") { type=OPERATOR; op_level=4; op_type="arithmetic"; return; } + + if (wstr == ".gt.") { type=OPERATOR; op_level=3; op_type="relational"; return; } + if (wstr == ".ge.") { type=OPERATOR; op_level=3; op_type="relational"; return; } + if (wstr == ".lt.") { type=OPERATOR; op_level=3; op_type="relational"; return; } + if (wstr == ".le.") { type=OPERATOR; op_level=3; op_type="relational"; return; } + if (wstr == ".eq.") { type=OPERATOR; op_level=3; op_type="relational"; return; } + if (wstr == ".ne.") { type=OPERATOR; op_level=3; op_type="relational"; return; } + + if (wstr == ".hggt.") { type=OPERATOR; op_level=3; op_type="relational"; return; } + if (wstr == ".hgge.") { type=OPERATOR; op_level=3; op_type="relational"; return; } + if (wstr == ".hglt.") { type=OPERATOR; op_level=3; op_type="relational"; return; } + if (wstr == ".hgle.") { type=OPERATOR; op_level=3; op_type="relational"; return; } + if (wstr == ".hgeq.") { type=OPERATOR; op_level=3; op_type="relational"; return; } + if (wstr == ".hgne.") { type=OPERATOR; op_level=3; op_type="relational"; return; } + + if (wstr == ".not.") { type=OPERATOR; op_level=2; op_type="logical"; return; } + + if (wstr == ".and.") { type=OPERATOR; op_level=1; op_type="logical"; return; } + + if (wstr == ".or.") 
{ type=OPERATOR; op_level=0; op_type="logical"; return; } + + // Equals sign. + if (wstr == "=") { type = EQUALS; return; } + + // At this point the word is either a string or a number. + + // If the word begins with a + or - sign, then it is numeric. + bool start_with_pm = false; + if (wstr[0] == '+') start_with_pm = true; + if (wstr[0] == '-') start_with_pm = true; + + // If the word does not begin with a + or - sign or a digit, or a + // ., or an e or a d then it is a string. + if (!start_with_pm) { + if(!isdigit(wstr[0])) { + if (wstr[0] != '.') { + if (wstr[0] != 'e') { + if (wstr[0] != 'E') { + if (wstr[0] != 'd') { + if (wstr[0] != 'D') { + type = WSTRING; return; + } + } + } + } + } + } + } + + // Check for all digits, i.e. an integer. + bool is_number = true; + int istart = 0; + if (start_with_pm) istart = 1; + for (int i=istart; i<(int)wstr.size(); i++) { + if (!isdigit(wstr[i])) { + is_number = false; + break; + } + } + if (is_number) { type = INTEGER; return; } + + // Check for floating point. + // If there is anything in the string that is not a component of a + // floating point number, then that makes it a string. + is_number = true; + for (int i=0; i<(int)wstr.size(); i++) { + if (!isdigit(wstr[i]) && wstr[i]!='.' && wstr[i]!='e' && wstr[i]!='E' && + wstr[i]!='d' && wstr[i]!='D' && wstr[i]!='+' && wstr[i]!='-') { + is_number = false; + break; + } + } + //if (is_number) { type = DOUBLE; return; } + if (!is_number) { type = WSTRING; return; } + + // We suspect a floating point number. + // At this point, everything in the string is a component of a floating + // point number, i.e. "+ - digit e E d D ." + type = DOUBLE; + + // Check that strings that start with "e E d D" really are numbers. + // And for numbers like e+01, e-05, etc, the atof or strtod functions do not + // interpret those as numbers, therefore we insert a 1 in front of the e + // so that atof and strtod will call it a number. 
+ if (wstr[0] == 'e' || wstr[0] == 'E' || wstr[0] == 'd' || wstr[0] == 'D') { + + // Check for proper syntax after the "e E d D". + if (!check_after_e(wstr, 1, (int)wstr.size()-1)) { + type = WSTRING; + return; + } + + // This appears to be a number, insert the digit. + wstr.insert(0, "1"); + return; + } + + // The string appears to be a floating point number (fpn), check syntax. + // First, find the location of the "e E d D". Check that there can be + // only one "e E d D" in the string. + int ie = -1; + for (int i=0; i<(int)wstr.size(); i++) { + if (wstr[i] == 'e' || wstr[i] == 'E' || wstr[i] == 'd' || + wstr[i] == 'D') { + ie = i; + break; + } + } + if (ie > -1) { + for (int i=ie+1; i<(int)wstr.size(); i++) { + if (wstr[i] == 'e' || wstr[i] == 'E' || wstr[i] == 'd' || + wstr[i] == 'D') { + type = WSTRING; + return; + } + } + } + + // Check that the characters before the "e E d D" are valid. If no + // "e E d D" was found then check the entire string as if it preceeded + // an "e E d D". + int ic1 = 0; + int ic2 = (int)wstr.size() - 1; + if (ie > -1) { + ic2 = ie - 1; + } + if (!check_before_e(wstr, ic1, ic2)) { + type = WSTRING; + return; + } + + // All other cases handled, this must be a fpn (type DOUBLE). + return; +} + + +// =========================================================================== +// The input string, s, could be a floating point number (fpn). It has been +// determined that s contains an "e E d D" located at position i2+1. Check +// everything before the "e E d D", postions i1 through i2 inclusive to +// verify that this is a fpn. +// +// It is also possible that an "e E d D" was not found in which case the +// entire string is checked as if it preceeded an "e E d D". 
+// +// Return false if this is a string +// true if this could be a fpn +// =========================================================================== +bool Word::check_before_e(string s, int i1, int i2) +{ + // If there is nothing before the "e E d D" then this still could be a fpn. + int size = i2 - i1 + 1; + if (size < 1) return true; + + // The first character could be "+ -", but the remaining characters + // cannot be "+ -". + int ie1 = i1; + if (s[i1] == '+' || s[i1] == '-') ie1 = i1+1; + for (int i=ie1; i<=i2; i++) { + if (s[i] == '+' || s[i] == '-') return false; + } + + // Locate the optional "." character. There can only be one ".". + int pointdex = -1; + for (int i=ie1; i<=i2; i++) { + if (s[i] == '.') { + pointdex = i; + break; + } + } + if (pointdex > -1) { + for (int i=pointdex+1; i<=i2; i++) { + if (s[i] == '.') return false; + } + } + + + // Everything before and after the point must be a digit (except + // that the very first character could be "+ -"). + if (pointdex > -1) { + for (int i=ie1; i> iret; + // Apply the negate flag if it is turned on. + if (negate) iret *= -1; + return iret; +} + +// =========================================================================== +// Get the word as a double, error processing version. +// =========================================================================== +double Word::get_double(stringstream &serr, int &ierr) +{ + // Mark this word as having been processed. + processed = true; + + // The word must at least be a number. + if ((type != DOUBLE) && (type != INTEGER)) { + if (lines != NULL) { + serr << endl; + serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl; + serr << " " << (*lines)[line_number-1] << endl; + serr << "in file: " << filename << endl; + } + serr << "Expected a numerical value." << endl; + serr << "Instead got: " << wstr << endl << endl; + ierr = 2; + return 0; + } + + // Use a temporary string that might be modified. 
+ string s = wstr; + + // We allow exponents using d and D in addition to e and E, for example + // 1.d14 or -1.38D-18. The problem with this is that atof and strtod + // do not allow d or D, therefore we have to replace d or D with e + // before sending it to atof or strtod. + if (type == DOUBLE) { + for (int i=0; i<(int)s.size(); i++) { + if (s[i] == 'd') s[i] = 'e'; + if (s[i] == 'D') s[i] = 'e'; + } + } + + // Convert the string to a double. + double d = atof(s.c_str()); + + // Apply the negate flag if it is turned on. + if (negate) d *= -1.0; + return d; +} + + +// =========================================================================== +// Get the word as a single character, error processing version. +// =========================================================================== +char Word::get_single_char(stringstream &serr, int &ierr) +{ + // To suppress compiler warnings of unused parameters + //assert(serr == serr); + assert(ierr == ierr); + + // Mark this word as having been processed. + processed = true; + + return wstr[0]; +} + + +// =========================================================================== +// Get the word as an int, no error processing version. 
+// =========================================================================== +int Word::get_int() +{ + int dummy; + return get_val( dummy ); +} + +// support uint64_t +int64_t Word::get_int64_t() +{ + int64_t dummy; + return get_val( dummy ); +} + +// =========================================================================== +// Get the word as a float +// =========================================================================== +float Word::get_float() +{ + float dummy; + return get_val( dummy ); +} + +// =========================================================================== +// Get the word as a double +// =========================================================================== +double Word::get_double() +{ + double dummy; + return get_val( dummy ); +} + + +// =========================================================================== +// Get the word as a Type T. +// =========================================================================== +template< class T > +T Word::get_val( T &dummyValue ) +{ + // To suppress compiler warnings of unused parameters + assert(dummyValue == dummyValue); + + T retValue; + + // Mark this word as having been processed. + processed = true; + + // Convert the word to the requested data type. + retValue = convertFromString( retValue, wstr ); + + return retValue; +} + +//! Explicit instantiation of supported template types. If more types are +//! needed those explicit versions must be listed here. We are not using +//! automatic inclusion (we would need to move the function definition into +//! the header file for that). The listed versions below are the only ones +//! that will be included in the library. 
+template int Word::get_val( int& ); +template int64_t Word::get_val( int64_t& ); +template float Word::get_val( float& ); +template double Word::get_val( double& ); +//template bool Word::get_val( bool& ); +template string Word::get_val( string& ); + +// =========================================================================== +// Convert from string to return type explicitly. +// =========================================================================== + +//! rtti is only used for type identification (each overloaded function must +//! have a unique signature. The return value is not part of the signature). + +// Convert string to integer. +int Word::convertFromString( const int &rtti, const string &s ) const +{ + // To suppress compiler warnings of unused parameters + assert(rtti == rtti); + + int iret = atoi( s.c_str() ); + if (negate) iret *= -1; + return iret; +} + +// Convert string to int64_t. +int64_t Word::convertFromString( const int64_t &rtti, const string &s ) const +{ + // To suppress compiler warnings of unused parameters + assert(rtti == rtti); + + int64_t iret; + std::stringstream( s ) >> iret; + if (negate) iret *= -1; + return iret; +} + +// Convert string to string (do nothing). +string Word::convertFromString( const string &rtti, const string &s ) const +{ + // To suppress compiler warnings of unused parameters + assert(rtti == rtti); + + return s; +} + +// Convert string to float. +float Word::convertFromString( const float &rtti, const string &s ) const +{ + // To suppress compiler warnings of unused parameters + assert(rtti == rtti); + + // Use a temporary string that might be modified. + string sm = s; + + // We allow exponents using d and D in addition to e and E, for example + // 1.d14 or -1.38D-18. The problem with this is that atof and strtod + // do not allow d or D, therefore we have to replace d or D with e + // before sending it to atof or strtod. 
+ for (int i=0; i<(int)sm.size(); i++) { + if (sm[i] == 'd') sm[i] = 'e'; + if (sm[i] == 'D') sm[i] = 'e'; + } + + float f = (float)atof( sm.c_str() ); + if (negate) f *= -1.; + return f; +} + +// Convert string to double. +double Word::convertFromString( const double &rtti, const string &s ) const +{ + // To suppress compiler warnings of unused parameters + assert(rtti == rtti); + + // Use a temporary string that might be modified. + string sm = s; + + // We allow exponents using d and D in addition to e and E, for example + // 1.d14 or -1.38D-18. The problem with this is that atof and strtod + // do not allow d or D, therefore we have to replace d or D with e + // before sending it to atof or strtod. + for (int i=0; i<(int)sm.size(); i++) { + if (sm[i] == 'd') sm[i] = 'e'; + if (sm[i] == 'D') sm[i] = 'e'; + } + + double d = atof( sm.c_str() ); + if (negate) d *= -1.; + return d; +} + + +// *************************************************************************** +// *************************************************************************** +// *************************************************************************** +// Utility functions. +// *************************************************************************** +// *************************************************************************** +// *************************************************************************** + + +// =========================================================================== +// Return the word as a string for printing. This is usually just the word +// but if it has multiplicity, then include that in the return string. 
+// =========================================================================== +string Word::get_print_string(bool enc_quotes) +{ + bool equotes = true; + if (!enc_quotes) equotes = false; + if (wstr == "true") equotes = false; + if (wstr == "false") equotes = false; + + + string sq = ""; + + if (equotes) { + if (type == WSTRING) sq = "\""; + sq += wstr; + if (type == WSTRING) sq += "\""; + } + else { + sq = wstr; + } + + if (multiplicity <= 1) return sq; + + stringstream ss; + ss << multiplicity; + string s = ""; + s = ss.str() + "*" + sq; + return s; +} + + +// =========================================================================== +// If a word starts or ends with quotes, make sure the quotes match, if not +// generate a fatal error, and then strip off the quotes. +// =========================================================================== +void Word::handle_quotes(stringstream &serr, int &ierr) +{ + // Just for convenience. + int len = (int)wstr.size(); + + // Check for matching quotes, generate a fatal error if they do not match. + bool ferr = false; + if ((wstr[0] == '\"') && (wstr[len-1] != '\"')) ferr = true; + if ((wstr[0] == '\'') && (wstr[len-1] != '\'')) ferr = true; + if ((wstr[len-1] == '\"') && (wstr[0] != '\"')) ferr = true; + if ((wstr[len-1] == '\'') && (wstr[0] != '\'')) ferr = true; + + if (ferr) { + fatal_error(serr, ierr); + serr << "Quotes mismatch found." << endl; + serr << "A starting quotes must have a closing quotes." << endl; + serr << "Double quotes, \", must be matched with double quotes." + << endl; + serr << "Single quotes, \', must be matched with single quotes." + << endl; + ierr = 2; + } + + // Determine if the word starts and ends with quotes. If it does, + // then we strip off the quote symbols. 
+ if (((wstr[0] == '\"') || (wstr[0] == '\'')) && + ((wstr[len-1] == '\"') || (wstr[len-1] == '\''))) { + wstr.erase(wstr.end() - 1); + wstr.erase(wstr.begin()); + return; + } +} + + +// =========================================================================== +// Erase a single character from the word, ic is the index of the character +// to be erased (starting from 0). +// =========================================================================== +void Word::erase_char(int ic) +{ + if (ic >= (int)wstr.size()) return; + wstr.erase(wstr.begin() + ic); +} + + + +// =========================================================================== +// Fatal error +// =========================================================================== +void Word::fatal_error(stringstream &serr, int &ierr) +{ + // To suppress compiler warnings of unused parameters + assert(ierr == ierr); + + serr << endl; + serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl; + serr << " " << (*lines)[line_number-1] << endl; + serr << "in file: " << filename << endl; +} + +void Word::warning(stringstream &serr, int &ierr) +{ + // To suppress compiler warnings of unused parameters + assert(ierr == ierr); + + serr << endl; + serr << "*** WARNING in line " << file_line_number << ":" << endl; + serr << " " << (*lines)[line_number-1] << endl; + serr << "in file: " << filename << endl; +} + + +// =========================================================================== +// Negate a word. +// =========================================================================== +void Word::negate_value() +{ + int len = (int)wstr.size(); + + if (type == INTEGER || type == DOUBLE) { + negate = false; + + // If the string starts with a - sign, then delete it. 
+ for (int i=0; i +#include +#include +#include +#include +#include +#include +//#include "graphics/display.h" +#include "graphics.h" +#include "input.h" +#include "mesh.h" +#include "partition.h" +#include "state.h" +#include "timer.h" +#include "memstats.h" +#include "crux.h" +#include "PowerParser.hh" +#include "MallocPlus.h" +#ifdef HAVE_ITTNOTIFY +#include +#endif + +using namespace PP; + +#ifdef _OPENMP +#include +#endif + +#ifndef DEBUG +#define DEBUG 0 +#endif +#undef DEBUG_RESTORE_VALS + +#define MIN3(x,y,z) ( min( min(x,y), z) ) + +static int do_cpu_calc = 1; +static int do_gpu_calc = 0; + +typedef unsigned int uint; + +static bool do_display_graphics = false; + +#ifdef HAVE_GRAPHICS +static double circle_radius=-1.0; +#ifdef FULL_PRECISION + void (*set_display_cell_coordinates)(double *, double *, double *, double *) = &set_display_cell_coordinates_double; + void (*set_display_cell_data)(double *) = &set_display_cell_data_double; +#else + void (*set_display_cell_coordinates)(float *, float *, float *, float *) = &set_display_cell_coordinates_float; + void (*set_display_cell_data)(float *) = &set_display_cell_data_float; +#endif +#endif + +static int view_mode = 0; + +#ifdef FULL_PRECISION +#define SUM_ERROR 2.0e-16 + void (*set_graphics_cell_coordinates)(double *, double *, double *, double *) = &set_graphics_cell_coordinates_double; + void (*set_graphics_cell_data)(double *) = &set_graphics_cell_data_double; +#else +#define SUM_ERROR 1.0e-8 + void (*set_graphics_cell_coordinates)(float *, float *, float *, float *) = &set_graphics_cell_coordinates_float; + void (*set_graphics_cell_data)(float *) = &set_graphics_cell_data_float; +#endif + +void store_crux_data(Crux *crux, int ncycle); +void restore_crux_data_bootstrap(Crux *crux, char *restart_file, int rollback_counter); +void restore_crux_data(Crux *crux); + +bool restart, // Flag to start from a back up file; init in input.cpp::parseInput(). 
+ verbose, // Flag for verbose command-line output; init in input.cpp::parseInput(). + localStencil, // Flag for use of local stencil; init in input.cpp::parseInput(). + face_based, // Flag for face-based finite difference; + outline; // Flag for drawing outlines of cells; init in input.cpp::parseInput(). +int outputInterval, // Periodicity of output; init in input.cpp::parseInput(). + crux_type, // Type of checkpoint/restart -- CRUX_NONE, CRUX_IN_MEMORY, CRUX_DISK; + // init in input.cpp::parseInput(). + enhanced_precision_sum,// Flag for enhanced precision sum (default true); init in input.cpp::parseInput(). + lttrace_on, // Flag to turn on logical time trace package; + do_quo_setup, // Flag to turn on quo dynamic scheduling policies package; + levmx, // Maximum number of refinement levels; init in input.cpp::parseInput(). + nx, // x-resolution of coarse grid; init in input.cpp::parseInput(). + ny, // y-resolution of coarse grid; init in input.cpp::parseInput(). + niter, // Maximum iterations; init in input.cpp::parseInput(). + graphic_outputInterval, // Periodicity of graphic output that is saved; init in input.cpp::parseInput() + checkpoint_outputInterval, // Periodicity of checkpoint output that is saved; init in input.cpp::parseInput() + num_of_rollback_states,// Maximum number of rollback states to maintain; init in input.cpp::parseInput() + backup_file_num,// Backup file number to restart simulation from; init in input.cpp::parseInput() + numpe, // + ndim = 2; // Dimensionality of problem (2 or 3). +double upper_mass_diff_percentage; // Flag for the allowed pecentage difference to the total + // mass per output intervals; init in input.cpp::parseInput(). + +char *restart_file; + +static int it = 0; + +enum partition_method initial_order, // Initial order of mesh. + cycle_reorder; // Order of mesh every cycle. 
+static Mesh *mesh; // Object containing mesh information +static State *state; // Object containing state information corresponding to mesh +static Crux *crux; // Object containing checkpoint/restart information +static PowerParser *parse; // Object containing input file parsing + +static real_t circ_radius = 0.0; +static int next_cp_cycle = 0; +static int next_graphics_cycle = 0; + +// Set up timing information. +static struct timeval tstart, tstart_cpu, tstart_partmeas; + +static double H_sum_initial = 0.0; +static double cpu_time_graphics = 0.0; +static double cpu_time_calcs = 0.0; +static double cpu_time_partmeas = 0.0; + +static int ncycle = 0; +static double simTime = 0.0; +static double deltaT = 0.0; +char total_sim_time_log[] = {"total_execution_time.log"}; +struct timeval total_exec; + +static int mype=0; +int main(int argc, char **argv) { + + // Needed for code to compile correctly on the Mac + int numpe=-1; + + // Process command-line arguments, if any. + parseInput(argc, argv); + +#ifdef _OPENMP + int nt = 0; + int tid = 0; + + nt = omp_get_max_threads(); + tid = omp_get_thread_num(); + if (0 == tid && mype == 0) { + printf("--- max num openmp threads: %d\n", nt); + } +#pragma omp parallel firstprivate(nt, tid) + { + nt = omp_get_num_threads(); + tid = omp_get_thread_num(); + +#pragma omp master + if (mype == 0) { + printf("--- num openmp threads in parallel region: %d\n", nt); + } + } +#endif + + parse = new PowerParser(); + + struct timeval tstart_setup; + cpu_timer_start(&tstart_setup); + + crux = new Crux(crux_type, num_of_rollback_states, restart); + + circ_radius = 6.0; + // Scale the circle appropriately for the mesh size. 
+ circ_radius = circ_radius * (real_t) nx / 128.0; + int boundary = 1; + int parallel_in = 0; + double deltax_in = 1.0; + double deltay_in = 1.0; + + if (restart){ + restore_crux_data_bootstrap(crux, restart_file, 0); + mesh = new Mesh(nx, ny, levmx, ndim, deltax_in, deltay_in, boundary, parallel_in, do_gpu_calc); + mesh->init(nx, ny, circ_radius, initial_order, do_gpu_calc); + + state = new State(mesh); + restore_crux_data(crux); + mesh->proc.resize(mesh->ncells); + mesh->calc_distribution(numpe); + } else { + mesh = new Mesh(nx, ny, levmx, ndim, deltax_in, deltay_in, boundary, parallel_in, do_gpu_calc); + if (DEBUG) { + //if (mype == 0) mesh->print(); + + char filename[10]; + sprintf(filename,"out%1d",mype); + mesh->fp=fopen(filename,"w"); + + //mesh->print_local(); + } + + mesh->init(nx, ny, circ_radius, initial_order, do_gpu_calc); + state = new State(mesh); + state->init(do_gpu_calc); + mesh->proc.resize(mesh->ncells); + mesh->calc_distribution(numpe); + state->fill_circle(circ_radius, 100.0, 7.0); + } + + size_t &ncells = mesh->ncells; + + if (graphic_outputInterval > niter) next_graphics_cycle = graphic_outputInterval; + if (checkpoint_outputInterval > niter) next_cp_cycle = checkpoint_outputInterval; + + + // Kahan-type enhanced precision sum implementation. 
+ double H_sum = state->mass_sum(enhanced_precision_sum); + if (mype == 0) printf ("Mass of initialized cells equal to %14.12lg\n", H_sum); + H_sum_initial = H_sum; + + if(upper_mass_diff_percentage < 0){ + upper_mass_diff_percentage = H_sum_initial * SUM_ERROR; + //printf("Setting sum mass error to %16.8lg\n",upper_mass_diff_percentage); + } + + double cpu_time_main_setup = cpu_timer_stop(tstart_setup); + #ifdef TIMING + mesh->parallel_output("CPU: setup time time was",cpu_time_main_setup, 0, "s"); + #endif + + long long mem_used = memstats_memused(); + #ifdef MEMORY + if (mem_used > 0) { + mesh->parallel_output("Memory used in startup ",mem_used, 0, "kB"); + mesh->parallel_output("Memory peak in startup ",memstats_mempeak(), 0, "kB"); + mesh->parallel_output("Memory free at startup ",memstats_memfree(), 0, "kB"); + mesh->parallel_output("Memory available at startup ",memstats_memtotal(), 0, "kB"); + } + #endif + + if (mype == 0) { + if (ncycle != 0){ + printf("Iteration %3d timestep %lf Sim Time %lf cells %ld Mass Sum %14.12lg\n", + ncycle, deltaT, simTime, ncells, H_sum); + } else { + printf("Iteration 0 timestep n/a Sim Time 0.0 cells %ld Mass Sum %14.12lg\n", ncells, H_sum); + } + } + + for (int i = 0; i < MESH_COUNTER_SIZE; i++){ + mesh->cpu_counters[i]=0; + } + for (int i = 0; i < MESH_TIMER_SIZE; i++){ + mesh->cpu_timers[i]=0.0; + } + + cpu_timer_start(&tstart_cpu); + +#ifdef HAVE_GRAPHICS + do_display_graphics = true; + set_display_mysize(ncells); + set_display_cell_coordinates(&mesh->x[0], &mesh->dx[0], &mesh->y[0], &mesh->dy[0]); + set_display_cell_data(&state->H[0]); + set_display_cell_proc(&mesh->proc[0]); + + set_display_window((float)mesh->xmin, (float)mesh->xmax, + (float)mesh->ymin, (float)mesh->ymax); + set_display_outline((int)outline); + set_display_viewmode(view_mode); +#endif + + if (ncycle == next_graphics_cycle){ + set_graphics_outline(outline); + set_graphics_window((float)mesh->xmin, (float)mesh->xmax, + (float)mesh->ymin, 
(float)mesh->ymax); + set_graphics_mysize(ncells); + set_graphics_cell_coordinates(&mesh->x[0], &mesh->dx[0], + &mesh->y[0], &mesh->dy[0]); + set_graphics_cell_data(&state->H[0]); + set_graphics_cell_proc(&mesh->proc[0]); + set_graphics_viewmode(view_mode); + + if (mype == 0) { + init_graphics_output(); + set_graphics_cell_proc(&mesh->proc[0]); + write_graphics_info(0,0,0.0,0,0); + } + next_graphics_cycle += graphic_outputInterval; + } + +#ifdef HAVE_GRAPHICS + set_display_circle_radius(circle_radius); + init_display(&argc, argv, "Shallow Water"); + draw_scene(); + //if (verbose) sleep(5); + sleep(2); + + // Clear superposition of circle on grid output. + circle_radius = -1.0; +#endif + cpu_time_graphics += cpu_timer_stop(tstart_cpu); + + // Set flag to show mesh results rather than domain decomposition. + view_mode = 1; + + if (ncycle == next_cp_cycle) store_crux_data(crux, ncycle); + + cpu_timer_start(&tstart); +#ifdef HAVE_GRAPHICS + set_idle_function(&do_calc); + start_main_loop(); +#else +#ifdef HAVE_ITTNOTIFY +__itt_resume(); +__SSC_MARK(0x111); +#endif + for (it = ncycle; it < 10000000; it++) { + do_calc(); + } +#ifdef HAVE_ITTNOTIFY +__itt_pause(); +__SSC_MARK(0x222); +#endif +#endif + + return 0; +} + +extern "C" void do_calc(void) +{ double g = 9.80; + double sigma = 0.95; + int icount, jcount; + static int rollback_attempt = 0; + static double total_program_time = 0; + + // Initialize state variables for GPU calculation. + size_t &ncells = mesh->ncells; + + vector mpot; + + size_t new_ncells = 0; + double H_sum = -1.0; + + // Main loop. + int endcycle = MIN3(niter, next_cp_cycle, next_graphics_cycle); + + cpu_timer_start(&tstart_cpu); + + for (int nburst = ncycle % outputInterval; nburst < outputInterval && ncycle < endcycle; nburst++, ncycle++) { + +#ifdef _OPENMP +#pragma omp parallel + { +#endif + // Calculate the real time step for the current discrete time step. 
+ double mydeltaT = state->set_timestep(g, sigma); // Private variable to avoid write conflict +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + deltaT = mydeltaT; + simTime += deltaT; +#ifdef _OPENMP + } +#endif + + mesh->calc_neighbors(ncells); + + cpu_timer_start(&tstart_partmeas); + mesh->partition_measure(); + +#ifdef _OPENMP +#pragma omp master +#endif + cpu_time_partmeas += cpu_timer_stop(tstart_partmeas); + + // Currently not working -- may need to be earlier? + //if (do_cpu_calc && ! mesh->have_boundary) { + // state->add_boundary_cells(mesh); + //} + + // Apply BCs is currently done as first part of gpu_finite_difference and so comparison won't work here + + mesh->set_bounds(ncells); + + // Execute main kernel + if (face_based) { + state->calc_finite_difference_via_faces(deltaT); + } else { + state->calc_finite_difference(deltaT); + } + + // Size of arrays gets reduced to just the real cells in this call for have_boundary = 0 + state->remove_boundary_cells(); +#ifdef _OPENMP + } // end parallel region +#endif + + mpot.resize(ncells); + new_ncells = state->calc_refine_potential(mpot, icount, jcount); + + // Resize the mesh, inserting cells where refinement is necessary. + +#ifdef _OPENMP +#pragma omp parallel + { +#endif + state->rezone_all(icount, jcount, mpot); + + // Clear does not delete mpot, so have to swap with an empty vector to get + // it to delete the mpot memory. 
This is all to avoid valgrind from showing + // it as a reachable memory leak +#ifdef _OPENMP +#pragma omp master + { +#endif + //mpot.clear(); + vector().swap(mpot); + + mesh->ncells = new_ncells; + ncells = new_ncells; +#ifdef _OPENMP + } +#pragma omp barrier +#endif + mesh->set_bounds(ncells); + +#ifdef _OPENMP +#pragma omp master + { +#endif + //cpu_timer_start(&tstart_check); + mesh->proc.resize(ncells); + if (icount) + { vector index(ncells); + mesh->partition_cells(numpe, index, cycle_reorder); + state->state_reorder(index); + state->memory_reset_ptrs(); + } + //cpu_time_check += cpu_timer_stop(tstart_check); +#ifdef _OPENMP + } +#pragma omp barrier +#endif + +#ifdef _OPENMP + } // end parallel region +#endif + + } // End burst loop + + cpu_time_calcs += cpu_timer_stop(tstart_cpu); + + H_sum = state->mass_sum(enhanced_precision_sum); + + int error_status = STATUS_OK; + + if (isnan(H_sum)) { + printf("Got a NAN on cycle %d\n",ncycle); + error_status = STATUS_NAN; + } + + double percent_mass_diff = fabs(H_sum - H_sum_initial)/H_sum_initial * 100.0; + if (percent_mass_diff >= upper_mass_diff_percentage) { + printf("Mass difference outside of acceptable range on cycle %d percent_mass_diff %lg upper limit %lg\n",ncycle,percent_mass_diff, upper_mass_diff_percentage); + error_status = STATUS_MASS_LOSS; + } + + if (error_status != STATUS_OK){ + if (crux_type != CRUX_NONE) { + + rollback_attempt++; + if (rollback_attempt > num_of_rollback_states) { + printf("Can not recover from error from back up files. 
Killing program...\n"); + total_program_time = cpu_timer_stop(total_exec); + FILE *fp = fopen(total_sim_time_log,"w"); + fprintf(fp,"The total execution time of the program before failure was %g seconds\n", total_program_time); + fclose(fp); + state->print_failure_log(ncycle, simTime, H_sum_initial, H_sum, percent_mass_diff, true); + exit(-1); + } + + if (graphic_outputInterval <= niter){ + mesh->calc_spatial_coordinates(0); + set_graphics_mysize(ncells); + set_graphics_viewmode(view_mode); + set_graphics_cell_coordinates(&mesh->x[0], &mesh->dx[0], &mesh->y[0], &mesh->dy[0]); + set_graphics_cell_data(&state->H[0]); + set_graphics_cell_proc(&mesh->proc[0]); + write_graphics_info(ncycle/graphic_outputInterval,ncycle,simTime,1,rollback_attempt); + } + + if((ncycle - (rollback_attempt)*checkpoint_outputInterval) < 0){ + printf("Rolling simulation back to to ncycle 0\n"); + } + else{ + printf("Rolling simulation back to to ncycle %d\n", ncycle - (rollback_attempt*checkpoint_outputInterval)); + } + + state->print_rollback_log(ncycle, simTime, H_sum_initial, H_sum, percent_mass_diff, rollback_attempt, num_of_rollback_states, error_status); + + int rollback_num = crux->get_rollback_number(); + + restore_crux_data_bootstrap(crux, NULL, rollback_num); + mesh->terminate(); + state->terminate(); + restore_crux_data(crux); + + + } else { + printf("failure.log has been created\n"); + state->print_failure_log(ncycle, simTime, H_sum_initial, H_sum, percent_mass_diff, true); + exit(-1); + } + } + + if (mype == 0 && ncycle % outputInterval == 0) { + printf("Iteration %3d timestep %lf Sim Time %lf cells %ld Mass Sum %14.12lg Mass Change %12.6lg\n", + ncycle, deltaT, simTime, ncells, H_sum, H_sum - H_sum_initial); + } + + if (ncycle == next_cp_cycle) store_crux_data(crux, ncycle); + + cpu_timer_start(&tstart_cpu); + + if(do_display_graphics || ncycle == next_graphics_cycle || + (ncycle >= niter && graphic_outputInterval < niter) ){ + + mesh->calc_spatial_coordinates(0); + } + + 
if(ncycle == next_graphics_cycle){ + set_graphics_mysize(ncells); + set_graphics_viewmode(view_mode); + set_graphics_cell_coordinates(&mesh->x[0], &mesh->dx[0], &mesh->y[0], &mesh->dy[0]); + set_graphics_cell_data(&state->H[0]); + set_graphics_cell_proc(&mesh->proc[0]); + + write_graphics_info(ncycle/graphic_outputInterval,ncycle,simTime,0,0); + next_graphics_cycle += graphic_outputInterval; + } + +#ifdef HAVE_GRAPHICS + if(ncycle % outputInterval == 0){ + if(ncycle != next_graphics_cycle){ + set_display_mysize(ncells); + set_display_viewmode(view_mode); + set_display_cell_coordinates(&mesh->x[0], &mesh->dx[0], &mesh->y[0], &mesh->dy[0]); + set_display_cell_data(&state->H[0]); + set_display_cell_proc(NULL); + } + set_display_circle_radius(circle_radius); + draw_scene(); + } + +#endif + + cpu_time_graphics += cpu_timer_stop(tstart_cpu); + + // Output final results and timing information. + if (ncycle >= niter) { + //free_display(); + + if(graphic_outputInterval < niter){ + cpu_timer_start(&tstart_cpu); + +#ifdef HAVE_GRAPHICS + set_display_viewmode(view_mode); + set_display_mysize(ncells); + set_display_cell_coordinates(&mesh->x[0], &mesh->dx[0], &mesh->y[0], &mesh->dy[0]); + set_display_cell_data(&state->H[0]); + set_display_cell_proc(NULL); +#endif + + if (mype == 0) { + write_graphics_info(ncycle/graphic_outputInterval,ncycle,simTime,0,0); + } + next_graphics_cycle += graphic_outputInterval; + + cpu_time_graphics += cpu_timer_stop(tstart_cpu); + } + + // Get overall program timing. 
+ double elapsed_time = cpu_timer_stop(tstart); + + long long mem_used = memstats_memused(); + #ifdef MEMORY + if (mem_used > 0) { + printf("Memory used %lld kB\n",mem_used); + printf("Memory peak %lld kB\n",memstats_mempeak()); + printf("Memory free %lld kB\n",memstats_memfree()); + printf("Memory available %lld kB\n",memstats_memtotal()); + } + #endif + state->output_timing_info(do_cpu_calc, do_gpu_calc, elapsed_time); + #ifdef TIMING + mesh->parallel_output("CPU: calc incl part meas time was",cpu_time_calcs, 0, "s"); + mesh->parallel_output("CPU: calculation only time was",cpu_time_calcs-cpu_time_partmeas, 0, "s"); + mesh->parallel_output("CPU: partition measure time was",cpu_time_partmeas, 0, "s"); + mesh->parallel_output("CPU: graphics time was",cpu_time_graphics, 0, "s"); + //mesh->parallel_output("CPU: check time was",cpu_time_check, 0, "s"); + #endif + + mesh->print_partition_measure(); + mesh->print_calc_neighbor_type(); + mesh->print_partition_type(); + + printf("CPU: rezone frequency \t %8.4f\tpercent\n", (double)mesh->get_cpu_counter(MESH_COUNTER_REZONE)/(double)ncycle*100.0 ); + printf("CPU: calc neigh frequency \t %8.4f\tpercent\n", (double)mesh->get_cpu_counter(MESH_COUNTER_CALC_NEIGH)/(double)ncycle*100.0 ); + printf("CPU: refine_smooth_iter per rezone \t %8.4f\t\n", (double)mesh->get_cpu_counter(MESH_COUNTER_REFINE_SMOOTH)/(double)mesh->get_cpu_counter(MESH_COUNTER_REZONE) ); + + mesh->terminate(); + state->terminate(); + + terminate_graphics_output(); + + delete mesh; + delete state; + delete crux; + delete parse; + + total_program_time = cpu_timer_stop(total_exec); + FILE *fp = fopen(total_sim_time_log,"w"); + fprintf(fp,"The total execution time of the program was %g seconds\n", total_program_time); + fclose(fp); + exit(0); + } // Complete final output. 
+ +} // end do_calc + +const int CRUX_CLAMR_VERSION = 101; +const int num_int_vals = 15; +const int num_double_vals = 5; + +MallocPlus clamr_bootstrap_memory; + +void store_crux_data(Crux *crux, int ncycle) +{ + size_t nsize = num_int_vals*sizeof(int) + + num_double_vals*sizeof(double); + nsize += state->get_checkpoint_size(); + + next_cp_cycle += checkpoint_outputInterval; + + int int_vals[num_int_vals]; + + int_vals[ 0] = CRUX_CLAMR_VERSION; // Version number + int_vals[ 1] = nx; + int_vals[ 2] = ny; + int_vals[ 3] = levmx; + int_vals[ 4] = ndim; + int_vals[ 5] = outputInterval; + int_vals[ 6] = enhanced_precision_sum; + int_vals[ 7] = niter; + int_vals[ 8] = it; + int_vals[ 9] = ncycle; + int_vals[10] = crux_type; + int_vals[11] = graphic_outputInterval; + int_vals[12] = checkpoint_outputInterval; + int_vals[13] = next_cp_cycle; + int_vals[14] = next_graphics_cycle; + + double double_vals[num_double_vals]; + double_vals[ 0] = circ_radius; + double_vals[ 1] = H_sum_initial; + double_vals[ 2] = simTime; + double_vals[ 3] = deltaT; + double_vals[ 4] = upper_mass_diff_percentage; + + clamr_bootstrap_memory.memory_add(int_vals, size_t(num_int_vals), 4, "bootstrap_int_vals", RESTART_DATA); + clamr_bootstrap_memory.memory_add(double_vals, size_t(num_double_vals), 8, "bootstrap_double_vals", RESTART_DATA); + + crux->store_begin(nsize, ncycle); + + crux->store_MallocPlus(clamr_bootstrap_memory); + + state->store_checkpoint(crux); + + crux->store_end(); + + clamr_bootstrap_memory.memory_remove(int_vals); + clamr_bootstrap_memory.memory_remove(double_vals); +} + +void restore_crux_data_bootstrap(Crux *crux, char *restart_file, int rollback_counter) +{ + crux->restore_begin(restart_file, rollback_counter); + + int int_vals[num_int_vals]; + + double double_vals[num_double_vals]; + + clamr_bootstrap_memory.memory_add(int_vals, size_t(num_int_vals), 4, "bootstrap_int_vals", RESTART_DATA); + clamr_bootstrap_memory.memory_add(double_vals, size_t(num_double_vals), 8, 
"bootstrap_double_vals", RESTART_DATA); + + crux->restore_MallocPlus(clamr_bootstrap_memory); + + if (int_vals[ 0] != CRUX_CLAMR_VERSION) { + printf("CRUX version mismatch for clamr data, version on file is %d, version in code is %d\n", + int_vals[0], CRUX_CLAMR_VERSION); + exit(0); + } + + nx = int_vals[ 1]; + ny = int_vals[ 2]; + levmx = int_vals[ 3]; + ndim = int_vals[ 4]; + outputInterval = int_vals[ 5]; + enhanced_precision_sum = int_vals[ 6]; + niter = int_vals[ 7]; + it = int_vals[ 8]; + ncycle = int_vals[ 9]; + crux_type = int_vals[10]; + graphic_outputInterval = int_vals[11]; + checkpoint_outputInterval = int_vals[12]; + next_cp_cycle = int_vals[13]; + next_graphics_cycle = int_vals[14]; + + circ_radius = double_vals[ 0]; + H_sum_initial = double_vals[ 1]; + simTime = double_vals[ 2]; + deltaT = double_vals[ 3]; + upper_mass_diff_percentage = double_vals[ 4]; + + // need to reset crux type, because initialize to none + // before checkpoint is read + crux->set_crux_type(crux_type); + + clamr_bootstrap_memory.memory_remove(int_vals); + clamr_bootstrap_memory.memory_remove(double_vals); + +#ifdef DEBUG_RESTORE_VALS + if (DEBUG_RESTORE_VALS) { + const char *int_vals_descriptor[num_int_vals] = { + "CRUX_CLAMR_VERSION", + "nx", + "ny", + "levmx", + "ndim", + "outputInterval", + "enhanced_precision_sum", + "niter", + "it", + "ncycle", + "crux_type", + "graphic_outputInterval", + "checkpoint_outputInterval", + "next_cp_cycle", + "next_graphics_cycle" + }; + printf("\n"); + printf(" === Restored bootstrap int_vals ===\n"); + for (int i = 0; i < num_int_vals; i++){ + printf(" %-30s %d\n",int_vals_descriptor[i], int_vals[i]); + } + printf(" === Restored bootstrap int_vals ===\n"); + printf("\n"); + } +#endif + +#ifdef DEBUG_RESTORE_VALS + if (DEBUG_RESTORE_VALS) { + const char *double_vals_descriptor[num_double_vals] = { + "circ_radius", + "H_sum_initial", + "simTime", + "deltaT", + "upper_mass_diff_percentage" + }; + printf("\n"); + printf(" === Restored bootstrap 
double_vals ===\n"); + for (int i = 0; i < num_double_vals; i++){ + printf(" %-30s %lg\n",double_vals_descriptor[i], double_vals[i]); + } + printf(" === Restored bootstrap double_vals ===\n"); + printf("\n"); + } +#endif +} + +void restore_crux_data(Crux *crux) +{ + state->restore_checkpoint(crux); + + crux->restore_end(); +} + + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/crux.h =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/crux.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2014, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. 
Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * CLAMR -- LA-CC-11-094 + * + * Authors: Brian Atkinson bwa@g.clemson.edu + Bob Robey XCP-2 brobey@lanl.gov + */ + +#ifndef CRUX_H_ +#define CRUX_H_ + +#include +#include "MallocPlus.h" + +enum crux_types{ + CRUX_NONE, + CRUX_DISK, + CRUX_IN_MEMORY +}; + +class Crux +{ + int num_of_rollback_states; + int crux_type; + int checkpoint_counter; + +public: + + Crux(int crux_type_in, int num_of_rollback_states_in, bool restart); + ~Crux(); + + void store_MallocPlus(MallocPlus memory); + void store_begin(size_t nsize, int ncycle); + void store_field_header(const char *name, int name_size); + void store_bools(bool *bool_vals, size_t nelem); + void store_ints(int *int_vals, size_t nelem); + void store_longs(long long *long_vals, size_t nelem); + void store_sizets(size_t *size_t_vals, size_t nelem); + void store_doubles(double *double_vals, size_t nelem); + void store_int_array(int *int_array, size_t nelem); + void store_long_array(long long *long_array, size_t nelem); + void 
store_float_array(float *float_array, size_t nelem); + void store_double_array(double *double_array, size_t nelem); + void store_replicated_int_array(int *int_array, size_t nelem); + void store_replicated_double_array(double *double_array, size_t nelem); + void store_named_ints(const char *name, int name_size, int *int_vals, size_t nelem); +#ifdef HAVE_MPI + void store_distributed_int_array(int *int_array, size_t nelem, int flags); + void store_distributed_double_array(double *double_array, size_t nelem, int flags); +#endif + void store_end(void); + + void restore_MallocPlus(MallocPlus memory); + void restore_begin(char *restart_file, int rollback_counter); + void restore_field_header(char *name, int name_size); + void restore_bools(bool *bool_vals, size_t nelem); + void restore_ints(int *int_vals, size_t nelem); + void restore_longs(long long *long_vals, size_t nelem); + void restore_sizets(size_t *size_t_vals, size_t nelem); + void restore_doubles(double *double_vals, size_t nelem); + int *restore_int_array(int *int_array, size_t nsize); + long long *restore_long_array(long long *long_array, size_t nsize); + float *restore_float_array(float *float_array, size_t nsize); + double *restore_double_array(double *double_array, size_t nsize); + int *restore_replicated_int_array(int *int_array, size_t nsize); + double *restore_replicated_double_array(double *double_array, size_t nsize); + void restore_named_ints(const char *name, int name_size, int *int_vals, size_t nelem); +#ifdef HAVE_MPI + int *restore_distributed_int_array(int *int_array, size_t nsize, int flags); + double *restore_distributed_double_array(double *double_array, size_t nsize, int flags); +#endif + void restore_end(void); + + int get_rollback_number(); + void set_crux_type(int crux_type_in); + +}; +#endif // CRUX_H_ Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/crux.cpp =================================================================== --- /dev/null +++ 
MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/crux.cpp @@ -0,0 +1,1054 @@ +/* + * Copyright (c) 2014, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * CLAMR -- LA-CC-11-094 + * + * Authors: Brian Atkinson bwa@g.clemson.edu + Bob Robey XCP-2 brobey@lanl.gov + */ + +#include +#include +#include +#include +#include +#include +#include +#include "PowerParser.hh" + +#include "crux.h" +#include "timer.h" +#include "fmemopen.h" + +#ifdef HAVE_HDF5 +#include "hdf5.h" +#endif +#ifdef HAVE_MPI +#include "mpi.h" +#endif + +const bool CRUX_TIMING = true; +bool do_crux_timing = false; + +#define RESTORE_NONE 0 +#define RESTORE_RESTART 1 +#define RESTORE_ROLLBACK 2 + +#ifndef DEBUG +#define DEBUG 0 +#endif +#define DEBUG_RESTORE_VALS 1 + +using namespace std; +using PP::PowerParser; +// Pointers to the various objects. 
+PowerParser *parse; + +char checkpoint_directory[] = "checkpoint_output"; +int cp_num, rs_num; +int *backup; +void **crux_data; +size_t *crux_data_size; +#ifdef HAVE_HDF5 +bool USE_HDF5 = true; //MSB +hid_t h5_fid; +herr_t h5err; +bool is_restart = false; + +hid_t create_hdf5_parallel_file_plist(); + +void map_name_to_hdf5 (const char*, int, char*, char*); + +void access_named_hdf5_values (const char *name, int name_size, + hsize_t rank, hsize_t *cur_size, + void *values, hid_t datatype, + bool store); +#endif + + +FILE *crux_time_fp; +struct timeval tcheckpoint_time; +struct timeval trestore_time; +int checkpoint_timing_count = 0; +float checkpoint_timing_sum = 0.0f; +float checkpoint_timing_size = 0.0f; +int rollback_attempt = 0; +FILE *store_fp, *restore_fp; +#ifdef HAVE_MPI +static MPI_File mpi_store_fp, mpi_restore_fp; +#endif +static int mype = 0, npes = 1; + +Crux::Crux(int crux_type_in, int num_of_rollback_states_in, bool restart) +{ +#ifdef HAVE_MPI + MPI_Comm_rank(MPI_COMM_WORLD,&mype); + MPI_Comm_size(MPI_COMM_WORLD,&npes); +#endif + + num_of_rollback_states = num_of_rollback_states_in; + crux_type = crux_type_in; + checkpoint_counter = 0; + + if (crux_type != CRUX_NONE || restart){ + do_crux_timing = CRUX_TIMING; + struct stat stat_descriptor; + if (stat(checkpoint_directory,&stat_descriptor) == -1){ + mkdir(checkpoint_directory,0777); + } + } + + crux_data = (void **)malloc(num_of_rollback_states*sizeof(void *)); + for (int i = 0; i < num_of_rollback_states; i++){ + crux_data[i] = NULL; + } + crux_data_size = (size_t *)malloc(num_of_rollback_states*sizeof(size_t)); + + + if (do_crux_timing){ + char checkpointtimelog[60]; + sprintf(checkpointtimelog,"%s/crux_timing.log",checkpoint_directory); + crux_time_fp = fopen(checkpointtimelog,"w"); + } +} + +Crux::~Crux() +{ + for (int i = 0; i < num_of_rollback_states; i++){ + free(crux_data[i]); + } + free(crux_data); + free(crux_data_size); + + if (do_crux_timing){ + if (checkpoint_timing_count > 0) { + 
printf("CRUX checkpointing time averaged %f msec, bandwidth %f Mbytes/sec\n", + checkpoint_timing_sum/(float)checkpoint_timing_count*1.0e3, + checkpoint_timing_size/checkpoint_timing_sum*1.0e-6); + + fprintf(crux_time_fp,"CRUX checkpointing time averaged %f msec, bandwidth %f Mbytes/sec\n", + checkpoint_timing_sum/(float)checkpoint_timing_count*1.0e3, + checkpoint_timing_size/checkpoint_timing_sum*1.0e-6); + + fclose(crux_time_fp); + } + } +} + +void Crux::store_MallocPlus(MallocPlus memory){ + + malloc_plus_memory_entry *memory_item; + + for (memory_item = memory.memory_entry_by_name_begin(); + memory_item != memory.memory_entry_by_name_end(); + memory_item = memory.memory_entry_by_name_next() ){ + + void *mem_ptr = memory_item->mem_ptr; + if ((memory_item->mem_flags & RESTART_DATA) == 0) continue; + + + + if (DEBUG) { + printf("MallocPlus ptr %p: name %10s ptr %p dims %lu nelem (", + mem_ptr,memory_item->mem_name,memory_item->mem_ptr,memory_item->mem_ndims); + + char nelemstring[80]; + char *str_ptr = nelemstring; + str_ptr += sprintf(str_ptr,"%lu", memory_item->mem_nelem[0]); + for (uint i = 1; i < memory_item->mem_ndims; i++){ + str_ptr += sprintf(str_ptr,", %lu", memory_item->mem_nelem[i]); + } + printf("%12s",nelemstring); + + printf(") elsize %lu flags %d capacity %lu\n", + memory_item->mem_elsize,memory_item->mem_flags,memory_item->mem_capacity); + } + +#ifdef HAVE_HDF5 + if(USE_HDF5) { + access_named_hdf5_values (memory_item->mem_name, + strlen (memory_item->mem_name), + (hsize_t) memory_item->mem_ndims, + (hsize_t *) memory_item->mem_nelem, + mem_ptr, + memory_item->mem_elsize == 4 ? 
+ H5T_NATIVE_INT : H5T_NATIVE_DOUBLE, + true); + } else { +#endif + int num_elements = 1; + for (uint i = 0; i < memory_item->mem_ndims; i++){ + num_elements *= memory_item->mem_nelem[i]; + } + store_field_header(memory_item->mem_name,30); + if (memory_item->mem_flags & REPLICATED_DATA) { + if (memory_item->mem_elsize == 4){ + store_replicated_int_array((int *)mem_ptr, num_elements); + } else { + store_replicated_double_array((double *)mem_ptr, num_elements); + } + } else { + if (memory_item->mem_elsize == 4){ + store_int_array((int *)mem_ptr, num_elements); + } else { + store_double_array((double *)mem_ptr, num_elements); + } + } + } +#ifdef HAVE_HDF5 + } +#endif +} + +void Crux::store_begin(size_t nsize, int ncycle) +{ + + int mype = 0; + +#ifdef HAVE_MPI + MPI_Comm_rank(MPI_COMM_WORLD,&mype); +#endif + + cp_num = checkpoint_counter % num_of_rollback_states; + cpu_timer_start(&tcheckpoint_time); + + if(crux_type == CRUX_IN_MEMORY) { + if (crux_data[cp_num] != NULL) free(crux_data[cp_num]); + crux_data[cp_num] = (int *)malloc(nsize); + crux_data_size[cp_num] = nsize; + store_fp = fmemopen(crux_data[cp_num], nsize, "w"); + } else if(crux_type == CRUX_DISK) { + char backup_file_w_dir[60]; + char backup_file[40]; +#ifdef HAVE_HDF5 + if(USE_HDF5) { + + hid_t plist_id = create_hdf5_parallel_file_plist(); + +#ifdef HDF5_FF + if(is_restart) + sprintf(backup_file_w_dir,"rbackup%05d.h5",ncycle); + else + sprintf(backup_file_w_dir,"backup%05d.h5",ncycle); +#else + sprintf(backup_file_w_dir,"%s/backup%05d.h5",checkpoint_directory,ncycle); + sprintf(backup_file,"backup%05d.h5",ncycle); +#endif + if(!(h5_fid = H5Fcreate(backup_file_w_dir, H5F_ACC_TRUNC, H5P_DEFAULT, plist_id))) { + printf("HDF5: Could not write HDF5 %s at iteration %d\n",backup_file_w_dir,ncycle); + } + H5Pclose(plist_id); + } else { +#endif + sprintf(backup_file_w_dir,"%s/backup%05d.crx",checkpoint_directory,ncycle); + sprintf(backup_file,"backup%05d.crx",ncycle); +#ifdef HAVE_MPI + int iret = 
MPI_File_open(MPI_COMM_WORLD, backup_file_w_dir, MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &mpi_store_fp); + if(iret != MPI_SUCCESS) { + printf("Could not write %s at iteration %d\n",backup_file_w_dir,ncycle); + } +#else + store_fp = fopen(backup_file_w_dir,"w"); + if(!store_fp){ + printf("Could not write %s at iteration %d\n",backup_file_w_dir,ncycle); + } +#endif + if (mype == 0) { + char symlink_file[60]; + sprintf(symlink_file,"%s/backup%1d.crx",checkpoint_directory,cp_num); + unlink(symlink_file); + symlink(backup_file, symlink_file); + // int ireturn = symlink(backup_file, symlink_file); + // if (ireturn == -1) { + // printf("Warning: error returned with symlink call for file %s and symlink %s\n", + // backup_file,symlink_file); + // } + } + } +#ifdef HAVE_HDF5 + } +#endif + if (do_crux_timing) { + checkpoint_timing_size += nsize; + } +} + +void Crux::store_field_header(const char *name, int name_size){ +#ifdef HAVE_MPI + assert(name != NULL); + MPI_Status status; + MPI_File_write_shared(mpi_store_fp, (void *)name, name_size, MPI_CHAR, &status); + MPI_Barrier(MPI_COMM_WORLD); +#ifdef DEBUG_RESTORE_VALS + int count; + MPI_Get_count(&status, MPI_CHAR, &count); + printf("%d:Wrote %d characters at line %d in file %s\n",mype,count,__LINE__,__FILE__); +#endif + +#else + assert(name != NULL && store_fp != NULL); + fwrite(name,sizeof(char),name_size,store_fp); +#endif +} + +#ifdef HAVE_HDF5 +hid_t create_hdf5_parallel_file_plist() +{ + hid_t plist_id = H5P_DEFAULT; + + if( (plist_id = H5Pcreate(H5P_FILE_ACCESS)) < 0) + printf("HDF5: Could not create property list \n"); + +#ifdef HAVE_MPI + if( H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) < 0) + printf("HDF5: Could set libver bounds \n"); +# ifdef HDF5_FF + H5Pset_fapl_daosm(plist_id, MPI_COMM_WORLD, MPI_INFO_NULL); + if(H5Pset_all_coll_metadata_ops(plist_id, true) < 0) + printf("HDF5: Could not set collective metadata \n"); +# else + H5Pset_fapl_mpio(plist_id, MPI_COMM_WORLD, 
MPI_INFO_NULL); +#endif +#endif + return plist_id; +} + +void map_name_to_hdf5 (const char *name, int name_size, + char *group, + char *label) +{ + static const char * default_group = "default"; + int i, j; + group[0] = '/'; + for (i=0; i 1) { + size_t *counts = new size_t[npes]; + MPI_Allgather (&count, sizeof(count), MPI_BYTE, + counts, sizeof *counts, MPI_BYTE, + MPI_COMM_WORLD); + for (int i=0; imem_ptr; + if ((memory_item->mem_flags & RESTART_DATA) == 0) continue; + + if (DEBUG) { + printf("MallocPlus ptr %p: name %10s ptr %p dims %lu nelem (", + mem_ptr,memory_item->mem_name,memory_item->mem_ptr,memory_item->mem_ndims); + + char nelemstring[80]; + char *str_ptr = nelemstring; + str_ptr += sprintf(str_ptr,"%lu", memory_item->mem_nelem[0]); + for (uint i = 1; i < memory_item->mem_ndims; i++){ + str_ptr += sprintf(str_ptr,", %lu", memory_item->mem_nelem[i]); + } + printf("%12s",nelemstring); + + printf(") elsize %lu flags %d capacity %lu\n", + memory_item->mem_elsize,memory_item->mem_flags,memory_item->mem_capacity); + } +#ifdef HAVE_HDF5 + if(USE_HDF5) { + access_named_hdf5_values (memory_item->mem_name, + strlen (memory_item->mem_name), + (hsize_t) memory_item->mem_ndims, + (hsize_t *) memory_item->mem_nelem, + mem_ptr, + memory_item->mem_elsize == 4 ? 
+ H5T_NATIVE_INT : H5T_NATIVE_DOUBLE, false); + } else { +#endif + int num_elements = 1; + for (uint i = 0; i < memory_item->mem_ndims; i++){ + num_elements *= memory_item->mem_nelem[i]; + } + restore_field_header(test_name,30); + if (strcmp(test_name,memory_item->mem_name) != 0) { + printf("ERROR in restore checkpoint for %s %s\n",test_name,memory_item->mem_name); +#ifdef HAVE_MPI + MPI_Finalize(); +#endif + exit(-1); + } + if (memory_item->mem_flags & REPLICATED_DATA) { + if (memory_item->mem_elsize == 4){ + restore_replicated_int_array((int *)mem_ptr, num_elements); + } else { + restore_replicated_double_array((double *)mem_ptr, num_elements); + } + } else { + if (memory_item->mem_elsize == 4){ + restore_int_array((int *)mem_ptr, num_elements); + } else { + restore_double_array((double *)mem_ptr, num_elements); + } + } + } +#ifdef HAVE_HDF5 + } +#endif +} + +void Crux::restore_begin(char *restart_file, int rollback_counter) +{ + rs_num = rollback_counter % num_of_rollback_states; + + cpu_timer_start(&trestore_time); + + if (restart_file != NULL){ + if (mype == 0) { + printf("\n ================================================================\n"); + printf( " Restoring state from disk file %s\n",restart_file); + printf( " ================================================================\n\n"); + } +#ifdef HAVE_HDF5 + is_restart = true; + if (USE_HDF5) { + hid_t plist_id = create_hdf5_parallel_file_plist(); + + if(!(h5_fid = H5Fopen(restart_file, H5F_ACC_RDWR, plist_id))) + printf("HDF5: Could not restart from HDF5 file: %s\n", restart_file); + H5Pclose(plist_id); + } else { +#endif +#ifdef HAVE_MPI + int iret = MPI_File_open(MPI_COMM_WORLD, restart_file, MPI_MODE_RDONLY | MPI_MODE_UNIQUE_OPEN, MPI_INFO_NULL, &mpi_restore_fp); + if(iret != MPI_SUCCESS){ + //printf("Could not write %s at iteration %d\n",restart_file,crux_int_vals[8]); + printf("Could not open restart file %s\n",restart_file); + } +#else + restore_fp = fopen(restart_file,"r"); + if(!restore_fp){ + 
//printf("Could not write %s at iteration %d\n",restart_file,crux_int_vals[8]); + printf("Could not open restart file %s\n",restart_file); + } +#endif +#ifdef HAVE_HDF5 + } +#endif + restore_type = RESTORE_RESTART; + } else if(crux_type == CRUX_IN_MEMORY){ + printf("Restoring state from memory rollback number %d rollback_counter %d\n",rs_num,rollback_counter); + restore_fp = fmemopen(crux_data[rs_num], crux_data_size[rs_num], "r"); + restore_type = RESTORE_ROLLBACK; + } else if(crux_type == CRUX_DISK){ + char backup_file_w_dir[60]; + + sprintf(backup_file_w_dir,"%s/backup%d.crx",checkpoint_directory,rs_num); + printf("Restoring state from disk file %s rollback_counter %d\n",backup_file_w_dir,rollback_counter); + restore_fp = fopen(backup_file_w_dir,"r"); + if(!restore_fp){ + //printf("Could not write %s at iteration %d\n",backup_file_w_dir,crux_int_vals[8]); + printf("Could not open restore file %s\n",backup_file_w_dir); + } + restore_type = RESTORE_ROLLBACK; + } +} + +void Crux::restore_field_header(char *name, int name_size) +{ +#ifdef HAVE_MPI + assert(name != NULL); + MPI_Status status; + MPI_File_read_shared(mpi_restore_fp, name, name_size, MPI_CHAR, &status); + MPI_Barrier(MPI_COMM_WORLD); +#ifdef DEBUG_RESTORE_VALS + int count; + MPI_Get_count(&status, MPI_CHAR, &count); + printf("%d:Read %d characters at line %d in file %s\n",mype,count,__LINE__,__FILE__); +#endif + +#else + int name_read = fread(name,sizeof(char),name_size,restore_fp); + if (name_read != name_size){ + printf("Warning: number of elements read %d is not equal to request %d\n",name_read,name_size); + } +#endif +} + +void Crux::restore_bools(bool *bool_vals, size_t nelem) +{ + size_t nelem_read = fread(bool_vals,sizeof(bool),nelem,restore_fp); + if (nelem_read != nelem){ + printf("Warning: number of elements read %lu is not equal to request %lu\n",nelem_read,nelem); + } +} + +void Crux::restore_ints(int *int_vals, size_t nelem) +{ + size_t nelem_read = 
fread(int_vals,sizeof(int),nelem,restore_fp); + if (nelem_read != nelem){ + printf("Warning: number of elements read %lu is not equal to request %lu\n",nelem_read,nelem); + } +} + +void Crux::restore_longs(long long *long_vals, size_t nelem) +{ + size_t nelem_read = fread(long_vals,sizeof(long),nelem,restore_fp); + if (nelem_read != nelem){ + printf("Warning: number of elements read %lu is not equal to request %lu\n",nelem_read,nelem); + } +} + +void Crux::restore_sizets(size_t *size_t_vals, size_t nelem) +{ + size_t nelem_read = fread(size_t_vals,sizeof(size_t),nelem,restore_fp); + if (nelem_read != nelem){ + printf("Warning: number of elements read %lu is not equal to request %lu\n",nelem_read,nelem); + } +} + +void Crux::restore_doubles(double *double_vals, size_t nelem) +{ + size_t nelem_read = fread(double_vals,sizeof(double),nelem,restore_fp); + if (nelem_read != nelem){ + printf("Warning: number of elements read %lu is not equal to request %lu\n",nelem_read,nelem); + } +} + +int *Crux::restore_int_array(int *int_array, size_t nelem) +{ +#ifdef HAVE_MPI + assert(int_array != NULL); + MPI_Status status; + MPI_File_read_shared(mpi_restore_fp, int_array, (int)nelem, MPI_INT, &status); + MPI_Barrier(MPI_COMM_WORLD); +#ifdef DEBUG_RESTORE_VALS + int count; + MPI_Get_count(&status, MPI_INT, &count); + printf("%d:Read %d integers at line %d in file %s\n",mype,count,__LINE__,__FILE__); +#endif + +#else + size_t nelem_read = fread(int_array,sizeof(int),nelem,restore_fp); + if (nelem_read != nelem){ + printf("Warning: number of elements read %lu is not equal to request %lu\n",nelem_read,nelem); + } +#endif + return(int_array); +} + +long long *Crux::restore_long_array(long long *long_array, size_t nelem) +{ + size_t nelem_read = fread(long_array,sizeof(long long),nelem,restore_fp); + if (nelem_read != nelem){ + printf("Warning: number of elements read %lu is not equal to request %lu\n",nelem_read,nelem); + } + return(long_array); +} + +float 
*Crux::restore_float_array(float *float_array, size_t nelem) +{ + size_t nelem_read = fread(float_array,sizeof(float),nelem,restore_fp); + if (nelem_read != nelem){ + printf("Warning: number of elements read %lu is not equal to request %lu\n",nelem_read,nelem); + } + return(float_array); +} + +double *Crux::restore_double_array(double *double_array, size_t nelem) +{ +#ifdef HAVE_MPI + MPI_Status status; + MPI_File_read_shared(mpi_restore_fp, double_array, (int)nelem, MPI_DOUBLE, &status); + MPI_Barrier(MPI_COMM_WORLD); +#ifdef DEBUG_RESTORE_VALS + int count; + MPI_Get_count(&status, MPI_DOUBLE, &count); + printf("%d:Read %d doubles at line %d in file %s\n",mype,count,__LINE__,__FILE__); +#endif + +#else + size_t nelem_read = fread(double_array,sizeof(double),nelem,restore_fp); + if (nelem_read != nelem){ + printf("Warning: number of elements read %lu is not equal to request %lu\n",nelem_read,nelem); + } +#endif + return(double_array); +} + +int *Crux::restore_replicated_int_array(int *int_array, size_t nelem) +{ +#ifdef HAVE_MPI + assert(int_array != NULL); + MPI_Status status; + MPI_File_read_shared(mpi_restore_fp, int_array, (int)nelem, MPI_INT, &status); + MPI_Barrier(MPI_COMM_WORLD); +#ifdef DEBUG_RESTORE_VALS + int count; + MPI_Get_count(&status, MPI_INT, &count); + printf("%d:Read %d integers at line %d in file %s\n",mype,count,__LINE__,__FILE__); +#endif + +#else + size_t nelem_read = fread(int_array,sizeof(int),nelem,restore_fp); + if (nelem_read != nelem){ + printf("Warning: number of elements read %lu is not equal to request %lu\n",nelem_read,nelem); + } +#endif + return(int_array); +} + +double *Crux::restore_replicated_double_array(double *double_array, size_t nelem) +{ +#ifdef HAVE_MPI + MPI_Status status; + MPI_File_read_shared(mpi_restore_fp, double_array, (int)nelem, MPI_DOUBLE, &status); + MPI_Barrier(MPI_COMM_WORLD); +#ifdef DEBUG_RESTORE_VALS + int count; + MPI_Get_count(&status, MPI_DOUBLE, &count); + printf("%d:Read %d doubles at line %d in 
file %s\n",mype,count,__LINE__,__FILE__); +#endif + +#else + size_t nelem_read = fread(double_array,sizeof(double),nelem,restore_fp); + if (nelem_read != nelem){ + printf("Warning: number of elements read %lu is not equal to request %lu\n",nelem_read,nelem); + } +#endif + return(double_array); +} + +#ifdef HAVE_MPI +int *Crux::restore_distributed_int_array(int *int_array, size_t nelem, int flags) +{ + assert(int_array != NULL); + //MPI_Datatype datatype = get_crux_datatype(DISTRIBUTED_INT_DATA); + MPI_Status status; + //MPI_File_read_shared(mpi_restore_fp, int_array, (int)nelem, datatype, &status); + MPI_Barrier(MPI_COMM_WORLD); +#ifdef DEBUG_RESTORE_VALS + int count; + MPI_Get_count(&status, MPI_INT, &count); + printf("%d:Read %d integers at line %d in file %s\n",mype,count,__LINE__,__FILE__); +#endif + + return(int_array); +} + +double *Crux::restore_distributed_double_array(double *double_array, size_t nelem, int flags) +{ + //MPI_Datatype datatype = get_crux_datatype(DISTRIBUTED_DOUBLE_DATA); + MPI_Status status; + //MPI_File_read_shared(mpi_restore_fp, double_array, (int)nelem, datatype, &status); + MPI_Barrier(MPI_COMM_WORLD); +#ifdef DEBUG_RESTORE_VALS + int count; + MPI_Get_count(&status, MPI_DOUBLE, &count); + printf("%d:Read %d doubles at line %d in file %s\n",mype,count,__LINE__,__FILE__); +#endif + + return(double_array); +} +#endif + +void Crux::restore_end(void) +{ + double restore_total_time = cpu_timer_stop(trestore_time); + + if (do_crux_timing){ + if (restore_type == RESTORE_RESTART) { + fprintf(crux_time_fp, "Total time for restore was %g seconds\n", restore_total_time); + } else if (restore_type == RESTORE_ROLLBACK){ + fprintf(crux_time_fp, "Total time for rollback %d was %g seconds\n", rollback_attempt, restore_total_time); + } + } +#ifdef HAVE_HDF5 + if(USE_HDF5) { + if(H5Fclose(h5_fid) != 0) { + printf("HDF5: Could not close HDF5 file!!\n"); + } + } else { +#endif +#ifdef HAVE_MPI + MPI_File_close(&mpi_store_fp); +#else + assert(restore_fp != 
NULL); + fclose(restore_fp); +#endif +#ifdef HAVE_HDF5 + } +#endif +} + +int Crux::get_rollback_number() +{ + rollback_attempt++; + return(checkpoint_counter % num_of_rollback_states); +} + +void Crux::set_crux_type(int crux_type_in) +{ + crux_type = crux_type_in; +} Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/fmemopen.h =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/fmemopen.h @@ -0,0 +1,35 @@ +#ifndef FMEMOPEN_H_ +#define FMEMOPEN_H_ + +#if defined __cplusplus +extern "C" { +#endif + +/** + * A BSD port of the fmemopen Linux method using funopen. + * + * man docs for fmemopen: + * http://linux.die.net/man/3/fmemopen + * + * man docs for funopen: + * https://developer.apple.com/library/mac/#documentation/Darwin/Reference/ManPages/man3/funopen.3.html + * + * This method is ported from ingenuitas' python-tesseract project. + * + * You must call fclose on the returned file pointer or memory will be leaked. + * + * @param buf The data that will be used to back the FILE* methods. Must be at least + * @c size bytes. + * @param size The size of the @c buf data. + * @param mode The permitted stream operation modes. + * @return A pointer that can be used in the fread/fwrite/fseek/fclose family of methods. + * If a failure occurred NULL will be returned. + * @ingroup NimbusMemoryMappping + */ +FILE *fmemopen(void *buf, size_t size, const char *mode); + +#ifdef __cplusplus +} +#endif + +#endif // #ifndef FMEMOPEN_H_ Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/genmalloc.h =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/genmalloc.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2011-2012, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. 
Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
#ifndef GENMALLOC_H_
#define GENMALLOC_H_

#include <stddef.h>   /* size_t, used by the prototypes below */

#ifdef __cplusplus
extern "C"
{
#endif

/*
 * Memory routines.
 *
 * Each macro forwards to the function of the same name with a _p suffix and
 * appends the call site (__FILE__, __LINE__) as the last two arguments.
 */
#define genvector( inum, elsize) \
   ( genvector_p(inum, elsize, __FILE__, __LINE__) )
#define genvectorfree( var) \
   ( genvectorfree_p(var, __FILE__, __LINE__) )
#define genmatrix( jnum, inum, elsize) \
   ( genmatrix_p(jnum, inum, elsize, __FILE__, __LINE__) )
#define gentrimatrix( knum, jnum, inum, elsize) \
   ( gentrimatrix_p(knum, jnum, inum, elsize, __FILE__, __LINE__) )
#define genmatrixfree( var) \
   ( genmatrixfree_p(var, __FILE__, __LINE__) )
#define gentrimatrixfree( var) \
   ( gentrimatrixfree_p(var, __FILE__, __LINE__) )

#define genmalloc_memory_add( malloc_mem_ptr, size) \
   ( genmalloc_memory_add_p(malloc_mem_ptr, size, __FILE__, __LINE__) )
#define genmalloc_memory_remove( malloc_mem_ptr) \
   ( genmalloc_memory_remove_p(malloc_mem_ptr, __FILE__, __LINE__) )
#define genmem_free_all() \
   ( genmem_free_all_p(__FILE__, __LINE__) )


/* Allocation and release of 1-, 2- and 3-dimensional arrays. */
void *genvector_p(int inum, size_t elsize, const char *file, const int line);
void genvectorfree_p(void *var, const char *file, const int line);
void **genmatrix_p(int jnum, int inum, size_t elsize, const char *file, const int line);
void ***gentrimatrix_p(int knum, int jnum, int inum, size_t elsize, const char *file, const int line);
void genmatrixfree_p(void **var, const char *file, const int line);
void gentrimatrixfree_p(void ***var, const char *file, const int line);

/* Bookkeeping of the pointers handed out by the routines above. */
void *genmalloc_memory_add_p(void *malloc_mem_ptr, size_t size, const char *file, const int line);
void genmalloc_memory_remove_p(void *malloc_mem_ptr, const char *file, const int line);
void genmem_free_all_p(const char *file, const int line);

#ifdef __cplusplus
}
#endif

#endif /* GENMALLOC_H_ */
If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
/*
 * Allocates a zero-filled jnum x inum matrix of elsize-byte elements.
 *
 * The result is an array of jnum row pointers into one contiguous data block,
 * so out[j] addresses row j and out[0] addresses the whole block. Both
 * allocations are registered with genmalloc_memory_add().
 *
 * jnum, inum -- number of rows and of elements per row.
 * elsize     -- size of one element in bytes.
 * file, line -- call site, used in error messages.
 *
 * Returns the row-pointer array, or NULL when jnum is not positive, inum is
 * negative, or an allocation fails.
 */
void **genmatrix_p(int jnum, int inum, size_t elsize, const char *file, const int line)
{
   if (jnum <= 0 || inum < 0) {
      printf("Error -- invalid dimensions %d x %d in genmalloc for call at %s line %d\n",
             jnum, inum, file, line);
      return (NULL);
   }

   /* All size arithmetic is done in size_t so large matrices do not overflow int. */
   const size_t nrows = (size_t)jnum;
   const size_t row_bytes = (size_t)inum*elsize;

   void **out = (void **)malloc(nrows*sizeof(void *));
   if (out == NULL) {
      printf("Error -- out of memory in genmalloc for call at %s line %d\n", file, line);
      return (NULL);
   }
   genmalloc_memory_add(out, nrows*sizeof(void *));

   out[0] = (void *)calloc(nrows*(size_t)inum, elsize);
   if (out[0] == NULL) {
      printf("Error -- out of memory in genmalloc for call at %s line %d\n", file, line);
      genmalloc_memory_remove(out);
      return (NULL);
   }
   genmalloc_memory_add(out[0], nrows*row_bytes);

   /* Byte offsets are taken on a char pointer; arithmetic on void * is not standard C. */
   char *base = (char *)out[0];
   for (size_t j = 1; j < nrows; j++) {
      out[j] = base + j*row_bytes;
   }

   return (out);
}
/*
 * Allocates a zero-filled knum x jnum x inum array of int.
 *
 * Three allocations are made and registered with genmalloc_memory_add():
 * knum plane pointers, knum*jnum row pointers, and one contiguous block of
 * knum*jnum*inum ints. out[k][j] addresses row j of plane k.
 *
 * knum, jnum, inum -- extents of the three dimensions.
 * file, line       -- call site, used in error messages.
 *
 * Returns the plane-pointer array, or NULL when knum or jnum is not positive,
 * inum is negative, or an allocation fails.
 */
int ***gentrimatrix_int_p(int knum, int jnum, int inum, const char *file, const int line)
{
   if (knum <= 0 || jnum <= 0 || inum < 0) {
      printf("Error -- invalid dimensions %d x %d x %d in genmalloc for call at %s line %d\n",
             knum, jnum, inum, file, line);
      return (NULL);
   }

   /* Extents are widened before multiplying so the products cannot overflow int. */
   const size_t nplanes = (size_t)knum;
   const size_t nrows   = (size_t)jnum;
   const size_t ncols   = (size_t)inum;
   const size_t elsize  = sizeof(int);
   const size_t nelems  = nplanes*nrows*ncols;

   int ***out = (int ***)malloc(nplanes*sizeof(int **));
   if (out == NULL) {
      printf("Error -- out of memory in genmalloc for call at %s line %d\n", file, line);
      return (NULL);
   }
   genmalloc_memory_add(out, nplanes*sizeof(int **));

   out[0] = (int **)malloc(nplanes*nrows*sizeof(int *));
   if (out[0] == NULL) {
      printf("Error -- out of memory in genmalloc for call at %s line %d\n", file, line);
      genmalloc_memory_remove(out);
      return (NULL);
   }
   genmalloc_memory_add(out[0], nplanes*nrows*sizeof(int *));

   out[0][0] = (int *)calloc(nelems, elsize);
   if (out[0][0] == NULL) {
      printf("Error -- out of memory in genmalloc for call at %s line %d\n", file, line);
      genmalloc_memory_remove(out[0]);
      genmalloc_memory_remove(out);
      return (NULL);
   }
   genmalloc_memory_add(out[0][0], nelems*elsize);

   for (size_t k = 0; k < nplanes; k++)
   {
      if (k > 0)
      {
         /* Each plane starts jnum row pointers and jnum*inum ints after the previous one. */
         out[k] = out[k-1] + nrows;
         out[k][0] = out[k-1][0] + nrows*ncols;
      }

      for (size_t j = 1; j < nrows; j++)
      {
         out[k][j] = out[k][j-1] + ncols;
      }
   }

   return (out);
}
+ return(malloc_mem_ptr); +} + +void genmalloc_memory_remove_p(void *malloc_mem_ptr, const char *file, const int line){ + // Just to get rid of warning + if (1 == 2) printf("Warning file %s line %d\n", file, line); + + SLIST_FOREACH(genmalloc_memory_item, &genmalloc_memory_head, genmalloc_memory_entries){ + if (genmalloc_memory_item->mem_ptr == malloc_mem_ptr) { + if (DEBUG) printf("GENMALLOC_MEMORY_REMOVE: DEBUG -- freeing malloc memory pointer %p called from file %s line %d\n",malloc_mem_ptr,file,line); + free(malloc_mem_ptr); + SLIST_REMOVE(&genmalloc_memory_head, genmalloc_memory_item, genmalloc_memory_entry, genmalloc_memory_entries); + free(genmalloc_memory_item); + break; + } + } +} + +void genmem_free_all_p(const char *file, const int line){ + // Just to get rid of warning + if (1 == 2) printf("Warning file %s line %d\n", file, line); + + while (!SLIST_EMPTY(&genmalloc_memory_head)) { + genmalloc_memory_item = SLIST_FIRST(&genmalloc_memory_head); + if (DEBUG) printf("GENMEM_FREE_ALL: DEBUG -- freeing genmalloc memory %p called from file %s line %d\n",genmalloc_memory_item->mem_ptr,file,line); + free(genmalloc_memory_item->mem_ptr); + SLIST_REMOVE_HEAD(&genmalloc_memory_head, genmalloc_memory_entries); + free(genmalloc_memory_item); + } +} + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/glibc_compat_rand.h =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/glibc_compat_rand.h @@ -0,0 +1,16 @@ +/*===------------- glibc_compat_rand.h- glibc rand emulation --------------===*\ +|* +|* The LLVM Compiler Infrastructure +|* +|* This file is distributed under the University of Illinois Open Source +|* License. See LICENSE.TXT for details. 
/**
 * This rand implementation is designed to emulate the implementation of
 * rand/srand in recent versions of glibc. This is used for programs which
 * require this specific rand implementation in order to pass verification
 * tests.
 *
 * The state is a circular table of TABLE_SIZE words; each call stores
 * table[i] = table[i-3] + table[i-31] (indices taken modulo TABLE_SIZE) and
 * returns that word shifted right by one bit.
 */

#define TABLE_SIZE 34
#define NUM_DISCARDED 344
static unsigned int table[TABLE_SIZE];
static int next;

/**
 * Returns the next value of the sequence, in the range [0, 2^31 - 1].
 */
int glibc_compat_rand(void) {
  /* Calculate the indices i-3 and i-31 in the circular vector. */
  int i3 = (next < 3) ? (TABLE_SIZE + next - 3) : (next - 3);
  int i31 = (next < 31) ? (TABLE_SIZE + next - 31) : (next - 31);

  table[next] = table[i3] + table[i31];
  unsigned int r = table[next] >> 1;

  /* Valid slots are 0 .. TABLE_SIZE - 1, so wrap as soon as next reaches
   * TABLE_SIZE; letting it equal TABLE_SIZE would write one element past the
   * end of the table and break the lag-3 / lag-31 relation. */
  ++next;
  if (next >= TABLE_SIZE)
    next = 0;

  return (int)r;
}

/**
 * Seeds the generator.
 *
 * Fills the first TABLE_SIZE - 3 slots with the sequence
 * x -> 16807 * x mod 2147483647 starting from seed, copies the first three
 * values into the last three slots, and then discards the first
 * NUM_DISCARDED outputs.
 */
void glibc_compat_srand(unsigned int seed) {
  table[0] = seed;
  for (int i = 1; i < TABLE_SIZE - 3; ++i) {
    int r = 16807ll * ((long long) table[i - 1]) % 2147483647;
    if (r < 0)
      r += 2147483647;

    table[i] = r;
  }

  for (int i = TABLE_SIZE - 3; i < TABLE_SIZE; ++i)
    table[i] = table[i - 31];

  next = 0;

  for (int i = 0; i < NUM_DISCARDED; ++i)
    (void)glibc_compat_rand();
}
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * Neal Davis davis68@lanl.gov, davis68@illinois.edu + * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com + * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com + * + */ +#ifndef _GRAPHICS_H_ +#define _GRAPHICS_H_ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef __cplusplus +extern "C" +{ +#endif + +enum graphics_file_type{ + GRAPHICS_NONE, + GRAPHICS_DATA, + GRAPHICS_BMP, + GRAPHICS_GIF, + GRAPHICS_JPEG, + GRAPHICS_MPEG, + GRAPHICS_PDF, + GRAPHICS_PNG, + GRAPHICS_SVG +}; + +void set_graphics_window(float graphics_xmin_in, float graphics_xmax_in, + float graphics_ymin_in, float graphics_ymax_in); +void init_graphics_output(void); +void terminate_graphics_output(void); +void set_graphics_viewmode(int graphics_view_mode_in); +void set_graphics_mysize(int graphics_mysize_in); +void set_graphics_outline(int graphics_outline_in); +void set_graphics_cell_data_double(double *data_in); +void set_graphics_cell_data_float(float *data_in); +void set_graphics_cell_proc(int *graphics_proc_in); +void set_graphics_cell_coordinates_double(double *x_in, double *dx_in, + double *y_in, double *dy_in); +void set_graphics_cell_coordinates_float(float *x_in, float *dx_in, + float *y_in, float *dy_in); +void write_graphics_info(int graph_num, int ncycle, double simTime, + int rollback_img, int rollback_num); + +#ifdef __cplusplus +} +#endif + +#endif + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/graphics.c 
=================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/graphics.c @@ -0,0 +1,706 @@ +/* + * Copyright (c) 2011, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * Neal Davis davis68@lanl.gov, davis68@illinois.edu + * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com + * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com + * + */ +#include +#include +#include +#include "graphics.h" + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +static int magick_on = 0; + +#ifdef HAVE_MAGICKWAND +#include + +#define MAGICK_NCOLORS 1280 + +void Magick_Scale(); + +//static int graphics_movie = 0; +static MagickWand *magick_wand = NULL; +static DrawingWand *draw_wand = NULL; +static PixelWand *pixel_wand = NULL; + +struct MagickColorTable { + int Red; + int Blue; + int Green; +}; + +static 
struct MagickColorTable MagickRainbow[MAGICK_NCOLORS]; +#endif + + +#define WINSIZE 800 + +void DrawSquaresToFile(int graph_num, int ncycle, double simTime, int rollback_img, int rollback_num); + +/* + * Variables that I added to make everything work for getting graphics + * data output to files while running no graphics with CLAMR + * Brian Atkinson +*/ +static int autoscale = 0; +static double xconversion = 0.0; +static double yconversion = 0.0; +static int Ncolors = 256; +static int iteration = 0; + +char *graphics_directory = "graphics_output"; +enum graphics_file_type graphics_type; // type of graphics output + +//static int mode = MOVE; + +static int width; +static float graphics_xmin=0.0, graphics_xmax=0.0, graphics_ymin=0.0, graphics_ymax=0.0; + +static int graphics_outline = 0; +static int graphics_view_mode = 0; +static int graphics_mysize = 0; + +enum spatial_data_type {SPATIAL_DOUBLE, SPATIAL_FLOAT}; +static int spatial_type = SPATIAL_FLOAT; + +static double *x_double=NULL, *y_double=NULL, *dx_double=NULL, *dy_double=NULL; +static float *x_float=NULL, *y_float=NULL, *dx_float=NULL, *dy_float=NULL; + +enum plot_data_type {DATA_DOUBLE, DATA_FLOAT}; +static int data_type = DATA_FLOAT; +static double *data_double=NULL; +static float *data_float=NULL; +static int *graphics_proc=NULL; + +void init_graphics_output(void){ + width = (WINSIZE / (graphics_ymax - graphics_ymin)) * (graphics_xmax - graphics_xmin); + xconversion = (double)WINSIZE/ (graphics_xmax - graphics_xmin); + yconversion = (double)WINSIZE/(graphics_ymax - graphics_ymin); + + struct stat stat_descriptor; + if (stat(graphics_directory,&stat_descriptor) == -1){ + mkdir(graphics_directory,0777); + } + + if (graphics_type != GRAPHICS_DATA && graphics_type != GRAPHICS_NONE) magick_on = 1; + +#ifdef HAVE_MAGICKWAND + if (magick_on){ + //MagickWandGenesis(); + // Create wand + magick_wand = NewMagickWand(); + + Magick_Scale(); + } +#endif +} + +void terminate_graphics_output(void){ +#ifdef 
HAVE_MAGICKWAND + if (magick_on){ + magick_wand = DestroyMagickWand(magick_wand); + MagickWandTerminus(); + } +#endif +} + +void set_graphics_window(float graphics_xmin_in, float graphics_xmax_in, float graphics_ymin_in, float graphics_ymax_in){ + graphics_xmin = graphics_xmin_in; + graphics_xmax = graphics_xmax_in; + graphics_ymin = graphics_ymin_in; + graphics_ymax = graphics_ymax_in; +} +void set_graphics_cell_data_double(double *data_in){ + data_type = DATA_DOUBLE; + data_double = data_in; +} +void set_graphics_cell_data_float(float *data_in){ + data_type = DATA_FLOAT; + data_float = data_in; +} +void set_graphics_cell_proc(int *graphics_proc_in){ + graphics_proc = graphics_proc_in; +} +void set_graphics_cell_coordinates_double(double *x_in, double *dx_in, double *y_in, double *dy_in){ + spatial_type = SPATIAL_DOUBLE; + x_double = x_in; + dx_double = dx_in; + y_double = y_in; + dy_double = dy_in; +} +void set_graphics_cell_coordinates_float(float *x_in, float *dx_in, float *y_in, float *dy_in){ + spatial_type = SPATIAL_FLOAT; + x_float = x_in; + dx_float = dx_in; + y_float = y_in; + dy_float = dy_in; +} +void set_graphics_viewmode(int graphics_view_mode_in){ + graphics_view_mode = graphics_view_mode_in; +} +void set_graphics_mysize(int graphics_mysize_in){ + graphics_mysize = graphics_mysize_in; +} +void set_graphics_outline(int graphics_outline_in){ + graphics_outline = graphics_outline_in; +} + +/* + * Created this function get graphics data while running + * the no graphic version of CLAMR. The output for the main + * cell data is written out to graph#.data files and the gridline + * data is writeen out to outline#.lin files. 
+ * Brian Atkinson +*/ +void DrawSquaresToFile(int graph_num, int ncycle, double simTime, int rollback_img, int rollback_num){ +#ifdef HAVE_MAGICKWAND + if (magick_on) { + draw_wand = NewDrawingWand(); + pixel_wand = NewPixelWand(); + + MagickSetSize(magick_wand,WINSIZE,WINSIZE); + MagickSetColorspace(magick_wand,sRGBColorspace); + MagickReadImage(magick_wand,"xc:white"); + + DrawSetViewbox(draw_wand, 0, 0, WINSIZE, WINSIZE); + DrawScale(draw_wand, xconversion, -yconversion); + DrawTranslate(draw_wand, -graphics_xmin, graphics_ymin); + + int npart = graphics_mysize/16; + for (int i=0; i scaleMax) scaleMax = data_double[i]; + if (data_double[i] < scaleMin) scaleMin = data_double[i]; + } + } else { + for(int i = 0; i scaleMax) scaleMax = data_float[i]; + if (data_float[i] < scaleMin) scaleMin = data_float[i]; + } + } + } + + int magick_step = MAGICK_NCOLORS/(scaleMax - scaleMin); + + if (graphics_outline) { + PixelGetBlack(pixel_wand); + + DrawSetStrokeColor(draw_wand,pixel_wand); + DrawSetStrokeWidth(draw_wand,0.01); + DrawSetStrokeAntialias(draw_wand,1); + DrawSetStrokeOpacity(draw_wand,1); + } + + if (data_type == DATA_DOUBLE){ + + for(int i = 0; i < graphics_mysize; i++) { + int magick_color; + if (data_type == DATA_DOUBLE){ + magick_color = (int)(data_double[i]-scaleMin)*magick_step; + } else { + magick_color = (int)(data_float[i]-scaleMin)*magick_step; + } + magick_color = MAGICK_NCOLORS-magick_color; + if (magick_color < 0) { + magick_color=0; + } + if (magick_color >= MAGICK_NCOLORS) magick_color = MAGICK_NCOLORS-1; + + char cstring[40]; + sprintf(cstring,"rgba(%d,%d,%d,%d)",MagickRainbow[magick_color].Red, + MagickRainbow[magick_color].Green, + MagickRainbow[magick_color].Blue,120); + PixelSetColor(pixel_wand, cstring); + + DrawSetFillColor(draw_wand, pixel_wand); + + DrawRectangle(draw_wand, x_double[i], y_double[i], + x_double[i]+dx_double[i], y_double[i]+dy_double[i]); +/* + printf("DEBUG -- i %d magick_color %d magick_step %d graphics_proc %d cstring %s 
corners %lg %lg %lg %lg\n", + i,magick_color,magick_step,graphics_proc[i],cstring, + x_double[i], y_double[i], + x_double[i]+dx_double[i], y_double[i]+dy_double[i]); +*/ + } + } else { + + for(int i = 0; i < graphics_mysize; i++) { + int magick_color; + if (data_type == DATA_DOUBLE){ + magick_color = (int)(data_double[i]-scaleMin)*magick_step; + } else { + magick_color = (int)(data_float[i]-scaleMin)*magick_step; + } + magick_color = MAGICK_NCOLORS-magick_color; + if (magick_color < 0) { + magick_color=0; + } + if (magick_color >= MAGICK_NCOLORS) magick_color = MAGICK_NCOLORS-1; + + char cstring[40]; + sprintf(cstring,"rgba(%d,%d,%d,%d)",MagickRainbow[magick_color].Red, + MagickRainbow[magick_color].Green, + MagickRainbow[magick_color].Blue,120); + PixelSetColor(pixel_wand, cstring); + + DrawSetFillColor(draw_wand, pixel_wand); + + DrawRectangle(draw_wand, x_float[i], y_float[i], + x_float[i]+dx_float[i], y_float[i]+dy_float[i]); + } + } + + MagickDrawImage(magick_wand, draw_wand); + + char filename[50]; + char graphics_file_extension[10]; + if (graphics_type == GRAPHICS_BMP) strcpy(graphics_file_extension,".bmp"); + if (graphics_type == GRAPHICS_GIF) strcpy(graphics_file_extension,".gif"); + if (graphics_type == GRAPHICS_JPEG) strcpy(graphics_file_extension,".jpeg"); + if (graphics_type == GRAPHICS_MPEG) strcpy(graphics_file_extension,".mpeg"); + if (graphics_type == GRAPHICS_PDF) strcpy(graphics_file_extension,".pdf"); + if (graphics_type == GRAPHICS_PNG) strcpy(graphics_file_extension,".png"); + if (graphics_type == GRAPHICS_SVG) strcpy(graphics_file_extension,".svg"); + sprintf(filename,"%s/graph%05d%s", graphics_directory, graph_num, graphics_file_extension); + MagickWriteImage(magick_wand, filename); + //MagickDisplayImage(magick_wand, "x:"); + + draw_wand = DestroyDrawingWand(draw_wand); + pixel_wand = DestroyPixelWand(pixel_wand); + } +#endif + if (graphics_type == GRAPHICS_DATA){ + double scaleMax = 25.0, scaleMin = 0.0; + int i; + int color; + char 
filename[50], filename2[50]; + + if(rollback_img){ + sprintf(filename,"%s/graph%dcp%05d.data", graphics_directory, graph_num, rollback_num); + sprintf(filename2,"%s/outline%dcp%05d.lin",graphics_directory, graph_num, rollback_num); + } + else{ + sprintf(filename,"%s/graph%05d.data", graphics_directory, graph_num); + sprintf(filename2,"%s/outline%05d.lin",graphics_directory, graph_num); + } + FILE *fp = fopen(filename,"w"); + FILE *fp2 = fopen(filename2,"w"); + if(fp && fp2){ + fprintf(fp,"%d,%lf\n",ncycle,simTime); + if (autoscale) { + scaleMax=-1.0e30; + scaleMin=1.0e30; + if (data_type == DATA_DOUBLE){ + for(i = 0; i scaleMax) scaleMax = data_double[i]; + if (data_double[i] < scaleMin) scaleMin = data_double[i]; + } + } else { + for(i = 0; i scaleMax) scaleMax = data_float[i]; + if (data_float[i] < scaleMin) scaleMin = data_float[i]; + } + } + } + + double step = Ncolors/(scaleMax - scaleMin); + int xloc, xwid, yloc, ywid; + int xloc1, xloc2, yloc1, yloc2; + for(i = 0; i < graphics_mysize; i++) { + if (data_type == DATA_DOUBLE){ + color = (int)(data_double[i]-scaleMin)*step; + } else { + color = (int)(data_float[i]-scaleMin)*step; + } + color = Ncolors-color; + if (color < 0) { + color=0; + } + if (color >= Ncolors) color = Ncolors-1; + + if (data_type == DATA_DOUBLE){ + xloc = (int)((x_double[i]-graphics_xmin)*xconversion); + xwid = (int)((x_double[i]+dx_double[i]-graphics_xmin)*xconversion-xloc); + yloc = (int)((graphics_ymax-(y_double[i]+dy_double[i]))*yconversion); + ywid = (int)((graphics_ymax-y_double[i])*yconversion); + ywid -= yloc; + //fprintf(fp,"%d,%d,%d,%d,%f\n",xloc,yloc,xwid,ywid,data[i]); + fprintf(fp,"%d,%d,%d,%d,%d\n",xloc,yloc,xwid,ywid,color); + + xloc1 = (int)((x_double[i]-graphics_xmin)*xconversion); + xloc2 = (int)((x_double[i]+dx_double[i]-graphics_xmin)*xconversion); + yloc1 = (int)((graphics_ymax-y_double[i])*yconversion); + yloc2 = (int)((graphics_ymax-(y_double[i]+dy_double[i]))*yconversion); + 
#ifdef HAVE_MAGICKWAND
/*
 * Fill MagickRainbow with five consecutive ramps of 256 entries each.
 * Within each ramp exactly one channel moves while the other two are
 * pinned at 0 or 255:
 *
 *      0.. 255: green rises,  red 0,     blue 255
 *    256.. 511: blue falls,   red 0,     green 255
 *    512.. 767: red rises,    green 255, blue 0
 *    768..1023: green falls,  red 255,   blue 0
 *   1024..1279: blue rises,   red 255,   green 0
 */
void Magick_Scale() {
   for (int step = 0; step < 256; step++) {
      const int up   = step;        /* 0 -> 255 */
      const int down = 255 - step;  /* 255 -> 0 */

      MagickRainbow[       step] = (struct MagickColorTable){ .Red = 0,   .Green = up,   .Blue = 255  };
      MagickRainbow[ 256 + step] = (struct MagickColorTable){ .Red = 0,   .Green = 255,  .Blue = down };
      MagickRainbow[ 512 + step] = (struct MagickColorTable){ .Red = up,  .Green = 255,  .Blue = 0    };
      MagickRainbow[ 768 + step] = (struct MagickColorTable){ .Red = 255, .Green = down, .Blue = 0    };
      MagickRainbow[1024 + step] = (struct MagickColorTable){ .Red = 255, .Green = 0,    .Blue = up   };
   }
}
#endif
write_hash_quadratic_report_level_3(uint ic, ulong hashkey, int *hash); +void write_hash_primejump(uint ic, ulong hashkey, int *hash); +void write_hash_primejump_report_level_1(uint ic, ulong hashkey, int *hash); +void write_hash_primejump_report_level_2(uint ic, ulong hashkey, int *hash); +void write_hash_primejump_report_level_3(uint ic, ulong hashkey, int *hash); +extern void (*write_hash)(uint ic, ulong hashkey, int *hash); // declared in hash.c + +#ifdef _OPENMP + void write_hash_linear_openmp(uint ic, ulong hashkey, int *hash); + void write_hash_linear_openmp_report_level_1(uint ic, ulong hashkey, int *hash); + void write_hash_linear_openmp_report_level_2(uint ic, ulong hashkey, int *hash); + void write_hash_linear_openmp_report_level_3(uint ic, ulong hashkey, int *hash); + void write_hash_quadratic_openmp(uint ic, ulong hashkey, int *hash); + void write_hash_quadratic_openmp_report_level_1(uint ic, ulong hashkey, int *hash); + void write_hash_quadratic_openmp_report_level_2(uint ic, ulong hashkey, int *hash); + void write_hash_quadratic_openmp_report_level_3(uint ic, ulong hashkey, int *hash); + void write_hash_primejump_openmp(uint ic, ulong hashkey, int *hash); + void write_hash_primejump_openmp_report_level_1(uint ic, ulong hashkey, int *hash); + void write_hash_primejump_openmp_report_level_2(uint ic, ulong hashkey, int *hash); + void write_hash_primejump_openmp_report_level_3(uint ic, ulong hashkey, int *hash); + extern void (*write_hash)(uint ic, ulong hashkey, int *hash); // declared in hash.c +#endif + +int read_hash_perfect(ulong hashkey, int *hash); +int read_hash_linear(ulong hashkey, int *hash); +int read_hash_linear_report_level_1(ulong hashkey, int *hash); +int read_hash_linear_report_level_2(ulong hashkey, int *hash); +int read_hash_linear_report_level_3(ulong hashkey, int *hash); +int read_hash_quadratic(ulong hashkey, int *hash); +int read_hash_quadratic_report_level_1(ulong hashkey, int *hash); +int read_hash_quadratic_report_level_2(ulong 
hashkey, int *hash); +int read_hash_quadratic_report_level_3(ulong hashkey, int *hash); +int read_hash_primejump(ulong hashkey, int *hash); +int read_hash_primejump_report_level_1(ulong hashkey, int *hash); +int read_hash_primejump_report_level_2(ulong hashkey, int *hash); +int read_hash_primejump_report_level_3(ulong hashkey, int *hash); +extern int (*read_hash)(ulong hashkey, int *hash); // declared in hash.c + +void compact_hash_delete(int *hash); + +void write_hash_collision_report(void); +void read_hash_collision_report(void); +void final_hash_collision_report(void); + +const char *get_hash_kernel_source_string(void); +void hash_lib_init(void); +void hash_lib_terminate(void); + +#ifdef HAVE_OPENCL +cl_mem gpu_compact_hash_init(ulong ncells, int imaxsize, int jmaxsize, int gpu_hash_method, uint hash_report_level_in, + ulong *gpu_hash_table_size, ulong *hashsize, cl_mem *dev_hash_header_in); +cl_mem gpu_get_hash_header(void); +void gpu_compact_hash_delete(cl_mem dev_hash, cl_mem dev_hash_header); +#endif +int read_dev_hash(int hash_method, ulong hash_table_size, ulong AA, ulong BB, ulong hashkey, int *hash); + +#ifdef __cplusplus +} +#endif + + +#endif // _HASH_H + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/hash.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/hash.c @@ -0,0 +1,1277 @@ +//#if defined __INTEL_COMPILER + +#include +#define __USE_XOPEN +#include +#include "hash.h" +#include "genmalloc.h" +#ifdef HAVE_OPENCL +#include "hashlib_kern.inc" +#include "hashlib_source_kern.inc" +#endif + +#ifndef NO_GLIBC_COMPAT_RAND +#include "glibc_compat_rand.h" +#define rand glibc_compat_rand +#define srand glibc_compat_srand +#define drand48() (1.0 * rand() / RAND_MAX) +#define srand48(x) srand(x) +#endif + +static ulong AA; +static ulong BB; +static ulong prime=4294967291; +static uint hashtablesize; +static uint hash_stride; +static uint hash_ncells; +static uint 
write_hash_collisions; +static uint read_hash_collisions; +static double write_hash_collisions_runsum = 0.0; +static double read_hash_collisions_runsum = 0.0; +static uint write_hash_collisions_count = 0; +static uint read_hash_collisions_count = 0; +static uint hash_report_level = 2; +static uint hash_queries; +static int hash_method = METHOD_UNSET; +static uint hash_jump_prime = 41; +static double hash_mult = 3.0; + +size_t hash_header_size = 16; + +#ifdef HAVE_OPENCL +cl_mem dev_hash_header = NULL; +#endif + +float mem_opt_factor; + +int choose_hash_method = METHOD_UNSET; + +#define MIN(a,b) ((a) < (b) ? (a) : (b)) + +int (*read_hash)(ulong, int *); +void (*write_hash)(uint, ulong, int *); + +int get_hash_method(void) { + return(hash_method); +} + +long long get_hashtablesize(void) { + return(hashtablesize); +} + +int *compact_hash_init(int ncells, uint isize, uint jsize, uint report_level){ + hash_ncells = 0; + write_hash_collisions = 0; + read_hash_collisions = 0; + hash_queries = 0; + hash_report_level = report_level; + hash_stride = isize; + int *hash = NULL; + + if (choose_hash_method != METHOD_UNSET) hash_method = choose_hash_method; + + uint compact_hash_size = (uint)((double)ncells*hash_mult); + uint perfect_hash_size = (uint)(isize*jsize); + + if (hash_method == METHOD_UNSET){ + float hash_mem_factor = 20.0; + float hash_mem_ratio = (double)perfect_hash_size/(double)compact_hash_size; + if (mem_opt_factor != 1.0) hash_mem_factor /= (mem_opt_factor*0.2); + hash_method = (hash_mem_ratio < hash_mem_factor) ? PERFECT_HASH : QUADRATIC; + + if (hash_report_level >= 2) printf("DEBUG hash_method %d hash_mem_ratio %f hash_mem_factor %f mem_opt_factor %f perfect_hash_size %u compact_hash_size %u\n", + hash_method,hash_mem_ratio,hash_mem_factor,mem_opt_factor,perfect_hash_size,compact_hash_size); + } + + int do_compact_hash = (hash_method == PERFECT_HASH) ? 
0 : 1; + + if (hash_report_level >= 2) printf("DEBUG do_compact_hash %d hash_method %d perfect_hash_size %u compact_hash_size %u\n", + do_compact_hash,hash_method,perfect_hash_size,compact_hash_size); + + if (do_compact_hash) { + hashtablesize = compact_hash_size; + AA = (ulong)(1.0+(double)(prime-1)*drand48()); + BB = (ulong)(0.0+(double)(prime-1)*drand48()); + if (AA > prime-1 || BB > prime-1) exit(0); + if (hash_report_level > 1) printf("Factors AA %lu BB %lu\n",AA,BB); + + hash = (int *)genvector(2*hashtablesize,sizeof(int)); + for (uint ii = 0; ii<2*hashtablesize; ii+=2){ + hash[ii] = -1; + } + + if (hash_method == LINEAR){ + if (hash_report_level == 0){ + read_hash = read_hash_linear; + write_hash = write_hash_linear; + } else if (hash_report_level == 1){ + read_hash = read_hash_linear_report_level_1; + write_hash = write_hash_linear_report_level_1; + } else if (hash_report_level == 2){ + read_hash = read_hash_linear_report_level_2; + write_hash = write_hash_linear_report_level_2; + } else if (hash_report_level == 3){ + read_hash = read_hash_linear_report_level_3; + write_hash = write_hash_linear_report_level_3; + } + } else if (hash_method == QUADRATIC) { + if (hash_report_level == 0){ + read_hash = read_hash_quadratic; + write_hash = write_hash_quadratic; + } else if (hash_report_level == 1){ + read_hash = read_hash_quadratic_report_level_1; + write_hash = write_hash_quadratic_report_level_1; + } else if (hash_report_level == 2){ + read_hash = read_hash_quadratic_report_level_2; + write_hash = write_hash_quadratic_report_level_2; + } else if (hash_report_level == 3){ + read_hash = read_hash_quadratic_report_level_3; + write_hash = write_hash_quadratic_report_level_3; + } + } else if (hash_method == PRIME_JUMP) { + if (hash_report_level == 0){ + read_hash = read_hash_primejump; + write_hash = write_hash_primejump; + } else if (hash_report_level == 1){ + read_hash = read_hash_primejump_report_level_1; + write_hash = write_hash_primejump_report_level_1; + } 
else if (hash_report_level == 2){ + read_hash = read_hash_primejump_report_level_2; + write_hash = write_hash_primejump_report_level_2; + } else if (hash_report_level == 3){ + read_hash = read_hash_primejump_report_level_3; + write_hash = write_hash_primejump_report_level_3; + } + } + } else { + hashtablesize = perfect_hash_size; + + hash = (int *)genvector(hashtablesize,sizeof(int)); + for (uint ii = 0; ii= 2) { + printf("Hash table size %u perfect hash table size %u memory savings %d by percentage %lf\n", + hashtablesize,isize*jsize,(int)isize*(int)jsize-(int)hashtablesize, + (double)hashtablesize/(double)(isize*jsize) * 100.0); + } + + return(hash); +} + +#ifdef _OPENMP +int *compact_hash_init_openmp(int ncells, uint isize, uint jsize, uint report_level){ + static int *hash = NULL; + + static float hash_mem_factor; + static float hash_mem_ratio; + static int do_compact_hash; + static uint compact_hash_size; + static uint perfect_hash_size; + +#pragma omp barrier +#pragma omp master + { + + hash_ncells = 0; + write_hash_collisions = 0; + read_hash_collisions = 0; + hash_queries = 0; + hash_report_level = report_level; + hash_stride = isize; + + if (choose_hash_method != METHOD_UNSET) hash_method = choose_hash_method; + + compact_hash_size = (uint)((double)ncells*hash_mult); + perfect_hash_size = (uint)(isize*jsize); + + if (hash_method == METHOD_UNSET){ + hash_mem_factor = 20.0; + hash_mem_ratio = (double)perfect_hash_size/(double)compact_hash_size; + if (mem_opt_factor != 1.0) hash_mem_factor /= (mem_opt_factor*0.2); + hash_method = (hash_mem_ratio < hash_mem_factor) ? PERFECT_HASH : QUADRATIC; + //hash_method = QUADRATIC; + + if (hash_report_level >= 2) printf("DEBUG hash_method %d hash_mem_ratio %f hash_mem_factor %f mem_opt_factor %f perfect_hash_size %u compact_hash_size %u\n", + hash_method,hash_mem_ratio,hash_mem_factor,mem_opt_factor,perfect_hash_size,compact_hash_size); + } + + do_compact_hash = (hash_method == PERFECT_HASH) ? 
0 : 1; + + if (hash_report_level >= 2) printf("DEBUG do_compact_hash %d hash_method %d perfect_hash_size %u compact_hash_size %u\n", + do_compact_hash,hash_method,perfect_hash_size,compact_hash_size); + + } // end omp master +#pragma omp barrier + + if (do_compact_hash) { +#pragma omp master + { + hashtablesize = compact_hash_size; + //srand48(0); + AA = (ulong)(1.0+(double)(prime-1)*drand48()); + BB = (ulong)(0.0+(double)(prime-1)*drand48()); + if (AA > prime-1 || BB > prime-1) exit(0); + if (hash_report_level > 1) printf("Factors AA %lu BB %lu\n",AA,BB); + + hash = (int *)genvector(2*hashtablesize,sizeof(int)); + } // end omp master +#pragma omp barrier + +#pragma omp for + for (uint ii = 0; ii= 2) { + printf("Hash table size %u perfect hash table size %u memory savings %u by percentage %lf\n", + hashtablesize,isize*jsize,isize*jsize-hashtablesize, + (double)hashtablesize/(double)(isize*jsize)); + } + } +#pragma omp barrier + + return(hash); +} +#endif + +void write_hash_perfect(uint ic, ulong hashkey, int *hash){ + hash[hashkey] = ic; +} + +void write_hash_linear(uint ic, ulong hashkey, int *hash){ + uint hashloc; + + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != -1 && hash[2*hashloc]!= (int)hashkey; hashloc++,hashloc = hashloc%hashtablesize); + + hash[2*hashloc] = hashkey; + hash[2*hashloc+1] = ic; +} + +void write_hash_linear_report_level_1(uint ic, ulong hashkey, int *hash){ + uint hashloc; + + hash_ncells++; + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != -1 && hash[2*hashloc]!= (int)hashkey; hashloc++,hashloc = hashloc%hashtablesize){ + write_hash_collisions++; + } + + hash[2*hashloc] = hashkey; + hash[2*hashloc+1] = ic; +} + +void write_hash_linear_report_level_2(uint ic, ulong hashkey, int *hash){ + uint hashloc; + + hash_ncells++; + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != -1 && hash[2*hashloc]!= (int)hashkey; hashloc++,hashloc = hashloc%hashtablesize){ + 
write_hash_collisions++; + } + + hash[2*hashloc] = hashkey; + hash[2*hashloc+1] = ic; +} + +void write_hash_linear_report_level_3(uint ic, ulong hashkey, int *hash){ + int icount = 0; + uint hashloc; + + hash_ncells++; + hashloc = (hashkey*AA+BB)%prime%hashtablesize; + printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,ic,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride); + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != -1 && hash[2*hashloc]!= (int)hashkey; hashloc++,hashloc = hashloc%hashtablesize){ + int hashloctmp = hashloc+1; + hashloctmp = hashloctmp%hashtablesize; + printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,ic,hashloctmp,hash[2*hashloctmp],hashkey,hashkey%hash_stride,hashkey/hash_stride); + icount++; + } + write_hash_collisions += icount; + + hash[2*hashloc] = hashkey; + hash[2*hashloc+1] = ic; +} + +void write_hash_quadratic(uint ic, ulong hashkey, int *hash){ + int icount = 0; + uint hashloc; + + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != -1 && hash[2*hashloc]!= (int)hashkey; hashloc+=(icount*icount),hashloc = hashloc%hashtablesize) { + icount++; + } + + hash[2*hashloc] = hashkey; + hash[2*hashloc+1] = ic; +} + +void write_hash_quadratic_report_level_1(uint ic, ulong hashkey, int *hash){ + int icount = 0; + uint hashloc; + + hash_ncells++; + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != -1 && hash[2*hashloc]!= (int)hashkey; hashloc+=(icount*icount),hashloc = hashloc%hashtablesize){ + icount++; + } + write_hash_collisions += icount; + + hash[2*hashloc] = hashkey; + hash[2*hashloc+1] = ic; +} + +void write_hash_quadratic_report_level_2(uint ic, ulong hashkey, int *hash){ + int icount = 0; + uint hashloc; + + hash_ncells++; + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != -1 && hash[2*hashloc]!= (int)hashkey; hashloc+=(icount*icount),hashloc = 
hashloc%hashtablesize){ + icount++; + } + write_hash_collisions += icount; + + hash[2*hashloc] = hashkey; + hash[2*hashloc+1] = ic; +} + +void write_hash_quadratic_report_level_3(uint ic, ulong hashkey, int *hash){ + int icount = 0; + uint hashloc; + + hash_ncells++; + hashloc = (hashkey*AA+BB)%prime%hashtablesize; + printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,ic,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride); + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != -1 && hash[2*hashloc]!= (int)hashkey; hashloc+=(icount*icount),hashloc = hashloc%hashtablesize){ + icount++; + int hashloctmp = hashloc+icount*icount; + hashloctmp = hashloctmp%hashtablesize; + printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,ic,hashloctmp,hash[2*hashloctmp],hashkey,hashkey%hash_stride,hashkey/hash_stride); + } + write_hash_collisions += icount; + + hash[2*hashloc] = hashkey; + hash[2*hashloc+1] = ic; +} + +void write_hash_primejump(uint ic, ulong hashkey, int *hash){ + int icount = 0; + uint hashloc; + + uint jump = 1+hashkey%hash_jump_prime; + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != -1 && hash[2*hashloc]!= (int)hashkey; hashloc+=(icount*jump),hashloc = hashloc%hashtablesize) { + icount++; + } + + hash[2*hashloc] = hashkey; + hash[2*hashloc+1] = ic; +} + +void write_hash_primejump_report_level_1(uint ic, ulong hashkey, int *hash){ + int icount = 0; + uint hashloc; + + uint jump = 1+hashkey%hash_jump_prime; + hash_ncells++; + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != -1 && hash[2*hashloc]!= (int)hashkey; hashloc+=(icount*jump),hashloc = hashloc%hashtablesize){ + icount++; + } + write_hash_collisions += icount; + + hash[2*hashloc] = hashkey; + hash[2*hashloc+1] = ic; +} + +void write_hash_primejump_report_level_2(uint ic, ulong hashkey, int *hash){ + int icount = 0; + uint hashloc; + + uint jump = 
1+hashkey%hash_jump_prime; + hash_ncells++; + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != -1 && hash[2*hashloc]!= (int)hashkey; hashloc+=(icount*jump),hashloc = hashloc%hashtablesize){ + icount++; + } + write_hash_collisions += icount; + + hash[2*hashloc] = hashkey; + hash[2*hashloc+1] = ic; +} + +void write_hash_primejump_report_level_3(uint ic, ulong hashkey, int *hash){ + int icount = 0; + uint hashloc; + + uint jump = 1+hashkey%hash_jump_prime; + hash_ncells++; + hashloc = (hashkey*AA+BB)%prime%hashtablesize; + printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,ic,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride); + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != -1 && hash[2*hashloc]!= (int)hashkey; hashloc+=(icount*jump),hashloc = hashloc%hashtablesize){ + icount++; + int hashloctmp = hashloc+1; + hashloctmp = hashloctmp%hashtablesize; + printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,ic,hashloctmp,hash[2*hashloctmp],hashkey,hashkey%hash_stride,hashkey/hash_stride); + } + write_hash_collisions += icount; + + hash[2*hashloc] = hashkey; + hash[2*hashloc+1] = ic; +} + +#ifdef _OPENMP +void write_hash_linear_openmp(uint ic, ulong hashkey, int *hash){ + int icount; + uint hashloc = (hashkey*AA+BB)%prime%hashtablesize;; + + int MaxTries = 1000; + + int old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + //printf("old_key is %d\n",old_key); + + for (icount = 1; old_key != hashkey && old_key != -1 && icount < MaxTries; icount++){ + hashloc++; + hashloc %= hashtablesize; + + old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + } + + if (icount < MaxTries) hash[2*hashloc+1] = ic; +} + +void write_hash_linear_openmp_report_level_1(uint ic, ulong hashkey, int *hash){ + int icount = 0; + uint hashloc = (hashkey*AA+BB)%prime%hashtablesize;; + + int MaxTries = 1000; + + int old_key 
= __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + //printf("old_key is %d\n",old_key); + + for (icount = 1; old_key != hashkey && old_key != -1 && icount < MaxTries; icount++){ + hashloc++; + hashloc %= hashtablesize; + + old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + icount++; + } + + if (icount < MaxTries) hash[2*hashloc+1] = ic; + +#pragma omp atomic + write_hash_collisions += icount;; +#pragma omp atomic + hash_ncells++; +} + +void write_hash_linear_openmp_report_level_2(uint ic, ulong hashkey, int *hash){ + int icount = 0; + uint hashloc = (hashkey*AA+BB)%prime%hashtablesize;; + + int MaxTries = 1000; + + int old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + //printf("old_key is %d\n",old_key); + + for (icount = 1; old_key != hashkey && old_key != -1 && icount < MaxTries; icount++){ + hashloc++; + hashloc %= hashtablesize; + + old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + icount++; + } + + if (icount < MaxTries) hash[2*hashloc+1] = ic; + +#pragma omp atomic + write_hash_collisions += icount;; +#pragma omp atomic + hash_ncells++; +} + +void write_hash_linear_openmp_report_level_3(uint ic, ulong hashkey, int *hash){ + int icount = 0; + uint hashloc = (hashkey*AA+BB)%prime%hashtablesize;; + printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,ic,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride); + + int MaxTries = 1000; + + int old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + //printf("old_key is %d\n",old_key); + + for (icount = 1; old_key != hashkey && old_key != -1 && icount < MaxTries; icount++){ + hashloc++; + hashloc %= hashtablesize; + printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,ic,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride); + + old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + icount++; + } 
+ + if (icount < MaxTries) hash[2*hashloc+1] = ic; + +#pragma omp atomic + write_hash_collisions += icount;; +#pragma omp atomic + hash_ncells++; +} + +void write_hash_quadratic_openmp(uint ic, ulong hashkey, int *hash){ + int icount = 0; + uint hashloc = (hashkey*AA+BB)%prime%hashtablesize; + + int MaxTries = 1000; + + int old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + //printf("old_key is %d\n",old_key); + + for (icount = 1; old_key != hashkey && old_key != -1 && icount < MaxTries; icount++){ + hashloc+=(icount*icount); + hashloc %= hashtablesize; + + old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + } + + if (icount < MaxTries) hash[2*hashloc+1] = ic; +} + +void write_hash_quadratic_openmp_report_level_1(uint ic, ulong hashkey, int *hash){ + int icount = 0; + uint hashloc = (hashkey*AA+BB)%prime%hashtablesize; + + int MaxTries = 1000; + + int old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + //printf("old_key is %d\n",old_key); + + for (icount = 1; old_key != hashkey && old_key != -1 && icount < MaxTries; icount++){ + hashloc+=(icount*icount); + hashloc %= hashtablesize; + + old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + } + + if (icount < MaxTries) hash[2*hashloc+1] = ic; + +#pragma omp atomic + write_hash_collisions += icount;; +#pragma omp atomic + hash_ncells++; +} + +void write_hash_quadratic_openmp_report_level_2(uint ic, ulong hashkey, int *hash){ + int icount = 0; + uint hashloc = (hashkey*AA+BB)%prime%hashtablesize; + + int MaxTries = 1000; + + int old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + //printf("old_key is %d\n",old_key); + + for (icount = 1; old_key != hashkey && old_key != -1 && icount < MaxTries; icount++){ + hashloc+=(icount*icount); + hashloc %= hashtablesize; + + old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + } + + if (icount < MaxTries) hash[2*hashloc+1] = ic; + +#pragma omp atomic + 
write_hash_collisions += icount;; +#pragma omp atomic + hash_ncells++; +} + +void write_hash_quadratic_openmp_report_level_3(uint ic, ulong hashkey, int *hash){ + int icount = 0; + uint hashloc = (hashkey*AA+BB)%prime%hashtablesize; + printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,ic,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride); + + int MaxTries = 1000; + + int old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + //printf("old_key is %d\n",old_key); + + for (icount = 1; old_key != hashkey && old_key != -1 && icount < MaxTries; icount++){ + hashloc+=(icount*icount); + hashloc %= hashtablesize; + printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,ic,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride); + + old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + } + + if (icount < MaxTries) hash[2*hashloc+1] = ic; + +#pragma omp atomic + write_hash_collisions += icount;; +#pragma omp atomic + hash_ncells++; +} + +void write_hash_primejump_openmp(uint ic, ulong hashkey, int *hash){ + int icount = 0; + uint jump = 1+hashkey%hash_jump_prime; + uint hashloc = (hashkey*AA+BB)%prime%hashtablesize; + + int MaxTries = 1000; + + int old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + //printf("old_key is %d\n",old_key); + + for (icount = 1; old_key != hashkey && old_key != -1 && icount < MaxTries; icount++){ + hashloc+=(icount*jump); + hashloc %= hashtablesize; + + old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + } + + if (icount < MaxTries) hash[2*hashloc+1] = ic; +} + +void write_hash_primejump_openmp_report_level_1(uint ic, ulong hashkey, int *hash){ + int icount = 0; + uint jump = 1+hashkey%hash_jump_prime; + uint hashloc = (hashkey*AA+BB)%prime%hashtablesize; + + int MaxTries = 1000; + + int old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + 
//printf("old_key is %d\n",old_key); + + for (icount = 1; old_key != hashkey && old_key != -1 && icount < MaxTries; icount++){ + hashloc+=(icount*jump); + hashloc %= hashtablesize; + + old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + } + + if (icount < MaxTries) hash[2*hashloc+1] = ic; + +#pragma omp atomic + write_hash_collisions += icount;; +#pragma omp atomic + hash_ncells++; +} + +void write_hash_primejump_openmp_report_level_2(uint ic, ulong hashkey, int *hash){ + int icount = 0; + uint jump = 1+hashkey%hash_jump_prime; + uint hashloc = (hashkey*AA+BB)%prime%hashtablesize; + + int MaxTries = 1000; + + int old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + //printf("old_key is %d\n",old_key); + + for (icount = 1; old_key != hashkey && old_key != -1 && icount < MaxTries; icount++){ + hashloc+=(icount*jump); + hashloc %= hashtablesize; + + old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + } + + if (icount < MaxTries) hash[2*hashloc+1] = ic; + +#pragma omp atomic + write_hash_collisions += icount;; +#pragma omp atomic + hash_ncells++; +} + +void write_hash_primejump_openmp_report_level_3(uint ic, ulong hashkey, int *hash){ + int icount = 0; + uint jump = 1+hashkey%hash_jump_prime; + uint hashloc = (hashkey*AA+BB)%prime%hashtablesize; + printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,ic,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride); + + int MaxTries = 1000; + + int old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + //printf("old_key is %d\n",old_key); + + for (icount = 1; old_key != hashkey && old_key != -1 && icount < MaxTries; icount++){ + hashloc+=(icount*jump); + hashloc %= hashtablesize; + printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,ic,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride); + + old_key = 
__sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey); + } + + if (icount < MaxTries) hash[2*hashloc+1] = ic; + +#pragma omp atomic + write_hash_collisions += icount;; +#pragma omp atomic + hash_ncells++; +} +#endif + +int read_hash_perfect(ulong hashkey, int *hash){ + return(hash[hashkey]); +} + +int read_hash_linear(ulong hashkey, int *hash){ + int hashval = -1; + uint hashloc; + int icount=0; + + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc++,hashloc = hashloc%hashtablesize){ + icount++; + } + + if (hash[2*hashloc] != -1) hashval = hash[2*hashloc+1]; + return(hashval); +} + +int read_hash_linear_report_level_1(ulong hashkey, int *hash){ + int hashval = -1; + uint hashloc; + int icount=0; + + hash_queries++; + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc++,hashloc = hashloc%hashtablesize){ + icount++; + } + read_hash_collisions += icount; + + if (hash[2*hashloc] != -1) hashval = hash[2*hashloc+1]; + return(hashval); +} + +int read_hash_linear_report_level_2(ulong hashkey, int *hash){ + int max_collisions_allowed = 1000; + int hashval = -1; + uint hashloc; + int icount=0; + + hash_queries++; + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc++,hashloc = hashloc%hashtablesize){ + icount++; + if (icount > max_collisions_allowed) { + printf("Error -- too many read hash collisions\n"); + exit(0); + } + } + read_hash_collisions += icount; + + if (hash[2*hashloc] != -1) hashval = hash[2*hashloc+1]; + return(hashval); +} + +int read_hash_linear_report_level_3(ulong hashkey, int *hash){ + int max_collisions_allowed = 1000; + int hashval = -1; + uint hashloc; + int icount=0; + + hash_queries++; + hashloc = (hashkey*AA+BB)%prime%hashtablesize; + printf("%d: hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj 
%lu\n",icount,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride); + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc++,hashloc = hashloc%hashtablesize){ + icount++; + uint hashloctmp = hashloc+1; + hashloctmp = hashloctmp%hashtablesize; + printf("%d: hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloctmp,hash[2*hashloctmp],hashkey,hashkey%hash_stride,hashkey/hash_stride); + if (icount > max_collisions_allowed) { + printf("Error -- too many read hash collisions\n"); + exit(0); + } + } + read_hash_collisions += icount; + + if (hash[2*hashloc] != -1) hashval = hash[2*hashloc+1]; + return(hashval); +} + +int read_hash_quadratic(ulong hashkey, int *hash){ + int hashval = -1; + uint hashloc; + int icount=0; + + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*icount),hashloc = hashloc%hashtablesize){ + icount++; + } + + if (hash[2*hashloc] != -1) hashval = hash[2*hashloc+1]; + return(hashval); +} + +int read_hash_quadratic_report_level_1(ulong hashkey, int *hash){ + int hashval = -1; + uint hashloc; + int icount=0; + + hash_queries++; + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*icount),hashloc = hashloc%hashtablesize){ + icount++; + } + read_hash_collisions += icount; + + if (hash[2*hashloc] != -1) hashval = hash[2*hashloc+1]; + return(hashval); +} + +int read_hash_quadratic_report_level_2(ulong hashkey, int *hash){ + int max_collisions_allowed = 1000; + int hashval = -1; + uint hashloc; + int icount=0; + + hash_queries++; + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*icount),hashloc = hashloc%hashtablesize){ + icount++; + if (icount > max_collisions_allowed) { + printf("Error -- too many read hash collisions\n"); 
+ exit(0); + } + } + read_hash_collisions += icount; + + if (hash[2*hashloc] != -1) hashval = hash[2*hashloc+1]; + return(hashval); +} + +int read_hash_quadratic_report_level_3(ulong hashkey, int *hash){ + int max_collisions_allowed = 1000; + int hashval = -1; + uint hashloc; + int icount=0; + + hash_queries++; + hashloc = (hashkey*AA+BB)%prime%hashtablesize; + printf("%d: hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride); + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*icount),hashloc = hashloc%hashtablesize){ + icount++; + uint hashloctmp = hashloc+1; + hashloctmp = hashloctmp%hashtablesize; + printf("%d: hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloctmp,hash[2*hashloctmp],hashkey,hashkey%hash_stride,hashkey/hash_stride); + if (icount > max_collisions_allowed) { + printf("Error -- too many read hash collisions\n"); + exit(0); + } + } + read_hash_collisions += icount; + + if (hash[2*hashloc] != -1) hashval = hash[2*hashloc+1]; + return(hashval); +} + +int read_hash_primejump(ulong hashkey, int *hash){ + int hashval = -1; + uint hashloc; + int icount=0; + + uint jump = 1+hashkey%hash_jump_prime; + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*jump),hashloc = hashloc%hashtablesize){ + icount++; + } + + if (hash[2*hashloc] != -1) hashval = hash[2*hashloc+1]; + return(hashval); +} + +int read_hash_primejump_report_level_1(ulong hashkey, int *hash){ + int hashval = -1; + uint hashloc; + int icount=0; + + uint jump = 1+hashkey%hash_jump_prime; + hash_queries++; + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*jump),hashloc = hashloc%hashtablesize){ + icount++; + } + read_hash_collisions += icount; + + if 
(hash[2*hashloc] != -1) hashval = hash[2*hashloc+1]; + return(hashval); +} + +int read_hash_primejump_report_level_2(ulong hashkey, int *hash){ + int max_collisions_allowed = 1000; + int hashval = -1; + uint hashloc; + int icount=0; + + uint jump = 1+hashkey%hash_jump_prime; + hash_queries++; + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*jump),hashloc = hashloc%hashtablesize){ + icount++; + if (icount > max_collisions_allowed) { + printf("Error -- too many read hash collisions\n"); + exit(0); + } + } + read_hash_collisions += icount; + + if (hash[2*hashloc] != -1) hashval = hash[2*hashloc+1]; + return(hashval); +} + +int read_hash_primejump_report_level_3(ulong hashkey, int *hash){ + int max_collisions_allowed = 1000; + int hashval = -1; + uint hashloc; + int icount=0; + + uint jump = 1+hashkey%hash_jump_prime; + hash_queries++; + hashloc = (hashkey*AA+BB)%prime%hashtablesize; + printf("%d: hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride); + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*jump),hashloc = hashloc%hashtablesize){ + icount++; + uint hashloctmp = hashloc+1; + hashloctmp = hashloctmp%hashtablesize; + printf("%d: hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloctmp,hash[2*hashloctmp],hashkey,hashkey%hash_stride,hashkey/hash_stride); + if (icount > max_collisions_allowed) { + printf("Error -- too many read hash collisions\n"); + exit(0); + } + } + read_hash_collisions += icount; + + if (hash[2*hashloc] != -1) hashval = hash[2*hashloc+1]; + return(hashval); +} + +void compact_hash_delete(int *hash){ + read_hash = NULL; + genvectorfree((void *)hash); + hash_method = METHOD_UNSET; +} + +void write_hash_collision_report(void){ + if (hash_method == PERFECT_HASH) return; + if 
(hash_report_level == 1) { + write_hash_collisions_runsum += (double)write_hash_collisions/(double)hash_ncells; + write_hash_collisions_count++; + } else if (hash_report_level >= 2) { + printf("Write hash collision report -- collisions per cell %lf, collisions %d cells %d\n",(double)write_hash_collisions/(double)hash_ncells,write_hash_collisions,hash_ncells); + } +} + +void read_hash_collision_report(void){ + //printf("hash table size bytes %ld\n",hashtablesize*sizeof(int)); + if (hash_method == PERFECT_HASH) return; + if (hash_report_level == 1) { + read_hash_collisions_runsum += (double)read_hash_collisions/(double)hash_queries; + read_hash_collisions_count++; + } else if (hash_report_level >= 2) { + printf("Read hash collision report -- collisions per cell %lf, collisions %d cells %d\n",(double)read_hash_collisions/(double)hash_queries,read_hash_collisions,hash_queries); + hash_queries = 0; + read_hash_collisions = 0; + } +} + +void final_hash_collision_report(void){ + printf("hash table size bytes %ld\n",hashtablesize*sizeof(int)); + if (hash_report_level >= 1 && read_hash_collisions_count > 0) { + printf("Final hash collision report -- write/read collisions per cell %lf/%lf\n",write_hash_collisions_runsum/(double)write_hash_collisions_count,read_hash_collisions_runsum/(double)read_hash_collisions_count); + } +} + +#ifdef HAVE_OPENCL +const char *get_hash_kernel_source_string(void) +{ + return(hashlib_source_kern_source); +} +#endif + +#ifdef HAVE_OPENCL +static cl_kernel kernel_hash_init; +void hash_lib_init(void){ + cl_context context = ezcl_get_context(); + + const char *defines = NULL; + cl_program program = ezcl_create_program_wsource(context, defines, hashlib_kern_source); + + kernel_hash_init = ezcl_create_kernel_wprogram(program, "hash_init_cl"); + + ezcl_program_release(program); +} + +void hash_lib_terminate(void){ + ezcl_kernel_release(kernel_hash_init); +} + +cl_mem gpu_compact_hash_init(ulong ncells, int imaxsize, int jmaxsize, int gpu_hash_method, 
uint hash_report_level_in, + ulong *gpu_hashtablesize, ulong *hashsize, cl_mem *dev_hash_header_in) +{ + hash_report_level = hash_report_level_in; + + uint gpu_compact_hash_size = (uint)((double)ncells*hash_mult); + uint gpu_perfect_hash_size = (uint)(imaxsize*jmaxsize); + + if (gpu_hash_method == METHOD_UNSET) { + float gpu_hash_mem_factor = 20.0; + float gpu_hash_mem_ratio = (double)gpu_perfect_hash_size/(double)gpu_compact_hash_size; + if (mem_opt_factor != 1.0) gpu_hash_mem_factor /= (mem_opt_factor*0.2); + gpu_hash_method = (gpu_hash_mem_ratio < gpu_hash_mem_factor) ? PERFECT_HASH : QUADRATIC; + } + + int gpu_do_compact_hash = (gpu_hash_method == PERFECT_HASH) ? 0 : 1; + + ulong gpu_AA = 1; + ulong gpu_BB = 0; + if (gpu_do_compact_hash){ + (*gpu_hashtablesize) = gpu_compact_hash_size; + gpu_AA = (ulong)(1.0+(double)(prime-1)*drand48()); + gpu_BB = (ulong)(0.0+(double)(prime-1)*drand48()); + //if ( gpu_AA > prime-1 || gpu_BB > prime-1) exit(0); + (*hashsize) = 2*gpu_compact_hash_size; + } else { + (*gpu_hashtablesize) = gpu_perfect_hash_size; + (*hashsize) = gpu_perfect_hash_size; + } + + hashtablesize = (*hashsize); + + const uint TILE_SIZE = 128; + + cl_command_queue command_queue = ezcl_get_command_queue(); + + cl_mem dev_hash = ezcl_malloc(NULL, "dev_hash", hashsize, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + ulong *gpu_hash_header = (ulong *)genvector(hash_header_size, sizeof(ulong)); + gpu_hash_header[0] = (ulong)gpu_hash_method; + gpu_hash_header[1] = (*gpu_hashtablesize); + gpu_hash_header[2] = gpu_AA; + gpu_hash_header[3] = gpu_BB; + dev_hash_header = ezcl_malloc(NULL, "dev_hash_header", &hash_header_size, sizeof(cl_ulong), CL_MEM_READ_WRITE, 0); + ezcl_enqueue_write_buffer(command_queue, dev_hash_header, CL_TRUE, 0, hash_header_size*sizeof(cl_ulong), &gpu_hash_header[0], NULL); + + genvectorfree(gpu_hash_header); + + (*dev_hash_header_in) = dev_hash_header; + + size_t hash_local_work_size = MIN((*hashsize), TILE_SIZE); + size_t hash_global_work_size = 
(((*hashsize)+hash_local_work_size - 1) /hash_local_work_size) * hash_local_work_size; + + ezcl_set_kernel_arg(kernel_hash_init, 0, sizeof(cl_int), (void *)hashsize); + ezcl_set_kernel_arg(kernel_hash_init, 1, sizeof(cl_mem), (void *)&dev_hash); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_hash_init, 1, NULL, &hash_global_work_size, &hash_local_work_size, NULL); + + return(dev_hash); +} + +void gpu_compact_hash_delete(cl_mem dev_hash, cl_mem dev_hash_header){ + ezcl_device_memory_delete(dev_hash); + ezcl_device_memory_delete(dev_hash_header); + hash_method = METHOD_UNSET; +} + +cl_mem gpu_get_hash_header(void){ + return(dev_hash_header); +} +#endif + +int read_dev_hash(int hash_method, ulong hashtablesize, ulong AA, ulong BB, ulong hashkey, int *hash){ + //int hash_report_level = 3; + int max_collisions_allowed = 1000; + int hashval = -1; + uint hashloc; + int icount=0; + if (hash_method == PERFECT_HASH) { + return(hash[hashkey]); + } + if (hash_method == LINEAR) { + if (hash_report_level == 0) { + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc++,hashloc = hashloc%hashtablesize){ + icount++; + } + } else if (hash_report_level == 1) { + hash_queries++; + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc++,hashloc = hashloc%hashtablesize){ + icount++; + } + read_hash_collisions += icount; + } else if (hash_report_level == 2) { + hash_queries++; + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc++,hashloc = hashloc%hashtablesize){ + icount++; + if (icount > max_collisions_allowed) { + printf("Error -- too many read hash collisions\n"); + exit(0); + } + } + read_hash_collisions += icount; + } else if (hash_report_level == 3) { + hash_queries++; + hashloc = (hashkey*AA+BB)%prime%hashtablesize; + printf("%d: hashloc is %d hash[2*hashloc] = %d hashkey %lu ii 
%lu jj %lu\n",icount,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride); + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc++,hashloc = hashloc%hashtablesize){ + icount++; + uint hashloctmp = hashloc+1; + hashloctmp = hashloctmp%hashtablesize; + printf("%d: hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloctmp,hash[2*hashloctmp],hashkey,hashkey%hash_stride,hashkey/hash_stride); + if (icount > max_collisions_allowed) { + printf("Error -- too many read hash collisions\n"); + exit(0); + } + } + read_hash_collisions += icount; + } else { + printf("Error -- Illegal value of hash_report_level %d\n",hash_report_level); + exit(1); + } + } else if (hash_method == QUADRATIC) { + if (hash_report_level == 0) { + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*icount),hashloc = hashloc%hashtablesize){ + icount++; + } + } else if (hash_report_level == 1) { + hash_queries++; + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*icount),hashloc = hashloc%hashtablesize){ + icount++; + } + read_hash_collisions += icount; + } else if (hash_report_level == 2) { + hash_queries++; + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*icount),hashloc = hashloc%hashtablesize){ + icount++; + if (icount > max_collisions_allowed) { + printf("Error -- too many read hash collisions\n"); + exit(0); + } + } + read_hash_collisions += icount; + } else if (hash_report_level == 3) { + hash_queries++; + hashloc = (hashkey*AA+BB)%prime%hashtablesize; + printf("%d: hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride); + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; 
hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*icount),hashloc = hashloc%hashtablesize){ + icount++; + uint hashloctmp = hashloc+1; + hashloctmp = hashloctmp%hashtablesize; + printf("%d: hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloctmp,hash[2*hashloctmp],hashkey,hashkey%hash_stride,hashkey/hash_stride); + if (icount > max_collisions_allowed) { + printf("Error -- too many read hash collisions\n"); + exit(0); + } + } + read_hash_collisions += icount; + } else { + printf("Error -- Illegal value of hash_report_level %d\n",hash_report_level); + exit(1); + } + } else if (hash_method == PRIME_JUMP) { + uint jump = 1+hashkey%hash_jump_prime; + if (hash_report_level == 0) { + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*jump),hashloc = hashloc%hashtablesize){ + icount++; + } + } else if (hash_report_level == 1) { + hash_queries++; + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*jump),hashloc = hashloc%hashtablesize){ + icount++; + } + read_hash_collisions += icount; + } else if (hash_report_level == 2) { + hash_queries++; + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*jump),hashloc = hashloc%hashtablesize){ + icount++; + if (icount > max_collisions_allowed) { + printf("Error -- too many read hash collisions\n"); + exit(0); + } + } + read_hash_collisions += icount; + } else if (hash_report_level == 3) { + hash_queries++; + hashloc = (hashkey*AA+BB)%prime%hashtablesize; + printf("%d: hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride); + for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*jump),hashloc = 
hashloc%hashtablesize){ + icount++; + uint hashloctmp = hashloc+1; + hashloctmp = hashloctmp%hashtablesize; + printf("%d: hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloctmp,hash[2*hashloctmp],hashkey,hashkey%hash_stride,hashkey/hash_stride); + if (icount > max_collisions_allowed) { + printf("Error -- too many read hash collisions\n"); + exit(0); + } + } + read_hash_collisions += icount; + } else { + printf("Error -- Illegal value of hash_report_level %d\n",hash_report_level); + exit(1); + } + } else { + printf("Error -- Illegal value of hash_method %d\n",hash_method); + exit(1); + } + + if (hash[2*hashloc] != -1) hashval = hash[2*hashloc+1]; + return(hashval); +} + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/hsfc.h =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/hsfc.h @@ -0,0 +1,86 @@ +/* --------------------------------------------------------------------- +Author: H. Carter Edwards + hcedwar@sandia.gov + +Copyright: Copyright (C) 1997 H. Carter Edwards + Graduate Student + University of Texas + +Re-release: Copyright (C) 2011-2012 H. Carter Edwards + +Purpose: Domain paritioning based upon Hilbert Space-Filling Curve + ordering. + +License: Re-release under the less-restrictive CLAMR software terms. + Permitted by email with H. Carter Edwards on 9/13/2011 + +Disclaimer: + + These routines comes with ABSOLUTELY NO WARRANTY; + This is free software, and you are welcome to redistribute it + under certain conditions. See License terms in file 'LICENSE'. +--------------------------------------------------------------------- */ + +/*---------------------------------------------------------------------- +Description: + Inverse of the Hilbert Space-Filling Curve Map from a 2D or 3D +domain to the 1D domain. Two different 2D and 3D domains are +supported. + +For the routines 'hsfc2d' and 'hsfc3d' the 2D and 3D domains are +defined as follows. 
+Note that + * 0 is the minimum value of an unsigned integer + * ~(0u) is the maximum value of an unsigned integer - all bits set +thus the 2D and 3D domains are + * [0,~(0u)] x [0,~(0u)] + * [0,~(0u)] x [0,~(0u)] x [0,~(0u)] +respectively. + +For the routines 'fhsfc2d' and 'fhsfc3d' the 2D and 3D domains are +defines as: + * [0.0,1.0] x [0.0,1.0] + * [0.0,1.0] x [0.0,1.0] x [0.0,1.0] +respectively. + +The 1D domain is a multiword (array of unsigned integers) key. +This key is essentially an unsigned integer of an arbitrary +number of bits. The most significant bit is the leading bit +of the first (0th) word of the key. The least significant +bit is the trailing bit of the last word. + +----------------------------------------------------------------------*/ + +#ifndef __HILBERT_SPACE_FILLING_CURVE_MAPPING__ +#define __HILBERT_SPACE_FILLING_CURVE_MAPPING__ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Key of a 2D point given as unsigned integer coordinates. */ +extern void hsfc2d( + unsigned coord[] , /* IN: Normalized integer 2D coordinate */ + unsigned nkey , /* IN: Word length of key (hsfc2d fills at most 2 words) */ + unsigned key[] ); /* OUT: space-filling curve key */ + +/* Key of a 3D point given as unsigned integer coordinates. */ +extern void hsfc3d( + unsigned coord[] , /* IN: Normalized integer 3D coordinate */ + unsigned nkey , /* IN: Word length of 'key' (hsfc3d fills at most 3 words) */ + unsigned key[] ); /* OUT: space-filling curve key */ + +/* As hsfc2d, for coordinates in [0.0,1.0]; each is scaled by ~(0u) and converted to unsigned. */ +extern void fhsfc2d( + double coord[] , /* IN: Normalized floating point 2D coordinate */ + unsigned nkey , /* IN: Word length of key */ + unsigned key[] ); /* OUT: space-filling curve key */ + +/* As hsfc3d, for coordinates in [0.0,1.0]; each is scaled by ~(0u) and converted to unsigned. */ +extern void fhsfc3d( + double coord[] , /* IN: Normalized floating point 3D coordinate */ + unsigned nkey , /* IN: Word length of key */ + unsigned key[] ); /* OUT: space-filling curve key */ + +#ifdef __cplusplus +} +#endif + +#endif + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/hsfc.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/hsfc.c @@ -0,0 +1,279 @@ +/* 
--------------------------------------------------------------------- +Author: H. Carter Edwards + hcedwar@sandia.gov + +Copyright: Copyright (C) 1997 H. Carter Edwards + Graduate Student + University of Texas + +Re-release: Copyright (C) 2011-2012 H. Carter Edwards + +Purpose: Domain partitioning based upon Hilbert Space-Filling Curve + ordering. + +License: Re-release under the less-restrictive CLAMR software terms. + Permitted by email with H. Carter Edwards on 9/13/2011 + +Disclaimer: + + These routines comes with ABSOLUTELY NO WARRANTY; + This is free software, and you are welcome to redistribute it + under certain conditions. See License terms in file 'LICENSE'. +--------------------------------------------------------------------- */ + +/*---------------------------------------------------------------------- +Description: + Inverse of the Hilbert Space-Filling Curve Map from a 2D or 3D +domain to the 1D domain. Two different 2D and 3D domains are +supported. + +For the routines 'hsfc2d' and 'hsfc3d' the 2D and 3D domains are +defined as follows. +Note that + * 0 is the minimum value of an unsigned integer + * ~(0u) is the maximum value of an unsigned integer - all bits set +thus the 2D and 3D domains are + * [0,~(0u)] x [0,~(0u)] + * [0,~(0u)] x [0,~(0u)] x [0,~(0u)] +respectively. + +For the routines 'fhsfc2d' and 'fhsfc3d' the 2D and 3D domains are +defined as: + * [0.0,1.0] x [0.0,1.0] + * [0.0,1.0] x [0.0,1.0] x [0.0,1.0] +respectively. + +The 1D domain is a multiword (array of unsigned integers) key. +This key is essentially an unsigned integer of an arbitrary +number of bits. The most significant bit is the leading bit +of the first (0th) word of the key. The least significant +bit is the trailing bit of the last word. 
/*----------------------------------------------------------------------*/

#include <limits.h>   /* CHAR_BIT */
#include <stdlib.h>   /* exit */

/* Bits per unsigned word */

#define MaxBits ( sizeof(unsigned) * CHAR_BIT )

/*--------------------------------------------------------------------*/
/* 2D Hilbert Space-filling curve */

/*
 * hsfc2d -- Hilbert space-filling curve key of a 2D integer coordinate.
 *
 * At most two key words are produced: NKey = min(nkey, 2).  key[0..NKey-1]
 * is zeroed and then filled two bits per refinement level, starting with
 * the most significant bits of key[0].  ( MaxBits * NKey ) / 2 levels are
 * processed; level i reads bit ( MaxBits - i ) of each coordinate, so the
 * most significant coordinate bits are consumed first.
 *
 * Words of 'key' at index NKey and above are never touched.
 */
void hsfc2d(
  unsigned   coord[] , /* IN: Normalized integer coordinates */
  unsigned   nkey ,    /* IN: Word length of key */
  unsigned   key[] )   /* OUT: space-filling curve key */
{
  /* Inverse of the 2-bit reflected Gray code { 0, 1, 3, 2 }:
   * gray_inv[ gray[k] ] == k.  A constant table needs no first-call
   * initialisation and no mutable static state. */
  static const unsigned char gray_inv[ 2 * 2 ] = { 0 , 1 , 3 , 2 } ;

  const unsigned NKey  = ( 2 < nkey ) ? 2 : (nkey) ;
  const unsigned NBits = ( MaxBits * NKey ) / 2 ;

  unsigned      i ;
  unsigned char order[2] ;   /* Bit position given to each coordinate */
  unsigned char reflect ;    /* XOR mask applied to the quadrant bits */

  /* Zero out the key */

  for ( i = 0 ; i < NKey ; ++i ) key[i] = 0 ;

  order[0] = 0 ;
  order[1] = 1 ;
  reflect  = 0 ;

  for ( i = 1 ; i <= NBits ; i++ ) {
    const unsigned s = MaxBits - i ;
    const unsigned c = gray_inv[ reflect ^ (
      ( ( ( coord[0] >> s ) & 01 ) << order[0] ) |
      ( ( ( coord[1] >> s ) & 01 ) << order[1] ) ) ];

    const unsigned off   = 2 * i ;                     /* Bit offset */
    const unsigned which = off / MaxBits ;             /* Which word to update */
    const unsigned shift = MaxBits - off % MaxBits ;   /* Which bits to update */

    /* Set the two bits */

    if ( shift == MaxBits ) { /* Word boundary: pair ends the previous word */
      key[ which - 1 ] |= c ;
    }
    else {
      key[ which ] |= c << shift ;
    }

    /* Determine the recursive quadrant */

    switch( c ) {
    case 3:
      reflect ^= 03 ;
      /* fallthrough: quadrant 3 is reflected and then transposed */
    case 0: {
      const unsigned char tmp = order[0] ;
      order[0] = order[1] ;
      order[1] = tmp ;
      break ;
    }
    default:   /* Quadrants 1 and 2 keep the current orientation */
      break ;
    }
  }
}

+/*--------------------------------------------------------------------*/ +/* 3D Hilbert Space-filling curve */ + +void hsfc3d( + unsigned coord[] , /* IN: Normalized integer coordinates */ + unsigned nkey , /* IN: Word length of 'key' */ + unsigned key[] ) /* OUT: space-filling curve key */ +{ + static int init = 0 ; + static unsigned char gray_inv[ 2*2*2 ] ; + + const unsigned NKey = ( 3 < nkey ) ? 3 : (nkey) ; + const unsigned NBits = ( MaxBits * NKey ) / 3 ; + + unsigned i ; + unsigned char axis[3+3] ; + + /* GRAY coding */ + + if ( ! init ) { + unsigned char gray[ 2*2*2 ] ; + register unsigned k ; + register unsigned j ; + + gray[0] = 0 ; + for ( k = 1 ; k < sizeof(gray) ; k <<= 1 ) { + for ( j = 0 ; j < k ; j++ ) gray[k+j] = k | gray[k-(j+1)] ; + } + for ( k = 0 ; k < sizeof(gray) ; k++ ) gray_inv[ gray[k] ] = k ; + init = 1 ; + } + + /* Zero out the key */ + + for ( i = 0 ; i < NKey ; ++i ) key[i] = 0 ; + + axis[0] = 0 << 1 ; + axis[1] = 1 << 1 ; + axis[2] = 2 << 1 ; + + for ( i = 1 ; i <= NBits ; i++ ) { + const unsigned s = MaxBits - i ; + const unsigned c = gray_inv[ + (((( coord[ axis[0] >> 1 ] >> s ) ^ axis[0] ) & 01 ) << 0 ) | + (((( coord[ axis[1] >> 1 ] >> s ) ^ axis[1] ) & 01 ) << 1 ) | + (((( coord[ axis[2] >> 1 ] >> s ) ^ axis[2] ) & 01 ) << 2 ) ]; + unsigned n ; + + /* Set the 3bits */ + + for ( n = 0 ; n < 3 ; ++n ) { + const unsigned bit = 01 & ( c >> ( 2 - n ) ); /* Bit value */ + const unsigned off = 3 * i + n ; /* Bit offset */ + const unsigned which = off / MaxBits ; /* Which word */ + const unsigned shift = MaxBits - off % MaxBits ; /* Which bits */ + + if ( MaxBits == shift ) { /* Word boundary */ + key[ which - 1 ] |= bit ; + } + else { + key[ which ] |= bit << shift ; + } + } + + /* Determine the recursive quadrant */ + + axis[3+0] = axis[0] ; + axis[3+1] = axis[1] ; + axis[3+2] = axis[2] ; + + switch( c ) { + case 0: + axis[0] = axis[3+2]; + axis[1] = axis[3+1]; + axis[2] = axis[3+0]; + break ; + case 1: + axis[0] = axis[3+0]; + 
axis[1] = axis[3+2]; + axis[2] = axis[3+1]; + break ; + case 2: + axis[0] = axis[3+0]; + axis[1] = axis[3+1]; + axis[2] = axis[3+2]; + break ; + case 3: + axis[0] = axis[3+2] ^ 01 ; + axis[1] = axis[3+0] ^ 01 ; + axis[2] = axis[3+1]; + break ; + case 4: + axis[0] = axis[3+2]; + axis[1] = axis[3+0] ^ 01 ; + axis[2] = axis[3+1] ^ 01 ; + break ; + case 5: + axis[0] = axis[3+0]; + axis[1] = axis[3+1]; + axis[2] = axis[3+2]; + break ; + case 6: + axis[0] = axis[3+0]; + axis[1] = axis[3+2] ^ 01 ; + axis[2] = axis[3+1] ^ 01 ; + break ; + case 7: + axis[0] = axis[3+2] ^ 01 ; + axis[1] = axis[3+1]; + axis[2] = axis[3+0] ^ 01 ; + break ; + default: + exit(-1); + } + } +} + +/*--------------------------------------------------------------------*/ + +void fhsfc2d( + double coord[] , /* IN: Normalized floating point coordinates */ + unsigned nkey , /* IN: Word length of key */ + unsigned key[] ) /* OUT: space-filling curve key */ +{ + const double imax = ~(0u); + unsigned c[2] ; + c[0] = coord[0] * imax ; + c[1] = coord[1] * imax ; + hsfc2d( c , nkey , key ); +} + +void fhsfc3d( + double coord[] , /* IN: Normalized floating point coordinates */ + unsigned nkey , /* IN: Word length of key */ + unsigned key[] ) /* OUT: space-filling curve key */ +{ + const double imax = ~(0u); + unsigned c[3] ; + c[0] = coord[0] * imax ; + c[1] = coord[1] * imax ; + c[2] = coord[2] * imax ; + hsfc3d( c , nkey , key ); +} + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/hsfcsort.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/hsfcsort.c @@ -0,0 +1,268 @@ +/* --------------------------------------------------------------------- +Author: H. Carter Edwards + hcedwar@sandia.gov + +Copyright: Copyright (C) 1997 H. Carter Edwards + Graduate Student + University of Texas + +Re-release: Copyright (C) 2011-2012 H. Carter Edwards + +Purpose: Domain paritioning based upon Hilbert Space-Filling Curve + ordering. 
+ +License: Re-release under the less-restrictive CLAMR software terms. + Permitted by email with H. Carter Edwards on 9/13/2011 + +Disclaimer: + + These routines comes with ABSOLUTELY NO WARRANTY; + This is free software, and you are welcome to redistribute it + under certain conditions. See License terms in file 'LICENSE'. +--------------------------------------------------------------------- */ + +#include +#include + +#include "hsfc.h" + +/*--------------------------------------------------------------------*/ +/* Make it callable from FORTRAN: + * Interface types: INTEGER and REAL*8 + */ + +void hsfc2sort( + const int N , /* IN: Number of points */ + const double * X , /* IN: array of X-Coordinates */ + const double * Y , /* IN: array of Y-Coordinates */ + const int ibase, /* 0 for C and 1 for Fortran */ + int * Info , /* OUT: (1 <= LDInfo) [ HSFC ordering ] + (2 <= LDInfo) [ HSFC index, #1 ] + (3 <= LDInfo) [ HSFC index, #2 ] */ + int LDInfo /* IN: Leading dimension of Info */ + ); + +/*--------------------------------------------------------------------*/ + +#define MaxBits ( sizeof(unsigned) * CHAR_BIT ) + +#define NBITC (32) /* 32 Bits per coordinate, resolve data at 2^31 */ +#define NKEY(ND) ((NBITC * ND + MaxBits - 1) / MaxBits) + +/*--------------------------------------------------------------------*/ + +static int ui1comp( const void * const I1 , const void * const I2 ) +{ + return ( + ( ((const unsigned *)I1)[0] != ((const unsigned *)I2)[0] ) ? ( + ( ((const unsigned *)I1)[0] < ((const unsigned *)I2)[0] ) ? -1 : 1 ) : ( + 0 )); +} + +static int ui2comp( const void * const I1 , const void * const I2 ) +{ + return ( + ( ((const unsigned *)I1)[0] != ((const unsigned *)I2)[0] ) ? ( + ( ((const unsigned *)I1)[0] < ((const unsigned *)I2)[0] ) ? -1 : 1 ) : ( + ( ((const unsigned *)I1)[1] != ((const unsigned *)I2)[1] ) ? ( + ( ((const unsigned *)I1)[1] < ((const unsigned *)I2)[1] ) ? 
-1 : 1 ) : ( + 0 ))); +} + +/*--------------------------------------------------------------------*/ + +static int ui3comp( const void * const I1 , const void * const I2 ) +{ + return ( + ( ((const unsigned *)I1)[0] != ((const unsigned *)I2)[0] ) ? ( + ( ((const unsigned *)I1)[0] < ((const unsigned *)I2)[0] ) ? -1 : 1 ) : ( + ( ((const unsigned *)I1)[1] != ((const unsigned *)I2)[1] ) ? ( + ( ((const unsigned *)I1)[1] < ((const unsigned *)I2)[1] ) ? -1 : 1 ) : ( + ( ((const unsigned *)I1)[2] != ((const unsigned *)I2)[2] ) ? ( + ( ((const unsigned *)I1)[2] < ((const unsigned *)I2)[2] ) ? -1 : 1 ) : ( + 0 )))); +} + +static int N_uiNcomp = 0 ; + +static int uiNcomp( const void * const I1 , const void * const I2 ) +{ + const int N = N_uiNcomp ; + register int i ; + + for ( i = 0 ; i < N && + ((const unsigned *)I1)[i] != ((const unsigned *)I2)[i] ; ++i ); + + return ( i < N ) ? ( + ( ((const unsigned *)I1)[i] < ((const unsigned *)I2)[i] ) ? -1 : 1 ) : 0 ; +} + +/*--------------------------------------------------------------------*/ + +void hsfc2sort( + const int N , /* IN: Number of points */ + const double * X , /* IN: array of X-Coordinates */ + const double * Y , /* IN: array of Y-Coordinates */ + const int ibase, /* 0 for C and 1 for Fortran */ + int * Info , /* OUT: (1 <= LDInfo) [ HSFC ordering ] + (2 <= LDInfo) [ HSFC index, #1 ] + (3 <= LDInfo) [ HSFC index, #2 ] */ + int LDInfo )/* IN: Leading dimension of Info */ +{ + /*------------------------------------------------------------------*/ + + const double imax = ((double) ~(0u)) ; + + const unsigned ldinfo = LDInfo ; + const unsigned long long npt = N ; + const unsigned nkey = NKEY(2) ; + const unsigned ldT = nkey + 1 ; + + unsigned * const T = (unsigned *) malloc( sizeof(unsigned) * ldT * npt ); + + int i , ix , iy , ii , it ; + + /* Fill SFC table */ + + for ( i = it = ix = iy = 0 ; (unsigned long long)i < npt ; + ++i , ix++ , iy++ , it += ldT ) { + double xy[2] ; + unsigned coord[2] ; + + xy[0] = X[ix] ; + 
xy[1] = Y[iy] ; + + coord[0] = xy[0] * imax ; + coord[1] = xy[1] * imax ; + + hsfc2d( coord , nkey , T + it ); + T[it+nkey] = i ; + } + + /* SFC Key output */ + + if ( 2 < ldinfo && 1 < nkey ) { + for ( ii = 1, it = 0, i = 0 ; (unsigned long long)i < npt ; ++i, ii += ldinfo, it += ldT ) { + Info[ii] = T[it]; + Info[ii+1] = T[it+1]; + } + } + else if ( 1 < ldinfo ) { + for ( ii = 1, it = 0 ,i = 0 ; (unsigned long long)i < npt ; ++i, ii += ldinfo, it += ldT ) { + Info[ii] = T[it] ; + } + } + + /* Sort */ + + switch ( nkey ) { + case 0: break ; + case 1: qsort( T , npt , sizeof(unsigned) * ldT , ui1comp ); break ; + case 2: qsort( T , npt , sizeof(unsigned) * ldT , ui2comp ); break ; + case 3: qsort( T , npt , sizeof(unsigned) * ldT , ui3comp ); break ; + default: + N_uiNcomp = nkey ; + qsort( T , npt , sizeof(unsigned) * ldT , uiNcomp ); + N_uiNcomp = 0 ; + break ; + } + + for (ii = 0, i = 0, it = nkey ; (unsigned long long)i < npt ; ++i, ii += ldinfo, it += ldT) { + Info[ii] = T[it] + ibase; /* 1 -- FORTRAN convention, 0 -- C */ + } + + free( (void *) T ); + + return ; +} + +/*--------------------------------------------------------------------*/ + +void hsfc3sort( + const int N , /* IN: Number of points */ + const double * X , /* IN: array of X-Coordinates */ + const double * Y , /* IN: array of Y-Coordinates */ + const double * Z , /* IN: array of Y-Coordinates */ + const int ibase , /* IN: Stride for Y array */ + int * Info , /* OUT: (1 <= LDInfo) [ HSFC ordering ] + (2 <= LDInfo) [ HSFC index, #1 ] + (3 <= LDInfo) [ HSFC index, #2 ] + (4 <= LDInfo) [ HSFC index, #3 ] */ + int LDInfo )/* IN: Leading dimension of Info */ +{ + /*------------------------------------------------------------------*/ + + const double imax = ((double) ~(0u)) ; + + const unsigned ldinfo = LDInfo ; + const unsigned long long npt = N ; + const unsigned nkey = NKEY(3) ; + const unsigned ldT = nkey + 1 ; + + unsigned * const T = (unsigned *) malloc( sizeof(unsigned) * ldT * npt ); + + int i 
, ix , iy , iz , ii , it ; + + /* Fill SFC table */ + + for ( i = it = ix = iy = iz = 0 ; (unsigned long long)i < npt ; + ++i , ix++ , iy++ , iz++ , it += ldT ) { + double xyz[3] ; + unsigned coord[3] ; + + xyz[0] = X[ix] ; + xyz[1] = Y[iy] ; + xyz[2] = Z[iz] ; + + coord[0] = xyz[0] * imax ; + coord[1] = xyz[1] * imax ; + coord[2] = xyz[2] * imax ; + + hsfc3d( coord , nkey , T + it ); + T[it+nkey] = i ; + } + + /* SFC Key output */ + + if ( 3 < ldinfo && 2 < nkey ) { + for ( ii = 1, it = 0, i = 0 ; (unsigned long long)i < npt ; ++i, ii += ldinfo, it += ldT ) { + Info[ii] = T[it]; + Info[ii+1] = T[it+1]; + Info[ii+2] = T[it+2]; + } + } + else if ( 2 < ldinfo && 1 < nkey ) { + for ( ii = 1, it = 0, i = 0 ; (unsigned long long)i < npt ; ++i, ii += ldinfo, it += ldT ) { + Info[ii] = T[it]; + Info[ii+1] = T[it+1]; + } + } + else if ( 1 < ldinfo ) { + for ( ii = 1, it = 0 ,i = 0 ; (unsigned long long)i < npt ; ++i, ii += ldinfo, it += ldT ) { + Info[ii] = T[it] ; + } + } + + /* Sort */ + + switch ( nkey ) { + case 0: break ; + case 1: qsort( T , npt , sizeof(unsigned) * ldT , ui1comp ); break ; + case 2: qsort( T , npt , sizeof(unsigned) * ldT , ui2comp ); break ; + case 3: qsort( T , npt , sizeof(unsigned) * ldT , ui3comp ); break ; + default: + N_uiNcomp = nkey ; + qsort( T , npt , sizeof(unsigned) * ldT , uiNcomp ); + N_uiNcomp = 0 ; + break ; + } + + for (ii = 0, i = 0, it = nkey ; (unsigned long long)i < npt ; ++i, ii += ldinfo, it += ldT) { + Info[ii] = T[it] + ibase ; /* FORTRAN convention */ + } + + free( (void *) T ); + + return ; +} + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/input.h =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/input.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2011-2012, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. 
Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * Neal Davis davis68@lanl.gov, davis68@illinois.edu + * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com + * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com + * + * + * This file and the associated header is based on a file from the capablanca + * project available under the MIT open-source license. As author of that code, + * I, Neal Davis, permit repurposing and redistribution for CLAMR under the New + * BSD License used above. 
+ * http://code.google.com/p/capablanca/ + */ +#ifndef _INPUT_H +#define _INPUT_H + +/* Print the command-line usage summary to standard output. */ +void outputHelp(); +/* Print the program name and version to standard output. */ +void outputVersion(); +/* Set the option globals to their defaults, then override them from the command line. */ +void parseInput(const int argc, char** argv); + +#endif /* _INPUT_H */ + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/input.cpp =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/input.cpp @@ -0,0 +1,513 @@ +/* + * Copyright (c) 2011-2012, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. 
Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * Neal Davis davis68@lanl.gov, davis68@illinois.edu + * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com + * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com + * + * + * This file and the associated header is based on a file from the capablanca + * project available under the MIT open-source license. As author of that code, + * I, Neal Davis, permit repurposing and redistribution for CLAMR under the New + * BSD License used above. 
+ * http://code.google.com/p/capablanca/ + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "state.h" +#include "partition.h" +#include "mesh.h" +#include "hash.h" +#include "crux.h" +//#include "graphics/display.h" +#include "graphics.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#define OUTPUT_INTERVAL 100 +#define COARSE_GRID_RES 128 +#define MAX_TIME_STEP 3000 + +using namespace std; + +// Global variables. +char progName[12]; // Program name. +char progVers[8]; // Program version. + +// External global variables. +extern bool verbose, + localStencil, + outline, + face_based, + dynamic_load_balance_on, + h5_spoutput, + restart; +extern int outputInterval, + crux_type, + enhanced_precision_sum, + tmax, + levmx, + nx, + ny, + niter, + measure_type, + lttrace_on, + do_quo_setup, + calc_neighbor_type, + choose_hash_method, + initial_order, + graphic_outputInterval, + graphics_type, + checkpoint_outputInterval, + neighbor_remap, + num_of_rollback_states, + cycle_reorder; +extern float + mem_opt_factor; +extern double + upper_mass_diff_percentage; + +extern char* restart_file; + +void outputHelp() +{ cout << "CLAMR is an experimental adaptive mesh refinement code for the GPU." << endl + #ifdef PACKAGE_VERSION + << "Version is " << PACKAGE_VERSION << endl << endl + #endif + << "Usage: " << progName << " [options]..." << endl + << " -b Number of rollback images, disk or in memory (default 2);" << endl + << " -c Checkpoint to disk at interval specified;" << endl + << " -C Checkpoint to memory at interval specified;" << endl + << " -d turn on LTTRACE;" << endl + << " -D turn on dynamic load balancing using LTTRACE;" << endl + << " -e force hash_method, ie linear, quadratic..." 
< specify I step between saving graphics information for post processing;" << endl + << " -G specify graphics file type for post processing;" << endl + << " \"bmp\"" << endl + << " \"gif\"" << endl + << " \"jpeg\"" << endl + << " \"mpeg\"" << endl + << " \"pdf\"" << endl + << " \"png\"" << endl + << " \"svg\"" << endl + << " \"data\"" << endl + << " -h display this help message;" << endl + << " -i specify I steps between output files;" << endl + << " -l max number of levels;" << endl + << " -M memory optimization factor 1.0 <= M <=100.0 (default 1.0 -- represents 1/20 perfect hash);" << endl + << " -m specify partition measure type;" << endl + << " \"with_duplicates\"" << endl + << " \"without_duplicates\"" << endl + << " -N specify calc neighbor type;" << endl + << " \"hash_table\"" << endl + << " \"kdtree\"" << endl + << " -n specify coarse grid resolution of NxN;" << endl + << " -o turn off outlines;" << endl + << " -P

specify initial order P;" << endl + << " \"original_order\"" << endl + << " \"hilbert_sort\"" << endl + << " \"hilbert_partition\"" << endl + << " \"z_order\"" << endl + << " -p

specify ordering P every cycle;" << endl + << " \"original_order\"" << endl + << " \"hilbert_sort\"" << endl + << " \"hilbert_partition\"" << endl + << " \"local_hilbert\"" << endl + << " \"local_fixed\"" << endl + << " \"z_order\"" << endl + << " -q turn on quo;" << endl + << " -r regular sum instead of enhanced precision sum (Kahan sum);" << endl + << " -R restart simulation from the backup file specified;" << endl + << " -s specify space-filling curve method S;" << endl + << " -S write out double precision data as single precision;" << endl + << " -T execute with TVD;" << endl + << " -t specify T time steps to run;" << endl + << " -u allowed percentage of difference between total mass between iterations." << endl + << " the default value for this parameter is 2.6e-13;" << endl + << " -V use verbose output;" << endl + << " -v display version information." << endl + << " -z force recalculation of neighbors." << endl; } + +void outputVersion() +{ cout << progName << " " << progVers << endl; } + +/* parseInput(const int argc, char** argv) + * + * Interpret the command line input. + */ +void parseInput(const int argc, char** argv) +{ strcpy(progName, "clamr"); + #ifdef PACKAGE_VERSION + strcpy(progVers, PACKAGE_VERSION); + #endif + // Reconstruct command line argument as a string. + char progCL[256]; // Complete program command line. + strcpy(progCL, argv[0]); + for (int i = 1; i < argc; i++) + { strcat(progCL, " "); + strcat(progCL, argv[i]); } + + // Set variables to defaults, which may be overridden by CLI. 
+ verbose = false; + localStencil = true; + outline = true; +#ifdef HAVE_LTTRACE + lttrace_on = 0; +#endif +#ifdef HAVE_QUO + do_quo_setup = 0; +#endif + dynamic_load_balance_on = false; + crux_type = CRUX_NONE; + face_based = false; + restart = false; + restart_file = NULL; + outputInterval = OUTPUT_INTERVAL; + nx = COARSE_GRID_RES; + ny = COARSE_GRID_RES; + niter = MAX_TIME_STEP; + neighbor_remap = true; + //measure_type = CSTARVALUE; + measure_type = NO_PARTITION_MEASURE; + calc_neighbor_type = HASH_TABLE; + choose_hash_method = METHOD_UNSET; + initial_order = HILBERT_SORT; + cycle_reorder = ORIGINAL_ORDER; + graphic_outputInterval = INT_MAX; + graphics_type = GRAPHICS_NONE; + checkpoint_outputInterval = INT_MAX; + num_of_rollback_states = 2; + levmx = 1; + mem_opt_factor = 1.0; + upper_mass_diff_percentage = -1.0; + enhanced_precision_sum = SUM_KAHAN; + + char *val; + if (argc > 1) + { int i = 1; + val = strtok(argv[i++], " ,.-"); + while (val != NULL){ + switch (val[0]){ + case 'b': // Number of rollback images, disk or in memory (default 2) + sprintf(val,"0"); + if (i < argc) val = strtok(argv[i++], " ,"); + if(atoi(val) < 1){ + printf("backup number must be at least 1, setting to default value 2\n"); + } + else{ + num_of_rollback_states = atoi(val); + } + break; + case 'c': // Checkpoint to disk at interval specified + val = strtok(argv[i++], " ,.-"); + checkpoint_outputInterval = atoi(val); + crux_type = CRUX_DISK; + break; + + case 'C': // Checkpoint to memory at interval specified + val = strtok(argv[i++], " ,.-"); + checkpoint_outputInterval = atoi(val); + crux_type = CRUX_IN_MEMORY; + break; + + case 'd': // Turn on lttrace. + // This is provided as a separate option to measure + // the overhead of having lttrace on. +#ifdef HAVE_LTTRACE + lttrace_on = 1; +#endif + break; + + case 'D': // Turn on dynamic load balancing. + // This forces on lttrace. 
+#ifdef HAVE_LTTRACE + lttrace_on = true; + dynamic_load_balance_on = true; +#endif + break; + + case 'e': // hash method specified. + val = strtok(argv[i++], " ,"); + if (! strcmp(val,"perfect") ) { + choose_hash_method = PERFECT_HASH; + } else if (! strcmp(val,"linear") ) { + choose_hash_method = LINEAR; + } else if (! strcmp(val,"quadratic") ) { + choose_hash_method = QUADRATIC; + } else if (! strcmp(val,"prime_jump") ) { + choose_hash_method = PRIME_JUMP; + } + break; + + case 'f': // Use face-based finite difference + face_based = true; + break; + + case 'g': // Save graphics data to files during simulation. + val = strtok(argv[i++], " ,.-"); + graphic_outputInterval = atoi(val); + if (graphics_type == GRAPHICS_NONE) graphics_type = GRAPHICS_DATA; + break; + + case 'G': // Graphics data file type. + val = strtok(argv[i++], " ,.-"); + if (! strcmp(val,"none") ) { + graphics_type = GRAPHICS_NONE; + graphic_outputInterval = INT_MAX; + } else if (! strcmp(val,"data") ) { + graphics_type = GRAPHICS_DATA; +#ifdef HAVE_MAGICKWAND + } else if (! strcmp(val,"bmp") ) { + graphics_type = GRAPHICS_BMP; + } else if (! strcmp(val,"gif") ) { + graphics_type = GRAPHICS_GIF; + } else if (! strcmp(val,"jpeg") ) { + graphics_type = GRAPHICS_JPEG; + } else if (! strcmp(val,"mpeg") ) { + graphics_type = GRAPHICS_MPEG; + } else if (! strcmp(val,"pdf") ) { + graphics_type = GRAPHICS_PDF; + } else if (! strcmp(val,"png") ) { + graphics_type = GRAPHICS_PNG; + } else if (! strcmp(val,"svg") ) { + graphics_type = GRAPHICS_SVG; +#endif + } else { + printf("Unrecognized option for graphics file type %s\n",val); + exit(-1); + } + break; + + case 'h': // Output help. + outputHelp(); + cout.flush(); + exit(EXIT_SUCCESS); + break; + + case 'i': // Output interval specified. + val = strtok(argv[i++], " ,.-"); + outputInterval = atoi(val); + break; + + case 'l': // max level specified. 
+ val = strtok(argv[i++], " ,"); + levmx = atoi(val); + break; + + case 'M': // memory optimization factor + val = strtok(argv[i++], " ,"); + mem_opt_factor = atof(val); + break; + + case 'm': // partition measure specified. + val = strtok(argv[i++], " ,"); + if (! strcmp(val,"no_partition_measure") ) { + measure_type = NO_PARTITION_MEASURE; + } else if (! strcmp(val,"with_duplicates") ) { + measure_type = WITH_DUPLICATES; + } else if (! strcmp(val,"without_duplicates") ) { + measure_type = WITHOUT_DUPLICATES; + } else if (! strcmp(val,"cvalue") ) { + measure_type = CVALUE; + } else if (! strcmp(val,"cstarvalue") ) { + measure_type = CSTARVALUE; + } + break; + + case 'N': // calc neighbor type specified. + val = strtok(argv[i++], " ,"); + if (! strcmp(val,"hash_table") ) { + calc_neighbor_type = HASH_TABLE; + } else if (! strcmp(val,"kdtree") ) { + calc_neighbor_type = KDTREE; + } + break; + + case 'n': // Domain grid resolution specified. + val = strtok(argv[i++], " ,"); + nx = atoi(val); + ny = nx; + break; + + case 'o': // Turn off outlines on mesh drawing. + outline = false; + break; + + case 'P': // Initial order specified. + val = strtok(argv[i++], " ,"); + if (! strcmp(val,"original_order") ) { + initial_order = ORIGINAL_ORDER; + } else if (! strcmp(val,"hilbert_sort") ) { + initial_order = HILBERT_SORT; + } else if (! strcmp(val,"hilbert_partition") ) { + initial_order = HILBERT_PARTITION; + } else if (! strcmp(val,"z_order") ) { + initial_order = ZORDER; + } + break; + + case 'p': // Initial order specified. + val = strtok(argv[i++], " ,"); + if (! strcmp(val,"original_order") ) { + cycle_reorder = ORIGINAL_ORDER; + localStencil = false; + } else if (! strcmp(val,"hilbert_sort") ) { + cycle_reorder = HILBERT_SORT; + localStencil = false; + } else if (! strcmp(val,"hilbert_partition") ) { + cycle_reorder = HILBERT_PARTITION; + localStencil = false; + } else if (! 
strcmp(val,"local_hilbert") ) { + cycle_reorder = ORIGINAL_ORDER; + localStencil = true; + } else if (! strcmp(val,"local_fixed") ) { + cycle_reorder = ORIGINAL_ORDER; + localStencil = false; + } else if (! strcmp(val,"z_order") ) { + cycle_reorder = ZORDER; + localStencil = false; + } + break; + + case 'q': // turn on quo package. +#ifdef HAVE_QUO + do_quo_setup = 1; +#endif + break; + + case 'r': // Regular sum instead of enhanced precision sum. + val = strtok(argv[i++], " ,"); + if (! strcmp(val,"regular_sum") ) { + enhanced_precision_sum = SUM_REGULAR; + } else if (! strcmp(val,"kahan_sum") ) { + enhanced_precision_sum = SUM_KAHAN; + } else { + printf("Error with sum argument %s\n",val); + exit(0); + } + break; + + case 'R': // Restart application from last checkpoint + restart = true; + restart_file = strtok(argv[i++], " ,"); + +#ifndef HDF5_FF + struct stat stat_descriptor; + if (stat(restart_file,&stat_descriptor) == -1){ + printf("Error -- restart file %s does not exist\n",restart_file); + exit(0); + } +#endif + break; + + case 's': // Space-filling curve method specified (default HILBERT_SORT). + // Add different problem setups such as sloped wave in x, y and diagonal directions to help check algorithm + // HILBERT_SORT + break; + + case 'T': // TVD inclusion specified. + break; + + case 't': // Number of time steps specified. + val = strtok(argv[i++], " ,.-"); + niter = atoi(val); + break; + + case 'u': // Allowed percentage of difference in mass per iteration + val = strtok(argv[i++], " ,"); + upper_mass_diff_percentage = atof(val); + break; + + case 'V': // Verbose output desired. + verbose = true; + break; + + case 'v': // Version. + outputVersion(); + cout.flush(); + exit(EXIT_SUCCESS); + break; + + case 'z': // Neighbor remap -- default is true, -z sets to false + neighbor_remap = false; + break; + + default: // Unknown parameter encountered. 
+ cout << "âš  Unknown input parameter " << val << endl; + outputHelp(); + cout.flush(); + exit(EXIT_FAILURE); + break; } + + val = strtok(argv[i++], " ,.-"); + } + } + +/* + if(upper_mass_diff_percentage < 0){ + upper_mass_diff_percentage = 1.0e-12; + } +*/ +} Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/memstats.h =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/memstats.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2011-2012, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. 
#ifndef _MEMSTATS_H
#define _MEMSTATS_H

#ifdef __cplusplus
extern "C"
{
#endif

/*
 * Memory statistics queries.
 *
 * All four functions take no arguments and return a long long.  The
 * explicit (void) makes these true prototypes for C callers; C++ callers
 * see the same signatures through the extern "C" wrapper.
 *
 * On non-Apple builds the values are the numbers printed by the kernel in
 * /proc/<pid>/status and /proc/meminfo (reported there in kB).  -1 is
 * returned when the backing /proc file cannot be opened.
 */

/*
 * Resident memory of the calling process: the VmRSS entry of
 * /proc/<pid>/status, or the Mach task resident_size under __APPLE_CC__.
 * NOTE(review): the Apple branch returns resident_size unscaled while the
 * other Apple queries divide by 1024 -- confirm the intended unit.
 */
long long memstats_memused(void);

/*
 * Peak resident memory of the calling process: the VmHWM entry of
 * /proc/<pid>/status.  There is no Apple-specific implementation, so the
 * /proc lookup is attempted on every platform.
 */
long long memstats_mempeak(void);

/*
 * Free system memory: the MemFree entry of /proc/meminfo, or the Mach
 * free page count times the page size divided by 1024 under __APPLE_CC__.
 */
long long memstats_memfree(void);

/*
 * Total system memory: the MemTotal entry of /proc/meminfo, or the
 * HW_MEMSIZE sysctl divided by 1024 under __APPLE_CC__.
 */
long long memstats_memtotal(void);

#ifdef __cplusplus
}
#endif

#endif /* _MEMSTATS_H */
MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/memstats.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/memstats.c @@ -0,0 +1,347 @@ +/* + * Copyright (c) 2011-2012, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
/* Process id recorded each time /proc/<pid>/status is opened. */
pid_t pid;

/*
 * Stream slots for /proc/<pid>/status and /proc/meminfo.  Every query
 * closes its stream and resets the slot to NULL before returning, so the
 * slots are only non-NULL while a query is running.
 */
FILE *stat_fp = NULL, *meminfo_fp = NULL;

/*
 * Scan an open "Key:   <number> ..." text stream for the line whose label
 * is exactly `key` and return the integer that follows the colon.
 *
 * fp       -- stream positioned at the start of the file
 * key      -- label to look for, without the trailing ':'
 * fallback -- value returned when the key is absent or no number follows
 *
 * strtoll skips any mix of tabs and spaces in front of the number, so the
 * result does not depend on how the kernel pads the column.
 */
static long long memstats_scan_kb(FILE *fp, const char *key, long long fallback)
{
   char line[256];
   const size_t key_len = strlen(key);

   while (fgets(line, sizeof line, fp) != NULL) {
      char *digits;
      char *end;
      long long value;

      if (strncmp(line, key, key_len) != 0 || line[key_len] != ':') {
         continue;
      }

      digits = line + key_len + 1;
      value = strtoll(digits, &end, 10);
      return (end == digits) ? fallback : value;
   }

   return fallback;
}

/*
 * Look up one entry of /proc/<pid>/status for the calling process.
 * Returns -1 when the file cannot be opened and 0 when the key is missing.
 * The stream is always closed and stat_fp reset before returning.
 */
static long long memstats_status_kb(const char *key)
{
   char proc_stat_file[50];
   long long kb;

   if (!stat_fp) {
      pid = getpid();
      snprintf(proc_stat_file, sizeof proc_stat_file, "/proc/%ld/status", (long)pid);
      stat_fp = fopen(proc_stat_file, "r");
      if (!stat_fp) {
         return(-1);
      }
   } else {
      /* A stream was already installed in the slot: read it from the top. */
      rewind(stat_fp);
   }

   kb = memstats_scan_kb(stat_fp, key, 0);

   fclose(stat_fp);
   stat_fp = NULL;

   return(kb);
}

#ifndef __APPLE_CC__
/*
 * Look up one entry of /proc/meminfo.  Returns -1 when the file cannot be
 * opened or the key is missing.  The stream is always closed and
 * meminfo_fp reset before returning.
 */
static long long memstats_meminfo_kb(const char *key)
{
   long long kb;

   if (!meminfo_fp) {
      meminfo_fp = fopen("/proc/meminfo", "r");
      if (!meminfo_fp) {
         printf("fopen failed: \n");
         return(-1);
      }
   } else {
      rewind(meminfo_fp);
   }

   kb = memstats_scan_kb(meminfo_fp, key, -1);

   fclose(meminfo_fp);
   meminfo_fp = NULL;

   return(kb);
}
#endif

/*
 * Resident memory of the calling process.
 *
 * Non-Apple: the VmRSS value from /proc/<pid>/status (kB), -1 if the file
 * cannot be opened, 0 if the entry is missing.
 * Apple: the Mach task resident_size, or -1 if task_info fails.
 * NOTE(review): resident_size is returned unscaled here while the other
 * Apple queries divide by 1024 -- confirm the intended unit.
 */
long long memstats_memused(void)
{
#ifdef __APPLE_CC__
   struct task_basic_info t_info;
   mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT;

   if (task_info(mach_task_self(), TASK_BASIC_INFO,
                 (task_info_t)&t_info, &t_info_count) != KERN_SUCCESS) {
      return(-1);
   }

   return((long long)t_info.resident_size);
#else
   return(memstats_status_kb("VmRSS"));
#endif
}

/*
 * Peak resident memory of the calling process: the VmHWM value from
 * /proc/<pid>/status (kB).  There is no Apple-specific path; -1 is
 * returned wherever the status file cannot be opened.
 */
long long memstats_mempeak(void)
{
   return(memstats_status_kb("VmHWM"));
}

#define TIMER_ONEK 1024

/*
 * Free system memory.
 *
 * Non-Apple: the MemFree value from /proc/meminfo (kB), or -1 on failure.
 * Apple: free page count times page size, divided by 1024; -1 if the Mach
 * query fails.
 */
long long memstats_memfree(void)
{
#ifdef __APPLE_CC__
   vm_size_t page_size;
   mach_port_t mach_port;
   /* NOTE(review): host_statistics64 is normally paired with
    * HOST_VM_INFO64 / HOST_VM_INFO64_COUNT; the original flavor and count
    * are kept here -- confirm on a macOS build. */
   mach_msg_type_number_t count = HOST_VM_INFO_COUNT;
   vm_statistics64_data_t vmstat;

   mach_port = mach_host_self();
   if (host_page_size(mach_port, &page_size) != KERN_SUCCESS) {
      return(-1);
   }
   if (host_statistics64(mach_port, HOST_VM_INFO,
                         (host_info_t)&vmstat, &count) != KERN_SUCCESS) {
      return(-1);
   }

   return((long long)(vmstat.free_count*page_size/1024));
#else
   return(memstats_meminfo_kb("MemFree"));
#endif
}

/*
 * Total system memory.
 *
 * Non-Apple: the MemTotal value from /proc/meminfo (kB), or -1 on failure.
 * Apple: the HW_MEMSIZE sysctl divided by 1024; -1 if the sysctl fails.
 */
long long memstats_memtotal(void)
{
#ifdef __APPLE_CC__
   long long totalmem = -1;
   int mib[2];
   size_t length = sizeof(totalmem);

   mib[0] = CTL_HW;
   mib[1] = HW_MEMSIZE;
   if (sysctl(mib, 2, &totalmem, &length, NULL, 0) != 0) {
      return(-1);
   }

   return(totalmem / 1024);
#else
   return(memstats_meminfo_kb("MemTotal"));
#endif
}
2011-2012, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
/*
 * Floating-point precision model.
 *
 * Exactly one of FULL_PRECISION, MIXED_PRECISION or MINIMUM_PRECISION is
 * expected to be in effect.  FULL_PRECISION is the default when none is
 * requested, and NO_CL_DOUBLE overrides the other two with
 * MINIMUM_PRECISION.
 */
#if !defined(FULL_PRECISION) && !defined(MIXED_PRECISION) && !defined(MINIMUM_PRECISION)
#define FULL_PRECISION
#endif
#ifdef NO_CL_DOUBLE
#undef FULL_PRECISION
#undef MIXED_PRECISION
/* Guarded so that a build already passing -DMINIMUM_PRECISION does not hit
 * an incompatible macro redefinition. */
#ifndef MINIMUM_PRECISION
#define MINIMUM_PRECISION
#endif
#endif

#if defined(MINIMUM_PRECISION)
   typedef float real_t;    // this is used for intermediate calculations
   typedef float spatial_t; // for spatial variables
#ifdef HAVE_OPENCL
   typedef cl_float cl_real_t; // for intermediate gpu physics state variables
   typedef cl_float cl_spatial_t;
#endif
#ifdef HAVE_MPI
   #define MPI_REAL_T    MPI_FLOAT // for MPI communication for physics state variables
   #define MPI_SPATIAL_T MPI_FLOAT
#endif

#elif defined(MIXED_PRECISION) // intermediate values calculated high precision and stored as floats
   typedef double real_t;
   typedef float  spatial_t; // for spatial variables
#ifdef HAVE_OPENCL
   typedef cl_double cl_real_t; // for intermediate gpu physics state variables
   typedef cl_float  cl_spatial_t;
#endif
#ifdef HAVE_MPI
   #define MPI_REAL_T    MPI_DOUBLE
   #define MPI_SPATIAL_T MPI_FLOAT
#endif

#elif defined(FULL_PRECISION)
   typedef double real_t;
   typedef double spatial_t; // for spatial variables
#ifdef HAVE_OPENCL
   typedef cl_double cl_real_t; // for intermediate gpu physics state variables
   typedef cl_double cl_spatial_t;
#endif
#ifdef HAVE_MPI
   #define MPI_REAL_T    MPI_DOUBLE
   #define MPI_SPATIAL_T MPI_DOUBLE
#endif
#endif

#define TILE_SIZE 128

/* Rotate three pointers: xtmp receives xnew, xnew receives xold, and xold
 * receives the saved xnew.  Every argument is parenthesised so that any
 * lvalue expression can be passed safely. */
#define SWAP_PTR(xnew,xold,xtmp) ((xtmp)=(xnew), (xnew)=(xold), (xold)=(xtmp))
/* Classic two-argument min/max; each argument may be evaluated twice, so
 * do not pass expressions with side effects. */
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))

typedef unsigned int uint;

enum boundary
{  REAL_CELL      =  1,   //  Denotes cell type of real cell.
   LEFT_BOUNDARY  = -1,   //  Denotes left boundary ghost cell.
   RIGHT_BOUNDARY = -2,   //  Denotes right boundary ghost cell.
   BOTTOM_BOUNDARY= -3,   //  Denotes bottom boundary ghost cell.
   TOP_BOUNDARY   = -4,   //  Denotes top boundary ghost cell.
   FRONT_BOUNDARY = -5,   //  Denotes front boundary ghost cell.
   BACK_BOUNDARY  = -6 }; //  Denotes back boundary ghost cell.

enum dimensionality
{  ONE_DIMENSIONAL = 1,   //  Dimensionality based at 1 for clarity.
   TWO_DIMENSIONAL,
   THREE_DIMENSIONAL};

enum orientation
{  SW,                    //  SW quadrant.
   NW,                    //  NW quadrant.
   NE,                    //  NE quadrant.
   SE };                  //  SE quadrant.

enum neighbor_calc
{  HASH_TABLE,            //  Hash Table.
   KDTREE };              //  kD-tree.
+ +enum mesh_timers +{ + MESH_TIMER_COUNT_BCS, + MESH_TIMER_CALC_NEIGHBORS, + MESH_TIMER_HASH_SETUP, + MESH_TIMER_HASH_QUERY, + MESH_TIMER_FIND_BOUNDARY, + MESH_TIMER_PUSH_SETUP, + MESH_TIMER_PUSH_BOUNDARY, + MESH_TIMER_LOCAL_LIST, + MESH_TIMER_LAYER1, + MESH_TIMER_LAYER2, + MESH_TIMER_LAYER_LIST, + MESH_TIMER_COPY_MESH_DATA, + MESH_TIMER_FILL_MESH_GHOST, + MESH_TIMER_FILL_NEIGH_GHOST, + MESH_TIMER_SET_CORNER_NEIGH, + MESH_TIMER_NEIGH_ADJUST, + MESH_TIMER_SETUP_COMM, + MESH_TIMER_KDTREE_SETUP, + MESH_TIMER_KDTREE_QUERY, + MESH_TIMER_REFINE_SMOOTH, + MESH_TIMER_REZONE_ALL, + MESH_TIMER_PARTITION, + MESH_TIMER_CALC_SPATIAL_COORDINATES, + MESH_TIMER_LOAD_BALANCE, + MESH_TIMER_SIZE +}; + +enum mesh_counters +{ + MESH_COUNTER_REZONE, + MESH_COUNTER_REFINE_SMOOTH, + MESH_COUNTER_CALC_NEIGH, + MESH_COUNTER_LOAD_BALANCE, + MESH_COUNTER_SIZE +}; + +//#ifdef DEBUG_RESTORE_VALS +static const char *mesh_counter_descriptor[MESH_COUNTER_SIZE] = { + "mesh_counter_rezone", + "mesh_counter_refine_smooth", + "mesh_counter_calc_neigh", + "mesh_counter_load_balance" +}; +//#endif + +typedef enum mesh_timers mesh_timer_category; +typedef enum mesh_counters mesh_counter_category; + +enum mesh_device_types +{ + MESH_DEVICE_CPU, + MESH_DEVICE_GPU +}; + +typedef mesh_device_types mesh_device_type; + +using namespace std; + +/****************************************************************//** + * Mesh class + * Contains the cell-based adaptive mesh refinement + * (AMR) object with its data and methods. + *******************************************************************/ +class Mesh +{ + +public: + int ndim; //!< Dimensionality of mesh (2 or 3). 
+ + MallocPlus mesh_memory; + MallocPlus gpu_mesh_memory; + +#ifdef HAVE_OPENCL + string defines; +#endif + + double cpu_timers[MESH_TIMER_SIZE]; + long long gpu_timers[MESH_TIMER_SIZE]; + + int cpu_counters[MESH_COUNTER_SIZE]; + int gpu_counters[MESH_COUNTER_SIZE]; + + bool do_rezone, + gpu_do_rezone; + + int mype, + numpe, + parallel, + cell_handle, + noffset; + + int *lowerBound_Global, + *upperBound_Global; + + float mem_factor; + + double offtile_ratio_local; + int offtile_local_count; + + vector corners_i, + corners_j; + + vector nsizes, + ndispl; + + FILE *fp; + + TKDTree tree; //!< k-D tree for neighbor search. + vector proc; + vector lev_ibegin, //!< Lowest x-index in use at specified level of refinement. + lev_iend, //!< Highest x-index in use at specified level of refinement. + lev_jbegin, //!< Lowest y-index in use at specified level of refinement. + lev_jend, //!< Highest y-index in use at specified level of refinement. + lev_kbegin, //!< Lowest z-index in use at specified level of refinement. + lev_kend, //!< Highest z-index in use at specified level of refinement. + levtable; //!< Powers of two to simplify i,j calculations + vector lev_deltax, //!< Grid spacing along x-axis at specified level of refinement. + lev_deltay, //!< Grid spacing along y-axis at specified level of refinement. + lev_deltaz; //!< Grid spacing along z-axis at specified level of refinement. + int levmx, //!< Maximum level of refinement allowed. + have_boundary,//!< Mesh includes boundary cells, else creates on the fly + ibase, //!< Index basis for arrays (0 for C, 1 for Fortan). + imin, //!< Lowest x-index in use. + imax, //!< Highest x-index in use. + jmin, //!< Lowest y-index in use. + jmax, //!< Highest y-index in use. + kmin, //!< Lowest z-index in use. + kmax; //!< Highest z-index in use. + size_t ncells, //!< Number of cells in mesh. + ncells_global, //!< Global number of cells for parallel runs + ncells_ghost; //!< Number of cells in mesh with ghost cells. 
+ real_t xmin, //!< Lowest x-coordinate in use. + xmax, //!< Highest x-coordinate in use. + ymin, //!< Lowest y-coordinate in use. + ymax, //!< Highest y-coordinate in use. + zmin, //!< Lowest z-coordinate in use. + zmax, //!< Highest z-coordinate in use. + xcentermin, //!< Center of minimum x cell + xcentermax, //!< Center of maximum x cell + ycentermin, //!< Center of minimum y cell + ycentermax, //!< Center of maximum y cell + zcentermin, //!< Center of minimum z cell + zcentermax, //!< Center of maximum z cell + deltax, //!< Grid spacing along x-axis. + deltay, //!< Grid spacing along y-axis. + deltaz; //!< Grid spacing along z-axis. + + vector index; //!< 1D ordered index of mesh elements. + + // mesh state data + int *i, //!< 1D array of mesh element x-indices. + *j, //!< 1D array of mesh element y-indices. + *k, //!< 1D array of mesh element z-indices. + *level, //!< 1D array of mesh element refinement levels. + //!< derived data from mesh state data + *celltype, //!< 1D ordered index of mesh element cell types (ghost or real). + *nlft, //!< 1D ordered index of mesh element left neighbors. + *nrht, //!< 1D ordered index of mesh element right neighbors. + *nbot, //!< 1D ordered index of mesh element bottom neighbors. + *ntop, //!< 1D ordered index of mesh element top neighbors. + *nfrt, //!< 1D ordered index of mesh element front neighbors. + *nbak; //!< 1D ordered index of mesh element back neighbors. + + vector x, //!< 1D ordered index of mesh element x-coordinates. + dx, //!< 1D ordered index of mesh element x-coordinate spacings. + y, //!< 1D ordered index of mesh element y-coordinates. + dy, //!< 1D ordered index of mesh element y-coordinate spacings. + z, //!< 1D ordered index of mesh element z-coordinates. + dz; //!< 1D ordered index of mesh element z-coordinate spacings. 
+ +#ifdef HAVE_OPENCL + cl_mem dev_ioffset; + + cl_mem dev_celltype, + dev_i, + dev_j, + dev_level, + dev_nlft, + dev_nrht, + dev_nbot, + dev_ntop; + + cl_mem dev_levdx, // corresponds to lev_deltax + dev_levdy, // corresponds to lev_deltay + dev_levibeg, + dev_leviend, + dev_levjbeg, + dev_levjend, + dev_levtable; // + + cl_mem dev_corners_i, + dev_corners_j; +#endif + + int nxface; + int nyface; + + vector xface_i; + vector xface_j; + vector xface_level; + vector map_xface2cell_lower; + vector map_xface2cell_upper; + + vector map_xcell2face_left1; + vector map_xcell2face_left2; + vector map_xcell2face_right1; + vector map_xcell2face_right2; + + vector ixmin_level; + vector ixmax_level; + vector jxmin_level; + vector jxmax_level; + vector ixadjust; + vector jxadjust; + + vector yface_i; + vector yface_j; + vector yface_level; + vector map_yface2cell_lower; + vector map_yface2cell_upper; + + vector map_ycell2face_bot1; + vector map_ycell2face_bot2; + vector map_ycell2face_top1; + vector map_ycell2face_top2; + + vector iymin_level; + vector iymax_level; + vector jymin_level; + vector jymax_level; + vector iyadjust; + vector jyadjust; + + // Public constructors. + Mesh(FILE *fin, int *numpe); + Mesh(int nx, int ny, int levmx_in, int ndim_in, double deltax_in, double deltay_in, int boundary, int parallel_in, int do_gpu_calc); + + // Member functions. + void init(int nx, int ny, real_t circ_radius, partition_method initial_order, int do_gpu_calc); + void terminate(void); + + void set_bounds(int n); + void get_bounds(int& lowerBound, int& upperBound); + +/****************************************************************//** + * @name Memory routines + *******************************************************************/ +///@{ + +/****************************************************************//** + * \brief + * Allocates the basic mesh memory, i, j, and level, using the MallocPlus + * memory database. 
+ * + * **Parameters** + * * size_t ncells -- number of cells in the mesh + * + * Typical Usage + * + * mesh.allocate(ncells); + *******************************************************************/ + void allocate(size_t ncells); + + void resize(size_t new_ncells); + void memory_reset_ptrs(void); + void resize_old_device_memory(size_t ncells); +///@} + +/* inline "macros" */ + +///@{ +/****************************************************************//** + * \brief + * Boundary cell tests + *******************************************************************/ + int is_lower_boundary(int *iv, int *lev_begin, int ic) { return (iv[ic] < lev_begin[level[ic]]); } + int is_upper_boundary(int *iv, int *lev_end, int ic) { return (iv[ic] > lev_end[level[ic]]); } + + int is_left_boundary(int ic) { return (i[ic] < lev_ibegin[level[ic]]); } + int is_right_boundary(int ic) { return (i[ic] > lev_iend[ level[ic]]); } + int is_bottom_boundary(int ic) { return (j[ic] < lev_jbegin[level[ic]]); } + int is_top_boundary(int ic) { return (j[ic] > lev_jend[ level[ic]]); } + int is_front_boundary(int ic) { return (k[ic] < lev_kbegin[level[ic]]); } + int is_back_boundary(int ic) { return (k[ic] > lev_kend[ level[ic]]); } +///@} + +///@{ +/****************************************************************//** + * \brief + * Tests for positioning in set of 4 cells + *******************************************************************/ + int is_lower(int i) { return(i % 2 == 0); } + int is_upper(int i) { return(i % 2 == 1); } + + int is_lower_left(int i, int j) { return(i % 2 == 0 && j % 2 == 0); } + int is_lower_right(int i, int j) { return(i % 2 == 1 && j % 2 == 0); } + int is_upper_left(int i, int j) { return(i % 2 == 0 && j % 2 == 1); } + int is_upper_right(int i, int j) { return(i % 2 == 1 && j % 2 == 1); } +///@} + +///@{ +/****************************************************************//** + * \brief + * Level tests + *******************************************************************/ + 
int is_same_level_or_coarser(int nn, int nz) { return(level[nn] <= level[nz]); } + int is_coarser(int nn, int nz) { return(level[nn] < level[nz]); } + int is_finer(int nn, int nz) { return(level[nn] > level[nz]); } + int is_same_level(int nn, int nz) { return(level[nn] == level[nz]); } +///@} + +/* accessor routines */ + double get_cpu_timer(mesh_timer_category category) {return(cpu_timers[category]); }; + /* Convert nanoseconds to msecs */ + double get_gpu_timer(mesh_timer_category category) {return((double)(gpu_timers[category])*1.0e-9); }; + + void parallel_output(const char *string, double local_value, int output_level, const char *units); + void parallel_output(const char *string, long long local_value, int output_level, const char *units); + void parallel_output(const char *string, int local_value, int output_level, const char *units); + void timer_output(mesh_timer_category category, mesh_device_types device_type, int timer_level); + + int get_cpu_counter(mesh_counter_category category) {return(cpu_counters[category]); }; + int get_gpu_counter(mesh_counter_category category) {return(gpu_counters[category]); }; + + int get_calc_neighbor_type(void); + + void print_partition_measure(void); + void print_calc_neighbor_type(void); + void print_partition_type(void); +/* end accessor routines */ + +/* Debugging, internal, or not used yet */ +#ifdef HAVE_OPENCL + int gpu_count_BCs(); +#endif + void kdtree_setup(void); + void partition_measure(void); + void partition_cells(int numpe, + vector &order, + enum partition_method method); + void calc_distribution(int numpe); + void calc_symmetry(vector &dsym, + vector &xsym, + vector &ysym); + +/* End of debugging, internal, or not used yet */ + + //void calc_face_list_test(double *H); + void calc_face_list(void); + void calc_face_list_wmap(void); + void calc_face_list_wbidirmap(void); + void calc_face_list_clearmaps(void); + + int **get_xface_flag(int lev, bool print_output=0); + int **get_yface_flag(int lev, bool 
print_output=0); + void get_flat_grid(int lev, int ***zone_flag, int ***zone_cell); + +///@{ +/****************************************************************//** + * \brief + * Calculate neighbors + * + * **Parameters** + * + * Input -- from within the object + * i, j, level + * Output -- in the object + * nlft, nrht, nbot, ntop arrays + *******************************************************************/ + void calc_neighbors(int ncells); + void calc_neighbors_local(void); +#ifdef HAVE_OPENCL + void gpu_calc_neighbors(void); + void gpu_calc_neighbors_local(void); +#endif + // TODO: Not created yet; overloading for 3D mesh support. (davis68) + void calc_neighbors(vector &nlft, + vector &nrht, + vector &nbot, + vector &ntop, + vector &nfrt, + vector &nbak, + vector index); +///@} + +///@{ +/****************************************************************//** + * \brief + * Calculate rezone count + * + * **Parameters** + * + * Input + * mpot -- potential mesh refinement + * ioffset -- write offset for each cell + * Output + * result -- cell count + *******************************************************************/ + int rezone_count(vector mpot, int &icount, int &jcount); +#ifdef HAVE_OPENCL + void gpu_rezone_count2(size_t block_size, size_t local_work_size, cl_mem dev_redscratch, cl_mem &dev_result); + void gpu_rezone_count(size_t block_size, size_t local_work_size, cl_mem dev_redscratch, cl_mem &dev_result); + void gpu_rezone_scan(size_t block_size, size_t local_work_size, cl_mem dev_ioffset, cl_mem &dev_result); +#endif +///@} + +///@{ +/****************************************************************//** + * \brief + * Refine Smooth -- smooths jump in refinement level so that only a 1 to 2 jump occurs + * + * **Parameters** + * + * Input/Output + * mpot -- potential mesh refinement array, 1 is refine and -1 coarsen + * ioffset -- write offset for each cell to account for new cells + * result -- refinement count + 
*******************************************************************/ + size_t refine_smooth(vector &mpot, int &icount, int &jcount); +#ifdef HAVE_OPENCL + int gpu_refine_smooth(cl_mem &dev_mpot, int &icount, int &jcount); +#endif +///@} + +///@{ +/****************************************************************//** + * \brief + * Rezone mesh + * + * **Parameters** + * + * Input + * add_ncells -- for each processor. A global sum will be done and the main part of + * the rezone will be skipped if no cells are added. + * mpot -- mesh rezone potential + * have_state flag -- 0 (false) for setup when physics state has not been allocated + * ioffset -- partial prefix scan results for starting address to write new cells + * state_memory -- linked list of arrays for state + * Output + * new mesh and state arrays with refinement/coarsening performed + *******************************************************************/ + void rezone_all(int icount, int jcount, vector mpot, int have_state, MallocPlus &state_memory); +#ifdef HAVE_OPENCL + void gpu_rezone_all(int icount, int jcount, cl_mem &dev_mpot, MallocPlus &gpu_state_memory); +#endif +///@} + +///@{ +/****************************************************************//** + * \brief + * Load balance -- only needed for parallel (MPI) runs + * + * **Parameters** + * + * Input + * numcells -- ncells from rezone all routine. This is a copy in so that a local + * value can be used for load_balance and gpu_load_balance without it getting + * reset for clamr_checkall routine + * weight -- weighting array per cell for balancing. Currently not used. Null value + * indicates even weighting of cells for load balance. + * state_memory or gpu_state_memory -- linked-list of arrays from physics routine + * to be load balanced. + * Output -- arrays will be returned load balanced with new sizes. 
Pointers to arrays + * will need to be reset + *******************************************************************/ +#ifdef HAVE_MPI + void do_load_balance_local(size_t numcells, float *weight, MallocPlus &state_memory); +#ifdef HAVE_OPENCL + int gpu_do_load_balance_local(size_t numcells, float *weight, MallocPlus &gpu_state_memory); +#endif +#endif +///@} + +///@{ +/****************************************************************//** + * \brief + * Calculate spatial coordinates + * + * **Parameters** + * + * Input -- from within the object + * i, j, level + * Output + * x, y -- coordinates for each cell + * dx, dy -- size of each cell + *******************************************************************/ + void calc_spatial_coordinates(int ibase); +#ifdef HAVE_OPENCL + void gpu_calc_spatial_coordinates(cl_mem dev_x, cl_mem dev_dx, cl_mem dev_y, cl_mem dev_dy); +#endif +///@} + +///@{ +/****************************************************************//** + * \brief + * Testing routines + *******************************************************************/ +#ifdef HAVE_OPENCL + void compare_dev_local_to_local(void); // Not currently called + void compare_neighbors_gpu_global_to_cpu_global(void); +#endif + void compare_neighbors_cpu_local_to_cpu_global(uint ncells_ghost, uint ncells_global, Mesh *mesh_global, int *nsizes, int *ndispl); +#ifdef HAVE_OPENCL + void compare_neighbors_all_to_gpu_local(Mesh *mesh_global, int *nsizes, int *ndispl); + void compare_mpot_gpu_global_to_cpu_global(int *mpot, cl_mem dev_mpot); +#endif + void compare_mpot_cpu_local_to_cpu_global(uint ncells_global, int *nsizes, int *displ, int *mpot, int *mpot_global, int cycle); +#ifdef HAVE_OPENCL + void compare_mpot_all_to_gpu_local(int *mpot, int *mpot_global, cl_mem dev_mpot, cl_mem dev_mpot_global, uint ncells_global, int *nsizes, int *ndispl, int ncycle); + void compare_ioffset_gpu_global_to_cpu_global(uint old_ncells, int *mpot); + void compare_ioffset_all_to_gpu_local(uint old_ncells, 
uint old_ncells_global, int block_size, int block_size_global, int *mpot, int *mpot_global, cl_mem dev_ioffset, cl_mem dev_ioffset_global, int *ioffset, int *ioffset_global, int *celltype_global, int *i_global, int *j_global); + void compare_coordinates_gpu_global_to_cpu_global_double(cl_mem dev_x, cl_mem dev_dx, cl_mem dev_y, cl_mem dev_dy, cl_mem dev_H, double *H); + void compare_coordinates_gpu_global_to_cpu_global_float(cl_mem dev_x, cl_mem dev_dx, cl_mem dev_y, cl_mem dev_dy, cl_mem dev_H, float *H); +#endif + void compare_coordinates_cpu_local_to_cpu_global_double(uint ncells_global, int *nsizes, int *ndispl, spatial_t *x, spatial_t *dx, spatial_t *y, spatial_t *dy, double *H, spatial_t *x_global, spatial_t *dx_global, spatial_t *y_global, spatial_t *dy_global, double *H_global, int cycle); + void compare_coordinates_cpu_local_to_cpu_global_float(uint ncells_global, int *nsizes, int *ndispl, spatial_t *x, spatial_t *dx, spatial_t *y, spatial_t *dy, float *H, spatial_t *x_global, spatial_t *dx_global, spatial_t *y_global, spatial_t *dy_global, float *H_global, int cycle); +#ifdef HAVE_OPENCL + void compare_indices_gpu_global_to_cpu_global(void); +#endif + void compare_indices_cpu_local_to_cpu_global(uint ncells_global, Mesh *mesh_global, int *nsizes, int *ndispl, int cycle); +#ifdef HAVE_OPENCL + void compare_indices_all_to_gpu_local(Mesh *mesh_global, uint ncells_global, int *nsizes, int *ndispl, int ncycle); +#endif +///@} + + size_t get_checkpoint_size(void); + void store_checkpoint(Crux *crux); + void restore_checkpoint(Crux *crux); + + void calc_celltype_threaded(size_t ncells); + void calc_celltype(size_t ncells); + +private: + // Private constructors. + Mesh(const Mesh&); // Blocks copy constructor so copies are not made inadvertently. + + // Member functions. 
+ void print_object_info(); + + void set_refinement_order(int order[4], int ic, int ifirst, int ilast, int jfirst, int jlast, + int level_first, int level_last, int *i, int *j, int *level); + + void write_grid(int ncycle); + void calc_centerminmax(void); + void calc_minmax(void); + + void print(void); + void print_local(void); +#ifdef HAVE_OPENCL + void print_dev_local(); +#endif + +}; + +#endif /* MESH_H */ Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/mesh.cpp =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/mesh.cpp @@ -0,0 +1,10456 @@ +/* + * Copyright (c) 2011-2012, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * Neal Davis davis68@lanl.gov, davis68@illinois.edu + * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com + * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com + * + */ +#ifdef HAVE_MPI +#include "mpi.h" +#endif + +#include +#include +#include +#include +#ifdef _OPENMP +#include +#endif +//#include "hsfc.h" +#include "KDTree.h" +#include "mesh.h" +#ifdef HAVE_OPENCL +#include "ezcl/ezcl.h" +#endif +#include "timer.h" +#ifdef HAVE_MPI +#include "l7/l7.h" +#endif +#include "reduce.h" +#include "genmalloc.h" +#include "hash.h" + +#define DEBUG 0 +//#define BOUNDS_CHECK 1 + +#ifndef DEBUG +#define DEBUG 0 +#endif +#define DEBUG_RESTORE_VALS 1 + +typedef int scanInt; +void scan ( scanInt *input , scanInt *output , scanInt length); + +#ifdef _OPENMP +#undef REZONE_NO_OPTIMIZATION +#else +#define REZONE_NO_OPTIMIZATION 1 +#endif + +#define TIMING_LEVEL 2 + +#define MIN(a,b) ((a) < (b) ? 
(a) : (b)) + +#define IPOW2(a) (2 << (a)) + +#if defined(MINIMUM_PRECISION) +#define CONSERVATION_EPS .1 +#define STATE_EPS 15.0 + +#elif defined(MIXED_PRECISION) // intermediate values calculated high precision and stored as floats +#define CONSERVATION_EPS .02 +#define STATE_EPS .025 + +#elif defined(FULL_PRECISION) +#define CONSERVATION_EPS .02 +#define STATE_EPS .025 + +#endif + +typedef unsigned int uint; +#ifdef __APPLE_CC__ +typedef unsigned long ulong; +#endif + +#define TWO 2 +#define HALF 0.5 + +#define __NEW_STENCIL__ +//#define __OLD_STENCIL__ +//#define STENCIL_WARNING 1 + +#ifdef STENCIL_WARNING +int do_stencil_warning=1; +#else +int do_stencil_warning=0; +#endif + +#ifdef HAVE_OPENCL +#include "mesh_kernel.inc" +#endif + +extern bool localStencil; +int calc_neighbor_type; +bool dynamic_load_balance_on; +bool neighbor_remap; + +#ifdef _OPENMP +static bool iversion_flag = false; +#endif + +static const char *mesh_timer_descriptor[MESH_TIMER_SIZE] = { + "mesh_timer_count_BCs", + "mesh_timer_calc_neighbors", + "mesh_timer_hash_setup", + "mesh_timer_hash_query", + "mesh_timer_find_boundary", + "mesh_timer_push_setup", + "mesh_timer_push_boundary", + "mesh_timer_local_list", + "mesh_timer_layer1", + "mesh_timer_layer2", + "mesh_timer_layer_list", + "mesh_timer_copy_mesh_data", + "mesh_timer_fill_mesh_ghost", + "mesh_timer_fill_neigh_ghost", + "mesh_timer_set_corner_neigh", + "mesh_timer_neigh_adjust", + "mesh_timer_setup_comm", + "mesh_timer_kdtree_setup", + "mesh_timer_kdtree_query", + "mesh_timer_refine_smooth", + "mesh_timer_rezone_all", + "mesh_timer_partition", + "mesh_timer_calc_spatial_coordinates", + "mesh_timer_load_balance" +}; + +#ifdef HAVE_OPENCL +cl_kernel kernel_hash_adjust_sizes; +cl_kernel kernel_hash_setup; +cl_kernel kernel_hash_setup_local; +cl_kernel kernel_neighbor_init; +cl_kernel kernel_calc_neighbors; +cl_kernel kernel_calc_neighbors_local; +cl_kernel kernel_calc_border_cells; +cl_kernel kernel_calc_border_cells2; +cl_kernel 
kernel_finish_scan;
+cl_kernel      kernel_get_border_data;
+cl_kernel      kernel_calc_layer1;
+cl_kernel      kernel_calc_layer1_sethash;
+cl_kernel      kernel_calc_layer2;
+cl_kernel      kernel_get_border_data2;
+cl_kernel      kernel_calc_layer2_sethash;
+cl_kernel      kernel_copy_mesh_data;
+cl_kernel      kernel_fill_mesh_ghost;
+cl_kernel      kernel_fill_neighbor_ghost;
+cl_kernel      kernel_set_corner_neighbor;
+cl_kernel      kernel_adjust_neighbors_local;
+cl_kernel      kernel_reduction_scan2;
+cl_kernel      kernel_reduction_count;
+cl_kernel      kernel_reduction_count2;
+cl_kernel      kernel_hash_size;
+cl_kernel      kernel_finish_hash_size;
+cl_kernel      kernel_calc_spatial_coordinates;
+cl_kernel      kernel_count_BCs;
+cl_kernel      kernel_do_load_balance_lower;
+cl_kernel      kernel_do_load_balance_middle;
+cl_kernel      kernel_do_load_balance_upper;
+#ifndef MINIMUM_PRECISION
+cl_kernel      kernel_do_load_balance_double;
+#endif
+cl_kernel      kernel_do_load_balance_float;
+cl_kernel      kernel_refine_smooth;
+cl_kernel      kernel_coarsen_smooth;
+cl_kernel      kernel_coarsen_check_block;
+cl_kernel      kernel_rezone_all;
+cl_kernel      kernel_rezone_neighbors;
+#ifndef MINIMUM_PRECISION
+cl_kernel      kernel_rezone_one_double;
+#endif
+cl_kernel      kernel_rezone_one_float;
+cl_kernel      kernel_copy_mpot_ghost_data;
+cl_kernel      kernel_set_boundary_refinement;
+#endif
+
+extern size_t hash_header_size;
+extern int   choose_hash_method;
+
+/**
+ * Writes the mesh to the text file "gridNN.gph", where NN is ncycle
+ * (negative values are treated as 0).
+ *
+ * The file holds one "viewport" record, one "rect" record per cell, a
+ * "line_init"/"line" sequence through the points (x+dx/2, y+dy/2) in cell
+ * index order, and one "text" record per cell carrying the cell index.
+ *
+ * If the file cannot be opened an error is printed and nothing is written.
+ *
+ * @param ncycle  cycle number used to build the file name
+ */
+void Mesh::write_grid(int ncycle)
+{
+   FILE *fp;
+   char filename[20];
+
+   if (ncycle<0) ncycle=0;
+   // Bounded formatting so the name can never overrun filename[].
+   snprintf(filename,sizeof(filename),"grid%02d.gph",ncycle);
+   fp=fopen(filename,"w");
+   if (fp == NULL) {
+      // A failed open used to be handed straight to fprintf().
+      printf("Error -- unable to open %s for writing\n",filename);
+      return;
+   }
+
+   fprintf(fp,"viewport %lf %lf %lf %lf\n",xmin,ymin,xmax,ymax);
+   for (uint ic = 0; ic < ncells; ic++) {
+      fprintf(fp,"rect  %lf   %lf   %lf   %lf\n",x[ic],y[ic],x[ic]+dx[ic],y[ic]+dy[ic]);
+   }
+
+   // Entry 0 is only read when the mesh has at least one cell.
+   if (ncells > 0) {
+      fprintf(fp,"line_init %lf %lf\n",x[0]+0.5*dx[0],y[0]+0.5*dy[0]);
+   }
+   for (uint ic = 1; ic < ncells; ic++){
+      fprintf(fp,"line %lf %lf\n",x[ic]+0.5*dx[ic],y[ic]+0.5*dy[ic]);
+   }
+
+   for (uint ic = 0; ic < ncells; ic++){
+      fprintf(fp,"text %lf %lf %d\n",x[ic]+0.5*dx[ic],y[ic]+0.5*dy[ic],ic);
+   }
+
+   fclose(fp);
+}
+
+/**
+ * Builds a mesh from a text description read from fin.
+ *
+ * Header lines are consumed in this fixed order: levmax, cells, numpe, ndim,
+ * xaxis, yaxis, zaxis (only when ndim == THREE_DIMENSIONAL), followed by one
+ * line that is read and ignored. Every remaining line supplies
+ * "index i j level" for one cell.
+ *
+ * Calls exit(-1) when a header line is missing or the cell count cannot be
+ * parsed. A mismatch between the number of cell lines and the "cells" header
+ * is reported with a message; lines beyond the announced count are counted
+ * but not stored.
+ *
+ * @param fin    open input stream positioned at the start of the description
+ * @param numpe  receives the value of the "numpe" header line
+ */
+Mesh::Mesh(FILE *fin, int *numpe)
+{
+   char string[80];
+   ibase = 1;
+
+   time_t trand;
+   time(&trand);
+   srand48((long)trand);
+
+   if(fgets(string, 80, fin) == NULL) exit(-1);
+   sscanf(string,"levmax %d",&levmx);
+   if(fgets(string, 80, fin) == NULL) exit(-1);
+   // Parse into a local of exactly the type "%ld" expects, and refuse a
+   // count that cannot size the mesh instead of continuing with garbage.
+   long ncells_in = 0;
+   if (sscanf(string,"cells %ld",&ncells_in) != 1 || ncells_in < 0) exit(-1);
+   ncells = ncells_in;
+   if(fgets(string, 80, fin) == NULL) exit(-1);
+   sscanf(string,"numpe %d",numpe);
+   if(fgets(string, 80, fin) == NULL) exit(-1);
+   sscanf(string,"ndim %d",&ndim);
+   if(fgets(string, 80, fin) == NULL) exit(-1);
+#ifdef MINIMUM_PRECISION
+   sscanf(string,"xaxis %f %f",&xmin, &deltax);
+#else
+   sscanf(string,"xaxis %lf %lf",&xmin, &deltax);
+#endif
+   if(fgets(string, 80, fin) == NULL) exit(-1);
+   {
+      // Read into real doubles and assign. The former (double*) casts let
+      // "%lf" store 8 bytes into the members whatever their actual width.
+      // NOTE(review): member types are declared outside this view; the xaxis
+      // branch above suggests they are float under MINIMUM_PRECISION -- confirm.
+      double lo = 0.0, delta = 0.0;
+      int nread = sscanf(string,"yaxis %lf %lf",&lo,&delta);
+      if (nread >= 1) ymin   = lo;
+      if (nread == 2) deltay = delta;
+   }
+   if (ndim == THREE_DIMENSIONAL){
+      if(fgets(string, 80, fin) == NULL) exit(-1);
+      double lo = 0.0, delta = 0.0;
+      int nread = sscanf(string,"zaxis %lf %lf",&lo,&delta);
+      if (nread >= 1) zmin   = lo;
+      if (nread == 2) deltaz = delta;
+   }
+   if(fgets(string, 80, fin) == NULL) exit(-1);
+
+   index.resize(ncells);
+
+   allocate(ncells);
+
+   uint ic=0;
+   while(fgets(string, 80, fin)!=NULL){
+      // Keep counting so the mismatch is still reported below, but never
+      // store past the ncells entries sized above.
+      if (ic < ncells) {
+         sscanf(string, "%d %d %d %d", &(index[ic]), &(i[ic]), &(j[ic]), &(level[ic]));
+      }
+      ic++;
+   }
+
+   ibase = 0;
+   calc_spatial_coordinates(ibase);
+   KDTree_Initialize(&tree);
+
+
+   print();
+
+   if (ic != ncells) {
+      printf("Error -- cells read does not match number specified\n");
+   }
+   return;
+}
+
+void Mesh::print(void)
+{
+   assert(&nlft[0] != NULL);
+   assert(&x[0] != NULL);
+   assert(&index[0] != NULL);
+
+   //printf("size is %lu %lu %lu %lu %lu\n",index.size(), i.size(), level.size(), nlft.size(), x.size());
+   printf("index orig index i j lev nlft nrht nbot ntop xlow xhigh ylow yhigh\n");
+   for (uint ic=0; ic= ncells_ghost){
+   fprintf(fp,"%d: index global i j lev nlft nrht nbot ntop \n",mype);
+   for (uint ic=0; ici_tmp(ncells_ghost);
+   vectorj_tmp(ncells_ghost);
+   vectorlevel_tmp(ncells_ghost);
+   vectornlft_tmp(ncells_ghost);
+   vectornrht_tmp(ncells_ghost);
+   vectornbot_tmp(ncells_ghost);
+
vectorntop_tmp(ncells_ghost); + ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &i_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_j, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &j_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_level, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &level_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nlft_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nrht_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nbot_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_TRUE, 0, ncells_ghost*sizeof(cl_int), &ntop_tmp[0], NULL); + + //fprintf(fp,"\n%d: Printing mesh for dev_local\n\n",mype); + + fprintf(fp,"%d: index global i j lev nlft nrht nbot ntop \n",mype); + for (uint ic=0; ici_tmp(ncells_ghost); + vectorj_tmp(ncells_ghost); + vectorlevel_tmp(ncells_ghost); + vectornlft_tmp(ncells_ghost); + vectornrht_tmp(ncells_ghost); + vectornbot_tmp(ncells_ghost); + vectorntop_tmp(ncells_ghost); + ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &i_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_j, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &j_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_level, CL_TRUE, 0, ncells_ghost*sizeof(cl_int), &level_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nlft_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nrht_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nbot_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_TRUE, 0, ncells_ghost*sizeof(cl_int), &ntop_tmp[0], NULL); + + 
fprintf(fp,"\n%d: Comparing mesh for dev_local to local\n\n",mype); + //fprintf(fp,"%d: index global i j lev nlft nrht nbot ntop \n",mype); + for (uint ic=0; icnlft_check(ncells); + vectornrht_check(ncells); + vectornbot_check(ncells); + vectorntop_check(ncells); + ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, ncells*sizeof(cl_int), &nlft_check[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells*sizeof(cl_int), &nrht_check[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, ncells*sizeof(cl_int), &nbot_check[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_TRUE, 0, ncells*sizeof(cl_int), &ntop_check[0], NULL); + + //printf("\n%d: Comparing neighbors for gpu_global to cpu_global\n\n",mype); + for (uint ic=0; icnlft; + int *nrht_global = mesh_global->nrht; + int *nbot_global = mesh_global->nbot; + int *ntop_global = mesh_global->ntop; + + vector Test(ncells_ghost); + for(uint ic=0; ic 1) L7_Update(&Test[0], L7_INT, cell_handle); + + vector Test_global(ncells_global); + MPI_Allgatherv(&Test[0], nsizes[mype], MPI_INT, &Test_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + + vector Test_check(ncells); + vector Test_check_global(ncells_global); + + // ==================== check left value ==================== + for (uint ic=0; icncells; + int *nlft_global = mesh_global->nlft; + int *nrht_global = mesh_global->nrht; + int *nbot_global = mesh_global->nbot; + int *ntop_global = mesh_global->ntop; + + // Checking CPU parallel to CPU global + vector Test(ncells_ghost); + for(uint ic=0; ic 1) L7_Update(&Test[0], L7_INT, cell_handle); + + vector Test_global(ncells_global); + MPI_Allgatherv(&Test[0], nsizes[mype], MPI_INT, &Test_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + + vector Test_check(ncells); + vector Test_check_global(ncells_global); + + // ==================== check left value ==================== + for (uint ic=0; ic nlft_check(ncells_ghost); 
vector nrht_check(ncells_ghost); + vector nbot_check(ncells_ghost); vector ntop_check(ncells_ghost); + ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nlft_check[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nrht_check[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nbot_check[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_TRUE, 0, ncells_ghost*sizeof(cl_int), &ntop_check[0], NULL); + + for (uint ic=0; ic nlft_check(ncells_ghost); vector nrht_check(ncells_ghost); + //vector nbot_check(ncells_ghost); vector ntop_check(ncells_ghost); + ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nlft_check[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nrht_check[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nbot_check[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_TRUE, 0, ncells_ghost*sizeof(cl_int), &ntop_check[0], NULL); + + for (uint ic=0; ic i_check(ncells); + vector j_check(ncells); + vector level_check(ncells); + vector celltype_check(ncells); + /// Set read buffers for data. 
+   ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, 0, ncells*sizeof(cl_int), &i_check[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_j, CL_FALSE, 0, ncells*sizeof(cl_int), &j_check[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_level, CL_FALSE, 0, ncells*sizeof(cl_int), &level_check[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_celltype, CL_TRUE, 0, ncells*sizeof(cl_int), &celltype_check[0], NULL);
+   for (uint ic = 0; ic < ncells; ic++){
+      if (i[ic] != i_check[ic] ) printf("DEBUG -- i: ic %d i %d i_check %d\n",ic, i[ic], i_check[ic]);
+      if (j[ic] != j_check[ic] ) printf("DEBUG -- j: ic %d j %d j_check %d\n",ic, j[ic], j_check[ic]);
+      if (level[ic] != level_check[ic] ) printf("DEBUG -- level: ic %d level %d level_check %d\n",ic, level[ic], level_check[ic]);
+      if (celltype[ic] != celltype_check[ic] ) printf("DEBUG -- celltype: ic %d celltype %d celltype_check %d\n",ic, celltype[ic], celltype_check[ic]);
+   }
+}
+#endif
+
+/**
+ * Debug check of the local index arrays against a global mesh.
+ *
+ * With HAVE_MPI, this rank's celltype, i, j and level arrays are gathered
+ * from all ranks with MPI_Allgatherv, and a "DEBUG rezone 3" line is printed
+ * for every entry that differs from the matching array of mesh_global.
+ *
+ * NOTE(review): without HAVE_MPI nothing fills the gathered arrays, so they
+ * stay zero-initialized and are still compared -- confirm that is intended.
+ *
+ * @param ncells_global  number of entries compared (length of the gathered arrays)
+ * @param mesh_global    mesh whose celltype/i/j/level arrays are the reference
+ * @param nsizes         per-rank receive counts passed to MPI_Allgatherv
+ * @param ndispl         per-rank displacements passed to MPI_Allgatherv
+ * @param cycle          cycle number, only echoed in the messages
+ */
+void Mesh::compare_indices_cpu_local_to_cpu_global(uint ncells_global, Mesh *mesh_global, int *nsizes, int *ndispl, int cycle)
+{
+   int *celltype_global = mesh_global->celltype;
+   int *i_global        = mesh_global->i;
+   int *j_global        = mesh_global->j;
+   int *level_global    = mesh_global->level;
+
+   // Element type is int: the buffers are received as MPI_INT and compared
+   // against the int arrays above.
+   vector<int> i_check_global(ncells_global);
+   vector<int> j_check_global(ncells_global);
+   vector<int> level_check_global(ncells_global);
+   vector<int> celltype_check_global(ncells_global);
+
+#ifdef HAVE_MPI
+   MPI_Allgatherv(&celltype[0], nsizes[mype], MPI_INT, &celltype_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+   MPI_Allgatherv(&i[0],        nsizes[mype], MPI_INT, &i_check_global[0],        &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+   MPI_Allgatherv(&j[0],        nsizes[mype], MPI_INT, &j_check_global[0],        &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+   MPI_Allgatherv(&level[0],    nsizes[mype], MPI_INT, &level_check_global[0],    &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+#else
+   // These parameters are only consumed by the MPI gather.
+   (void)nsizes;
+   (void)ndispl;
+#endif
+
+   for (uint ic = 0; ic < ncells_global; ic++){
+      if (celltype_global[ic] != celltype_check_global[ic]) printf("DEBUG rezone 3 at cycle %d celltype_global & celltype_check_global %d %d %d \n",cycle,ic,celltype_global[ic],celltype_check_global[ic]);
+      if (i_global[ic] != i_check_global[ic]) printf("DEBUG rezone 3 at cycle %d i_global & i_check_global %d %d %d \n",cycle,ic,i_global[ic],i_check_global[ic]);
+      if (j_global[ic] != j_check_global[ic]) printf("DEBUG rezone 3 at cycle %d j_global & j_check_global %d %d %d \n",cycle,ic,j_global[ic],j_check_global[ic]);
+      if (level_global[ic] != level_check_global[ic]) printf("DEBUG rezone 3 at cycle %d level_global & level_check_global %d %d %d \n",cycle,ic,level_global[ic],level_check_global[ic]);
+   }
+}
+
+#ifdef HAVE_OPENCL
+void Mesh::compare_indices_all_to_gpu_local(Mesh *mesh_global, uint ncells_global, int *nsizes, int *ndispl, int ncycle)
+{
+#ifdef HAVE_MPI
+   cl_command_queue command_queue = ezcl_get_command_queue();
+
+   int *level_global = mesh_global->level;
+   int *celltype_global = mesh_global->celltype;
+   int *i_global = mesh_global->i;
+   int *j_global = mesh_global->j;
+
+   cl_mem &dev_celltype_global = mesh_global->dev_celltype;
+   cl_mem &dev_i_global = mesh_global->dev_i;
+   cl_mem &dev_j_global = mesh_global->dev_j;
+   cl_mem &dev_level_global = mesh_global->dev_level;
+
+   // Need to compare dev_H to H, etc
+   vector level_check(ncells);
+   vector celltype_check(ncells);
+   vector i_check(ncells);
+   vector j_check(ncells);
+   /// Set read buffers for data.
+   ezcl_enqueue_read_buffer(command_queue, dev_level, CL_FALSE, 0, ncells*sizeof(cl_int), &level_check[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_celltype, CL_FALSE, 0, ncells*sizeof(cl_int), &celltype_check[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, 0, ncells*sizeof(cl_int), &i_check[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_j, CL_TRUE, 0, ncells*sizeof(cl_int), &j_check[0], NULL);
+   for (uint ic = 0; ic < ncells; ic++){
+      if (level[ic] != level_check[ic] ) printf("%d: DEBUG rezone 1 cell %d level %d level_check %d\n",mype, ic, level[ic], level_check[ic]);
+      if (celltype[ic] != celltype_check[ic] ) printf("%d: DEBUG rezone 1 cell %d celltype %d celltype_check %d\n",mype, ic, celltype[ic], celltype_check[ic]);
+      if (i[ic] != i_check[ic] ) printf("%d: DEBUG rezone 1 cell %d i %d i_check %d\n",mype, ic, i[ic], i_check[ic]);
+      if (j[ic] != j_check[ic] ) printf("%d: DEBUG rezone 1 cell %d j %d j_check %d\n",mype, ic, j[ic], j_check[ic]);
+   }
+
+
// And compare dev_H gathered to H_global, etc + vectorcelltype_check_global(ncells_global); + vectori_check_global(ncells_global); + vectorj_check_global(ncells_global); + vectorlevel_check_global(ncells_global); + MPI_Allgatherv(&celltype_check[0], nsizes[mype], MPI_INT, &celltype_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + MPI_Allgatherv(&i_check[0], nsizes[mype], MPI_INT, &i_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + MPI_Allgatherv(&j_check[0], nsizes[mype], MPI_INT, &j_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + MPI_Allgatherv(&level_check[0], nsizes[mype], MPI_INT, &level_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + for (uint ic = 0; ic < ncells_global; ic++){ + if (level_global[ic] != level_check_global[ic] ) printf("%d: DEBUG rezone 2 cell %d level_global %d level_check_global %d\n",mype, ic, level_global[ic], level_check_global[ic]); + if (celltype_global[ic] != celltype_check_global[ic] ) printf("%d: DEBUG rezone 2 cell %d celltype_global %d celltype_check_global %d\n",mype, ic, celltype_global[ic], celltype_check_global[ic]); + if (i_global[ic] != i_check_global[ic] ) printf("%d: DEBUG rezone 2 cell %d i_global %d i_check_global %d\n",mype, ic, i_global[ic], i_check_global[ic]); + if (j_global[ic] != j_check_global[ic] ) printf("%d: DEBUG rezone 2 cell %d j_global %d j_check_global %d\n",mype, ic, j_global[ic], j_check_global[ic]); + } + + // And compare H gathered to H_global, etc + MPI_Allgatherv(&celltype[0], nsizes[mype], MPI_INT, &celltype_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + MPI_Allgatherv(&i[0], nsizes[mype], MPI_INT, &i_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + MPI_Allgatherv(&j[0], nsizes[mype], MPI_INT, &j_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + MPI_Allgatherv(&level[0], nsizes[mype], MPI_INT, &level_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, 
MPI_COMM_WORLD); + for (uint ic = 0; ic < ncells_global; ic++){ + if (celltype_global[ic] != celltype_check_global[ic]) printf("DEBUG rezone 3 at cycle %d celltype_global & celltype_check_global %d %d %d \n",ncycle,ic,celltype_global[ic],celltype_check_global[ic]); + if (i_global[ic] != i_check_global[ic]) printf("DEBUG rezone 3 at cycle %d i_global & i_check_global %d %d %d \n",ncycle,ic,i_global[ic],i_check_global[ic]); + if (j_global[ic] != j_check_global[ic]) printf("DEBUG rezone 3 at cycle %d j_global & j_check_global %d %d %d \n",ncycle,ic,j_global[ic],j_check_global[ic]); + if (level_global[ic] != level_check_global[ic]) printf("DEBUG rezone 3 at cycle %d level_global & level_check_global %d %d %d \n",ncycle,ic,level_global[ic],level_check_global[ic]); + } + + // Now the global dev_H_global to H_global, etc + ezcl_enqueue_read_buffer(command_queue, dev_celltype_global, CL_FALSE, 0, ncells_global*sizeof(cl_int), &celltype_check_global[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_i_global, CL_FALSE, 0, ncells_global*sizeof(cl_int), &i_check_global[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_j_global, CL_FALSE, 0, ncells_global*sizeof(cl_int), &j_check_global[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_level_global, CL_TRUE, 0, ncells_global*sizeof(cl_int), &level_check_global[0], NULL); + for (uint ic = 0; ic < ncells_global; ic++){ + if (celltype_global[ic] != celltype_check_global[ic]) printf("DEBUG rezone 4 at cycle %d celltype_global & celltype_check_global %d %d %d \n",ncycle,ic,celltype_global[ic],celltype_check_global[ic]); + if (i_global[ic] != i_check_global[ic]) printf("DEBUG rezone 4 at cycle %d i_global & i_check_global %d %d %d \n",ncycle,ic,i_global[ic],i_check_global[ic]); + if (j_global[ic] != j_check_global[ic]) printf("DEBUG rezone 4 at cycle %d j_global & j_check_global %d %d %d \n",ncycle,ic,j_global[ic],j_check_global[ic]); + if (level_global[ic] != level_check_global[ic]) printf("DEBUG rezone 4 at 
cycle %d level_global & level_check_global %d %d %d \n",ncycle,ic,level_global[ic],level_check_global[ic]); + } +#else + // Just to get rid of compiler warnings + if (1 == 2) printf("DEBUG -- mesh_global %p ncells_global %d nsizes[0] %d ndispl[0] %d ncycle %d\n", + mesh_global,ncells_global,nsizes[0],ndispl[0],ncycle); +#endif +} + +void Mesh::compare_coordinates_gpu_global_to_cpu_global_double(cl_mem dev_x, cl_mem dev_dx, cl_mem dev_y, cl_mem dev_dy, cl_mem dev_H, double *H) +{ + cl_command_queue command_queue = ezcl_get_command_queue(); + + vectorx_check(ncells); + vectordx_check(ncells); + vectory_check(ncells); + vectordy_check(ncells); + vectorH_check(ncells); + ezcl_enqueue_read_buffer(command_queue, dev_x, CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &x_check[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_dx, CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &dx_check[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_y, CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &y_check[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_dy, CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &dy_check[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_H, CL_TRUE, 0, ncells*sizeof(cl_double), &H_check[0], NULL); + for (uint ic = 0; ic < ncells; ic++){ + if (x[ic] != x_check[ic] || dx[ic] != dx_check[ic] || y[ic] != y_check[ic] || dy[ic] != dy_check[ic] ) { + printf("Error -- mismatch in spatial coordinates for cell %d is gpu %lf %lf %lf %lf cpu %lf %lf %lf %lf\n",ic,x_check[ic],dx_check[ic],y_check[ic],dy_check[ic],x[ic],dx[ic],y[ic],dy[ic]); + exit(0); + } + } + for (uint ic = 0; ic < ncells; ic++){ + if (fabs(H[ic] - H_check[ic]) > CONSERVATION_EPS) { + printf("Error -- mismatch in H for cell %d is gpu %lf cpu %lf\n",ic,H_check[ic],H[ic]); + exit(0); + } + } +} + +void Mesh::compare_coordinates_gpu_global_to_cpu_global_float(cl_mem dev_x, cl_mem dev_dx, cl_mem dev_y, cl_mem dev_dy, cl_mem dev_H, float *H) +{ + cl_command_queue command_queue = 
ezcl_get_command_queue(); + + vectorx_check(ncells); + vectordx_check(ncells); + vectory_check(ncells); + vectordy_check(ncells); + vectorH_check(ncells); + ezcl_enqueue_read_buffer(command_queue, dev_x, CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &x_check[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_dx, CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &dx_check[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_y, CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &y_check[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_dy, CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &dy_check[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_H, CL_TRUE, 0, ncells*sizeof(cl_float), &H_check[0], NULL); + for (uint ic = 0; ic < ncells; ic++){ + if (x[ic] != x_check[ic] || dx[ic] != dx_check[ic] || y[ic] != y_check[ic] || dy[ic] != dy_check[ic] ) { + printf("Error -- mismatch in spatial coordinates for cell %d is gpu %lf %lf %lf %lf cpu %lf %lf %lf %lf\n",ic,x_check[ic],dx_check[ic],y_check[ic],dy_check[ic],x[ic],dx[ic],y[ic],dy[ic]); + exit(0); + } + } + for (uint ic = 0; ic < ncells; ic++){ + if (fabs(H[ic] - H_check[ic]) > CONSERVATION_EPS) { + printf("Error -- mismatch in H for cell %d is gpu %lf cpu %lf\n",ic,H_check[ic],H[ic]); + exit(0); + } + } +} +#endif + +void Mesh::compare_coordinates_cpu_local_to_cpu_global_double(uint ncells_global, int *nsizes, int *ndispl, spatial_t *x, spatial_t *dx, spatial_t *y, spatial_t *dy, double *H, spatial_t *x_global, spatial_t *dx_global, spatial_t *y_global, spatial_t *dy_global, double *H_global, int cycle) +{ + vector x_check_global(ncells_global); + vector dx_check_global(ncells_global); + vector y_check_global(ncells_global); + vector dy_check_global(ncells_global); + vector H_check_global(ncells_global); + +#ifdef HAVE_MPI + MPI_Allgatherv(&x[0], nsizes[mype], MPI_SPATIAL_T, &x_check_global[0], &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD); + MPI_Allgatherv(&dx[0], nsizes[mype], MPI_SPATIAL_T, 
&dx_check_global[0], &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD); + MPI_Allgatherv(&y[0], nsizes[mype], MPI_SPATIAL_T, &y_check_global[0], &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD); + MPI_Allgatherv(&dy[0], nsizes[mype], MPI_SPATIAL_T, &dy_check_global[0], &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD); + MPI_Allgatherv(&H[0], nsizes[mype], MPI_DOUBLE, &H_check_global[0], &nsizes[0], &ndispl[0], MPI_DOUBLE, MPI_COMM_WORLD); +#else + // Just to get rid of compiler warnings + if (1 == 2) printf("DEBUG -- nsizes[0] %d ndispl[0] %d x %p dx %p y %p dy %p H %p\n", + nsizes[0],ndispl[0],x,dx,y,dy,H); +#endif + + for (uint ic = 0; ic < ncells_global; ic++){ + if (fabs(x_global[ic] -x_check_global[ic] ) > STATE_EPS) printf("DEBUG graphics at cycle %d x_global & x_check_global %d %lf %lf \n",cycle,ic,x_global[ic], x_check_global[ic]); + if (fabs(dx_global[ic]-dx_check_global[ic]) > STATE_EPS) printf("DEBUG graphics at cycle %d dx_global & dx_check_global %d %lf %lf \n",cycle,ic,dx_global[ic],dx_check_global[ic]); + if (fabs(y_global[ic] -y_check_global[ic] ) > STATE_EPS) printf("DEBUG graphics at cycle %d y_global & y_check_global %d %lf %lf \n",cycle,ic,y_global[ic], y_check_global[ic]); + if (fabs(dy_global[ic]-dy_check_global[ic]) > STATE_EPS) printf("DEBUG graphics at cycle %d dy_global & dy_check_global %d %lf %lf \n",cycle,ic,dy_global[ic],dy_check_global[ic]); + if (fabs(H_global[ic] -H_check_global[ic] ) > STATE_EPS) printf("DEBUG graphics at cycle %d H_global & H_check_global %d %lf %lf \n",cycle,ic,H_global[ic], H_check_global[ic]); + } + +} + +void Mesh::compare_coordinates_cpu_local_to_cpu_global_float(uint ncells_global, int *nsizes, int *ndispl, spatial_t *x, spatial_t *dx, spatial_t *y, spatial_t *dy, float *H, spatial_t *x_global, spatial_t *dx_global, spatial_t *y_global, spatial_t *dy_global, float *H_global, int cycle) +{ + vector x_check_global(ncells_global); + vector dx_check_global(ncells_global); + vector 
y_check_global(ncells_global); + vector dy_check_global(ncells_global); + vector H_check_global(ncells_global); + +#ifdef HAVE_MPI + MPI_Allgatherv(&x[0], nsizes[mype], MPI_SPATIAL_T, &x_check_global[0], &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD); + MPI_Allgatherv(&dx[0], nsizes[mype], MPI_SPATIAL_T, &dx_check_global[0], &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD); + MPI_Allgatherv(&y[0], nsizes[mype], MPI_SPATIAL_T, &y_check_global[0], &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD); + MPI_Allgatherv(&dy[0], nsizes[mype], MPI_SPATIAL_T, &dy_check_global[0], &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD); + MPI_Allgatherv(&H[0], nsizes[mype], MPI_FLOAT, &H_check_global[0], &nsizes[0], &ndispl[0], MPI_FLOAT, MPI_COMM_WORLD); +#else + // Just to get rid of compiler warnings + if (1 == 2) printf("DEBUG -- nsizes[0] %d ndispl[0] %d x %p dx %p y %p dy %p H %p\n", + nsizes[0],ndispl[0],x,dx,y,dy,H); +#endif + + for (uint ic = 0; ic < ncells_global; ic++){ + if (fabs(x_global[ic] -x_check_global[ic] ) > STATE_EPS) printf("DEBUG graphics at cycle %d x_global & x_check_global %d %lf %lf \n",cycle,ic,x_global[ic], x_check_global[ic]); + if (fabs(dx_global[ic]-dx_check_global[ic]) > STATE_EPS) printf("DEBUG graphics at cycle %d dx_global & dx_check_global %d %lf %lf \n",cycle,ic,dx_global[ic],dx_check_global[ic]); + if (fabs(y_global[ic] -y_check_global[ic] ) > STATE_EPS) printf("DEBUG graphics at cycle %d y_global & y_check_global %d %lf %lf \n",cycle,ic,y_global[ic], y_check_global[ic]); + if (fabs(dy_global[ic]-dy_check_global[ic]) > STATE_EPS) printf("DEBUG graphics at cycle %d dy_global & dy_check_global %d %lf %lf \n",cycle,ic,dy_global[ic],dy_check_global[ic]); + if (fabs(H_global[ic] -H_check_global[ic] ) > STATE_EPS) printf("DEBUG graphics at cycle %d H_global & H_check_global %d %lf %lf \n",cycle,ic,H_global[ic], H_check_global[ic]); + } + +} + +#ifdef HAVE_OPENCL +void Mesh::compare_mpot_gpu_global_to_cpu_global(int *mpot, 
cl_mem dev_mpot) +{ + cl_command_queue command_queue = ezcl_get_command_queue(); + + vectormpot_check(ncells); + ezcl_enqueue_read_buffer(command_queue, dev_mpot, CL_TRUE, 0, ncells*sizeof(cl_int), &mpot_check[0], NULL); + + for (uint ic=0; icmpot_save_global(ncells_global); +#ifdef HAVE_MPI + MPI_Allgatherv(&mpot[0], ncells, MPI_INT, &mpot_save_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); +#else + // Just to get rid of compiler warnings + if (1 == 2) printf("DEBUG -- nsizes[0] %d ndispl[0] %d mpot %p\n", + nsizes[0],ndispl[0],mpot); +#endif + for (uint ic = 0; ic < ncells_global; ic++){ + if (mpot_global[ic] != mpot_save_global[ic]) { + if (mype == 0) printf("%d: DEBUG refine_potential 3 at cycle %d cell %d mpot_global & mpot_save_global %d %d \n",mype,cycle,ic,mpot_global[ic],mpot_save_global[ic]); + } + } + +} + +#ifdef HAVE_OPENCL +void Mesh::compare_mpot_all_to_gpu_local(int *mpot, int *mpot_global, cl_mem dev_mpot, cl_mem dev_mpot_global, uint ncells_global, int *nsizes, int *ndispl, int ncycle) +{ +#ifdef HAVE_MPI + cl_command_queue command_queue = ezcl_get_command_queue(); + + // Need to compare dev_mpot to mpot + vectormpot_save(ncells); + ezcl_enqueue_read_buffer(command_queue, dev_mpot, CL_TRUE, 0, ncells*sizeof(cl_int), &mpot_save[0], NULL); + for (uint ic = 0; ic < ncells; ic++){ + if (mpot[ic] != mpot_save[ic]) { + printf("%d: DEBUG refine_potential 1 at cycle %d cell %d mpot & mpot_save %d %d \n",mype,ncycle,ic,mpot[ic],mpot_save[ic]); + } + } + + // Compare dev_mpot to mpot_global + vectormpot_save_global(ncells_global); + MPI_Allgatherv(&mpot_save[0], nsizes[mype], MPI_INT, &mpot_save_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + for (uint ic = 0; ic < ncells_global; ic++){ + if (mpot_global[ic] != mpot_save_global[ic]) { + if (mype == 0) printf("%d: DEBUG refine_potential 2 at cycle %d cell %d mpot_global & mpot_save_global %d %d \n",mype,ncycle,ic,mpot_global[ic],mpot_save_global[ic]); + } + } + + // Compare mpot 
to mpot_global + MPI_Allgatherv(&mpot[0], nsizes[mype], MPI_INT, &mpot_save_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + for (uint ic = 0; ic < ncells_global; ic++){ + if (mpot_global[ic] != mpot_save_global[ic]) { + if (mype == 0) printf("%d: DEBUG refine_potential 3 at cycle %d cell %d mpot_global & mpot_save_global %d %d \n",mype,ncycle,ic,mpot_global[ic],mpot_save_global[ic]); + } + } + + // Compare dev_mpot_global to mpot_global + ezcl_enqueue_read_buffer(command_queue, dev_mpot_global, CL_TRUE, 0, ncells_global*sizeof(cl_int), &mpot_save_global[0], NULL); + for (uint ic = 0; ic < ncells_global; ic++){ + if (mpot_global[ic] != mpot_save_global[ic]) { + if (mype == 0) printf("%d: DEBUG refine_potential 4 at cycle %d cell %u mpot_global & mpot_save_global %d %d \n",mype,ncycle,ic,mpot_global[ic],mpot_save_global[ic]); + } + } +#else + // Just to get rid of compiler warnings + if (1 == 2) printf("DEBUG -- mpot %p mpot_global %p dev_mpot %p dev_mpot_global %p ncells_global %d nsizes[0] %d ndispl[0] %d ncycle %d\n", + mpot,mpot_global,dev_mpot,dev_mpot_global,ncells_global,nsizes[0],ndispl[0],ncycle); +#endif +} + +void Mesh::compare_ioffset_gpu_global_to_cpu_global(uint old_ncells, int *mpot) +{ + cl_command_queue command_queue = ezcl_get_command_queue(); + + size_t local_work_size = MIN(ncells, TILE_SIZE); + size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size; + + //size_t block_size = (ncells + TILE_SIZE - 1) / TILE_SIZE; // For on-device global reduction kernel. 
+ size_t block_size = global_work_size/local_work_size; + + vector ioffset_check(block_size); + ezcl_enqueue_read_buffer(command_queue, dev_ioffset, CL_TRUE, 0, block_size*sizeof(cl_int), &ioffset_check[0], NULL); + + int mcount, mtotal; + mtotal = 0; + for (uint ig=0; ig<(old_ncells+TILE_SIZE-1)/TILE_SIZE; ig++){ + mcount = 0; + for (uint ic=ig*TILE_SIZE; ic<(ig+1)*TILE_SIZE; ic++){ + if (ic >= old_ncells) break; + + if (mpot[ic] < 0) { + if (celltype[ic] == REAL_CELL) { + // remove all but cell that will remain to get count right when split + // across processors + if (is_lower_left(i[ic],j[ic]) ) mcount++; + } else { + // either upper right or lower left will remain for boundary cells + if (is_upper_right(i[ic],j[ic]) || is_lower_left(i[ic],j[ic]) ) mcount++; + } + } + if (mpot[ic] >= 0) { + if (celltype[ic] == REAL_CELL){ + mcount += mpot[ic] ? 4 : 1; + } else { + mcount += mpot[ic] ? 2 : 1; + } + } + } + if (mtotal != ioffset_check[ig]) printf("DEBUG ig %d ioffset %d mcount %d\n",ig,ioffset_check[ig],mtotal); + mtotal += mcount; + } +} + +void Mesh::compare_ioffset_all_to_gpu_local(uint old_ncells, uint old_ncells_global, int block_size, int block_size_global, int *mpot, int *mpot_global, cl_mem dev_ioffset, cl_mem dev_ioffset_global, int *ioffset, int *ioffset_global, int *celltype_global, int *i_global, int *j_global) +{ + cl_command_queue command_queue = ezcl_get_command_queue(); + + // This compares ioffset for each block in the calculation + ezcl_enqueue_read_buffer(command_queue, dev_ioffset, CL_TRUE, 0, block_size*sizeof(cl_int), &ioffset[0], NULL); + int mtotal = 0; + for (uint ig=0; ig<(old_ncells+TILE_SIZE-1)/TILE_SIZE; ig++){ + int mcount = 0; + for (uint ic=ig*TILE_SIZE; ic<(ig+1)*TILE_SIZE; ic++){ + if (ic >= old_ncells) break; + + if (mpot[ic] < 0) { + if (celltype[ic] == REAL_CELL) { + // remove all but cell that will remain to get count right when split + // across processors + if (is_lower_left(i[ic],j[ic]) ) mcount++; + } else { + // either 
upper right or lower left will remain for boundary cells + if (is_upper_right(i[ic],j[ic]) || is_lower_left(i[ic],j[ic]) ) mcount++; + } + } + if (mpot[ic] >= 0) { + if (celltype[ic] == REAL_CELL){ + mcount += mpot[ic] ? 4 : 1; + } else { + mcount += mpot[ic] ? 2 : 1; + } + } + } + if (mtotal != ioffset[ig]) printf("%d: DEBUG ig %d ioffset %d mtotal %d\n",mype,ig,ioffset[ig],mtotal); + mtotal += mcount; + } + + // For global This compares ioffset for each block in the calculation + ezcl_enqueue_read_buffer(command_queue, dev_ioffset_global, CL_TRUE, 0, block_size_global*sizeof(cl_int), &ioffset_global[0], NULL); + mtotal = 0; + int count = 0; + for (uint ig=0; ig<(old_ncells_global+TILE_SIZE-1)/TILE_SIZE; ig++){ + int mcount = 0; + for (uint ic=ig*TILE_SIZE; ic<(ig+1)*TILE_SIZE; ic++){ + if (ic >= old_ncells_global) break; + + if (mpot_global[ic] < 0) { + if (celltype_global[ic] == REAL_CELL) { + // remove all but cell that will remain to get count right when split + // across processors + if (is_lower_left(i_global[ic],j_global[ic]) ) mcount++; + } else { + // either upper right or lower left will remain for boundary cells + if (is_upper_right(i_global[ic],j_global[ic]) || is_lower_left(i_global[ic],j_global[ic]) ) mcount++; + } + } + + if (mpot_global[ic] >= 0) { + if (celltype_global[ic] == REAL_CELL) { + mcount += mpot_global[ic] ? 4 : 1; + } else { + mcount += mpot_global[ic] ? 
2 : 1; + } + } + } + if (mtotal != ioffset_global[ig]) { + printf("DEBUG global ig %d ioffset %d mtotal %d\n",ig,ioffset_global[ig],mtotal); + count++; + } + if (count > 10) exit(0); + mtotal += mcount; + } +} +#endif + +Mesh::Mesh(int nx, int ny, int levmx_in, int ndim_in, double deltax_in, double deltay_in, int boundary, int parallel_in, int do_gpu_calc) +{ + lowerBound_Global = NULL; + upperBound_Global = NULL; + for (int i = 0; i < MESH_TIMER_SIZE; i++){ + cpu_timers[i] = 0.0; + gpu_timers[i] = 0L; + } + + for (int i = 0; i < MESH_COUNTER_SIZE; i++){ + cpu_counters[i] = 0; + gpu_counters[i] = 0; + } + + ndim = ndim_in; + levmx = levmx_in; +#ifdef HAVE_OPENCL + if (ndim == TWO_DIMENSIONAL) defines = "-DTWO_DIMENSIONAL -DCARTESIAN"; +#endif + + offtile_ratio_local = 0; + offtile_local_count = 1; + + mype = 0; + numpe = 1; + ncells = 0; + ncells_ghost = 0; + parallel = parallel_in; + noffset = 0; + mem_factor = 1.0; + //mem_factor = 1.5; + +#ifdef HAVE_MPI + int mpi_init; + MPI_Initialized(&mpi_init); + if (mpi_init && parallel){ + MPI_Comm_rank(MPI_COMM_WORLD,&mype); + MPI_Comm_size(MPI_COMM_WORLD,&numpe); + } + // TODO add fini + if (parallel) mesh_memory.pinit(MPI_COMM_WORLD, 2L * 1024 * 1024 * 1024); +#endif + cell_handle = 0; + + if (numpe == 1) mem_factor = 1.0; + + deltax = deltax_in; + deltay = deltay_in; + + have_boundary = boundary; + + //int istart = 1; + //int jstart = 1; + //int iend = nx; + //int jend = ny; + int nxx = nx; + int nyy = ny; + imin = 0; + jmin = 0; + imax = nx+1; + jmax = ny+1; + if (have_boundary) { + //istart = 0; + //jstart = 0; + //iend = nx + 1; + //jend = ny + 1; + nxx = nx + 2; + nyy = ny + 2; + imin = 0; + jmin = 0; + imax = nx + 1; + jmax = ny + 1; + } + + xmin = -deltax * 0.5 * (real_t)nxx; + ymin = -deltay * 0.5 * (real_t)nyy; + xmax = deltax * 0.5 * (real_t)nxx; + ymax = deltay * 0.5 * (real_t)nyy; + + size_t lvlMxSize = levmx + 1; + + levtable.resize(lvlMxSize); + lev_ibegin.resize(lvlMxSize); + 
lev_jbegin.resize(lvlMxSize); + lev_iend.resize( lvlMxSize); + lev_jend.resize( lvlMxSize); + lev_deltax.resize(lvlMxSize); + lev_deltay.resize(lvlMxSize); + + lev_ibegin[0] = imin + 1; + lev_iend[0] = imax - 1; + lev_jbegin[0] = jmin + 1; + lev_jend[0] = jmax - 1; + lev_deltax[0] = deltax; + lev_deltay[0] = deltay; + + for (int lev = 1; lev <= levmx; lev++) { + lev_ibegin[lev] = lev_ibegin[lev-1]*2; + lev_iend[lev] = lev_iend [lev-1]*2 + 1; + lev_jbegin[lev] = lev_jbegin[lev-1]*2; + lev_jend[lev] = lev_jend [lev-1]*2 + 1; + lev_deltax[lev] = lev_deltax[lev-1]*0.5; + lev_deltay[lev] = lev_deltay[lev-1]*0.5; + } + for (uint lev=0; lev("dev_levtable"), &lvlMxSize, sizeof(cl_int), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 0); + dev_levdx = ezcl_malloc(&lev_deltax[0], const_cast("dev_levdx"), &lvlMxSize, sizeof(cl_real_t), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 0); + dev_levdy = ezcl_malloc(&lev_deltay[0], const_cast("dev_levdy"), &lvlMxSize, sizeof(cl_real_t), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 0); + dev_levibeg = ezcl_malloc(&lev_ibegin[0], const_cast("dev_levibeg"), &lvlMxSize, sizeof(cl_int), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 0); + dev_leviend = ezcl_malloc(&lev_iend[0], const_cast("dev_leviend"), &lvlMxSize, sizeof(cl_int), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 0); + dev_levjbeg = ezcl_malloc(&lev_jbegin[0], const_cast("dev_levjbeg"), &lvlMxSize, sizeof(cl_int), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 0); + dev_levjend = ezcl_malloc(&lev_jend[0], const_cast("dev_levjend"), &lvlMxSize, sizeof(cl_int), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 0); +#endif + } + + ibase = 0; + + int ncells_corners = 4; + int i_corner[] = { 0, 0,imax,imax}; + int j_corner[] = { 0,jmax, 0,jmax}; + + for(int ic=0; ic 0) { // this is a restart. 
+ nsizes.resize (numpe); + ndispl.resize (numpe); + if (parallel && numpe > 1) { +#ifdef HAVE_MPI + int ncells_int = ncells; + MPI_Allgather(&ncells_int, 1, MPI_INT, &nsizes[0], 1, MPI_INT, MPI_COMM_WORLD); + ndispl[0]=0; + for (int ip=1; ip= (int)noffset && ic < (int)(ncells+noffset)){ + int iclocal = ic-noffset; + index[iclocal] = ic; + i[iclocal] = ii; + j[iclocal] = jj; + level[iclocal] = 0; + } + ic++; + } + } + + //if (numpe > 1 && (initial_order != HILBERT_SORT && initial_order != HILBERT_PARTITION) ) mem_factor = 2.0; + partition_cells(numpe, index, initial_order); + + calc_celltype(ncells); + calc_spatial_coordinates(0); + + // Start lev loop here + for (int ilevel=1; ilevel<=levmx; ilevel++) { + + //int old_ncells = ncells; + + ncells_ghost = ncells; + calc_neighbors_local(); + + kdtree_setup(); + + int nez; + vector ind(ncells); + + #ifdef FULL_PRECISION + KDTree_QueryCircleIntersect_Double(&tree, &nez, &(ind[0]), circ_radius, ncells, &x[0], &dx[0], &y[0], &dy[0]); + #else + KDTree_QueryCircleIntersect_Float(&tree, &nez, &(ind[0]), circ_radius, ncells, &x[0], &dx[0], &y[0], &dy[0]); + #endif + + vector mpot(ncells_ghost,0); + + for (int ic=0; ic 1) { + int ncells_int = ncells; + MPI_Allgather(&ncells_int, 1, MPI_INT, &nsizes[0], 1, MPI_INT, MPI_COMM_WORLD); + ndispl[0]=0; + for (int ip=1; ip &mpot, int &icount, int &jcount) +{ + vector mpot_old; + + int newcount; + int newcount_global; + + struct timeval tstart_lev2; + + rezone_count(mpot, icount, jcount); + +#ifdef _OPENMP +#pragma omp parallel +{ //START Parallel Region +#endif + +#ifdef _OPENMP +#pragma omp master +{//MASTER START +#endif + newcount = icount; + newcount_global = newcount; + + if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2); + +#ifdef HAVE_MPI + if (parallel) { + MPI_Allreduce(&newcount, &newcount_global, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + } +#endif + +#ifdef _OPENMP +}//END MASTER +#pragma omp barrier +#endif + + if(newcount_global > 0 && levmx > 1) { + + size_t 
my_ncells=ncells; + if (parallel) my_ncells=ncells_ghost; + +#ifdef _OPENMP +#pragma omp master +{//MASTER START +#endif + cpu_counters[MESH_COUNTER_REFINE_SMOOTH]++; + + mpot_old.resize(my_ncells); +#ifdef _OPENMP +}//END MASTER +#pragma omp barrier +#endif + + int levcount = 1; + + while (newcount_global > 0 && levcount < levmx){ + + levcount++; +#ifdef _OPENMP +#pragma omp master +{//MASTER START +#endif + + mpot.swap(mpot_old); + newcount=0; +#ifdef HAVE_MPI + if (numpe > 1) { + L7_Update(&mpot_old[0], L7_INT, cell_handle); + } +#endif + +#ifdef _OPENMP +}//END MASTER +#pragma omp barrier +#endif + + int upperBound, lowerBound; + get_bounds(upperBound, lowerBound); + int mynewcount = newcount; //All threads get a mynewcount + +#ifdef _OPENMP +#pragma omp for reduction(+:newcount) +#endif + for(uint ic = 0; ic < ncells; ic++) { + // for(uint ic = lowerBound; ic < upperBound; ic++){ + int lev = level[ic]; + mpot[ic] = mpot_old[ic]; + if(mpot_old[ic] > 0) continue; + + int nl = nlft[ic]; + if (nl >= 0 && nl < (int)ncells_ghost) { + int ll = level[nl]; + if(mpot_old[nl] > 0) ll++; + + if(ll - lev > 1) { + mpot[ic]=1; + mynewcount++; + continue; + } + + ll = level[nl]; + if (ll > lev) { + int nlt = ntop[nl]; + if (nlt >= 0 && nlt < (int)ncells_ghost) { + int llt = level[nlt]; + if(mpot_old[nlt] > 0) llt++; + + if(llt - lev > 1) { + mpot[ic]=1; + mynewcount++; + continue; + } + } + } + } + + int nr = nrht[ic]; + if (nr >= 0 && nr < (int)ncells_ghost) { + int lr = level[nr]; + if(mpot_old[nr] > 0) lr++; + + if(lr - lev > 1) { + mpot[ic]=1; + mynewcount++; + continue; + } + + lr = level[nr]; + if (lr > lev) { + int nrt = ntop[nr]; + if (nrt >= 0 && nrt < (int)ncells_ghost) { + int lrt = level[nrt]; + if(mpot_old[nrt] > 0) lrt++; + + if(lrt - lev > 1) { + mpot[ic]=1; + mynewcount++; + continue; + } + } + } + } + + int nt = ntop[ic]; + if (nt >= 0 && nt < (int)ncells_ghost) { + int lt = level[nt]; + if(mpot_old[nt] > 0) lt++; + + if(lt - lev > 1) { + mpot[ic]=1; + 
mynewcount++; + continue; + } + + lt = level[nt]; + if (lt > lev) { + int ntr = nrht[nt]; + if (ntr >= 0 && ntr < (int)ncells_ghost) { + int ltr = level[ntr]; + if(mpot_old[ntr] > 0) ltr++; + + if(ltr - lev > 1) { + mpot[ic]=1; + mynewcount++; + continue; + } + } + } + } + + int nb = nbot[ic]; + if (nb >= 0 && nb < (int)ncells_ghost) { + int lb = level[nb]; + if(mpot_old[nb] > 0) lb++; + + if(lb - lev > 1) { + mpot[ic]=1; + mynewcount++; + continue; + } + + lb = level[nb]; + if (lb > lev) { + int nbr = nrht[nb]; + if (nbr >= 0 && nbr < (int)ncells_ghost) { + int lbr = level[nbr]; + if(mpot_old[nbr] > 0) lbr++; + + if(lbr - lev > 1) { + mpot[ic]=1; + mynewcount++; + continue; + } + } + } + } + } +#ifdef _OPENMP +#pragma omp atomic +#endif + newcount += mynewcount; + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master +{ +#endif + icount += newcount; + newcount_global = newcount; + +#ifdef HAVE_MPI + if (parallel) { + MPI_Allreduce(&newcount, &newcount_global, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + } +#endif + +#ifdef _OPENMP +}//END MASTER +#pragma omp barrier +#endif + + } // while (newcount_global > 0 && levcount < levmx); + + } + + +#ifdef _OPENMP +#pragma omp master +{ +#endif + +#ifdef HAVE_MPI + if (numpe > 1) { + L7_Update(&mpot[0], L7_INT, cell_handle); + } +#endif + + mpot_old.clear(); + mpot_old.resize(ncells_ghost); + + mpot_old.swap(mpot); +#ifdef _OPENMP +}//END MASTER +#pragma omp barrier +#endif + +#ifdef _OPENMP +#pragma omp for +#endif + for(uint ic=0; ic= 0) continue; + if (mpot_old[ic] <= -1000000) continue; + if ( is_upper_right(i[ic],j[ic]) ) { + int nr = nrht[ic]; + int lr = level[nr]; + if (mpot_old[nr] > 0) lr++; + int nt = ntop[ic]; + int lt = level[nt]; + if (mpot_old[nt] > 0) lt++; + if (lr > level[ic] || lt > level[ic]) mpot[ic] = 0; + } else if ( is_upper_left(i[ic],j[ic] ) ) { + int nl = nlft[ic]; + int ll = level[nl]; + if (mpot_old[nl] > 0) ll++; + int nt = ntop[ic]; + int lt = level[nt]; + if (mpot_old[nt] > 0) lt++; + if (ll > 
level[ic] || lt > level[ic]) mpot[ic] = 0; + } else if ( is_lower_right(i[ic],j[ic] ) ) { + int nr = nrht[ic]; + int lr = level[nr]; + if (mpot_old[nr] > 0) lr++; + int nb = nbot[ic]; + int lb = level[nb]; + if (mpot_old[nb] > 0) lb++; + if (lr > level[ic] || lb > level[ic]) mpot[ic] = 0; + } else if ( is_lower_left(i[ic],j[ic] ) ) { + int nl = nlft[ic]; + int ll = level[nl]; + if (mpot_old[nl] > 0) ll++; + int nb = nbot[ic]; + int lb = level[nb]; + if (mpot_old[nb] > 0) lb++; + if (ll > level[ic] || lb > level[ic]) mpot[ic] = 0; + } + } + +#ifdef _OPENMP +#pragma omp master +{ +#endif + +#ifdef HAVE_MPI + if (numpe > 1) { + L7_Update(&mpot[0], L7_INT, cell_handle); + } +#endif + + mpot_old.swap(mpot); +#ifdef _OPENMP +}//END MASTER +#pragma omp barrier +#endif + +#ifdef _OPENMP +#pragma omp for +#endif + for(uint ic=0; ic= 0) continue; + if (mpot_old[ic] <= -1000000) continue; + if ( is_upper_right(i[ic],j[ic]) ) { + n1 = nbot[ic]; + n2 = nlft[ic]; + n3 = nlft[n1]; + } else if ( is_upper_left(i[ic],j[ic] ) ) { + n1 = nbot[ic]; + n2 = nrht[ic]; + n3 = nrht[n1]; + } else if ( is_lower_right(i[ic],j[ic] ) ) { + n1 = ntop[ic]; + n2 = nlft[ic]; + n3 = nlft[n1]; + } else if ( is_lower_left(i[ic],j[ic] ) ) { + n1 = ntop[ic]; + n2 = nrht[ic]; + n3 = nrht[n1]; + } + if (n3 < 0) { + mpot[ic] = 0; + } else { + int lev1 = level[n1]; + int lev2 = level[n2]; + int lev3 = level[n3]; + if (mpot_old[n1] > 0) lev1++; + if (mpot_old[n2] > 0) lev2++; + if (mpot_old[n3] > 0) lev3++; + + if (mpot_old[n1] != -1 || lev1 != level[ic] || + mpot_old[n2] != -1 || lev2 != level[ic] || + mpot_old[n3] != -1 || lev3 != level[ic]) { + mpot[ic] = 0; + } + } + } + +#ifdef _OPENMP +#pragma omp master +{ +#endif + +#ifdef HAVE_MPI + if (numpe > 1) { + L7_Update(&mpot[0], L7_INT, cell_handle); + } +#endif + +#ifdef _OPENMP +}//END MASTER +#pragma omp barrier +#endif + +#ifdef _OPENMP +#pragma omp for +#endif + for (uint ic=0; ic= 2) cpu_timers[MESH_TIMER_REFINE_SMOOTH] += cpu_timer_stop(tstart_lev2); 
+ + return(newcount); +} + +#ifdef HAVE_OPENCL +int Mesh::gpu_refine_smooth(cl_mem &dev_mpot, int &icount, int &jcount) +{ + struct timeval tstart_lev2; + if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2); + + cl_command_queue command_queue = ezcl_get_command_queue(); + + size_t local_work_size = 128; + size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size; + size_t block_size = global_work_size/local_work_size; + + int icount_global = icount; + int jcount_global = jcount; + +#ifdef HAVE_MPI + if (parallel) { + int count[2], count_global[2]; + count[0] = icount; + count[1] = jcount; + MPI_Allreduce(&count, &count_global, 2, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + icount_global = count_global[0]; + jcount_global = count_global[1]; + } +#endif + + int levcount = 1; + //int which_smooth=0; + + if(icount_global > 0 && levcount < levmx) { + size_t result_size = 1; + cl_mem dev_result = ezcl_malloc(NULL, const_cast("dev_result"), &result_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_redscratch = ezcl_malloc(NULL, const_cast("dev_redscratch"), &block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_mpot_old = ezcl_malloc(NULL, const_cast("dev_mpot_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + + int newcount = icount; + int newcount_global = icount_global; + while (newcount_global > 0 && levcount < levmx) { + levcount++; + + gpu_counters[MESH_COUNTER_REFINE_SMOOTH]++; + +#ifdef HAVE_MPI + if (numpe > 1) { + L7_Dev_Update(dev_mpot, L7_INT, cell_handle); + } +#endif + + if (icount_global) { + ezcl_device_memory_swap(&dev_mpot_old, &dev_mpot); + + ezcl_set_kernel_arg(kernel_refine_smooth, 0, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_refine_smooth, 1, sizeof(cl_int), (void *)&ncells_ghost); + ezcl_set_kernel_arg(kernel_refine_smooth, 2, sizeof(cl_int), (void *)&levmx); + ezcl_set_kernel_arg(kernel_refine_smooth, 3, sizeof(cl_mem), (void *)&dev_nlft); + 
ezcl_set_kernel_arg(kernel_refine_smooth, 4, sizeof(cl_mem), (void *)&dev_nrht); + ezcl_set_kernel_arg(kernel_refine_smooth, 5, sizeof(cl_mem), (void *)&dev_nbot); + ezcl_set_kernel_arg(kernel_refine_smooth, 6, sizeof(cl_mem), (void *)&dev_ntop); + ezcl_set_kernel_arg(kernel_refine_smooth, 7, sizeof(cl_mem), (void *)&dev_level); + ezcl_set_kernel_arg(kernel_refine_smooth, 8, sizeof(cl_mem), (void *)&dev_celltype); + ezcl_set_kernel_arg(kernel_refine_smooth, 9, sizeof(cl_mem), (void *)&dev_mpot_old); + ezcl_set_kernel_arg(kernel_refine_smooth,10, sizeof(cl_mem), (void *)&dev_mpot); + ezcl_set_kernel_arg(kernel_refine_smooth,11, sizeof(cl_mem), (void *)&dev_redscratch); + ezcl_set_kernel_arg(kernel_refine_smooth,12, sizeof(cl_mem), (void *)&dev_result); + ezcl_set_kernel_arg(kernel_refine_smooth,13, local_work_size*sizeof(cl_int), NULL); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_refine_smooth, 1, NULL, &global_work_size, &local_work_size, NULL); + + gpu_rezone_count(block_size, local_work_size, dev_redscratch, dev_result); + + int result; + ezcl_enqueue_read_buffer(command_queue, dev_result, CL_TRUE, 0, sizeof(cl_int), &result, NULL); + + //printf("result = %d after %d refine smooths\n",result,which_smooth); + //which_smooth++; + + icount = result; + } + + newcount = icount-newcount; + newcount_global = newcount; +#ifdef HAVE_MPI + if (parallel) { + MPI_Allreduce(&newcount, &newcount_global, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + } +#endif + icount_global += newcount_global; + //printf("DEBUG -- icount %d icount_global %d newcount %d newcount_global %d\n",icount,icount_global,newcount,newcount_global); + } + + ezcl_device_memory_delete(dev_mpot_old); + ezcl_device_memory_delete(dev_redscratch); + ezcl_device_memory_delete(dev_result); + } + + if (jcount_global) { +#ifdef HAVE_MPI + if (numpe > 1) { + L7_Dev_Update(dev_mpot, L7_INT, cell_handle); + } +#endif + + cl_mem dev_mpot_old = ezcl_malloc(NULL, const_cast("dev_mpot_old"), &ncells_ghost, 
sizeof(cl_int), CL_MEM_READ_WRITE, 0); + + if (jcount) { + ezcl_device_memory_swap(&dev_mpot_old, &dev_mpot); + + ezcl_set_kernel_arg(kernel_coarsen_smooth, 0, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_coarsen_smooth, 1, sizeof(cl_mem), (void *)&dev_nlft); + ezcl_set_kernel_arg(kernel_coarsen_smooth, 2, sizeof(cl_mem), (void *)&dev_nrht); + ezcl_set_kernel_arg(kernel_coarsen_smooth, 3, sizeof(cl_mem), (void *)&dev_nbot); + ezcl_set_kernel_arg(kernel_coarsen_smooth, 4, sizeof(cl_mem), (void *)&dev_ntop); + ezcl_set_kernel_arg(kernel_coarsen_smooth, 5, sizeof(cl_mem), (void *)&dev_i); + ezcl_set_kernel_arg(kernel_coarsen_smooth, 6, sizeof(cl_mem), (void *)&dev_j); + ezcl_set_kernel_arg(kernel_coarsen_smooth, 7, sizeof(cl_mem), (void *)&dev_level); + ezcl_set_kernel_arg(kernel_coarsen_smooth, 8, sizeof(cl_mem), (void *)&dev_mpot_old); + ezcl_set_kernel_arg(kernel_coarsen_smooth, 9, sizeof(cl_mem), (void *)&dev_mpot); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_coarsen_smooth, 1, NULL, &global_work_size, &local_work_size, NULL); + } + +#ifdef HAVE_MPI + if (numpe > 1) { + L7_Dev_Update(dev_mpot, L7_INT, cell_handle); + } +#endif + + if (jcount) { + size_t result_size = 1; + cl_mem dev_result = ezcl_malloc(NULL, const_cast("dev_result"), &result_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_redscratch = ezcl_malloc(NULL, const_cast("dev_redscratch"), &block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + + ezcl_device_memory_swap(&dev_mpot_old, &dev_mpot); + + ezcl_set_kernel_arg(kernel_coarsen_check_block, 0, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_coarsen_check_block, 1, sizeof(cl_mem), (void *)&dev_nlft); + ezcl_set_kernel_arg(kernel_coarsen_check_block, 2, sizeof(cl_mem), (void *)&dev_nrht); + ezcl_set_kernel_arg(kernel_coarsen_check_block, 3, sizeof(cl_mem), (void *)&dev_nbot); + ezcl_set_kernel_arg(kernel_coarsen_check_block, 4, sizeof(cl_mem), (void *)&dev_ntop); + 
ezcl_set_kernel_arg(kernel_coarsen_check_block, 5, sizeof(cl_mem), (void *)&dev_i); + ezcl_set_kernel_arg(kernel_coarsen_check_block, 6, sizeof(cl_mem), (void *)&dev_j); + ezcl_set_kernel_arg(kernel_coarsen_check_block, 7, sizeof(cl_mem), (void *)&dev_level); + ezcl_set_kernel_arg(kernel_coarsen_check_block, 8, sizeof(cl_mem), (void *)&dev_celltype); + ezcl_set_kernel_arg(kernel_coarsen_check_block, 9, sizeof(cl_mem), (void *)&dev_mpot_old); + ezcl_set_kernel_arg(kernel_coarsen_check_block,10, sizeof(cl_mem), (void *)&dev_mpot); + ezcl_set_kernel_arg(kernel_coarsen_check_block,11, sizeof(cl_mem), (void *)&dev_redscratch); + ezcl_set_kernel_arg(kernel_coarsen_check_block,12, sizeof(cl_mem), (void *)&dev_result); + ezcl_set_kernel_arg(kernel_coarsen_check_block,13, local_work_size*sizeof(cl_int), NULL); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_coarsen_check_block, 1, NULL, &global_work_size, &local_work_size, NULL); + + gpu_rezone_count(block_size, local_work_size, dev_redscratch, dev_result); + + int result; + ezcl_enqueue_read_buffer(command_queue, dev_result, CL_TRUE, 0, sizeof(cl_int), &result, NULL); + + //printf("result = %d after coarsen smooth\n",result); + + jcount = result; + + ezcl_device_memory_delete(dev_redscratch); + ezcl_device_memory_delete(dev_result); + } + + jcount_global = jcount; + +#ifdef HAVE_MPI + if (parallel) { + MPI_Allreduce(&jcount, &jcount_global, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + } +#endif + + ezcl_device_memory_delete(dev_mpot_old); + } + + if (icount_global || jcount_global) { +#ifdef HAVE_MPI + if (numpe > 1) { + L7_Dev_Update(dev_mpot, L7_INT, cell_handle); + } +#endif + + size_t result_size = 1; + cl_mem dev_result = ezcl_malloc(NULL, const_cast("dev_result"), &result_size, sizeof(cl_int2), CL_MEM_READ_WRITE, 0); + cl_mem dev_redscratch = ezcl_malloc(NULL, const_cast("dev_redscratch"), &block_size, sizeof(cl_int2), CL_MEM_READ_WRITE, 0); + dev_ioffset = ezcl_malloc(NULL, const_cast("dev_ioffset"), &block_size, 
sizeof(cl_uint), CL_MEM_READ_WRITE, 0); + + ezcl_set_kernel_arg(kernel_set_boundary_refinement, 0, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_set_boundary_refinement, 1, sizeof(cl_mem), (void *)&dev_nlft); + ezcl_set_kernel_arg(kernel_set_boundary_refinement, 2, sizeof(cl_mem), (void *)&dev_nrht); + ezcl_set_kernel_arg(kernel_set_boundary_refinement, 3, sizeof(cl_mem), (void *)&dev_nbot); + ezcl_set_kernel_arg(kernel_set_boundary_refinement, 4, sizeof(cl_mem), (void *)&dev_ntop); + ezcl_set_kernel_arg(kernel_set_boundary_refinement, 5, sizeof(cl_mem), (void *)&dev_i); + ezcl_set_kernel_arg(kernel_set_boundary_refinement, 6, sizeof(cl_mem), (void *)&dev_j); + ezcl_set_kernel_arg(kernel_set_boundary_refinement, 7, sizeof(cl_mem), (void *)&dev_celltype); + ezcl_set_kernel_arg(kernel_set_boundary_refinement, 8, sizeof(cl_mem), (void *)&dev_mpot); + ezcl_set_kernel_arg(kernel_set_boundary_refinement, 9, sizeof(cl_mem), (void *)&dev_redscratch); + ezcl_set_kernel_arg(kernel_set_boundary_refinement, 10, sizeof(cl_mem), (void *)&dev_ioffset); + ezcl_set_kernel_arg(kernel_set_boundary_refinement, 11, sizeof(cl_mem), (void *)&dev_result); + ezcl_set_kernel_arg(kernel_set_boundary_refinement, 12, local_work_size*sizeof(cl_int2), NULL); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_set_boundary_refinement, 1, NULL, &global_work_size, &local_work_size, NULL); + + gpu_rezone_count2(block_size, local_work_size, dev_redscratch, dev_result); + + int my_result[2]; + ezcl_enqueue_read_buffer(command_queue, dev_result, CL_TRUE, 0, 1*sizeof(cl_int2), &my_result, NULL); + //printf("Result is %lu icount %d jcount %d\n", ncells+my_result[0]-my_result[1],my_result[0],my_result[1]); + icount = my_result[0]; + jcount = my_result[1]; + + icount_global = icount; + jcount_global = jcount; +#ifdef HAVE_MPI + if (parallel) { + int count[2], count_global[2]; + count[0] = icount; + count[1] = jcount; + MPI_Allreduce(&count, &count_global, 2, MPI_INT, MPI_SUM, 
MPI_COMM_WORLD); + icount_global = count_global[0]; + jcount_global = count_global[1]; + } +#endif + + gpu_rezone_scan(block_size, local_work_size, dev_ioffset, dev_result); + + //ezcl_enqueue_read_buffer(command_queue, dev_result, CL_TRUE, 0, sizeof(cl_int), &my_result, NULL); + //printf("After scan, Result is %d\n", my_result[0]); + + ezcl_device_memory_delete(dev_result); + ezcl_device_memory_delete(dev_redscratch); + + } else { + ezcl_device_memory_delete(dev_mpot); + dev_mpot = NULL; + } + + gpu_do_rezone = (icount_global != 0 || jcount_global != 0) ? true : false; + + if (TIMING_LEVEL >= 2) gpu_timers[MESH_TIMER_REFINE_SMOOTH] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); + + return ncells+icount-jcount; +} +#endif + +void Mesh::terminate(void) +{ + mesh_memory.memory_delete(i); + mesh_memory.memory_delete(j); + mesh_memory.memory_delete(level); + mesh_memory.memory_delete(celltype); + if (neighbor_remap) { + mesh_memory.memory_delete(nlft); + mesh_memory.memory_delete(nrht); + mesh_memory.memory_delete(nbot); + mesh_memory.memory_delete(ntop); + } + +#ifdef HAVE_OPENCL + hash_lib_terminate(); + + ezcl_device_memory_delete(dev_levtable); + ezcl_device_memory_delete(dev_levdx); + ezcl_device_memory_delete(dev_levdy); + ezcl_device_memory_delete(dev_levibeg); + ezcl_device_memory_delete(dev_leviend); + ezcl_device_memory_delete(dev_levjbeg); + ezcl_device_memory_delete(dev_levjend); + + ezcl_device_memory_delete(dev_level); + ezcl_device_memory_delete(dev_i); + ezcl_device_memory_delete(dev_j); + ezcl_device_memory_delete(dev_celltype); + if (neighbor_remap && dev_nlft != NULL){ + ezcl_device_memory_delete(dev_nlft); + ezcl_device_memory_delete(dev_nrht); + ezcl_device_memory_delete(dev_nbot); + ezcl_device_memory_delete(dev_ntop); + } + + ezcl_kernel_release(kernel_reduction_scan2); + ezcl_kernel_release(kernel_reduction_count); + ezcl_kernel_release(kernel_reduction_count2); + ezcl_kernel_release(kernel_hash_adjust_sizes); + 
ezcl_kernel_release(kernel_hash_setup); + ezcl_kernel_release(kernel_hash_setup_local); + ezcl_kernel_release(kernel_neighbor_init); + ezcl_kernel_release(kernel_calc_neighbors); + ezcl_kernel_release(kernel_calc_neighbors_local); + ezcl_kernel_release(kernel_calc_border_cells); + ezcl_kernel_release(kernel_calc_border_cells2); + ezcl_kernel_release(kernel_finish_scan); + ezcl_kernel_release(kernel_get_border_data); + ezcl_kernel_release(kernel_calc_layer1); + ezcl_kernel_release(kernel_calc_layer1_sethash); + ezcl_kernel_release(kernel_calc_layer2); + ezcl_kernel_release(kernel_get_border_data2); + ezcl_kernel_release(kernel_calc_layer2_sethash); + //ezcl_kernel_release(kernel_calc_neighbors_local2); + ezcl_kernel_release(kernel_copy_mesh_data); + ezcl_kernel_release(kernel_fill_mesh_ghost); + ezcl_kernel_release(kernel_fill_neighbor_ghost); + ezcl_kernel_release(kernel_set_corner_neighbor); + ezcl_kernel_release(kernel_adjust_neighbors_local); + //ezcl_kernel_release(kernel_copy_ghost_data); + //ezcl_kernel_release(kernel_adjust_neighbors); + ezcl_kernel_release(kernel_hash_size); + ezcl_kernel_release(kernel_finish_hash_size); + ezcl_kernel_release(kernel_calc_spatial_coordinates); + ezcl_kernel_release(kernel_do_load_balance_lower); + ezcl_kernel_release(kernel_do_load_balance_middle); + ezcl_kernel_release(kernel_do_load_balance_upper); +#ifndef MINIMUM_PRECISION + ezcl_kernel_release(kernel_do_load_balance_double); +#endif + ezcl_kernel_release(kernel_do_load_balance_float); + ezcl_kernel_release(kernel_refine_smooth); + ezcl_kernel_release(kernel_coarsen_smooth); + ezcl_kernel_release(kernel_coarsen_check_block); + ezcl_kernel_release(kernel_rezone_all); + ezcl_kernel_release(kernel_rezone_neighbors); +#ifndef MINIMUM_PRECISION + ezcl_kernel_release(kernel_rezone_one_double); +#endif + ezcl_kernel_release(kernel_rezone_one_float); + ezcl_kernel_release(kernel_copy_mpot_ghost_data); + ezcl_kernel_release(kernel_set_boundary_refinement); + 
terminate_kernel_2stage_sum(); + terminate_kernel_2stage_sum_int(); + if (! have_boundary){ + ezcl_kernel_release(kernel_count_BCs); + } +#endif +#if defined(HAVE_J7) && defined(HAVE_MPI) + if (parallel) mesh_memory.pfini(); +#endif +} + +int Mesh::rezone_count(vector mpot, int &icount, int &jcount) +{ + int my_icount=0; + int my_jcount=0; + +#ifdef _OPENMP +#pragma omp parallel for reduction (+:my_jcount,my_icount) +#endif + for (uint ic=0; ic 0) { + //printf("mpot[%d] = %d level %d levmx %d\n",ic,mpot[ic],level[ic],levmx); + if (celltype[ic] == REAL_CELL){ + my_icount += 3; + } else { + my_icount ++; + } + } + } + //printf("icount is %d\n",my_icount); + icount = my_icount; + jcount = my_jcount; + + return(icount+jcount); +} + +#ifdef HAVE_OPENCL +void Mesh::gpu_rezone_count2(size_t block_size, size_t local_work_size, cl_mem dev_redscratch, cl_mem &dev_result) +{ + cl_command_queue command_queue = ezcl_get_command_queue(); + + /* + __kernel void finish_reduction_count2_cl( + const int isize, // 0 + __global int *redscratch, // 1 + __global int *result, // 2 + __local int *tile) // 3 + */ + ezcl_set_kernel_arg(kernel_reduction_count2, 0, sizeof(cl_int), (void *)&block_size); + ezcl_set_kernel_arg(kernel_reduction_count2, 1, sizeof(cl_mem), (void *)&dev_redscratch); + ezcl_set_kernel_arg(kernel_reduction_count2, 2, sizeof(cl_mem), (void *)&dev_result); + ezcl_set_kernel_arg(kernel_reduction_count2, 3, local_work_size*sizeof(cl_int2), NULL); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduction_count2, 1, NULL, &local_work_size, &local_work_size, NULL); +} + +void Mesh::gpu_rezone_count(size_t block_size, size_t local_work_size, cl_mem dev_redscratch, cl_mem &dev_result) +{ + cl_command_queue command_queue = ezcl_get_command_queue(); + + /* + __kernel void finish_reduction_count_cl( + const int isize, // 0 + __global int *redscratch, // 1 + __global int *result, // 2 + __local int *tile) // 3 + */ + ezcl_set_kernel_arg(kernel_reduction_count, 0, 
sizeof(cl_int), (void *)&block_size); + ezcl_set_kernel_arg(kernel_reduction_count, 1, sizeof(cl_mem), (void *)&dev_redscratch); + ezcl_set_kernel_arg(kernel_reduction_count, 2, sizeof(cl_mem), (void *)&dev_result); + ezcl_set_kernel_arg(kernel_reduction_count, 3, local_work_size*sizeof(cl_int), NULL); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduction_count, 1, NULL, &local_work_size, &local_work_size, NULL); +} + +void Mesh::gpu_rezone_scan(size_t block_size, size_t local_work_size, cl_mem dev_ioffset, cl_mem &dev_result) +{ + cl_command_queue command_queue = ezcl_get_command_queue(); + + /* + __kernel void finish_reduction_scan_cl( + const int isize, // 0 + __global int *ioffset, // 1 + __global int *result, // 2 + __local int *tile) // 3 + */ + ezcl_set_kernel_arg(kernel_reduction_scan2, 0, sizeof(cl_int), (void *)&block_size); + ezcl_set_kernel_arg(kernel_reduction_scan2, 1, sizeof(cl_mem), (void *)&dev_ioffset); + ezcl_set_kernel_arg(kernel_reduction_scan2, 2, sizeof(cl_mem), (void *)&dev_result); + ezcl_set_kernel_arg(kernel_reduction_scan2, 3, local_work_size*sizeof(cl_uint2), NULL); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduction_scan2, 1, NULL, &local_work_size, &local_work_size, NULL); +} +#endif + +void Mesh::kdtree_setup() +{ + KDTree_Initialize(&tree); + + TBounds box; + for (uint ic=0; ic TWO_DIMENSIONAL) { + for (uint ic=0; ic xmax) xmax = xhigh; + } + for (uint ic=0; ic ymax) ymax = yhigh; + } + if (ndim > TWO_DIMENSIONAL) { + for (uint ic=0; ic zmax) zmax = zhigh; + } + } + +#ifdef HAVE_MPI + if (parallel) { + real_t xmin_global,xmax_global,ymin_global,ymax_global; + MPI_Allreduce(&xmin, &xmin_global, 1, MPI_REAL_T, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(&xmax, &xmax_global, 1, MPI_REAL_T, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce(&ymin, &ymin_global, 1, MPI_REAL_T, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(&ymax, &ymax_global, 1, MPI_REAL_T, MPI_MAX, MPI_COMM_WORLD); + xmin = xmin_global; + xmax = xmax_global; + 
ymin = ymin_global; + ymax = ymax_global; + } +#endif + +} +void Mesh::calc_centerminmax(void) +{ + xcentermin=+1.0e30, ycentermin=+1.0e30, zcentermin=+1.0e30; + xcentermax=-1.0e30, ycentermax=-1.0e30, zcentermax=-1.0e30; + real_t xmid, ymid, zmid; + + for (uint ic=0; ic xcentermax) xcentermax = xmid; + } + for (uint ic=0; ic ycentermax) ycentermax = ymid; + } + if (ndim > TWO_DIMENSIONAL) { + for (uint ic=0; ic zcentermax) zcentermax = zmid; + } + } + +#ifdef HAVE_MPI + if (parallel) { + real_t xcentermin_global,xcentermax_global,ycentermin_global,ycentermax_global; + MPI_Allreduce(&xcentermin, &xcentermin_global, 1, MPI_REAL_T, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(&xcentermax, &xcentermax_global, 1, MPI_REAL_T, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce(&ycentermin, &ycentermin_global, 1, MPI_REAL_T, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(&ycentermax, &ycentermax_global, 1, MPI_REAL_T, MPI_MAX, MPI_COMM_WORLD); + xcentermin = xcentermin_global; + xcentermax = xcentermax_global; + ycentermin = ycentermin_global; + ycentermax = ycentermax_global; + } +#endif + +} + +void Mesh::rezone_all(int icount, int jcount, vector mpot, int have_state, MallocPlus &state_memory) +{ + struct timeval tstart_cpu; + cpu_timer_start(&tstart_cpu); + + if (! 
do_rezone) { + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + index.clear(); + index.resize(ncells); +#ifdef _OPENMP + } +#pragma omp barrier +#endif + +#ifdef _OPENMP +#pragma omp for +#endif + for (uint ic=0; ic celltype_save; + + static int new_ncells; + + static int *i_old, *j_old, *level_old; + + static int ifirst; + static int ilast; + static int jfirst; + static int jlast; + static int level_first; + static int level_last; + + static vector new_ic; + +#ifdef _OPENMP +#pragma omp master + { +#endif + celltype_save.resize(ncells); +#ifdef _OPENMP + } +#pragma omp barrier +#endif + + if (have_state) { +#ifdef _OPENMP +#pragma omp for +#endif + for (int ic = 0; ic < (int)ncells; ic++){ + celltype_save[ic] = celltype[ic]; + } + } + +#ifdef _OPENMP +#pragma omp master + { +#endif + new_ncells = ncells + add_ncells; +#ifdef _OPENMP + } +#pragma omp barrier +#endif + +// int ref_entry_count = 0; + if (have_state){ +#ifdef _OPENMP +#pragma omp for +#endif + for (uint ic=0; ic 0) ref_entry_count++; + if (mpot[ic] < 0) { + // Normal cell coarsening + if (is_lower_left(i[ic],j[ic]) ) mpot[ic] = -2; + // Boundary cell case + if (celltype[ic] != REAL_CELL && is_upper_right(i[ic],j[ic]) ) mpot[ic] = -3; + } + } + } + + // Initialize new variables +// int *i_old, *j_old, *level_old; + + int flags = RESTART_DATA; +#ifdef HAVE_J7 + if (parallel) flags = LOAD_BALANCE_MEMORY; +#endif + +#ifdef _OPENMP +#pragma omp master + { +#endif + i_old = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "i_old", flags); + j_old = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "j_old", flags); + level_old = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "level_old", flags); + + mesh_memory.memory_swap(&i, &i_old); + mesh_memory.memory_swap(&j, &j_old); + mesh_memory.memory_swap(&level, &level_old); + + index.clear(); + index.resize(new_ncells); +#ifdef _OPENMP + } +#pragma omp barrier +#endif + + static vector order; // Vector of refined 
mesh traversal order; set to -1 to indicate errors. + // + //vector invorder(4, -1); // Vector mapping location from base index. + + //int ref_entry = 0; + +#ifdef _OPENMP +#pragma omp master + { +#endif + // Insert new cells into the mesh at the point of refinement. + order.resize(4, -1); // Vector of refined mesh traversal order; set to -1 to indicate errors. + + ifirst = 0; + ilast = 0; + jfirst = 0; + jlast = 0; + level_first = 0; + level_last = 0; + + if (parallel) { +#ifdef HAVE_MPI + MPI_Request req[12]; + MPI_Status status[12]; + + static int prev = MPI_PROC_NULL; + static int next = MPI_PROC_NULL; + + if (mype != 0) prev = mype-1; + if (mype < numpe - 1) next = mype+1; + + MPI_Isend(&i_old[ncells-1], 1,MPI_INT,next,1,MPI_COMM_WORLD,req+0); + MPI_Irecv(&ifirst, 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+1); + + MPI_Isend(&i_old[0], 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+2); + MPI_Irecv(&ilast, 1,MPI_INT,next,1,MPI_COMM_WORLD,req+3); + + MPI_Isend(&j_old[ncells-1], 1,MPI_INT,next,1,MPI_COMM_WORLD,req+4); + MPI_Irecv(&jfirst, 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+5); + + MPI_Isend(&j_old[0], 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+6); + MPI_Irecv(&jlast, 1,MPI_INT,next,1,MPI_COMM_WORLD,req+7); + + MPI_Isend(&level_old[ncells-1], 1,MPI_INT,next,1,MPI_COMM_WORLD,req+8); + MPI_Irecv(&level_first, 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+9); + + MPI_Isend(&level_old[0], 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+10); + MPI_Irecv(&level_last, 1,MPI_INT,next,1,MPI_COMM_WORLD,req+11); + + MPI_Waitall(12, req, status); +#endif + } + +#ifdef _OPENMP + } +#pragma omp barrier +#endif + +#ifdef REZONE_NO_OPTIMIZATION + vector invorder(4, -1); // Vector mapping location from base index. + for (int ic = 0, nc = 0; ic < (int)ncells; ic++) + { + if (mpot[ic] == 0 || mpot[ic] == -1000000) + { // No change is needed; copy the old cell straight to the new mesh at this location. 
+ index[ic] = nc; + i[nc] = i_old[ic]; + j[nc] = j_old[ic]; + level[nc] = level_old[ic]; + nc++; + } // Complete no change needed. + + else if (mpot[ic] < 0) + { // Coarsening is needed; remove this cell and the other three and replace them with one. + index[ic] = nc; + if (mpot[ic] <= -2) { + //printf(" %d: DEBUG -- coarsening cell %d nc %d\n",mype,ic,nc); + i[nc] = i_old[ic]/2; + j[nc] = j_old[ic]/2; + level[nc] = level_old[ic] - 1; + nc++; + } + } // Coarsening complete. + + else if (mpot[ic] > 0) + { // Refinement is needed; insert four cells where once was one. + index[ic] = nc; + if (celltype[ic] == REAL_CELL) + { + set_refinement_order(&order[0], ic, ifirst, ilast, jfirst, jlast, + level_first, level_last, i_old, j_old, level_old); + + // Create the cells in the correct order and orientation. + for (int ii = 0; ii < 4; ii++) + { level[nc] = level_old[ic] + 1; + switch (order[ii]) + { case SW: + // lower left + invorder[SW] = ii; + i[nc] = i_old[ic]*2; + j[nc] = j_old[ic]*2; + nc++; + break; + + case SE: + // lower right + invorder[SE] = ii; + i[nc] = i_old[ic]*2 + 1; + j[nc] = j_old[ic]*2; + nc++; + break; + + case NW: + // upper left + invorder[NW] = ii; + i[nc] = i_old[ic]*2; + j[nc] = j_old[ic]*2 + 1; + nc++; + break; + + case NE: + // upper right + invorder[NE] = ii; + i[nc] = i_old[ic]*2 + 1; + j[nc] = j_old[ic]*2 + 1; + nc++; + break; } } // Complete cell refinement. + } // Complete real cell refinement. 
+ + else if (celltype[ic] == LEFT_BOUNDARY) { + // lower + i[nc] = i_old[ic]*2 + 1; + j[nc] = j_old[ic]*2; + level[nc] = level_old[ic] + 1; + nc++; + + // upper + i[nc] = i_old[ic]*2 + 1; + j[nc] = j_old[ic]*2 + 1; + level[nc] = level_old[ic] + 1; + nc++; + } + else if (celltype[ic] == RIGHT_BOUNDARY) { + // lower + i[nc] = i_old[ic]*2; + j[nc] = j_old[ic]*2; + level[nc] = level_old[ic] + 1; + nc++; + + // upper + i[nc] = i_old[ic]*2; + j[nc] = j_old[ic]*2 + 1; + level[nc] = level_old[ic] + 1; + nc++; + } + else if (celltype[ic] == BOTTOM_BOUNDARY) { + // left + i[nc] = i_old[ic]*2; + j[nc] = j_old[ic]*2 + 1; + level[nc] = level_old[ic] + 1; + nc++; + + // right + i[nc] = i_old[ic]*2 + 1; + j[nc] = j_old[ic]*2 + 1; + level[nc] = level_old[ic] + 1; + nc++; + } + else if (celltype[ic] == TOP_BOUNDARY) { + // right + i[nc] = i_old[ic]*2 + 1; + j[nc] = j_old[ic]*2; + level[nc] = level_old[ic] + 1; + nc++; + + // left + i[nc] = i_old[ic]*2; + j[nc] = j_old[ic]*2; + level[nc] = level_old[ic] + 1; + nc++; + } + } // Complete refinement needed. + } // Complete addition of new cells to the mesh. 
+ + mesh_memory.memory_delete(i_old); + mesh_memory.memory_delete(j_old); + mesh_memory.memory_delete(level_old); + + calc_celltype(new_ncells); + + if (have_state){ + flags = RESTART_DATA; + MallocPlus state_memory_old = state_memory; + malloc_plus_memory_entry *memory_item; + + for (memory_item = state_memory_old.memory_entry_by_name_begin(); + memory_item != state_memory_old.memory_entry_by_name_end(); + memory_item = state_memory_old.memory_entry_by_name_next() ) { + //printf("DEBUG -- it.mem_name %s elsize %lu\n",memory_item->mem_name,memory_item->mem_elsize); + if (memory_item->mem_elsize == 8) { + double *state_temp_double = (double *)state_memory.memory_malloc(new_ncells, sizeof(double), + "state_temp_double", flags); + + double *mem_ptr_double = (double *)memory_item->mem_ptr; + + //ref_entry = 0; + for (int ic=0, nc=0; ic<(int)ncells; ic++) { + + if (mpot[ic] == 0) { + state_temp_double[nc] = mem_ptr_double[ic]; + nc++; + } else if (mpot[ic] < 0){ + if (mpot[ic] == -2) { + int nr = nrht[ic]; + int nt = ntop[ic]; + int nrt = nrht[nt]; + state_temp_double[nc] = (mem_ptr_double[ic] + mem_ptr_double[nr] + + mem_ptr_double[nt] + mem_ptr_double[nrt])*0.25; + nc++; + } + if (mpot[ic] == -3) { + int nl = nlft[ic]; + int nb = nbot[ic]; + int nlb = nlft[nb]; + state_temp_double[nc] = (mem_ptr_double[ic] + mem_ptr_double[nl] + + mem_ptr_double[nb] + mem_ptr_double[nlb])*0.25; + nc++; + } + } else if (mpot[ic] > 0){ + // lower left + state_temp_double[nc] = mem_ptr_double[ic]; + nc++; + + // lower right + state_temp_double[nc] = mem_ptr_double[ic]; + nc++; + + if (celltype_save[ic] == REAL_CELL){ + // upper left + state_temp_double[nc] = mem_ptr_double[ic]; + nc++; + + // upper right + state_temp_double[nc] = mem_ptr_double[ic]; + nc++; + } + } + } + + state_memory.memory_replace(mem_ptr_double, state_temp_double); + } else if (memory_item->mem_elsize == 4) { + float *state_temp_float = (float *)state_memory.memory_malloc(new_ncells, sizeof(float), + 
"state_temp_float", flags); + + float *mem_ptr_float = (float *)memory_item->mem_ptr; + + for (int ic=0, nc=0; ic<(int)ncells; ic++) { + + if (mpot[ic] == 0) { + state_temp_float[nc] = mem_ptr_float[ic]; + nc++; + } else if (mpot[ic] < 0){ + if (mpot[ic] == -2) { + int nr = nrht[ic]; + int nt = ntop[ic]; + int nrt = nrht[nt]; + state_temp_float[nc] = (mem_ptr_float[ic] + mem_ptr_float[nr] + + mem_ptr_float[nt] + mem_ptr_float[nrt])*0.25; + nc++; + } + if (mpot[ic] == -3) { + int nl = nlft[ic]; + int nb = nbot[ic]; + int nlb = nlft[nb]; + state_temp_float[nc] = (mem_ptr_float[ic] + mem_ptr_float[nl] + + mem_ptr_float[nb] + mem_ptr_float[nlb])*0.25; + nc++; + } + } else if (mpot[ic] > 0){ + // lower left + state_temp_float[nc] = mem_ptr_float[ic]; + nc++; + + // lower right + state_temp_float[nc] = mem_ptr_float[ic]; + nc++; + + if (celltype_save[ic] == REAL_CELL){ + // upper left + state_temp_float[nc] = mem_ptr_float[ic]; + nc++; + + // upper right + state_temp_float[nc] = mem_ptr_float[ic]; + nc++; + } + } + } + + state_memory.memory_replace(mem_ptr_float, state_temp_float); + } + } + } +#else + // Data parallel optimizations for thread parallel -- slows down serial + // code by about 25% + static vector add_count; + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + add_count.resize(ncells); + new_ic.resize(ncells+1); +#ifdef _OPENMP + } // end master region +#pragma omp barrier +#endif + +#ifdef _OPENMP +#pragma omp for +#endif + for (int ic = 0; ic < (int)ncells; ic++){ + if (mpot[ic] == 0) { + add_count[ic] = 1; + } else if (mpot[ic] < 0) { + if (mpot[ic] == -2){ + add_count[ic] = 1; + } else { + add_count[ic] = 0; + } + } else if (mpot[ic] > 0) { + if (celltype[ic] != REAL_CELL) { + add_count[ic] = 2; + } else { + add_count[ic] = 4; + } + } + } + +#ifdef _OPENMP +#pragma omp barrier +#endif + scan (&add_count[0], &new_ic[0], ncells); +#ifdef _OPENMP +#pragma omp barrier +#endif + +#ifdef _OPENMP +#pragma omp for +#endif + for (int ic = 0; 
ic < (int)ncells; ic++) { + vector invorder(4, -1); // Vector mapping location from base index. + int nc = new_ic[ic]; + if (mpot[ic] == 0) + { // No change is needed; copy the old cell straight to the new mesh at this location. + index[ic] = nc; + i[nc] = i_old[ic]; + j[nc] = j_old[ic]; + level[nc] = level_old[ic]; + } // Complete no change needed. + + else if (mpot[ic] < 0) + { // Coarsening is needed; remove this cell and the other three and replace them with one. + index[ic] = nc; + if (mpot[ic] <= -2) { + //printf(" %d: DEBUG -- coarsening cell %d nc %d\n",mype,ic,nc); + i[nc] = i_old[ic]/2; + j[nc] = j_old[ic]/2; + level[nc] = level_old[ic] - 1; + } + } // Coarsening complete. + + else if (mpot[ic] > 0) + { // Refinement is needed; insert four cells where once was one. + index[ic] = nc; + if (celltype[ic] == REAL_CELL) + { + int order[4]; + set_refinement_order(&order[0], ic, ifirst, ilast, jfirst, jlast, + level_first, level_last, i_old, j_old, level_old); + + // Create the cells in the correct order and orientation. + for (int ii = 0; ii < 4; ii++) { + level[nc] = level_old[ic] + 1; + switch (order[ii]) { + case SW: + // lower left + invorder[SW] = ii; + i[nc] = i_old[ic]*2; + j[nc] = j_old[ic]*2; + nc++; + break; + + case SE: + // lower right + invorder[SE] = ii; + i[nc] = i_old[ic]*2 + 1; + j[nc] = j_old[ic]*2; + nc++; + break; + + case NW: + // upper left + invorder[NW] = ii; + i[nc] = i_old[ic]*2; + j[nc] = j_old[ic]*2 + 1; + nc++; + break; + + case NE: + // upper right + invorder[NE] = ii; + i[nc] = i_old[ic]*2 + 1; + j[nc] = j_old[ic]*2 + 1; + nc++; + break; + } + } // Complete cell refinement. + } // Complete real cell refinement. 
+ + else if (celltype[ic] == LEFT_BOUNDARY) { + // lower + i[nc] = i_old[ic]*2 + 1; + j[nc] = j_old[ic]*2; + level[nc] = level_old[ic] + 1; + nc++; + + // upper + i[nc] = i_old[ic]*2 + 1; + j[nc] = j_old[ic]*2 + 1; + level[nc] = level_old[ic] + 1; + nc++; + } + else if (celltype[ic] == RIGHT_BOUNDARY) { + // lower + i[nc] = i_old[ic]*2; + j[nc] = j_old[ic]*2; + level[nc] = level_old[ic] + 1; + nc++; + + // upper + i[nc] = i_old[ic]*2; + j[nc] = j_old[ic]*2 + 1; + level[nc] = level_old[ic] + 1; + nc++; + } + else if (celltype[ic] == BOTTOM_BOUNDARY) { + // left + i[nc] = i_old[ic]*2; + j[nc] = j_old[ic]*2 + 1; + level[nc] = level_old[ic] + 1; + nc++; + + // right + i[nc] = i_old[ic]*2 + 1; + j[nc] = j_old[ic]*2 + 1; + level[nc] = level_old[ic] + 1; + nc++; + } + else if (celltype[ic] == TOP_BOUNDARY) { + // right + i[nc] = i_old[ic]*2 + 1; + j[nc] = j_old[ic]*2; + level[nc] = level_old[ic] + 1; + nc++; + + // left + i[nc] = i_old[ic]*2; + j[nc] = j_old[ic]*2; + level[nc] = level_old[ic] + 1; + nc++; + } + } // Complete refinement needed. + } // Complete addition of new cells to the mesh. 
+ +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + mesh_memory.memory_delete(i_old); + mesh_memory.memory_delete(j_old); + mesh_memory.memory_delete(level_old); +#ifdef _OPENMP + } // end master region +#endif + + calc_celltype_threaded(new_ncells); + + if (have_state){ + + static MallocPlus state_memory_old; + static malloc_plus_memory_entry *memory_begin; + static malloc_plus_memory_entry *memory_end; + static malloc_plus_memory_entry *memory_next; + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + state_memory_old = state_memory; + + memory_begin = state_memory_old.memory_entry_by_name_begin(); + memory_end = state_memory_old.memory_entry_by_name_end(); +#ifdef _OPENMP + } // end master region +#pragma omp barrier +#endif + + for (malloc_plus_memory_entry *memory_item = memory_begin; + memory_item != memory_end; + memory_item = memory_next ) { + //ref_entry = 0; + //printf("DEBUG -- memory_item->mem_name %s elsize %lu\n",memory_item->mem_name,memory_item->mem_elsize); + if (memory_item->mem_elsize == 8) { + + static double *state_temp_double, *mem_ptr_double; + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + state_temp_double = (double *)state_memory.memory_malloc(new_ncells, sizeof(double), + "state_temp_double", flags); + mem_ptr_double = (double *)memory_item->mem_ptr; +#ifdef _OPENMP + } // end master region +#pragma omp barrier +#endif + + //ref_entry = 0; +#ifdef _OPENMP +#pragma omp for +#endif + for (int ic=0; ic<(int)ncells; ic++) { + + int nc = new_ic[ic]; + if (mpot[ic] == 0) { + state_temp_double[nc] = mem_ptr_double[ic]; + } else if (mpot[ic] < 0){ + if (mpot[ic] == -2) { + int nr = nrht[ic]; + int nt = ntop[ic]; + int nrt = nrht[nt]; + state_temp_double[nc] = (mem_ptr_double[ic] + mem_ptr_double[nr] + + mem_ptr_double[nt] + mem_ptr_double[nrt])*0.25; + } + if (mpot[ic] == -3) { + int nl = nlft[ic]; + int nb = nbot[ic]; + int nlb = nlft[nb]; + state_temp_double[nc] = 
(mem_ptr_double[ic] + mem_ptr_double[nl] + + mem_ptr_double[nb] + mem_ptr_double[nlb])*0.25; + } + } else if (mpot[ic] > 0){ + // lower left + state_temp_double[nc] = mem_ptr_double[ic]; + nc++; + + // lower right + state_temp_double[nc] = mem_ptr_double[ic]; + nc++; + + if (celltype_save[ic] == REAL_CELL){ + // upper left + state_temp_double[nc] = mem_ptr_double[ic]; + nc++; + + // upper right + state_temp_double[nc] = mem_ptr_double[ic]; + nc++; + } + } + } // end cell loop + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + state_memory.memory_replace(mem_ptr_double, state_temp_double); +#ifdef _OPENMP + } // end master region +#pragma omp barrier +#endif + + } else if (memory_item->mem_elsize == 4) { + + static float *state_temp_float, *mem_ptr_float; + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + state_temp_float = (float *)state_memory.memory_malloc(new_ncells, sizeof(float), + "state_temp_float", flags); + mem_ptr_float = (float *)memory_item->mem_ptr; +#ifdef _OPENMP + } // end master region +#pragma omp barrier +#endif + +#ifdef _OPENMP +#pragma omp for +#endif + for (int ic=0; ic<(int)ncells; ic++) { + + int nc = new_ic[ic]; + if (mpot[ic] == 0) { + state_temp_float[nc] = mem_ptr_float[ic]; + } else if (mpot[ic] < 0){ + if (mpot[ic] == -2) { + int nr = nrht[ic]; + int nt = ntop[ic]; + int nrt = nrht[nt]; + state_temp_float[nc] = (mem_ptr_float[ic] + mem_ptr_float[nr] + + mem_ptr_float[nt] + mem_ptr_float[nrt])*0.25; + } + if (mpot[ic] == -3) { + int nl = nlft[ic]; + int nb = nbot[ic]; + int nlb = nlft[nb]; + state_temp_float[nc] = (mem_ptr_float[ic] + mem_ptr_float[nl] + + mem_ptr_float[nb] + mem_ptr_float[nlb])*0.25; + } + } else if (mpot[ic] > 0){ + // lower left + state_temp_float[nc] = mem_ptr_float[ic]; + nc++; + + // lower right + state_temp_float[nc] = mem_ptr_float[ic]; + nc++; + + if (celltype_save[ic] == REAL_CELL){ + // upper left + state_temp_float[nc] = mem_ptr_float[ic]; + nc++; + + // upper 
right + state_temp_float[nc] = mem_ptr_float[ic]; + nc++; + } + } + } // end cell loop + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + state_memory.memory_replace(mem_ptr_float, state_temp_float); +#ifdef _OPENMP + } // end master region +#pragma omp barrier +#endif + } // mem elem size 4 bytes + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + memory_next = state_memory_old.memory_entry_by_name_next(); +#ifdef _OPENMP + } // end master region +#pragma omp barrier +#endif + + } // memory item iteration + + } // if have state + // End of data parallel optimizations +#endif + + if (neighbor_remap) { + int flags = 0; + static int *nlft_old, *nrht_old, *nbot_old, *ntop_old; +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + nlft_old = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "nlft_old", flags); + nrht_old = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "nrht_old", flags); + nbot_old = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "nbot_old", flags); + ntop_old = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "ntop_old", flags); +#ifdef _OPENMP + } // end master region +#pragma omp barrier +#endif + flags = RESTART_DATA; + +#ifdef _OPENMP +#pragma omp for +#endif + for (int ic = 0; ic < new_ncells; ic++){ + nlft_old[ic] = -1; + nrht_old[ic] = -1; + nbot_old[ic] = -1; + ntop_old[ic] = -1; + } + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + mesh_memory.memory_swap(&nlft, &nlft_old); + mesh_memory.memory_swap(&nrht, &nrht_old); + mesh_memory.memory_swap(&nbot, &nbot_old); + mesh_memory.memory_swap(&ntop, &ntop_old); +#ifdef _OPENMP + } // end master region +#pragma omp barrier +#endif + +#ifdef _OPENMP +#pragma omp for +#endif + for (int ic = 0; ic < (int)ncells; ic++){ + int nc = index[ic]; + + if (mpot[ic] == 0){ + if (nlft_old[ic] < (int)ncells && nlft_old[ic] >= 0){ + nlft[nc] = (mpot[nlft_old[ic]] == 0) ? 
index[nlft_old[ic]] : -1; + } + if (nrht_old[ic] < (int)ncells && nrht_old[ic] >= 0){ + nrht[nc] = (mpot[nrht_old[ic]] == 0) ? index[nrht_old[ic]] : -1; + } + if (nbot_old[ic] < (int)ncells && nbot_old[ic] >= 0){ + nbot[nc] = (mpot[nbot_old[ic]] == 0) ? index[nbot_old[ic]] : -1; + } + if (ntop_old[ic] < (int)ncells && ntop_old[ic] >= 0){ + ntop[nc] = (mpot[ntop_old[ic]] == 0) ? index[ntop_old[ic]] : -1; + } + } else if (mpot[ic] <= -2) { + nlft[nc] = -1; + nrht[nc] = -1; + nbot[nc] = -1; + ntop[nc] = -1; + } else if (mpot[ic] > 0){ + nlft[nc] = -1; + nlft[nc+1] = -1; + nrht[nc] = -1; + nrht[nc+1] = -1; + nbot[nc] = -1; + nbot[nc+1] = -1; + ntop[nc] = -1; + ntop[nc+1] = -1; + if (celltype[nc] == REAL_CELL){ + nlft[nc+2] = -1; + nlft[nc+3] = -1; + nrht[nc+2] = -1; + nrht[nc+3] = -1; + nbot[nc+2] = -1; + nbot[nc+3] = -1; + ntop[nc+2] = -1; + ntop[nc+3] = -1; + } + } + if (mpot[ic] > 0){ + nc++; + switch(celltype[nc]){ + case LEFT_BOUNDARY: + nlft[nc] = nc; + break; + case RIGHT_BOUNDARY: + nrht[nc] = nc; + break; + case BOTTOM_BOUNDARY: + nbot[nc] = nc; + break; + case TOP_BOUNDARY: + ntop[nc] = nc; + break; + } + } + } + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + nlft_old = (int *)mesh_memory.memory_delete(nlft_old); + nrht_old = (int *)mesh_memory.memory_delete(nrht_old); + nbot_old = (int *)mesh_memory.memory_delete(nbot_old); + ntop_old = (int *)mesh_memory.memory_delete(ntop_old); +#ifdef _OPENMP + } // end master region +#pragma omp barrier +#endif + } else { +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + nlft = (int *)mesh_memory.memory_delete(nlft); + nrht = (int *)mesh_memory.memory_delete(nrht); + nbot = (int *)mesh_memory.memory_delete(nbot); + ntop = (int *)mesh_memory.memory_delete(ntop); +#ifdef _OPENMP + } // end master region +#pragma omp barrier +#endif + } + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + //ncells = nc; + +#ifdef HAVE_MPI + if (parallel) { + 
MPI_Allgather(&new_ncells, 1, MPI_INT, &nsizes[0], 1, MPI_INT, MPI_COMM_WORLD); + + ndispl[0]=0; + for (int ip=1; ip 1) { + int i_tmp_first, i_tmp_last; + int j_tmp_first, j_tmp_last; + int level_tmp_first, level_tmp_last; + + ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, 0, 1*sizeof(cl_int), &i_tmp_first, NULL); + ezcl_enqueue_read_buffer(command_queue, dev_j, CL_FALSE, 0, 1*sizeof(cl_int), &j_tmp_first, NULL); + ezcl_enqueue_read_buffer(command_queue, dev_level, CL_FALSE, 0, 1*sizeof(cl_int), &level_tmp_first, NULL); + ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, (old_ncells-1)*sizeof(cl_int), 1*sizeof(cl_int), &i_tmp_last, NULL); + ezcl_enqueue_read_buffer(command_queue, dev_j, CL_FALSE, (old_ncells-1)*sizeof(cl_int), 1*sizeof(cl_int), &j_tmp_last, NULL); + ezcl_enqueue_read_buffer(command_queue, dev_level, CL_TRUE, (old_ncells-1)*sizeof(cl_int), 1*sizeof(cl_int), &level_tmp_last, NULL); + + MPI_Request req[12]; + MPI_Status status[12]; + + static int prev = MPI_PROC_NULL; + static int next = MPI_PROC_NULL; + + if (mype != 0) prev = mype-1; + if (mype < numpe - 1) next = mype+1; + + MPI_Isend(&i_tmp_last, 1,MPI_INT,next,1,MPI_COMM_WORLD,req+0); + MPI_Irecv(&ifirst, 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+1); + + MPI_Isend(&i_tmp_first, 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+2); + MPI_Irecv(&ilast, 1,MPI_INT,next,1,MPI_COMM_WORLD,req+3); + + MPI_Isend(&j_tmp_last, 1,MPI_INT,next,1,MPI_COMM_WORLD,req+4); + MPI_Irecv(&jfirst, 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+5); + + MPI_Isend(&j_tmp_first, 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+6); + MPI_Irecv(&jlast, 1,MPI_INT,next,1,MPI_COMM_WORLD,req+7); + + MPI_Isend(&level_tmp_last, 1,MPI_INT,next,1,MPI_COMM_WORLD,req+8); + MPI_Irecv(&level_first, 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+9); + + MPI_Isend(&level_tmp_first, 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+10); + MPI_Irecv(&level_last, 1,MPI_INT,next,1,MPI_COMM_WORLD,req+11); + + MPI_Waitall(12, req, status); + } +#endif + +/* + if (new_ncells != old_ncells){ + 
ncells = new_ncells; + } +*/ + + size_t mem_request = (int)((float)new_ncells*mem_factor); + cl_mem dev_celltype_new = ezcl_malloc(NULL, const_cast("dev_celltype_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_level_new = ezcl_malloc(NULL, const_cast("dev_level_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_i_new = ezcl_malloc(NULL, const_cast("dev_i_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_j_new = ezcl_malloc(NULL, const_cast("dev_j_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + + cl_mem dev_ijadd; + + vectorijadd(6); + if (numpe > 1) { + ijadd[0] = ifirst; + ijadd[1] = ilast; + ijadd[2] = jfirst; + ijadd[3] = jlast; + ijadd[4] = level_first; + ijadd[5] = level_last; + } + + size_t six = 6; + dev_ijadd = ezcl_malloc(NULL, const_cast("dev_ijadd"), &six, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + ezcl_enqueue_write_buffer(command_queue, dev_ijadd, CL_TRUE, 0, 6*sizeof(cl_int), (void*)&ijadd[0], NULL); + + cl_mem dev_indexoffset = ezcl_malloc(NULL, const_cast("dev_indexoffset"), &old_ncells, sizeof(cl_uint), CL_MEM_READ_WRITE, 0); + + int stencil = 0; + if (localStencil) stencil = 1; + + size_t local_work_size = 128; + size_t global_work_size = ((old_ncells+local_work_size - 1) /local_work_size) * local_work_size; + + ezcl_set_kernel_arg(kernel_rezone_all, 0, sizeof(cl_int), (void *)&old_ncells); + ezcl_set_kernel_arg(kernel_rezone_all, 1, sizeof(cl_int), (void *)&stencil); + ezcl_set_kernel_arg(kernel_rezone_all, 2, sizeof(cl_int), (void *)&levmx); + ezcl_set_kernel_arg(kernel_rezone_all, 3, sizeof(cl_mem), (void *)&dev_mpot); + ezcl_set_kernel_arg(kernel_rezone_all, 4, sizeof(cl_mem), (void *)&dev_level); + ezcl_set_kernel_arg(kernel_rezone_all, 5, sizeof(cl_mem), (void *)&dev_i); + ezcl_set_kernel_arg(kernel_rezone_all, 6, sizeof(cl_mem), (void *)&dev_j); + ezcl_set_kernel_arg(kernel_rezone_all, 7, sizeof(cl_mem), (void *)&dev_celltype); + 
ezcl_set_kernel_arg(kernel_rezone_all, 8, sizeof(cl_mem), (void *)&dev_level_new); + ezcl_set_kernel_arg(kernel_rezone_all, 9, sizeof(cl_mem), (void *)&dev_i_new); + ezcl_set_kernel_arg(kernel_rezone_all, 10, sizeof(cl_mem), (void *)&dev_j_new); + ezcl_set_kernel_arg(kernel_rezone_all, 11, sizeof(cl_mem), (void *)&dev_celltype_new); + ezcl_set_kernel_arg(kernel_rezone_all, 12, sizeof(cl_mem), (void *)&dev_ioffset); + ezcl_set_kernel_arg(kernel_rezone_all, 13, sizeof(cl_mem), (void *)&dev_indexoffset); + ezcl_set_kernel_arg(kernel_rezone_all, 14, sizeof(cl_mem), (void *)&dev_levdx); + ezcl_set_kernel_arg(kernel_rezone_all, 15, sizeof(cl_mem), (void *)&dev_levdy); + ezcl_set_kernel_arg(kernel_rezone_all, 16, sizeof(cl_mem), (void *)&dev_levtable); + ezcl_set_kernel_arg(kernel_rezone_all, 17, sizeof(cl_mem), (void *)&dev_ijadd); + ezcl_set_kernel_arg(kernel_rezone_all, 18, local_work_size * sizeof(cl_uint), NULL); + //ezcl_set_kernel_arg(kernel_rezone_all, 19, local_work_size * sizeof(cl_real4_t), NULL); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_rezone_all, 1, NULL, &global_work_size, &local_work_size, NULL); + + MallocPlus gpu_state_memory_old = gpu_state_memory; + malloc_plus_memory_entry *memory_item; + + for (memory_item = gpu_state_memory_old.memory_entry_by_name_begin(); + memory_item != gpu_state_memory_old.memory_entry_by_name_end(); + memory_item = gpu_state_memory_old.memory_entry_by_name_next() ) { + //printf("DEBUG -- it.mem_name %s elsize %lu\n",memory_item->mem_name,memory_item->mem_elsize); + cl_mem dev_state_mem_ptr = (cl_mem)memory_item->mem_ptr; + + if (memory_item->mem_elsize == 8){ +#ifndef MINIMUM_PRECISION + cl_mem dev_state_var_new = (cl_mem)gpu_state_memory.memory_malloc(max(old_ncells,new_ncells), sizeof(cl_double), const_cast("dev_state_var_new"), DEVICE_REGULAR_MEMORY); + + ezcl_set_kernel_arg(kernel_rezone_one_double, 0, sizeof(cl_int), (void *)&old_ncells); + ezcl_set_kernel_arg(kernel_rezone_one_double, 1, sizeof(cl_mem), 
(void *)&dev_i); + ezcl_set_kernel_arg(kernel_rezone_one_double, 2, sizeof(cl_mem), (void *)&dev_j); + ezcl_set_kernel_arg(kernel_rezone_one_double, 3, sizeof(cl_mem), (void *)&dev_nlft); + ezcl_set_kernel_arg(kernel_rezone_one_double, 4, sizeof(cl_mem), (void *)&dev_nrht); + ezcl_set_kernel_arg(kernel_rezone_one_double, 5, sizeof(cl_mem), (void *)&dev_nbot); + ezcl_set_kernel_arg(kernel_rezone_one_double, 6, sizeof(cl_mem), (void *)&dev_ntop); + ezcl_set_kernel_arg(kernel_rezone_one_double, 7, sizeof(cl_mem), (void *)&dev_celltype); + ezcl_set_kernel_arg(kernel_rezone_one_double, 8, sizeof(cl_mem), (void *)&dev_mpot); + ezcl_set_kernel_arg(kernel_rezone_one_double, 9, sizeof(cl_mem), (void *)&dev_indexoffset); + ezcl_set_kernel_arg(kernel_rezone_one_double,10, sizeof(cl_mem), (void *)&dev_state_mem_ptr); + ezcl_set_kernel_arg(kernel_rezone_one_double,11, sizeof(cl_mem), (void *)&dev_state_var_new); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_rezone_one_double, 1, NULL, &global_work_size, &local_work_size, NULL); + + gpu_state_memory.memory_replace(dev_state_mem_ptr, dev_state_var_new); +#else + printf("ERROR -- can't have double type for state variable\n"); + exit(1); +#endif + } else if (memory_item->mem_elsize == 4){ + cl_mem dev_state_var_new = (cl_mem)gpu_state_memory.memory_malloc(max(old_ncells,new_ncells), sizeof(cl_float), const_cast("dev_state_var_new"), DEVICE_REGULAR_MEMORY); + + ezcl_set_kernel_arg(kernel_rezone_one_float, 0, sizeof(cl_int), (void *)&old_ncells); + ezcl_set_kernel_arg(kernel_rezone_one_float, 1, sizeof(cl_mem), (void *)&dev_i); + ezcl_set_kernel_arg(kernel_rezone_one_float, 2, sizeof(cl_mem), (void *)&dev_j); + ezcl_set_kernel_arg(kernel_rezone_one_float, 3, sizeof(cl_mem), (void *)&dev_nlft); + ezcl_set_kernel_arg(kernel_rezone_one_float, 4, sizeof(cl_mem), (void *)&dev_nrht); + ezcl_set_kernel_arg(kernel_rezone_one_float, 5, sizeof(cl_mem), (void *)&dev_nbot); + ezcl_set_kernel_arg(kernel_rezone_one_float, 6, 
sizeof(cl_mem), (void *)&dev_ntop); + ezcl_set_kernel_arg(kernel_rezone_one_float, 7, sizeof(cl_mem), (void *)&dev_celltype); + ezcl_set_kernel_arg(kernel_rezone_one_float, 8, sizeof(cl_mem), (void *)&dev_mpot); + ezcl_set_kernel_arg(kernel_rezone_one_float, 9, sizeof(cl_mem), (void *)&dev_indexoffset); + ezcl_set_kernel_arg(kernel_rezone_one_float,10, sizeof(cl_mem), (void *)&dev_state_mem_ptr); + ezcl_set_kernel_arg(kernel_rezone_one_float,11, sizeof(cl_mem), (void *)&dev_state_var_new); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_rezone_one_float, 1, NULL, &global_work_size, &local_work_size, NULL); + + gpu_state_memory.memory_replace(dev_state_mem_ptr, dev_state_var_new); + } + } + + if (neighbor_remap & ! parallel) { + size_t mem_request = (int)((float)new_ncells*mem_factor); + cl_mem dev_nlft_new = ezcl_malloc(NULL, const_cast("dev_nlft_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_nrht_new = ezcl_malloc(NULL, const_cast("dev_nrht_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_nbot_new = ezcl_malloc(NULL, const_cast("dev_nbot_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_ntop_new = ezcl_malloc(NULL, const_cast("dev_ntop_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + + ezcl_set_kernel_arg(kernel_neighbor_init, 0, sizeof(cl_int), (void *)&new_ncells); + ezcl_set_kernel_arg(kernel_neighbor_init, 1, sizeof(cl_mem), (void *)&dev_nlft_new); + ezcl_set_kernel_arg(kernel_neighbor_init, 2, sizeof(cl_mem), (void *)&dev_nrht_new); + ezcl_set_kernel_arg(kernel_neighbor_init, 3, sizeof(cl_mem), (void *)&dev_nbot_new); + ezcl_set_kernel_arg(kernel_neighbor_init, 4, sizeof(cl_mem), (void *)&dev_ntop_new); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_neighbor_init, 1, NULL, &global_work_size, &local_work_size, NULL); + + ezcl_set_kernel_arg(kernel_rezone_neighbors, 0, sizeof(cl_int), (void *)&old_ncells); + ezcl_set_kernel_arg(kernel_rezone_neighbors, 1, 
sizeof(cl_mem), (void *)&dev_mpot); + ezcl_set_kernel_arg(kernel_rezone_neighbors, 2, sizeof(cl_mem), (void *)&dev_indexoffset); + ezcl_set_kernel_arg(kernel_rezone_neighbors, 3, sizeof(cl_mem), (void *)&dev_nlft); + ezcl_set_kernel_arg(kernel_rezone_neighbors, 4, sizeof(cl_mem), (void *)&dev_nrht); + ezcl_set_kernel_arg(kernel_rezone_neighbors, 5, sizeof(cl_mem), (void *)&dev_nbot); + ezcl_set_kernel_arg(kernel_rezone_neighbors, 6, sizeof(cl_mem), (void *)&dev_ntop); + ezcl_set_kernel_arg(kernel_rezone_neighbors, 7, sizeof(cl_mem), (void *)&dev_celltype_new); + ezcl_set_kernel_arg(kernel_rezone_neighbors, 8, sizeof(cl_mem), (void *)&dev_nlft_new); + ezcl_set_kernel_arg(kernel_rezone_neighbors, 9, sizeof(cl_mem), (void *)&dev_nrht_new); + ezcl_set_kernel_arg(kernel_rezone_neighbors, 10, sizeof(cl_mem), (void *)&dev_nbot_new); + ezcl_set_kernel_arg(kernel_rezone_neighbors, 11, sizeof(cl_mem), (void *)&dev_ntop_new); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_rezone_neighbors, 1, NULL, &global_work_size, &local_work_size, NULL); + + ezcl_device_memory_swap(&dev_nlft, &dev_nlft_new); + ezcl_device_memory_swap(&dev_nrht, &dev_nrht_new); + ezcl_device_memory_swap(&dev_nbot, &dev_nbot_new); + ezcl_device_memory_swap(&dev_ntop, &dev_ntop_new); + + ezcl_device_memory_delete(dev_nlft_new); + ezcl_device_memory_delete(dev_nrht_new); + ezcl_device_memory_delete(dev_nbot_new); + ezcl_device_memory_delete(dev_ntop_new); + } else { + ezcl_device_memory_delete(dev_nlft); + ezcl_device_memory_delete(dev_nrht); + ezcl_device_memory_delete(dev_nbot); + ezcl_device_memory_delete(dev_ntop); + dev_nlft = NULL; + dev_nrht = NULL; + dev_nbot = NULL; + dev_ntop = NULL; + } + + ezcl_device_memory_delete(dev_indexoffset); + + if (new_ncells != old_ncells){ + resize_old_device_memory(new_ncells); + } + + ezcl_device_memory_swap(&dev_celltype, &dev_celltype_new); + ezcl_device_memory_swap(&dev_level, &dev_level_new); + ezcl_device_memory_swap(&dev_i, &dev_i_new); + 
ezcl_device_memory_swap(&dev_j, &dev_j_new); + + ezcl_device_memory_delete(dev_mpot); + ezcl_device_memory_delete(dev_ijadd); + ezcl_device_memory_delete(dev_ioffset); + + ezcl_device_memory_delete(dev_i_new); + ezcl_device_memory_delete(dev_j_new); + ezcl_device_memory_delete(dev_celltype_new); + ezcl_device_memory_delete(dev_level_new); + +#ifdef HAVE_MPI + if (parallel) { + int new_ncells = ncells + add_ncells; + MPI_Allgather(&new_ncells, 1, MPI_INT, &nsizes[0], 1, MPI_INT, MPI_COMM_WORLD); + + ndispl[0]=0; + for (int ip=1; ip= 2) cpu_timer_start(&tstart_lev2); + + int jmaxsize = (jmax+1)*IPOW2(levmx); + int imaxsize = (imax+1)*IPOW2(levmx); + + int *hash; + +#ifdef _OPENMP + hash = compact_hash_init_openmp(ncells, imaxsize, jmaxsize, 0); +#else + hash = compact_hash_init(ncells, imaxsize, jmaxsize, 0); +#endif + +#ifdef _OPENMP +#pragma omp for +#endif + for(int ic=0; ic lev && ntop[nlft[ic]] == -1) || + (level[nrht[ic]] > lev && ntop[nrht[ic]] == -1) || + (level[nbot[ic]] > lev && nrht[nbot[ic]] == -1) || + (level[ntop[ic]] > lev && nrht[ntop[ic]] == -1) ) need_hash = true; + } + + if (need_hash) { + int levmult = IPOW2(levmx-lev); + int ii = i[ic]*levmult; + int jj = j[ic]*levmult; + + write_hash(ic,jj*imaxsize+ii,hash); + } + } + + if (TIMING_LEVEL >= 2) { +#ifdef _OPENMP +#pragma omp master +#endif + cpu_timers[MESH_TIMER_HASH_SETUP] += cpu_timer_stop(tstart_lev2); + cpu_timer_start(&tstart_lev2); + } + + //fprintf(fp,"DEBUG ncells is %lu\n",ncells); +#ifdef _OPENMP +#pragma omp for +#endif + for (int ic=0; ic<(int)ncells; ic++){ + int ii = i[ic]; + int jj = j[ic]; + int lev = level[ic]; + int levmult = IPOW2(levmx-lev); + int iicur = ii*levmult; + int iilft = max( (ii-1)*levmult, 0 ); + int iirht = min( (ii+1)*levmult, imaxsize-1); + int jjcur = jj*levmult; + int jjbot = max( (jj-1)*levmult, 0 ); + int jjtop = min( (jj+1)*levmult, jmaxsize-1); + + int nlftval = nlft[ic]; + int nrhtval = nrht[ic]; + int nbotval = nbot[ic]; + int ntopval = ntop[ic]; + + // 
Taking care of boundary cells + // Force each boundary cell to point to itself on its boundary direction + if (nlftval < 0 && iicur < 1*IPOW2(levmx) ) nlftval = ic; + if (nbotval < 0 && jjcur < 1*IPOW2(levmx) ) nbotval = ic; + if (nrhtval < 0 && iicur > imax*IPOW2(levmx)-1) nrhtval = ic; + if (ntopval < 0 && jjcur > jmax*IPOW2(levmx)-1) ntopval = ic; + // Boundary cells next to corner boundary need special checks + if (nlftval < 0 && iicur == 1*IPOW2(levmx) && (jjcur < 1*IPOW2(levmx) || jjcur >= jmax*IPOW2(levmx) ) ) nlftval = ic; + if (nbotval < 0 && jjcur == 1*IPOW2(levmx) && (iicur < 1*IPOW2(levmx) || iicur >= imax*IPOW2(levmx) ) ) nbotval = ic; + if (nrhtval < 0 && iirht == imax*IPOW2(levmx) && (jjcur < 1*IPOW2(levmx) || jjcur >= jmax*IPOW2(levmx) ) ) nrhtval = ic; + if (ntopval < 0 && jjtop == jmax*IPOW2(levmx) && (iicur < 1*IPOW2(levmx) || iicur >= imax*IPOW2(levmx) ) ) ntopval = ic; + + // need to check for finer neighbor first + // Right and top neighbor don't change for finer, so drop through to same size + // Left and bottom need to be half of same size index for finer test + if (lev != levmx) { + int iilftfiner = iicur-(iicur-iilft)/2; + //int iirhtfiner = (iicur+iirht)/2; + int jjbotfiner = jjcur-(jjcur-jjbot)/2; + //int jjtopfiner = (jjcur+jjtop)/2; + if (nlftval < 0) nlftval = read_hash(jjcur*imaxsize+iilftfiner, hash); + if (nbotval < 0) nbotval = read_hash(jjbotfiner*imaxsize+iicur, hash); + } + + // same size neighbor + if (nlftval < 0) nlftval = read_hash(jjcur*imaxsize+iilft, hash); + if (nrhtval < 0) nrhtval = read_hash(jjcur*imaxsize+iirht, hash); + if (nbotval < 0) nbotval = read_hash(jjbot*imaxsize+iicur, hash); + if (ntopval < 0) ntopval = read_hash(jjtop*imaxsize+iicur, hash); + + // Now we need to take care of special case where bottom and left boundary need adjustment since + // expected cell doesn't exist on these boundaries if it is finer than current cell + if (lev != levmx) { + if (jjcur < 1*IPOW2(levmx)) { + if (nrhtval < 0) { + int 
jjtopfiner = (jjcur+jjtop)/2; + nrhtval = read_hash(jjtopfiner*imaxsize+iirht, hash); + } + if (nlftval < 0) { + int iilftfiner = iicur-(iicur-iilft)/2; + int jjtopfiner = (jjcur+jjtop)/2; + nlftval = read_hash(jjtopfiner*imaxsize+iilftfiner, hash); + } + } + + if (iicur < 1*IPOW2(levmx)) { + if (ntopval < 0) { + int iirhtfiner = (iicur+iirht)/2; + ntopval = read_hash(jjtop*imaxsize+iirhtfiner, hash); + } + if (nbotval < 0) { + int iirhtfiner = (iicur+iirht)/2; + int jjbotfiner = jjcur-(jjcur-jjbot)/2; + nbotval = read_hash(jjbotfiner*imaxsize+iirhtfiner, hash); + } + } + } + + // coarser neighbor + if (lev != 0){ + if (nlftval < 0) { + iilft -= iicur-iilft; + int jjlft = (jj/2)*2*levmult; + nlftval = read_hash(jjlft*imaxsize+iilft, hash); + } + if (nrhtval < 0) { + int jjrht = (jj/2)*2*levmult; + nrhtval = read_hash(jjrht*imaxsize+iirht, hash); + } + if (nbotval < 0) { + jjbot -= jjcur-jjbot; + int iibot = (ii/2)*2*levmult; + nbotval = read_hash(jjbot*imaxsize+iibot, hash); + } + if (ntopval < 0) { + int iitop = (ii/2)*2*levmult; + ntopval = read_hash(jjtop*imaxsize+iitop, hash); + } + } + + nlft[ic] = nlftval; + nrht[ic] = nrhtval; + nbot[ic] = nbotval; + ntop[ic] = ntopval; + + //printf("neighbors[%d] = %d %d %d %d\n",ic,nlft[ic],nrht[ic],nbot[ic],ntop[ic]); + } + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + write_hash_collision_report(); + read_hash_collision_report(); + + compact_hash_delete(hash); + + if (TIMING_LEVEL >= 2) cpu_timers[MESH_TIMER_HASH_QUERY] += cpu_timer_stop(tstart_lev2); +#ifdef _OPENMP + } // master block +#endif + + } else if (calc_neighbor_type == KDTREE) { + + struct timeval tstart_lev2; + if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2); + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + TBounds box; + vector index_list(IPOW2(levmx*levmx) ); + + int num; + + ibase = 0; + calc_spatial_coordinates(ibase); + + kdtree_setup(); + + if (TIMING_LEVEL >= 2) { + 
cpu_timers[MESH_TIMER_KDTREE_SETUP] += cpu_timer_stop(tstart_lev2); + cpu_timer_start(&tstart_lev2); + } + + for (int ic=0; ic= 2) cpu_timers[MESH_TIMER_KDTREE_QUERY] += cpu_timer_stop(tstart_lev2); + +#ifdef _OPENMP + } +#pragma omp barrier +#endif + } // calc_neighbor_type + +#ifdef _OPENMP +#pragma omp master +#endif + ncells_ghost = ncells; + + } + +#ifdef _OPENMP +#pragma omp master +#endif + cpu_timers[MESH_TIMER_CALC_NEIGHBORS] += cpu_timer_stop(tstart_cpu); +} + +void Mesh::calc_neighbors_local(void) +{ + struct timeval tstart_cpu; + cpu_timer_start(&tstart_cpu); + + if (do_rezone) { + + int flags = INDEX_ARRAY_MEMORY; + +#if defined (HAVE_J7) + if (parallel) flags |= LOAD_BALANCE_MEMORY; +#endif + +#ifdef _OPENMP +#pragma omp master + { +#endif + cpu_counters[MESH_COUNTER_CALC_NEIGH]++; + + if (mesh_memory.get_memory_size(nlft) < ncells){ + if (nlft != NULL) nlft = (int *)mesh_memory.memory_delete(nlft); + if (nrht != NULL) nrht = (int *)mesh_memory.memory_delete(nrht); + if (nbot != NULL) nbot = (int *)mesh_memory.memory_delete(nbot); + if (ntop != NULL) ntop = (int *)mesh_memory.memory_delete(ntop); + nlft = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "nlft", flags); + nrht = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "nrht", flags); + nbot = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "nbot", flags); + ntop = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "ntop", flags); + } +#ifdef _OPENMP + } +#pragma omp barrier +#endif + + int lowerBound, upperBound; + set_bounds(ncells); + get_bounds(lowerBound, upperBound); + for (int ic = lowerBound; ic < upperBound; ic++){ + nlft[ic] = -98; + nrht[ic] = -98; + nbot[ic] = -98; + ntop[ic] = -98; + } + + if (calc_neighbor_type == HASH_TABLE) { + + struct timeval tstart_lev2; + if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2); + + ncells_ghost = ncells; + + // Find maximum i column and j row for this processor + static int jmintile, imintile, jmaxtile, imaxtile; + +#ifdef 
_OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + jmintile = (jmax+1)*IPOW2(levmx); + imintile = (imax+1)*IPOW2(levmx); + jmaxtile = 0; + imaxtile = 0; +#ifdef _OPENMP + } +#pragma omp barrier +#endif + + int my_jmintile = jmintile; + int my_imintile = imintile; + int my_jmaxtile = 0; + int my_imaxtile = 0; + +#ifdef _OPENMP +#pragma omp for +#endif + for(uint ic=0; ic levmx) printf("DEBUG -- cell %d lev %d\n",ic,level[ic]); + if ( j[ic] *IPOW2(levmx-lev) < my_jmintile) my_jmintile = j[ic] *IPOW2(levmx-lev) ; + if ((j[ic]+1)*IPOW2(levmx-lev)-1 > my_jmaxtile) my_jmaxtile = (j[ic]+1)*IPOW2(levmx-lev)-1; + if ( i[ic] *IPOW2(levmx-lev) < my_imintile) my_imintile = i[ic] *IPOW2(levmx-lev) ; + if ((i[ic]+1)*IPOW2(levmx-lev)-1 > my_imaxtile) my_imaxtile = (i[ic]+1)*IPOW2(levmx-lev)-1; + } +#ifdef _OPENMP +#pragma omp critical + { +#endif + if (my_jmintile < jmintile) jmintile = my_jmintile; + if (my_imintile < imintile) imintile = my_imintile; + if (my_jmaxtile > jmaxtile) jmaxtile = my_jmaxtile; + if (my_imaxtile > imaxtile) imaxtile = my_imaxtile; +#ifdef _OPENMP + } // end critical region +#pragma omp barrier +#endif + + //if (DEBUG) fprintf(fp,"%d: Tile Sizes are imin %d imax %d jmin %d jmax %d\n",mype,imintile,imaxtile,jmintile,jmaxtile); + + // Expand size by 2*coarse_cells for ghost cells + int jminsize = max(jmintile-2*IPOW2(levmx),0); + int jmaxsize = min(jmaxtile+2*IPOW2(levmx),(jmax+1)*IPOW2(levmx)); + int iminsize = max(imintile-2*IPOW2(levmx),0); + int imaxsize = min(imaxtile+2*IPOW2(levmx),(imax+1)*IPOW2(levmx)); + //if (DEBUG) fprintf(fp,"%d: Sizes are imin %d imax %d jmin %d jmax %d\n",mype,iminsize,imaxsize,jminsize,jmaxsize); + + //fprintf(fp,"DEBUG -- ncells %lu\n",ncells); + + static int *hash; + +#ifdef _OPENMP + hash = compact_hash_init_openmp(ncells, imaxsize-iminsize, jmaxsize-jminsize, 0); +#else + hash = compact_hash_init(ncells, imaxsize-iminsize, jmaxsize-jminsize, 0); +#endif + + //printf("%d: DEBUG -- noffset %d cells 
%d\n",mype,noffset,ncells); + + if (DEBUG) { +#ifdef _OPENMP +#pragma omp master +#endif + fprintf(fp,"%d: Sizes are imin %d imax %d jmin %d jmax %d\n",mype,iminsize,imaxsize,jminsize,jmaxsize); + } + + static int imaxcalc, jmaxcalc; + +#ifdef _OPENMP +#pragma omp for +#endif + for(uint ic=0; ic= 2) { +#ifdef _OPENMP +#pragma omp master +#endif + cpu_timers[MESH_TIMER_HASH_SETUP] += cpu_timer_stop(tstart_lev2); + cpu_timer_start(&tstart_lev2); + } + +#ifdef _OPENMP +#pragma omp master + { +#endif + // Set neighbors to global cell numbers from hash + jmaxcalc = (jmax+1)*IPOW2(levmx); + imaxcalc = (imax+1)*IPOW2(levmx); +#ifdef _OPENMP + } +#pragma omp barrier +#endif + +#ifdef _OPENMP +#pragma omp for +#endif + for (uint ic=0; ic imax*IPOW2(levmx)-1-iminsize) nrhtval = ic+noffset; + if (jjcur > jmax*IPOW2(levmx)-1-jminsize) ntopval = ic+noffset; + // Boundary cells next to corner boundary need special checks + if (iicur == 1*IPOW2(levmx)-iminsize && (jjcur < 1*IPOW2(levmx)-jminsize || jjcur >= jmax*IPOW2(levmx)-jminsize ) ) nlftval = ic+noffset; + if (jjcur == 1*IPOW2(levmx)-jminsize && (iicur < 1*IPOW2(levmx)-iminsize || iicur >= imax*IPOW2(levmx)-iminsize ) ) nbotval = ic+noffset; + if (iirht == imax*IPOW2(levmx)-iminsize && (jjcur < 1*IPOW2(levmx)-jminsize || jjcur >= jmax*IPOW2(levmx)-jminsize ) ) nrhtval = ic+noffset; + if (jjtop == jmax*IPOW2(levmx)-jminsize && (iicur < 1*IPOW2(levmx)-iminsize || iicur >= imax*IPOW2(levmx)-iminsize ) ) ntopval = ic+noffset; + + // need to check for finer neighbor first + // Right and top neighbor don't change for finer, so drop through to same size + // Left and bottom need to be half of same size index for finer test + if (lev != levmx) { + int iilftfiner = iicur-(iicur-iilft)/2; + int jjbotfiner = jjcur-(jjcur-jjbot)/2; + if (nlftval < 0) nlftval = read_hash(jjcur *(imaxsize-iminsize)+iilftfiner, hash); + if (nbotval < 0) nbotval = read_hash(jjbotfiner*(imaxsize-iminsize)+iicur, hash); + } + + // same size neighbor + if 
(nlftval < 0) { + int nlfttry = read_hash(jjcur*(imaxsize-iminsize)+iilft, hash); + if (nlfttry >= 0 && nlfttry < (int)ncells && level[nlfttry] == lev) nlftval = nlfttry; + } + if (nrhtval < 0) nrhtval = read_hash(jjcur*(imaxsize-iminsize)+iirht, hash); + if (nbotval < 0) { + int nbottry = read_hash(jjbot*(imaxsize-iminsize)+iicur, hash); + if (nbottry >= 0 && nbottry < (int)ncells && level[nbottry] == lev) nbotval = nbottry; + } + if (ntopval < 0) ntopval = read_hash(jjtop*(imaxsize-iminsize)+iicur, hash); + + // Now we need to take care of special case where bottom and left boundary need adjustment since + // expected cell doesn't exist on these boundaries if it is finer than current cell + if (lev != levmx) { + if (jjcur < 1*IPOW2(levmx)) { + if (nrhtval < 0) { + int jjtopfiner = (jjcur+jjtop)/2; + nrhtval = read_hash(jjtopfiner*(imaxsize-iminsize)+iirht, hash); + } + if (nlftval < 0) { + int iilftfiner = iicur-(iicur-iilft)/2; + int jjtopfiner = (jjcur+jjtop)/2; + nlftval = read_hash(jjtopfiner*(imaxsize-iminsize)+iilftfiner, hash); + } + } + + if (iicur < 1*IPOW2(levmx)) { + if (ntopval < 0) { + int iirhtfiner = (iicur+iirht)/2; + ntopval = read_hash(jjtop*(imaxsize-iminsize)+iirhtfiner, hash); + } + if (nbotval < 0) { + int iirhtfiner = (iicur+iirht)/2; + int jjbotfiner = jjcur-(jjcur-jjbot)/2; + nbotval = read_hash(jjbotfiner*(imaxsize-iminsize)+iirhtfiner, hash); + } + } + } + + // coarser neighbor + if (lev != 0){ + if (nlftval < 0) { + iilft -= iicur-iilft; + int jjlft = (jj/2)*2*levmult-jminsize; + int nlfttry = read_hash(jjlft*(imaxsize-iminsize)+iilft, hash); + if (nlfttry >= 0 && nlfttry < (int)ncells && level[nlfttry] == lev-1) nlftval = nlfttry; + } + if (nrhtval < 0) { + int jjrht = (jj/2)*2*levmult-jminsize; + int nrhttry = read_hash(jjrht*(imaxsize-iminsize)+iirht, hash); + if (nrhttry >= 0 && nrhttry < (int)ncells && level[nrhttry] == lev-1) nrhtval = nrhttry; + } + if (nbotval < 0) { + jjbot -= jjcur-jjbot; + int iibot = 
(ii/2)*2*levmult-iminsize; + int nbottry = read_hash(jjbot*(imaxsize-iminsize)+iibot, hash); + if (nbottry >= 0 && nbottry < (int)ncells && level[nbottry] == lev-1) nbotval = nbottry; + } + if (ntopval < 0) { + int iitop = (ii/2)*2*levmult-iminsize; + int ntoptry = read_hash(jjtop*(imaxsize-iminsize)+iitop, hash); + if (ntoptry >= 0 && ntoptry < (int)ncells && level[ntoptry] == lev-1) ntopval = ntoptry; + } + } + + nlft[ic] = nlftval; + nrht[ic] = nrhtval; + nbot[ic] = nbotval; + ntop[ic] = ntopval; + + //fprintf(fp,"%d: neighbors[%d] = %d %d %d %d\n",mype,ic,nlft[ic],nrht[ic],nbot[ic],ntop[ic]); + } + + if (DEBUG) { +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + print_local(); + + int jmaxglobal = (jmax+1)*IPOW2(levmx); + int imaxglobal = (imax+1)*IPOW2(levmx); + fprintf(fp,"\n HASH 0 numbering\n"); + for (int jj = jmaxglobal-1; jj>=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) { + fprintf(fp,"%5d",read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash)); + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) { + int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash)-noffset; + if (hashval >= 0 && hashval < (int)ncells) { + fprintf(fp,"%5d",nlft[hashval]); + } else { + fprintf(fp," "); + } + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) { + int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash)-noffset; + if (hashval >= 0 && hashval < (int)ncells) { + fprintf(fp,"%5d",nrht[hashval]); + } else { + 
fprintf(fp," "); + } + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) { + int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash)-noffset; + if (hashval >= 0 && hashval < (int)ncells) { + fprintf(fp,"%5d",nbot[hashval]); + } else { + fprintf(fp," "); + } + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) { + int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash)-noffset; + if (hashval >= 0 && hashval < (int)ncells) { + fprintf(fp,"%5d",ntop[hashval]); + } else { + fprintf(fp," "); + } + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii= 2) { +#ifdef _OPENMP +#pragma omp master +#endif + cpu_timers[MESH_TIMER_HASH_QUERY] += cpu_timer_stop(tstart_lev2); + cpu_timer_start(&tstart_lev2); + } + +#ifdef HAVE_MPI + if (numpe > 1) { + static int num_comm_partners; + + static vector iminsize_global; + static vector imaxsize_global; + static vector jminsize_global; + static vector jmaxsize_global; + static vector comm_partner; + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + iminsize_global.resize(numpe); + imaxsize_global.resize(numpe); + jminsize_global.resize(numpe); + jmaxsize_global.resize(numpe); + comm_partner.resize(numpe,-1); + + MPI_Allgather(&iminsize, 1, MPI_INT, &iminsize_global[0], 1, MPI_INT, MPI_COMM_WORLD); + MPI_Allgather(&imaxsize, 1, MPI_INT, &imaxsize_global[0], 1, MPI_INT, MPI_COMM_WORLD); + MPI_Allgather(&jminsize, 1, MPI_INT, &jminsize_global[0], 1, MPI_INT, MPI_COMM_WORLD); + MPI_Allgather(&jmaxsize, 1, MPI_INT, 
&jmaxsize_global[0], 1, MPI_INT, MPI_COMM_WORLD); + + num_comm_partners = 0; + for (int ip = 0; ip < numpe; ip++){ + if (ip == mype) continue; + if (iminsize_global[ip] > imaxtile) continue; + if (imaxsize_global[ip] < imintile) continue; + if (jminsize_global[ip] > jmaxtile) continue; + if (jmaxsize_global[ip] < jmintile) continue; + comm_partner[num_comm_partners] = ip; + num_comm_partners++; + //if (DEBUG) fprintf(fp,"%d: overlap with processor %d bounding box is %d %d %d %d\n",mype,ip,iminsize_global[ip],imaxsize_global[ip],jminsize_global[ip],jmaxsize_global[ip]); + } +#ifdef _OPENMP + } +#pragma omp barrier +#endif + + static vector border_cell; + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + border_cell.resize(ncells); + +#ifdef BOUNDS_CHECK + for (uint ic=0; ic= (int)ncells) printf("%d: Warning at line %d cell %d nlft %d\n",mype,__LINE__,ic,nl); + } + int nr = nrht[ic]; + if (nr != -1){ + nr -= noffset; + if (nr<0 || nr>= (int)ncells) printf("%d: Warning at line %d cell %d nrht %d\n",mype,__LINE__,ic,nr); + } + int nb = nbot[ic]; + if (nb != -1){ + nb -= noffset; + if (nb<0 || nb>= (int)ncells) printf("%d: Warning at line %d cell %d nbot %d\n",mype,__LINE__,ic,nb); + } + int nt = ntop[ic]; + if (nt != -1){ + nt -= noffset; + if (nt<0 || nt>= (int)ncells) printf("%d: Warning at line %d cell %d ntop %d\n",mype,__LINE__,ic,nt); + } + } +#endif + +#ifdef _OPENMP + } +#pragma omp barrier +#endif + + static vector border_cell_out; + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + border_cell_out.resize(ncells); +#ifdef _OPENMP + } +#pragma omp barrier +#endif + +#ifdef _OPENMP +#pragma omp for +#endif + for (uint ic=0; ic level[ic] && ntop[nlft[ic]-noffset] == -1) ){ + iborder_cell |= 0x0001; + } + if (nrht[ic] == -1 || (level[nrht[ic]-noffset] > level[ic] && ntop[nrht[ic]-noffset] == -1) ){ + iborder_cell |= 0x0002; + } + if (nbot[ic] == -1 || (level[nbot[ic]-noffset] > level[ic] && nrht[nbot[ic]-noffset] == -1) ) 
{ + iborder_cell |= 0x0004; + } + if (ntop[ic] == -1 || (level[ntop[ic]-noffset] > level[ic] && nrht[ntop[ic]-noffset] == -1) ) { + iborder_cell |= 0x0008; + } + + border_cell[ic] = iborder_cell; + } + +#ifdef _OPENMP +#pragma omp for +#endif + for (uint ic=0; ic= 0 && nl < (int)ncells) { + if ((border_cell[nl] & 0x0001) == 0x0001) { + iborder_cell |= 0x0016; + } else if (level[nl] > level[ic]){ + int ntl = ntop[nl]-noffset; + if (ntl >= 0 && ntl < (int)ncells && (border_cell[ntl] & 0x0001) == 0x0001) { + iborder_cell |= 0x0016; + } + } + } + int nr = nrht[ic]-noffset; + if (nr >= 0 && nr < (int)ncells) { + if ((border_cell[nrht[ic]-noffset] & 0x0002) == 0x0002) { + iborder_cell |= 0x0032; + } else if (level[nr] > level[ic]){ + int ntr = ntop[nr]-noffset; + if (ntr >= 0 && ntr < (int)ncells && (border_cell[ntr] & 0x0002) == 0x0002) { + iborder_cell |= 0x0032; + } + } + } + int nb = nbot[ic]-noffset; + if (nb >= 0 && nb < (int)ncells) { + if ((border_cell[nb] & 0x0004) == 0x0004) { + iborder_cell |= 0x0064; + } else if (level[nb] > level[ic]){ + int nrb = nrht[nb]-noffset; + if (nrb >= 0 && nrb < (int)ncells && (border_cell[nrb] & 0x0004) == 0x0004) { + iborder_cell |= 0x0064; + } + } + } + int nt = ntop[ic]-noffset; + if (nt >= 0 && nt < (int)ncells) { + if ((border_cell[nt] & 0x0008) == 0x0008) { + iborder_cell |= 0x0128; + } else if (level[nt] > level[ic]){ + int nrt = nrht[nt]-noffset; + if (nrt >= 0 && nrt < (int)ncells && (border_cell[nrt] & 0x0008) == 0x0008) { + iborder_cell |= 0x0128; + } + } + } + } + + border_cell_out[ic] = iborder_cell; + } +// indent offset + + vector border_cell_num; + + static int nbsize_local; + + static vector border_cell_i; + static vector border_cell_j; + static vector border_cell_level; + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + for (int ic=0; ic<(int)ncells; ic++){ + if (border_cell_out[ic] > 0) border_cell_num.push_back(ic+noffset); + } + //printf("%d: border cell size is 
%d\n",mype,border_cell_num.size()); + + nbsize_local = border_cell_num.size(); + + border_cell_i.resize(nbsize_local); + border_cell_j.resize(nbsize_local); + border_cell_level.resize(nbsize_local); + + for (int ic = 0; ic = 2) { +#ifdef _OPENMP +#pragma omp master +#endif + cpu_timers[MESH_TIMER_FIND_BOUNDARY] += cpu_timer_stop(tstart_lev2); + cpu_timer_start(&tstart_lev2); + } + + // Allocate push database + + static int **send_database; + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + send_database = (int**)malloc(num_comm_partners*sizeof(int *)); + for (int ip = 0; ip < num_comm_partners; ip++){ + send_database[ip] = (int *)malloc(nbsize_local*sizeof(int)); + } +#ifdef _OPENMP + } +#pragma omp barrier +#endif + + // Compute the overlap between processor bounding boxes and set up push database + + static vector send_buffer_count; + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + send_buffer_count.resize(num_comm_partners); + for (int ip = 0; ip < num_comm_partners; ip++){ + int icount = 0; + for (int ib = 0; ib = iminsize_global[comm_partner[ip]] && + border_cell_i[ib]*levmult <= imaxsize_global[comm_partner[ip]] && + border_cell_j[ib]*levmult >= jminsize_global[comm_partner[ip]] && + border_cell_j[ib]*levmult <= jmaxsize_global[comm_partner[ip]] ) { + // border_cell_i[ib],border_cell_j[ib],border_cell_level[ib]); + send_database[ip][icount] = ib; + icount++; + } + } + send_buffer_count[ip]=icount; + } +#ifdef _OPENMP + } +#pragma omp barrier +#endif + + // Initialize L7_Push_Setup with num_comm_partners, comm_partner, send_database and + // send_buffer_count. L7_Push_Setup will copy data and determine recv_buffer_counts. 
+ // It will return receive_count_total for use in allocations + + static int receive_count_total; + int i_push_handle = 0; + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + i_push_handle = 0; + L7_Push_Setup(num_comm_partners, &comm_partner[0], &send_buffer_count[0], + send_database, &receive_count_total, &i_push_handle); +#ifdef _OPENMP + } +#pragma omp barrier +#endif + + if (DEBUG) { +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + fprintf(fp,"DEBUG num_comm_partners %d\n",num_comm_partners); + for (int ip = 0; ip < num_comm_partners; ip++){ + fprintf(fp,"DEBUG comm partner is %d data count is %d\n",comm_partner[ip],send_buffer_count[ip]); + for (int ic = 0; ic < send_buffer_count[ip]; ic++){ + int ib = send_database[ip][ic]; + fprintf(fp,"DEBUG \t index %d cell number %d i %d j %d level %d\n",ib,border_cell_num[ib], + border_cell_i[ib],border_cell_j[ib],border_cell_level[ib]); + } + } +#ifdef _OPENMP + } +#endif + } + + // Can now free the send database. 
Other arrays are vectors and will automatically + // deallocate + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + for (int ip = 0; ip < num_comm_partners; ip++){ + free(send_database[ip]); + } + free(send_database); +#ifdef _OPENMP + } +#endif + + if (TIMING_LEVEL >= 2) { +#ifdef _OPENMP +#pragma omp master +#endif + cpu_timers[MESH_TIMER_PUSH_SETUP] += cpu_timer_stop(tstart_lev2); + cpu_timer_start(&tstart_lev2); + } + + // Push the data needed to the adjacent processors + static int *border_cell_num_local; + static int *border_cell_i_local; + static int *border_cell_j_local; + static int *border_cell_level_local; + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + border_cell_num_local = (int *)malloc(receive_count_total*sizeof(int)); + border_cell_i_local = (int *)malloc(receive_count_total*sizeof(int)); + border_cell_j_local = (int *)malloc(receive_count_total*sizeof(int)); + border_cell_level_local = (int *)malloc(receive_count_total*sizeof(int)); + + L7_Push_Update(&border_cell_num[0], border_cell_num_local, i_push_handle); + L7_Push_Update(&border_cell_i[0], border_cell_i_local, i_push_handle); + L7_Push_Update(&border_cell_j[0], border_cell_j_local, i_push_handle); + L7_Push_Update(&border_cell_level[0], border_cell_level_local, i_push_handle); + + L7_Push_Free(&i_push_handle); +#ifdef _OPENMP + } +#pragma omp barrier +#endif + + nbsize_local = receive_count_total; + + if (DEBUG) { +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + for (int ic = 0; ic < nbsize_local; ic++) { + fprintf(fp,"%d: Local Border cell %d is %d i %d j %d level %d\n",mype,ic,border_cell_num_local[ic], + border_cell_i_local[ic],border_cell_j_local[ic],border_cell_level_local[ic]); + } +#ifdef _OPENMP + } +#endif + } + + if (TIMING_LEVEL >= 2) { +#ifdef _OPENMP +#pragma omp master +#endif + cpu_timers[MESH_TIMER_PUSH_BOUNDARY] += cpu_timer_stop(tstart_lev2); + cpu_timer_start(&tstart_lev2); + } + + if (TIMING_LEVEL >= 2) { 
+#ifdef _OPENMP +#pragma omp master +#endif + cpu_timers[MESH_TIMER_LOCAL_LIST] += cpu_timer_stop(tstart_lev2); + cpu_timer_start(&tstart_lev2); + } + + if (DEBUG) { +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + int jmaxglobal = (jmax+1)*IPOW2(levmx); + int imaxglobal = (imax+1)*IPOW2(levmx); + fprintf(fp,"\n HASH numbering before layer 1\n"); + for (int jj = jmaxglobal-1; jj>=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) { + fprintf(fp,"%5d",read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash)); + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii border_cell_needed_local; + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + border_cell_needed_local.resize(nbsize_local, 0); +#ifdef _OPENMP + } +#pragma omp barrier +#endif + + // Layer 1 +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + for (int ic =0; ic= 0 && iicur-(iicur-iilft)/2 < imaxsize-iminsize && jjcur >= 0 && (jjcur+jjtop)/2 < jmaxsize-jminsize){ + int nlftval = -1; + // Check for finer cell left and bottom side + if (lev != levmx){ // finer neighbor + int iilftfiner = iicur-(iicur-iilft)/2; + nlftval = read_hash(jjcur*(imaxsize-iminsize)+iilftfiner, hash); + // Also check for finer cell left and top side + if (nlftval < 0) { + int jjtopfiner = (jjcur+jjtop)/2; + nlftval = read_hash(jjtopfiner*(imaxsize-iminsize)+iilftfiner, hash); + } + } + + if (nlftval < 0 && iilft >= 0) { // same size + int nlfttry = read_hash(jjcur*(imaxsize-iminsize)+iilft, hash); + // we have to test for same level or it could be a finer cell one cell away that it is matching + if (nlfttry-noffset >= 0 && nlfttry-noffset < (int)ncells && level[nlfttry-noffset] == lev) { + nlftval = nlfttry; + } + } + + if (lev != 0 && nlftval < 0 && iilft-(iicur-iilft) >= 0){ // coarser neighbor + iilft -= iicur-iilft; + 
int jjlft = (jj/2)*2*levmult-jminsize; + int nlfttry = read_hash(jjlft*(imaxsize-iminsize)+iilft, hash); + // we have to test for coarser level or it could be a same size cell one or two cells away that it is matching + if (nlfttry-noffset >= 0 && nlfttry-noffset < (int)ncells && level[nlfttry-noffset] == lev-1) { + nlftval = nlfttry; + } + } + if (nlftval >= 0) iborder |= 0x0001; + } + + // Test for cell to right + if (iirht < imaxsize-iminsize && iirht >= 0 && jjcur >= 0 && jjtop < jmaxsize-jminsize) { + int nrhtval = -1; + // right neighbor -- finer, same size and coarser + nrhtval = read_hash(jjcur*(imaxsize-iminsize)+iirht, hash); + // right neighbor -- finer right top test + if (nrhtval < 0 && lev != levmx){ + int jjtopfiner = (jjcur+jjtop)/2; + nrhtval = read_hash(jjtopfiner*(imaxsize-iminsize)+iirht, hash); + } + if (nrhtval < 0 && lev != 0) { // test for coarser, but not directly above + int jjrhtcoarser = (jj/2)*2*levmult-jminsize; + if (jjrhtcoarser != jjcur) { + int nrhttry = read_hash(jjrhtcoarser*(imaxsize-iminsize)+iirht, hash); + if (nrhttry-noffset >= 0 && nrhttry-noffset < (int)ncells && level[nrhttry-noffset] == lev-1) { + nrhtval = nrhttry; + } + } + } + if (nrhtval > 0) iborder |= 0x0002; + } + + // Test for cell to bottom + if (iicur >= 0 && (iicur+iirht)/2 < imaxsize-iminsize && jjcur-(jjcur-jjbot)/2 >= 0 && jjcur-(jjcur-jjbot)/2 < jmaxsize-jminsize){ + int nbotval = -1; + // Check for finer cell below and left side + if (lev != levmx){ // finer neighbor + int jjbotfiner = jjcur-(jjcur-jjbot)/2; + nbotval = read_hash(jjbotfiner*(imaxsize-iminsize)+iicur, hash); + // Also check for finer cell below and right side + if (nbotval < 0) { + int iirhtfiner = (iicur+iirht)/2; + nbotval = read_hash(jjbotfiner*(imaxsize-iminsize)+iirhtfiner, hash); + } + } + + if (nbotval < 0 && jjbot >= 0) { // same size + int nbottry = read_hash(jjbot*(imaxsize-iminsize)+iicur, hash); + // we have to test for same level or it could be a finer cell one cell away that 
it is matching + if (nbottry-noffset >= 0 && nbottry-noffset < (int)ncells && level[nbottry-noffset] == lev) { + nbotval = nbottry; + } + } + + if (lev != 0 && nbotval < 0 && jjbot-(jjcur-jjbot) >= 0){ // coarser neighbor + jjbot -= jjcur-jjbot; + int iibot = (ii/2)*2*levmult-iminsize; + int nbottry = read_hash(jjbot*(imaxsize-iminsize)+iibot, hash); + // we have to test for coarser level or it could be a same size cell one or two cells away that it is matching + if (nbottry-noffset >= 0 && nbottry-noffset < (int)ncells && level[nbottry-noffset] == lev-1) { + nbotval = nbottry; + } + } + if (nbotval >= 0) iborder |= 0x0004; + } + + // Test for cell to top + if (iirht < imaxsize-iminsize && iicur >= 0 && jjtop >= 0 && jjtop < jmaxsize-jminsize) { + int ntopval = -1; + // top neighbor -- finer, same size and coarser + ntopval = read_hash(jjtop*(imaxsize-iminsize)+iicur, hash); + // top neighbor -- finer top right test + if (ntopval < 0 && lev != levmx){ + int iirhtfiner = (iicur+iirht)/2; + ntopval = read_hash(jjtop*(imaxsize-iminsize)+iirhtfiner, hash); + } + if (ntopval < 0 && lev != 0) { // test for coarser, but not directly above + int iitopcoarser = (ii/2)*2*levmult-iminsize; + if (iitopcoarser != iicur) { + int ntoptry = read_hash(jjtop*(imaxsize-iminsize)+iitopcoarser, hash); + if (ntoptry-noffset >= 0 && ntoptry-noffset < (int)ncells && level[ntoptry-noffset] == lev-1) { + ntopval = ntoptry; + } + } + } + if (ntopval > 0) iborder |= 0x0008; + } + + if (iborder) border_cell_needed_local[ic] = iborder; + } +#ifdef _OPENMP + } // end master region +#pragma omp barrier +#endif + + if (DEBUG) { +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + for(int ic=0; ic= 2) { +#ifdef _OPENMP +#pragma omp master +#endif + cpu_timers[MESH_TIMER_LAYER1] += cpu_timer_stop(tstart_lev2); + cpu_timer_start(&tstart_lev2); + } + + if (DEBUG) { +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + print_local(); + + int jmaxglobal = 
(jmax+1)*IPOW2(levmx); + int imaxglobal = (imax+1)*IPOW2(levmx); + fprintf(fp,"\n HASH numbering for 1 layer\n"); + for (int jj = jmaxglobal-1; jj>=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) { + fprintf(fp,"%5d",read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) ); + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii 0) continue; + int jj = border_cell_j_local[ic]; + int ii = border_cell_i_local[ic]; + int lev = border_cell_level_local[ic]; + int levmult = IPOW2(levmx-lev); + + int iicur = ii*levmult-iminsize; + int iilft = max( (ii-1)*levmult, 0 )-iminsize; + int iirht = min( (ii+1)*levmult, imaxcalc-1)-iminsize; + int jjcur = jj*levmult-jminsize; + int jjbot = max( (jj-1)*levmult, 0 )-jminsize; + int jjtop = min( (jj+1)*levmult, jmaxcalc-1)-jminsize; + + //fprintf(fp," DEBUG layer2 ic %d num %d i %d j %d lev %d\n",ic,border_cell_num_local[ic],ii,jj,lev); + + int iborder = 0; + + // Test for cell to left + if (iicur-(iicur-iilft)/2 >= 0 && iicur-(iicur-iilft)/2 < imaxsize-iminsize && jjcur >= 0 && (jjcur+jjtop)/2 < jmaxsize-jminsize){ + // Check for finer cell left and bottom side + if (lev != levmx){ // finer neighbor + int iilftfiner = iicur-(iicur-iilft)/2; + int nl = read_hash(jjcur*(imaxsize-iminsize)+iilftfiner, hash); + if (nl >= (int)(ncells+noffset) && (border_cell_needed_local[nl-ncells-noffset] & 0x0001) == 0x0001) { + iborder = 0x0001; + } else { + // Also check for finer cell left and top side + int jjtopfiner = (jjcur+jjtop)/2; + int nlt = read_hash(jjtopfiner*(imaxsize-iminsize)+iilftfiner, hash); + if ( nlt >= (int)(ncells+noffset) && (border_cell_needed_local[nlt-ncells-noffset] & 0x0001) == 0x0001) { + iborder = 0x0001; + } + } + } + if ( (iborder & 0x0001) == 0 && iilft >= 0) { //same size + int nl = read_hash(jjcur*(imaxsize-iminsize)+iilft, hash); + int levcheck = -1; + if 
(nl-noffset >= 0 && nl-noffset < (int)ncells) { + levcheck = level[nl-noffset]; + } else if (nl >= 0 && (int)(nl-ncells-noffset) >= 0 && (int)(nl-ncells-noffset) < nbsize_local) { + levcheck = border_cell_level_local[nl-ncells-noffset]; + } + if (nl >= (int)(ncells+noffset) && levcheck == lev && (border_cell_needed_local[nl-ncells-noffset] & 0x0001) == 0x0001) { + iborder = 0x0001; + } else if (lev != 0 && iilft-(iicur-iilft) >= 0){ // coarser neighbor + iilft -= iicur-iilft; + int jjlft = (jj/2)*2*levmult-jminsize; + nl = read_hash(jjlft*(imaxsize-iminsize)+iilft, hash); + levcheck = -1; + if (nl-noffset >= 0 && nl-noffset < (int)ncells) { + levcheck = level[nl-noffset]; + } else if (nl >= 0 && (int)(nl-ncells-noffset) >= 0 && (int)(nl-ncells-noffset) < nbsize_local) { + levcheck = border_cell_level_local[nl-ncells-noffset]; + } + // we have to test for coarser level or it could be a same size cell one or two cells away that it is matching + if (nl >= (int)(ncells+noffset) && levcheck == lev-1 && (border_cell_needed_local[nl-ncells-noffset] & 0x0001) == 0x0001) { + iborder = 0x0001; + } + } + } + } + + // Test for cell to right + if (iirht < imaxsize-iminsize && iirht >= 0 && jjcur >= 0 && jjtop < jmaxsize-jminsize) { + // right neighbor -- finer, same size and coarser + int nr = read_hash(jjcur*(imaxsize-iminsize)+iirht, hash); + if (nr >= (int)(ncells+noffset) && (border_cell_needed_local[nr-ncells-noffset] & 0x0002) == 0x0002) { + iborder = 0x0002; + } else if (lev != levmx){ + // right neighbor -- finer right top test + int jjtopfiner = (jjcur+jjtop)/2; + int nrt = read_hash(jjtopfiner*(imaxsize-iminsize)+iirht, hash); + if (nrt >= (int)(ncells+noffset) && (border_cell_needed_local[nrt-ncells-noffset] & 0x0002) == 0x0002) { + iborder = 0x0002; + } + } + if ( (iborder & 0x0002) == 0 && lev != 0) { // test for coarser, but not directly right + int jjrhtcoarser = (jj/2)*2*levmult-jminsize; + if (jjrhtcoarser != jjcur) { + int nr = 
read_hash(jjrhtcoarser*(imaxsize-iminsize)+iirht, hash); + int levcheck = -1; + if (nr-noffset >= 0 && nr-noffset < (int)ncells) { + levcheck = level[nr-noffset]; + } else if (nr >= 0 && (int)(nr-ncells-noffset) >= 0 && (int)(nr-ncells-noffset) < nbsize_local) { + levcheck = border_cell_level_local[nr-ncells-noffset]; + } + if (nr >= (int)(ncells+noffset) && levcheck == lev-1 && (border_cell_needed_local[nr-ncells-noffset] & 0x0002) == 0x0002) { + iborder = 0x0002; + } + } + } + } + + // Test for cell to bottom + if (iicur >= 0 && (iicur+iirht)/2 < imaxsize-iminsize && jjcur-(jjcur-jjbot)/2 >= 0 && jjcur-(jjcur-jjbot)/2 < jmaxsize-jminsize){ + // Check for finer cell below and left side + if (lev != levmx){ // finer neighbor + int jjbotfiner = jjcur-(jjcur-jjbot)/2; + int nb = read_hash(jjbotfiner*(imaxsize-iminsize)+iicur, hash); + if (nb >= (int)(ncells+noffset) && (border_cell_needed_local[nb-ncells-noffset] & 0x0004) == 0x0004) { + iborder = 0x0004; + } else { + // Also check for finer cell below and right side + int iirhtfiner = (iicur+iirht)/2; + int nbr = read_hash(jjbotfiner*(imaxsize-iminsize)+iirhtfiner, hash); + if (nbr >= (int)(ncells+noffset) && (border_cell_needed_local[nbr-ncells-noffset] & 0x0004) == 0x0004) { + iborder = 0x0004; + } + } + } + if ( (iborder & 0x0004) == 0 && jjbot >= 0) { //same size + int nb = read_hash(jjbot*(imaxsize-iminsize)+iicur, hash); + int levcheck = -1; + if (nb-noffset >= 0 && nb-noffset < (int)ncells) { + levcheck = level[nb-noffset]; + } else if (nb >= 0 && (int)(nb-ncells-noffset) >= 0 && (int)(nb-ncells-noffset) < nbsize_local) { + levcheck = border_cell_level_local[nb-ncells-noffset]; + } + if (nb >= (int)(ncells+noffset) && levcheck == lev && (border_cell_needed_local[nb-ncells-noffset] & 0x0004) == 0x0004) { + iborder = 0x0004; + } else if (lev != 0 && jjbot-(jjcur-jjbot) >= 0){ // coarser neighbor + jjbot -= jjcur-jjbot; + int iibot = (ii/2)*2*levmult-iminsize; + nb = read_hash(jjbot*(imaxsize-iminsize)+iibot, 
hash); + levcheck = -1; + if (nb-noffset >= 0 && nb-noffset < (int)ncells) { + levcheck = level[nb-noffset]; + } else if (nb >= 0 && (int)(nb-ncells-noffset) >= 0 && (int)(nb-ncells-noffset) < nbsize_local) { + levcheck = border_cell_level_local[nb-ncells-noffset]; + } + // we have to test for coarser level or it could be a same size cell one or two cells away that it is matching + if (nb >= (int)(ncells+noffset) && levcheck == lev-1 && (border_cell_needed_local[nb-ncells-noffset] & 0x0004) == 0x0004) { + iborder = 0x0004; + } + } + } + } + + // Test for cell to top + if (iirht < imaxsize-iminsize && iicur >= 0 && jjtop >= 0 && jjtop < jmaxsize-jminsize) { + // top neighbor -- finer, same size and coarser + int nt = read_hash(jjtop*(imaxsize-iminsize)+iicur, hash); + if (nt >= (int)(ncells+noffset) && (border_cell_needed_local[nt-ncells-noffset] & 0x0008) == 0x0008) { + iborder = 0x0008; + } else if (lev != levmx){ + int iirhtfiner = (iicur+iirht)/2; + int ntr = read_hash(jjtop*(imaxsize-iminsize)+iirhtfiner, hash); + if ( ntr >= (int)(ncells+noffset) && (border_cell_needed_local[ntr-ncells-noffset] & 0x0008) == 0x0008) { + iborder = 0x0008; + } + } + if ( (iborder & 0x0008) == 0 && lev != 0) { // test for coarser, but not directly above + int iitopcoarser = (ii/2)*2*levmult-iminsize; + if (iitopcoarser != iicur) { + int nb = read_hash(jjtop*(imaxsize-iminsize)+iitopcoarser, hash); + int levcheck = -1; + if (nb-noffset >= 0 && nb-noffset < (int)ncells) { + levcheck = level[nb-noffset]; + } else if (nb >= 0 && (int)(nb-ncells-noffset) >= 0 && (int)(nb-ncells-noffset) < nbsize_local) { + levcheck = border_cell_level_local[nb-ncells-noffset]; + } + if (nb-noffset >= (int)(ncells-noffset) && levcheck == lev-1 && (border_cell_needed_local[nb-ncells-noffset] & 0x0008) == 0x0008) { + iborder = 0x0008; + } + } + } + } + + if (iborder) border_cell_needed_local[ic] = iborder |= 0x0016; + } +#ifdef _OPENMP + } // end master region +#pragma omp barrier +#endif + + vector 
indices_needed; + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + if (DEBUG) { + for(int ic=0; ic= 0x0016) fprintf(fp,"%d: Second set of needed cells ic %3d cell %3d type %3d\n",mype,ic,border_cell_num_local[ic],border_cell_needed_local[ic]); + } + } + + int inew = 0; + for(int ic=0; ic= 2) { +#ifdef _OPENMP +#pragma omp master +#endif + cpu_timers[MESH_TIMER_LAYER2] += cpu_timer_stop(tstart_lev2); + cpu_timer_start(&tstart_lev2); + } + + if (DEBUG) { +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + print_local(); + + int jmaxglobal = (jmax+1)*IPOW2(levmx); + int imaxglobal = (imax+1)*IPOW2(levmx); + fprintf(fp,"\n HASH numbering for 2 layer\n"); + for (int jj = jmaxglobal-1; jj>=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) { + fprintf(fp,"%5d",read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) ); + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii= 2) { +#ifdef _OPENMP +#pragma omp master +#endif + cpu_timers[MESH_TIMER_LAYER_LIST] += cpu_timer_stop(tstart_lev2); + cpu_timer_start(&tstart_lev2); + } + + int nghost = nbsize_local; + ncells_ghost = ncells + nghost; + + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + celltype = (int *)mesh_memory.memory_realloc(ncells_ghost, celltype); + i = (int *)mesh_memory.memory_realloc(ncells_ghost, i); + j = (int *)mesh_memory.memory_realloc(ncells_ghost, j); + level = (int *)mesh_memory.memory_realloc(ncells_ghost, level); + nlft = (int *)mesh_memory.memory_realloc(ncells_ghost, nlft); + nrht = (int *)mesh_memory.memory_realloc(ncells_ghost, nrht); + nbot = (int *)mesh_memory.memory_realloc(ncells_ghost, nbot); + ntop = (int *)mesh_memory.memory_realloc(ncells_ghost, ntop); + memory_reset_ptrs(); +#ifdef _OPENMP + } // end master region +#pragma omp barrier +#endif + +#ifdef _OPENMP +#pragma 
omp for +#endif + for (int ic = ncells; ic < (int)ncells_ghost; ic++){ + nlft[ic] = -1; + nrht[ic] = -1; + nbot[ic] = -1; + ntop[ic] = -1; + } + + if (TIMING_LEVEL >= 2) { +#ifdef _OPENMP +#pragma omp master +#endif + cpu_timers[MESH_TIMER_COPY_MESH_DATA] += cpu_timer_stop(tstart_lev2); + cpu_timer_start(&tstart_lev2); + } + +#ifdef _OPENMP +#pragma omp for +#endif + for(int ic=0; ic lev_iend[lev]) celltype[ncells+ic] = RIGHT_BOUNDARY; + if (jj < lev_jbegin[lev]) celltype[ncells+ic] = BOTTOM_BOUNDARY; + if (jj > lev_jend[lev]) celltype[ncells+ic] = TOP_BOUNDARY; + i[ncells+ic] = ii; + j[ncells+ic] = jj; + level[ncells+ic] = lev; + } + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + free(border_cell_i_local); + free(border_cell_j_local); + free(border_cell_level_local); +#ifdef _OPENMP + } // end master region +#endif + + if (TIMING_LEVEL >= 2) { +#ifdef _OPENMP +#pragma omp master +#endif + cpu_timers[MESH_TIMER_FILL_MESH_GHOST] += cpu_timer_stop(tstart_lev2); + cpu_timer_start(&tstart_lev2); + } + + if (DEBUG) { +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + fprintf(fp,"After copying i,j, level to ghost cells\n"); + print_local(); +#ifdef _OPENMP + } // end master region +#endif + } + +#ifdef _OPENMP +#pragma omp for +#endif + for (uint ic=0; ic= jmax*IPOW2(levmx)-jminsize ) ) nlftval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash); + + // need to check for finer neighbor first + // Right and top neighbor don't change for finer, so drop through to same size + // Left and bottom need to be half of same size index for finer test + if (lev != levmx) { + int iilftfiner = iicur-(iicur-iilft)/2; + if (nlftval == -1 && iilftfiner >= 0) nlftval = read_hash(jjcur*(imaxsize-iminsize)+iilftfiner, hash); + } + + // same size neighbor + if (nlftval == -1 && iilft >= 0) nlftval = read_hash(jjcur*(imaxsize-iminsize)+iilft, hash); + + // Now we need to take care of special case where bottom and left boundary need adjustment since 
+ // expected cell doesn't exist on these boundaries if it is finer than current cell + if (jjcur < 1*IPOW2(levmx) && lev != levmx) { + if (nlftval == -1) { + int iilftfiner = iicur-(iicur-iilft)/2; + int jjtopfiner = (jjcur+jjtop)/2; + if (jjtopfiner < jmaxsize-jminsize && iilftfiner >= 0) nlftval = read_hash(jjtopfiner*(imaxsize-iminsize)+iilftfiner, hash); + } + } + + // coarser neighbor + if (lev != 0){ + if (nlftval == -1) { + int iilftcoarser = iilft - (iicur-iilft); + int jjlft = (jj/2)*2*levmult-jminsize; + if (iilftcoarser >=0) nlftval = read_hash(jjlft*(imaxsize-iminsize)+iilftcoarser, hash); + } + } + + if (nlftval != -1) nlft[ic] = nlftval; + } + + if (nrhtval == -1) { + // Taking care of boundary cells + // Force each boundary cell to point to itself on its boundary direction + if (iicur > imax*IPOW2(levmx)-1-iminsize) nrhtval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash); + + // Boundary cells next to corner boundary need special checks + if (iirht == imax*IPOW2(levmx)-iminsize && (jjcur < 1*IPOW2(levmx)-jminsize || jjcur >= jmax*IPOW2(levmx)-jminsize ) ) nrhtval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash); + + // same size neighbor + if (nrhtval == -1 && iirht < imaxsize-iminsize) nrhtval = read_hash(jjcur*(imaxsize-iminsize)+iirht, hash); + + // Now we need to take care of special case where bottom and left boundary need adjustment since + // expected cell doesn't exist on these boundaries if it is finer than current cell + if (jjcur < 1*IPOW2(levmx) && lev != levmx) { + if (nrhtval == -1) { + int jjtopfiner = (jjcur+jjtop)/2; + if (jjtopfiner < jmaxsize-jminsize && iirht < imaxsize-iminsize) nrhtval = read_hash(jjtopfiner*(imaxsize-iminsize)+iirht, hash); + } + } + + // coarser neighbor + if (lev != 0){ + if (nrhtval == -1) { + int jjrht = (jj/2)*2*levmult-jminsize; + if (iirht < imaxsize-iminsize) nrhtval = read_hash(jjrht*(imaxsize-iminsize)+iirht, hash); + } + } + if (nrhtval != -1) nrht[ic] = nrhtval; + } + + if (nbotval == -1) { + 
// Taking care of boundary cells + // Force each boundary cell to point to itself on its boundary direction + if (jjcur < 1*IPOW2(levmx) -jminsize) nbotval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash); + // Boundary cells next to corner boundary need special checks + if (jjcur == 1*IPOW2(levmx)-jminsize && (iicur < 1*IPOW2(levmx)-iminsize || iicur >= imax*IPOW2(levmx)-iminsize ) ) nbotval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash); + + // need to check for finer neighbor first + // Right and top neighbor don't change for finer, so drop through to same size + // Left and bottom need to be half of same size index for finer test + if (lev != levmx) { + int jjbotfiner = jjcur-(jjcur-jjbot)/2; + if (nbotval == -1 && jjbotfiner >= 0) nbotval = read_hash(jjbotfiner*(imaxsize-iminsize)+iicur, hash); + } + + // same size neighbor + if (nbotval == -1 && jjbot >=0) nbotval = read_hash(jjbot*(imaxsize-iminsize)+iicur, hash); + + // Now we need to take care of special case where bottom and left boundary need adjustment since + // expected cell doesn't exist on these boundaries if it is finer than current cell + if (iicur < 1*IPOW2(levmx) && lev != levmx) { + if (nbotval == -1) { + int iirhtfiner = (iicur+iirht)/2; + int jjbotfiner = jjcur-(jjcur-jjbot)/2; + if (jjbotfiner >= 0 && iirhtfiner < imaxsize-iminsize) nbotval = read_hash(jjbotfiner*(imaxsize-iminsize)+iirhtfiner, hash); + } + } + + // coarser neighbor + if (lev != 0){ + if (nbotval == -1) { + int jjbotcoarser = jjbot - (jjcur-jjbot); + int iibot = (ii/2)*2*levmult-iminsize; + if (jjbotcoarser >= 0 && iibot >= 0) nbotval = read_hash(jjbotcoarser*(imaxsize-iminsize)+iibot, hash); + } + } + if (nbotval != -1) nbot[ic] = nbotval; + } + + if (ntopval == -1) { + // Taking care of boundary cells + // Force each boundary cell to point to itself on its boundary direction + if (jjcur > jmax*IPOW2(levmx)-1-jminsize) ntopval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash); + // Boundary cells next to corner 
boundary need special checks + if (jjtop == jmax*IPOW2(levmx)-jminsize && (iicur < 1*IPOW2(levmx)-iminsize || iicur >= imax*IPOW2(levmx)-iminsize ) ) ntopval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash); + + // same size neighbor + if (ntopval == -1 && jjtop < jmaxsize-jminsize) ntopval = read_hash(jjtop*(imaxsize-iminsize)+iicur, hash); + + if (iicur < 1*IPOW2(levmx)) { + if (ntopval == -1) { + int iirhtfiner = (iicur+iirht)/2; + if (jjtop < jmaxsize-jminsize && iirhtfiner < imaxsize-iminsize) ntopval = read_hash(jjtop*(imaxsize-iminsize)+iirhtfiner, hash); + } + } + + // coarser neighbor + if (lev != 0){ + if (ntopval == -1) { + int iitop = (ii/2)*2*levmult-iminsize; + if (jjtop < jmaxsize-jminsize && iitop < imaxsize-iminsize) ntopval = read_hash(jjtop*(imaxsize-iminsize)+iitop, hash); + } + } + if (ntopval != -1) ntop[ic] = ntopval; + } + + //fprintf(fp,"%d: neighbors[%d] = %d %d %d %d\n",mype,ic,nlft[ic],nrht[ic],nbot[ic],ntop[ic]); + } + + if (TIMING_LEVEL >= 2) { +#ifdef _OPENMP +#pragma omp master +#endif + cpu_timers[MESH_TIMER_FILL_NEIGH_GHOST] += cpu_timer_stop(tstart_lev2); + cpu_timer_start(&tstart_lev2); + } + + if (DEBUG) { +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + fprintf(fp,"After setting neighbors through ghost cells\n"); + print_local(); +#ifdef _OPENMP + } // end master region +#endif + } + +/* + // Set neighbors to global cell numbers from hash + for (uint ic=0; ic= 2) { +#ifdef _OPENMP +#pragma omp master +#endif + cpu_timers[MESH_TIMER_SET_CORNER_NEIGH] += cpu_timer_stop(tstart_lev2); + cpu_timer_start(&tstart_lev2); + } + + if (DEBUG) { +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + fprintf(fp,"After setting corner neighbors\n"); + print_local(); +#ifdef _OPENMP + } // end master region +#endif + } + + // Adjusting neighbors to local indices +#ifdef _OPENMP +#pragma omp for +#endif + for (uint ic=0; ic -(int)ncells_ghost){ + nlft[ic] = abs(nlft[ic]); + } else if (nlft[ic] >= noffset 
&& nlft[ic] < (int)(noffset+ncells)) { + nlft[ic] -= noffset; + } + if (nrht[ic] <= -(int)ncells && nrht[ic] > -(int)ncells_ghost){ + nrht[ic] = abs(nrht[ic]); + } else if (nrht[ic] >= noffset && nrht[ic] < (int)(noffset+ncells)) { + nrht[ic] -= noffset; + } + if (nbot[ic] <= -(int)ncells && nbot[ic] > -(int)ncells_ghost){ + nbot[ic] = abs(nbot[ic]); + } else if (nbot[ic] >= noffset && nbot[ic] < (int)(noffset+ncells)) { + nbot[ic] -= noffset; + } + if (ntop[ic] <= -(int)ncells && ntop[ic] > -(int)ncells_ghost){ + ntop[ic] = abs(ntop[ic]); + } else if (ntop[ic] >= noffset && ntop[ic] < (int)(noffset+ncells)) { + ntop[ic] -= noffset; + } + } + + if (DEBUG) { +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + fprintf(fp,"After adjusting neighbors to local indices\n"); + print_local(); +#ifdef _OPENMP + } // end master region +#endif + } + + if (TIMING_LEVEL >= 2) { +#ifdef _OPENMP +#pragma omp master +#endif + cpu_timers[MESH_TIMER_NEIGH_ADJUST] += cpu_timer_stop(tstart_lev2); + cpu_timer_start(&tstart_lev2); + } + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + offtile_ratio_local = (offtile_ratio_local*(double)offtile_local_count) + ((double)nghost / (double)ncells); + offtile_local_count++; + offtile_ratio_local /= offtile_local_count; + //printf("%d ncells size is %ld ncells_ghost size is %ld nghost %d\n",mype,ncells,ncells_ghost,nghost); + //fprintf(fp,"%d ncells_ghost size is %ld nghost %d\n",mype,ncells_ghost,nghost); + + if (cell_handle) L7_Free(&cell_handle); + cell_handle=0; + + if (DEBUG) { + fprintf(fp,"%d: SETUP ncells %ld noffset %d nghost %d\n",mype,ncells,noffset,nghost); + for (int ig = 0; ig= 2) cpu_timers[MESH_TIMER_SETUP_COMM] += cpu_timer_stop(tstart_lev2); + +#ifdef _OPENMP + } // end master region +#endif + + if (DEBUG) { +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + print_local(); + + int jmaxglobal = (jmax+1)*IPOW2(levmx); + int imaxglobal = (imax+1)*IPOW2(levmx); + 
fprintf(fp,"\n HASH numbering\n"); + for (int jj = jmaxglobal-1; jj>=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) { + fprintf(fp,"%5d",read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) ); + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) { + int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) -noffset; + if ( (hashval >= 0 && hashval < (int)ncells) ) { + fprintf(fp,"%5d",nlft[hashval]); + } else { + fprintf(fp," "); + } + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize ) { + int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) -noffset; + if ( hashval >= 0 && hashval < (int)ncells ) { + fprintf(fp,"%5d",nrht[hashval]); + } else { + fprintf(fp," "); + } + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize ) { + int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) -noffset; + if ( hashval >= 0 && hashval < (int)ncells ) { + fprintf(fp,"%5d",nbot[hashval]); + } else { + fprintf(fp," "); + } + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize ) { + int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) -noffset; + if ( hashval >= 0 && hashval < (int)ncells ) { + 
fprintf(fp,"%5d",ntop[hashval]); + } else { + fprintf(fp," "); + } + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii 1 +#endif + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + write_hash_collision_report(); + read_hash_collision_report(); + compact_hash_delete(hash); + +#ifdef BOUNDS_CHECK + { + for (uint ic=0; ic= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nlft %d\n",mype,__LINE__,ic,nl); + if (level[nl] > level[ic]){ + int ntl = ntop[nl]; + if (ntl<0 || ntl>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d global %d nlft %d ntop of nlft %d\n",mype,__LINE__,ic,ic+noffset,nl,ntl); + } + int nr = nrht[ic]; + if (nr<0 || nr>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nrht %d\n",mype,__LINE__,ic,nr); + if (level[nr] > level[ic]){ + int ntr = ntop[nr]; + if (ntr<0 || ntr>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d ntop of nrht %d\n",mype,__LINE__,ic,ntr); + } + int nb = nbot[ic]; + if (nb<0 || nb>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nbot %d\n",mype,__LINE__,ic,nb); + if (level[nb] > level[ic]){ + int nrb = nrht[nb]; + if (nrb<0 || nrb>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nrht of nbot %d\n",mype,__LINE__,ic,nrb); + } + int nt = ntop[ic]; + if (nt<0 || nt>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d ntop %d\n",mype,__LINE__,ic,nt); + if (level[nt] > level[ic]){ + int nrt = nrht[nt]; + if (nrt<0 || nrt>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nrht of ntop %d\n",mype,__LINE__,ic,nrt); + } + } + } +#endif + +#ifdef _OPENMP + } // end master region +#pragma omp barrier +#endif + + } else if (calc_neighbor_type == KDTREE) { +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + struct timeval tstart_lev2; + if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2); + + TBounds box; + vector index_list(IPOW2(levmx*levmx) ); + + int num; + + ibase = 0; + 
calc_spatial_coordinates(ibase); + + kdtree_setup(); + + if (TIMING_LEVEL >= 2) { + cpu_timers[MESH_TIMER_KDTREE_SETUP] += cpu_timer_stop(tstart_lev2); + cpu_timer_start(&tstart_lev2); + } + + for (uint ic=0; ic= 2) cpu_timers[MESH_TIMER_KDTREE_QUERY] += cpu_timer_stop(tstart_lev2); + +#ifdef _OPENMP + } +#pragma omp barrier +#endif + } // calc_neighbor_type + + } + +#ifdef _OPENMP +#pragma omp master +#endif + cpu_timers[MESH_TIMER_CALC_NEIGHBORS] += cpu_timer_stop(tstart_cpu); +} + +#ifdef HAVE_OPENCL +void Mesh::gpu_calc_neighbors(void) +{ + if (! gpu_do_rezone) return; + + ulong gpu_hash_table_size = 0; + + struct timeval tstart_cpu; + cpu_timer_start(&tstart_cpu); + + struct timeval tstart_lev2; + cpu_timer_start(&tstart_lev2); + + cl_command_queue command_queue = ezcl_get_command_queue(); + + gpu_counters[MESH_COUNTER_CALC_NEIGH]++; + + assert(dev_levtable); + assert(dev_level); + assert(dev_i); + assert(dev_j); + + size_t mem_request = (int)((float)ncells*mem_factor); + + size_t local_work_size = MIN(ncells, TILE_SIZE); + size_t global_work_size = ((ncells + local_work_size - 1) /local_work_size) * local_work_size; + + //printf("DEBUG file %s line %d dev_nlft %p size %d\n",__FILE__,__LINE__,dev_nlft,ezcl_get_device_mem_nelements(dev_nlft)); + + if (dev_nlft == NULL || ezcl_get_device_mem_nelements(dev_nlft) < (int)ncells) { + dev_nlft = ezcl_malloc(NULL, const_cast("dev_nlft"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + dev_nrht = ezcl_malloc(NULL, const_cast("dev_nrht"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + dev_nbot = ezcl_malloc(NULL, const_cast("dev_nbot"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + dev_ntop = ezcl_malloc(NULL, const_cast("dev_ntop"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + + ezcl_set_kernel_arg(kernel_neighbor_init, 0, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_neighbor_init, 1, sizeof(cl_mem), (void *)&dev_nlft); + ezcl_set_kernel_arg(kernel_neighbor_init, 2, 
sizeof(cl_mem), (void *)&dev_nrht); + ezcl_set_kernel_arg(kernel_neighbor_init, 3, sizeof(cl_mem), (void *)&dev_nbot); + ezcl_set_kernel_arg(kernel_neighbor_init, 4, sizeof(cl_mem), (void *)&dev_ntop); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_neighbor_init, 1, NULL, &global_work_size, &local_work_size, NULL); + } + + int imaxsize = (imax+1)*IPOW2(levmx); + int jmaxsize = (jmax+1)*IPOW2(levmx); + + int gpu_hash_method = METHOD_UNSET; +// allow input.c to control hash types and methods + if (choose_hash_method != METHOD_UNSET) gpu_hash_method = choose_hash_method; +//========= + + size_t hashsize; + + uint hash_report_level = 1; + cl_mem dev_hash_header = NULL; + cl_mem dev_hash = gpu_compact_hash_init(ncells, imaxsize, jmaxsize, gpu_hash_method, hash_report_level, + &gpu_hash_table_size, &hashsize, &dev_hash_header); + + /* + const int isize, // 0 + const int levmx, // 1 + const int imaxsize, // 2 + __global const int *levtable, // 3 + __global const int *level, // 4 + __global const int *i, // 5 + __global const int *j, // 6 + __global const ulong *hash_header, // 7 + __global int *hash) // 8 + */ + + cl_event hash_setup_event; + + ezcl_set_kernel_arg(kernel_hash_setup, 0, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_hash_setup, 1, sizeof(cl_int), (void *)&levmx); + ezcl_set_kernel_arg(kernel_hash_setup, 2, sizeof(cl_int), (void *)&imaxsize); + ezcl_set_kernel_arg(kernel_hash_setup, 3, sizeof(cl_mem), (void *)&dev_levtable); + ezcl_set_kernel_arg(kernel_hash_setup, 4, sizeof(cl_mem), (void *)&dev_level); + ezcl_set_kernel_arg(kernel_hash_setup, 5, sizeof(cl_mem), (void *)&dev_i); + ezcl_set_kernel_arg(kernel_hash_setup, 6, sizeof(cl_mem), (void *)&dev_j); + ezcl_set_kernel_arg(kernel_hash_setup, 7, sizeof(cl_mem), (void *)&dev_nlft); + ezcl_set_kernel_arg(kernel_hash_setup, 8, sizeof(cl_mem), (void *)&dev_nrht); + ezcl_set_kernel_arg(kernel_hash_setup, 9, sizeof(cl_mem), (void *)&dev_nbot); + ezcl_set_kernel_arg(kernel_hash_setup, 10, 
sizeof(cl_mem), (void *)&dev_ntop); + ezcl_set_kernel_arg(kernel_hash_setup, 11, sizeof(cl_mem), (void *)&dev_hash_header); + ezcl_set_kernel_arg(kernel_hash_setup, 12, sizeof(cl_mem), (void *)&dev_hash); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_hash_setup, 1, NULL, &global_work_size, &local_work_size, &hash_setup_event); + + ezcl_wait_for_events(1, &hash_setup_event); + ezcl_event_release(hash_setup_event); + + if (TIMING_LEVEL >= 2) { + gpu_timers[MESH_TIMER_HASH_SETUP] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); + cpu_timer_start(&tstart_lev2); + } + + /* + const int isize, // 0 + const int levmx, // 1 + const int imax, // 2 + const int jmax, // 3 + const int imaxsize, // 4 + const int jmaxsize, // 5 + __global const int *levtable, // 6 + __global const int *level, // 7 + __global const int *i, // 8 + __global const int *j, // 9 + __global int *nlft, // 10 + __global int *nrht, // 11 + __global int *nbot, // 12 + __global int *ntop, // 13 + __global const ulong *hash_header, // 14 + __global int *hash) // 15 + */ + + cl_event calc_neighbors_event; + + ezcl_set_kernel_arg(kernel_calc_neighbors, 0, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_calc_neighbors, 1, sizeof(cl_int), (void *)&levmx); + ezcl_set_kernel_arg(kernel_calc_neighbors, 2, sizeof(cl_int), (void *)&imax); + ezcl_set_kernel_arg(kernel_calc_neighbors, 3, sizeof(cl_int), (void *)&jmax); + ezcl_set_kernel_arg(kernel_calc_neighbors, 4, sizeof(cl_int), (void *)&imaxsize); + ezcl_set_kernel_arg(kernel_calc_neighbors, 5, sizeof(cl_int), (void *)&jmaxsize); + ezcl_set_kernel_arg(kernel_calc_neighbors, 6, sizeof(cl_mem), (void *)&dev_levtable); + ezcl_set_kernel_arg(kernel_calc_neighbors, 7, sizeof(cl_mem), (void *)&dev_level); + ezcl_set_kernel_arg(kernel_calc_neighbors, 8, sizeof(cl_mem), (void *)&dev_i); + ezcl_set_kernel_arg(kernel_calc_neighbors, 9, sizeof(cl_mem), (void *)&dev_j); + ezcl_set_kernel_arg(kernel_calc_neighbors, 10, sizeof(cl_mem), (void *)&dev_nlft); + 
ezcl_set_kernel_arg(kernel_calc_neighbors, 11, sizeof(cl_mem), (void *)&dev_nrht); + ezcl_set_kernel_arg(kernel_calc_neighbors, 12, sizeof(cl_mem), (void *)&dev_nbot); + ezcl_set_kernel_arg(kernel_calc_neighbors, 13, sizeof(cl_mem), (void *)&dev_ntop); + ezcl_set_kernel_arg(kernel_calc_neighbors, 14, sizeof(cl_mem), (void *)&dev_hash_header); + ezcl_set_kernel_arg(kernel_calc_neighbors, 15, sizeof(cl_mem), (void *)&dev_hash); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_neighbors, 1, NULL, &global_work_size, &local_work_size, &calc_neighbors_event); + + ezcl_wait_for_events(1, &calc_neighbors_event); + ezcl_event_release(calc_neighbors_event); + + gpu_compact_hash_delete(dev_hash, dev_hash_header); + + if (TIMING_LEVEL >= 2) gpu_timers[MESH_TIMER_HASH_QUERY] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); + + gpu_timers[MESH_TIMER_CALC_NEIGHBORS] += (long)(cpu_timer_stop(tstart_cpu) * 1.0e9); +} + + +void Mesh::gpu_calc_neighbors_local(void) +{ + if (! gpu_do_rezone) return; + + ulong gpu_hash_table_size = 0; + + struct timeval tstart_cpu; + cpu_timer_start(&tstart_cpu); + + struct timeval tstart_lev2; + if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2); + + cl_command_queue command_queue = ezcl_get_command_queue(); + + gpu_counters[MESH_COUNTER_CALC_NEIGH]++; + + ncells_ghost = ncells; + + assert(dev_levtable); + assert(dev_level); + assert(dev_i); + assert(dev_j); + + size_t one = 1; + cl_mem dev_check = ezcl_malloc(NULL, const_cast("dev_check"), &one, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + + size_t mem_request = (int)((float)ncells*mem_factor); + dev_nlft = ezcl_malloc(NULL, const_cast("dev_nlft"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + dev_nrht = ezcl_malloc(NULL, const_cast("dev_nrht"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + dev_nbot = ezcl_malloc(NULL, const_cast("dev_nbot"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + dev_ntop = ezcl_malloc(NULL, const_cast("dev_ntop"), &mem_request, sizeof(cl_int), 
CL_MEM_READ_WRITE, 0); + + size_t local_work_size = 64; + size_t global_work_size = ((ncells + local_work_size - 1) /local_work_size) * local_work_size; + size_t block_size = global_work_size/local_work_size; + + //printf("DEBUG file %s line %d lws = %d gws %d bs %d ncells %d\n",__FILE__,__LINE__, + // local_work_size, global_work_size, block_size, ncells); + cl_mem dev_redscratch = ezcl_malloc(NULL, const_cast("dev_redscratch"), &block_size, sizeof(cl_int4), CL_MEM_READ_WRITE, 0); + cl_mem dev_sizes = ezcl_malloc(NULL, const_cast("dev_sizes"), &one, sizeof(cl_int4), CL_MEM_READ_WRITE, 0); + +#ifdef BOUNDS_CHECK + if (ezcl_get_device_mem_nelements(dev_i) < (int)ncells || + ezcl_get_device_mem_nelements(dev_j) < (int)ncells || + ezcl_get_device_mem_nelements(dev_level) < (int)ncells ){ + printf("%d: Warning ncells %ld size dev_i %d dev_j %d dev_level %d\n",mype,ncells,ezcl_get_device_mem_nelements(dev_i),ezcl_get_device_mem_nelements(dev_j),ezcl_get_device_mem_nelements(dev_level)); + } +#endif + + /* + __kernel void calc_hash_size_cl( + const int ncells, // 0 + const int levmx, // 1 + __global int *levtable, // 2 + __global int *level, // 3 + __global int *i, // 4 + __global int *j, // 5 + __global int4 *redscratch, // 6 + __global int4 *sizes, // 7 + __local int4 *tile) // 8 + */ + + ezcl_set_kernel_arg(kernel_hash_size, 0, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_hash_size, 1, sizeof(cl_int), (void *)&levmx); + ezcl_set_kernel_arg(kernel_hash_size, 2, sizeof(cl_mem), (void *)&dev_levtable); + ezcl_set_kernel_arg(kernel_hash_size, 3, sizeof(cl_mem), (void *)&dev_level); + ezcl_set_kernel_arg(kernel_hash_size, 4, sizeof(cl_mem), (void *)&dev_i); + ezcl_set_kernel_arg(kernel_hash_size, 5, sizeof(cl_mem), (void *)&dev_j); + ezcl_set_kernel_arg(kernel_hash_size, 6, sizeof(cl_mem), (void *)&dev_redscratch); + ezcl_set_kernel_arg(kernel_hash_size, 7, sizeof(cl_mem), (void *)&dev_sizes); + ezcl_set_kernel_arg(kernel_hash_size, 8, 
local_work_size*sizeof(cl_int4), NULL); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_hash_size, 1, NULL, &global_work_size, &local_work_size, NULL); + + if (block_size > 1) { + /* + __kernel void finish_reduction_minmax4_cl( + const int isize, // 0 + __global int4 *redscratch, // 1 + __global int4 *sizes, // 2 + __local int4 *tile) // 3 + */ + ezcl_set_kernel_arg(kernel_finish_hash_size, 0, sizeof(cl_int), (void *)&block_size); + ezcl_set_kernel_arg(kernel_finish_hash_size, 1, sizeof(cl_mem), (void *)&dev_redscratch); + ezcl_set_kernel_arg(kernel_finish_hash_size, 2, sizeof(cl_mem), (void *)&dev_sizes); + ezcl_set_kernel_arg(kernel_finish_hash_size, 3, local_work_size*sizeof(cl_int4), NULL); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_finish_hash_size, 1, NULL, &local_work_size, &local_work_size, NULL); + } + + ezcl_device_memory_delete(dev_redscratch); + + cl_int sizes[4]; + ezcl_enqueue_read_buffer(command_queue, dev_sizes, CL_TRUE, 0, 1*sizeof(cl_int4), &sizes, NULL); + + int imintile = sizes[0]; + int imaxtile = sizes[1]; + int jmintile = sizes[2]; + int jmaxtile = sizes[3]; + + // Expand size by 2*coarse_cells for ghost cells + // TODO: May want to get fancier here and calc based on cell level + int jminsize = max(jmintile-2*IPOW2(levmx),0); + int jmaxsize = min(jmaxtile+2*IPOW2(levmx),(jmax+1)*IPOW2(levmx)); + int iminsize = max(imintile-2*IPOW2(levmx),0); + int imaxsize = min(imaxtile+2*IPOW2(levmx),(imax+1)*IPOW2(levmx)); + //fprintf(fp,"%d: Sizes are imin %d imax %d jmin %d jmax %d\n",mype,iminsize,imaxsize,jminsize,jmaxsize); + + //ezcl_enqueue_write_buffer(command_queue, dev_sizes, CL_TRUE, 0, 1*sizeof(cl_int4), &sizes, NULL); + + int gpu_hash_method = METHOD_UNSET; +// allow imput.c to control hash types and methods + if (choose_hash_method != METHOD_UNSET) gpu_hash_method = choose_hash_method; +//========= + + size_t hashsize; + + uint hash_report_level = 1; + cl_mem dev_hash_header = NULL; + cl_mem dev_hash = 
gpu_compact_hash_init(ncells, imaxsize-iminsize, jmaxsize-jminsize, gpu_hash_method, hash_report_level, &gpu_hash_table_size, &hashsize, &dev_hash_header); + + int csize = corners_i.size(); +#ifdef BOUNDS_CHECK + for (int ic=0; ic= iminsize) continue; + if (corners_j[ic] >= jminsize) continue; + if (corners_i[ic] < imaxsize) continue; + if (corners_j[ic] < jmaxsize) continue; + if ( (corners_j[ic]-jminsize)*(imaxsize-iminsize)+(corners_i[ic]-iminsize) < 0 || + (corners_j[ic]-jminsize)*(imaxsize-iminsize)+(corners_i[ic]-iminsize) > (int)hashsize){ + printf("%d: Warning corners i %d j %d hash %d\n",mype,corners_i[ic],corners_j[ic], + (corners_j[ic]-jminsize)*(imaxsize-iminsize)+(corners_i[ic]-iminsize)); + } + } +#endif + + size_t corners_local_work_size = MIN(csize, TILE_SIZE); + size_t corners_global_work_size = ((csize+corners_local_work_size - 1) /corners_local_work_size) * corners_local_work_size; + + ezcl_set_kernel_arg(kernel_hash_adjust_sizes, 0, sizeof(cl_int), (void *)&csize); + ezcl_set_kernel_arg(kernel_hash_adjust_sizes, 1, sizeof(cl_int), (void *)&levmx); + ezcl_set_kernel_arg(kernel_hash_adjust_sizes, 2, sizeof(cl_int), (void *)&imax); + ezcl_set_kernel_arg(kernel_hash_adjust_sizes, 3, sizeof(cl_int), (void *)&jmax); + ezcl_set_kernel_arg(kernel_hash_adjust_sizes, 4, sizeof(cl_mem), (void *)&dev_levtable); + ezcl_set_kernel_arg(kernel_hash_adjust_sizes, 5, sizeof(cl_mem), (void *)&dev_sizes); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_hash_adjust_sizes, 1, NULL, &corners_global_work_size, &corners_local_work_size, NULL); + + if (DEBUG){ + vector sizes_tmp(4); + ezcl_enqueue_read_buffer(command_queue, dev_sizes, CL_TRUE, 0, 1*sizeof(cl_int4), &sizes_tmp[0], NULL); + int iminsize_tmp = sizes_tmp[0]; + int imaxsize_tmp = sizes_tmp[1]; + int jminsize_tmp = sizes_tmp[2]; + int jmaxsize_tmp = sizes_tmp[3]; + fprintf(fp,"%d: Sizes are imin %d imax %d jmin %d jmax %d\n",mype,iminsize_tmp,imaxsize_tmp,jminsize_tmp,jmaxsize_tmp); + } + + local_work_size 
= 128; + global_work_size = ((ncells + local_work_size - 1) /local_work_size) * local_work_size; + +#ifdef BOUNDS_CHECK + { + vector i_tmp(ncells); + vector j_tmp(ncells); + vector level_tmp(ncells); + ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, 0, ncells*sizeof(cl_int), &i_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_j, CL_FALSE, 0, ncells*sizeof(cl_int), &j_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_level, CL_TRUE, 0, ncells*sizeof(cl_int), &level_tmp[0], NULL); + for (int ic=0; ic<(int)ncells; ic++){ + int lev = level_tmp[ic]; + for ( int jj = j_tmp[ic]*IPOW2(levmx-lev)-jminsize; jj < (j_tmp[ic]+1)*IPOW2(levmx-lev)-jminsize; jj++) { + for (int ii = i_tmp[ic]*IPOW2(levmx-lev)-iminsize; ii < (i_tmp[ic]+1)*IPOW2(levmx-lev)-iminsize; ii++) { + if (jj < 0 || jj >= (jmaxsize-jminsize) || ii < 0 || ii >= (imaxsize-iminsize) ) { + printf("%d: Warning ncell %d writes to hash out-of-bounds at line %d ii %d jj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,ic,__LINE__,ii,jj,iminsize,imaxsize,jminsize,jmaxsize); + } + } + } + } + } +#endif + + //printf("%d: lws %d gws %d \n",mype,local_work_size,global_work_size); + cl_event hash_setup_local_event; + + /* + const int isize, // 0 + const int levmx, // 1 + const int imax, // 2 + const int jmax, // 3 + const int noffset, // 4 + __global int *sizes, // 5 + __global int *levtable, // 6 + __global int *level, // 7 + __global int *i, // 8 + __global int *j, // 9 + __global const ulong *hash_heaer, // 10 + __global int *hash) // 11 + */ + + ezcl_set_kernel_arg(kernel_hash_setup_local, 0, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_hash_setup_local, 1, sizeof(cl_int), (void *)&levmx); + ezcl_set_kernel_arg(kernel_hash_setup_local, 2, sizeof(cl_int), (void *)&imax); + ezcl_set_kernel_arg(kernel_hash_setup_local, 3, sizeof(cl_int), (void *)&jmax); + ezcl_set_kernel_arg(kernel_hash_setup_local, 4, sizeof(cl_int), (void *)&noffset); + 
ezcl_set_kernel_arg(kernel_hash_setup_local, 5, sizeof(cl_mem), (void *)&dev_sizes); + ezcl_set_kernel_arg(kernel_hash_setup_local, 6, sizeof(cl_mem), (void *)&dev_levtable); + ezcl_set_kernel_arg(kernel_hash_setup_local, 7, sizeof(cl_mem), (void *)&dev_level); + ezcl_set_kernel_arg(kernel_hash_setup_local, 8, sizeof(cl_mem), (void *)&dev_i); + ezcl_set_kernel_arg(kernel_hash_setup_local, 9, sizeof(cl_mem), (void *)&dev_j); + ezcl_set_kernel_arg(kernel_hash_setup_local, 10, sizeof(cl_mem), (void *)&dev_hash_header); + ezcl_set_kernel_arg(kernel_hash_setup_local, 11, sizeof(cl_mem), (void *)&dev_hash); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_hash_setup_local, 1, NULL, &global_work_size, &local_work_size, &hash_setup_local_event); + + ezcl_wait_for_events(1, &hash_setup_local_event); + ezcl_event_release(hash_setup_local_event); + + if (DEBUG){ + vector sizes_tmp(4); + ezcl_enqueue_read_buffer(command_queue, dev_sizes, CL_TRUE, 0, 1*sizeof(cl_int4), &sizes_tmp[0], NULL); + int iminsize_tmp = sizes_tmp[0]; + int imaxsize_tmp = sizes_tmp[1]; + int jminsize_tmp = sizes_tmp[2]; + int jmaxsize_tmp = sizes_tmp[3]; + fprintf(fp,"%d: Sizes are imin %d imax %d jmin %d jmax %d\n",mype,iminsize_tmp,imaxsize_tmp,jminsize_tmp,jmaxsize_tmp); + } + + if (TIMING_LEVEL >= 2) { + gpu_timers[MESH_TIMER_HASH_SETUP] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); + cpu_timer_start(&tstart_lev2); + } + +#ifdef BOUNDS_CHECK + { + if (ezcl_get_device_mem_nelements(dev_nlft) < (int)ncells || + ezcl_get_device_mem_nelements(dev_nrht) < (int)ncells || + ezcl_get_device_mem_nelements(dev_nbot) < (int)ncells || + ezcl_get_device_mem_nelements(dev_ntop) < (int)ncells || + ezcl_get_device_mem_nelements(dev_i) < (int)ncells || + ezcl_get_device_mem_nelements(dev_j) < (int)ncells || + ezcl_get_device_mem_nelements(dev_level) < (int)ncells ) { + printf("%d: Warning -- sizes for dev_neigh too small ncells %ld neigh %d %d %d %d %d %d 
%d\n",mype,ncells,ezcl_get_device_mem_nelements(dev_nlft),ezcl_get_device_mem_nelements(dev_nrht),ezcl_get_device_mem_nelements(dev_nbot),ezcl_get_device_mem_nelements(dev_ntop),ezcl_get_device_mem_nelements(dev_i),ezcl_get_device_mem_nelements(dev_j),ezcl_get_device_mem_nelements(dev_level)); + } + vector level_tmp(ncells); + ezcl_enqueue_read_buffer(command_queue, dev_level, CL_TRUE, 0, ncells*sizeof(cl_int), &level_tmp[0], NULL); + int iflag = 0; + for (int ic=0; ic levmx) { + printf("%d: Warning level value bad ic %d level %d ncells %d\n",mype,ic,level_tmp[ic],ncells); + iflag++; + } + } + if (ezcl_get_device_mem_nelements(dev_levtable) < levmx+1) printf("%d Warning levtable too small levmx is %d devtable size is %d\n",mype,levmx,ezcl_get_device_mem_nelements(dev_levtable)); +#ifdef HAVE_MPI + if (iflag > 20) {fflush(stdout); L7_Terminate(); exit(0);} +#endif + } +#endif + +#ifdef BOUNDS_CHECK + { + int jmaxcalc = (jmax+1)*IPOW2(levmx); + int imaxcalc = (imax+1)*IPOW2(levmx); + vector i_tmp(ncells); + vector j_tmp(ncells); + vector level_tmp(ncells); + vector hash_tmp(hashsize); + ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, 0, ncells*sizeof(cl_int), &i_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_j, CL_FALSE, 0, ncells*sizeof(cl_int), &j_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_level, CL_TRUE, 0, ncells*sizeof(cl_int), &level_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_hash, CL_TRUE, 0, hashsize*sizeof(cl_int), &hash_tmp[0], NULL); + for (int ic=0; ic<(int)ncells; ic++){ + int ii = i_tmp[ic]; + int jj = j_tmp[ic]; + int lev = level_tmp[ic]; + int levmult = IPOW2(levmx-lev); + int jjj=jj *levmult-jminsize; + int iii=max( ii *levmult-1, 0 )-iminsize; + if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize); + jjj=jj 
*levmult-jminsize; + iii=min( (ii+1)*levmult, imaxcalc-1)-iminsize; + if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize); + jjj=max( jj *levmult-1, 0) -jminsize; + iii=ii *levmult -iminsize; + if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize); + jjj=min( (jj+1)*levmult, jmaxcalc-1)-jminsize; + iii=ii *levmult -iminsize; + if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize); + int nlftval = hash_tmp[(( jj *levmult )-jminsize)*(imaxsize-iminsize)+((max( ii *levmult-1, 0 ))-iminsize)]; + int nrhtval = hash_tmp[(( jj *levmult )-jminsize)*(imaxsize-iminsize)+((min( (ii+1)*levmult, imaxcalc-1))-iminsize)]; + int nbotval = hash_tmp[((max( jj *levmult-1, 0) )-jminsize)*(imaxsize-iminsize)+(( ii *levmult )-iminsize)]; + int ntopval = hash_tmp[((min( (jj+1)*levmult, jmaxcalc-1))-jminsize)*(imaxsize-iminsize)+(( ii *levmult )-iminsize)]; + + if (nlftval == INT_MIN){ + jjj = jj*levmult-jminsize; + iii = ii*levmult-iminsize; + if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize); + } + if (nrhtval == INT_MIN){ + jjj = jj*levmult-jminsize; + iii = ii*levmult-iminsize; + if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d 
jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize); + } + if (nbotval == INT_MIN) { + iii = ii*levmult-iminsize; + jjj = jj*levmult-jminsize; + if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize); + } + if (ntopval == INT_MIN) { + iii = ii*levmult-iminsize; + jjj = jj*levmult-jminsize; + if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize); + } + } + } +#endif + + cl_event calc_neighbors_local_event; + + /* + const int isize, // 0 + const int levmx, // 1 + const int imaxsize, // 2 + const int jmaxsize, // 3 + const int noffset, // 4 + __global int *sizes, // 5 + __global int *levtable, // 6 + __global int *level, // 7 + __global int *i, // 8 + __global int *j, // 9 + __global int *nlft, // 10 + __global int *nrht, // 11 + __global int *nbot, // 12 + __global int *ntop, // 13 + __global const ulong *hash_heaer, // 14 + __global int *hash) // 15 + */ + + ezcl_set_kernel_arg(kernel_calc_neighbors_local, 0, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_calc_neighbors_local, 1, sizeof(cl_int), (void *)&levmx); + ezcl_set_kernel_arg(kernel_calc_neighbors_local, 2, sizeof(cl_int), (void *)&imax); + ezcl_set_kernel_arg(kernel_calc_neighbors_local, 3, sizeof(cl_int), (void *)&jmax); + ezcl_set_kernel_arg(kernel_calc_neighbors_local, 4, sizeof(cl_int), (void *)&noffset); + ezcl_set_kernel_arg(kernel_calc_neighbors_local, 5, sizeof(cl_mem), (void *)&dev_sizes); + ezcl_set_kernel_arg(kernel_calc_neighbors_local, 6, sizeof(cl_mem), (void *)&dev_levtable); + ezcl_set_kernel_arg(kernel_calc_neighbors_local, 7, sizeof(cl_mem), (void *)&dev_level); + 
ezcl_set_kernel_arg(kernel_calc_neighbors_local, 8, sizeof(cl_mem), (void *)&dev_i); + ezcl_set_kernel_arg(kernel_calc_neighbors_local, 9, sizeof(cl_mem), (void *)&dev_j); + ezcl_set_kernel_arg(kernel_calc_neighbors_local, 10, sizeof(cl_mem), (void *)&dev_nlft); + ezcl_set_kernel_arg(kernel_calc_neighbors_local, 11, sizeof(cl_mem), (void *)&dev_nrht); + ezcl_set_kernel_arg(kernel_calc_neighbors_local, 12, sizeof(cl_mem), (void *)&dev_nbot); + ezcl_set_kernel_arg(kernel_calc_neighbors_local, 13, sizeof(cl_mem), (void *)&dev_ntop); + ezcl_set_kernel_arg(kernel_calc_neighbors_local, 14, sizeof(cl_mem), (void *)&dev_hash_header); + ezcl_set_kernel_arg(kernel_calc_neighbors_local, 15, sizeof(cl_mem), (void *)&dev_hash); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_neighbors_local, 1, NULL, &global_work_size, &local_work_size, &calc_neighbors_local_event); + + ezcl_wait_for_events(1, &calc_neighbors_local_event); + ezcl_event_release(calc_neighbors_local_event); + + if (TIMING_LEVEL >= 2) { + gpu_timers[MESH_TIMER_HASH_QUERY] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); + cpu_timer_start(&tstart_lev2); + } + + if (DEBUG) { + print_dev_local(); + + vector hash_tmp(hashsize); + ezcl_enqueue_read_buffer(command_queue, dev_hash, CL_FALSE, 0, hashsize*sizeof(cl_int), &hash_tmp[0], NULL); + + cl_mem dev_hash_header_check = gpu_get_hash_header(); + vector hash_header_check(hash_header_size); + ezcl_enqueue_read_buffer(command_queue, dev_hash_header_check, CL_TRUE, 0, hash_header_size*sizeof(cl_ulong), &hash_header_check[0], NULL); + + int gpu_hash_method = (int)hash_header_check[0]; + ulong gpu_hash_table_size = hash_header_check[1]; + ulong gpu_AA = hash_header_check[2]; + ulong gpu_BB = hash_header_check[3]; + + vector nlft_tmp(ncells_ghost); + vector nrht_tmp(ncells_ghost); + vector nbot_tmp(ncells_ghost); + vector ntop_tmp(ncells_ghost); + ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nlft_tmp[0], NULL); + 
ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nrht_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nbot_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_TRUE, 0, ncells_ghost*sizeof(cl_int), &ntop_tmp[0], NULL); + + int jmaxglobal = (jmax+1)*IPOW2(levmx); + int imaxglobal = (imax+1)*IPOW2(levmx); + fprintf(fp,"\n HASH 0 numbering\n"); + for (int jj = jmaxglobal-1; jj>=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) { + fprintf(fp,"%5d",read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) ); + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) { + int hashval = read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) -noffset; + if (hashval >= 0 && hashval < (int)ncells) { + fprintf(fp,"%5d",nlft_tmp[hashval]); + } else { + fprintf(fp," "); + } + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) { + int hashval = read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0])-noffset; + if (hashval >= 0 && hashval < (int)ncells) { + fprintf(fp,"%5d",nrht_tmp[hashval]); + } else { + fprintf(fp," "); + } + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii=0; jj--){ + fprintf(fp,"%2d: 
%4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) { + int hashval = read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0])-noffset; + if (hashval >= 0 && hashval < (int)ncells) { + fprintf(fp,"%5d",nbot_tmp[hashval]); + } else { + fprintf(fp," "); + } + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) { + int hashval = read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0])-noffset; + if (hashval >= 0 && hashval < (int)ncells) { + fprintf(fp,"%5d",ntop_tmp[hashval]); + } else { + fprintf(fp," "); + } + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii 1) { + vector iminsize_global(numpe); + vector imaxsize_global(numpe); + vector jminsize_global(numpe); + vector jmaxsize_global(numpe); + vector comm_partner(numpe,-1); + + MPI_Allgather(&iminsize, 1, MPI_INT, &iminsize_global[0], 1, MPI_INT, MPI_COMM_WORLD); + MPI_Allgather(&imaxsize, 1, MPI_INT, &imaxsize_global[0], 1, MPI_INT, MPI_COMM_WORLD); + MPI_Allgather(&jminsize, 1, MPI_INT, &jminsize_global[0], 1, MPI_INT, MPI_COMM_WORLD); + MPI_Allgather(&jmaxsize, 1, MPI_INT, &jmaxsize_global[0], 1, MPI_INT, MPI_COMM_WORLD); + + int num_comm_partners = 0; + for (int ip = 0; ip < numpe; ip++){ + if (ip == mype) continue; + if (iminsize_global[ip] > imaxtile) continue; + if (imaxsize_global[ip] < imintile) continue; + if (jminsize_global[ip] > jmaxtile) continue; + if (jmaxsize_global[ip] < jmintile) continue; + comm_partner[num_comm_partners] = ip; + num_comm_partners++; + //if (DEBUG) fprintf(fp,"%d: overlap with processor %d bounding box is %d %d %d 
%d\n",mype,ip,iminsize_global[ip],imaxsize_global[ip],jminsize_global[ip],jmaxsize_global[ip]); + } + +#ifdef BOUNDS_CHECK + { + vector nlft_tmp(ncells_ghost); + vector nrht_tmp(ncells_ghost); + vector nbot_tmp(ncells_ghost); + vector ntop_tmp(ncells_ghost); + ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, ncells*sizeof(cl_int), &nlft_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells*sizeof(cl_int), &nrht_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, ncells*sizeof(cl_int), &nbot_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_TRUE, 0, ncells*sizeof(cl_int), &ntop_tmp[0], NULL); + for (uint ic=0; ic= ncells) printf("%d: Warning at line %d cell %d nlft %d\n",mype,__LINE__,ic,nl); + } + int nr = nrht_tmp[ic]; + if (nr != -1){ + nr -= noffset; + if (nr<0 || nr>= ncells) printf("%d: Warning at line %d cell %d nrht %d\n",mype,__LINE__,ic,nr); + } + int nb = nbot_tmp[ic]; + if (nb != -1){ + nb -= noffset; + if (nb<0 || nb>= ncells) printf("%d: Warning at line %d cell %d nbot %d\n",mype,__LINE__,ic,nb); + } + int nt = ntop_tmp[ic]; + if (nt != -1){ + nt -= noffset; + if (nt<0 || nt>= ncells) printf("%d: Warning at line %d cell %d ntop %d\n",mype,__LINE__,ic,nt); + } + } + } +#endif + + cl_mem dev_border_cell = ezcl_malloc(NULL, const_cast("dev_border_cell1"), &ncells, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + + ezcl_set_kernel_arg(kernel_calc_border_cells, 0, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_calc_border_cells, 1, sizeof(cl_int), (void *)&noffset); + ezcl_set_kernel_arg(kernel_calc_border_cells, 2, sizeof(cl_mem), (void *)&dev_nlft); + ezcl_set_kernel_arg(kernel_calc_border_cells, 3, sizeof(cl_mem), (void *)&dev_nrht); + ezcl_set_kernel_arg(kernel_calc_border_cells, 4, sizeof(cl_mem), (void *)&dev_nbot); + ezcl_set_kernel_arg(kernel_calc_border_cells, 5, sizeof(cl_mem), (void *)&dev_ntop); + ezcl_set_kernel_arg(kernel_calc_border_cells, 
6, sizeof(cl_mem), (void *)&dev_level); + ezcl_set_kernel_arg(kernel_calc_border_cells, 7, sizeof(cl_mem), (void *)&dev_border_cell); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_border_cells, 1, NULL, &global_work_size, &local_work_size, NULL); + + cl_mem dev_border_cell_new = ezcl_malloc(NULL, const_cast("dev_border_cell2"), &ncells, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + + size_t one = 1; + cl_mem dev_nbsize = ezcl_malloc(NULL, const_cast("dev_nbsize"), &one, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_ioffset = ezcl_malloc(NULL, const_cast("dev_ioffset"), &block_size, sizeof(cl_uint), CL_MEM_READ_WRITE, 0); + + ezcl_set_kernel_arg(kernel_calc_border_cells2, 0, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_calc_border_cells2, 1, sizeof(cl_int), (void *)&noffset); + ezcl_set_kernel_arg(kernel_calc_border_cells2, 2, sizeof(cl_mem), (void *)&dev_nlft); + ezcl_set_kernel_arg(kernel_calc_border_cells2, 3, sizeof(cl_mem), (void *)&dev_nrht); + ezcl_set_kernel_arg(kernel_calc_border_cells2, 4, sizeof(cl_mem), (void *)&dev_nbot); + ezcl_set_kernel_arg(kernel_calc_border_cells2, 5, sizeof(cl_mem), (void *)&dev_ntop); + ezcl_set_kernel_arg(kernel_calc_border_cells2, 6, sizeof(cl_mem), (void *)&dev_level); + ezcl_set_kernel_arg(kernel_calc_border_cells2, 7, sizeof(cl_mem), (void *)&dev_border_cell); + ezcl_set_kernel_arg(kernel_calc_border_cells2, 8, sizeof(cl_mem), (void *)&dev_border_cell_new); + ezcl_set_kernel_arg(kernel_calc_border_cells2, 9, sizeof(cl_mem), (void *)&dev_ioffset); + ezcl_set_kernel_arg(kernel_calc_border_cells2, 10, sizeof(cl_mem), (void *)&dev_nbsize); + ezcl_set_kernel_arg(kernel_calc_border_cells2, 11, local_work_size*sizeof(cl_int), NULL); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_border_cells2, 1, NULL, &global_work_size, &local_work_size, NULL); + + ezcl_device_memory_swap(&dev_border_cell, &dev_border_cell_new); + ezcl_device_memory_delete(dev_border_cell_new); + + int group_size = 
(int)(global_work_size/local_work_size); + + ezcl_set_kernel_arg(kernel_finish_scan, 0, sizeof(cl_int), (void *)&group_size); + ezcl_set_kernel_arg(kernel_finish_scan, 1, sizeof(cl_mem), (void *)&dev_ioffset); + ezcl_set_kernel_arg(kernel_finish_scan, 2, sizeof(cl_mem), (void *)&dev_nbsize); + ezcl_set_kernel_arg(kernel_finish_scan, 3, local_work_size*sizeof(cl_int), NULL); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_finish_scan, 1, NULL, &local_work_size, &local_work_size, NULL); + + int nbsize_local; + ezcl_enqueue_read_buffer(command_queue, dev_nbsize, CL_TRUE, 0, 1*sizeof(cl_int), &nbsize_local, NULL); + ezcl_device_memory_delete(dev_nbsize); + + //printf("%d: border cell size is %d global is %ld\n",mype,nbsize_local,nbsize_global); + + vector border_cell_num(nbsize_local); + vector border_cell_i(nbsize_local); + vector border_cell_j(nbsize_local); + vector border_cell_level(nbsize_local); + + // allocate new border memory + size_t nbsize_long = nbsize_local; + cl_mem dev_border_cell_i = ezcl_malloc(NULL, const_cast("dev_border_cell_i"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_border_cell_j = ezcl_malloc(NULL, const_cast("dev_border_cell_j"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_border_cell_level = ezcl_malloc(NULL, const_cast("dev_border_cell_level"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_border_cell_num = ezcl_malloc(NULL, const_cast("dev_border_cell_num"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + + ezcl_set_kernel_arg(kernel_get_border_data, 0, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_get_border_data, 1, sizeof(cl_int), (void *)&noffset); + ezcl_set_kernel_arg(kernel_get_border_data, 2, sizeof(cl_mem), (void *)&dev_ioffset); + ezcl_set_kernel_arg(kernel_get_border_data, 3, sizeof(cl_mem), (void *)&dev_border_cell); + ezcl_set_kernel_arg(kernel_get_border_data, 4, sizeof(cl_mem), (void *)&dev_i); + 
ezcl_set_kernel_arg(kernel_get_border_data, 5, sizeof(cl_mem), (void *)&dev_j); + ezcl_set_kernel_arg(kernel_get_border_data, 6, sizeof(cl_mem), (void *)&dev_level); + ezcl_set_kernel_arg(kernel_get_border_data, 7, sizeof(cl_mem), (void *)&dev_border_cell_i); + ezcl_set_kernel_arg(kernel_get_border_data, 8, sizeof(cl_mem), (void *)&dev_border_cell_j); + ezcl_set_kernel_arg(kernel_get_border_data, 9, sizeof(cl_mem), (void *)&dev_border_cell_level); + ezcl_set_kernel_arg(kernel_get_border_data, 10, sizeof(cl_mem), (void *)&dev_border_cell_num); + ezcl_set_kernel_arg(kernel_get_border_data, 11, local_work_size*sizeof(cl_uint), NULL); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_get_border_data, 1, NULL, &global_work_size, &local_work_size, NULL); + + ezcl_device_memory_delete(dev_ioffset); + ezcl_device_memory_delete(dev_border_cell); + + // read gpu border cell data + ezcl_enqueue_read_buffer(command_queue, dev_border_cell_i, CL_FALSE, 0, nbsize_local*sizeof(cl_int), &border_cell_i[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_border_cell_j, CL_FALSE, 0, nbsize_local*sizeof(cl_int), &border_cell_j[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_border_cell_level, CL_FALSE, 0, nbsize_local*sizeof(cl_int), &border_cell_level[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_border_cell_num, CL_TRUE, 0, nbsize_local*sizeof(cl_int), &border_cell_num[0], NULL); + + if (TIMING_LEVEL >= 2) { + gpu_timers[MESH_TIMER_FIND_BOUNDARY] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); + cpu_timer_start(&tstart_lev2); + } + + // Allocate push database + + int **send_database = (int**)malloc(num_comm_partners*sizeof(int *)); + for (int ip = 0; ip < num_comm_partners; ip++){ + send_database[ip] = (int *)malloc(nbsize_local*sizeof(int)); + } + + // Compute the overlap between processor bounding boxes and set up push database + + vector send_buffer_count(num_comm_partners); + for (int ip = 0; ip < num_comm_partners; ip++){ + int icount = 0; + for (int ib 
= 0; ib = iminsize_global[comm_partner[ip]] && + border_cell_i[ib]*levmult <= imaxsize_global[comm_partner[ip]] && + border_cell_j[ib]*levmult >= jminsize_global[comm_partner[ip]] && + border_cell_j[ib]*levmult <= jmaxsize_global[comm_partner[ip]] ) { + send_database[ip][icount] = ib; + icount++; + } + } + send_buffer_count[ip]=icount; + } + + // Initialize L7_Push_Setup with num_comm_partners, comm_partner, send_database and + // send_buffer_count. L7_Push_Setup will copy data and determine recv_buffer_counts. + // It will return receive_count_total for use in allocations + + int receive_count_total; + int i_push_handle = 0; + L7_Push_Setup(num_comm_partners, &comm_partner[0], &send_buffer_count[0], + send_database, &receive_count_total, &i_push_handle); + + if (DEBUG) { + fprintf(fp,"DEBUG num_comm_partners %d\n",num_comm_partners); + for (int ip = 0; ip < num_comm_partners; ip++){ + fprintf(fp,"DEBUG comm partner is %d data count is %d\n",comm_partner[ip],send_buffer_count[ip]); + for (int ic = 0; ic < send_buffer_count[ip]; ic++){ + int ib = send_database[ip][ic]; + fprintf(fp,"DEBUG \t index %d cell number %d i %d j %d level %d\n",ib,border_cell_num[ib], + border_cell_i[ib],border_cell_j[ib],border_cell_level[ib]); + } + } + } + + // Can now free the send database. 
Other arrays are vectors and will automatically + // deallocate + + for (int ip = 0; ip < num_comm_partners; ip++){ + free(send_database[ip]); + } + free(send_database); + + if (TIMING_LEVEL >= 2) { + gpu_timers[MESH_TIMER_PUSH_SETUP] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); + cpu_timer_start(&tstart_lev2); + } + // Push the data needed to the adjacent processors + + int *border_cell_num_local = (int *)malloc(receive_count_total*sizeof(int)); + int *border_cell_i_local = (int *)malloc(receive_count_total*sizeof(int)); + int *border_cell_j_local = (int *)malloc(receive_count_total*sizeof(int)); + int *border_cell_level_local = (int *)malloc(receive_count_total*sizeof(int)); + L7_Push_Update(&border_cell_num[0], border_cell_num_local, i_push_handle); + L7_Push_Update(&border_cell_i[0], border_cell_i_local, i_push_handle); + L7_Push_Update(&border_cell_j[0], border_cell_j_local, i_push_handle); + L7_Push_Update(&border_cell_level[0], border_cell_level_local, i_push_handle); + + L7_Push_Free(&i_push_handle); + + ezcl_device_memory_delete(dev_border_cell_i); + ezcl_device_memory_delete(dev_border_cell_j); + ezcl_device_memory_delete(dev_border_cell_level); + ezcl_device_memory_delete(dev_border_cell_num); + + nbsize_local = receive_count_total; + + if (DEBUG) { + for (int ic = 0; ic < nbsize_local; ic++) { + fprintf(fp,"%d: Local Border cell %d is %d i %d j %d level %d\n",mype,ic,border_cell_num_local[ic], + border_cell_i_local[ic],border_cell_j_local[ic],border_cell_level_local[ic]); + } + } + + if (TIMING_LEVEL >= 2) { + gpu_timers[MESH_TIMER_PUSH_BOUNDARY] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); + cpu_timer_start(&tstart_lev2); + } + + nbsize_long = nbsize_local; + + dev_border_cell_num = ezcl_malloc(NULL, const_cast("dev_border_cell_num"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + dev_border_cell_i = ezcl_malloc(NULL, const_cast("dev_border_cell_i"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + dev_border_cell_j = ezcl_malloc(NULL, 
const_cast("dev_border_cell_j"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + dev_border_cell_level = ezcl_malloc(NULL, const_cast("dev_border_cell_level"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_border_cell_needed = ezcl_malloc(NULL, const_cast("dev_border_cell_needed"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_border_cell_needed_out = ezcl_malloc(NULL, const_cast("dev_border_cell_needed_out"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + + ezcl_enqueue_write_buffer(command_queue, dev_border_cell_num, CL_FALSE, 0, nbsize_local*sizeof(cl_int), &border_cell_num_local[0], NULL); + ezcl_enqueue_write_buffer(command_queue, dev_border_cell_i, CL_FALSE, 0, nbsize_local*sizeof(cl_int), &border_cell_i_local[0], NULL); + ezcl_enqueue_write_buffer(command_queue, dev_border_cell_j, CL_FALSE, 0, nbsize_local*sizeof(cl_int), &border_cell_j_local[0], NULL); + ezcl_enqueue_write_buffer(command_queue, dev_border_cell_level, CL_TRUE, 0, nbsize_local*sizeof(cl_int), &border_cell_level_local[0], NULL); + + //ezcl_enqueue_write_buffer(command_queue, dev_border_cell_needed, CL_TRUE, 0, nbsize_local*sizeof(cl_int), &border_cell_needed_local[0], NULL); + + free(border_cell_i_local); + free(border_cell_j_local); + free(border_cell_level_local); + + if (TIMING_LEVEL >= 2) { + gpu_timers[MESH_TIMER_LOCAL_LIST] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); + cpu_timer_start(&tstart_lev2); + } + + if (DEBUG) { + vector hash_tmp(hashsize); + ezcl_enqueue_read_buffer(command_queue, dev_hash, CL_TRUE, 0, hashsize*sizeof(cl_int), &hash_tmp[0], NULL); + + cl_mem dev_hash_header_check = gpu_get_hash_header(); + vector hash_header_check(hash_header_size); + ezcl_enqueue_read_buffer(command_queue, dev_hash_header_check, CL_TRUE, 0, hash_header_size*sizeof(cl_ulong), &hash_header_check[0], NULL); + + int gpu_hash_method = (int)hash_header_check[0]; + ulong gpu_hash_table_size = hash_header_check[1]; + ulong gpu_AA = 
hash_header_check[2]; + ulong gpu_BB = hash_header_check[3]; + + int jmaxglobal = (jmax+1)*IPOW2(levmx); + int imaxglobal = (imax+1)*IPOW2(levmx); + fprintf(fp,"\n HASH numbering before layer 1\n"); + for (int jj = jmaxglobal-1; jj>=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) { + fprintf(fp,"%5d",read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) ); + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii border_cell_needed_local(nbsize_local); + + ezcl_enqueue_read_buffer(command_queue, dev_border_cell_needed, CL_TRUE, 0, nbsize_local*sizeof(cl_int), &border_cell_needed_local[0], NULL); + + for(int ic=0; ic= 2) { + gpu_timers[MESH_TIMER_LAYER1] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); + cpu_timer_start(&tstart_lev2); + } + + if (DEBUG) { + print_dev_local(); + + vector hash_tmp(hashsize); + ezcl_enqueue_read_buffer(command_queue, dev_hash, CL_TRUE, 0, hashsize*sizeof(cl_int), &hash_tmp[0], NULL); + + cl_mem dev_hash_header_check = gpu_get_hash_header(); + vector hash_header_check(hash_header_size); + ezcl_enqueue_read_buffer(command_queue, dev_hash_header_check, CL_TRUE, 0, hash_header_size*sizeof(cl_ulong), &hash_header_check[0], NULL); + + int gpu_hash_method = (int)hash_header_check[0]; + ulong gpu_hash_table_size = hash_header_check[1]; + ulong gpu_AA = hash_header_check[2]; + ulong gpu_BB = hash_header_check[3]; + + int jmaxglobal = (jmax+1)*IPOW2(levmx); + int imaxglobal = (imax+1)*IPOW2(levmx); + fprintf(fp,"\n HASH numbering for 1 layer\n"); + for (int jj = jmaxglobal-1; jj>=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) { + fprintf(fp,"%5d",read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, 
(jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) ); + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii("dev_nbpacked"), &one, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + size_t group_size_long = group_size; + dev_ioffset = ezcl_malloc(NULL, const_cast("dev_ioffset"), &group_size_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + + ezcl_set_kernel_arg(kernel_calc_layer2, 0, sizeof(cl_int), (void *)&nbsize_local); + ezcl_set_kernel_arg(kernel_calc_layer2, 1, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_calc_layer2, 2, sizeof(cl_int), (void *)&noffset); + ezcl_set_kernel_arg(kernel_calc_layer2, 3, sizeof(cl_int), (void *)&levmx); + ezcl_set_kernel_arg(kernel_calc_layer2, 4, sizeof(cl_int), (void *)&imax); + ezcl_set_kernel_arg(kernel_calc_layer2, 5, sizeof(cl_int), (void *)&jmax); + ezcl_set_kernel_arg(kernel_calc_layer2, 6, sizeof(cl_mem), (void *)&dev_sizes); + ezcl_set_kernel_arg(kernel_calc_layer2, 7, sizeof(cl_mem), (void *)&dev_levtable); + ezcl_set_kernel_arg(kernel_calc_layer2, 8, sizeof(cl_mem), (void *)&dev_level); + ezcl_set_kernel_arg(kernel_calc_layer2, 9, sizeof(cl_mem), (void *)&dev_border_cell_i); + ezcl_set_kernel_arg(kernel_calc_layer2, 10, sizeof(cl_mem), (void *)&dev_border_cell_j); + ezcl_set_kernel_arg(kernel_calc_layer2, 11, sizeof(cl_mem), (void *)&dev_border_cell_level); + ezcl_set_kernel_arg(kernel_calc_layer2, 12, sizeof(cl_mem), (void *)&dev_border_cell_needed); + ezcl_set_kernel_arg(kernel_calc_layer2, 13, sizeof(cl_mem), (void *)&dev_border_cell_needed_out); + ezcl_set_kernel_arg(kernel_calc_layer2, 14, sizeof(cl_mem), (void *)&dev_hash_header); + ezcl_set_kernel_arg(kernel_calc_layer2, 15, sizeof(cl_mem), (void *)&dev_hash); + ezcl_set_kernel_arg(kernel_calc_layer2, 16, sizeof(cl_mem), (void *)&dev_ioffset); + ezcl_set_kernel_arg(kernel_calc_layer2, 17, sizeof(cl_mem), (void *)&dev_nbpacked); + ezcl_set_kernel_arg(kernel_calc_layer2, 18, 
nb_local_work_size*sizeof(cl_mem), NULL); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_layer2, 1, NULL, &nb_global_work_size, &nb_local_work_size, NULL); + + if (DEBUG){ + vector border_cell_needed_local(nbsize_local); + + ezcl_enqueue_read_buffer(command_queue, dev_border_cell_needed_out, CL_TRUE, 0, nbsize_local*sizeof(cl_int), &border_cell_needed_local[0], NULL); + for(int ic=0; ic= 0x0016) fprintf(fp,"%d: Second set of needed cells ic %3d cell %3d type %3d\n",mype,ic,border_cell_num_local[ic],border_cell_needed_local[ic]); + } + } + + free(border_cell_num_local); + + ezcl_device_memory_delete(dev_border_cell_needed); + + ezcl_set_kernel_arg(kernel_finish_scan, 0, sizeof(cl_int), (void *)&group_size); + ezcl_set_kernel_arg(kernel_finish_scan, 1, sizeof(cl_mem), (void *)&dev_ioffset); + ezcl_set_kernel_arg(kernel_finish_scan, 2, sizeof(cl_mem), (void *)&dev_nbpacked); + ezcl_set_kernel_arg(kernel_finish_scan, 3, nb_local_work_size*sizeof(cl_int), NULL); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_finish_scan, 1, NULL, &nb_local_work_size, &nb_local_work_size, NULL); + + int nbpacked; + ezcl_enqueue_read_buffer(command_queue, dev_nbpacked, CL_TRUE, 0, 1*sizeof(cl_int), &nbpacked, NULL); + ezcl_device_memory_delete(dev_nbpacked); + + if (TIMING_LEVEL >= 2) { + gpu_timers[MESH_TIMER_LAYER2] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); + cpu_timer_start(&tstart_lev2); + } + + nbsize_long = nbsize_local; + cl_mem dev_border_cell_i_new = ezcl_malloc(NULL, const_cast("dev_border_cell_i_new"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_border_cell_j_new = ezcl_malloc(NULL, const_cast("dev_border_cell_j_new"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_border_cell_level_new = ezcl_malloc(NULL, const_cast("dev_border_cell_level_new"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_indices_needed = ezcl_malloc(NULL, const_cast("dev_indices_needed"), &nbsize_long, sizeof(cl_int), 
CL_MEM_READ_WRITE, 0); + + cl_event get_border_data2_event; + + ezcl_set_kernel_arg(kernel_get_border_data2, 0, sizeof(cl_int), (void *)&nbsize_local); + ezcl_set_kernel_arg(kernel_get_border_data2, 1, sizeof(cl_mem), (void *)&dev_ioffset); + ezcl_set_kernel_arg(kernel_get_border_data2, 2, sizeof(cl_mem), (void *)&dev_border_cell_needed_out); + ezcl_set_kernel_arg(kernel_get_border_data2, 3, sizeof(cl_mem), (void *)&dev_border_cell_i); + ezcl_set_kernel_arg(kernel_get_border_data2, 4, sizeof(cl_mem), (void *)&dev_border_cell_j); + ezcl_set_kernel_arg(kernel_get_border_data2, 5, sizeof(cl_mem), (void *)&dev_border_cell_level); + ezcl_set_kernel_arg(kernel_get_border_data2, 6, sizeof(cl_mem), (void *)&dev_border_cell_num); + ezcl_set_kernel_arg(kernel_get_border_data2, 7, sizeof(cl_mem), (void *)&dev_border_cell_i_new); + ezcl_set_kernel_arg(kernel_get_border_data2, 8, sizeof(cl_mem), (void *)&dev_border_cell_j_new); + ezcl_set_kernel_arg(kernel_get_border_data2, 9, sizeof(cl_mem), (void *)&dev_border_cell_level_new); + ezcl_set_kernel_arg(kernel_get_border_data2, 10, sizeof(cl_mem), (void *)&dev_indices_needed); + ezcl_set_kernel_arg(kernel_get_border_data2, 11, local_work_size*sizeof(cl_uint), NULL); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_get_border_data2, 1, NULL, &nb_global_work_size, &nb_local_work_size, &get_border_data2_event); + + ezcl_device_memory_delete(dev_border_cell_num); + + ezcl_device_memory_swap(&dev_border_cell_i, &dev_border_cell_i_new); + ezcl_device_memory_swap(&dev_border_cell_j, &dev_border_cell_j_new); + ezcl_device_memory_swap(&dev_border_cell_level, &dev_border_cell_level_new); + + size_t nbp_local_work_size = 128; + size_t nbp_global_work_size = ((nbpacked + nbp_local_work_size - 1) /nbp_local_work_size) * nbp_local_work_size; + + cl_event calc_layer2_sethash_event; + + ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 0, sizeof(cl_int), (void *)&nbpacked); + ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 1, sizeof(cl_int), 
(void *)&ncells); + ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 2, sizeof(cl_int), (void *)&noffset); + ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 3, sizeof(cl_int), (void *)&levmx); + ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 4, sizeof(cl_int), (void *)&imax); + ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 5, sizeof(cl_int), (void *)&jmax); + ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 6, sizeof(cl_mem), (void *)&dev_sizes); + ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 7, sizeof(cl_mem), (void *)&dev_levtable); + ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 8, sizeof(cl_mem), (void *)&dev_levibeg); + ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 9, sizeof(cl_mem), (void *)&dev_leviend); + ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 10, sizeof(cl_mem), (void *)&dev_levjbeg); + ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 11, sizeof(cl_mem), (void *)&dev_levjend); + ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 12, sizeof(cl_mem), (void *)&dev_border_cell_i); + ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 13, sizeof(cl_mem), (void *)&dev_border_cell_j); + ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 14, sizeof(cl_mem), (void *)&dev_border_cell_level); + ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 15, sizeof(cl_mem), (void *)&dev_indices_needed); + ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 16, sizeof(cl_mem), (void *)&dev_border_cell_needed_out); + ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 17, sizeof(cl_mem), (void *)&dev_hash_header); + ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 18, sizeof(cl_mem), (void *)&dev_hash); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_layer2_sethash, 1, NULL, &nbp_global_work_size, &nbp_local_work_size, &calc_layer2_sethash_event); + + ezcl_wait_for_events(1, &calc_layer2_sethash_event); + ezcl_event_release(calc_layer2_sethash_event); + + ezcl_device_memory_delete(dev_ioffset); + + ezcl_wait_for_events(1, &get_border_data2_event); + 
ezcl_event_release(get_border_data2_event); + + if (TIMING_LEVEL >= 2) { + gpu_timers[MESH_TIMER_LAYER_LIST] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); + cpu_timer_start(&tstart_lev2); + } + + vector indices_needed(nbpacked); + + // read gpu border cell data + ezcl_enqueue_read_buffer(command_queue, dev_indices_needed, CL_TRUE, 0, nbpacked*sizeof(cl_int), &indices_needed[0], NULL); + + ezcl_device_memory_delete(dev_border_cell_i_new); + ezcl_device_memory_delete(dev_border_cell_j_new); + ezcl_device_memory_delete(dev_border_cell_level_new); + + if (DEBUG) { + print_dev_local(); + + vector hash_tmp(hashsize); + ezcl_enqueue_read_buffer(command_queue, dev_hash, CL_TRUE, 0, hashsize*sizeof(cl_int), &hash_tmp[0], NULL); + + cl_mem dev_hash_header_check = gpu_get_hash_header(); + vector hash_header_check(hash_header_size); + ezcl_enqueue_read_buffer(command_queue, dev_hash_header_check, CL_TRUE, 0, hash_header_size*sizeof(cl_ulong), &hash_header_check[0], NULL); + + int gpu_hash_method = (int)hash_header_check[0]; + ulong gpu_hash_table_size = hash_header_check[1]; + ulong gpu_AA = hash_header_check[2]; + ulong gpu_BB = hash_header_check[3]; + + int jmaxglobal = (jmax+1)*IPOW2(levmx); + int imaxglobal = (imax+1)*IPOW2(levmx); + fprintf(fp,"\n HASH numbering for 2 layer\n"); + for (int jj = jmaxglobal-1; jj>=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) { + fprintf(fp,"%5d",read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) ); + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii("dev_celltype_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_i_old = ezcl_malloc(NULL, const_cast("dev_i_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_j_old = ezcl_malloc(NULL, const_cast("dev_j_old"), 
&ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_level_old = ezcl_malloc(NULL, const_cast("dev_level_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_nlft_old = ezcl_malloc(NULL, const_cast("dev_nlft_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_nrht_old = ezcl_malloc(NULL, const_cast("dev_nrht_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_nbot_old = ezcl_malloc(NULL, const_cast("dev_nbot_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_ntop_old = ezcl_malloc(NULL, const_cast("dev_ntop_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + + ezcl_device_memory_swap(&dev_celltype_old, &dev_celltype); + ezcl_device_memory_swap(&dev_i_old, &dev_i ); + ezcl_device_memory_swap(&dev_j_old, &dev_j ); + ezcl_device_memory_swap(&dev_level_old, &dev_level ); + ezcl_device_memory_swap(&dev_nlft_old, &dev_nlft ); + ezcl_device_memory_swap(&dev_nrht_old, &dev_nrht ); + ezcl_device_memory_swap(&dev_nbot_old, &dev_nbot ); + ezcl_device_memory_swap(&dev_ntop_old, &dev_ntop ); + + cl_event copy_mesh_data_event; + + ezcl_set_kernel_arg(kernel_copy_mesh_data, 0, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_copy_mesh_data, 1, sizeof(cl_mem), (void *)&dev_celltype_old); + ezcl_set_kernel_arg(kernel_copy_mesh_data, 2, sizeof(cl_mem), (void *)&dev_celltype); + ezcl_set_kernel_arg(kernel_copy_mesh_data, 3, sizeof(cl_mem), (void *)&dev_i_old); + ezcl_set_kernel_arg(kernel_copy_mesh_data, 4, sizeof(cl_mem), (void *)&dev_i); + ezcl_set_kernel_arg(kernel_copy_mesh_data, 5, sizeof(cl_mem), (void *)&dev_j_old); + ezcl_set_kernel_arg(kernel_copy_mesh_data, 6, sizeof(cl_mem), (void *)&dev_j); + ezcl_set_kernel_arg(kernel_copy_mesh_data, 7, sizeof(cl_mem), (void *)&dev_level_old); + ezcl_set_kernel_arg(kernel_copy_mesh_data, 8, sizeof(cl_mem), (void *)&dev_level); + ezcl_set_kernel_arg(kernel_copy_mesh_data, 9, sizeof(cl_mem), (void *)&dev_nlft_old); 
+ ezcl_set_kernel_arg(kernel_copy_mesh_data, 10, sizeof(cl_mem), (void *)&dev_nlft); + ezcl_set_kernel_arg(kernel_copy_mesh_data, 11, sizeof(cl_mem), (void *)&dev_nrht_old); + ezcl_set_kernel_arg(kernel_copy_mesh_data, 12, sizeof(cl_mem), (void *)&dev_nrht); + ezcl_set_kernel_arg(kernel_copy_mesh_data, 13, sizeof(cl_mem), (void *)&dev_nbot_old); + ezcl_set_kernel_arg(kernel_copy_mesh_data, 14, sizeof(cl_mem), (void *)&dev_nbot); + ezcl_set_kernel_arg(kernel_copy_mesh_data, 15, sizeof(cl_mem), (void *)&dev_ntop_old); + ezcl_set_kernel_arg(kernel_copy_mesh_data, 16, sizeof(cl_mem), (void *)&dev_ntop); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_copy_mesh_data, 1, NULL, &global_work_size, &local_work_size, &copy_mesh_data_event); + + ezcl_device_memory_delete(dev_celltype_old); + ezcl_device_memory_delete(dev_i_old); + ezcl_device_memory_delete(dev_j_old); + ezcl_device_memory_delete(dev_level_old); + ezcl_device_memory_delete(dev_nlft_old); + ezcl_device_memory_delete(dev_nrht_old); + ezcl_device_memory_delete(dev_nbot_old); + ezcl_device_memory_delete(dev_ntop_old); + + ezcl_wait_for_events(1, &copy_mesh_data_event); + ezcl_event_release(copy_mesh_data_event); + } + + if (TIMING_LEVEL >= 2) { + gpu_timers[MESH_TIMER_COPY_MESH_DATA] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); + cpu_timer_start(&tstart_lev2); + } + + nb_global_work_size = ((nbpacked + nb_local_work_size - 1) /nb_local_work_size) * nb_local_work_size; + +#ifdef BOUNDS_CHECK + if (ezcl_get_device_mem_nelements(dev_i) < (int)ncells_ghost || + ezcl_get_device_mem_nelements(dev_j) < (int)ncells_ghost || + ezcl_get_device_mem_nelements(dev_level) < (int)ncells_ghost || + ezcl_get_device_mem_nelements(dev_celltype) < (int)ncells_ghost || + ezcl_get_device_mem_nelements(dev_nlft) < (int)ncells_ghost || + ezcl_get_device_mem_nelements(dev_nrht) < (int)ncells_ghost || + ezcl_get_device_mem_nelements(dev_nbot) < (int)ncells_ghost || + ezcl_get_device_mem_nelements(dev_ntop) < (int)ncells_ghost ){ + 
printf("DEBUG size issue at %d\n",__LINE__); + } + if (ezcl_get_device_mem_nelements(dev_border_cell_i) < nbpacked || + ezcl_get_device_mem_nelements(dev_border_cell_j) < nbpacked || + ezcl_get_device_mem_nelements(dev_border_cell_level) < nbpacked ){ + printf("DEBUG size issue at %d\n",__LINE__); + } +#endif + + cl_event fill_mesh_ghost_event; + + ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 0, sizeof(cl_int), (void *)&nbpacked); + ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 1, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 2, sizeof(cl_mem), (void *)&dev_levibeg); + ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 3, sizeof(cl_mem), (void *)&dev_leviend); + ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 4, sizeof(cl_mem), (void *)&dev_levjbeg); + ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 5, sizeof(cl_mem), (void *)&dev_levjend); + ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 6, sizeof(cl_mem), (void *)&dev_border_cell_i); + ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 7, sizeof(cl_mem), (void *)&dev_border_cell_j); + ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 8, sizeof(cl_mem), (void *)&dev_border_cell_level); + ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 9, sizeof(cl_mem), (void *)&dev_i); + ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 10, sizeof(cl_mem), (void *)&dev_j); + ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 11, sizeof(cl_mem), (void *)&dev_level); + ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 12, sizeof(cl_mem), (void *)&dev_celltype); + ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 13, sizeof(cl_mem), (void *)&dev_nlft); + ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 14, sizeof(cl_mem), (void *)&dev_nrht); + ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 15, sizeof(cl_mem), (void *)&dev_nbot); + ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 16, sizeof(cl_mem), (void *)&dev_ntop); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_fill_mesh_ghost, 1, NULL, &nb_global_work_size, &nb_local_work_size, &fill_mesh_ghost_event); + + 
ezcl_wait_for_events(1, &fill_mesh_ghost_event); + ezcl_event_release(fill_mesh_ghost_event); + + if (TIMING_LEVEL >= 2) { + gpu_timers[MESH_TIMER_FILL_MESH_GHOST] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); + cpu_timer_start(&tstart_lev2); + } + + if (DEBUG){ + fprintf(fp,"After copying i,j, level to ghost cells\n"); + print_dev_local(); + } + + ezcl_device_memory_delete(dev_border_cell_i); + ezcl_device_memory_delete(dev_border_cell_j); + ezcl_device_memory_delete(dev_border_cell_level); + + size_t ghost_local_work_size = 128; + size_t ghost_global_work_size = ((ncells_ghost + ghost_local_work_size - 1) /ghost_local_work_size) * ghost_local_work_size; + + cl_event fill_neighbor_ghost_event; + + ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 0, sizeof(cl_int), (void *)&ncells_ghost); + ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 1, sizeof(cl_int), (void *)&levmx); + ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 2, sizeof(cl_int), (void *)&imax); + ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 3, sizeof(cl_int), (void *)&jmax); + ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 4, sizeof(cl_mem), (void *)&dev_sizes); + ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 5, sizeof(cl_mem), (void *)&dev_levtable); + ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 6, sizeof(cl_mem), (void *)&dev_i); + ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 7, sizeof(cl_mem), (void *)&dev_j); + ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 8, sizeof(cl_mem), (void *)&dev_level); + ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 9, sizeof(cl_mem), (void *)&dev_hash_header); + ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 10, sizeof(cl_mem), (void *)&dev_hash); + ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 11, sizeof(cl_mem), (void *)&dev_nlft); + ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 12, sizeof(cl_mem), (void *)&dev_nrht); + ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 13, sizeof(cl_mem), (void *)&dev_nbot); + 
ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 14, sizeof(cl_mem), (void *)&dev_ntop); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_fill_neighbor_ghost, 1, NULL, &ghost_global_work_size, &ghost_local_work_size, &fill_neighbor_ghost_event); + + ezcl_wait_for_events(1, &fill_neighbor_ghost_event); + ezcl_event_release(fill_neighbor_ghost_event); + + if (TIMING_LEVEL >= 2) { + gpu_timers[MESH_TIMER_FILL_NEIGH_GHOST] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); + cpu_timer_start(&tstart_lev2); + } + + if (DEBUG){ + fprintf(fp,"After setting neighbors through ghost cells\n"); + print_dev_local(); + } + +#ifdef BOUNDS_CHECK + if (ezcl_get_device_mem_nelements(dev_nlft) < (int)ncells_ghost || + ezcl_get_device_mem_nelements(dev_nrht) < (int)ncells_ghost || + ezcl_get_device_mem_nelements(dev_nbot) < (int)ncells_ghost || + ezcl_get_device_mem_nelements(dev_ntop) < (int)ncells_ghost ){ + printf("%d: Warning sizes for set_corner_neighbor not right ncells ghost %d nlft size %d\n",mype,ncells_ghost,ezcl_get_device_mem_nelements(dev_nlft)); + } +#endif + + if (TIMING_LEVEL >= 2) { + gpu_timers[MESH_TIMER_SET_CORNER_NEIGH] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); + cpu_timer_start(&tstart_lev2); + } + + if (DEBUG){ + fprintf(fp,"After setting corner neighbors\n"); + print_dev_local(); + } + +#ifdef BOUNDS_CHECK + if (ezcl_get_device_mem_nelements(dev_nlft) < (int)ncells_ghost || + ezcl_get_device_mem_nelements(dev_nrht) < (int)ncells_ghost || + ezcl_get_device_mem_nelements(dev_nbot) < (int)ncells_ghost || + ezcl_get_device_mem_nelements(dev_ntop) < (int)ncells_ghost ){ + printf("%d: Warning sizes for adjust neighbors not right ncells ghost %d nlft size %d\n",mype,ncells_ghost,ezcl_get_device_mem_nelements(dev_nlft)); + } + if (ezcl_get_device_mem_nelements(dev_indices_needed) < (int)(ncells_ghost-ncells) ){ + printf("%d: Warning indices size wrong nghost %d size indices_needed\n",mype,ncells_ghost-ncells,ezcl_get_device_mem_nelements(dev_indices_needed)); + } 
+#endif + + cl_event adjust_neighbors_local_event; + + ezcl_set_kernel_arg(kernel_adjust_neighbors_local, 0, sizeof(cl_int), (void *)&ncells_ghost); + ezcl_set_kernel_arg(kernel_adjust_neighbors_local, 1, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_adjust_neighbors_local, 2, sizeof(cl_int), (void *)&noffset); + ezcl_set_kernel_arg(kernel_adjust_neighbors_local, 3, sizeof(cl_mem), (void *)&dev_indices_needed); + ezcl_set_kernel_arg(kernel_adjust_neighbors_local, 4, sizeof(cl_mem), (void *)&dev_nlft); + ezcl_set_kernel_arg(kernel_adjust_neighbors_local, 5, sizeof(cl_mem), (void *)&dev_nrht); + ezcl_set_kernel_arg(kernel_adjust_neighbors_local, 6, sizeof(cl_mem), (void *)&dev_nbot); + ezcl_set_kernel_arg(kernel_adjust_neighbors_local, 7, sizeof(cl_mem), (void *)&dev_ntop); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_adjust_neighbors_local, 1, NULL, &ghost_global_work_size, &ghost_local_work_size, &adjust_neighbors_local_event); + + ezcl_device_memory_delete(dev_indices_needed); + + if (DEBUG){ + fprintf(fp,"After adjusting neighbors to local indices\n"); + print_dev_local(); + } + + ezcl_wait_for_events(1, &adjust_neighbors_local_event); + ezcl_event_release(adjust_neighbors_local_event); + + if (TIMING_LEVEL >= 2) { + gpu_timers[MESH_TIMER_NEIGH_ADJUST] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); + cpu_timer_start(&tstart_lev2); + } + + offtile_ratio_local = (offtile_ratio_local*(double)offtile_local_count) + ((double)nghost / (double)ncells); + offtile_local_count++; + offtile_ratio_local /= offtile_local_count; + + if (cell_handle) L7_Free(&cell_handle); + cell_handle=0; + + if (DEBUG){ + fprintf(fp,"%d: SETUP ncells %ld noffset %d nghost %d\n",mype,ncells,noffset,nghost); + for (int ic=0; ic nlft_tmp(ncells_ghost); + vector nrht_tmp(ncells_ghost); + vector nbot_tmp(ncells_ghost); + vector ntop_tmp(ncells_ghost); + vector level_tmp(ncells_ghost); + vector H_tmp(ncells_ghost); + ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 
0, ncells_ghost*sizeof(cl_int), &nlft_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nrht_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nbot_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &ntop_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_level, CL_TRUE, 0, ncells_ghost*sizeof(cl_int), &level_tmp[0], NULL); + for (uint ic=0; ic= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nlft %d\n",mype,__LINE__,ic,nl); + if (level_tmp[nl] > level_tmp[ic]){ + int ntl = ntop_tmp[nl]; + if (ntl<0 || ntl>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d global %d nlft %d ntop of nlft %d\n",mype,__LINE__,ic,ic+noffset,nl,ntl); + } + int nr = nrht_tmp[ic]; + if (nr<0 || nr>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nrht %d\n",mype,__LINE__,ic,nr); + if (level_tmp[nr] > level_tmp[ic]){ + int ntr = ntop_tmp[nr]; + if (ntr<0 || ntr>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d ntop of nrht %d\n",mype,__LINE__,ic,ntr); + } + int nb = nbot_tmp[ic]; + if (nb<0 || nb>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nbot %d\n",mype,__LINE__,ic,nb); + if (level_tmp[nb] > level_tmp[ic]){ + int nrb = nrht_tmp[nb]; + if (nrb<0 || nrb>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nrht of nbot %d\n",mype,__LINE__,ic,nrb); + } + int nt = ntop_tmp[ic]; + if (nt<0 || nt>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d global %d ntop %d ncells %ld ncells_ghost %ld\n",mype,__LINE__,ic,ic+noffset,nt,ncells,ncells_ghost); + if (level_tmp[nt] > level_tmp[ic]){ + int nrt = nrht_tmp[nt]; + if (nrt<0 || nrt>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nrht of ntop %d\n",mype,__LINE__,ic,nrt); + } + } + } +#endif + + if (TIMING_LEVEL >= 2) { + gpu_timers[MESH_TIMER_SETUP_COMM] += 
(long)(cpu_timer_stop(tstart_lev2)*1.0e9); + } + + if (DEBUG) { + print_dev_local(); + + vector hash_tmp(hashsize); + ezcl_enqueue_read_buffer(command_queue, dev_hash, CL_FALSE, 0, hashsize*sizeof(cl_int), &hash_tmp[0], NULL); + + cl_mem dev_hash_header_check = gpu_get_hash_header(); + vector hash_header_check(hash_header_size); + ezcl_enqueue_read_buffer(command_queue, dev_hash_header_check, CL_TRUE, 0, hash_header_size*sizeof(cl_ulong), &hash_header_check[0], NULL); + + int gpu_hash_method = (int)hash_header_check[0]; + ulong gpu_hash_table_size = hash_header_check[1]; + ulong gpu_AA = hash_header_check[2]; + ulong gpu_BB = hash_header_check[3]; + + vector nlft_tmp(ncells_ghost); + vector nrht_tmp(ncells_ghost); + vector nbot_tmp(ncells_ghost); + vector ntop_tmp(ncells_ghost); + ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nlft_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nrht_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nbot_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_TRUE, 0, ncells_ghost*sizeof(cl_int), &ntop_tmp[0], NULL); + + int jmaxglobal = (jmax+1)*IPOW2(levmx); + int imaxglobal = (imax+1)*IPOW2(levmx); + fprintf(fp,"\n HASH numbering\n"); + for (int jj = jmaxglobal-1; jj>=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) { + fprintf(fp,"%5d",read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) ); + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) && (hashval >= 0 && hashval < (int)ncells) ) { + 
fprintf(fp,"%5d",nlft_tmp[hashval]); + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) && (hashval >= 0 && hashval < (int)ncells) ) { + fprintf(fp,"%5d",nrht_tmp[hashval]); + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) && (hashval >= 0 && hashval < (int)ncells) ) { + fprintf(fp,"%5d",nbot_tmp[hashval]); + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii=0; jj--){ + fprintf(fp,"%2d: %4d:",mype,jj); + if (jj >= jminsize && jj < jmaxsize) { + for (int ii = 0; ii= iminsize && ii < imaxsize) && (hashval >= 0 && hashval < (int)ncells) ) { + fprintf(fp,"%5d",ntop_tmp[hashval]); + } else { + fprintf(fp," "); + } + } + } + fprintf(fp,"\n"); + } + fprintf(fp,"%2d: ",mype); + for (int ii = 0; ii i_tmp(ncells_ghost); + vector j_tmp(ncells_ghost); + vector level_tmp(ncells_ghost); + vector nlft_tmp(ncells_ghost); + vector nrht_tmp(ncells_ghost); + vector nbot_tmp(ncells_ghost); + vector ntop_tmp(ncells_ghost); + ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &i_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_j, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &j_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_level, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &level_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nlft_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nrht_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, 
dev_nbot, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nbot_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_TRUE, 0, ncells_ghost*sizeof(cl_int), &ntop_tmp[0], NULL); + + for (uint ic=0; ic &dsym, vector &xsym, vector &ysym) +{ + TBounds box; + vector index_list( IPOW2(levmx*levmx) ); + + int num; + for (uint ic=0; ic indices_needed(indices_needed_count); + for (int iz = lower_block_start; iz <= lower_block_end; iz++, in++){ + indices_needed[in]=iz; + } + for (int iz = upper_block_start; iz <= upper_block_end; iz++, in++){ + indices_needed[in]=iz; + } + + int load_balance_handle = 0; + L7_Setup(0, noffset_old, ncells_old, &indices_needed[0], indices_needed_count, &load_balance_handle); + + //printf("\n%d: DEBUG load balance report\n",mype); + + state_memory.memory_realloc_all(ncells_old+indices_needed_count); + + MallocPlus state_memory_old = state_memory; + + + malloc_plus_memory_entry *memory_item; + + for (memory_item = state_memory_old.memory_entry_by_name_begin(); + memory_item != state_memory_old.memory_entry_by_name_end(); + memory_item = state_memory_old.memory_entry_by_name_next() ) { + + //if (mype == 0) printf("DEBUG -- it.mem_name %s elsize %lu\n",memory_item->mem_name,memory_item->mem_elsize); + + if (memory_item->mem_elsize == 8) { + double *mem_ptr_double = (double *)memory_item->mem_ptr; + + int flags = state_memory.get_memory_flags(mem_ptr_double); + double *state_temp_double = (double *) state_memory.memory_malloc(ncells, sizeof(double), + "state_temp_double", flags); + + //printf("%d: DEBUG L7_Update in do_load_balance_local mem_ptr %p\n",mype,mem_ptr); + L7_Update(mem_ptr_double, L7_DOUBLE, load_balance_handle); + in = 0; + if(lower_block_size > 0) { + for(; in < MIN(lower_block_size, (int)ncells); in++) { + state_temp_double[in] = mem_ptr_double[ncells_old + in]; + } + } + + for(int ic = MAX((noffset - noffset_old), 0); (ic < ncells_old) && (in < (int)ncells); ic++, in++) { + state_temp_double[in] = mem_ptr_double[ic]; + } + 
+ if(upper_block_size > 0) { + int ic = ncells_old + lower_block_size; + for(int k = max(noffset-upper_block_start,0); ((k+ic) < (ncells_old+indices_needed_count)) && (in < (int)ncells); k++, in++) { + state_temp_double[in] = mem_ptr_double[ic+k]; + } + } + state_memory.memory_replace(mem_ptr_double, state_temp_double); + } else if (memory_item->mem_elsize == 4) { + float *mem_ptr_float = (float *)memory_item->mem_ptr; + + int flags = state_memory.get_memory_flags(mem_ptr_float); + float *state_temp_float = (float *) state_memory.memory_malloc(ncells, sizeof(float), + "state_temp_float", flags); + + //printf("%d: DEBUG L7_Update in do_load_balance_local mem_ptr %p\n",mype,mem_ptr); + L7_Update(mem_ptr_float, L7_FLOAT, load_balance_handle); + in = 0; + if(lower_block_size > 0) { + for(; in < MIN(lower_block_size, (int)ncells); in++) { + state_temp_float[in] = mem_ptr_float[ncells_old + in]; + } + } + + for(int ic = MAX((noffset - noffset_old), 0); (ic < ncells_old) && (in < (int)ncells); ic++, in++) { + state_temp_float[in] = mem_ptr_float[ic]; + } + + if(upper_block_size > 0) { + int ic = ncells_old + lower_block_size; + for(int k = max(noffset-upper_block_start,0); ((k+ic) < (ncells_old+indices_needed_count)) && (in < (int)ncells); k++, in++) { + state_temp_float[in] = mem_ptr_float[ic+k]; + } + } + state_memory.memory_replace(mem_ptr_float, state_temp_float); + } + } + + mesh_memory.memory_realloc_all(ncells_old+indices_needed_count); + + MallocPlus mesh_memory_old = mesh_memory; + + for (memory_item = mesh_memory_old.memory_entry_by_name_begin(); + memory_item != mesh_memory_old.memory_entry_by_name_end(); + memory_item = mesh_memory_old.memory_entry_by_name_next() ) { + + //if (mype == 0) printf("DEBUG -- it.mem_name %s elsize %lu\n",memory_item->mem_name,memory_item->mem_elsize); + + if (memory_item->mem_elsize == 8) { + long long *mem_ptr_long = (long long *)memory_item->mem_ptr; + + int flags = mesh_memory.get_memory_flags(mem_ptr_long); + long long 
*mesh_temp_long = (long long *)mesh_memory.memory_malloc(ncells, sizeof(long long), "mesh_temp_long", flags); + + //printf("%d: DEBUG L7_Update in do_load_balance_local mem_ptr %p\n",mype,mem_ptr); + L7_Update(mem_ptr_long, L7_LONG_LONG_INT, load_balance_handle); + in = 0; + if(lower_block_size > 0) { + for(; in < MIN(lower_block_size, (int)ncells); in++) { + mesh_temp_long[in] = mem_ptr_long[ncells_old + in]; + } + } + + for(int ic = MAX((noffset - noffset_old), 0); (ic < ncells_old) && (in < (int)ncells); ic++, in++) { + mesh_temp_long[in] = mem_ptr_long[ic]; + } + + if(upper_block_size > 0) { + int ic = ncells_old + lower_block_size; + for(int k = max(noffset-upper_block_start,0); ((k+ic) < (ncells_old+indices_needed_count)) && (in < (int)ncells); k++, in++) { + mesh_temp_long[in] = mem_ptr_long[ic+k]; + } + } + mesh_memory.memory_replace(mem_ptr_long, mesh_temp_long); + + } else { + int *mem_ptr_int = (int *)memory_item->mem_ptr; + + int flags = mesh_memory.get_memory_flags(mem_ptr_int); + int *mesh_temp_int = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "mesh_temp_int", flags); + + //printf("%d: DEBUG L7_Update in do_load_balance_local mem_ptr %p\n",mype,mem_ptr); + L7_Update(mem_ptr_int, L7_INT, load_balance_handle); + in = 0; + if(lower_block_size > 0) { + for(; in < MIN(lower_block_size, (int)ncells); in++) { + mesh_temp_int[in] = mem_ptr_int[ncells_old + in]; + } + } + + for(int ic = MAX((noffset - noffset_old), 0); (ic < ncells_old) && (in < (int)ncells); ic++, in++) { + mesh_temp_int[in] = mem_ptr_int[ic]; + } + + if(upper_block_size > 0) { + int ic = ncells_old + lower_block_size; + for(int k = max(noffset-upper_block_start,0); ((k+ic) < (ncells_old+indices_needed_count)) && (in < (int)ncells); k++, in++) { + mesh_temp_int[in] = mem_ptr_int[ic+k]; + } + } + mesh_memory.memory_replace(mem_ptr_int, mesh_temp_int); + + } + } + + L7_Free(&load_balance_handle); + load_balance_handle = 0; + + memory_reset_ptrs(); + + //mesh_memory.memory_report(); + 
//state_memory.memory_report(); + //printf("%d: DEBUG end load balance report\n\n",mype); + calc_celltype(ncells); + } + + + cpu_timers[MESH_TIMER_LOAD_BALANCE] += cpu_timer_stop(tstart_cpu); +} +#endif + +#ifdef HAVE_OPENCL +#ifdef HAVE_MPI +int Mesh::gpu_do_load_balance_local(size_t numcells, float *weight, MallocPlus &gpu_state_memory) +{ + int do_load_balance_global = 0; + + if (! gpu_do_rezone) return(do_load_balance_global); + + struct timeval tstart_cpu; + cpu_timer_start(&tstart_cpu); + + // To get rid of compiler warning + if (DEBUG && weight != NULL) printf("DEBUG weight[0] = %f\n",weight[0]); + + int ncells_old = numcells; + int noffset_old = ndispl[mype]; + +// Need to add weight array to load balance if it is not NULL +// Need to add tolerance to when load balance is done + + int nsizes_old = 0; + for (int ip=0; ip ncells_old) do_whole_segment = 1; + + int upper_segment_size = ( (noffset_old+ncells_old) - (noffset+ncells) ); + int upper_segment_start = (noffset_old+ncells_old) - upper_segment_size - noffset_old; + if (upper_segment_size > ncells_old) do_whole_segment=1; + + int in = 0; + vector indices_needed(indices_needed_count); + for (int iz = lower_block_start; iz <= lower_block_end; iz++, in++){ + indices_needed[in]=iz; + } + for (int iz = upper_block_start; iz <= upper_block_end; iz++, in++){ + indices_needed[in]=iz; + } + + int load_balance_handle = 0; + L7_Setup(0, noffset_old, ncells_old, &indices_needed[0], indices_needed_count, &load_balance_handle); + + size_t local_work_size = 128; + size_t global_work_size = ((ncells + local_work_size - 1) / local_work_size) * local_work_size; + + // printf("MYPE%d: \t ncells = %d \t ncells_old = %d \t ncells_global = %d \n", mype, ncells, ncells_old, ncells_global); + + // Allocate lower block on GPU + size_t low_block_size = MAX(1, lower_block_size); + cl_mem dev_state_var_lower = ezcl_malloc(NULL, const_cast("dev_state_var_lower"), &low_block_size, sizeof(cl_real_t), CL_MEM_READ_WRITE, 0); + + // 
Allocate upper block on GPU + size_t up_block_size = MAX(1, upper_block_size); + cl_mem dev_state_var_upper = ezcl_malloc(NULL, const_cast("dev_state_var_upper"), &up_block_size, sizeof(cl_real_t), CL_MEM_READ_WRITE, 0); + + MallocPlus gpu_state_memory_old = gpu_state_memory; + malloc_plus_memory_entry *memory_item; + + for (memory_item = gpu_state_memory_old.memory_entry_by_name_begin(); + memory_item != gpu_state_memory_old.memory_entry_by_name_end(); + memory_item = gpu_state_memory_old.memory_entry_by_name_next() ) { + //printf("DEBUG -- it.mem_name %s elsize %lu\n",memory_item->mem_name,memory_item->mem_elsize); + cl_mem dev_state_mem_ptr = (cl_mem)memory_item->mem_ptr; + + if (memory_item->mem_elsize == 8){ +#ifndef MINIMUM_PRECISION + vector state_var_tmp(ncells_old+indices_needed_count,0.0); + + // Read current state values from GPU and write to CPU arrays + if (do_whole_segment) { + ezcl_enqueue_read_buffer(command_queue, dev_state_mem_ptr, CL_TRUE, 0, ncells_old*sizeof(cl_double), &state_var_tmp[0], NULL); + } else { + // Read lower block from GPU + if (lower_segment_size > 0) { + ezcl_enqueue_read_buffer(command_queue, dev_state_mem_ptr, CL_TRUE, 0, lower_segment_size*sizeof(cl_double), &state_var_tmp[0], NULL); + } + // Read upper block from GPU + if (upper_segment_size > 0) { + ezcl_enqueue_read_buffer(command_queue, dev_state_mem_ptr, CL_TRUE, upper_segment_start*sizeof(cl_double), upper_segment_size*sizeof(cl_double), &state_var_tmp[upper_segment_start], NULL); + } + } + + // Update arrays with L7 + L7_Update(&state_var_tmp[0], L7_DOUBLE, load_balance_handle); + + // Set lower block on GPU + if(lower_block_size > 0) { + ezcl_enqueue_write_buffer(command_queue, dev_state_var_lower, CL_FALSE, 0, lower_block_size*sizeof(cl_double), &state_var_tmp[ncells_old], NULL); + } + // Set upper block on GPU + if(upper_block_size > 0) { + ezcl_enqueue_write_buffer(command_queue, dev_state_var_upper, CL_FALSE, 0, upper_block_size*sizeof(cl_double), 
&state_var_tmp[ncells_old+lower_block_size], NULL); + } + + // Allocate space on GPU for temp arrays (used in double buffering) + cl_mem dev_state_var_new = ezcl_malloc(NULL, gpu_state_memory.get_memory_name(dev_state_mem_ptr), &ncells, sizeof(cl_double), CL_MEM_READ_WRITE, 0); + gpu_state_memory.memory_add(dev_state_var_new, ncells, sizeof(cl_double), "dev_state_var_new", DEVICE_REGULAR_MEMORY); + + //printf("DEBUG memory for proc %d is %p dev_state_new is %p\n",mype,dev_state_mem_ptr,dev_state_var_new); + + ezcl_set_kernel_arg(kernel_do_load_balance_double, 0, sizeof(cl_int), &ncells); + ezcl_set_kernel_arg(kernel_do_load_balance_double, 1, sizeof(cl_int), &lower_block_size); + ezcl_set_kernel_arg(kernel_do_load_balance_double, 2, sizeof(cl_int), &middle_block_size); + ezcl_set_kernel_arg(kernel_do_load_balance_double, 3, sizeof(cl_int), &middle_block_start); + ezcl_set_kernel_arg(kernel_do_load_balance_double, 4, sizeof(cl_mem), &dev_state_mem_ptr); + ezcl_set_kernel_arg(kernel_do_load_balance_double, 5, sizeof(cl_mem), &dev_state_var_lower); + ezcl_set_kernel_arg(kernel_do_load_balance_double, 6, sizeof(cl_mem), &dev_state_var_upper); + ezcl_set_kernel_arg(kernel_do_load_balance_double, 7, sizeof(cl_mem), &dev_state_var_new); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_do_load_balance_double, 1, NULL, &global_work_size, &local_work_size, NULL); + + gpu_state_memory.memory_replace(dev_state_mem_ptr, dev_state_var_new); +#else + printf("ERROR -- can't have double type for state variable\n"); + exit(1); +#endif + } else if (memory_item->mem_elsize == 4){ + vector state_var_tmp(ncells_old+indices_needed_count,0.0); + + // Read current state values from GPU and write to CPU arrays + if (do_whole_segment) { + ezcl_enqueue_read_buffer(command_queue, dev_state_mem_ptr, CL_TRUE, 0, ncells_old*sizeof(cl_float), &state_var_tmp[0], NULL); + } else { + // Read lower block from GPU + if (lower_segment_size > 0) { + ezcl_enqueue_read_buffer(command_queue, 
dev_state_mem_ptr, CL_TRUE, 0, lower_segment_size*sizeof(cl_float), &state_var_tmp[0], NULL); + } + // Read upper block from GPU + if (upper_segment_size > 0) { + ezcl_enqueue_read_buffer(command_queue, dev_state_mem_ptr, CL_TRUE, upper_segment_start*sizeof(cl_float), upper_segment_size*sizeof(cl_float), &state_var_tmp[upper_segment_start], NULL); + } + } + + // Update arrays with L7 + L7_Update(&state_var_tmp[0], L7_FLOAT, load_balance_handle); + + // Set lower block on GPU + if(lower_block_size > 0) { + ezcl_enqueue_write_buffer(command_queue, dev_state_var_lower, CL_FALSE, 0, lower_block_size*sizeof(cl_float), &state_var_tmp[ncells_old], NULL); + } + // Set upper block on GPU + if(upper_block_size > 0) { + ezcl_enqueue_write_buffer(command_queue, dev_state_var_upper, CL_FALSE, 0, upper_block_size*sizeof(cl_float), &state_var_tmp[ncells_old+lower_block_size], NULL); + } + + // Allocate space on GPU for temp arrays (used in double buffering) + cl_mem dev_state_var_new = ezcl_malloc(NULL, gpu_state_memory.get_memory_name(dev_state_mem_ptr), &ncells, sizeof(cl_float), CL_MEM_READ_WRITE, 0); + gpu_state_memory.memory_add(dev_state_var_new, ncells, sizeof(cl_float), "dev_state_var_new", DEVICE_REGULAR_MEMORY); + + //printf("DEBUG memory for proc %d is %p dev_state_new is %p\n",mype,dev_state_mem_ptr,dev_state_var_new); + + ezcl_set_kernel_arg(kernel_do_load_balance_float, 0, sizeof(cl_int), &ncells); + ezcl_set_kernel_arg(kernel_do_load_balance_float, 1, sizeof(cl_int), &lower_block_size); + ezcl_set_kernel_arg(kernel_do_load_balance_float, 2, sizeof(cl_int), &middle_block_size); + ezcl_set_kernel_arg(kernel_do_load_balance_float, 3, sizeof(cl_int), &middle_block_start); + ezcl_set_kernel_arg(kernel_do_load_balance_float, 4, sizeof(cl_mem), &dev_state_mem_ptr); + ezcl_set_kernel_arg(kernel_do_load_balance_float, 5, sizeof(cl_mem), &dev_state_var_lower); + ezcl_set_kernel_arg(kernel_do_load_balance_float, 6, sizeof(cl_mem), &dev_state_var_upper); + 
ezcl_set_kernel_arg(kernel_do_load_balance_float, 7, sizeof(cl_mem), &dev_state_var_new); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_do_load_balance_float, 1, NULL, &global_work_size, &local_work_size, NULL); + + gpu_state_memory.memory_replace(dev_state_mem_ptr, dev_state_var_new); + } + } + + ezcl_device_memory_delete(dev_state_var_lower); + ezcl_device_memory_delete(dev_state_var_upper); + + vector i_tmp(ncells_old+indices_needed_count,0); + vector j_tmp(ncells_old+indices_needed_count,0); + vector level_tmp(ncells_old+indices_needed_count,0); + vector celltype_tmp(ncells_old+indices_needed_count,0); + + if (do_whole_segment) { + ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, 0, ncells_old*sizeof(cl_int), &i_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_j, CL_FALSE, 0, ncells_old*sizeof(cl_int), &j_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_level, CL_FALSE, 0, ncells_old*sizeof(cl_int), &level_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_celltype, CL_TRUE, 0, ncells_old*sizeof(cl_int), &celltype_tmp[0], NULL); + } else { + if (lower_segment_size > 0) { + ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, 0, lower_segment_size*sizeof(cl_int), &i_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_j, CL_FALSE, 0, lower_segment_size*sizeof(cl_int), &j_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_level, CL_FALSE, 0, lower_segment_size*sizeof(cl_int), &level_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_celltype, CL_TRUE, 0, lower_segment_size*sizeof(cl_int), &celltype_tmp[0], NULL); + } + if (upper_segment_size > 0) { + ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, upper_segment_start*sizeof(cl_int), upper_segment_size*sizeof(cl_int), &i_tmp[upper_segment_start], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_j, CL_FALSE, upper_segment_start*sizeof(cl_int), upper_segment_size*sizeof(cl_int), &j_tmp[upper_segment_start], NULL); + 
ezcl_enqueue_read_buffer(command_queue, dev_level, CL_FALSE, upper_segment_start*sizeof(cl_int), upper_segment_size*sizeof(cl_int), &level_tmp[upper_segment_start], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_celltype, CL_TRUE, upper_segment_start*sizeof(cl_int), upper_segment_size*sizeof(cl_int), &celltype_tmp[upper_segment_start], NULL); + } + } + + L7_Update(&i_tmp[0], L7_INT, load_balance_handle); + L7_Update(&j_tmp[0], L7_INT, load_balance_handle); + L7_Update(&level_tmp[0], L7_INT, load_balance_handle); + L7_Update(&celltype_tmp[0], L7_INT, load_balance_handle); + + L7_Free(&load_balance_handle); + load_balance_handle = 0; + + // Allocate and set lower block on GPU + cl_mem dev_i_lower, dev_j_lower, dev_level_lower, dev_celltype_lower; + + if(lower_block_size > 0) { + dev_i_lower = ezcl_malloc(NULL, const_cast("dev_i_lower"), &lower_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + dev_j_lower = ezcl_malloc(NULL, const_cast("dev_j_lower"), &lower_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + dev_level_lower = ezcl_malloc(NULL, const_cast("dev_level_lower"), &lower_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + dev_celltype_lower = ezcl_malloc(NULL, const_cast("dev_celltype_lower"), &lower_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + + ezcl_enqueue_write_buffer(command_queue, dev_i_lower, CL_FALSE, 0, lower_block_size*sizeof(cl_int), &i_tmp[ncells_old], NULL); + ezcl_enqueue_write_buffer(command_queue, dev_j_lower, CL_FALSE, 0, lower_block_size*sizeof(cl_int), &j_tmp[ncells_old], NULL); + ezcl_enqueue_write_buffer(command_queue, dev_level_lower, CL_FALSE, 0, lower_block_size*sizeof(cl_int), &level_tmp[ncells_old], NULL); + ezcl_enqueue_write_buffer(command_queue, dev_celltype_lower, CL_TRUE, 0, lower_block_size*sizeof(cl_int), &celltype_tmp[ncells_old], NULL); + } + + // Allocate and set upper block on GPU + cl_mem dev_i_upper, dev_j_upper, dev_level_upper, dev_celltype_upper; + if(upper_block_size > 0) { + dev_i_upper = 
ezcl_malloc(NULL, const_cast("dev_i_upper"), &upper_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + dev_j_upper = ezcl_malloc(NULL, const_cast("dev_j_upper"), &upper_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + dev_level_upper = ezcl_malloc(NULL, const_cast("dev_level_upper"), &upper_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + dev_celltype_upper = ezcl_malloc(NULL, const_cast("dev_celltype_upper"), &upper_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + + ezcl_enqueue_write_buffer(command_queue, dev_i_upper, CL_FALSE, 0, upper_block_size*sizeof(cl_int), &i_tmp[ncells_old+lower_block_size], NULL); + ezcl_enqueue_write_buffer(command_queue, dev_j_upper, CL_FALSE, 0, upper_block_size*sizeof(cl_int), &j_tmp[ncells_old+lower_block_size], NULL); + ezcl_enqueue_write_buffer(command_queue, dev_level_upper, CL_FALSE, 0, upper_block_size*sizeof(cl_int), &level_tmp[ncells_old+lower_block_size], NULL); + ezcl_enqueue_write_buffer(command_queue, dev_celltype_upper, CL_TRUE, 0, upper_block_size*sizeof(cl_int), &celltype_tmp[ncells_old+lower_block_size], NULL); + } + + local_work_size = 128; + + // printf("MYPE%d: \t ncells = %d \t ncells_old = %d \t ncells_global = %d \n", mype, ncells, ncells_old, ncells_global); + // Allocate space on GPU for temp arrays (used in double buffering) + + size_t mem_request = (int)((float)ncells*mem_factor); + cl_mem dev_i_new = ezcl_malloc(NULL, const_cast("dev_i_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_j_new = ezcl_malloc(NULL, const_cast("dev_j_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_level_new = ezcl_malloc(NULL, const_cast("dev_level_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + cl_mem dev_celltype_new = ezcl_malloc(NULL, const_cast("dev_celltype_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + + // Set kernel arguments and call lower block kernel + if(lower_block_size > 0) { + + size_t global_work_size = ((lower_block_size + 
local_work_size - 1) / local_work_size) * local_work_size; + + ezcl_set_kernel_arg(kernel_do_load_balance_lower, 0, sizeof(cl_mem), &dev_i_new); + ezcl_set_kernel_arg(kernel_do_load_balance_lower, 1, sizeof(cl_mem), &dev_j_new); + ezcl_set_kernel_arg(kernel_do_load_balance_lower, 2, sizeof(cl_mem), &dev_level_new); + ezcl_set_kernel_arg(kernel_do_load_balance_lower, 3, sizeof(cl_mem), &dev_celltype_new); + ezcl_set_kernel_arg(kernel_do_load_balance_lower, 4, sizeof(cl_mem), &dev_i_lower); + ezcl_set_kernel_arg(kernel_do_load_balance_lower, 5, sizeof(cl_mem), &dev_j_lower); + ezcl_set_kernel_arg(kernel_do_load_balance_lower, 6, sizeof(cl_mem), &dev_level_lower); + ezcl_set_kernel_arg(kernel_do_load_balance_lower, 7, sizeof(cl_mem), &dev_celltype_lower); + ezcl_set_kernel_arg(kernel_do_load_balance_lower, 8, sizeof(cl_int), &lower_block_size); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_do_load_balance_lower, 1, NULL, &global_work_size, &local_work_size, NULL); + + ezcl_device_memory_delete(dev_i_lower); + ezcl_device_memory_delete(dev_j_lower); + ezcl_device_memory_delete(dev_level_lower); + ezcl_device_memory_delete(dev_celltype_lower); + } + + // Set kernel arguments and call middle block kernel + if(middle_block_size > 0) { + + size_t global_work_size = ((middle_block_size + local_work_size - 1) / local_work_size) * local_work_size; + + ezcl_set_kernel_arg(kernel_do_load_balance_middle, 0, sizeof(cl_mem), &dev_i_new); + ezcl_set_kernel_arg(kernel_do_load_balance_middle, 1, sizeof(cl_mem), &dev_j_new); + ezcl_set_kernel_arg(kernel_do_load_balance_middle, 2, sizeof(cl_mem), &dev_level_new); + ezcl_set_kernel_arg(kernel_do_load_balance_middle, 3, sizeof(cl_mem), &dev_celltype_new); + ezcl_set_kernel_arg(kernel_do_load_balance_middle, 4, sizeof(cl_mem), &dev_i); + ezcl_set_kernel_arg(kernel_do_load_balance_middle, 5, sizeof(cl_mem), &dev_j); + ezcl_set_kernel_arg(kernel_do_load_balance_middle, 6, sizeof(cl_mem), &dev_level); + 
ezcl_set_kernel_arg(kernel_do_load_balance_middle, 7, sizeof(cl_mem), &dev_celltype); + ezcl_set_kernel_arg(kernel_do_load_balance_middle, 8, sizeof(cl_int), &lower_block_size); + ezcl_set_kernel_arg(kernel_do_load_balance_middle, 9, sizeof(cl_int), &middle_block_size); + ezcl_set_kernel_arg(kernel_do_load_balance_middle, 10, sizeof(cl_int), &middle_block_start); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_do_load_balance_middle, 1, NULL, &global_work_size, &local_work_size, NULL); + } + + // Set kernel arguments and call upper block kernel + if(upper_block_size > 0) { + + size_t global_work_size = ((upper_block_size + local_work_size - 1) / local_work_size) * local_work_size; + + ezcl_set_kernel_arg(kernel_do_load_balance_upper, 0, sizeof(cl_mem), &dev_i_new); + ezcl_set_kernel_arg(kernel_do_load_balance_upper, 1, sizeof(cl_mem), &dev_j_new); + ezcl_set_kernel_arg(kernel_do_load_balance_upper, 2, sizeof(cl_mem), &dev_level_new); + ezcl_set_kernel_arg(kernel_do_load_balance_upper, 3, sizeof(cl_mem), &dev_celltype_new); + ezcl_set_kernel_arg(kernel_do_load_balance_upper, 4, sizeof(cl_mem), &dev_i_upper); + ezcl_set_kernel_arg(kernel_do_load_balance_upper, 5, sizeof(cl_mem), &dev_j_upper); + ezcl_set_kernel_arg(kernel_do_load_balance_upper, 6, sizeof(cl_mem), &dev_level_upper); + ezcl_set_kernel_arg(kernel_do_load_balance_upper, 7, sizeof(cl_mem), &dev_celltype_upper); + ezcl_set_kernel_arg(kernel_do_load_balance_upper, 8, sizeof(cl_int), &lower_block_size); + ezcl_set_kernel_arg(kernel_do_load_balance_upper, 9, sizeof(cl_int), &middle_block_size); + ezcl_set_kernel_arg(kernel_do_load_balance_upper, 10, sizeof(cl_int), &upper_block_size); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_do_load_balance_upper, 1, NULL, &global_work_size, &local_work_size, NULL); + + ezcl_device_memory_delete(dev_i_upper); + ezcl_device_memory_delete(dev_j_upper); + ezcl_device_memory_delete(dev_level_upper); + ezcl_device_memory_delete(dev_celltype_upper); + } + + 
#ifdef HAVE_OPENCL
/*
 * Count boundary cells on the device.
 *
 * Launches kernel_count_BCs over all ncells cells with a scratch buffer that
 * holds one cl_int per work-group. When more than one work-group is used,
 * kernel_reduce_sum_int_stage2of2 is run over that scratch buffer as a
 * second stage (presumably summing the per-group partial counts -- the
 * kernel source is not visible here). Element 0 of the scratch buffer is then
 * read back with a blocking read and returned.
 *
 * Returns 0 without touching the device when have_boundary is set or when the
 * mesh has no cells.
 */
int Mesh::gpu_count_BCs(void)
{
   int bcount = 0;

   // Nothing to count; the ncells test also keeps local_work_size from
   // becoming zero, which would divide by zero below.
   if (have_boundary || ncells == 0) return(bcount);

   cl_event count_BCs_stage1_event, count_BCs_stage2_event;

   size_t local_work_size  = MIN(ncells, TILE_SIZE);
   size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size;

   // Number of work-groups == number of entries in the scratch buffer.
   size_t block_size = global_work_size/local_work_size;

   // The kernels take plain int sizes. Pass real cl_int objects so that
   // exactly sizeof(cl_int) bytes of the right value are copied, regardless
   // of the host width or byte order of the size_t originals.
   cl_int isize   = (cl_int)ncells;
   cl_int nblocks = (cl_int)block_size;

   cl_command_queue command_queue = ezcl_get_command_queue();
   cl_mem dev_ioffset = ezcl_malloc(NULL, const_cast<char *>("dev_ioffset"), &block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);

      /*
      __kernel void count_BCs(
                       const int    isize,      // 0
              __global const int   *i,         // 1
              __global const int   *j,         // 2
              __global const int   *level,     // 3
              __global const int   *lev_ibeg,  // 4
              __global const int   *lev_iend,  // 5
              __global const int   *lev_jbeg,  // 6
              __global const int   *lev_jend,  // 7
              __global       int   *scratch,   // 8
              __local        int   *tile)      // 9
      */
   size_t shared_spd_sum_int = local_work_size * sizeof(cl_int);
   ezcl_set_kernel_arg(kernel_count_BCs, 0, sizeof(cl_int), (void *)&isize);
   ezcl_set_kernel_arg(kernel_count_BCs, 1, sizeof(cl_mem), (void *)&dev_i);
   ezcl_set_kernel_arg(kernel_count_BCs, 2, sizeof(cl_mem), (void *)&dev_j);
   ezcl_set_kernel_arg(kernel_count_BCs, 3, sizeof(cl_mem), (void *)&dev_level);
   ezcl_set_kernel_arg(kernel_count_BCs, 4, sizeof(cl_mem), (void *)&dev_levibeg);
   ezcl_set_kernel_arg(kernel_count_BCs, 5, sizeof(cl_mem), (void *)&dev_leviend);
   ezcl_set_kernel_arg(kernel_count_BCs, 6, sizeof(cl_mem), (void *)&dev_levjbeg);
   ezcl_set_kernel_arg(kernel_count_BCs, 7, sizeof(cl_mem), (void *)&dev_levjend);
   ezcl_set_kernel_arg(kernel_count_BCs, 8, sizeof(cl_mem), (void *)&dev_ioffset);
   ezcl_set_kernel_arg(kernel_count_BCs, 9, shared_spd_sum_int, NULL);   // __local tile

   ezcl_enqueue_ndrange_kernel(command_queue, kernel_count_BCs, 1, NULL, &global_work_size, &local_work_size, &count_BCs_stage1_event);

   // Second stage runs as a single work-group over the scratch buffer.
   if (block_size > 1) {
      ezcl_set_kernel_arg(kernel_reduce_sum_int_stage2of2, 0, sizeof(cl_int), (void *)&nblocks);
      ezcl_set_kernel_arg(kernel_reduce_sum_int_stage2of2, 1, sizeof(cl_mem), (void *)&dev_ioffset);
      ezcl_set_kernel_arg(kernel_reduce_sum_int_stage2of2, 2, shared_spd_sum_int, NULL);

      ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduce_sum_int_stage2of2, 1, NULL, &local_work_size, &local_work_size, &count_BCs_stage2_event);
   }

   // Blocking read: the kernels above have completed once this returns.
   ezcl_enqueue_read_buffer(command_queue, dev_ioffset, CL_TRUE, 0, 1*sizeof(cl_int), &bcount, NULL);

   ezcl_device_memory_delete(dev_ioffset);

   gpu_timers[MESH_TIMER_COUNT_BCS] += ezcl_timer_calc(&count_BCs_stage1_event, &count_BCs_stage1_event);
   if (block_size > 1) {
      gpu_timers[MESH_TIMER_COUNT_BCS] += ezcl_timer_calc(&count_BCs_stage2_event, &count_BCs_stage2_event);
   }

   return(bcount);
}
#endif
mesh_memory.get_memory_size(i); + if (new_ncells > current_size) mesh_memory.memory_realloc_all(new_ncells); +} + +void Mesh::memory_reset_ptrs(void){ + i = (int *)mesh_memory.get_memory_ptr("i"); + j = (int *)mesh_memory.get_memory_ptr("j"); + level = (int *)mesh_memory.get_memory_ptr("level"); + celltype = (int *)mesh_memory.get_memory_ptr("celltype"); + nlft = (int *)mesh_memory.get_memory_ptr("nlft"); + nrht = (int *)mesh_memory.get_memory_ptr("nrht"); + nbot = (int *)mesh_memory.get_memory_ptr("nbot"); + ntop = (int *)mesh_memory.get_memory_ptr("ntop"); +} + +void Mesh::resize_old_device_memory(size_t ncells) +{ +#ifdef HAVE_OPENCL + ezcl_device_memory_delete(dev_level); + ezcl_device_memory_delete(dev_i); + ezcl_device_memory_delete(dev_j); + ezcl_device_memory_delete(dev_celltype); + size_t mem_request = (int)((float)ncells*mem_factor); + dev_level = ezcl_malloc(NULL, const_cast("dev_level"), &mem_request, sizeof(cl_int), CL_MEM_READ_ONLY, 0); + dev_i = ezcl_malloc(NULL, const_cast("dev_i"), &mem_request, sizeof(cl_int), CL_MEM_READ_ONLY, 0); + dev_j = ezcl_malloc(NULL, const_cast("dev_j"), &mem_request, sizeof(cl_int), CL_MEM_READ_ONLY, 0); + dev_celltype = ezcl_malloc(NULL, const_cast("dev_celltype"), &mem_request, sizeof(cl_int), CL_MEM_READ_ONLY, 0); +#else + // To get rid of compiler warning + if (1 == 2) printf("DEBUG -- ncells is %lu\n",ncells); +#endif +} +void Mesh::print_object_info(void) +{ + printf(" ---- Mesh object info -----\n"); + printf("Dimensionality : %d\n",ndim); + printf("Parallel info : mype %d numpe %d noffset %d parallel %d\n",mype,numpe,noffset,parallel); + printf("Sizes : ncells %ld ncells_ghost %ld\n\n",ncells,ncells_ghost); +#ifdef HAVE_OPENCL + int num_elements, elsize; + + num_elements = ezcl_get_device_mem_nelements(dev_celltype); + elsize = ezcl_get_device_mem_elsize(dev_celltype); + printf("dev_celltype ptr : %p nelements %d elsize %d\n",dev_celltype,num_elements,elsize); + num_elements = 
ezcl_get_device_mem_nelements(dev_level); + elsize = ezcl_get_device_mem_elsize(dev_level); + printf("dev_level ptr : %p nelements %d elsize %d\n",dev_level,num_elements,elsize); + num_elements = ezcl_get_device_mem_nelements(dev_i); + elsize = ezcl_get_device_mem_elsize(dev_i); + printf("dev_i ptr : %p nelements %d elsize %d\n",dev_i,num_elements,elsize); + num_elements = ezcl_get_device_mem_nelements(dev_j); + elsize = ezcl_get_device_mem_elsize(dev_j); + printf("dev_j ptr : %p nelements %d elsize %d\n",dev_j,num_elements,elsize); + + num_elements = ezcl_get_device_mem_nelements(dev_nlft); + elsize = ezcl_get_device_mem_elsize(dev_nlft); + printf("dev_nlft ptr : %p nelements %d elsize %d\n",dev_nlft,num_elements,elsize); + num_elements = ezcl_get_device_mem_nelements(dev_nrht); + elsize = ezcl_get_device_mem_elsize(dev_nrht); + printf("dev_nrht ptr : %p nelements %d elsize %d\n",dev_nrht,num_elements,elsize); + num_elements = ezcl_get_device_mem_nelements(dev_nbot); + elsize = ezcl_get_device_mem_elsize(dev_nbot); + printf("dev_nbot ptr : %p nelements %d elsize %d\n",dev_nbot,num_elements,elsize); + num_elements = ezcl_get_device_mem_nelements(dev_ntop); + elsize = ezcl_get_device_mem_elsize(dev_ntop); + printf("dev_ntop ptr : %p nelements %d elsize %d\n",dev_ntop,num_elements,elsize); +#endif + printf("vector celltype ptr : %p nelements %ld elsize %ld\n",&celltype[0],mesh_memory.get_memory_size(celltype),sizeof(celltype[0])); + printf("vector level ptr : %p nelements %ld elsize %ld\n",&level[0], mesh_memory.get_memory_size(level), sizeof(level[0])); + printf("vector i ptr : %p nelements %ld elsize %ld\n",&i[0], mesh_memory.get_memory_size(i), sizeof(i[0])); + printf("vector j ptr : %p nelements %ld elsize %ld\n",&j[0], mesh_memory.get_memory_size(j), sizeof(j[0])); + + printf("vector nlft ptr : %p nelements %ld elsize %ld\n",&nlft[0], mesh_memory.get_memory_size(nlft), sizeof(nlft[0])); + printf("vector nrht ptr : %p nelements %ld elsize %ld\n",&nrht[0], 
mesh_memory.get_memory_size(nrht), sizeof(nrht[0])); + printf("vector nbot ptr : %p nelements %ld elsize %ld\n",&nbot[0], mesh_memory.get_memory_size(nbot), sizeof(nbot[0])); + printf("vector ntop ptr : %p nelements %ld elsize %ld\n",&ntop[0], mesh_memory.get_memory_size(ntop), sizeof(ntop[0])); +} + + +void Mesh::set_refinement_order(int order[4], int ic, int ifirst, int ilast, int jfirst, int jlast, + int level_first, int level_last, int *i_old, int *j_old, int *level_old) +{ + if (localStencil) { + // Store the coordinates of the cells before and after this one on + // the space-filling curve index. + +#ifdef __OLD_STENCIL__ + spatial_t nx[3], // x-coordinates of cells. + ny[3]; // y-coordinates of cells. + if (ic != 0) { + nx[0] = lev_deltax[level_old[ic-1]] * (spatial_t)i[ic-1]; + ny[0] = lev_deltay[level_old[ic-1]] * (spatial_t)j[ic-1]; + } else { + nx[0] = lev_deltax[level_first] * (spatial_t)ifirst; + ny[0] = lev_deltay[level_first] * (spatial_t)jfirst; + } + nx[1] = lev_deltax[level_old[ic ]] * (spatial_t)i[ic ]; + ny[1] = lev_deltay[level_old[ic ]] * (spatial_t)j[ic ]; + if (ic != ncells-1) { + nx[2] = lev_deltax[level_old[ic+1]] * (spatial_t)i[ic+1]; + ny[2] = lev_deltay[level_old[ic+1]] * (spatial_t)j[ic+1]; + } else { + nx[2] = lev_deltax[level_last] * (spatial_t)ilast; + ny[2] = lev_deltay[level_last] * (spatial_t)jlast; + } + + // Figure out relative orientation of the neighboring cells. We are + // are aided in this because the Hilbert curve only has six possible + // ways across the cell: four Ls and two straight lines. Then + // refine the cell according to the relative orientation and order + // according to the four-point Hilbert stencil. 
+ if (nx[0] < nx[1] and ny[2] < ny[1]) // southwest L, forward order + { order[0] = SW; order[1] = NW; order[2] = NE; order[3] = SE; } + else if (nx[2] < nx[1] and ny[0] < ny[1]) // southwest L, reverse order + { order[0] = SE; order[1] = NE; order[2] = NW; order[3] = SW; } + else if (nx[0] > nx[1] and ny[2] < ny[1]) // southeast L, forward order + { order[0] = SE; order[1] = NE; order[2] = NW; order[3] = SW; } + else if (nx[2] > nx[1] and ny[0] < ny[1]) // southeast L, reverse order + { order[0] = SW; order[1] = NW; order[2] = NE; order[3] = SE; } + else if (nx[0] > nx[1] and ny[2] > ny[1]) // northeast L, forward order + { order[0] = SE; order[1] = SW; order[2] = NW; order[3] = NE; } + else if (nx[2] > nx[1] and ny[0] > ny[1]) // northeast L, reverse order + { order[0] = NE; order[1] = NW; order[2] = SW; order[3] = SE; } + else if (nx[0] < nx[1] and ny[2] > ny[1]) // northwest L, forward order + { order[0] = SW; order[1] = SE; order[2] = NE; order[3] = NW; } + else if (nx[2] < nx[1] and ny[0] > ny[1]) // northwest L, reverse order + { order[0] = NW; order[1] = NE; order[2] = SE; order[3] = SW; } + else if (nx[0] > nx[1] and nx[1] > nx[2]) // straight horizontal, forward order + { order[0] = NE; order[1] = SE; order[2] = SW; order[3] = NW; } + else if (nx[0] < nx[1] and nx[1] < nx[2]) // straight horizontal, reverse order + { order[0] = SW; order[1] = NW; order[2] = NE; order[3] = SE; } + else if (ny[0] > ny[1] and ny[1] > ny[2]) // straight vertical, forward order + { order[0] = NE; order[1] = NW; order[2] = SW; order[3] = SE; } + else if (ny[0] < ny[1] and ny[1] < ny[2]) // straight vertical, reverse order + { order[0] = SW; order[1] = SE; order[2] = NE; order[3] = NW; } + else // other, default to z-order + { order[0] = SW; order[1] = SE; order[2] = NW; order[3] = NE; } +#endif + +#ifdef __NEW_STENCIL__ + int ir[3], // First i index at finest level of the mesh + jr[3]; // First j index at finest level of the mesh + // Cell's Radius at the Finest level of the 
mesh + + int crf = IPOW2(levmx-level_old[ic]); + + if (ic != 0) { + ir[0] = i_old[ic - 1] * IPOW2(levmx-level_old[ic - 1]); + jr[0] = j_old[ic - 1] * IPOW2(levmx-level_old[ic - 1]); + } else { + //printf("%d cell %d is a first\n",mype,ic); + ir[0] = ifirst * IPOW2(levmx-level_first); + jr[0] = jfirst * IPOW2(levmx-level_first); + } + ir[1] = i_old[ic ] * IPOW2(levmx-level_old[ic ]); + jr[1] = j_old[ic ] * IPOW2(levmx-level_old[ic ]); + if (ic != (int)ncells-1) { + ir[2] = i_old[ic + 1] * IPOW2(levmx-level_old[ic + 1]); + jr[2] = j_old[ic + 1] * IPOW2(levmx-level_old[ic + 1]); + } else { + //printf("%d cell %d is a last\n",mype,ic); + ir[2] = ilast * IPOW2(levmx-level_last); + jr[2] = jlast * IPOW2(levmx-level_last); + } + //if (parallel) fprintf(fp,"%d: DEBUG rezone top boundary -- ic %d global %d noffset %d nc %d i %d j %d level %d\n",mype,ic,ic+noffset,noffset,nc,i[nc],j[nc],level[nc]); + + int dir_in = ir[1] - ir[0]; + int dir_out = ir[1] - ir[2]; + int djr_in = jr[1] - jr[0]; + int djr_out = jr[1] - jr[2]; + + char in_direction = 'X'; + char out_direction = 'X'; + + // Left In + if( (djr_in == 0 && (dir_in == crf*HALF || dir_in == crf || dir_in == crf*TWO)) || (djr_in == -crf*HALF && dir_in == crf*HALF) || (djr_in == crf && dir_in == crf*TWO) ) { + in_direction = 'L'; + } + // Bottom In + else if( (dir_in == 0 && (djr_in == crf*HALF || djr_in == crf || djr_in == crf*TWO)) || (dir_in == -crf*HALF && djr_in == crf*HALF) || (dir_in == crf && djr_in == crf*TWO) ) { + in_direction = 'B'; + } + // Right In + else if( (dir_in == -crf && (djr_in == -crf*HALF || djr_in == 0 || (djr_in == crf && level_old[ic-1] < level_old[ic]))) ) { + in_direction = 'R'; + } + // Top In + else if( (djr_in == -crf && (dir_in == -crf*HALF || dir_in == 0 || (dir_in == crf && level_old[ic-1] < level_old[ic]))) ) { + in_direction = 'T'; + } + // Further from the left + else if( dir_in > 0 && djr_in == 0 ) { + in_direction = 'L'; + } + // Further from the right + else if( dir_in < 0 && djr_in 
== 0 ) { + in_direction = 'R'; + } + // Further from the bottom + else if( djr_in > 0 && dir_in == 0 ) { + in_direction = 'B'; + } + // Further from the top + else if( djr_in < 0 && dir_in == 0 ) { + in_direction = 'T'; + } + // SW in; 'M' + else if( dir_in > 0 && djr_in > 0) { + in_direction = 'M'; + } + // NW in; 'W' + else if( dir_in > 0 && djr_in < 0) { + in_direction = 'W'; + } + // SE in; 'F' + else if( dir_in < 0 && djr_in > 0) { + in_direction = 'F'; + } + // NE in; 'E' + else if( dir_in < 0 && djr_in < 0) { + in_direction = 'E'; + } + + + // Left Out + if( (djr_out == 0 && (dir_out == crf*HALF || dir_out == crf || dir_out == crf*TWO)) || (djr_out == -crf*HALF && dir_out == crf*HALF) || (djr_out == crf && dir_out == crf*TWO) ) { + out_direction = 'L'; + } + // Bottom Out + else if( (dir_out == 0 && (djr_out == crf*HALF || djr_out == crf || djr_out == crf*TWO)) || (dir_out == -crf*HALF && djr_out == crf*HALF) || (dir_out == crf && djr_out == crf*TWO) ) { + out_direction = 'B'; + } + // Right Out + else if( (dir_out == -crf && (djr_out == -crf*HALF || djr_out == 0 || (djr_out == crf && level_old[ic+1] < level_old[ic]))) ) { + out_direction = 'R'; + } + // Top Out + else if( (djr_out == -crf && (dir_out == -crf*HALF || dir_out == 0 || (dir_out == crf && level_old[ic+1] < level_old[ic]))) ) { + out_direction = 'T'; + } + // Further from the left + else if( dir_out > 0 && djr_out == 0 ) { + out_direction = 'L'; + } + // Further from the right + else if( dir_out < 0 && djr_out == 0 ) { + out_direction = 'R'; + } + // Further from the bottom + else if( djr_out > 0 && dir_out == 0 ) { + out_direction = 'B'; + } + // Further from the top + else if( djr_out < 0 && dir_out == 0 ) { + out_direction = 'T'; + } + // SW out; 'M' + else if( dir_out > 0 && djr_out > 0) { + out_direction = 'M'; + } + // NW out; 'W' + else if( dir_out > 0 && djr_out < 0) { + out_direction = 'W'; + } + // SE out; 'F' + else if( dir_out < 0 && djr_out > 0) { + out_direction = 'F'; + } + // NE 
out; 'E' + else if( dir_out < 0 && djr_out < 0) { + out_direction = 'E'; + } + + // Set the Stencil + if(in_direction == 'L' && (out_direction == 'B' || out_direction == 'R' || out_direction == 'F')) { + order[0] = SW; order[1] = NW; order[2] = NE; order[3] = SE; + } + else if(in_direction == 'L' && (out_direction == 'T' || out_direction == 'W' )) { + order[0] = SW; order[1] = SE; order[2] = NE; order[3] = NW; + } + else if(in_direction == 'L' && out_direction == 'M') { + order[0] = NW; order[1] = NE; order[2] = SE; order[3] = SW; + } + else if(in_direction == 'L' && out_direction == 'E') { + order[0] = SW; order[1] = SE; order[2] = NW; order[3] = NE; + } + + else if(in_direction == 'B' && (out_direction == 'R' || out_direction == 'F' )) { + order[0] = SW; order[1] = NW; order[2] = NE; order[3] = SE; + } + else if(in_direction == 'B' && (out_direction == 'L' || out_direction == 'T' || out_direction == 'W' )) { + order[0] = SW; order[1] = SE; order[2] = NE; order[3] = NW; + } + else if(in_direction == 'B' && out_direction == 'M') { + order[0] = SE; order[1] = NE; order[2] = NW; order[3] = SW; + } + else if(in_direction == 'B' && out_direction == 'E') { + order[0] = SW; order[1] = NW; order[2] = SE; order[3] = NE; + } + + else if(in_direction == 'R' && (out_direction == 'T' || out_direction == 'L' || out_direction == 'W' )) { + order[0] = NE; order[1] = SE; order[2] = SW; order[3] = NW; + } + else if(in_direction == 'R' && (out_direction == 'B' || out_direction == 'F' )) { + order[0] = NE; order[1] = NW; order[2] = SW; order[3] = SE; + } + else if(in_direction == 'R' && out_direction == 'M') { + order[0] = NE; order[1] = NW; order[2] = SE; order[3] = SW; + } + else if(in_direction == 'R' && out_direction == 'E') { + order[0] = SE; order[1] = SW; order[2] = NW; order[3] = NE; + } + + else if(in_direction == 'T' && (out_direction == 'L' || out_direction == 'W' )) { + order[0] = NE; order[1] = SE; order[2] = SW; order[3] = NW; + } + else if(in_direction == 'T' && 
(out_direction == 'R' || out_direction == 'B' || out_direction == 'F' )) { + order[0] = NE; order[1] = NW; order[2] = SW; order[3] = SE; + } + else if(in_direction == 'T' && out_direction == 'M') { + order[0] = NE; order[1] = SE; order[2] = NW; order[3] = SW; + } + else if(in_direction == 'T' && out_direction == 'E') { + order[0] = NW; order[1] = SW; order[2] = SE; order[3] = NE; + } + + else if(in_direction == 'M' && (out_direction == 'L' || out_direction == 'W' || out_direction == 'T') ) { + order[0] = SW; order[1] = SE; order[2] = NE; order[3] = NW; + } + else if(in_direction == 'M' && (out_direction == 'R' || out_direction == 'F' || out_direction == 'B') ) { + order[0] = SW; order[1] = NW; order[2] = NE; order[3] = SE; + } + else if(in_direction == 'M' && out_direction == 'E') { + order[0] = SW; order[1] = SE; order[2] = NW; order[3] = NE; + } + + else if(in_direction == 'W' && (out_direction == 'L' || out_direction == 'M' || out_direction == 'B') ) { + order[0] = NW; order[1] = NE; order[2] = SE; order[3] = SW; + } + else if(in_direction == 'W' && (out_direction == 'R' || out_direction == 'E' || out_direction == 'T') ) { + order[0] = NW; order[1] = SW; order[2] = SE; order[3] = NE; + } + else if(in_direction == 'W' && out_direction == 'F') { + order[0] = NW; order[1] = NE; order[2] = SW; order[3] = SE; + } + + else if(in_direction == 'F' && (out_direction == 'L' || out_direction == 'M' || out_direction == 'B') ) { + order[0] = SE; order[1] = NE; order[2] = NW; order[3] = SW; + } + else if(in_direction == 'F' && (out_direction == 'R' || out_direction == 'E' || out_direction == 'T') ) { + order[0] = SE; order[1] = SW; order[2] = NW; order[3] = NE; + } + else if(in_direction == 'F' && out_direction == 'W') { + order[0] = SE; order[1] = NE; order[2] = SW; order[3] = NW; + } + + else if(in_direction == 'E' && (out_direction == 'L' || out_direction == 'W' || out_direction == 'T') ) { + order[0] = NE; order[1] = SE; order[2] = SW; order[3] = NW; + } + else 
if(in_direction == 'E' && (out_direction == 'R' || out_direction == 'F' || out_direction == 'B') ) { + order[0] = NE; order[1] = NW; order[2] = SW; order[3] = SE; + } + else if(in_direction == 'E' && out_direction == 'M') { + order[0] = NE; order[1] = SE; order[2] = NW; order[3] = SW; + } + + else { // Default to a knot + order[0] = NW; order[1] = SE; order[2] = SW; order[3] = NE; + if (do_stencil_warning) { + printf("Nonlocal case for the stencil.\n"); + } + } + // Determine the relative orientation of the neighboring cells. + // There are 12 possible ways across the cell: 4 Ls and 2 straight + // lines, each with 2 directions of traversal. + // Then the cell is refined and ordered according to the relative + // orientation and four-point Hilbert stencil. + + // XXX NOTE that the four-point stencil varies depending upon + // the starting and ending point of the global Hilbert curve. + // The stencil applied here assumes the start at (0,0) and the end + // at (0,y_max). XXX WRONG +#endif + + } // End local stencil version + else // Use Z-ordering for the curve. 
+ { order[0] = SW; order[1] = SE; order[2] = NW; order[3] = NE; } + +} + +void Mesh::calc_face_list(void) +{ + xface_i.clear(); + xface_j.clear(); + xface_level.clear(); + + ixmin_level.clear(); + ixmax_level.clear(); + jxmin_level.clear(); + jxmax_level.clear(); + ixmin_level.resize(levmx+1, 9999999); + ixmax_level.resize(levmx+1, -9999999); + jxmin_level.resize(levmx+1, 9999999); + jxmax_level.resize(levmx+1, -9999999); + + ixadjust.clear(); + ixadjust.resize(levmx+1); + jxadjust.clear(); + jxadjust.resize(levmx+1); + + int iface=0; + for (int nz=0; nz<(int)ncells; nz++){ + int nr = nrht[nz]; + if (nr == nz) continue; + + int ifactor = 1; + if (level[nr] < level[nz]) ifactor = 2; + + // Have right face + //printf("DEBUG xface -- iface %d lower nz %d upper nr %d\n",iface,nz,nr); + xface_level.push_back(MAX(level[nz],level[nr])); + xface_i.push_back(i[nr]*ifactor); + if (level[nr] < level[nz] && is_upper(j[nz]) ) { + xface_j.push_back(j[nr]*ifactor+1); + } else { + xface_j.push_back(j[nr]*ifactor); + } + + iface++; + + if (level[nr] > level[nz] && is_lower(j[nr]) ){ + int ntr = ntop[nr]; + if (ntr != nr) { + //printf("DEBUG xface -- iface %d lower nz %d upper ntr %d\n",iface,nz,ntr); + xface_level.push_back(MAX(level[nz],level[ntr])); + xface_i.push_back(i[ntr]*ifactor); + xface_j.push_back(j[ntr]*ifactor); + + iface++; + } + } + } + nxface=iface; + + yface_i.clear(); + yface_j.clear(); + yface_level.clear(); + + iymin_level.clear(); + iymax_level.clear(); + jymin_level.clear(); + jymax_level.clear(); + iymin_level.resize(levmx+1, 9999999); + iymax_level.resize(levmx+1, -9999999); + jymin_level.resize(levmx+1, 9999999); + jymax_level.resize(levmx+1, -9999999); + + iyadjust.clear(); + iyadjust.resize(levmx+1); + jyadjust.clear(); + jyadjust.resize(levmx+1); + + iface=0; + for (int nz=0; nz<(int)ncells; nz++){ + int nt = ntop[nz]; + if (nt == nz) continue; + + int ifactor = 1; + if (level[nt] < level[nz]) ifactor = 2; + + // Have top face + //printf("DEBUG yface -- 
iface %d lower nz %d upper nt %d\n",iface,nz,nt); + yface_level.push_back(MAX(level[nz],level[nt])); + yface_j.push_back(j[nt]*ifactor); + if (level[nt] < level[nz] && is_upper(i[nz]) ) { + yface_i.push_back(i[nt]*ifactor+1); + } else{ + yface_i.push_back(i[nt]*ifactor); + } + + iface++; + if (level[nt] > level[nz] && is_lower(i[nt]) ){ + int nrt = nrht[nt]; + if (nrt != nt) { + //printf("DEBUG yface -- iface %d lower nz %d upper nrt %d\n",iface,nz,nrt); + yface_level.push_back(MAX(level[nz],level[nrt])); + yface_j.push_back(j[nrt]*ifactor); + yface_i.push_back(i[nrt]*ifactor); + + iface++; + } + + } + } + nyface=iface; + + for (int iface=0; iface < nxface; iface++){ + int fl = xface_level[iface]; + + int fi = xface_i[iface]; + if (fi < ixmin_level[fl]) ixmin_level[fl] = fi; + if (fi > ixmax_level[fl]) ixmax_level[fl] = fi; + + int fj = xface_j[iface]; + if (fj < jxmin_level[fl]) jxmin_level[fl] = fj; + if (fj > jxmax_level[fl]) jxmax_level[fl] = fj; + } + + for (int iface=0; iface < nxface; iface++){ + int fl = xface_level[iface]; + if (ixmax_level[fl] < ixmin_level[fl]) continue; + + xface_i[iface] -= ixmin_level[fl]; + xface_j[iface] -= jxmin_level[fl]; + } + + for (int fl = 0; fl <= levmx; fl++){ + ixadjust[fl] = ixmin_level[fl]; + jxadjust[fl] = jxmin_level[fl]; + ixmax_level[fl] -= ixmin_level[fl];; + jxmax_level[fl] -= jxmin_level[fl]; + ixmin_level[fl] = 0; + jxmin_level[fl] = 0; + } + + for (int iface=0; iface < nyface; iface++){ + int fl = yface_level[iface]; + + int fi = yface_i[iface]; + if (fi < iymin_level[fl]) iymin_level[fl] = fi; + if (fi > iymax_level[fl]) iymax_level[fl] = fi; + + int fj = yface_j[iface]; + if (fj < jymin_level[fl]) jymin_level[fl] = fj; + if (fj > jymax_level[fl]) jymax_level[fl] = fj; + } + + for (int iface=0; iface < nyface; iface++){ + int fl = yface_level[iface]; + if (iymax_level[fl] < iymin_level[fl]) continue; + + yface_i[iface] -= iymin_level[fl]; + yface_j[iface] -= jymin_level[fl]; + } + + for (int fl = 0; fl <= levmx; 
fl++){ + iyadjust[fl] = iymin_level[fl]; + jyadjust[fl] = jymin_level[fl]; + iymax_level[fl] -= iymin_level[fl];; + jymax_level[fl] -= jymin_level[fl]; + iymin_level[fl] = 0; + jymin_level[fl] = 0; + } + +} + +void Mesh::calc_face_list_wmap(void) +{ + map_xface2cell_lower.clear(); + map_xface2cell_upper.clear(); + + xface_i.clear(); + xface_j.clear(); + xface_level.clear(); + + ixmin_level.clear(); + ixmax_level.clear(); + jxmin_level.clear(); + jxmax_level.clear(); + ixmin_level.resize(levmx+1, 9999999); + ixmax_level.resize(levmx+1, -9999999); + jxmin_level.resize(levmx+1, 9999999); + jxmax_level.resize(levmx+1, -9999999); + + ixadjust.clear(); + ixadjust.resize(levmx+1); + jxadjust.clear(); + jxadjust.resize(levmx+1); + + int iface=0; + for (int nz=0; nz<(int)ncells; nz++){ + int nr = nrht[nz]; + if (nr == nz) continue; + + int ifactor = 1; + if (level[nr] < level[nz]) ifactor = 2; + + // Have right face + map_xface2cell_lower.push_back(nz); + map_xface2cell_upper.push_back(nr); + xface_level.push_back(MAX(level[nz],level[nr])); + xface_i.push_back(i[nr]*ifactor); + if (level[nr] < level[nz] && is_upper(j[nz]) ) { + xface_j.push_back(j[nr]*ifactor+1); + } else { + xface_j.push_back(j[nr]*ifactor); + } + + iface++; + + if (level[nr] > level[nz] && is_lower(j[nr]) ){ + int ntr = ntop[nr]; + if (ntr != nr) { + map_xface2cell_lower.push_back(nz); + map_xface2cell_upper.push_back(ntr); + xface_level.push_back(MAX(level[nz],level[ntr])); + xface_i.push_back(i[ntr]*ifactor); + xface_j.push_back(j[ntr]*ifactor); + + iface++; + } + } + } + nxface=iface; + + map_yface2cell_lower.clear(); + map_yface2cell_upper.clear(); + + yface_i.clear(); + yface_j.clear(); + yface_level.clear(); + + iymin_level.clear(); + iymax_level.clear(); + jymin_level.clear(); + jymax_level.clear(); + iymin_level.resize(levmx+1, 9999999); + iymax_level.resize(levmx+1, -9999999); + jymin_level.resize(levmx+1, 9999999); + jymax_level.resize(levmx+1, -9999999); + + iyadjust.clear(); + 
iyadjust.resize(levmx+1); + jyadjust.clear(); + jyadjust.resize(levmx+1); + + iface=0; + for (int nz=0; nz<(int)ncells; nz++){ + int nt = ntop[nz]; + if (nt == nz) continue; + + int ifactor = 1; + if (level[nt] < level[nz]) ifactor = 2; + + // Have top face + // printf("DEBUG -- iface %d lower nz %d upper nr %d\n",iface,nz,nt); + map_yface2cell_lower.push_back(nz); + map_yface2cell_upper.push_back(nt); + yface_level.push_back(MAX(level[nz],level[nt])); + yface_j.push_back(j[nt]*ifactor); + if (level[nt] < level[nz] && is_upper(i[nz]) ) { + yface_i.push_back(i[nt]*ifactor+1); + } else{ + yface_i.push_back(i[nt]*ifactor); + } + + iface++; + if (level[nt] > level[nz] && is_lower(i[nt]) ){ + int nrt = nrht[nt]; + if (nrt != nt) { + map_yface2cell_lower.push_back(nz); + map_yface2cell_upper.push_back(nrt); + yface_level.push_back(MAX(level[nz],level[nrt])); + yface_j.push_back(j[nrt]*ifactor); + yface_i.push_back(i[nrt]*ifactor); + + iface++; + } + } + } + nyface=iface; + + for (int iface=0; iface < nxface; iface++){ + int fl = xface_level[iface]; + + int fi = xface_i[iface]; + if (fi < ixmin_level[fl]) ixmin_level[fl] = fi; + if (fi > ixmax_level[fl]) ixmax_level[fl] = fi; + + int fj = xface_j[iface]; + if (fj < jxmin_level[fl]) jxmin_level[fl] = fj; + if (fj > jxmax_level[fl]) jxmax_level[fl] = fj; + } + + for (int iface=0; iface < nxface; iface++){ + int fl = xface_level[iface]; + if (ixmax_level[fl] < ixmin_level[fl]) continue; + + xface_i[iface] -= ixmin_level[fl]; + xface_j[iface] -= jxmin_level[fl]; + } + + for (int fl = 0; fl <= levmx; fl++){ + ixadjust[fl] = ixmin_level[fl]; + jxadjust[fl] = jxmin_level[fl]; + ixmax_level[fl] -= ixmin_level[fl];; + jxmax_level[fl] -= jxmin_level[fl]; + ixmin_level[fl] = 0; + jxmin_level[fl] = 0; + } + + for (int iface=0; iface < nyface; iface++){ + int fl = yface_level[iface]; + + int fi = yface_i[iface]; + if (fi < iymin_level[fl]) iymin_level[fl] = fi; + if (fi > iymax_level[fl]) iymax_level[fl] = fi; + + int fj = 
yface_j[iface]; + if (fj < jymin_level[fl]) jymin_level[fl] = fj; + if (fj > jymax_level[fl]) jymax_level[fl] = fj; + } + + for (int iface=0; iface < nyface; iface++){ + int fl = yface_level[iface]; + if (iymax_level[fl] < iymin_level[fl]) continue; + + yface_i[iface] -= iymin_level[fl]; + yface_j[iface] -= jymin_level[fl]; + } + + for (int fl = 0; fl <= levmx; fl++){ + iyadjust[fl] = iymin_level[fl]; + jyadjust[fl] = jymin_level[fl]; + iymax_level[fl] -= iymin_level[fl];; + jymax_level[fl] -= jymin_level[fl]; + iymin_level[fl] = 0; + jymin_level[fl] = 0; + } + +} + +void Mesh::calc_face_list_wbidirmap(void) +{ + map_xface2cell_lower.clear(); + map_xface2cell_upper.clear(); + + map_xcell2face_left1.clear(); + map_xcell2face_left2.clear(); + map_xcell2face_right1.clear(); + map_xcell2face_right2.clear(); + map_xcell2face_left1.resize(ncells, -1); + map_xcell2face_left2.resize(ncells, -1); + map_xcell2face_right1.resize(ncells, -1); + map_xcell2face_right2.resize(ncells, -1); + + xface_i.clear(); + xface_j.clear(); + xface_level.clear(); + + ixmin_level.clear(); + ixmax_level.clear(); + jxmin_level.clear(); + jxmax_level.clear(); + ixmin_level.resize(levmx+1, 9999999); + ixmax_level.resize(levmx+1, -9999999); + jxmin_level.resize(levmx+1, 9999999); + jxmax_level.resize(levmx+1, -9999999); + + ixadjust.clear(); + ixadjust.resize(levmx+1); + jxadjust.clear(); + jxadjust.resize(levmx+1); + + int iface=0; + for (int nz=0; nz<(int)ncells; nz++){ + int nr = nrht[nz]; + if (nr == nz) continue; + + int ifactor = 1; + if (level[nr] < level[nz]) ifactor = 2; + + // Have right face + map_xface2cell_lower.push_back(nz); + map_xface2cell_upper.push_back(nr); + xface_level.push_back(MAX(level[nz],level[nr])); + xface_i.push_back(i[nr]*ifactor); + if (level[nr] < level[nz] && is_upper(j[nz]) ) { + xface_j.push_back(j[nr]*ifactor+1); + } else { + xface_j.push_back(j[nr]*ifactor); + } + map_xcell2face_right1[nz] = iface; + + iface++; + + if (level[nr] > level[nz] && is_lower(j[nr]) 
){ + int ntr = ntop[nr]; + if (ntr != nr) { + map_xface2cell_lower.push_back(nz); + map_xface2cell_upper.push_back(ntr); + xface_level.push_back(MAX(level[nz],level[ntr])); + xface_i.push_back(i[ntr]*ifactor); + xface_j.push_back(j[ntr]*ifactor); + map_xcell2face_right2[nz] = iface; + + iface++; + } + } + } + nxface=iface; + + for (int nz=0; nz<(int)ncells; nz++){ + int nl = nlft[nz]; + if (nl == nz) continue; + + if (level[nl] < level[nz] && is_upper(j[nz])){ + map_xcell2face_left1[nz] = map_xcell2face_right2[nl]; + } else { + map_xcell2face_left1[nz] = map_xcell2face_right1[nl]; + if (level[nl] > level[nz]){ + map_xcell2face_left2[nz] = map_xcell2face_right1[ntop[nl]]; + } + } + + } + + map_yface2cell_lower.clear(); + map_yface2cell_upper.clear(); + + map_ycell2face_bot1.clear(); + map_ycell2face_bot2.clear(); + map_ycell2face_top1.clear(); + map_ycell2face_top2.clear(); + map_ycell2face_bot1.resize(ncells, -1); + map_ycell2face_bot2.resize(ncells, -1); + map_ycell2face_top1.resize(ncells, -1); + map_ycell2face_top2.resize(ncells, -1); + + yface_i.clear(); + yface_j.clear(); + yface_level.clear(); + + iymin_level.clear(); + iymax_level.clear(); + jymin_level.clear(); + jymax_level.clear(); + iymin_level.resize(levmx+1, 9999999); + iymax_level.resize(levmx+1, -9999999); + jymin_level.resize(levmx+1, 9999999); + jymax_level.resize(levmx+1, -9999999); + + iyadjust.clear(); + iyadjust.resize(levmx+1); + jyadjust.clear(); + jyadjust.resize(levmx+1); + + iface=0; + for (int nz=0; nz<(int)ncells; nz++){ + int nt = ntop[nz]; + if (nt == nz) continue; + + int ifactor = 1; + if (level[nt] < level[nz]) ifactor = 2; + + // Have top face + // printf("DEBUG -- iface %d lower nz %d upper nr %d\n",iface,nz,nt); + map_yface2cell_lower.push_back(nz); + map_yface2cell_upper.push_back(nt); + yface_level.push_back(MAX(level[nz],level[nt])); + yface_j.push_back(j[nt]*ifactor); + if (level[nt] < level[nz] && is_upper(i[nz]) ) { + yface_i.push_back(i[nt]*ifactor+1); + } else{ + 
yface_i.push_back(i[nt]*ifactor); + } + map_ycell2face_top1[nz] = iface; + + iface++; + + if (level[nt] > level[nz] &&is_lower(i[nt]) ){ + int nrt = nrht[nt]; + if (nrt != nt) { + map_yface2cell_lower.push_back(nz); + map_yface2cell_upper.push_back(nrt); + yface_level.push_back(MAX(level[nz],level[nrt])); + yface_j.push_back(j[nrt]*ifactor); + yface_i.push_back(i[nrt]*ifactor); + map_ycell2face_top2[nz] = iface; + + iface++; + } + } + } + nyface=iface; + + for (int nz=0; nz<(int)ncells; nz++){ + int nb = nbot[nz]; + if (nb == nz) continue; + + if (level[nb] < level[nz] && is_upper(i[nz])){ + map_ycell2face_bot1[nz] = map_ycell2face_top2[nb]; + } else { + map_ycell2face_bot1[nz] = map_ycell2face_top1[nb]; + if (level[nb] > level[nz]){ + map_ycell2face_bot2[nz] = map_ycell2face_top1[nrht[nb]]; + } + } + + } + + for (int iface=0; iface < nxface; iface++){ + int fl = xface_level[iface]; + + int fi = xface_i[iface]; + if (fi < ixmin_level[fl]) ixmin_level[fl] = fi; + if (fi > ixmax_level[fl]) ixmax_level[fl] = fi; + + int fj = xface_j[iface]; + if (fj < jxmin_level[fl]) jxmin_level[fl] = fj; + if (fj > jxmax_level[fl]) jxmax_level[fl] = fj; + } + + for (int iface=0; iface < nxface; iface++){ + int fl = xface_level[iface]; + if (ixmax_level[fl] < ixmin_level[fl]) continue; + + xface_i[iface] -= ixmin_level[fl]; + xface_j[iface] -= jxmin_level[fl]; + } + + for (int fl = 0; fl <= levmx; fl++){ + ixadjust[fl] = ixmin_level[fl]; + jxadjust[fl] = jxmin_level[fl]; + ixmax_level[fl] -= ixmin_level[fl];; + jxmax_level[fl] -= jxmin_level[fl]; + ixmin_level[fl] = 0; + jxmin_level[fl] = 0; + } + + for (int iface=0; iface < nyface; iface++){ + int fl = yface_level[iface]; + + int fi = yface_i[iface]; + if (fi < iymin_level[fl]) iymin_level[fl] = fi; + if (fi > iymax_level[fl]) iymax_level[fl] = fi; + + int fj = yface_j[iface]; + if (fj < jymin_level[fl]) jymin_level[fl] = fj; + if (fj > jymax_level[fl]) jymax_level[fl] = fj; + } + + for (int iface=0; iface < nyface; iface++){ + int 
fl = yface_level[iface]; + if (iymax_level[fl] < iymin_level[fl]) continue; + + yface_i[iface] -= iymin_level[fl]; + yface_j[iface] -= jymin_level[fl]; + } + + for (int fl = 0; fl <= levmx; fl++){ + iyadjust[fl] = iymin_level[fl]; + jyadjust[fl] = jymin_level[fl]; + iymax_level[fl] -= iymin_level[fl];; + jymax_level[fl] -= jymin_level[fl]; + iymin_level[fl] = 0; + jymin_level[fl] = 0; + } + +} + +int **Mesh::get_xface_flag(int lev, bool print_output) +{ + int **xface_flag = (int **)genmatrix(jxmax_level[lev]+1, + ixmax_level[lev]+1, sizeof(int)); + for (int jj=0; jj=0; jj--){ + + printf("DEBUG -- j %4d: ",jj); + for (int ii=0; ii= 0){ + //printf(" xface_flag_check[%d][%d] = 1;\n",jj,ii); + printf(" %4d ", xface_flag[jj][ii]); + } else { + printf(" "); + } + } + printf("\n"); + } + } + + return(xface_flag); +} + +int **Mesh::get_yface_flag(int lev, bool print_output) +{ + int **yface_flag = (int **)genmatrix(jymax_level[lev]+1, + iymax_level[lev]+1, sizeof(int)); + for (int jj=0; jj=0; jj--){ + + printf("DEBUG -- j %4d: ",jj); + for (int ii=0; ii= 0){ + //printf(" yface_flag_check[%d][%d] = 1;\n",jj,ii); + printf(" %4d ", yface_flag[jj][ii]); + } else { + printf(" "); + } + } + printf("\n"); + } + } + + return(yface_flag); +} + +void Mesh::get_flat_grid(int lev, int ***zone_flag_base, int ***zone_cell_base) +{ + int isize = ixmax_level[lev]+4; + int jsize = jymax_level[lev]+4; + int iadjust = ixadjust[lev]-2; + int jadjust = jyadjust[lev]-2; + + //printf("DEBUG -- sizes isize %d jsize %d\n",isize,jsize); + //printf("DEBUG -- adjust ixadjust %d jxadjust %d\n",ixadjust[lev],jxadjust[lev]); + //printf("DEBUG -- adjust iyadjust %d jyadjust %d\n",iyadjust[lev],jyadjust[lev]); + + (*zone_flag_base) = (int **)genmatrix(jsize, isize, sizeof(int)); + + int **zone_flag = *zone_flag_base; + for (int jj=0; jj=0; j--){ + for (int i=0; i= 0){ + printf(" zone_flag_check[%d][%d] = 1;\n",j,i); + } + } + } + for (int j=jsize-1; j>=0; j--){ + for (int i=0; i= 0){ + printf(" 
zone_cell_check[%d][%d] = %d;\n",j,i,zone_cell[j][i]); + } + } + } + + printf(" "); + for (int i=0; i=0; j--){ + + printf("DEBUG -- j %4d: ",j); + for (int i=0; i= 0){ + printf(" %4d ", zone_flag[j][i]); + } else { + printf(" "); + } + } + printf("\n"); + } + + printf("DEBUG -- zone_cell for level %d\n",lev); + + printf(" "); + for (int i=0; i=0; j--){ + + printf("DEBUG -- j %4d: ",j); + for (int i=0; i= 0){ + printf(" %4d ", zone_cell[j][i]); + } else { + printf(" "); + } + } + printf("\n"); + } + } +} + +void Mesh::calc_face_list_clearmaps() +{ + map_xface2cell_lower.clear(); + map_xface2cell_upper.clear(); + + map_xcell2face_left1.clear(); + map_xcell2face_left2.clear(); + map_xcell2face_right1.clear(); + map_xcell2face_right2.clear(); + + map_yface2cell_lower.clear(); + map_yface2cell_upper.clear(); + + map_ycell2face_bot1.clear(); + map_ycell2face_bot2.clear(); + map_ycell2face_top1.clear(); + map_ycell2face_top2.clear(); +} + +void Mesh::timer_output(mesh_timer_category category, mesh_device_types device_type, int timer_level) +{ + double local_time = 0.0; + if (device_type == MESH_DEVICE_CPU){ + local_time = get_cpu_timer(category); + } else { + local_time = get_gpu_timer(category); + } + + char string[80] = "/0"; + + if (mype == 0) { + const char *blank=" "; + + if (device_type == MESH_DEVICE_CPU){ + sprintf(string,"CPU: %.*s%-30.30s\t", 2*timer_level, blank, mesh_timer_descriptor[category]); + } else { + sprintf(string,"GPU: %.*s%-30.30s\t", 2*timer_level, blank, mesh_timer_descriptor[category]); + } + } + + parallel_output(string, local_time, timer_level, "s"); +} + +void Mesh::parallel_output(const char *string, double local_value, int output_level, const char *units) +{ + vector global_values(numpe); + global_values[0] = local_value; +#ifdef HAVE_MPI + if (numpe > 1) { + MPI_Gather(&local_value, 1, MPI_DOUBLE, &global_values[0], 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + } +#endif + if (mype == 0) { + const char *blank=" "; + + printf("%s\t",string); + if 
(numpe <= 4) { + for(int ip = 0; ip < numpe; ip++){ + printf("%.*s%8.4f\t", 2*output_level, blank, global_values[ip]); + } + printf("%s\n",units); + } else { + sort(global_values.begin(),global_values.end()); + double median_value; + int half_value = numpe/2; + if (numpe%2 == 0) { + median_value = (global_values[half_value-1]+global_values[half_value])/2.0; + } else { + median_value = global_values[half_value+1]; + } + printf("%.*s%8.4f\t%.*s%8.4f\t%.*s%8.4f %s min/median/max\n", + 2*output_level, blank, global_values[0], + 2*output_level, blank, median_value, + 2*output_level, blank, global_values[numpe-1], + units); + } + } +} + +void Mesh::parallel_output(const char *string, long long local_value, int output_level, const char *units) +{ + vector global_values(numpe); + global_values[0] = local_value; +#ifdef HAVE_MPI + if (numpe > 1) { + MPI_Gather(&local_value, 1, MPI_LONG_LONG, &global_values[0], 1, MPI_LONG_LONG, 0, MPI_COMM_WORLD); + } +#endif + if (mype == 0) { + const char *blank=" "; + + printf("%s\t",string); + if (numpe <= 4) { + for(int ip = 0; ip < numpe; ip++){ + printf("%.*s%10lld\t", 2*output_level, blank, global_values[ip]); + } + printf("%s\n",units); + } else { + sort(global_values.begin(),global_values.end()); + long long median_value; + int half_value = numpe/2; + if (numpe%2 == 0) { + median_value = (global_values[half_value-1]+global_values[half_value])/2; + } else { + median_value = global_values[half_value+1]; + } + printf("%.*s%10lld\t%.*s%10lld\t%.*s%10lld %s min/median/max\n", + 2*output_level, blank, global_values[0], + 2*output_level, blank, median_value, + 2*output_level, blank, global_values[numpe-1], + units); + } + } +} + +void Mesh::parallel_output(const char *string, int local_value, int output_level, const char *units) +{ + vector global_values(numpe); + global_values[0] = local_value; +#ifdef HAVE_MPI + if (numpe > 1) { + MPI_Gather(&local_value, 1, MPI_INT, &global_values[0], 1, MPI_INT, 0, MPI_COMM_WORLD); + } +#endif + if 
(mype == 0) { + const char *blank=" "; + + printf("%s\t",string); + if (numpe <= 4) { + for(int ip = 0; ip < numpe; ip++){ + printf("%.*s%10d\t", 2*output_level, blank, global_values[ip]); + } + printf("%s\n",units); + } else { + sort(global_values.begin(),global_values.end()); + int median_value; + int half_value = numpe/2; + if (numpe%2 == 0) { + median_value = (global_values[half_value-1]+global_values[half_value])/2; + } else { + median_value = global_values[half_value+1]; + } + printf("%.*s%10d\t%.*s%10d\t%.*s%10d %s min/median/max\n", + 2*output_level, blank, global_values[0], + 2*output_level, blank, median_value, + 2*output_level, blank, global_values[numpe-1], + units); + } + } +} + +const int CRUX_MESH_VERSION = 103; +const int num_int_dist_vals = 3; +const int num_int_vals = 3; +const int num_double_vals = 1; + +size_t Mesh::get_checkpoint_size(void) +{ + size_t nsize; + nsize = num_int_dist_vals*sizeof(int); + nsize += num_int_vals*sizeof(int); + nsize += num_double_vals*sizeof(double); + nsize += 2*MESH_COUNTER_SIZE*sizeof(int); + nsize += MESH_TIMER_SIZE*sizeof(double); + nsize += MESH_TIMER_SIZE*sizeof(long); + nsize += ncells*3*sizeof(int); + return(nsize); +} + +void Mesh::store_checkpoint(Crux *crux) +{ + // Need ncells for memory allocation + int storage = mesh_memory.get_memory_capacity(level); + crux->store_named_ints("storage", 7, &storage, 1); + // Write scalars to arrays for storing in checkpoint + int int_vals[num_int_vals]; + + int_vals[ 0] = CRUX_MESH_VERSION; + int_vals[ 1] = ndim; + int_vals[ 2] = levmx; + + // These are for values that will be different on every processor + int int_dist_vals[num_int_dist_vals]; + int_dist_vals[ 0] = (int)ncells; + int_dist_vals[ 1] = (int)ncells_ghost; + int_dist_vals[ 2] = offtile_local_count; + + double double_vals[num_double_vals]; + + double_vals[0] = offtile_ratio_local; + + int flags = RESTART_DATA; + // Now add memory entries to database for storing checkpoint + 
mesh_memory.memory_add(int_dist_vals, (size_t)num_int_dist_vals, 4, "mesh_int_dist_vals", flags); + flags = RESTART_DATA | REPLICATED_DATA; + mesh_memory.memory_add(int_vals, (size_t)num_int_vals, 4, "mesh_int_vals", flags); + + flags = RESTART_DATA; + mesh_memory.memory_add(double_vals, (size_t)num_double_vals, 8, "mesh_double_vals", flags); + mesh_memory.memory_add(cpu_counters, (size_t)MESH_COUNTER_SIZE, 4, "mesh_cpu_counters", flags); + mesh_memory.memory_add(gpu_counters, (size_t)MESH_COUNTER_SIZE, 4, "mesh_gpu_counters", flags); + + mesh_memory.memory_add(cpu_timers, (size_t)MESH_TIMER_SIZE, 8, "mesh_cpu_timers", flags); + mesh_memory.memory_add(gpu_timers, (size_t)MESH_TIMER_SIZE, 8, "mesh_gpu_timers", flags); + + // Store MallocPlus memory database + crux->store_MallocPlus(mesh_memory); + + // Remove memory entries from database now that data is stored + mesh_memory.memory_remove(int_dist_vals); + mesh_memory.memory_remove(int_vals); + mesh_memory.memory_remove(double_vals); + mesh_memory.memory_remove(cpu_counters); + mesh_memory.memory_remove(gpu_counters); + mesh_memory.memory_remove(cpu_timers); + mesh_memory.memory_remove(gpu_timers); +} + +void Mesh::restore_checkpoint(Crux *crux) +{ + int storage; + crux->restore_named_ints("storage", 7, &storage, 1); + + // Create memory for reading data into + int int_dist_vals[num_int_dist_vals]; + int int_vals[num_int_vals]; + double double_vals[num_double_vals]; + + mesh_memory.memory_delete(nlft); + mesh_memory.memory_delete(nrht); + mesh_memory.memory_delete(nbot); + mesh_memory.memory_delete(ntop); + mesh_memory.memory_delete(celltype); + + nlft = NULL; + nrht = NULL; + ntop = NULL; + nbot = NULL; + celltype = NULL; + + // Resize is a mesh method + // resize(storage); + // memory_reset_ptrs(); + allocate (storage); + + int flags = RESTART_DATA; + // Now add memory entries to database for restoring checkpoint + mesh_memory.memory_add(int_dist_vals, (size_t)num_int_dist_vals, 4, "mesh_int_dist_vals", flags); + 
flags = RESTART_DATA | REPLICATED_DATA; + mesh_memory.memory_add(int_vals, (size_t)num_int_vals, 4, "mesh_int_vals", flags); + mesh_memory.memory_add(double_vals, (size_t)num_double_vals, 8, "mesh_double_vals", flags); + + flags = RESTART_DATA; + mesh_memory.memory_add(cpu_counters, (size_t)MESH_COUNTER_SIZE, 4, "mesh_cpu_counters", flags); + mesh_memory.memory_add(gpu_counters, (size_t)MESH_COUNTER_SIZE, 4, "mesh_gpu_counters", flags); + + mesh_memory.memory_add(cpu_timers, (size_t)MESH_TIMER_SIZE, 8, "mesh_cpu_timers", flags); + mesh_memory.memory_add(gpu_timers, (size_t)MESH_TIMER_SIZE, 8, "mesh_gpu_timers", flags); + + // Restore MallocPlus memory database + crux->restore_MallocPlus(mesh_memory); + + // Remove memory entries from database now that data is restored + mesh_memory.memory_remove(int_dist_vals); + mesh_memory.memory_remove(int_vals); + mesh_memory.memory_remove(double_vals); + mesh_memory.memory_remove(cpu_counters); + mesh_memory.memory_remove(gpu_counters); + mesh_memory.memory_remove(cpu_timers); + mesh_memory.memory_remove(gpu_timers); + + // Check version number + if (int_vals[ 0] != CRUX_MESH_VERSION) { + printf("CRUX version mismatch for mesh data, version on file is %d, version in code is %d\n", + int_vals[0], CRUX_MESH_VERSION); + exit(0); + } + + // Copy out scalar values from array + ncells = int_dist_vals[ 0]; + ncells_ghost = int_dist_vals[ 1]; + offtile_local_count = int_dist_vals[ 2]; + + // Copy out scalar values from array + ndim = int_vals[ 1]; + levmx = int_vals[ 2]; + +#ifdef DEBUG_RESTORE_VALS + if (DEBUG_RESTORE_VALS && mype == 0) { + const char *int_dist_vals_descriptor[num_int_dist_vals] = { + "ncells", + "ncells_ghost", + "offtile_local_count" + }; + const char *int_vals_descriptor[num_int_vals] = { + "CRUX_MESH_VERSION", + "ndim", + "levmx", + }; + printf("\n"); + printf(" === Restored mesh int_dist_vals ===\n"); + for (int i = 0; i < num_int_dist_vals; i++){ + printf(" %-30s %d\n",int_dist_vals_descriptor[i], 
int_dist_vals[i]); + } + printf(" === Restored mesh int_vals ===\n"); + for (int i = 0; i < num_int_vals; i++){ + printf(" %-30s %d\n",int_vals_descriptor[i], int_vals[i]); + } + printf(" === Restored mesh int_vals ===\n"); + printf("\n"); + } +#endif + + offtile_ratio_local = double_vals[0]; + +#ifdef DEBUG_RESTORE_VALS + if (DEBUG_RESTORE_VALS && mype == 0) { + const char *double_vals_descriptor[num_double_vals] = { + "offtile_ratio_local" + }; + printf("\n"); + printf(" === Restored mesh double_vals ===\n"); + for (int i = 0; i < num_double_vals; i++){ + printf(" %-30s %lf\n",double_vals_descriptor[i], double_vals[i]); + } + printf(" === Restored mesh double_vals ===\n"); + printf("\n"); + } +#endif + +#ifdef DEBUG_RESTORE_VALS + if (DEBUG_RESTORE_VALS && mype == 0) { + printf(" === Restored mesh cpu counters ===\n"); + for (int i = 0; i < MESH_COUNTER_SIZE; i++){ + printf(" %-30s %d\n",mesh_counter_descriptor[i], cpu_counters[i]); + } + printf(" === Restored mesh cpu counters ===\n"); + printf(" === Restored mesh gpu counters ===\n"); + for (int i = 0; i < MESH_COUNTER_SIZE; i++){ + printf(" %-30s %d\n",mesh_counter_descriptor[i], gpu_counters[i]); + } + printf(" === Restored mesh gpu counters ===\n"); + printf("\n"); + } +#endif + +#ifdef DEBUG_RESTORE_VALS + if (DEBUG_RESTORE_VALS && mype == 0) { + printf(" === Restored mesh cpu timers ===\n"); + for (int i = 0; i < MESH_TIMER_SIZE; i++){ + printf(" %-30s %lf\n",mesh_timer_descriptor[i], cpu_timers[i]); + } + printf(" === Restored mesh cpu timers ===\n"); + printf("\n"); + } +#endif + +#ifdef DEBUG_RESTORE_VALS + if (DEBUG_RESTORE_VALS && mype == 0) { + printf("\n"); + printf(" === Restored mesh gpu timers ===\n"); + for (int i = 0; i < MESH_TIMER_SIZE; i++){ + printf(" %-30s %lld\n",mesh_timer_descriptor[i], gpu_timers[i]); + } + printf(" === Restored mesh gpu timers ===\n"); + printf("\n"); + } +#endif + //calc_celltype(ncells); +} + + +// This code due to Matt Calef +void scan ( scanInt *input , scanInt 
*output , scanInt length) +{ +#ifdef _OPENMP + // This already assumes it is in a parallel region + + // Get the total number of threads + + scanInt numThreads = omp_get_num_threads ( ); + + // Compute the range for which this thread is responsible. + + scanInt threadID = omp_get_thread_num ( ); + scanInt start = length * ( threadID ) / numThreads; + scanInt end = length * ( threadID + 1 ) / numThreads; + + // In the case that there are fewer entries than threads, some + // threads will have no entries. Only perform this operation if + // there is a postive number of entries. + + if ( start < end ) { + + // Do a scan over the region for this thread, with an initial + // value of zero. + + output[start] = 0; + for ( scanInt i = start + 1 ; i < end ; i++ ) + output[i] = output[i-1] + input[i-1]; + } + + // Wait until all threads get here. + +#pragma omp barrier + + // At this point each thread has done an independent scan of its + // region. All scans, except the first, are off by an + // offset. Here we have a single thread compute that offset with a + // serial scan that strides over the regions assigned to each + // thread. + +#pragma omp single + for ( scanInt i = 1 ; i < numThreads ; i ++ ) { + scanInt s0 = length * ( i - 1 ) / numThreads; + scanInt s1 = length * ( i ) / numThreads; + + if ( s0 < s1 ) + output[s1] = output[s0] + input[s1-1]; + + if ( s0 < s1 - 1 ) + output[s1] += output[s1-1]; + } + + // Barrier is implicit from omp single Wait until all threads get here. + + // Apply the offset to the range for this thread. 
+ + for ( scanInt i = start + 1 ; i < end ; i++ ) + output[i] += output[start]; + +#else + output[0] = 0; + for (int ic = 0; ic < length; ic++){ + output[ic+1] = output[ic] + input[ic]; + } +#endif +} +/****************************************************//** +*GET BOUNDS!!!!!!**** +**********************************/ +void Mesh::get_bounds(int& lowerBound, int& upperBound){ +#ifdef _OPENMP + int threadID = omp_get_thread_num(); + lowerBound = lowerBound_Global[threadID]; + upperBound = upperBound_Global[threadID]; +// printf("GETBOUNDs ThreadID: %d, upperBound: %d, lowerBound: %d \n",threadID, upperBound, lowerBound); +#else + lowerBound = 0; + upperBound = ncells; +#endif +} + +/****************************************************//** +*SETTING BOUNDS!!!!!!**** +**********************************/ +void Mesh::set_bounds(int n){ + +#ifdef _OPENMP + // #pragma omp parallel + { + int nthreads = omp_get_num_threads();//Private for each thread + int threadID = omp_get_thread_num(); //Private for each thread + #pragma omp master + { + if(lowerBound_Global == NULL) lowerBound_Global = (int *)malloc(nthreads*sizeof(int)); + if(upperBound_Global == NULL) upperBound_Global = (int *)malloc(nthreads*sizeof(int)); + } + //#pragma omp flush (lowerBound_Global, upperBound_Global) + #pragma omp barrier + + int work = n/nthreads; + if(threadID<(n%nthreads))work++; + int lowerBound = ((n / nthreads)*threadID) + min(n%nthreads, threadID); + int upperBound = lowerBound + work; +// printf("ThreadID: %d, upperBound: %d, lowerBound: %d \n",threadID, upperBound, lowerBound); + lowerBound_Global[threadID] = lowerBound; + upperBound_Global[threadID] = upperBound; + } +#else + if(lowerBound_Global == NULL) lowerBound_Global = (int *)malloc(1*sizeof(int)); + if(upperBound_Global == NULL) upperBound_Global = (int *)malloc(1*sizeof(int)); + int lowerBound = 0; + int upperBound = ncells; + lowerBound_Global[0] = lowerBound; + upperBound_Global[0] = upperBound; +#endif + +} Index: 
MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/partition.h =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/partition.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2011-2012, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * Neal Davis davis68@lanl.gov, davis68@illinois.edu + * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com + * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com + * + */ +#ifndef PARTITION_H +#define PARTITION_H + +#include + +#include "input.h" + +using namespace std; + +enum partition_method { + ORIGINAL_ORDER, + HILBERT_SORT, + HILBERT_PARTITION, + ZORDER +}; + +enum partition_measure { + NO_PARTITION_MEASURE, + WITH_DUPLICATES, + WITHOUT_DUPLICATES, + CVALUE, + CSTARVALUE +}; + + +void calc_distribution(int numpe, vector &proc); +//void partition_cells(int numpe, vector &proc, Mesh &mesh, enum partition_method method); + 
+typedef void (*maptonorm)( double * , double * , void * ); + +extern "C" void hsfc2sort( + const int N , /* IN: Number of points */ + const double * X , /* IN: array of X-Coordinates */ + const double * Y , /* IN: array of Y-Coordinates */ + const int ibase , /* IN: Stride for Y array */ + int * Info , /* OUT: (1 <= LDInfo) [ HSFC ordering ] + (2 <= LDInfo) [ HSFC index, #1 ] + (3 <= LDInfo) [ HSFC index, #2 ] */ + int LDInfo /* IN: Leading dimension of Info */ + ); + +extern "C" void hsfc2part( + const int Level , /* IN: Background grid level of partitioning */ + const int Limit , /* IN: Number of levels to consider for 'gaps' */ + const int NPart , /* IN: Target number of partitions */ + const int N , /* IN: Number of points */ + const double * X , /* IN: array of X-Coordinates */ + const double * Y , /* IN: array of Y-Coordinates */ + const int ibase , /* IN: Base - 0 for C, 1 for Fortran */ + int * Info , /* IN: Array of computational weights, + OUT: (1 <= LDInfo) [ Partitioning ] + (2 <= LDInfo) [ Adjusted HSFC ordering ] + (3 <= LDInfo) [ Original HSFC index, #1 ] + (4 <= LDInfo) [ Original HSFC index, #2 ] */ + int LDInfo );/* IN: Leading dimension of Info */ + + +#endif /* PARTITION_H */ Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/partition.cpp =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/partition.cpp @@ -0,0 +1,764 @@ +/* + * Copyright (c) 2011-2012, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. 
NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * Neal Davis davis68@lanl.gov, davis68@illinois.edu + * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com + * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com + * + */ +#ifdef HAVE_MPI +#include "mpi.h" +#endif + +#include +#include +#include +#include +#include +#include "partition.h" +#include "KDTree.h" +#include "mesh.h" +#ifdef HAVE_MPI +#include "s7.h" +#endif +#include "zorder.h" +#include "timer.h" +#include "hsfc.h" + +#ifndef DEBUG +#define DEBUG 0 +#endif + +typedef unsigned int uint; + +int measure_type; +int meas_count = 0; +double meas_sum_average = 0.0; + +extern bool localStencil; +extern enum partition_method initial_order; +extern enum partition_method cycle_reorder; + +void Mesh::partition_measure(void) +{ + if (measure_type != NO_PARTITION_MEASURE){ + + int ntX = TILE_SIZE; + static double offtile_ratio = 0.0; + + uint num_groups = (ncells + TILE_SIZE - 1)/TILE_SIZE; + + if (measure_type == WITH_DUPLICATES) { + int i = 0; +#ifdef _OPENMP +#pragma omp for reduction(+:offtile_ratio) +#endif + for (uint group_id=0; group_id < num_groups; group_id ++){ + + int start_idx = group_id * ntX; + int end_idx = (group_id + 1) * ntX; + + int offtile =0; + for (uint ic = 0; ic < TILE_SIZE; ic++, i++){ + + if (i >= ncells) continue; + //taken from wave_kern_calc.cl 'setup tile' kernel + if (nlft[i] < start_idx || nlft[i] >= end_idx) offtile++; + if (level[nlft[i]] > level[i] && + (ntop[nlft[i]] < start_idx || ntop[nlft[i]] >= end_idx) 
) offtile++; + if (nrht[i] < start_idx || nrht[i] >= end_idx) offtile++; + if (level[nrht[i]] > level[i] && + (ntop[nrht[i]] < start_idx || ntop[nrht[i]] >= end_idx) ) offtile++; + if (nbot[i] < start_idx || nbot[i] >= end_idx) offtile++; + if (level[nbot[i]] > level[i] && + (nrht[nbot[i]] < start_idx || nrht[nbot[i]] >= end_idx) ) offtile++; + if (ntop[i] < start_idx || ntop[i] >= end_idx) offtile++; + if (level[ntop[i]] > level[i] && + (nrht[ntop[i]] < start_idx || nrht[ntop[i]] >= end_idx) ) offtile++; + } + offtile_ratio += (double)offtile/(double)(TILE_SIZE); + //printf("DEBUG Ratio of surface area to volume is equal to %d / %d ratio is %lf\n", offtile, TILE_SIZE, (double)offtile/(double)TILE_SIZE); + } + } else if (measure_type == WITHOUT_DUPLICATES) { + int i = 0; +#ifdef _OPENMP +#pragma omp for reduction(+:offtile_ratio) +#endif + for (uint group_id=0; group_id < num_groups; group_id ++){ + list offtile_list; + + int start_idx = group_id * ntX; + int end_idx = (group_id + 1) * ntX; + + for (uint ic = 0; ic < TILE_SIZE; ic++, i++){ + + if (i >= ncells) continue; + + if (nlft[i] < start_idx || nlft[i] >= end_idx) offtile_list.push_back(nlft[i]); + if (level[nlft[i]] > level[i] && + (ntop[nlft[i]] < start_idx || ntop[nlft[i]] >= end_idx) ) offtile_list.push_back(ntop[nlft[i]]); + if (nrht[i] < start_idx || nrht[i] >= end_idx) offtile_list.push_back(nrht[i]); + if (level[nrht[i]] > level[i] && + (ntop[nrht[i]] < start_idx || ntop[nrht[i]] >= end_idx) ) offtile_list.push_back(ntop[nrht[i]]); + if (nbot[i] < start_idx || nbot[i] >= end_idx) offtile_list.push_back(nbot[i]); + if (level[nbot[i]] > level[i] && + (nrht[nbot[i]] < start_idx || nrht[nbot[i]] >= end_idx) ) offtile_list.push_back(nrht[nbot[i]]); + if (ntop[i] < start_idx || ntop[i] >= end_idx) offtile_list.push_back(ntop[i]); + if (level[ntop[i]] > level[i] && + (nrht[ntop[i]] < start_idx || nrht[ntop[i]] >= end_idx) ) offtile_list.push_back(nrht[ntop[i]]); + } + offtile_list.sort(); + 
offtile_list.unique(); + + offtile_ratio += (double)offtile_list.size()/(double)(TILE_SIZE); + //printf("DEBUG Ratio of surface area to volume is equal to %d / %d ratio is %lf\n", offtile, TILE_SIZE, (double)offtile/(double)TILE_SIZE); + } + } else if (measure_type == CVALUE) { + int i = 0; +#ifdef _OPENMP +#pragma omp for reduction(+:offtile_ratio) +#endif + for (uint group_id=0; group_id < num_groups; group_id ++){ + list offtile_list; + + int start_idx = group_id * ntX; + int end_idx = (group_id + 1) * ntX; + + for (uint ic = 0; ic < TILE_SIZE; ic++, i++){ + + if (i >= ncells) continue; + + if (nlft[i] < start_idx || nlft[i] >= end_idx) offtile_list.push_back(nlft[i]); + if (level[nlft[i]] > level[i] && + (ntop[nlft[i]] < start_idx || ntop[nlft[i]] >= end_idx) ) offtile_list.push_back(ntop[nlft[i]]); + if (nrht[i] < start_idx || nrht[i] >= end_idx) offtile_list.push_back(nrht[i]); + if (level[nrht[i]] > level[i] && + (ntop[nrht[i]] < start_idx || ntop[nrht[i]] >= end_idx) ) offtile_list.push_back(ntop[nrht[i]]); + if (nbot[i] < start_idx || nbot[i] >= end_idx) offtile_list.push_back(nbot[i]); + if (level[nbot[i]] > level[i] && + (nrht[nbot[i]] < start_idx || nrht[nbot[i]] >= end_idx) ) offtile_list.push_back(nrht[nbot[i]]); + if (ntop[i] < start_idx || ntop[i] >= end_idx) offtile_list.push_back(ntop[i]); + if (level[ntop[i]] > level[i] && + (nrht[ntop[i]] < start_idx || nrht[ntop[i]] >= end_idx) ) offtile_list.push_back(nrht[ntop[i]]); + } + offtile_list.sort(); + offtile_list.unique(); + + offtile_ratio += (double)offtile_list.size()/(4*sqrt((double)(TILE_SIZE))); + //printf("DEBUG Ratio of surface area to volume is equal to %d / %d ratio is %lf\n", offtile, TILE_SIZE, (double)offtile/(double)TILE_SIZE); + } + } else if (measure_type == CSTARVALUE) { + int i = 0; +#ifdef _OPENMP +#pragma omp for reduction(+:offtile_ratio) +#endif + for (uint group_id=0; group_id < num_groups; group_id ++){ + list offtile_list; + list offtile_cache_lines; // Assumes memory is 
aligned + int cache_line_size = 4; // Some could be 8, or more? + + int start_idx = group_id * ntX; + int end_idx = (group_id + 1) * ntX; + + for (uint ic = 0; ic < TILE_SIZE; ic++, i++){ + + if (i >= ncells) continue; + + if (nlft[i] < start_idx || nlft[i] >= end_idx) { + offtile_list.push_back(nlft[i]); + offtile_cache_lines.push_back(nlft[i]/cache_line_size); + } + + if (level[nlft[i]] > level[i] && (ntop[nlft[i]] < start_idx || ntop[nlft[i]] >= end_idx) ) { + offtile_list.push_back(ntop[nlft[i]]); + offtile_cache_lines.push_back(ntop[nlft[i]]/cache_line_size); + } + if (nrht[i] < start_idx || nrht[i] >= end_idx) { + offtile_list.push_back(nrht[i]); + offtile_cache_lines.push_back(nrht[i]/cache_line_size); + } + if (level[nrht[i]] > level[i] && (ntop[nrht[i]] < start_idx || ntop[nrht[i]] >= end_idx) ) { + offtile_list.push_back(ntop[nrht[i]]); + offtile_cache_lines.push_back(ntop[nrht[i]]/cache_line_size); + } + if (nbot[i] < start_idx || nbot[i] >= end_idx) { + offtile_list.push_back(nbot[i]); + offtile_cache_lines.push_back(nbot[i]/cache_line_size); + } + if (level[nbot[i]] > level[i] && (nrht[nbot[i]] < start_idx || nrht[nbot[i]] >= end_idx) ) { + offtile_list.push_back(nrht[nbot[i]]); + offtile_cache_lines.push_back(nrht[nbot[i]]/cache_line_size); + } + if (ntop[i] < start_idx || ntop[i] >= end_idx) { + offtile_list.push_back(ntop[i]); + offtile_cache_lines.push_back(ntop[i]/cache_line_size); + } + if (level[ntop[i]] > level[i] && (nrht[ntop[i]] < start_idx || nrht[ntop[i]] >= end_idx) ) { + offtile_list.push_back(nrht[ntop[i]]); + offtile_cache_lines.push_back(nrht[ntop[i]]/cache_line_size); + } + } + offtile_list.sort(); + offtile_list.unique(); + offtile_cache_lines.sort(); + offtile_cache_lines.unique(); + + double s_ngeom = (double)(offtile_list.size()); + double q_ngeom = (double)(offtile_cache_lines.size()); + double ngeom = (double)(TILE_SIZE); + double cover = (double)(cache_line_size); +// offtile_ratio += (s_ngeom * q_ngeom) / 
(4*sqrt(ngeom)*2*(1+(ngeom+cache_line_size-1)/cache_line_size)); +// offtile_ratio += (q_ngeom) / (2*sqrt(ngeom)+2*((sqrt(ngeom)+cover-1)/cover)); +// offtile_ratio += (q_ngeom) / ( (8*sqrt(ngeom)+cover-1)/cover ); + ngeom = sqrt(ngeom); + offtile_ratio += (s_ngeom*q_ngeom*cover) / ( 4 * ngeom * (8*ngeom+cover-1) ); + + //printf("DEBUG Ratio of surface area to volume is equal to %d / %d ratio is %lf\n", offtile, TILE_SIZE, (double)offtile/(double)TILE_SIZE); + } + } + + // printf("DEBUG Ratio of surface area to volume is equal to %d / %d \n", offtile, ontile); + +#ifdef _OPENMP +#pragma omp master + { +#endif + meas_count ++; + meas_sum_average += offtile_ratio/(double)num_groups; + // printf("DEBUG %d icount %d sum_average %lf\n",__LINE__,icount, sum_average); +#ifdef _OPENMP + } +#endif + } // if PARTITION TYPE +} + +void Mesh::print_partition_measure() +{ + if (meas_count != 0) { + if (measure_type == NO_PARTITION_MEASURE) { + if (mype == 0) printf("No Partition Measure\n"); + } else if (measure_type == WITH_DUPLICATES) { + parallel_output("Average surface area to volume ratio ", meas_sum_average/(double)meas_count, 0, "with duplicates"); + } else if (measure_type == WITHOUT_DUPLICATES) { + parallel_output("Average surface area to volume ratio ", meas_sum_average/(double)meas_count, 0, "without duplicates"); + } else if (measure_type == CVALUE) { + parallel_output("Partition Quality Avg C value ", meas_sum_average/(double)meas_count, 0, ""); + } else if (measure_type == CSTARVALUE){ + parallel_output("Partition Quality Avg C* value ", meas_sum_average/(double)meas_count, 0, ""); + } + } + + if (numpe > 1){ + parallel_output("The MPI surface area to volume ratio ", offtile_ratio_local, 0, "without duplicates"); + } +} + +void Mesh::print_partition_type() +{ + if (mype == 0) { + if (initial_order == ORIGINAL_ORDER) { + printf("Initial order is naive."); + } else if (initial_order == HILBERT_SORT) { + printf("Initial order is Hilbert sort."); + } else if 
(initial_order == HILBERT_PARTITION) { + printf("Initial order is Hilbert partition."); + } else if (initial_order == ZORDER) { + printf("Initial order is Z order."); + } + + if (cycle_reorder == ORIGINAL_ORDER) { + printf(" No cycle reorder."); + } else if (cycle_reorder == HILBERT_SORT) { + printf(" Cycle reorder is Hilbert sort."); + } else if (cycle_reorder == HILBERT_PARTITION) { + printf(" Cycle reorder is Hilbert partition."); + } else if (cycle_reorder == ZORDER) { + printf(" Cycle reorder is Z order."); + } + + if (localStencil) { + printf(" Local Stencil is on.\n"); + } else { + printf("\n"); + } + } + +} +void Mesh::partition_cells( + int numpe, // Number of processes; sizes the per-process nsizes/ndispl tables. + vector &z_order, // Resulting index ordering. + enum partition_method method) // Assigned partitioning method. +{ + int *info; // Work buffer filled by hsfc2sort; its first ncells entries are copied into z_order. + double iscale, // Factor applied to the i indices before calc_zorder. + jscale; // Factor applied to the j indices before calc_zorder. + int imax, // Maximum x-index. + jmax; // Maximum y-index. + vector z_index; // Ordered curve from hsfc. + vector i_scaled; // x-indices normalized to a scale of [0, 1] for hsfc. + vector j_scaled; // y-indices normalized to a scale of [0, 1] for hsfc. + vector iunit; // X coordinates handed to hsfc2sort. + vector junit; // Y coordinates handed to hsfc2sort. + + struct timeval tstart_cpu; + cpu_timer_start(&tstart_cpu); + + // Initialize ordered curve index. + z_index.resize(ncells, 0); + //z_order.resize(ncells, 0); + + if (parallel) { +#ifdef HAVE_MPI + nsizes.resize(numpe); + ndispl.resize(numpe); + MPI_Allgather(&ncells, 1, MPI_INT, &nsizes[0], 1, MPI_INT, MPI_COMM_WORLD); + ndispl[0]=0; + for (int ip=1; ipiunit_global(ncells_global); + vectorjunit_global(ncells_global); + vectorz_order_global(ncells_global); + + MPI_Allgatherv(&iunit[0], ncells, MPI_DOUBLE, &iunit_global[0], &nsizes[0], &ndispl[0], MPI_DOUBLE, MPI_COMM_WORLD); + MPI_Allgatherv(&junit[0], ncells, MPI_DOUBLE, &junit_global[0], &nsizes[0], &ndispl[0], MPI_DOUBLE, MPI_COMM_WORLD); + // Sort the mesh into an ordered space-filling curve from hsfc. 
+ hsfc2sort(ncells_global, &iunit_global[0], &junit_global[0], 0, info, 1); + + // Copy the cell order information from info into z_order. + for (uint ic = 0; ic < ncells_global; ++ic) + { z_order_global[ic] = info[ic]; } + free(info); + + // Order the mesh according to the calculated order (note that z_order is for both curves). + vector int_global(ncells_global); + vector int_global_new(ncells_global); + + // gather, reorder and scatter i + MPI_Allgatherv(&i[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + for (int ic = 0; ic<(int)ncells_global; ic++){ + int_global_new[ic] = int_global[z_order_global[ic]]; + } + MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &i[0], ncells, MPI_INT, 0, MPI_COMM_WORLD); + + // gather, reorder and scatter j + MPI_Allgatherv(&j[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + for (int ic = 0; ic<(int)ncells_global; ic++){ + int_global_new[ic] = int_global[z_order_global[ic]]; + } + MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &j[0], ncells, MPI_INT, 0, MPI_COMM_WORLD); + + // gather, reorder and scatter level + MPI_Allgatherv(&level[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + for (int ic = 0; ic<(int)ncells_global; ic++){ + int_global_new[ic] = int_global[z_order_global[ic]]; + } + MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &level[0], ncells, MPI_INT, 0, MPI_COMM_WORLD); + + // It is faster just to recalculate these variables instead of communicating them + if (mesh_memory.get_memory_size(celltype) >= ncells) { + calc_celltype(mesh_memory.get_memory_size(celltype)); + } + + if (have_spatial_variables) { + calc_spatial_coordinates(0); + } + + if (mesh_memory.get_memory_size(nlft) >= ncells) { + vector inv_z_order(ncells_global); + for (int ic = 0; ic<(int)ncells_global; ic++){ + inv_z_order[z_order_global[ic]] = ic; + } + + MPI_Allgatherv(&nlft[0], 
ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + for (int ic = 0; ic<(int)ncells_global; ic++){ + int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]]; + } + MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &nlft[0], ncells, MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Allgatherv(&nrht[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + for (int ic = 0; ic<(int)ncells_global; ic++){ + int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]]; + } + MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &nrht[0], ncells, MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Allgatherv(&nbot[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + for (int ic = 0; ic<(int)ncells_global; ic++){ + int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]]; + } + MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &nbot[0], ncells, MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Allgatherv(&ntop[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + for (int ic = 0; ic<(int)ncells_global; ic++){ + int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]]; + } + MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &ntop[0], ncells, MPI_INT, 0, MPI_COMM_WORLD); + } + + MPI_Scatterv(&z_order_global[0], &nsizes[0], &ndispl[0], MPI_INT, &z_order[0], ncells, MPI_INT, 0, MPI_COMM_WORLD); +#endif + } else { + info = (int *)malloc(sizeof(int) * 3 * ncells); + + // Sort the mesh into an ordered space-filling curve from hsfc. + hsfc2sort(ncells, &iunit[0], &junit[0], 0, info, 1); + + // Copy the cell order information from info into z_order. + for (uint ic = 0; ic < ncells; ++ic) + { z_order[ic] = info[ic]; } + free(info); + + // Order the mesh according to the calculated order (note that z_order is for both curves). 
+ vector int_local(ncells); + + mesh_memory.set_memory_attribute(nlft, 0x100); + mesh_memory.set_memory_attribute(nrht, 0x100); + mesh_memory.set_memory_attribute(nbot, 0x100); + mesh_memory.set_memory_attribute(ntop, 0x100); + + mesh_memory.memory_reorder_all(&z_order[0]); + memory_reset_ptrs(); + + if (x.size() >= ncells) { + vector real_local(ncells); + + for (int ic = 0; ic<(int)ncells; ic++){ + real_local[ic] = x[ic]; + } + for (int ic = 0; ic<(int)ncells; ic++){ + x[ic] = real_local[z_order[ic]]; + } + + for (int ic = 0; ic<(int)ncells; ic++){ + real_local[ic] = dx[ic]; + } + for (int ic = 0; ic<(int)ncells; ic++){ + dx[ic] = real_local[z_order[ic]]; + } + + for (int ic = 0; ic<(int)ncells; ic++){ + real_local[ic] = y[ic]; + } + for (int ic = 0; ic<(int)ncells; ic++){ + y[ic] = real_local[z_order[ic]]; + } + + for (int ic = 0; ic<(int)ncells; ic++){ + real_local[ic] = dy[ic]; + } + for (int ic = 0; ic<(int)ncells; ic++){ + dy[ic] = real_local[z_order[ic]]; + } + } + + } + + break; + + case ZORDER: + // Resort the curve by z-order. 
+ if (parallel) { +#ifdef HAVE_MPI + vectori_global(ncells_global); + vectorj_global(ncells_global); + vectorlevel_global(ncells_global); + vectorz_index_global(ncells_global); + vectorz_order_global(ncells_global); + MPI_Allgatherv(&i[0], ncells, MPI_INT, &i_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); // i, j and level are gathered as MPI_INT everywhere else in this function; MPI_REAL was the wrong datatype. + MPI_Allgatherv(&j[0], ncells, MPI_INT, &j_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + MPI_Allgatherv(&level[0], ncells, MPI_INT, &level_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + + i_scaled.resize(ncells_global); + j_scaled.resize(ncells_global); + + // Find the largest i and j index over all gathered cells. + imax = 0; + jmax = 0; + for (uint ic = 0; ic < ncells_global; ++ic) + { if (i_global[ic] > imax) imax = i_global[ic]; + if (j_global[ic] > jmax) jmax = j_global[ic]; } + + // Scale factors that map the largest index to 16; kept at 0 when the largest index is 0 so we never divide by zero. + iscale = (imax > 0) ? 16.0 / (double)imax : 0.0; + jscale = (jmax > 0) ? 16.0 / (double)jmax : 0.0; + + // Rescale every index and truncate it to int. + for (uint ic = 0; ic < ncells_global; ++ic) + { i_scaled[ic]=(int) ( (double)i_global[ic]*iscale); + j_scaled[ic]=(int) ( (double)j_global[ic]*jscale); } + + // calc_zorder receives the rescaled indices; z_order_global is consumed below as the new global ordering. + calc_zorder(ncells_global, &i_scaled[0], &j_scaled[0], &level_global[0], levmx, ibase, &z_index_global[0], &z_order_global[0]); + + // Order the mesh according to the calculated order (note that z_order is for both curves). 
+ vector int_global(ncells_global); + vector int_global_new(ncells_global); + + // gather, reorder and scatter i + MPI_Allgatherv(&i[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + for (int ic = 0; ic<(int)ncells_global; ic++){ + int_global_new[ic] = int_global[z_order_global[ic]]; + } + MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &i[0], ncells, MPI_INT, 0, MPI_COMM_WORLD); + + // gather, reorder and scatter j + MPI_Allgatherv(&j[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + for (int ic = 0; ic<(int)ncells_global; ic++){ + int_global_new[ic] = int_global[z_order_global[ic]]; + } + MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &j[0], ncells, MPI_INT, 0, MPI_COMM_WORLD); + + // gather, reorder and scatter level + MPI_Allgatherv(&level[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + for (int ic = 0; ic<(int)ncells_global; ic++){ + int_global_new[ic] = int_global[z_order_global[ic]]; + } + MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &level[0], ncells, MPI_INT, 0, MPI_COMM_WORLD); + + // It is faster just to recalculate these variables instead of communicating them + if (mesh_memory.get_memory_size(celltype) >= ncells) { + calc_celltype(mesh_memory.get_memory_size(celltype)); + } + + if (x.size() >= ncells) { + calc_spatial_coordinates(0); + } + + if (mesh_memory.get_memory_size(nlft) >= ncells) { + vector inv_z_order(ncells_global); + for (int ic = 0; ic<(int)ncells_global; ic++){ + inv_z_order[z_order_global[ic]] = ic; + } + + MPI_Allgatherv(&nlft[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + for (int ic = 0; ic<(int)ncells_global; ic++){ + int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]]; + } + MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &nlft[0], ncells, MPI_INT, 0, MPI_COMM_WORLD); + + 
MPI_Allgatherv(&nrht[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + for (int ic = 0; ic<(int)ncells_global; ic++){ + int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]]; + } + MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &nrht[0], ncells, MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Allgatherv(&nbot[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + for (int ic = 0; ic<(int)ncells_global; ic++){ + int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]]; + } + MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &nbot[0], ncells, MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Allgatherv(&ntop[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD); + for (int ic = 0; ic<(int)ncells_global; ic++){ + int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]]; + } + MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &ntop[0], ncells, MPI_INT, 0, MPI_COMM_WORLD); + } + MPI_Scatterv(&z_order_global[0], &nsizes[0], &ndispl[0], MPI_INT, &z_order[0], ncells, MPI_INT, 0, MPI_COMM_WORLD); // z_order holds integer indices; the Hilbert branch scatters it as MPI_INT as well. +#endif + } else { + i_scaled.resize(ncells); + j_scaled.resize(ncells); + + // Find the largest i and j index over the local cells. + imax = 0; + jmax = 0; + for (uint ic = 0; ic < ncells; ++ic) + { if (i[ic] > imax) imax = i[ic]; + if (j[ic] > jmax) jmax = j[ic]; } + + // Scale factors that map the largest index to 16; kept at 0 when the largest index is 0 so we never divide by zero. + iscale = (imax > 0) ? 16.0 / (double)imax : 0.0; + jscale = (jmax > 0) ? 16.0 / (double)jmax : 0.0; + + // Rescale every index and truncate it to int. + for (uint ic = 0; ic < ncells; ++ic) + { i_scaled[ic]=(int) ( (double)i[ic]*iscale); + j_scaled[ic]=(int) ( (double)j[ic]*jscale); } + + // calc_zorder receives the rescaled indices; z_order is consumed below as the new ordering. + calc_zorder(ncells, &i_scaled[0], &j_scaled[0], &level[0], levmx, ibase, &z_index[0], &z_order[0]); + + // Order the mesh according to the calculated order (note that z_order is for both curves). 
+ vector int_local(ncells); + + mesh_memory.set_memory_attribute(nlft, 0x100); + mesh_memory.set_memory_attribute(nrht, 0x100); + mesh_memory.set_memory_attribute(nbot, 0x100); + mesh_memory.set_memory_attribute(ntop, 0x100); + + mesh_memory.memory_reorder_all(&z_order[0]); + memory_reset_ptrs(); + + + if (x.size() >= ncells) { + vector real_local(ncells); + + for (int ic = 0; ic<(int)ncells; ic++){ + real_local[ic] = x[ic]; + } + for (int ic = 0; ic<(int)ncells; ic++){ + x[ic] = real_local[z_order[ic]]; + } + + for (int ic = 0; ic<(int)ncells; ic++){ + real_local[ic] = dx[ic]; + } + for (int ic = 0; ic<(int)ncells; ic++){ + dx[ic] = real_local[z_order[ic]]; + } + + for (int ic = 0; ic<(int)ncells; ic++){ + real_local[ic] = y[ic]; + } + for (int ic = 0; ic<(int)ncells; ic++){ + y[ic] = real_local[z_order[ic]]; + } + + for (int ic = 0; ic<(int)ncells; ic++){ + real_local[ic] = dy[ic]; + } + for (int ic = 0; ic<(int)ncells; ic++){ + dy[ic] = real_local[z_order[ic]]; + } + } + + } + + break; + + default: + // Note that HILBERT_PARTITION is not currently supported due to redundancy with HILBERT_SORT. + break; + } + + + // Output ordered mesh information. 
+ if (DEBUG) + { printf("orig index i j lev nlft nrht nbot ntop xlow xhigh ylow yhigh z index z order\n"); + for (uint ic=0; ic +#else +#include "CL/cl.h" +#endif +#endif + +#ifdef __cplusplus +extern "C" +{ +#endif + +#ifdef HAVE_OPENCL +cl_kernel kernel_reduce_sum, + kernel_reduce_sum_stage1of2, + kernel_reduce_sum_stage2of2, + kernel_reduce_sum_int_stage1of2, + kernel_reduce_sum_int_stage2of2, + kernel_reduce_product, + kernel_reduce_max, + kernel_reduce_max_stage1of2, + kernel_reduce_max_stage2of2, + kernel_reduce_min, + kernel_reduce_min_stage1of2, + kernel_reduce_min_stage2of2; +#endif + +void init_kernels_reduce(void); +void init_kernel_sum(void); +void init_kernel_2stage_sum(void); +void init_kernel_2stage_sum_int(void); +void init_kernel_product(void); +void init_kernel_max(void); +void init_kernel_2stage_max(void); +void init_kernel_min(void); +void init_kernel_2stage_min(void); + +void terminate_kernel_2stage_sum(void); +void terminate_kernel_2stage_sum_int(void); + +void release_kernels_reduce(); +void release_kernel_sum(); +void release_kernel_2stage_sum(); +void release_kernel_2stage_sum_int(); +void release_kernel_product(); +void release_kernel_max(); +void release_kernel_2stage_max(); +void release_kernel_min(); +void release_kernel_2stage_min(); + +#ifdef __cplusplus +} +#endif + +#endif /* _REDUCE_H_ */ + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/reduce.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/reduce.c @@ -0,0 +1,245 @@ +/** + * Copyright (c) 2011, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. 
Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * Neal Davis davis68@lanl.gov, davis68@illinois.edu + * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com + * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com + * + */ +#include "reduce.h" +#ifdef HAVE_OPENCL +#include "ezcl/ezcl.h" +#endif + +#ifdef HAVE_OPENCL +#include "reduce_kernel.inc" +#endif + +void init_kernels_reduce(void) +{ +#ifdef HAVE_OPENCL + cl_context context = ezcl_get_context(); + kernel_reduce_sum = ezcl_create_kernel_wsource(context, reduce_source, "reduce_sum_cl"); + kernel_reduce_sum_stage1of2 = ezcl_create_kernel_wsource(context, reduce_source, "reduce_sum_stage1of2_cl"); + kernel_reduce_sum_stage2of2 = ezcl_create_kernel_wsource(context, reduce_source, "reduce_sum_stage2of2_cl"); + kernel_reduce_sum_int_stage1of2 = ezcl_create_kernel_wsource(context, reduce_source, "reduce_sum_int_stage1of2_cl"); + kernel_reduce_sum_int_stage2of2 = ezcl_create_kernel_wsource(context, 
reduce_source, "reduce_sum_int_stage2of2_cl"); + kernel_reduce_product = ezcl_create_kernel_wsource(context, reduce_source, "reduce_product_cl"); + kernel_reduce_max = ezcl_create_kernel_wsource(context, reduce_source, "reduce_max_cl"); + kernel_reduce_max_stage1of2 = ezcl_create_kernel_wsource(context, reduce_source, "reduce_max_stage1of2_cl"); + kernel_reduce_max_stage2of2 = ezcl_create_kernel_wsource(context, reduce_source, "reduce_max_stage2of2_cl"); + kernel_reduce_min = ezcl_create_kernel_wsource(context, reduce_source, "reduce_min_cl"); + kernel_reduce_min_stage1of2 = ezcl_create_kernel_wsource(context, reduce_source, "reduce_min_stage1of2_cl"); + kernel_reduce_min_stage2of2 = ezcl_create_kernel_wsource(context, reduce_source, "reduce_min_stage2of2_cl"); +#endif +} + +void init_kernel_sum(void) +{ +#ifdef HAVE_OPENCL + cl_context context = ezcl_get_context(); + kernel_reduce_sum = ezcl_create_kernel_wsource(context, reduce_source, "reduce_sum_cl"); +#endif +} + +void init_kernel_2stage_sum(void) +{ +#ifdef HAVE_OPENCL + cl_context context = ezcl_get_context(); + kernel_reduce_sum_stage1of2 = ezcl_create_kernel_wsource(context, reduce_source, "reduce_sum_stage1of2_cl"); + kernel_reduce_sum_stage2of2 = ezcl_create_kernel_wsource(context, reduce_source, "reduce_sum_stage2of2_cl"); +#endif +} + +void terminate_kernel_2stage_sum(void) +{ +#ifdef HAVE_OPENCL + ezcl_kernel_release(kernel_reduce_sum_stage1of2); + ezcl_kernel_release(kernel_reduce_sum_stage2of2); +#endif +} + +void init_kernel_2stage_sum_int(void) +{ +#ifdef HAVE_OPENCL + cl_context context = ezcl_get_context(); + kernel_reduce_sum_int_stage1of2 = ezcl_create_kernel_wsource(context, reduce_source, "reduce_sum_int_stage1of2_cl"); + kernel_reduce_sum_int_stage2of2 = ezcl_create_kernel_wsource(context, reduce_source, "reduce_sum_int_stage2of2_cl"); +#endif +} + +void terminate_kernel_2stage_sum_int(void) +{ +#ifdef HAVE_OPENCL + ezcl_kernel_release(kernel_reduce_sum_int_stage1of2); + 
ezcl_kernel_release(kernel_reduce_sum_int_stage2of2); +#endif +} + +void init_kernel_product(void) +{ +#ifdef HAVE_OPENCL + cl_context context = ezcl_get_context(); + kernel_reduce_product = ezcl_create_kernel_wsource(context, reduce_source, "reduce_product_cl"); +#endif +} + +void init_kernel_max(void) +{ +#ifdef HAVE_OPENCL + cl_context context = ezcl_get_context(); + kernel_reduce_max = ezcl_create_kernel_wsource(context, reduce_source, "reduce_max_cl"); +#endif +} + +void init_kernel_2stage_max(void) +{ +#ifdef HAVE_OPENCL + cl_context context = ezcl_get_context(); + kernel_reduce_max_stage1of2 = ezcl_create_kernel_wsource(context, reduce_source, "reduce_max_stage1of2_cl"); + kernel_reduce_max_stage2of2 = ezcl_create_kernel_wsource(context, reduce_source, "reduce_max_stage2of2_cl"); +#endif +} + +void init_kernel_min(void) +{ +#ifdef HAVE_OPENCL + cl_context context = ezcl_get_context(); + kernel_reduce_min = ezcl_create_kernel_wsource(context, reduce_source, "reduce_min_cl"); +#endif +} + +void init_kernel_2stage_min(void) +{ +#ifdef HAVE_OPENCL + cl_context context = ezcl_get_context(); + kernel_reduce_min_stage1of2 = ezcl_create_kernel_wsource(context, reduce_source, "reduce_min_stage1of2_cl"); + kernel_reduce_min_stage2of2 = ezcl_create_kernel_wsource(context, reduce_source, "reduce_min_stage2of2_cl"); +#endif +} + +void release_kernels_reduce() +{ +#ifdef HAVE_OPENCL + ezcl_kernel_release(kernel_reduce_sum); + ezcl_kernel_release(kernel_reduce_sum_stage1of2); + ezcl_kernel_release(kernel_reduce_sum_stage2of2); + ezcl_kernel_release(kernel_reduce_sum_int_stage1of2); + ezcl_kernel_release(kernel_reduce_sum_int_stage2of2); + ezcl_kernel_release(kernel_reduce_product); + ezcl_kernel_release(kernel_reduce_max); + ezcl_kernel_release(kernel_reduce_max_stage1of2); + ezcl_kernel_release(kernel_reduce_max_stage2of2); + ezcl_kernel_release(kernel_reduce_min); + ezcl_kernel_release(kernel_reduce_min_stage1of2); + ezcl_kernel_release(kernel_reduce_min_stage2of2); 
+#endif +} + +void release_kernel_sum() +{ +#ifdef HAVE_OPENCL + ezcl_kernel_release(kernel_reduce_sum); +#endif +} + +void release_kernel_2stage_sum() +{ +#ifdef HAVE_OPENCL + ezcl_kernel_release(kernel_reduce_sum_stage1of2); + ezcl_kernel_release(kernel_reduce_sum_stage2of2); +#endif +} + +void release_kernel_2stage_sum_int() +{ +#ifdef HAVE_OPENCL + ezcl_kernel_release(kernel_reduce_sum_int_stage1of2); + ezcl_kernel_release(kernel_reduce_sum_int_stage2of2); +#endif +} + +void release_kernel_product() +{ +#ifdef HAVE_OPENCL + ezcl_kernel_release(kernel_reduce_product); +#endif +} + +void release_kernel_max() +{ +#ifdef HAVE_OPENCL + ezcl_kernel_release(kernel_reduce_max); +#endif +} + +void release_kernel_2stage_max() +{ +#ifdef HAVE_OPENCL + ezcl_kernel_release(kernel_reduce_max_stage1of2); + ezcl_kernel_release(kernel_reduce_max_stage2of2); +#endif +} + +void release_kernel_min() +{ +#ifdef HAVE_OPENCL + ezcl_kernel_release(kernel_reduce_min); +#endif +} + +void release_kernel_2stage_min() +{ +#ifdef HAVE_OPENCL + ezcl_kernel_release(kernel_reduce_min_stage1of2); + ezcl_kernel_release(kernel_reduce_min_stage2of2); +#endif +} + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/s7.h =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/s7.h @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2011-2012, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. 
If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * CLAMR -- LA-CC-11-094
+ * This research code is being developed as part of the
+ * 2011 X Division Summer Workshop for the express purpose
+ * of a collaborative code for development of ideas in
+ * the implementation of AMR codes for Exascale platforms
+ *
+ * AMR implementation of the Wave code previously developed
+ * as a demonstration code for regular grids on Exascale platforms
+ * as part of the Supercomputing Challenge and Los Alamos
+ * National Laboratory
+ *
+ * Authors: Bob Robey XCP-2 brobey@lanl.gov
+ * Neal Davis davis68@lanl.gov, davis68@illinois.edu
+ * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com
+ * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com
+ *
+ */
+#ifndef S7_H_
+#define S7_H_
+
+//#define _S7_DEBUG
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+   /*
+    * Some S7 parameters.
+    */
+
+#define S7_OK 0 /* Successful return. */
+
+   /*
+    * Element-type selector passed to the generic sort routines below.
+    *
+    * The routines in s7.c handle only these selectors:
+    *    S7_INT,  S7_INTEGER4            -> int
+    *    S7_LONG                         -> long
+    *    S7_LONG_LONG_INT, S7_INTEGER8   -> long long
+    *    S7_FLOAT, S7_REAL4              -> float
+    *    S7_DOUBLE, S7_REAL8             -> double
+    * Every other selector makes them print an error message and call
+    * exit(1).
+    */
+   enum S7_Datatype
+   {
+      S7_GENERIC8 = 0,
+      S7_BYTE,
+      S7_PACKED,
+
+      S7_CHAR,
+      S7_INT,
+      S7_LONG,
+      S7_LONG_LONG_INT,
+      S7_FLOAT,
+      S7_DOUBLE,
+
+      S7_CHARACTER,
+      S7_LOGICAL,
+      S7_INTEGER4,
+      S7_INTEGER8,
+      S7_REAL4,
+      S7_REAL8,
+
+      /* First and last enumerators, for range checks on a selector. */
+      S7_DATATYPE_MIN = S7_GENERIC8,
+      S7_DATATYPE_MAX = S7_REAL8
+   };
+
+
+   /*
+    * Heapsort array_in in place into ascending order.
+    *    array_in     nsize elements of the type named by S7_datatype
+    *    nsize        number of elements
+    *    S7_datatype  element type selector (see enum S7_Datatype)
+    */
+   void S7_Sort(
+         void *array_in,
+         const int nsize,
+         const enum S7_Datatype S7_datatype
+         );
+
+   /*
+    * Heapsort array_in1 in place into ascending order and move the
+    * elements of array_in2 along with it, so that pairs
+    * (array_in1[k], array_in2[k]) stay together.  Both arrays are read
+    * with the same element type, S7_datatype.
+    */
+   void S7_Sort_2Arrays(
+         void * array_in1,
+         void * array_in2,
+         const int nsize,
+         const enum S7_Datatype S7_datatype
+         );
+
+   /*
+    * Indirect heapsort: array_in is only read; the routine reorders the
+    * entries of index so that array_in[index[k]] is ascending in k.
+    * index must have room for nsize entries; per the comment in s7.c it
+    * is initialised to consecutive integers by the routine itself.
+    */
+   void S7_Index_Sort(
+         void * array_in,
+         const int nsize,
+         const enum S7_Datatype S7_datatype,
+         int * index
+         );
+
+   /*
+    * Same as S7_Index_Sort, but the index array holds long values.
+    */
+   void S7_Indexi8_Sort(
+         void * array_in,
+         const int nsize,
+         const enum S7_Datatype S7_datatype,
+         long * index
+         );
+
+
+   /*
+    * Type-specific entry points.
+    * NOTE(review): none of these is defined in s7.c as added by this
+    * patch -- confirm where they are implemented, or drop the
+    * declarations.
+    */
+   void S7_Index_sort_real8(const int n,double array_in[],int index[]);
+   void S7_Index_sort_int8(const int n,long long iarray_in[], int index[]);
+   void S7_Index_sort_int4(const int n, int iarray_in[], int index[]);
+   void S7_Index_sort_real8_int8(const int n,double array_in[],long long index[]);
+
+   void S7_Index_sort_int8_int8(const int n,long long iarray_in[], long long index[]);
+   void S7_Index_sort_int4_int8(const int n, int iarray_in[], long long index[]);
+   void S7_Sort_real8(const int n,double array_in[]);
+   void S7_Sort_int8(const int n,long long array_in[]);
+   void S7_Sort_int4(const int n,int array_in[]);
+   void S7_Sort_real8_real8(const int n,double array_in[],double array_in2[]);
+   void S7_Sort_int8_int8(const int n,long long array_in[],long long array_in2[]);
+   void S7_Sort_int4_int4(const int n,int array_in[],int array_in2[]);
+
+   /*
+    * End prototypes.
+    */
+
+   /*
+    * Close the extern "C" block opened at the top of this header when
+    * compiling as C++.
+    */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* S7_H_ */
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/s7.c
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/s7.c
@@ -0,0 +1,977 @@
+/*
+ * Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ * All rights Reserved.
+ *
+ * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Additionally, redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * CLAMR -- LA-CC-11-094
+ * This research code is being developed as part of the
+ * 2011 X Division Summer Workshop for the express purpose
+ * of a collaborative code for development of ideas in
+ * the implementation of AMR codes for Exascale platforms
+ *
+ * AMR implementation of the Wave code previously developed
+ * as a demonstration code for regular grids on Exascale platforms
+ * as part of the Supercomputing Challenge and Los Alamos
+ * National Laboratory
+ *
+ * Authors: Bob Robey XCP-2 brobey@lanl.gov
+ * Neal Davis davis68@lanl.gov, davis68@illinois.edu
+ * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com
+ * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include "s7.h"
+
+/*
+ * Heapsort kernels.
+ *
+ * Every public routine in this file runs the same in-place heapsort; the
+ * routines differ only in the element type and in what is permuted along
+ * with the keys.  The algorithm is therefore written once per flavour as
+ * a macro and instantiated for each supported datatype.
+ *
+ * Shared structure: the first phase (s7_i counting down from count/2)
+ * builds a max-heap; the second phase (s7_n counting down) repeatedly
+ * moves the heap root to the end of the shrinking range and sifts the
+ * displaced element back down.
+ *
+ * Fewer than two elements are already sorted, so the kernels return at
+ * once.  Without that guard a count <= 0 would decrement s7_n past zero
+ * and index the arrays with negative subscripts.
+ *
+ * Comparisons are written as !(a > b) rather than (a <= b) so that the
+ * float/double instantiations treat NaN exactly like a plain '>' test.
+ */
+
+/* Sort `count` elements of type TYPE starting at `base`, ascending. */
+#define S7_HEAPSORT(TYPE, base, count)                                   \
+   do {                                                                  \
+      TYPE *s7_key = (TYPE *)(base);                                     \
+      int   s7_n   = (count);                                            \
+      int   s7_i   = s7_n / 2;                                           \
+      if (s7_n < 2) break;                                               \
+      for (;;) {                                                         \
+         TYPE s7_q;                                                      \
+         int  s7_parent, s7_child;                                       \
+         if (s7_i > 0) {                                                 \
+            s7_q = s7_key[--s7_i];                                       \
+         } else {                                                        \
+            if (--s7_n == 0) break;                                      \
+            s7_q = s7_key[s7_n];                                         \
+            s7_key[s7_n] = s7_key[0];                                    \
+         }                                                               \
+         s7_parent = s7_i;                                               \
+         s7_child  = s7_i * 2 + 1;                                       \
+         while (s7_child < s7_n) {                                       \
+            if (s7_child + 1 < s7_n &&                                   \
+                s7_key[s7_child + 1] > s7_key[s7_child]) s7_child++;     \
+            if (!(s7_key[s7_child] > s7_q)) break;                       \
+            s7_key[s7_parent] = s7_key[s7_child];                        \
+            s7_parent = s7_child;                                        \
+            s7_child  = s7_parent * 2 + 1;                               \
+         }                                                               \
+         s7_key[s7_parent] = s7_q;                                       \
+      }                                                                  \
+   } while (0)
+
+/* As S7_HEAPSORT, but every move of a key in `base1` is mirrored on the
+ * element at the same position in `base2`. */
+#define S7_HEAPSORT_PAIR(TYPE, base1, base2, count)                      \
+   do {                                                                  \
+      TYPE *s7_key = (TYPE *)(base1);                                    \
+      TYPE *s7_val = (TYPE *)(base2);                                    \
+      int   s7_n   = (count);                                            \
+      int   s7_i   = s7_n / 2;                                           \
+      if (s7_n < 2) break;                                               \
+      for (;;) {                                                         \
+         TYPE s7_qk, s7_qv;                                              \
+         int  s7_parent, s7_child;                                       \
+         if (s7_i > 0) {                                                 \
+            s7_qk = s7_key[--s7_i];                                      \
+            s7_qv = s7_val[s7_i];                                        \
+         } else {                                                        \
+            if (--s7_n == 0) break;                                      \
+            s7_qk = s7_key[s7_n];                                        \
+            s7_qv = s7_val[s7_n];                                        \
+            s7_key[s7_n] = s7_key[0];                                    \
+            s7_val[s7_n] = s7_val[0];                                    \
+         }                                                               \
+         s7_parent = s7_i;                                               \
+         s7_child  = s7_i * 2 + 1;                                       \
+         while (s7_child < s7_n) {                                       \
+            if (s7_child + 1 < s7_n &&                                   \
+                s7_key[s7_child + 1] > s7_key[s7_child]) s7_child++;     \
+            if (!(s7_key[s7_child] > s7_qk)) break;                      \
+            s7_key[s7_parent] = s7_key[s7_child];                        \
+            s7_val[s7_parent] = s7_val[s7_child];                        \
+            s7_parent = s7_child;                                        \
+            s7_child  = s7_parent * 2 + 1;                               \
+         }                                                               \
+         s7_key[s7_parent] = s7_qk;                                      \
+         s7_val[s7_parent] = s7_qv;                                      \
+      }                                                                  \
+   } while (0)
+
+/* Indirect sort: the keys at `base` are only read; the ITYPE entries of
+ * `idx` are permuted so that base[idx[k]] is ascending in k. */
+#define S7_INDEX_HEAPSORT(TYPE, ITYPE, base, idx, count)                 \
+   do {                                                                  \
+      const TYPE *s7_key = (const TYPE *)(base);                         \
+      ITYPE      *s7_idx = (idx);                                        \
+      int         s7_n   = (count);                                      \
+      int         s7_i   = s7_n / 2;                                     \
+      if (s7_n < 2) break;                                               \
+      for (;;) {                                                         \
+         ITYPE s7_it;                                                    \
+         TYPE  s7_q;                                                     \
+         int   s7_parent, s7_child;                                      \
+         if (s7_i > 0) {                                                 \
+            s7_it = s7_idx[--s7_i];                                      \
+         } else {                                                        \
+            if (--s7_n == 0) break;                                      \
+            s7_it = s7_idx[s7_n];                                        \
+            s7_idx[s7_n] = s7_idx[0];                                    \
+         }                                                               \
+         s7_q = s7_key[s7_it];                                           \
+         s7_parent = s7_i;                                               \
+         s7_child  = s7_i * 2 + 1;                                       \
+         while (s7_child < s7_n) {                                       \
+            if (s7_child + 1 < s7_n &&                                   \
+                s7_key[s7_idx[s7_child + 1]] > s7_key[s7_idx[s7_child]]) \
+               s7_child++;                                               \
+            if (!(s7_key[s7_idx[s7_child]] > s7_q)) break;               \
+            s7_idx[s7_parent] = s7_idx[s7_child];                        \
+            s7_parent = s7_child;                                        \
+            s7_child  = s7_parent * 2 + 1;                               \
+         }                                                               \
+         s7_idx[s7_parent] = s7_it;                                      \
+      }                                                                  \
+   } while (0)
+
+/*
+ * Sort array_in in place into ascending order.
+ *
+ *    array_in     nsize elements of the type selected by S7_datatype
+ *    nsize        number of elements; values below 2 leave the array
+ *                 untouched
+ *    S7_datatype  S7_INT/S7_INTEGER4, S7_LONG,
+ *                 S7_LONG_LONG_INT/S7_INTEGER8, S7_FLOAT/S7_REAL4 or
+ *                 S7_DOUBLE/S7_REAL8
+ *
+ * Any other datatype prints an error message and terminates the program
+ * with exit(1).
+ */
+void S7_Sort(
+      void                  * array_in,
+      const int               nsize,
+      const enum S7_Datatype  S7_datatype
+      )
+{
+   switch (S7_datatype){
+      case S7_INTEGER4:
+      case S7_INT:
+         S7_HEAPSORT(int, array_in, nsize);
+         break;
+
+      case S7_LONG:
+         S7_HEAPSORT(long, array_in, nsize);
+         break;
+
+      case S7_LONG_LONG_INT:
+      case S7_INTEGER8:
+         S7_HEAPSORT(long long, array_in, nsize);
+         break;
+
+      case S7_FLOAT:
+      case S7_REAL4:
+         S7_HEAPSORT(float, array_in, nsize);
+         break;
+
+      case S7_DOUBLE:
+      case S7_REAL8:
+         S7_HEAPSORT(double, array_in, nsize);
+         break;
+
+      default:
+         printf("Error -- S7_Datatype not supported in S7_Sort\n");
+         exit(1);
+         break;
+
+   }
+}
+
+
+/*
+ * Sort array_in1 in place into ascending order and carry array_in2 along:
+ * whenever a key moves, the element at the same position of array_in2
+ * moves with it.  Both arrays hold nsize elements of the type selected by
+ * S7_datatype (same set of supported types as S7_Sort).
+ *
+ * Any other datatype prints an error message and terminates the program
+ * with exit(1).
+ */
+void S7_Sort_2Arrays(
+      void                  * array_in1,
+      void                  * array_in2,
+      const int               nsize,
+      const enum S7_Datatype  S7_datatype
+      )
+{
+   switch (S7_datatype){
+      case S7_INTEGER4:
+      case S7_INT:
+         S7_HEAPSORT_PAIR(int, array_in1, array_in2, nsize);
+         break;
+
+      case S7_LONG:
+         S7_HEAPSORT_PAIR(long, array_in1, array_in2, nsize);
+         break;
+
+      case S7_LONG_LONG_INT:
+      case S7_INTEGER8:
+         S7_HEAPSORT_PAIR(long long, array_in1, array_in2, nsize);
+         break;
+
+      case S7_FLOAT:
+      case S7_REAL4:
+         S7_HEAPSORT_PAIR(float, array_in1, array_in2, nsize);
+         break;
+
+      case S7_DOUBLE:
+      case S7_REAL8:
+         S7_HEAPSORT_PAIR(double, array_in1, array_in2, nsize);
+         break;
+
+      default:
+         /* The message used to name S7_Sort (copy-paste slip). */
+         printf("Error -- S7_Datatype not supported in S7_Sort_2Arrays\n");
+         exit(1);
+         break;
+   }
+}
+
+
+
+
+
+/*
+ * Indirect sort.  array_in is left unmodified; index[0..nsize-1] is first
+ * filled with 0..nsize-1 and then permuted so that array_in[index[k]] is
+ * ascending in k.
+ *
+ *    array_in     nsize elements of the type selected by S7_datatype
+ *    nsize        number of elements
+ *    S7_datatype  same set of supported types as S7_Sort
+ *    index        output, room for nsize ints
+ *
+ * Any other datatype prints an error message and terminates the program
+ * with exit(1).
+ */
+void S7_Index_Sort(
+      void                  * array_in,
+      const int               nsize,
+      const enum S7_Datatype  S7_datatype,
+      int                   * index
+      )
+{
+   int j;
+
+   // Initialize array with consecutive integers
+   for (j=0; j<nsize; j++) {
+      index[j] = j;
+   }
+
+   switch (S7_datatype){
+      case S7_INTEGER4:
+      case S7_INT:
+         S7_INDEX_HEAPSORT(int, int, array_in, index, nsize);
+         break;
+
+      case S7_LONG:
+         S7_INDEX_HEAPSORT(long, int, array_in, index, nsize);
+         break;
+
+      case S7_LONG_LONG_INT:
+      case S7_INTEGER8:
+         S7_INDEX_HEAPSORT(long long, int, array_in, index, nsize);
+         break;
+
+      case S7_FLOAT:
+      case S7_REAL4:
+         S7_INDEX_HEAPSORT(float, int, array_in, index, nsize);
+         break;
+
+      case S7_DOUBLE:
+      case S7_REAL8:
+         S7_INDEX_HEAPSORT(double, int, array_in, index, nsize);
+         break;
+
+      default:
+         printf("Error -- S7_Datatype not supported in S7_Index_Sort\n");
+         exit(1);
+         break;
+
+   }
+}
+
+/*
+ * Same as S7_Index_Sort, but the index array holds long values.
+ */
+void S7_Indexi8_Sort(
+      void                  * array_in,
+      const int               nsize,
+      const enum S7_Datatype  S7_datatype,
+      long                  * index
+      )
+{
+   int j;
+
+   // Initialize array with consecutive integers
+   for (j=0; j<nsize; j++) {
+      index[j] = j;
+   }
+
+   switch (S7_datatype){
+      case S7_INTEGER4:
+      case S7_INT:
+         S7_INDEX_HEAPSORT(int, long, array_in, index, nsize);
+         break;
+
+      case S7_LONG:
+         S7_INDEX_HEAPSORT(long, long, array_in, index, nsize);
+         break;
+
+      case S7_LONG_LONG_INT:
+      case S7_INTEGER8:
+         S7_INDEX_HEAPSORT(long long, long, array_in, index, nsize);
+         break;
+
+      case S7_FLOAT:
+      case S7_REAL4:
+         S7_INDEX_HEAPSORT(float, long, array_in, index, nsize);
+         break;
+
+      case S7_DOUBLE:
+      case S7_REAL8:
+         S7_INDEX_HEAPSORT(double, long, array_in, index, nsize);
+         break;
+
+      default:
+         printf("Error -- S7_Datatype not supported in S7_Indexi8_Sort\n");
+         exit(1);
+         break;
+
+   }
+}
+
+#undef S7_HEAPSORT
+#undef S7_HEAPSORT_PAIR
+#undef S7_INDEX_HEAPSORT
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/state.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/state.h
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c)
2011-2013, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * Neal Davis davis68@lanl.gov, davis68@illinois.edu + * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com + * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com + * + */ +#ifndef STATE_H_ +#define STATE_H_ + +#include +#include "MallocPlus.h" +#include "mesh.h" +#include "crux.h" +#ifdef HAVE_OPENCL +#include "ezcl/ezcl.h" +#endif +//#include "l7/l7.h" + +#define STATUS_OK 0 +#define STATUS_NAN 1 +#define STATUS_MASS_LOSS 2 + +#if !defined(FULL_PRECISION) && !defined(MIXED_PRECISION) && !defined(MINIMUM_PRECISION) +#define FULL_PRECISION +#endif +#ifdef NO_CL_DOUBLE +#undef FULL_PRECISION +#undef MIXED_PRECISION +#define MINIMUM_PRECISION +#endif + +#if defined(MINIMUM_PRECISION) + typedef float state_t; // this is for physics state variables ncell in size + typedef float real_t; // this is used for intermediate calculations + typedef struct + { + float s0; + float s1; + } real2_t; +#define 
CONSERVATION_EPS 15.0 +#ifdef HAVE_OPENCL + typedef cl_float cl_state_t; // for gpu physics state variables + typedef cl_float4 cl_state4_t; // for gpu physics state variables + typedef cl_float cl_real_t; // for intermediate gpu physics state variables + typedef cl_float2 cl_real2_t; // for intermediate gpu physics state variables + typedef cl_float4 cl_real4_t; // for intermediate gpu physics state variables +#endif +#ifdef HAVE_MPI + #define MPI_STATE_T MPI_FLOAT // for MPI communication for physics state variables + #define MPI_REAL_T MPI_FLOAT // for MPI communication for physics state variables + #define L7_STATE_T L7_FLOAT + #define L7_REAL_T L7_FLOAT +#endif + +#elif defined(MIXED_PRECISION) // intermediate values calculated high precision and stored as floats + typedef float state_t; + typedef double real_t; + typedef struct + { + double s0; + double s1; + } real2_t; +#define CONSERVATION_EPS .02 +#ifdef HAVE_OPENCL + typedef cl_float cl_state_t; + typedef cl_float4 cl_state4_t; + typedef cl_double cl_real_t; // for intermediate gpu physics state variables + typedef cl_double2 cl_real2_t; // for intermediate gpu physics state variables + typedef cl_double4 cl_real4_t; // for intermediate gpu physics state variables +#endif +#ifdef HAVE_MPI + #define MPI_STATE_T MPI_FLOAT + #define MPI_REAL_T MPI_DOUBLE + #define L7_STATE_T L7_FLOAT + #define L7_REAL_T L7_DOUBLE +#endif + +#elif defined(FULL_PRECISION) + typedef double state_t; + typedef double real_t; + typedef struct + { + double s0; + double s1; + } real2_t; +#define CONSERVATION_EPS .02 +#ifdef HAVE_OPENCL + typedef cl_double cl_state_t; + typedef cl_double4 cl_state4_t; + typedef cl_double cl_real_t; // for intermediate gpu physics state variables + typedef cl_double2 cl_real2_t; // for intermediate gpu physics state variables + typedef cl_double4 cl_real4_t; // for intermediate gpu physics state variables +#endif +#ifdef HAVE_MPI + #define MPI_STATE_T MPI_DOUBLE + #define MPI_REAL_T MPI_DOUBLE + 
#define L7_STATE_T L7_DOUBLE + #define L7_REAL_T L7_DOUBLE +#endif +#endif + +extern "C" void do_calc(void); + +enum SUM_TYPE { + SUM_REGULAR, + SUM_KAHAN +}; + + +enum SIGN_RULE { + DIAG_RULE, + X_RULE, + Y_RULE, +}; + +enum state_timers +{ + STATE_TIMER_APPLY_BCS, + STATE_TIMER_SET_TIMESTEP, + STATE_TIMER_FINITE_DIFFERENCE, + STATE_TIMER_REFINE_POTENTIAL, + STATE_TIMER_CALC_MPOT, + STATE_TIMER_REZONE_ALL, + STATE_TIMER_MASS_SUM, + STATE_TIMER_READ, + STATE_TIMER_WRITE, + STATE_TIMER_SIZE +}; + +typedef enum state_timers state_timer_category; + +using namespace std; + +class State { + +public: + MallocPlus state_memory; + MallocPlus gpu_state_memory; + Mesh *mesh; + state_t *H; + state_t *U; + state_t *V; + +#ifdef HAVE_OPENCL + cl_mem dev_H; + cl_mem dev_U; + cl_mem dev_V; + + cl_mem dev_mass_sum; + cl_mem dev_deltaT; + + cl_event apply_BCs_event; + + cl_mem dev_mpot; + //cl_mem dev_ioffset; + cl_mem dev_result; +#endif + + double cpu_timers[STATE_TIMER_SIZE]; + long long gpu_timers[STATE_TIMER_SIZE]; + + // constructor -- zeroes the timers and stores the mesh pointer; state arrays are allocated by init() + State(Mesh *mesh_in); + + void init(int do_gpu_calc); + void terminate(void); + + /* Memory routines for linked list of state arrays */ + void allocate(size_t ncells); + void allocate_from_backup_file(FILE *fp); + void allocate_for_rollback(State *state_to_copy); + void resize(size_t ncells); + void memory_reset_ptrs(void); +#ifdef HAVE_OPENCL + void allocate_device_memory(size_t ncells); +#endif + void resize_old_device_memory(size_t ncells); + + /* Accessor routines */ + double get_cpu_timer(state_timer_category category) {return(cpu_timers[category]); }; + /* Convert nanoseconds to seconds */ + double get_gpu_timer(state_timer_category category) {return((double)(gpu_timers[category])*1.0e-9); }; + + /* Boundary routines -- not currently used */ + void add_boundary_cells(void); + void apply_boundary_conditions(void); + void apply_boundary_conditions_local(void); + void 
apply_boundary_conditions_ghost(void); + void remove_boundary_cells(void); + + /******************************************************************* + * set_timestep + * Input + * H, U, V -- from state object + * celltype, level, lev_delta + * Output + * mindeltaT returned + *******************************************************************/ + double set_timestep(double g, double sigma); +#ifdef HAVE_OPENCL + double gpu_set_timestep(double sigma); +#endif + + /******************************************************************* + * calc finite difference + * will add ghost region to H, U, V and fill at start of routine + * Input + * H, U, V -- from state object + * nlft, nrht, nbot, ntop, level, celltype -- from mesh object + * Output + * H, U, V + *******************************************************************/ + void calc_finite_difference(double deltaT); + void calc_finite_difference_via_faces(double deltaT); +#ifdef HAVE_OPENCL + void gpu_calc_finite_difference(double deltaT); +#endif + + /******************************************************************* + * calc refine potential -- state has responsibility to calc initial + * refinement potential array that is then passed to mesh for + * smoothing and enforcing refinement rules + * Input + * H, U, V -- from state object + * Output + * mpot + * ioffset + * count + *******************************************************************/ + size_t calc_refine_potential(vector &mpot, int &icount, int &jcount); +#ifdef HAVE_OPENCL + size_t gpu_calc_refine_potential(int &icount, int &jcount); +#endif + + /******************************************************************* + * rezone all -- most of call is done in mesh + * Input + * Mesh and state variables + * Output + * New mesh and state variables on refined mesh + *******************************************************************/ + void rezone_all(int icount, int jcount, vector mpot); +#ifdef HAVE_OPENCL + void gpu_rezone_all(int icount, int jcount, bool 
localStencil); +#endif + + /******************************************************************* + * load balance -- most of call is done in mesh, but pointers are + * reset to newly allocated state arrays + * Input + * Mesh and state variables + * Output + * New mesh and state variables on refined mesh + *******************************************************************/ +#ifdef HAVE_MPI + void do_load_balance_local(size_t &numcells); +#ifdef HAVE_OPENCL + void gpu_do_load_balance_local(size_t &numcells); +#endif +#endif + + /******************************************************************* + * mass sum -- Conservation of mass check + * Input + * H from state object + * Precision type for sum + * Output + * total mass is returned + *******************************************************************/ + double mass_sum(int enhanced_precision_sum); +#ifdef HAVE_OPENCL + double gpu_mass_sum(int enhanced_precision_sum); +#endif + + void fill_circle(double circ_radius, double fill_value, double background); + void state_reorder(vector iorder); + + void symmetry_check(const char *string, vector sym_index, double eps, + SIGN_RULE sign_rule, int &flag); + + void output_timing_info(int do_cpu_calc, int do_gpu_calc, double total_elapsed_time); + + /* state comparison routines */ +#ifdef HAVE_OPENCL + void compare_state_gpu_global_to_cpu_global(const char* string, int cycle, uint ncells); +#endif + void compare_state_cpu_local_to_cpu_global(State *state_global, const char* string, int cycle, uint ncells, uint ncells_global, int *nsizes, int *ndispl); +#ifdef HAVE_OPENCL + void compare_state_all_to_gpu_local(State *state_global, uint ncells, uint ncells_global, int mype, int ncycle, int *nsizes, int *ndispl); +#endif + + void output_timer_block(mesh_device_types device_type, double elapsed_time, + double mesh_time, double compute_time, double total_elapsed_time, double speedup_ratio); + + void timer_output(state_timer_category category, mesh_device_types device_type, int 
timer_level); + + void print(void); + + size_t get_checkpoint_size(void); + void store_checkpoint(Crux *crux); + void restore_checkpoint(Crux *crux); + //Added for a second print on every iteration: Brian Atkinson (5-29-14) + void print(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage); + void print_local(int ncycle); + void print_failure_log(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage, bool got_nan); + void print_rollback_log(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage, int backup_attempt, int num_of_attempts, int error_status); + +private: + State(const State&); // To block copy constructor so copies are not made inadvertently + + void print_object_info(void); +}; + +#endif // ifndef STATE_H_ + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/state.cpp =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/state.cpp @@ -0,0 +1,3966 @@ +/* + * Copyright (c) 2011-2013, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. 
+ * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * Neal Davis davis68@lanl.gov, davis68@illinois.edu + * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com + * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com + * + */ +#include "mesh.h" +#include +#include +#include +#include +#include +#include "state.h" +#include "timer.h" +#ifdef HAVE_MPI +#include +#endif + +#undef DEBUG +//#define DEBUG 0 +#define DEBUG_RESTORE_VALS 1 +#define TIMING_LEVEL 2 + +#if defined(MINIMUM_PRECISION) +#define ZERO 0.0f +#define ONE 1.0f +#define HALF 0.5f +#define EPSILON 1.0e-30f /* small positive floor for max(..., EPSILON) denominators; the former "1.0f-30" parsed as 1.0f - 30 */ +#define STATE_EPS 15.0 +// calc refine is done in single precision +#define REFINE_GRADIENT 0.10f +#define COARSEN_GRADIENT 0.05f +#define REFINE_HALF 0.5f +#define REFINE_NEG_THOUSAND -1000.0f + +#elif defined(MIXED_PRECISION) // intermediate values calculated high precision and stored as floats +#define ZERO 0.0 +#define ONE 1.0 +#define HALF 0.5 +#define EPSILON 1.0e-30 +#define STATE_EPS .02 +// calc refine is done in single precision +#define REFINE_GRADIENT 0.10f +#define COARSEN_GRADIENT 0.05f +#define REFINE_HALF 0.5f +#define REFINE_NEG_THOUSAND -1000.0f + +#elif defined(FULL_PRECISION) +#define ZERO 0.0 +#define ONE 1.0 +#define HALF 0.5 +#define EPSILON 1.0e-30 +#define STATE_EPS .02 +// calc refine is done in single precision +#define REFINE_GRADIENT 0.10 +#define COARSEN_GRADIENT 0.05 +#define REFINE_HALF 0.5 +#define REFINE_NEG_THOUSAND -1000.0 + +#endif + +#ifdef _OPENMP +static bool iversion_flag = false; +#endif + 
+typedef unsigned int uint; + +static const char *state_timer_descriptor[STATE_TIMER_SIZE] = { + "state_timer_apply_BCs", + "state_timer_set_timestep", + "state_timer_finite_difference", + "state_timer_refine_potential", + "state_timer_calc_mpot", + "state_timer_rezone_all", + "state_timer_mass_sum", + "state_timer_read", + "state_timer_write" +}; + +#ifdef HAVE_OPENCL +#include "state_kernel.inc" +#endif + +struct esum_type{ + double sum; + double correction; +}; +#ifdef HAVE_MPI +MPI_Datatype MPI_TWO_DOUBLES; +MPI_Op KNUTH_SUM; +int commutative = 1; +void knuth_sum(struct esum_type *in, struct esum_type *inout, int *len, MPI_Datatype *MPI_TWO_DOUBLES); +#endif + +int save_ncells; + +#define CONSERVED_EQNS + +#define SQR(x) ( (x)*(x) ) /* argument parenthesized so compound expressions expand correctly */ +#define MIN3(x,y,z) ( min( min((x),(y)), (z)) ) /* minimum of three values */ + +#ifdef HAVE_OPENCL +cl_kernel kernel_set_timestep; +cl_kernel kernel_reduction_min; +cl_kernel kernel_copy_state_data; +cl_kernel kernel_copy_state_ghost_data; +cl_kernel kernel_apply_boundary_conditions; +cl_kernel kernel_apply_boundary_conditions_local; +cl_kernel kernel_apply_boundary_conditions_ghost; +cl_kernel kernel_calc_finite_difference; +cl_kernel kernel_refine_potential; +cl_kernel kernel_reduce_sum_mass_stage1of2; +cl_kernel kernel_reduce_sum_mass_stage2of2; +cl_kernel kernel_reduce_epsum_mass_stage1of2; +cl_kernel kernel_reduce_epsum_mass_stage2of2; +#endif + +inline real_t U_halfstep(// XXX Fix the subindices to be more intuitive XXX + real_t deltaT, // Timestep + real_t U_i, // Initial cell's (downwind's) state variable + real_t U_n, // Next cell's (upwind's) state variable + real_t F_i, // Initial cell's (downwind's) state variable flux + real_t F_n, // Next cell's (upwind's) state variable flux + real_t r_i, // Initial cell's (downwind's) center to face distance + real_t r_n, // Next cell's (upwind's) center to face distance + real_t A_i, // Cell's face surface area + real_t A_n, // Cell's neighbor's face surface area + real_t V_i, // Cell's volume + real_t V_n) { // Cell's 
neighbor's volume + + return (( r_i*U_n + r_n*U_i ) / ( r_i + r_n )) + - HALF*deltaT*(( F_n*A_n*min(ONE, A_i/A_n) - F_i*A_i*min(ONE, A_n/A_i) ) + / ( V_n*min(HALF, V_i/V_n) + V_i*min(HALF, V_n/V_i) )); + +} + +inline real_t U_fullstep( + real_t deltaT, + real_t dr, + real_t U, + real_t F_plus, + real_t F_minus, + real_t G_plus, + real_t G_minus) { + + return (U - (deltaT / dr)*(F_plus - F_minus + G_plus - G_minus)); + +} + + +inline real_t w_corrector( + real_t deltaT, // Timestep + real_t dr, // Cell's center to face distance + real_t U_eigen, // State variable's eigenvalue (speed) + real_t grad_half, // Centered gradient + real_t grad_minus, // Downwind gradient + real_t grad_plus) { // Upwind gradient + + real_t nu = HALF * U_eigen * deltaT / dr; + nu = nu * (ONE - nu); + + real_t rdenom = ONE / max(SQR(grad_half), EPSILON); + real_t rplus = (grad_plus * grad_half) * rdenom; + real_t rminus = (grad_minus * grad_half) * rdenom; + + return HALF*nu*(ONE- max(MIN3(ONE, rplus, rminus), ZERO)); +} + +State::State(Mesh *mesh_in) +{ + for (int i = 0; i < STATE_TIMER_SIZE; i++){ + cpu_timers[i] = 0.0; + } + for (int i = 0; i < STATE_TIMER_SIZE; i++){ + gpu_timers[i] = 0L; + } + + mesh = mesh_in; + +#ifdef HAVE_MPI + int mpi_init; + MPI_Initialized(&mpi_init); + if (mpi_init){ + MPI_Type_contiguous(2, MPI_DOUBLE, &MPI_TWO_DOUBLES); + MPI_Type_commit(&MPI_TWO_DOUBLES); + MPI_Op_create((MPI_User_function *)knuth_sum, commutative, &KNUTH_SUM); + // FIXME add fini and set size + if (mesh->parallel) state_memory.pinit(MPI_COMM_WORLD, 2L * 1024 * 1024 * 1024); + } +#endif +} + +void State::init(int do_gpu_calc) +{ + if (do_gpu_calc) { +#ifdef HAVE_OPENCL + cl_context context = ezcl_get_context(); + + if (mesh->mype == 0) printf("Starting compile of kernels in state\n"); + const char *defines = NULL; + cl_program program = ezcl_create_program_wsource(context, defines, state_kern_source); + + kernel_set_timestep = ezcl_create_kernel_wprogram(program, "set_timestep_cl"); + 
kernel_reduction_min = ezcl_create_kernel_wprogram(program, "finish_reduction_min_cl"); + kernel_copy_state_data = ezcl_create_kernel_wprogram(program, "copy_state_data_cl"); + kernel_copy_state_ghost_data = ezcl_create_kernel_wprogram(program, "copy_state_ghost_data_cl"); + kernel_apply_boundary_conditions = ezcl_create_kernel_wprogram(program, "apply_boundary_conditions_cl"); + kernel_apply_boundary_conditions_local = ezcl_create_kernel_wprogram(program, "apply_boundary_conditions_local_cl"); + kernel_apply_boundary_conditions_ghost = ezcl_create_kernel_wprogram(program, "apply_boundary_conditions_ghost_cl"); + kernel_calc_finite_difference = ezcl_create_kernel_wprogram(program, "calc_finite_difference_cl"); + kernel_refine_potential = ezcl_create_kernel_wprogram(program, "refine_potential_cl"); + kernel_reduce_sum_mass_stage1of2 = ezcl_create_kernel_wprogram(program, "reduce_sum_mass_stage1of2_cl"); + kernel_reduce_sum_mass_stage2of2 = ezcl_create_kernel_wprogram(program, "reduce_sum_mass_stage2of2_cl"); + kernel_reduce_epsum_mass_stage1of2 = ezcl_create_kernel_wprogram(program, "reduce_epsum_mass_stage1of2_cl"); + kernel_reduce_epsum_mass_stage2of2 = ezcl_create_kernel_wprogram(program, "reduce_epsum_mass_stage2of2_cl"); + + ezcl_program_release(program); + if (mesh->mype == 0) printf("Finishing compile of kernels in state\n"); +#endif + } + + //printf("\nDEBUG -- Calling state memory memory malloc at line %d\n",__LINE__); + allocate(mesh->ncells); + //state_memory.memory_report(); + //printf("DEBUG -- Finished state memory memory malloc at line %d\n\n",__LINE__); + +} + +void State::allocate(size_t ncells) +{ + int flags = 0; + flags = RESTART_DATA; +#ifdef HAVE_J7 + if (mesh->parallel) flags = LOAD_BALANCE_MEMORY; +#endif + + H = (state_t *)state_memory.memory_malloc(ncells, sizeof(state_t), "H", flags); + U = (state_t *)state_memory.memory_malloc(ncells, sizeof(state_t), "U", flags); + V = (state_t *)state_memory.memory_malloc(ncells, sizeof(state_t), "V", 
flags); +} + +void State::resize(size_t new_ncells){ + size_t current_size = state_memory.get_memory_size(H); + if (new_ncells > current_size) state_memory.memory_realloc_all(new_ncells); + + //printf("\nDEBUG -- Calling state memory resize at line %d\n",__LINE__); + //state_memory.memory_report(); + //printf("DEBUG -- Finished state memory resize at line %d\n\n",__LINE__); +} + +void State::memory_reset_ptrs(void){ + H = (state_t *)state_memory.get_memory_ptr("H"); + U = (state_t *)state_memory.get_memory_ptr("U"); + V = (state_t *)state_memory.get_memory_ptr("V"); + + //printf("\nDEBUG -- Calling state memory reset_ptrs at line %d\n",__LINE__); + //state_memory.memory_report(); + //printf("DEBUG -- Finished state memory reset_ptrs at line %d\n\n",__LINE__); +} + +void State::terminate(void) +{ + state_memory.memory_delete(H); + state_memory.memory_delete(U); + state_memory.memory_delete(V); + +#ifdef HAVE_OPENCL + ezcl_device_memory_delete(dev_deltaT); + + gpu_state_memory.memory_delete(dev_H); + gpu_state_memory.memory_delete(dev_U); + gpu_state_memory.memory_delete(dev_V); + + ezcl_kernel_release(kernel_set_timestep); + ezcl_kernel_release(kernel_reduction_min); + ezcl_kernel_release(kernel_copy_state_data); + ezcl_kernel_release(kernel_copy_state_ghost_data); + ezcl_kernel_release(kernel_apply_boundary_conditions); + ezcl_kernel_release(kernel_apply_boundary_conditions_local); + ezcl_kernel_release(kernel_apply_boundary_conditions_ghost); + ezcl_kernel_release(kernel_calc_finite_difference); + ezcl_kernel_release(kernel_refine_potential); + ezcl_kernel_release(kernel_reduce_sum_mass_stage1of2); + ezcl_kernel_release(kernel_reduce_sum_mass_stage2of2); + ezcl_kernel_release(kernel_reduce_epsum_mass_stage1of2); + ezcl_kernel_release(kernel_reduce_epsum_mass_stage2of2); +#endif +#ifdef HAVE_MPI + if (mesh->parallel) state_memory.pfini(); +#endif +} + +#ifdef HAVE_MPI +void knuth_sum(struct esum_type *in, struct esum_type *inout, int *len, MPI_Datatype 
*MPI_TWO_DOUBLES) +{ + double u, v, upt, up, vpp; + u = inout->sum; + v = in->sum + (in->correction+inout->correction); + upt = u + v; + up = upt - v; + vpp = upt - up; + inout->sum = upt; + inout->correction = (u - up) + (v - vpp); + + // Just to block compiler warnings + if (1==2) printf("DEBUG len %d datatype %lld\n",*len,(long long)(*MPI_TWO_DOUBLES) ); +} +#endif + +void State::add_boundary_cells(void) +{ + struct timeval tstart_cpu; + cpu_timer_start(&tstart_cpu); + + // This is for a mesh with no boundary cells -- they are added and + // the mesh sizes increased + size_t &ncells = mesh->ncells; + vector &index = mesh->index; + vector &x = mesh->x; + vector &dx = mesh->dx; + vector &y = mesh->y; + vector &dy = mesh->dy; + + int *i = mesh->i; + int *j = mesh->j; + int *level = mesh->level; + int *celltype = mesh->celltype; + int *nlft = mesh->nlft; + int *nrht = mesh->nrht; + int *nbot = mesh->nbot; + int *ntop = mesh->ntop; + + vector &lev_ibegin = mesh->lev_ibegin; + vector &lev_iend = mesh->lev_iend; + vector &lev_jbegin = mesh->lev_jbegin; + vector &lev_jend = mesh->lev_jend; + + // Pre-count number of cells to add + int icount = 0; + for (uint ic=0; ici =(int *)mesh->mesh_memory.memory_realloc(new_ncells, i); + mesh->j =(int *)mesh->mesh_memory.memory_realloc(new_ncells, j); + mesh->level =(int *)mesh->mesh_memory.memory_realloc(new_ncells, level); + mesh->celltype =(int *)mesh->mesh_memory.memory_realloc(new_ncells, celltype); + mesh->nlft =(int *)mesh->mesh_memory.memory_realloc(new_ncells, nlft); + mesh->nrht =(int *)mesh->mesh_memory.memory_realloc(new_ncells, nrht); + mesh->nbot =(int *)mesh->mesh_memory.memory_realloc(new_ncells, nbot); + mesh->ntop =(int *)mesh->mesh_memory.memory_realloc(new_ncells, ntop); + //memory_reset_ptrs(); + i = mesh->i; + j = mesh->j; + level = mesh->level; + celltype = mesh->celltype; + nlft = mesh->nlft; + nrht = mesh->nrht; + nbot = mesh->nbot; + ntop = mesh->ntop; + + index.resize(new_ncells); + x.resize(new_ncells); 
+ dx.resize(new_ncells); + y.resize(new_ncells); + dy.resize(new_ncells); + + for (int nc=ncells; ncncells; + nlft = mesh->nlft; + nrht = mesh->nrht; + nbot = mesh->nbot; + ntop = mesh->ntop; + + // This is for a mesh with boundary cells + int lowerBound, upperBound; + mesh->get_bounds(lowerBound, upperBound); + for (uint ic=lowerBound; icis_left_boundary(ic)) { + int nr = nrht[ic]; + if (nr < (int)ncells) { + H[ic] = H[nr]; + U[ic] = -U[nr]; + V[ic] = V[nr]; + } + } + if (mesh->is_right_boundary(ic)) { + int nl = nlft[ic]; + if (nl < (int)ncells) { + H[ic] = H[nl]; + U[ic] = -U[nl]; + V[ic] = V[nl]; + } + } + if (mesh->is_bottom_boundary(ic)) { + int nt = ntop[ic]; + if (nt < (int)ncells) { + H[ic] = H[nt]; + U[ic] = U[nt]; + V[ic] = -V[nt]; + } + } + if (mesh->is_top_boundary(ic)) { + int nb = nbot[ic]; + if (nb < (int)ncells) { + H[ic] = H[nb]; + U[ic] = U[nb]; + V[ic] = -V[nb]; + } + } + } +} + +void State::apply_boundary_conditions_ghost(void) +{ + static int *nlft, *nrht, *nbot, *ntop; + + size_t &ncells = mesh->ncells; + nlft = mesh->nlft; + nrht = mesh->nrht; + nbot = mesh->nbot; + ntop = mesh->ntop; + + // This is for a mesh with boundary cells + int lowerBound, upperBound; + mesh->get_bounds(lowerBound, upperBound); + for (uint ic=lowerBound; icis_left_boundary(ic)) { + int nr = nrht[ic]; + if (nr >= (int)ncells) { + H[ic] = H[nr]; + U[ic] = -U[nr]; + V[ic] = V[nr]; + } + } + if (mesh->is_right_boundary(ic)) { + int nl = nlft[ic]; + if (nl >= (int)ncells) { + H[ic] = H[nl]; + U[ic] = -U[nl]; + V[ic] = V[nl]; + } + } + if (mesh->is_bottom_boundary(ic)) { + int nt = ntop[ic]; + if (nt >= (int)ncells) { + H[ic] = H[nt]; + U[ic] = U[nt]; + V[ic] = -V[nt]; + } + } + if (mesh->is_top_boundary(ic)) { + int nb = nbot[ic]; + if (nb >= (int)ncells) { + H[ic] = H[nb]; + U[ic] = U[nb]; + V[ic] = -V[nb]; + } + } + } +} + +void State::apply_boundary_conditions(void) +{ + int *nlft, *nrht, *nbot, *ntop; + + size_t &ncells = mesh->ncells; + nlft = mesh->nlft; + nrht = 
mesh->nrht; + nbot = mesh->nbot; + ntop = mesh->ntop; + + // This is for a mesh with boundary cells + int lowerBound, upperBound; + mesh->get_bounds(lowerBound, upperBound); + for (uint ic=lowerBound; icis_left_boundary(ic)) { + int nr = nrht[ic]; + H[ic] = H[nr]; + U[ic] = -U[nr]; + V[ic] = V[nr]; + } + if (mesh->is_right_boundary(ic)) { + int nl = nlft[ic]; + H[ic] = H[nl]; + U[ic] = -U[nl]; + V[ic] = V[nl]; + } + if (mesh->is_bottom_boundary(ic)) { + int nt = ntop[ic]; + H[ic] = H[nt]; + U[ic] = U[nt]; + V[ic] = -V[nt]; + } + if (mesh->is_top_boundary(ic)) { + int nb = nbot[ic]; + H[ic] = H[nb]; + U[ic] = U[nb]; + V[ic] = -V[nb]; + } + } +} + +void State::remove_boundary_cells(void) +{ + if(! mesh->have_boundary) { + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + size_t &ncells = mesh->ncells; + + // Resize to drop all the boundary cells + ncells = save_ncells; + H=(state_t *)state_memory.memory_realloc(save_ncells, H); + U=(state_t *)state_memory.memory_realloc(save_ncells, U); + V=(state_t *)state_memory.memory_realloc(save_ncells, V); + //printf("\nDEBUG remove_boundary cells\n"); + //state_memory.memory_report(); + //printf("DEBUG end remove_boundary cells\n\n"); + + mesh->i = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->i); + mesh->j = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->j); + mesh->level = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->level); + mesh->celltype = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->celltype); + mesh->nlft = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->nlft); + mesh->nrht = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->nrht); + mesh->nbot = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->nbot); + mesh->ntop = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->ntop); + + // Reset the neighbors due to the dropped boundary cells + mesh->index.resize(save_ncells); + mesh->x.resize(save_ncells); + 
mesh->dx.resize(save_ncells); + mesh->y.resize(save_ncells); + mesh->dy.resize(save_ncells); +#ifdef _OPENMP + } +#pragma omp barrier +#endif + + mesh->set_bounds(mesh->ncells); + + int lowerBound, upperBound; + mesh->get_bounds(lowerBound, upperBound); + for (uint ic=lowerBound; ici[ic] == mesh->lev_ibegin[mesh->level[ic]]) mesh->nlft[ic] = ic; + if (mesh->i[ic] == mesh->lev_iend[mesh->level[ic]]) mesh->nrht[ic] = ic; + if (mesh->j[ic] == mesh->lev_jbegin[mesh->level[ic]]) mesh->nbot[ic] = ic; + if (mesh->j[ic] == mesh->lev_jend[mesh->level[ic]]) mesh->ntop[ic] = ic; + } + + } // if have_boundary +} + +double State::set_timestep(double g, double sigma) +{ + double globalmindeltaT; + + struct timeval tstart_cpu; + cpu_timer_start(&tstart_cpu); + + static double mindeltaT; + + int lowerBounds, upperBounds; + mesh->set_bounds(mesh->ncells); + mesh->get_bounds(lowerBounds, upperBounds); + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + mindeltaT = 1000; +#ifdef _OPENMP + } +#pragma omp barrier +#endif + + double mymindeltaT = 1000.0; // private for each thread + + for (int ic=lowerBounds; iccelltype[ic] == REAL_CELL) { + int lev = mesh->level[ic]; + double wavespeed = sqrt(g*H[ic]); + double xspeed = (fabs(U[ic])+wavespeed)/mesh->lev_deltax[lev]; + double yspeed = (fabs(V[ic])+wavespeed)/mesh->lev_deltay[lev]; + double deltaT=sigma/(xspeed+yspeed); + if (deltaT < mymindeltaT) mymindeltaT = deltaT; + } + } + +#ifdef _OPENMP +#pragma omp critical + { +#endif + if (mymindeltaT < mindeltaT) mindeltaT = mymindeltaT; +#ifdef _OPENMP + } // End critical region +#pragma omp barrier +#endif + +#ifdef _OPENMP +#pragma omp master + { +#endif + + + globalmindeltaT = mindeltaT; +#ifdef HAVE_MPI + if (mesh->parallel) MPI_Allreduce(&mindeltaT, &globalmindeltaT, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); +#endif + + cpu_timers[STATE_TIMER_SET_TIMESTEP] += cpu_timer_stop(tstart_cpu); +#ifdef _OPENMP + } // End master region +#pragma omp barrier +#endif + + 
return(globalmindeltaT); +} + +#ifdef HAVE_OPENCL +double State::gpu_set_timestep(double sigma) +{ + double deltaT, globalmindeltaT; + + struct timeval tstart_cpu; + cpu_timer_start(&tstart_cpu); + + cl_command_queue command_queue = ezcl_get_command_queue(); + + size_t &ncells = mesh->ncells; +#ifdef HAVE_MPI + int ¶llel = mesh->parallel; +#endif + cl_mem &dev_level = mesh->dev_level; + cl_mem &dev_celltype = mesh->dev_celltype; + cl_mem &dev_levdx = mesh->dev_levdx; + cl_mem &dev_levdy = mesh->dev_levdy; + + assert(dev_H); + assert(dev_U); + assert(dev_V); + assert(dev_level); + assert(dev_celltype); + assert(dev_levdx); + assert(dev_levdy); + + size_t local_work_size = 128; + size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size; + size_t block_size = global_work_size/local_work_size; + + cl_mem dev_redscratch = ezcl_malloc(NULL, const_cast("dev_redscratch"), &block_size, sizeof(cl_real_t), CL_MEM_READ_WRITE, 0); + + /* + __kernel void set_timestep_cl( + const int ncells, // 0 Total number of cells. + const real_t sigma, // 1 + __global const state_t *H, // 2 + __global const state_t *U, // 3 + __global const state_t *V, // 4 + __global const int *level, // 5 Array of level information. 
+ __global const int *celltype, // 6 + __global const real_t *lev_dx, // 7 + __global const real_t *lev_dy, // 8 + __global real_t *redscratch, // 9 + __global real_t *deltaT, // 10 + __local real_t *tile) // 11 + */ + + real_t sigma_local = sigma; + ezcl_set_kernel_arg(kernel_set_timestep, 0, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_set_timestep, 1, sizeof(cl_real_t), (void *)&sigma_local); + ezcl_set_kernel_arg(kernel_set_timestep, 2, sizeof(cl_mem), (void *)&dev_H); + ezcl_set_kernel_arg(kernel_set_timestep, 3, sizeof(cl_mem), (void *)&dev_U); + ezcl_set_kernel_arg(kernel_set_timestep, 4, sizeof(cl_mem), (void *)&dev_V); + ezcl_set_kernel_arg(kernel_set_timestep, 5, sizeof(cl_mem), (void *)&dev_level); + ezcl_set_kernel_arg(kernel_set_timestep, 6, sizeof(cl_mem), (void *)&dev_celltype); + ezcl_set_kernel_arg(kernel_set_timestep, 7, sizeof(cl_mem), (void *)&dev_levdx); + ezcl_set_kernel_arg(kernel_set_timestep, 8, sizeof(cl_mem), (void *)&dev_levdy); + ezcl_set_kernel_arg(kernel_set_timestep, 9, sizeof(cl_mem), (void *)&dev_redscratch); + ezcl_set_kernel_arg(kernel_set_timestep, 10, sizeof(cl_mem), (void *)&dev_deltaT); + ezcl_set_kernel_arg(kernel_set_timestep, 11, local_work_size*sizeof(cl_real_t), NULL); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_set_timestep, 1, NULL, &global_work_size, &local_work_size, NULL); + + if (block_size > 1){ + /* + __kernel void finish_reduction_min_cl( + const int isize, + __global real_t *redscratch, + __global real_t *deltaT, + __local real_t *tile) + */ + ezcl_set_kernel_arg(kernel_reduction_min, 0, sizeof(cl_int), (void *)&block_size); + ezcl_set_kernel_arg(kernel_reduction_min, 1, sizeof(cl_mem), (void *)&dev_redscratch); + ezcl_set_kernel_arg(kernel_reduction_min, 2, sizeof(cl_mem), (void *)&dev_deltaT); + ezcl_set_kernel_arg(kernel_reduction_min, 3, local_work_size*sizeof(cl_real_t), NULL); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduction_min, 1, NULL, &local_work_size, 
&local_work_size, NULL); + } + + real_t deltaT_local; + ezcl_enqueue_read_buffer(command_queue, dev_deltaT, CL_TRUE, 0, sizeof(cl_real_t), &deltaT_local, NULL); + deltaT = deltaT_local; + + globalmindeltaT = deltaT; +#ifdef HAVE_MPI + if (parallel) MPI_Allreduce(&deltaT, &globalmindeltaT, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); +#endif + + ezcl_device_memory_delete(dev_redscratch); + + gpu_timers[STATE_TIMER_SET_TIMESTEP] += (long)(cpu_timer_stop(tstart_cpu)*1.0e9); + + return(globalmindeltaT); +} +#endif + +void State::fill_circle(double circ_radius,// Radius of circle in grid units. + double fill_value, // Circle height for shallow water. + double background) // Background height for shallow water. +{ + size_t &ncells = mesh->ncells; + vector &x = mesh->x; + vector &dx = mesh->dx; + vector &y = mesh->y; + vector &dy = mesh->dy; + + for (uint ic = 0; ic < ncells; ic++) + { H[ic] = background; + U[ic] = V[ic] = 0.0; } + + // Clear the old k-D tree and generate new data (slow but necessary here). 
+ //KDTree_Destroy(&mesh->tree); + mesh->kdtree_setup(); + + int nez; + vector ind(ncells); + vector weight(ncells); + +#ifdef FULL_PRECISION + KDTree_QueryCircleInterior_Double(&mesh->tree, &nez, &(ind[0]), circ_radius, ncells, + &x[0], &dx[0], + &y[0], &dy[0]); +#else + KDTree_QueryCircleInterior_Float(&mesh->tree, &nez, &(ind[0]), circ_radius, ncells, + &x[0], &dx[0], + &y[0], &dy[0]); +#endif + for (int ic = 0; ic < nez; ++ic) + { H[ind[ic]] = fill_value; } + +#ifdef FULL_PRECISION + KDTree_QueryCircleIntersectWeighted_Double(&mesh->tree, &nez, &(ind[0]), &(weight[0]), + circ_radius, ncells, + &x[0], &dx[0], + &y[0], &dy[0]); +#else + KDTree_QueryCircleIntersectWeighted_Float(&mesh->tree, &nez, &(ind[0]), &(weight[0]), + circ_radius, ncells, + &x[0], &dx[0], + &y[0], &dy[0]); +#endif + + for (int ic = 0; ic < nez; ++ic) + { H[ind[ic]] = background + (fill_value - background) * weight[ic]; } + + KDTree_Destroy(&mesh->tree); +} + +void State::state_reorder(vector iorder) +{ + H = state_memory.memory_reorder(H, &iorder[0]); + U = state_memory.memory_reorder(U, &iorder[0]); + V = state_memory.memory_reorder(V, &iorder[0]); + //printf("\nDEBUG reorder cells\n"); + //state_memory.memory_report(); + //printf("DEBUG end reorder cells\n\n"); +} + +void State::rezone_all(int icount, int jcount, vector mpot) +{ + struct timeval tstart_cpu; + cpu_timer_start(&tstart_cpu); + + mesh->rezone_all(icount, jcount, mpot, 1, state_memory); + +#ifdef _OPENMP +#pragma omp master + { +#endif + memory_reset_ptrs(); + + cpu_timers[STATE_TIMER_REZONE_ALL] += cpu_timer_stop(tstart_cpu); +#ifdef _OPENMP + } // end master region +#endif +} + + +#ifdef HAVE_OPENCL +void State::gpu_rezone_all(int icount, int jcount, bool localStencil) +{ + struct timeval tstart_cpu; + cpu_timer_start(&tstart_cpu); + + // Just to get rid of compiler warnings + if (1 == 2) printf("DEBUG -- localStencil is %d\n",localStencil); + + mesh->gpu_rezone_all(icount, jcount, dev_mpot, gpu_state_memory); + dev_H = 
(cl_mem)gpu_state_memory.get_memory_ptr("dev_H"); + dev_U = (cl_mem)gpu_state_memory.get_memory_ptr("dev_U"); + dev_V = (cl_mem)gpu_state_memory.get_memory_ptr("dev_V"); + + gpu_timers[STATE_TIMER_REZONE_ALL] += (long)(cpu_timer_stop(tstart_cpu)*1.0e9); +} +#endif + +//define macro for squaring a number +#define SQ(x) ((x)*(x)) +//define macro to find minimum of 3 values +//#define MIN3(a,b,c) (min(min((a),(b)),(c))) + +#define HXFLUX(ic) ( U[ic] ) +#define UXFLUX(ic) ( SQ(U[ic])/H[ic] + ghalf*SQ(H[ic]) ) +#define UVFLUX(ic) ( U[ic]*V[ic]/H[ic] ) + +#define HXFLUXIC ( Uic ) +#define HXFLUXNL ( Ul ) +#define HXFLUXNR ( Ur ) +#define HXFLUXNB ( Ub ) +#define HXFLUXNT ( Ut ) + +#define UXFLUXIC ( SQ(Uic)/Hic + ghalf*SQ(Hic) ) +#define UXFLUXNL ( SQ(Ul)/Hl + ghalf*SQ(Hl) ) +#define UXFLUXNR ( SQ(Ur)/Hr + ghalf*SQ(Hr) ) +#define UXFLUXNB ( SQ(Ub)/Hb + ghalf*SQ(Hb) ) +#define UXFLUXNT ( SQ(Ut)/Ht + ghalf*SQ(Ht) ) + +#define UVFLUXIC ( Uic*Vic/Hic ) +#define UVFLUXNL ( Ul*Vl/Hl ) +#define UVFLUXNR ( Ur*Vr/Hr ) +#define UVFLUXNB ( Ub*Vb/Hb ) +#define UVFLUXNT ( Ut*Vt/Ht ) + +#define HYFLUX(ic) ( V[ic] ) +#define VUFLUX(ic) ( V[ic]*U[ic]/H[ic] ) +#define VYFLUX(ic) ( SQ(V[ic])/H[ic] + ghalf*SQ(H[ic]) ) + +#define HYFLUXIC ( Vic ) +#define HYFLUXNL ( Vl ) +#define HYFLUXNR ( Vr ) +#define HYFLUXNB ( Vb ) +#define HYFLUXNT ( Vt ) + +#define VUFLUXIC ( Vic*Uic/Hic ) +#define VUFLUXNL ( Vl*Ul/Hl ) +#define VUFLUXNR ( Vr*Ur/Hr ) +#define VUFLUXNB ( Vb*Ub/Hb ) +#define VUFLUXNT ( Vt*Ut/Ht ) + +#define VYFLUXIC ( SQ(Vic)/Hic + ghalf*SQ(Hic) ) +#define VYFLUXNL ( SQ(Vl)/Hl + ghalf*SQ(Hl) ) +#define VYFLUXNR ( SQ(Vr)/Hr + ghalf*SQ(Hr) ) +#define VYFLUXNB ( SQ(Vb)/Hb + ghalf*SQ(Hb) ) +#define VYFLUXNT ( SQ(Vt)/Ht + ghalf*SQ(Ht) ) + + +#define HNEWXFLUXMINUS ( Uxminus ) +#define HNEWXFLUXPLUS ( Uxplus ) +#define UNEWXFLUXMINUS ( SQ(Uxminus)/Hxminus + ghalf*SQ(Hxminus) ) +#define UNEWXFLUXPLUS ( SQ(Uxplus) /Hxplus + ghalf*SQ(Hxplus) ) +#define UVNEWFLUXMINUS ( Uxminus*Vxminus/Hxminus ) 
+#define UVNEWFLUXPLUS ( Uxplus *Vxplus /Hxplus )
+
+#define HNEWYFLUXMINUS ( Vyminus )
+#define HNEWYFLUXPLUS ( Vyplus )
+#define VNEWYFLUXMINUS ( SQ(Vyminus)/Hyminus + ghalf*SQ(Hyminus) )
+#define VNEWYFLUXPLUS ( SQ(Vyplus) /Hyplus + ghalf*SQ(Hyplus) )
+#define VUNEWFLUXMINUS ( Vyminus*Uyminus/Hyminus )
+#define VUNEWFLUXPLUS ( Vyplus *Uyplus /Hyplus )
+
+// XXX ADDED XXX
+// NOTE(review): the macros below use SQR(), which is not defined in this part
+// of the file (only SQ() is) -- presumably provided by a header; confirm.
+#define HXFLUXNLT ( Ult )
+#define HXFLUXNRT ( Urt )
+#define UXFLUXNLT ( SQR(Ult)/Hlt + ghalf*SQR(Hlt) )
+#define UXFLUXNRT ( SQR(Urt)/Hrt + ghalf*SQR(Hrt) )
+#define UVFLUXNLT ( Ult*Vlt/Hlt )
+#define UVFLUXNRT ( Urt*Vrt/Hrt )
+#define HYFLUXNBR ( Vbr )
+#define HYFLUXNTR ( Vtr )
+#define VUFLUXNBR ( Vbr*Ubr/Hbr )
+#define VUFLUXNTR ( Vtr*Utr/Htr )
+#define VYFLUXNBR ( SQR(Vbr)/Hbr + ghalf*SQR(Hbr) )
+#define VYFLUXNTR ( SQR(Vtr)/Htr + ghalf*SQR(Htr) )
+#define HNEWXFLUXMINUS2 ( Uxminus2 )
+#define HNEWXFLUXPLUS2 ( Uxplus2 )
+#define UNEWXFLUXMINUS2 ( SQR(Uxminus2)/Hxminus2 + ghalf*SQR(Hxminus2) )
+#define UNEWXFLUXPLUS2 ( SQR(Uxplus2) /Hxplus2 + ghalf*SQR(Hxplus2) )
+#define UVNEWFLUXMINUS2 ( Uxminus2*Vxminus2/Hxminus2 )
+#define UVNEWFLUXPLUS2 ( Uxplus2 *Vxplus2 /Hxplus2 )
+#define HNEWYFLUXMINUS2 ( Vyminus2 )
+#define HNEWYFLUXPLUS2 ( Vyplus2 )
+#define VNEWYFLUXMINUS2 ( SQR(Vyminus2)/Hyminus2 + ghalf*SQR(Hyminus2) )
+#define VNEWYFLUXPLUS2 ( SQR(Vyplus2) /Hyplus2 + ghalf*SQR(Hyplus2) )
+#define VUNEWFLUXMINUS2 ( Vyminus2*Uyminus2/Hyminus2 )
+#define VUNEWFLUXPLUS2 ( Vyplus2 *Uyplus2 /Hyplus2 )
+
+// Advance the state arrays H, U and V by one time step of size deltaT.
+//
+// Steps, in order:
+//   1. Refresh boundary cells; under MPI with more than one rank the arrays
+//      are first grown to ncells_ghost and the ghost cells are exchanged with
+//      L7_Update().
+//   2. Allocate H_new/U_new/V_new through state_memory.
+//   3. For every cell index in [lowerBound, upperBound) returned by
+//      mesh->get_bounds(): compute half-step face values with U_halfstep(),
+//      face fluxes with the *FLUX* macros, correction terms with
+//      w_corrector(), and the new cell value with U_fullstep().  Where a
+//      neighbor's level is greater than the cell's own (lvl < level[n*]) a
+//      second neighbor, reached through ntop/nrht, is read as well and its
+//      contribution is combined with the first one.
+//   4. Replace H/U/V by the new arrays with state_memory.memory_replace().
+//
+// Elapsed time is added to cpu_timers[STATE_TIMER_FINITE_DIFFERENCE].
+// Under OpenMP, allocation, replacement and timer updates run in master
+// regions separated by barriers.
+void State::calc_finite_difference(double deltaT){
+   real_t g = 9.80;   // gravitational constant
+   real_t ghalf = 0.5*g;
+
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+
+   size_t ncells = mesh->ncells;
+   size_t &ncells_ghost = mesh->ncells_ghost;
+#ifdef _OPENMP
+#pragma omp master
+#endif
+   if (ncells_ghost < ncells) ncells_ghost = ncells;
+
+   //printf("\nDEBUG finite diff\n");
+
+#ifdef HAVE_MPI
+   // We need to populate the ghost regions since the calc neighbors has just been
+   // established for the mesh shortly before
+   if (mesh->numpe > 1) {
+      apply_boundary_conditions_local();
+
+#ifdef _OPENMP
+#pragma omp master
+      {
+#endif
+      H=(state_t *)state_memory.memory_realloc(ncells_ghost, H);
+      U=(state_t *)state_memory.memory_realloc(ncells_ghost, U);
+      V=(state_t *)state_memory.memory_realloc(ncells_ghost, V);
+
+      L7_Update(&H[0], L7_STATE_T, mesh->cell_handle);
+      L7_Update(&U[0], L7_STATE_T, mesh->cell_handle);
+      L7_Update(&V[0], L7_STATE_T, mesh->cell_handle);
+#ifdef _OPENMP
+      }
+#pragma omp barrier
+#endif
+
+      apply_boundary_conditions_ghost();
+   } else {
+      apply_boundary_conditions();
+   }
+#else
+   apply_boundary_conditions();
+#endif
+
+   // static: assigned inside the master region below and read by the cell loop.
+   static state_t *H_new, *U_new, *V_new;
+   int *nlft, *nrht, *nbot, *ntop, *level;
+
+   nlft = mesh->nlft;
+   nrht = mesh->nrht;
+   nbot = mesh->nbot;
+   ntop = mesh->ntop;
+   level = mesh->level;
+
+   // NOTE(review): the element type of these vector references is missing in
+   // this copy of the patch (apparently lost in extraction); restore it from
+   // the upstream source.
+   vector &lev_deltax = mesh->lev_deltax;
+   vector &lev_deltay = mesh->lev_deltay;
+
+   int flags = RESTART_DATA;
+#if defined (HAVE_J7)
+   if (mesh->parallel) flags = LOAD_BALANCE_MEMORY;
+#endif
+
+#ifdef _OPENMP
+#pragma omp master
+#endif
+   {
+      H_new = (state_t *)state_memory.memory_malloc(ncells_ghost,
+                                                    sizeof(state_t),
+                                                    "H_new", flags);
+      U_new = (state_t *)state_memory.memory_malloc(ncells_ghost,
+                                                    sizeof(state_t),
+                                                    "U_new", flags);
+      V_new = (state_t *)state_memory.memory_malloc(ncells_ghost,
+                                                    sizeof(state_t),
+                                                    "V_new", flags);
+   }
+#ifdef _OPENMP
+#pragma omp barrier
+#endif
+
+   int lowerBound, upperBound;
+   mesh->get_bounds(lowerBound, upperBound);
+
+   // The DEBUG >= 3 checks below validate each neighbor index before it is
+   // used.  Indices are int, so they are printed with %d and compared against
+   // (int)ncells_ghost; a valid index is in [0, ncells_ghost).
+   for(int gix = lowerBound; gix < upperBound; gix++) {
+#if DEBUG >= 3
+      printf("%d: DEBUG gix is %d at line %d in file %s\n",mesh->mype,gix,__LINE__,__FILE__);
+#endif
+
+      int lvl = level[gix];
+      int nl = nlft[gix];
+      int nr = nrht[gix];
+      int nt = ntop[gix];
+      int nb = nbot[gix];
+
+      real_t Hic = H[gix];
+      real_t Uic = U[gix];
+      real_t Vic = V[gix];
+
+#if DEBUG >= 3
+      if (nl < 0 || nl >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nl %d\n",mesh->mype,__FILE__,__LINE__,nl);
+#endif
+      int nll = nlft[nl];
+      real_t Hl = H[nl];
+      real_t Ul = U[nl];
+      real_t Vl = V[nl];
+
+#if DEBUG >= 3
+      if (nr < 0 || nr >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nr %d\n",mesh->mype,__FILE__,__LINE__,nr);
+#endif
+      int nrr = nrht[nr];
+      real_t Hr = H[nr];
+      real_t Ur = U[nr];
+      real_t Vr = V[nr];
+
+#if DEBUG >= 3
+      if (nt < 0 || nt >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nt %d\n",mesh->mype,__FILE__,__LINE__,nt);
+#endif
+      int ntt = ntop[nt];
+      real_t Ht = H[nt];
+      real_t Ut = U[nt];
+      real_t Vt = V[nt];
+
+#if DEBUG >= 3
+      if (nb < 0 || nb >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nb %d\n",mesh->mype,__FILE__,__LINE__,nb);
+#endif
+      int nbb = nbot[nb];
+      real_t Hb = H[nb];
+      real_t Ub = U[nb];
+      real_t Vb = V[nb];
+
+      int nlt = ntop[nl];
+      int nrt = ntop[nr];
+      int ntr = nrht[nt];
+      int nbr = nrht[nb];
+
+#if DEBUG >= 3
+      if (nll < 0 || nll >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nll %d\n",mesh->mype,__FILE__,__LINE__,nll);
+#endif
+      real_t Hll = H[nll];
+      real_t Ull = U[nll];
+      //real_t Vll = V[nll];
+
+#if DEBUG >= 3
+      if (nrr < 0 || nrr >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nrr %d\n",mesh->mype,__FILE__,__LINE__,nrr);
+#endif
+      real_t Hrr = H[nrr];
+      real_t Urr = U[nrr];
+      //real_t Vrr = V[nrr];
+
+#if DEBUG >= 3
+      if (ntt < 0 || ntt >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with ntt %d\n",mesh->mype,__FILE__,__LINE__,ntt);
+#endif
+      real_t Htt = H[ntt];
+      //real_t Utt = U[ntt];
+      real_t Vtt = V[ntt];
+
+#if DEBUG >= 3
+      if (nbb < 0 || nbb >= (int)ncells_ghost ) {printf("%d: Problem at file %s line %d ic %d %d with nbb %d\n",mesh->mype,__FILE__,__LINE__,gix,gix+mesh->noffset,nbb); sleep(15); }
+#endif
+      real_t Hbb = H[nbb];
+      //real_t Ubb = U[nbb];
+      real_t Vbb = V[nbb];
+
+#if DEBUG >= 3
+      if (lvl < 0 || lvl >= (int)lev_deltax.size() ) printf("%d: Problem at file %s line %d with lvl %d\n",mesh->mype,__FILE__,__LINE__,lvl);
+#endif
+      real_t dxic = lev_deltax[lvl];
+      real_t dyic = lev_deltay[lvl];
+
+      real_t dxl = lev_deltax[level[nl]];
+      real_t dxr = lev_deltax[level[nr]];
+
+      real_t dyt = lev_deltay[level[nt]];
+      real_t dyb = lev_deltay[level[nb]];
+
+      real_t drl = dxl;
+      real_t drr = dxr;
+      real_t drt = dyt;
+      real_t drb = dyb;
+
+      real_t dric = dxic;
+
+      // Second left neighbor (nlt = ntop[nl]) and the cell to its left; only
+      // read when the left neighbor's level is greater than this cell's.
+      int nltl = 0;
+      real_t Hlt = 0.0, Ult = 0.0, Vlt = 0.0;
+      real_t Hll2 = 0.0;
+      real_t Ull2 = 0.0;
+      if(lvl < level[nl]) {
+#if DEBUG >= 3
+         if (nlt < 0 || nlt >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nlt %d\n",mesh->mype,__FILE__,__LINE__,nlt);
+#endif
+         Hlt = H[nlt];
+         Ult = U[nlt];
+         Vlt = V[nlt];
+         nltl = nlft[nlt];
+#if DEBUG >= 3
+         if (nltl < 0 || nltl >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nltl %d\n",mesh->mype,__FILE__,__LINE__,nltl);
+#endif
+         Hll2 = H[nltl];
+         Ull2 = U[nltl];
+      }
+
+      // Second right neighbor (nrt = ntop[nr]) and the cell to its right.
+      int nrtr = 0;
+      real_t Hrt = 0.0, Urt = 0.0, Vrt = 0.0;
+      real_t Hrr2 = 0.0;
+      real_t Urr2 = 0.0;
+      if(lvl < level[nr]) {
+#if DEBUG >= 3
+         if (nrt < 0 || nrt >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nrt %d\n",mesh->mype,__FILE__,__LINE__,nrt);
+#endif
+         Hrt = H[nrt];
+         Urt = U[nrt];
+         Vrt = V[nrt];
+         nrtr = nrht[nrt];
+#if DEBUG >= 3
+         if (nrtr < 0 || nrtr >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nrtr %d\n",mesh->mype,__FILE__,__LINE__,nrtr);
+#endif
+         Hrr2 = H[nrtr];
+         Urr2 = U[nrtr];
+      }
+
+      // Second bottom neighbor (nbr = nrht[nb]) and the cell below it.
+      int nbrb = 0;
+      real_t Hbr = 0.0, Ubr = 0.0, Vbr = 0.0;
+      real_t Hbb2 = 0.0;
+      real_t Vbb2 = 0.0;
+      if(lvl < level[nb]) {
+#if DEBUG >= 3
+         if (nbr < 0 || nbr >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nbr %d\n",mesh->mype,__FILE__,__LINE__,nbr);
+#endif
+         Hbr = H[nbr];
+         Ubr = U[nbr];
+         Vbr = V[nbr];
+         nbrb = nbot[nbr];
+#if DEBUG >= 3
+         if (nbrb < 0 || nbrb >= (int)ncells_ghost ) {printf("%d: Problem at file %s line %d ic %d %d with nbrb %d\n",mesh->mype,__FILE__,__LINE__,gix,gix+mesh->noffset,nbrb); sleep(20);}
+#endif
+         Hbb2 = H[nbrb];
+         Vbb2 = V[nbrb];
+      }
+
+      // Second top neighbor (ntr = nrht[nt]) and the cell above it.
+      int ntrt = 0;
+      real_t Htr = 0.0, Utr = 0.0, Vtr = 0.0;
+      real_t Htt2 = 0.0;
+      real_t Vtt2 = 0.0;
+      if(lvl < level[nt]) {
+#if DEBUG >= 3
+         if (ntr < 0 || ntr >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with ntr %d\n",mesh->mype,__FILE__,__LINE__,ntr);
+#endif
+         Htr = H[ntr];
+         Utr = U[ntr];
+         Vtr = V[ntr];
+         ntrt = ntop[ntr];
+#if DEBUG >= 3
+         if (ntrt < 0 || ntrt >= (int)ncells_ghost ) {printf("%d: Problem at file %s line %d ic %d %d with ntrt %d\n",mesh->mype,__FILE__,__LINE__,gix,gix+mesh->noffset,ntrt); sleep(20); }
+#endif
+         Htt2 = H[ntrt];
+         Vtt2 = V[ntrt];
+      }
+
+
+      // Half-step values on the left (minus) and right (plus) x faces.
+      real_t Hxminus = U_halfstep(deltaT, Hl, Hic, HXFLUXNL, HXFLUXIC,
+                                  dxl, dxic, dxl, dxic, SQR(dxl), SQR(dxic));
+      real_t Uxminus = U_halfstep(deltaT, Ul, Uic, UXFLUXNL, UXFLUXIC,
+                                  dxl, dxic, dxl, dxic, SQR(dxl), SQR(dxic));
+      real_t Vxminus = U_halfstep(deltaT, Vl, Vic, UVFLUXNL, UVFLUXIC,
+                                  dxl, dxic, dxl, dxic, SQR(dxl), SQR(dxic));
+
+      real_t Hxplus = U_halfstep(deltaT, Hic, Hr, HXFLUXIC, HXFLUXNR,
+                                 dxic, dxr, dxic, dxr, SQR(dxic), SQR(dxr));
+      real_t Uxplus = U_halfstep(deltaT, Uic, Ur, UXFLUXIC, UXFLUXNR,
+                                 dxic, dxr, dxic, dxr, SQR(dxic), SQR(dxr));
+      real_t Vxplus = U_halfstep(deltaT, Vic, Vr, UVFLUXIC, UVFLUXNR,
+                                 dxic, dxr, dxic, dxr, SQR(dxic), SQR(dxr));
+
+      // Half-step values on the bottom (minus) and top (plus) y faces.
+      real_t Hyminus = U_halfstep(deltaT, Hb, Hic, HYFLUXNB, HYFLUXIC,
+                                  dyb, dyic, dyb, dyic, SQR(dyb), SQR(dyic));
+      real_t Uyminus = U_halfstep(deltaT, Ub, Uic, VUFLUXNB, VUFLUXIC,
+                                  dyb, dyic, dyb, dyic, SQR(dyb), SQR(dyic));
+      real_t Vyminus = U_halfstep(deltaT, Vb, Vic, VYFLUXNB, VYFLUXIC,
+                                  dyb, dyic, dyb, dyic, SQR(dyb), SQR(dyic));
+
+      real_t Hyplus = U_halfstep(deltaT, Hic, Ht, HYFLUXIC, HYFLUXNT,
+                                 dyic, dyt, dyic, dyt, SQR(dyic), SQR(dyt));
+      real_t Uyplus = U_halfstep(deltaT, Uic, Ut, VUFLUXIC, VUFLUXNT,
+                                 dyic, dyt, dyic, dyt, SQR(dyic), SQR(dyt));
+      real_t Vyplus = U_halfstep(deltaT, Vic, Vt, VYFLUXIC, VYFLUXNT,
+                                 dyic, dyt, dyic, dyt, SQR(dyic), SQR(dyt));
+
+      // Face fluxes evaluated from the half-step values above.
+      real_t Hxfluxminus = HNEWXFLUXMINUS;
+      real_t Uxfluxminus = UNEWXFLUXMINUS;
+      real_t Vxfluxminus = UVNEWFLUXMINUS;
+
+      real_t Hxfluxplus = HNEWXFLUXPLUS;
+      real_t Uxfluxplus = UNEWXFLUXPLUS;
+      real_t Vxfluxplus = UVNEWFLUXPLUS;
+
+      real_t Hyfluxminus = HNEWYFLUXMINUS;
+      real_t Uyfluxminus = VUNEWFLUXMINUS;
+      real_t Vyfluxminus = VNEWYFLUXMINUS;
+
+      real_t Hyfluxplus = HNEWYFLUXPLUS;
+      real_t Uyfluxplus = VUNEWFLUXPLUS;
+      real_t Vyfluxplus = VNEWYFLUXPLUS;
+
+      // For each side with a second neighbor, compute a second set of
+      // half-step values and average the two fluxes.
+      real_t Hxminus2 = 0.0;
+      real_t Uxminus2 = 0.0;
+      real_t Vxminus2 = 0.0;
+      if(lvl < level[nl]) {
+
+         Hxminus2 = U_halfstep(deltaT, Hlt, Hic, HXFLUXNLT, HXFLUXIC,
+                               drl, dric, drl, dric, SQR(drl), SQR(dric));
+         Uxminus2 = U_halfstep(deltaT, Ult, Uic, UXFLUXNLT, UXFLUXIC,
+                               drl, dric, drl, dric, SQR(drl), SQR(dric));
+         Vxminus2 = U_halfstep(deltaT, Vlt, Vic, UVFLUXNLT, UVFLUXIC,
+                               drl, dric, drl, dric, SQR(drl), SQR(dric));
+
+         Hxfluxminus = (Hxfluxminus + HNEWXFLUXMINUS2) * HALF;
+         Uxfluxminus = (Uxfluxminus + UNEWXFLUXMINUS2) * HALF;
+         Vxfluxminus = (Vxfluxminus + UVNEWFLUXMINUS2) * HALF;
+
+      }
+
+      real_t Hxplus2 = 0.0;
+      real_t Uxplus2 = 0.0;
+      real_t Vxplus2 = 0.0;
+      if(lvl < level[nr]) {
+
+         Hxplus2 = U_halfstep(deltaT, Hic, Hrt, HXFLUXIC, HXFLUXNRT,
+                              dric, drr, dric, drr, SQR(dric), SQR(drr));
+         Uxplus2 = U_halfstep(deltaT, Uic, Urt, UXFLUXIC, UXFLUXNRT,
+                              dric, drr, dric, drr, SQR(dric), SQR(drr));
+         Vxplus2 = U_halfstep(deltaT, Vic, Vrt, UVFLUXIC, UVFLUXNRT,
+                              dric, drr, dric, drr, SQR(dric), SQR(drr));
+
+         Hxfluxplus = (Hxfluxplus + HNEWXFLUXPLUS2) * HALF;
+         Uxfluxplus = (Uxfluxplus + UNEWXFLUXPLUS2) * HALF;
+         Vxfluxplus = (Vxfluxplus + UVNEWFLUXPLUS2) * HALF;
+
+      }
+
+      real_t Hyminus2 = 0.0;
+      real_t Uyminus2 = 0.0;
+      real_t Vyminus2 = 0.0;
+      if(lvl < level[nb]) {
+
+         Hyminus2 = U_halfstep(deltaT, Hbr, Hic, HYFLUXNBR, HYFLUXIC,
+                               drb, dric, drb, dric, SQR(drb), SQR(dric));
+         Uyminus2 = U_halfstep(deltaT, Ubr, Uic, VUFLUXNBR, VUFLUXIC,
+                               drb, dric, drb, dric, SQR(drb), SQR(dric));
+         Vyminus2 = U_halfstep(deltaT, Vbr, Vic, VYFLUXNBR, VYFLUXIC,
+                               drb, dric, drb, dric, SQR(drb), SQR(dric));
+
+         Hyfluxminus = (Hyfluxminus + HNEWYFLUXMINUS2) * HALF;
+         Uyfluxminus = (Uyfluxminus + VUNEWFLUXMINUS2) * HALF;
+         Vyfluxminus = (Vyfluxminus + VNEWYFLUXMINUS2) * HALF;
+
+      }
+
+      real_t Hyplus2 = 0.0;
+      real_t Uyplus2 = 0.0;
+      real_t Vyplus2 = 0.0;
+      if(lvl < level[nt]) {
+
+         Hyplus2 = U_halfstep(deltaT, Hic, Htr, HYFLUXIC, HYFLUXNTR,
+                              dric, drt, dric, drt, SQR(dric), SQR(drt));
+         Uyplus2 = U_halfstep(deltaT, Uic, Utr, VUFLUXIC, VUFLUXNTR,
+                              dric, drt, dric, drt, SQR(dric), SQR(drt));
+         Vyplus2 = U_halfstep(deltaT, Vic, Vtr, VYFLUXIC, VYFLUXNTR,
+                              dric, drt, dric, drt, SQR(dric), SQR(drt));
+
+         Hyfluxplus = (Hyfluxplus + HNEWYFLUXPLUS2) * HALF;
+         Uyfluxplus = (Uyfluxplus + VUNEWFLUXPLUS2) * HALF;
+         Vyfluxplus = (Vyfluxplus + VNEWYFLUXPLUS2) * HALF;
+
+      }
+
+      //if (DEBUG >= 2) {
+      //   printf("1st pass x direction nz %d nzlower %d nzupper %d %lf %lf %lf %lf %lf %lf\n",
+      //      gix, nl, nr,
+      //      Hxplus,Hxplus2,Uxplus,Uxplus2,Vxplus,Vxplus2);
+      //      //H[cell_upper],H[cell_lower],U[cell_upper],U[cell_lower],V[cell_upper],V[cell_lower]);
+      //}
+
+      ////////////////////////////////////////
+      /// Artificial Viscosity corrections ///
+      ////////////////////////////////////////
+
+
+      if(level[nl] < level[nll]) {
+#if DEBUG >= 3
+         // int (not size_t) so that the negative-index test is meaningful.
+         int nllt = ntop[nll];
+         if (nllt < 0 || nllt >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nllt %d\n",mesh->mype,__FILE__,__LINE__,nllt);
+#endif
+         Hll = (Hll + H[ ntop[nll] ]) * HALF;
+         Ull = (Ull + U[ ntop[nll] ]) * HALF;
+      }
+
+      real_t Hr2 = Hr;
+      real_t Ur2 = Ur;
+      if(lvl < level[nr]) {
+         Hr2 = (Hr2 + Hrt) * HALF;
+         Ur2 = (Ur2 + Urt) * HALF;
+      }
+
+      real_t wminusx_H = w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus/Hxminus) + sqrt(g*Hxminus),
+                                     Hic-Hl, Hl-Hll, Hr2-Hic);
+
+      wminusx_H *= Hic - Hl;
+
+      if(lvl < level[nl]) {
+         if(level[nlt] < level[nltl])
+            Hll2 = (Hll2 + H[ ntop[nltl] ]) * HALF;
+         wminusx_H = ((w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus2/Hxminus2) +
+                                   sqrt(g*Hxminus2), Hic-Hlt, Hlt-Hll2, Hr2-Hic) *
+                       (Hic - Hlt)) + wminusx_H)*HALF*HALF;
+      }
+
+
+      if(level[nr] < level[nrr]) {
+#if DEBUG >= 3
+         int nrrt = ntop[nrr];
+         if (nrrt < 0 || nrrt >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nrrt %d\n",mesh->mype,__FILE__,__LINE__,nrrt);
+#endif
+         Hrr = (Hrr + H[ ntop[nrr] ]) * HALF;
+         Urr = (Urr + U[ ntop[nrr] ]) * HALF;
+      }
+
+      real_t Hl2 = Hl;
+      real_t Ul2 = Ul;
+      if(lvl < level[nl]) {
+         Hl2 = (Hl2 + Hlt) * HALF;
+         Ul2 = (Ul2 + Ult) * HALF;
+      }
+
+      real_t wplusx_H = w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus/Hxplus) + sqrt(g*Hxplus),
+                                    Hr-Hic, Hic-Hl2, Hrr-Hr);
+
+      wplusx_H *= Hr - Hic;
+
+      if(lvl < level[nr]) {
+         if(level[nrt] < level[nrtr])
+            Hrr2 = (Hrr2 + H[ ntop[nrtr] ]) * HALF;
+         wplusx_H = ((w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus2/Hxplus2) +
+                                  sqrt(g*Hxplus2), Hrt-Hic, Hic-Hl2, Hrr2-Hrt) *
+                      (Hrt - Hic))+wplusx_H)*HALF*HALF;
+      }
+
+
+      real_t wminusx_U = w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus/Hxminus) + sqrt(g*Hxminus),
+                                     Uic-Ul, Ul-Ull, Ur2-Uic);
+
+      wminusx_U *= Uic - Ul;
+
+      if(lvl < level[nl]) {
+         if(level[nlt] < level[nltl])
+            Ull2 = (Ull2 + U[ ntop[nltl] ]) * HALF;
+         wminusx_U = ((w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus2/Hxminus2) +
+                                   sqrt(g*Hxminus2), Uic-Ult, Ult-Ull2, Ur2-Uic) *
+                       (Uic - Ult))+wminusx_U)*HALF*HALF;
+      }
+
+
+      real_t wplusx_U = w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus/Hxplus) + sqrt(g*Hxplus),
+                                    Ur-Uic, Uic-Ul2, Urr-Ur);
+
+      wplusx_U *= Ur - Uic;
+
+      if(lvl < level[nr]) {
+         if(level[nrt] < level[nrtr])
+            Urr2 = (Urr2 + U[ ntop[nrtr] ]) * HALF;
+         wplusx_U = ((w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus2/Hxplus2) +
+                                  sqrt(g*Hxplus2), Urt-Uic, Uic-Ul2, Urr2-Urt) *
+                      (Urt - Uic))+wplusx_U)*HALF*HALF;
+      }
+
+
+      if(level[nb] < level[nbb]) {
+#if DEBUG >= 3
+         int nbbr = nrht[nbb];
+         if (nbbr < 0 || nbbr >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d gix %d %d with nbbr %d\n",mesh->mype,__FILE__,__LINE__,gix,gix+mesh->noffset,nbbr);
+#endif
+         Hbb = (Hbb + H[ nrht[nbb] ]) * HALF;
+         Vbb = (Vbb + V[ nrht[nbb] ]) * HALF;
+      }
+
+      real_t Ht2 = Ht;
+      real_t Vt2 = Vt;
+      if(lvl < level[nt]) {
+         Ht2 = (Ht2 + Htr) * HALF;
+         Vt2 = (Vt2 + Vtr) * HALF;
+      }
+
+      real_t wminusy_H = w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus/Hyminus) + sqrt(g*Hyminus),
+                                     Hic-Hb, Hb-Hbb, Ht2-Hic);
+
+      wminusy_H *= Hic - Hb;
+
+      if(lvl < level[nb]) {
+         if(level[nbr] < level[nbrb])
+            Hbb2 = (Hbb2 + H[ nrht[nbrb] ]) * HALF;
+         wminusy_H = ((w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus2/Hyminus2) +
+                                   sqrt(g*Hyminus2), Hic-Hbr, Hbr-Hbb2, Ht2-Hic) *
+                       (Hic - Hbr))+wminusy_H)*HALF*HALF;
+      }
+
+
+      if(level[nt] < level[ntt]) {
+#if DEBUG >= 3
+         int nttr = nrht[ntt];
+         if (nttr < 0 || nttr >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nttr %d\n",mesh->mype,__FILE__,__LINE__,nttr);
+#endif
+         Htt = (Htt + H[ nrht[ntt] ]) * HALF;
+         Vtt = (Vtt + V[ nrht[ntt] ]) * HALF;
+      }
+
+      real_t Hb2 = Hb;
+      real_t Vb2 = Vb;
+      if(lvl < level[nb]) {
+         Hb2 = (Hb2 + Hbr) * HALF;
+         Vb2 = (Vb2 + Vbr) * HALF;
+      }
+
+      real_t wplusy_H = w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus/Hyplus) + sqrt(g*Hyplus),
+                                    Ht-Hic, Hic-Hb2, Htt-Ht);
+
+      wplusy_H *= Ht - Hic;
+
+      if(lvl < level[nt]) {
+         if(level[ntr] < level[ntrt])
+            Htt2 = (Htt2 + H[ nrht[ntrt] ]) * HALF;
+         wplusy_H = ((w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus2/Hyplus2) +
+                                  sqrt(g*Hyplus2), Htr-Hic, Hic-Hb2, Htt2-Htr) *
+                      (Htr - Hic))+wplusy_H)*HALF*HALF;
+      }
+
+      real_t wminusy_V = w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus/Hyminus) + sqrt(g*Hyminus),
+                                     Vic-Vb, Vb-Vbb, Vt2-Vic);
+
+      wminusy_V *= Vic - Vb;
+
+      if(lvl < level[nb]) {
+         if(level[nbr] < level[nbrb])
+            Vbb2 = (Vbb2 + V[ nrht[nbrb] ]) * HALF;
+         wminusy_V = ((w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus2/Hyminus2) +
+                                   sqrt(g*Hyminus2), Vic-Vbr, Vbr-Vbb2, Vt2-Vic) *
+                       (Vic - Vbr))+wminusy_V)*HALF*HALF;
+      }
+
+      real_t wplusy_V = w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus/Hyplus) + sqrt(g*Hyplus),
+                                    Vt-Vic, Vic-Vb2, Vtt-Vt);
+
+      wplusy_V *= Vt - Vic;
+
+      if(lvl < level[nt]) {
+         if(level[ntr] < level[ntrt])
+            Vtt2 = (Vtt2 + V[ nrht[ntrt] ]) * HALF;
+         wplusy_V = ((w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus2/Hyplus2) +
+                                  sqrt(g*Hyplus2), Vtr-Vic, Vic-Vb2, Vtt2-Vtr) *
+                      (Vtr - Vic))+wplusy_V)*HALF*HALF;
+      }
+
+      // New cell values: full step from the face fluxes plus the corrections.
+      // U only receives the x-direction corrections and V only the
+      // y-direction ones; H receives both.
+      H_new[gix] = U_fullstep(deltaT, dxic, Hic,
+                              Hxfluxplus, Hxfluxminus, Hyfluxplus, Hyfluxminus)
+                   - wminusx_H + wplusx_H - wminusy_H + wplusy_H;
+      U_new[gix] = U_fullstep(deltaT, dxic, Uic,
+                              Uxfluxplus, Uxfluxminus, Uyfluxplus, Uyfluxminus)
+                   - wminusx_U + wplusx_U;
+      V_new[gix] = U_fullstep(deltaT, dxic, Vic,
+                              Vxfluxplus, Vxfluxminus, Vyfluxplus, Vyfluxminus)
+                   - wminusy_V + wplusy_V;
+
+#if DEBUG >= 1
+      {
+         real_t U_tmp = U_new[gix];
+         real_t V_tmp = V_new[gix];
+         // Assigning 0.0 when the value compares equal to zero turns a
+         // negative zero into positive zero, so it does not print as "-0".
+         if (U_tmp == 0.0) U_tmp = 0.0;
+         if (V_tmp == 0.0) V_tmp = 0.0;
+         printf("DEBUG ic %d H_new %lf U_new %lf V_new %lf\n",gix,H_new[gix],U_tmp,V_tmp);
+      }
+#endif
+
+/*
+      printf("DEBUG ic %d deltaT, %lf dxic, %lf Hic, %lf Hxfluxplus, %lf Hxfluxminus, %lf Hyfluxplus, %lf Hyfluxminus %lf\n",
+         gix, deltaT, dxic, Hic, Hxfluxplus, Hxfluxminus, Hyfluxplus, Hyfluxminus);
+      printf("DEBUG ic %d wminusx_H %lf wplusx_H %lf wminusy_H %lf wplusy_H %lf\n",gix, wminusx_H, wplusx_H, wminusy_H, wplusy_H);
+      printf("DEBUG ic %d deltaT, %lf dxic, %lf Vic, %lf Vxfluxplus, %lf Vxfluxminus, %lf Vyfluxplus, %lf Vyfluxminus %lf\n",
+         gix, deltaT, dxic, Vic, Vxfluxplus, Vxfluxminus, Vyfluxplus, Vyfluxminus);
+      printf("DEBUG ic %d wminusy_V %lf wplusy_V %lf\n",gix, wminusy_V, wplusy_V);
+*/
+   } // cell loop
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+   {
+#endif
+   // Replace H with H_new and deallocate H. New memory will have the characteristics
+   // of the new memory and the name of the old. Both return and arg1 will be reset to new memory
+   H = (state_t *)state_memory.memory_replace(H, H_new);
+   U = (state_t *)state_memory.memory_replace(U, U_new);
+   V = (state_t *)state_memory.memory_replace(V, V_new);
+
+   //state_memory.memory_report();
+   //printf("DEBUG end finite diff\n\n");
+#ifdef _OPENMP
+   }
+#pragma omp barrier
+#endif
+
+#ifdef _OPENMP
+#pragma omp master
+#endif
+   cpu_timers[STATE_TIMER_FINITE_DIFFERENCE] += cpu_timer_stop(tstart_cpu);
+}
+
+void State::calc_finite_difference_via_faces(double deltaT){
+   real_t g = 9.80;   // gravitational constant
+   real_t ghalf = HALF*g;
+
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+
+   size_t ncells = mesh->ncells;
+   size_t &ncells_ghost = mesh->ncells_ghost;
+#ifdef _OPENMP
+#pragma omp master
+#endif
+   if (ncells_ghost < ncells) ncells_ghost = ncells;
+
+   //printf("\nDEBUG finite diff\n");
+
+#ifdef HAVE_MPI
+   // We need to populate the ghost regions since the calc neighbors has just been
+   // established for the mesh shortly before
+   if (mesh->numpe > 1) {
+      apply_boundary_conditions_local();
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+      {
+#endif
+      H=(state_t *)state_memory.memory_realloc(ncells_ghost, H);
+      U=(state_t *)state_memory.memory_realloc(ncells_ghost, U);
+      V=(state_t *)state_memory.memory_realloc(ncells_ghost, V);
+
+      L7_Update(&H[0], L7_STATE_T, mesh->cell_handle);
+      L7_Update(&U[0], L7_STATE_T, mesh->cell_handle);
+      L7_Update(&V[0], L7_STATE_T, mesh->cell_handle);
+#ifdef _OPENMP
+      }
+#pragma omp barrier
+#endif
+
+      apply_boundary_conditions_ghost();
+   } else {
+      apply_boundary_conditions();
+   }
+#else
+   apply_boundary_conditions();
+#endif
+
+   int *nlft, *nrht, *nbot, *ntop, *level;
+
+   nlft = mesh->nlft;
+   nrht = mesh->nrht;
+   nbot = mesh->nbot;
+   ntop = mesh->ntop;
+   level = mesh->level;
+
+   vector &lev_deltax = mesh->lev_deltax;
+   vector &lev_deltay = mesh->lev_deltay;
+
+   int flags = 0;
+   flags = RESTART_DATA;
+#if defined (HAVE_J7)
+   if (mesh->parallel) 
flags = LOAD_BALANCE_MEMORY; +#endif + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + mesh->calc_face_list_wbidirmap(); +#ifdef _OPENMP + } +#endif + + static vector Hx, Ux, Vx; + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + Hx.resize(mesh->nxface); + Ux.resize(mesh->nxface); + Vx.resize(mesh->nxface); +#ifdef _OPENMP + } +#pragma omp barrier +#endif + +#ifdef _OPENMP +#pragma omp for +#endif + for (int iface = 0; iface < mesh->nxface; iface++){ + int cell_lower = mesh->map_xface2cell_lower[iface]; + int cell_upper = mesh->map_xface2cell_upper[iface]; + int level_lower = level[cell_lower]; + int level_upper = level[cell_upper]; + if (level_lower == level_upper) { + int lev = level_upper; + real_t Cxhalf = 0.5*deltaT/mesh->lev_deltax[lev]; + Hx[iface]=HALF*(H[cell_upper]+H[cell_lower]) - Cxhalf*( HXFLUX(cell_upper)-HXFLUX(cell_lower) ); + Ux[iface]=HALF*(U[cell_upper]+U[cell_lower]) - Cxhalf*( UXFLUX(cell_upper)-UXFLUX(cell_lower) ); + Vx[iface]=HALF*(V[cell_upper]+V[cell_lower]) - Cxhalf*( UVFLUX(cell_upper)-UVFLUX(cell_lower) ); + } else { + real_t dx_lower = mesh->lev_deltax[level[cell_lower]]; + real_t dx_upper = mesh->lev_deltax[level[cell_upper]]; + + real_t FA_lower = dx_lower; + real_t FA_upper = dx_upper; + real_t FA_lolim = FA_lower*min(ONE, FA_upper/FA_lower); + real_t FA_uplim = FA_upper*min(ONE, FA_lower/FA_upper); + + real_t CV_lower = SQ(dx_lower); + real_t CV_upper = SQ(dx_upper); + real_t CV_lolim = CV_lower*min(HALF, CV_upper/CV_lower); + real_t CV_uplim = CV_upper*min(HALF, CV_lower/CV_upper); + + // Weighted half-step calculation + // + // (dx_lower*H[cell_upper]+dx_upper*H[cell_lower]) + // ----------------------------------------------- - + // (dx_lower+dx_upper) + // + // ( (FA_uplim*HXFLUX(cell_upper))-(FA_lolim*HXFLUX(cell_lower)) ) + // 0.5*deltaT * ---------------------------------------------------------------- + // (CV_uplim+CV_lolim) + // + + 
Hx[iface]=(dx_lower*H[cell_upper]+dx_upper*H[cell_lower])/(dx_lower+dx_upper) - + HALF*deltaT*( (FA_uplim*HXFLUX(cell_upper))-(FA_lolim*HXFLUX(cell_lower)) )/ + (CV_uplim+CV_lolim); + Ux[iface]=(dx_lower*U[cell_upper]+dx_upper*U[cell_lower])/(dx_lower+dx_upper) - + HALF*deltaT*( (FA_uplim*UXFLUX(cell_upper))-(FA_lolim*UXFLUX(cell_lower)) )/ + (CV_uplim+CV_lolim); + Vx[iface]=(dx_lower*V[cell_upper]+dx_upper*V[cell_lower])/(dx_lower+dx_upper) - + HALF*deltaT*( (FA_uplim*UVFLUX(cell_upper))-(FA_lolim*UVFLUX(cell_lower)) )/ + (CV_uplim+CV_lolim); + } +#if DEBUG >= 2 + if (DEBUG >= 2) { + printf("1st pass x direction iface %d i %d j %d lev %d nzlower %d nzupper %d %lf %lf %lf %lf %lf %lf %lf %lf %lf\n", + iface, mesh->xface_i[iface], mesh->xface_j[iface], mesh->xface_level[iface], + mesh->map_xface2cell_lower[iface], mesh->map_xface2cell_upper[iface], + Hx[iface],Ux[iface],Vx[iface], + H[cell_upper],H[cell_lower],U[cell_upper],U[cell_lower],V[cell_upper],V[cell_lower]); + } +#endif + } +#if DEBUG >= 2 + if (DEBUG >= 2) { + printf("\n"); + } +#endif + + static vector Hy, Uy, Vy; + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + Hy.resize(mesh->nyface); + Uy.resize(mesh->nyface); + Vy.resize(mesh->nyface); +#ifdef _OPENMP + } +#pragma omp barrier +#endif + +#ifdef _OPENMP +#pragma omp for +#endif + for (int iface = 0; iface < mesh->nyface; iface++){ + int cell_lower = mesh->map_yface2cell_lower[iface]; + int cell_upper = mesh->map_yface2cell_upper[iface]; + int level_lower = level[cell_lower]; + int level_upper = level[cell_upper]; + if (level_lower == level_upper) { + int lev = level_upper; + real_t Cyhalf = 0.5*deltaT/mesh->lev_deltay[lev]; + Hy[iface]=HALF*(H[cell_upper]+H[cell_lower]) - Cyhalf*( HYFLUX(cell_upper)-HYFLUX(cell_lower) ); + Uy[iface]=HALF*(U[cell_upper]+U[cell_lower]) - Cyhalf*( UVFLUX(cell_upper)-UVFLUX(cell_lower) ); + Vy[iface]=HALF*(V[cell_upper]+V[cell_lower]) - Cyhalf*( VYFLUX(cell_upper)-VYFLUX(cell_lower) ); + } else { + 
real_t dy_lower = mesh->lev_deltay[level[cell_lower]]; + real_t dy_upper = mesh->lev_deltay[level[cell_upper]]; + + real_t FA_lower = dy_lower; + real_t FA_upper = dy_upper; + real_t FA_lolim = FA_lower*min(ONE, FA_upper/FA_lower); + real_t FA_uplim = FA_upper*min(ONE, FA_lower/FA_upper); + + real_t CV_lower = SQ(dy_lower); + real_t CV_upper = SQ(dy_upper); + real_t CV_lolim = CV_lower*min(HALF, CV_upper/CV_lower); + real_t CV_uplim = CV_upper*min(HALF, CV_lower/CV_upper); + + // Weighted half-step calculation + // + // (dy_lower*H[cell_upper]+dy_upper*H[cell_lower]) + // ----------------------------------------------- - + // (dy_lower+dy_upper) + // + // ( (FA_uplim*HYFLUX(cell_upper))-(FA_lolim*HYFLUX(cell_lower)) ) + // 0.5*deltaT * ---------------------------------------------------------------- + // (CV_uplim+CV_lolim) + // + + Hy[iface]=(dy_lower*H[cell_upper]+dy_upper*H[cell_lower])/(dy_lower+dy_upper) - + HALF*deltaT*( (FA_uplim*HYFLUX(cell_upper))-(FA_lolim*HYFLUX(cell_lower)) )/ + (CV_uplim+CV_lolim); + Uy[iface]=(dy_lower*U[cell_upper]+dy_upper*U[cell_lower])/(dy_lower+dy_upper) - + HALF*deltaT*( (FA_uplim*UVFLUX(cell_upper))-(FA_lolim*UVFLUX(cell_lower)) )/ + (CV_uplim+CV_lolim); + Vy[iface]=(dy_lower*V[cell_upper]+dy_upper*V[cell_lower])/(dy_lower+dy_upper) - + HALF*deltaT*( (FA_uplim*VYFLUX(cell_upper))-(FA_lolim*VYFLUX(cell_lower)) )/ + (CV_uplim+CV_lolim); + + } + +#if DEBUG >= 2 + if (DEBUG >= 2) { + printf("1st pass y direction iface %d i %d j %d lev %d nzlower %d nzupper %d %lf %lf %lf %lf %lf %lf %lf %lf %lf\n", + iface, mesh->yface_i[iface], mesh->yface_j[iface], mesh->yface_level[iface], + mesh->map_yface2cell_lower[iface], mesh->map_yface2cell_upper[iface], + Hy[iface],Uy[iface],Vy[iface], + H[cell_upper],H[cell_lower],U[cell_upper],U[cell_lower],V[cell_upper],V[cell_lower]); + } +#endif + } +#if DEBUG >= 2 + if (DEBUG >= 2) { + printf("\n"); + } +#endif + + static state_t *H_new, *U_new, *V_new; + +#ifdef _OPENMP +#pragma omp barrier 
+#pragma omp master + { +#endif + H_new = (state_t *)state_memory.memory_malloc(mesh->ncells_ghost, sizeof(state_t), "H_new", flags); + U_new = (state_t *)state_memory.memory_malloc(mesh->ncells_ghost, sizeof(state_t), "U_new", flags); + V_new = (state_t *)state_memory.memory_malloc(mesh->ncells_ghost, sizeof(state_t), "V_new", flags); +#ifdef _OPENMP + } +#pragma omp barrier +#endif + + int lowerBound, upperBound; + + mesh->get_bounds(lowerBound, upperBound); + for (int ic = lowerBound; ic < upperBound; ic++){ + + int lvl = level[ic]; + int nl = nlft[ic]; + int nr = nrht[ic]; + int nt = ntop[ic]; + int nb = nbot[ic]; + + real_t Hic = H[ic]; + real_t Uic = U[ic]; + real_t Vic = V[ic]; + + int nll = nlft[nl]; + real_t Hl = H[nl]; + real_t Ul = U[nl]; + //real_t Vl = V[nl]; + + int nrr = nrht[nr]; + real_t Hr = H[nr]; + real_t Ur = U[nr]; + //real_t Vr = V[nr]; + + int ntt = ntop[nt]; + real_t Ht = H[nt]; + //real_t Ut = U[nt]; + real_t Vt = V[nt]; + + int nbb = nbot[nb]; + real_t Hb = H[nb]; + //real_t Ub = U[nb]; + real_t Vb = V[nb]; + + int nlt = ntop[nl]; + int nrt = ntop[nr]; + int ntr = nrht[nt]; + int nbr = nrht[nb]; + + real_t Hll = H[nll]; + real_t Ull = U[nll]; + //real_t Vll = V[nll]; + + real_t Hrr = H[nrr]; + real_t Urr = U[nrr]; + //real_t Vrr = V[nrr]; + + real_t Htt = H[ntt]; + //real_t Utt = U[ntt]; + real_t Vtt = V[ntt]; + + real_t Hbb = H[nbb]; + //real_t Ubb = U[nbb]; + real_t Vbb = V[nbb]; + + real_t dxic = lev_deltax[lvl]; + //real_t dyic = lev_deltay[lvl]; + + real_t dxl = lev_deltax[level[nl]]; + real_t dxr = lev_deltax[level[nr]]; + + real_t dyt = lev_deltay[level[nt]]; + real_t dyb = lev_deltay[level[nb]]; + + //real_t drl = dxl; + //real_t drr = dxr; + //real_t drt = dyt; + //real_t drb = dyb; + + real_t dric = dxic; + + int nltl = 0; + real_t Hlt = 0.0, Ult = 0.0; // Vlt = 0.0; + real_t Hll2 = 0.0; + real_t Ull2 = 0.0; + if(lvl < level[nl]) { + Hlt = H[ ntop[nl] ]; + Ult = U[ ntop[nl] ]; + //Vlt = V[ ntop[nl] ]; + + nltl = nlft[nlt]; + 
Hll2 = H[nltl]; + Ull2 = U[nltl]; + } + + int nrtr = 0; + real_t Hrt = 0.0, Urt = 0.0; // Vrt = 0.0; + real_t Hrr2 = 0.0; + real_t Urr2 = 0.0; + if(lvl < level[nr]) { + Hrt = H[ ntop[nr] ]; + Urt = U[ ntop[nr] ]; + //Vrt = V[ ntop[nr] ]; + + nrtr = nrht[nrt]; + Hrr2 = H[nrtr]; + Urr2 = U[nrtr]; + } + + int nbrb = 0; + real_t Hbr = 0.0, Vbr = 0.0; // Ubr = 0.0 + real_t Hbb2 = 0.0; + real_t Vbb2 = 0.0; + if(lvl < level[nb]) { + Hbr = H[ nrht[nb] ]; + //Ubr = U[ nrht[nb] ]; + Vbr = V[ nrht[nb] ]; + + nbrb = nbot[nbr]; + Hbb2 = H[nbrb]; + Vbb2 = V[nbrb]; + } + + int ntrt = 0; + real_t Htr = 0.0, Vtr = 0.0; // Utr = 0.0 + real_t Htt2 = 0.0; + real_t Vtt2 = 0.0; + if(lvl < level[nt]) { + Htr = H[ nrht[nt] ]; + //Utr = U[ nrht[nt] ]; + Vtr = V[ nrht[nt] ]; + + ntrt = ntop[ntr]; + Htt2 = H[ntrt]; + Vtt2 = V[ntrt]; + } + + //////////////////////////////////////// + /// Artificial Viscosity corrections /// + //////////////////////////////////////// + + real_t Hxminus = H[ic]; + real_t Uxminus = 0.0; + real_t Vxminus = 0.0; + if (mesh->map_xcell2face_left1[ic] >= 0){ + Hxminus = Hx[mesh->map_xcell2face_left1[ic]]; + Uxminus = Ux[mesh->map_xcell2face_left1[ic]]; + Vxminus = Vx[mesh->map_xcell2face_left1[ic]]; + } + + real_t Hxminus2 = 0.0; + if(lvl < level[nl]) Hxminus2 = H[ic]; + real_t Uxminus2 = 0.0; + real_t Vxminus2 = 0.0; + if (mesh->map_xcell2face_left2[ic] >= 0) { + Hxminus2 = Hx[mesh->map_xcell2face_left2[ic]]; + Uxminus2 = Ux[mesh->map_xcell2face_left2[ic]]; + Vxminus2 = Vx[mesh->map_xcell2face_left2[ic]]; + } + + real_t Hxplus = H[ic]; + real_t Uxplus = 0.0; + real_t Vxplus = 0.0; + if (mesh->map_xcell2face_right1[ic] >= 0){ + Hxplus = Hx[mesh->map_xcell2face_right1[ic]]; + Uxplus = Ux[mesh->map_xcell2face_right1[ic]]; + Vxplus = Vx[mesh->map_xcell2face_right1[ic]]; + } + + real_t Hxplus2 = 0.0; + if(lvl < level[nr]) Hxplus2 = H[ic]; + real_t Uxplus2 = 0.0; + real_t Vxplus2 = 0.0; + if (mesh->map_xcell2face_right2[ic] >= 0){ + Hxplus2 = 
Hx[mesh->map_xcell2face_right2[ic]]; + Uxplus2 = Ux[mesh->map_xcell2face_right2[ic]]; + Vxplus2 = Vx[mesh->map_xcell2face_right2[ic]]; + } + + if(level[nl] < level[nll]) { + Hll = (Hll + H[ ntop[nll] ]) * HALF; + Ull = (Ull + U[ ntop[nll] ]) * HALF; + } + + real_t Hr2 = Hr; + real_t Ur2 = Ur; + if(lvl < level[nr]) { + Hr2 = (Hr2 + Hrt) * HALF; + Ur2 = (Ur2 + Urt) * HALF; + } + + real_t wminusx_H = w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus/Hxminus) + sqrt(g*Hxminus), + Hic-Hl, Hl-Hll, Hr2-Hic); + + wminusx_H *= Hic - Hl; + + if(lvl < level[nl]) { + if(level[nlt] < level[nltl]) + Hll2 = (Hll2 + H[ ntop[nltl] ]) * HALF; + wminusx_H = ((w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus2/Hxminus2) + + sqrt(g*Hxminus2), Hic-Hlt, Hlt-Hll2, Hr2-Hic) * + (Hic - Hlt)) + wminusx_H)*HALF*HALF; + } + + if(level[nr] < level[nrr]) { + Hrr = (Hrr + H[ ntop[nrr] ]) * HALF; + Urr = (Urr + U[ ntop[nrr] ]) * HALF; + } + + real_t Hl2 = Hl; + real_t Ul2 = Ul; + if(lvl < level[nl]) { + Hl2 = (Hl2 + Hlt) * HALF; + Ul2 = (Ul2 + Ult) * HALF; + } + + real_t wplusx_H = w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus/Hxplus) + sqrt(g*Hxplus), + Hr-Hic, Hic-Hl2, Hrr-Hr); + + wplusx_H *= Hr - Hic; + + if(lvl < level[nr]) { + if(level[nrt] < level[nrtr]) + Hrr2 = (Hrr2 + H[ ntop[nrtr] ]) * HALF; + wplusx_H = ((w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus2/Hxplus2) + + sqrt(g*Hxplus2), Hrt-Hic, Hic-Hl2, Hrr2-Hrt) * + (Hrt - Hic))+wplusx_H)*HALF*HALF; + } + + + real_t wminusx_U = w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus/Hxminus) + sqrt(g*Hxminus), + Uic-Ul, Ul-Ull, Ur2-Uic); + + wminusx_U *= Uic - Ul; + + if(lvl < level[nl]) { + if(level[nlt] < level[nltl]) + Ull2 = (Ull2 + U[ ntop[nltl] ]) * HALF; + wminusx_U = ((w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus2/Hxminus2) + + sqrt(g*Hxminus2), Uic-Ult, Ult-Ull2, Ur2-Uic) * + (Uic - Ult))+wminusx_U)*HALF*HALF; + } + + + real_t wplusx_U = w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus/Hxplus) + sqrt(g*Hxplus), + Ur-Uic, 
Uic-Ul2, Urr-Ur); + + wplusx_U *= Ur - Uic; + + if(lvl < level[nr]) { + if(level[nrt] < level[nrtr]) + Urr2 = (Urr2 + U[ ntop[nrtr] ]) * HALF; + wplusx_U = ((w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus2/Hxplus2) + + sqrt(g*Hxplus2), Urt-Uic, Uic-Ul2, Urr2-Urt) * + (Urt - Uic))+wplusx_U)*HALF*HALF; + } + + + if(level[nb] < level[nbb]) { + Hbb = (Hbb + H[ nrht[nbb] ]) * HALF; + Vbb = (Vbb + V[ nrht[nbb] ]) * HALF; + } + + real_t Ht2 = Ht; + real_t Vt2 = Vt; + if(lvl < level[nt]) { + Ht2 = (Ht2 + Htr) * HALF; + Vt2 = (Vt2 + Vtr) * HALF; + } + + real_t Hyminus = H[ic]; + real_t Uyminus = 0.0; + real_t Vyminus = 0.0; + if (mesh->map_ycell2face_bot1[ic] >= 0){ + Hyminus = Hy[mesh->map_ycell2face_bot1[ic]]; + Uyminus = Uy[mesh->map_ycell2face_bot1[ic]]; + Vyminus = Vy[mesh->map_ycell2face_bot1[ic]]; + } + + real_t Hyminus2 = 0.0; + if(lvl < level[nb]) Hyminus2 = H[ic]; + real_t Uyminus2 = 0.0; + real_t Vyminus2 = 0.0; + if (mesh->map_ycell2face_bot2[ic] >= 0){ + Hyminus2 = Hy[mesh->map_ycell2face_bot2[ic]]; + Uyminus2 = Uy[mesh->map_ycell2face_bot2[ic]]; + Vyminus2 = Vy[mesh->map_ycell2face_bot2[ic]]; + } + + real_t Hyplus = H[ic]; + real_t Uyplus = 0.0; + real_t Vyplus = 0.0; + if (mesh->map_ycell2face_top1[ic] >= 0){ + Hyplus = Hy[mesh->map_ycell2face_top1[ic]]; + Uyplus = Uy[mesh->map_ycell2face_top1[ic]]; + Vyplus = Vy[mesh->map_ycell2face_top1[ic]]; + } + + real_t Hyplus2 = 0.0; + if(lvl < level[nt]) Hyplus2 = H[ic]; + real_t Uyplus2 = 0.0; + real_t Vyplus2 = 0.0; + if (mesh->map_ycell2face_top2[ic] >= 0){ + Hyplus2 = Hy[mesh->map_ycell2face_top2[ic]]; + Uyplus2 = Uy[mesh->map_ycell2face_top2[ic]]; + Vyplus2 = Vy[mesh->map_ycell2face_top2[ic]]; + } + + real_t wminusy_H = w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus/Hyminus) + sqrt(g*Hyminus), + Hic-Hb, Hb-Hbb, Ht2-Hic); + + wminusy_H *= Hic - Hb; + + if(lvl < level[nb]) { + if(level[nbr] < level[nbrb]) + Hbb2 = (Hbb2 + H[ nrht[nbrb] ]) * HALF; + wminusy_H = ((w_corrector(deltaT, (dric+dyb)*HALF, 
fabs(Vyminus2/Hyminus2) + + sqrt(g*Hyminus2), Hic-Hbr, Hbr-Hbb2, Ht2-Hic) * + (Hic - Hbr))+wminusy_H)*HALF*HALF; + } + + + if(level[nt] < level[ntt]) { + Htt = (Htt + H[ nrht[ntt] ]) * HALF; + Vtt = (Vtt + V[ nrht[ntt] ]) * HALF; + } + + real_t Hb2 = Hb; + real_t Vb2 = Vb; + if(lvl < level[nb]) { + Hb2 = (Hb2 + Hbr) * HALF; + Vb2 = (Vb2 + Vbr) * HALF; + } + + real_t wplusy_H = w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus/Hyplus) + sqrt(g*Hyplus), + Ht-Hic, Hic-Hb2, Htt-Ht); + + wplusy_H *= Ht - Hic; + + if(lvl < level[nt]) { + if(level[ntr] < level[ntrt]) + Htt2 = (Htt2 + H[ nrht[ntrt] ]) * HALF; + wplusy_H = ((w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus2/Hyplus2) + + sqrt(g*Hyplus2), Htr-Hic, Hic-Hb2, Htt2-Htr) * + (Htr - Hic))+wplusy_H)*HALF*HALF; + } + + real_t wminusy_V = w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus/Hyminus) + sqrt(g*Hyminus), + Vic-Vb, Vb-Vbb, Vt2-Vic); + + wminusy_V *= Vic - Vb; + + if(lvl < level[nb]) { + if(level[nbr] < level[nbrb]) + Vbb2 = (Vbb2 + V[ nrht[nbrb] ]) * HALF; + wminusy_V = ((w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus2/Hyminus2) + + sqrt(g*Hyminus2), Vic-Vbr, Vbr-Vbb2, Vt2-Vic) * + (Vic - Vbr))+wminusy_V)*HALF*HALF; + } + + real_t wplusy_V = w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus/Hyplus) + sqrt(g*Hyplus), + Vt-Vic, Vic-Vb2, Vtt-Vt); + + wplusy_V *= Vt - Vic; + + if(lvl < level[nt]) { + if(level[ntr] < level[ntrt]) + Vtt2 = (Vtt2 + V[ nrht[ntrt] ]) * HALF; + wplusy_V = ((w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus2/Hyplus2) + + sqrt(g*Hyplus2), Vtr-Vic, Vic-Vb2, Vtt2-Vtr) * + (Vtr - Vic))+wplusy_V)*HALF*HALF; + } + + real_t Hxfluxminus = HNEWXFLUXMINUS; + real_t Uxfluxminus = UNEWXFLUXMINUS; + real_t Vxfluxminus = UVNEWFLUXMINUS; + + real_t Hxfluxplus = HNEWXFLUXPLUS; + real_t Uxfluxplus = UNEWXFLUXPLUS; + real_t Vxfluxplus = UVNEWFLUXPLUS; + + real_t Hyfluxminus = HNEWYFLUXMINUS; + real_t Uyfluxminus = VUNEWFLUXMINUS; + real_t Vyfluxminus = VNEWYFLUXMINUS; + + real_t Hyfluxplus = 
HNEWYFLUXPLUS; + real_t Uyfluxplus = VUNEWFLUXPLUS; + real_t Vyfluxplus = VNEWYFLUXPLUS; + + if(lvl < level[nl]) { + Hxfluxminus = (Hxfluxminus + HNEWXFLUXMINUS2) * HALF; + Uxfluxminus = (Uxfluxminus + UNEWXFLUXMINUS2) * HALF; + Vxfluxminus = (Vxfluxminus + UVNEWFLUXMINUS2) * HALF; + } + + if(lvl < level[nr]) { + Hxfluxplus = (Hxfluxplus + HNEWXFLUXPLUS2) * HALF; + Uxfluxplus = (Uxfluxplus + UNEWXFLUXPLUS2) * HALF; + Vxfluxplus = (Vxfluxplus + UVNEWFLUXPLUS2) * HALF; + } + + if(lvl < level[nb]) { + Hyfluxminus = (Hyfluxminus + HNEWYFLUXMINUS2) * HALF; + Uyfluxminus = (Uyfluxminus + VUNEWFLUXMINUS2) * HALF; + Vyfluxminus = (Vyfluxminus + VNEWYFLUXMINUS2) * HALF; + } + + if(lvl < level[nt]) { + Hyfluxplus = (Hyfluxplus + HNEWYFLUXPLUS2) * HALF; + Uyfluxplus = (Uyfluxplus + VUNEWFLUXPLUS2) * HALF; + Vyfluxplus = (Vyfluxplus + VNEWYFLUXPLUS2) * HALF; + } + + H_new[ic] = U_fullstep(deltaT, dxic, Hic, + Hxfluxplus, Hxfluxminus, Hyfluxplus, Hyfluxminus) + - wminusx_H + wplusx_H - wminusy_H + wplusy_H; + U_new[ic] = U_fullstep(deltaT, dxic, Uic, + Uxfluxplus, Uxfluxminus, Uyfluxplus, Uyfluxminus) + - wminusx_U + wplusx_U; + V_new[ic] = U_fullstep(deltaT, dxic, Vic, + Vxfluxplus, Vxfluxminus, Vyfluxplus, Vyfluxminus) + - wminusy_V + wplusy_V; + +#if DEBUG >= 1 + if (DEBUG >= 1) { + real_t U_tmp = U_new[ic]; + real_t V_tmp = V_new[ic]; + if (U_tmp == 0.0) U_tmp = 0.0; + if (V_tmp == 0.0) V_tmp = 0.0; + printf("DEBUG ic %d H_new %lf U_new %lf V_new %lf\n",ic,H_new[ic],U_tmp,V_tmp); + } +#endif + +/* + printf("DEBUG ic %d deltaT, %lf dxic, %lf Hic, %lf Hxfluxplus, %lf Hxfluxminus, %lf Hyfluxplus, %lf Hyfluxminus %lf\n", + ic, deltaT, dxic, Hic, Hxfluxplus, Hxfluxminus, Hyfluxplus, Hyfluxminus); + printf("DEBUG ic %d wminusx_H %lf wplusx_H %lf wminusy_H %lf wplusy_H %lf\n",ic, wminusx_H, wplusx_H, wminusy_H, wplusy_H); + printf("DEBUG ic %d deltaT, %lf dxic, %lf Vic, %lf Vxfluxplus, %lf Vxfluxminus, %lf Vyfluxplus, %lf Vyfluxminus %lf\n", + ic, deltaT, dxic, Vic, Vxfluxplus, 
Vxfluxminus, Vyfluxplus, Vyfluxminus); + printf("DEBUG ic %d wminusy_V %lf wplusy_V %lf\n",ic, wminusy_V, wplusy_V); +*/ + }//end forloop + +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master + { +#endif + // Replace H with H_new and deallocate H. New memory will have the characteristics + // of the new memory and the name of the old. Both return and arg1 will be reset to new memory + H = (state_t *)state_memory.memory_replace(H, H_new); + U = (state_t *)state_memory.memory_replace(U, U_new); + V = (state_t *)state_memory.memory_replace(V, V_new); + + //state_memory.memory_report(); + //printf("DEBUG end finite diff\n\n"); +#ifdef _OPENMP + } +#pragma omp barrier +#endif + +#ifdef _OPENMP +#pragma omp master +#endif + cpu_timers[STATE_TIMER_FINITE_DIFFERENCE] += cpu_timer_stop(tstart_cpu); +} + +#ifdef HAVE_OPENCL +void State::gpu_calc_finite_difference(double deltaT) +{ + struct timeval tstart_cpu; + cpu_timer_start(&tstart_cpu); + + cl_command_queue command_queue = ezcl_get_command_queue(); + + //cl_mem dev_ptr = NULL; + + size_t &ncells = mesh->ncells; + size_t &ncells_ghost = mesh->ncells_ghost; + if (ncells_ghost < ncells) ncells_ghost = ncells; + int &levmx = mesh->levmx; + cl_mem &dev_celltype = mesh->dev_celltype; + cl_mem &dev_nlft = mesh->dev_nlft; + cl_mem &dev_nrht = mesh->dev_nrht; + cl_mem &dev_nbot = mesh->dev_nbot; + cl_mem &dev_ntop = mesh->dev_ntop; + cl_mem &dev_level = mesh->dev_level; + cl_mem &dev_levdx = mesh->dev_levdx; + cl_mem &dev_levdy = mesh->dev_levdy; + + assert(dev_H); + assert(dev_U); + assert(dev_V); + assert(dev_nlft); + assert(dev_nrht); + assert(dev_nbot); + assert(dev_ntop); + assert(dev_level); + assert(dev_levdx); + assert(dev_levdy); + + cl_mem dev_H_new = (cl_mem)gpu_state_memory.memory_malloc(ncells_ghost, sizeof(cl_state_t), const_cast("dev_H_new"), DEVICE_REGULAR_MEMORY); + cl_mem dev_U_new = (cl_mem)gpu_state_memory.memory_malloc(ncells_ghost, sizeof(cl_state_t), const_cast("dev_U_new"), DEVICE_REGULAR_MEMORY); + 
cl_mem dev_V_new = (cl_mem)gpu_state_memory.memory_malloc(ncells_ghost, sizeof(cl_state_t), const_cast("dev_V_new"), DEVICE_REGULAR_MEMORY); + + size_t local_work_size = 128; + size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size; + +#ifdef HAVE_MPI + if (mesh->numpe > 1) { + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 0, sizeof(cl_int), &ncells); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 1, sizeof(cl_mem), &dev_celltype); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 2, sizeof(cl_mem), &dev_nlft); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 3, sizeof(cl_mem), &dev_nrht); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 4, sizeof(cl_mem), &dev_ntop); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 5, sizeof(cl_mem), &dev_nbot); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 6, sizeof(cl_mem), &dev_H); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 7, sizeof(cl_mem), &dev_U); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 8, sizeof(cl_mem), &dev_V); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions_local, 1, NULL, &global_work_size, &local_work_size, NULL); + + /* + __kernel void copy_state_data_cl( + const int isize, // 0 + __global state_t *H, // 1 + __global state_t *U, // 2 + __global state_t *V, // 3 + __global state_t *H_new, // 4 + __global state_t *U_new, // 5 + __global state_t *V_new) // 6 + */ + + ezcl_set_kernel_arg(kernel_copy_state_data, 0, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_copy_state_data, 1, sizeof(cl_mem), (void *)&dev_H); + ezcl_set_kernel_arg(kernel_copy_state_data, 2, sizeof(cl_mem), (void *)&dev_U); + ezcl_set_kernel_arg(kernel_copy_state_data, 3, sizeof(cl_mem), (void *)&dev_V); + ezcl_set_kernel_arg(kernel_copy_state_data, 4, sizeof(cl_mem), (void *)&dev_H_new); + ezcl_set_kernel_arg(kernel_copy_state_data, 5, 
sizeof(cl_mem), (void *)&dev_U_new); + ezcl_set_kernel_arg(kernel_copy_state_data, 6, sizeof(cl_mem), (void *)&dev_V_new); + + //ezcl_enqueue_ndrange_kernel(command_queue, kernel_copy_state_data, 1, NULL, &global_work_size, &local_work_size, &copy_state_data_event); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_copy_state_data, 1, NULL, &global_work_size, &local_work_size, NULL); + + dev_H = (cl_mem)gpu_state_memory.memory_replace(dev_H, dev_H_new); + dev_U = (cl_mem)gpu_state_memory.memory_replace(dev_U, dev_U_new); + dev_V = (cl_mem)gpu_state_memory.memory_replace(dev_V, dev_V_new); + + L7_Dev_Update(dev_H, L7_STATE_T, mesh->cell_handle); + L7_Dev_Update(dev_U, L7_STATE_T, mesh->cell_handle); + L7_Dev_Update(dev_V, L7_STATE_T, mesh->cell_handle); + + dev_H_new = (cl_mem)gpu_state_memory.memory_malloc(ncells_ghost, sizeof(cl_state_t), const_cast("dev_H_new"), DEVICE_REGULAR_MEMORY); + dev_U_new = (cl_mem)gpu_state_memory.memory_malloc(ncells_ghost, sizeof(cl_state_t), const_cast("dev_U_new"), DEVICE_REGULAR_MEMORY); + dev_V_new = (cl_mem)gpu_state_memory.memory_malloc(ncells_ghost, sizeof(cl_state_t), const_cast("dev_V_new"), DEVICE_REGULAR_MEMORY); + + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 0, sizeof(cl_int), &ncells); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 1, sizeof(cl_mem), &dev_celltype); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 2, sizeof(cl_mem), &dev_nlft); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 3, sizeof(cl_mem), &dev_nrht); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 4, sizeof(cl_mem), &dev_ntop); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 5, sizeof(cl_mem), &dev_nbot); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 6, sizeof(cl_mem), &dev_H); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 7, sizeof(cl_mem), &dev_U); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 8, sizeof(cl_mem), 
&dev_V); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions_ghost, 1, NULL, &global_work_size, &local_work_size, NULL); + } else { + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 0, sizeof(cl_int), &ncells); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 1, sizeof(cl_mem), &dev_celltype); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 2, sizeof(cl_mem), &dev_nlft); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 3, sizeof(cl_mem), &dev_nrht); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 4, sizeof(cl_mem), &dev_ntop); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 5, sizeof(cl_mem), &dev_nbot); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 6, sizeof(cl_mem), &dev_H); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 7, sizeof(cl_mem), &dev_U); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 8, sizeof(cl_mem), &dev_V); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions, 1, NULL, &global_work_size, &local_work_size, NULL); + } +#else + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 0, sizeof(cl_int), &ncells); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 1, sizeof(cl_mem), &dev_celltype); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 2, sizeof(cl_mem), &dev_nlft); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 3, sizeof(cl_mem), &dev_nrht); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 4, sizeof(cl_mem), &dev_ntop); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 5, sizeof(cl_mem), &dev_nbot); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 6, sizeof(cl_mem), &dev_H); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 7, sizeof(cl_mem), &dev_U); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 8, sizeof(cl_mem), &dev_V); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions, 1, NULL, &global_work_size, &local_work_size, NULL); +#endif + + /* 
+ __kernel void calc_finite_difference_cl( + const int ncells, // 0 Total number of cells. + const int lvmax, // 1 Maximum level + __global state_t *H, // 2 + __global state_t *U, // 3 + __global state_t *V, // 4 + __global state_t *H_new, // 5 + __global state_t *U_new, // 6 + __global state_t *V_new, // 7 + __global const int *nlft, // 8 Array of left neighbors. + __global const int *nrht, // 9 Array of right neighbors. + __global const int *ntop, // 10 Array of top neighbors. + __global const int *nbot, // 11 Array of bottom neighbors. + __global const int *level, // 12 Array of level information. + const real_t deltaT, // 13 Size of time step. + __global const real_t *lev_dx, // 14 + __global const real_t *lev_dy, // 15 + __local state4_t *tile, // 16 Tile size in state4. + __local int8 *itile) // 17 Tile size in int8. + */ + cl_event calc_finite_difference_event; + + real_t deltaT_local = deltaT; + ezcl_set_kernel_arg(kernel_calc_finite_difference, 0, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_calc_finite_difference, 1, sizeof(cl_int), (void *)&levmx); + ezcl_set_kernel_arg(kernel_calc_finite_difference, 2, sizeof(cl_mem), (void *)&dev_H); + ezcl_set_kernel_arg(kernel_calc_finite_difference, 3, sizeof(cl_mem), (void *)&dev_U); + ezcl_set_kernel_arg(kernel_calc_finite_difference, 4, sizeof(cl_mem), (void *)&dev_V); + ezcl_set_kernel_arg(kernel_calc_finite_difference, 5, sizeof(cl_mem), (void *)&dev_H_new); + ezcl_set_kernel_arg(kernel_calc_finite_difference, 6, sizeof(cl_mem), (void *)&dev_U_new); + ezcl_set_kernel_arg(kernel_calc_finite_difference, 7, sizeof(cl_mem), (void *)&dev_V_new); + ezcl_set_kernel_arg(kernel_calc_finite_difference, 8, sizeof(cl_mem), (void *)&dev_nlft); + ezcl_set_kernel_arg(kernel_calc_finite_difference, 9, sizeof(cl_mem), (void *)&dev_nrht); + ezcl_set_kernel_arg(kernel_calc_finite_difference,10, sizeof(cl_mem), (void *)&dev_ntop); + ezcl_set_kernel_arg(kernel_calc_finite_difference,11, sizeof(cl_mem), (void 
*)&dev_nbot); + ezcl_set_kernel_arg(kernel_calc_finite_difference,12, sizeof(cl_mem), (void *)&dev_level); + ezcl_set_kernel_arg(kernel_calc_finite_difference,13, sizeof(cl_real_t), (void *)&deltaT_local); + ezcl_set_kernel_arg(kernel_calc_finite_difference,14, sizeof(cl_mem), (void *)&dev_levdx); + ezcl_set_kernel_arg(kernel_calc_finite_difference,15, sizeof(cl_mem), (void *)&dev_levdy); + ezcl_set_kernel_arg(kernel_calc_finite_difference,16, local_work_size*sizeof(cl_state4_t), NULL); + ezcl_set_kernel_arg(kernel_calc_finite_difference,17, local_work_size*sizeof(cl_int8), NULL); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_finite_difference, 1, NULL, &global_work_size, &local_work_size, &calc_finite_difference_event); + + ezcl_wait_for_events(1, &calc_finite_difference_event); + ezcl_event_release(calc_finite_difference_event); + + dev_H = (cl_mem)gpu_state_memory.memory_replace(dev_H, dev_H_new); + dev_U = (cl_mem)gpu_state_memory.memory_replace(dev_U, dev_U_new); + dev_V = (cl_mem)gpu_state_memory.memory_replace(dev_V, dev_V_new); + + gpu_timers[STATE_TIMER_FINITE_DIFFERENCE] += (long)(cpu_timer_stop(tstart_cpu)*1.0e9); +} +#endif + +void State::symmetry_check(const char *string, vector sym_index, double eps, + SIGN_RULE sign_rule, int &flag) +{ + size_t &ncells = mesh->ncells; + + double xsign = 1.0, ysign = 1.0; + + if (sign_rule == DIAG_RULE || sign_rule == X_RULE) { + xsign = -1.0; + } + + if (sign_rule == DIAG_RULE || sign_rule == Y_RULE) { + ysign = -1.0; + } + + for (uint ic=0; ic eps) { + printf("%s ic %d sym %d H[ic] %lf Hsym %lf diff %lf\n", + string,ic,sym_index[ic],H[ic],H[sym_index[ic]],fabs(H[ic]-H[sym_index[ic]])); + flag++; + } + if (fabs(U[ic] - xsign*U[sym_index[ic]]) > eps) { + printf("%s ic %d sym %d U[ic] %lf Usym %lf diff %lf\n", + string,ic,sym_index[ic],U[ic],U[sym_index[ic]],fabs(U[ic]-xsign*U[sym_index[ic]])); + flag++; + } + if (fabs(V[ic] - ysign*V[sym_index[ic]]) > eps) { + printf("%s ic %d sym %d V[ic] %lf Vsym %lf 
diff %lf\n", + string,ic,sym_index[ic],V[ic],V[sym_index[ic]],fabs(V[ic]-ysign*V[sym_index[ic]])); + flag++; + } + } + +} + +size_t State::calc_refine_potential(vector &mpot,int &icount, int &jcount) +{ + + struct timeval tstart_cpu; +#ifdef _OPENMP +#pragma omp parallel +{ +#endif + + struct timeval tstart_lev2; + +#ifdef _OPENMP +#pragma omp master +{ +#endif + cpu_timer_start(&tstart_cpu); + if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2); +#ifdef _OPENMP +} +#endif + + int *nlft, *nrht, *nbot, *ntop, *level; + + size_t ncells = mesh->ncells; + nlft = mesh->nlft; + nrht = mesh->nrht; + nbot = mesh->nbot; + ntop = mesh->ntop; + level = mesh->level; + +#ifdef _OPENMP +#pragma omp master + { +#endif + icount=0; + jcount=0; +#ifdef _OPENMP + } +#pragma omp barrier +#endif + +#ifdef HAVE_MPI + // We need to update the ghost regions and boundary regions for the state + // variables since they were changed in the finite difference routine. We + // want to use the updated values for refinement decisions + if (mesh->numpe > 1) { + apply_boundary_conditions_local(); +#ifdef _OPENMP +#pragma omp barrier +#pragma omp master +{ +#endif + L7_Update(&H[0], L7_STATE_T, mesh->cell_handle); + L7_Update(&U[0], L7_STATE_T, mesh->cell_handle); + L7_Update(&V[0], L7_STATE_T, mesh->cell_handle); +#ifdef _OPENMP +} +#pragma omp barrier +#endif + apply_boundary_conditions_ghost(); + } else { + apply_boundary_conditions(); + } +#else + apply_boundary_conditions(); +#endif + +#ifdef _OPENMP +#pragma omp barrier +#endif +/*****HIGH LEVEL OMP******/ + + int lowerBound, upperBound; + //mesh->set_bounds(ncells); + mesh->get_bounds(lowerBound,upperBound); + for (int ic=lowerBound; iccelltype[ic] != REAL_CELL) continue; + + state_t Hic = H[ic]; + //state_t Uic = U[ic]; + //state_t Vic = V[ic]; + + int nl = nlft[ic]; + state_t Hl = H[nl]; + //state_t Ul = U[nl]; + //state_t Vl = V[nl]; + + if (level[nl] > level[ic]){ + int nlt = ntop[nl]; + Hl = REFINE_HALF * (Hl + H[nlt]); + } + + int nr = 
nrht[ic]; + state_t Hr = H[nr]; + //state_t Ur = U[nr]; + //state_t Vr = V[nr]; + + if (level[nr] > level[ic]){ + int nrt = ntop[nr]; + Hr = REFINE_HALF * (Hr + H[nrt]); + } + + int nb = nbot[ic]; + state_t Hb = H[nb]; + //state_t Ub = U[nb]; + //state_t Vb = V[nb]; + + if (level[nb] > level[ic]){ + int nbr = nrht[nb]; + Hb = REFINE_HALF * (Hb + H[nbr]); + } + + int nt = ntop[ic]; + state_t Ht = H[nt]; + //state_t Ut = U[nt]; + //state_t Vt = V[nt]; + + if (level[nt] > level[ic]){ + int ntr = nrht[nt]; + Ht = REFINE_HALF * (Ht + H[ntr]); + } + + state_t duplus1; //, duplus2; + state_t duhalf1; //, duhalf2; + state_t duminus1; //, duminus2; + + duplus1 = Hr-Hic; + //duplus2 = Ur-Uic; + duhalf1 = Hic-Hl; + //duhalf2 = Uic-Ul; + + state_t qmax = REFINE_NEG_THOUSAND; + + state_t qpot = max(fabs(duplus1/Hic), fabs(duhalf1/Hic)); + if (qpot > qmax) qmax = qpot; + + duminus1 = Hic-Hl; + //duminus2 = Uic-Ul; + duhalf1 = Hr-Hic; + //duhalf2 = Ur-Uic; + + qpot = max(fabs(duminus1/Hic), fabs(duhalf1/Hic)); + if (qpot > qmax) qmax = qpot; + + duplus1 = Ht-Hic; + //duplus2 = Vt-Vic; + duhalf1 = Hic-Hb; + //duhalf2 = Vic-Vb; + + qpot = max(fabs(duplus1/Hic), fabs(duhalf1/Hic)); + if (qpot > qmax) qmax = qpot; + + duminus1 = Hic-Hb; + //duminus2 = Vic-Vb; + duhalf1 = Ht-Hic; + //duhalf2 = Vt-Vic; + + qpot = max(fabs(duminus1/Hic), fabs(duhalf1/Hic)); + if (qpot > qmax) qmax = qpot; + + mpot[ic]=0; + if (qmax > REFINE_GRADIENT && level[ic] < mesh->levmx) { + mpot[ic]=1; + } else if (qmax < COARSEN_GRADIENT && level[ic] > 0) { + mpot[ic] = -1; + } + //if (mpot[ic]) printf("DEBUG cpu cell is %d mpot %d\n",ic,mpot[ic]); + } + +#ifdef _OPENMP +#pragma omp master +{ +#endif + if (TIMING_LEVEL >= 2) { + cpu_timers[STATE_TIMER_CALC_MPOT] += cpu_timer_stop(tstart_lev2); + } +#ifdef _OPENMP +} +#endif + +#ifdef _OPENMP +} +#pragma omp barrier +#endif + int newcount = mesh->refine_smooth(mpot, icount, jcount); + //printf("DEBUG -- after refine smooth in file %s line %d icount %d jcount %d 
newcount %d\n",__FILE__,__LINE__,icount,jcount,newcount); + + cpu_timers[STATE_TIMER_REFINE_POTENTIAL] += cpu_timer_stop(tstart_cpu); + + return(newcount); +} + +#ifdef HAVE_OPENCL +size_t State::gpu_calc_refine_potential(int &icount, int &jcount) +{ + struct timeval tstart_cpu; + cpu_timer_start(&tstart_cpu); + + struct timeval tstart_lev2; + if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2); + + cl_command_queue command_queue = ezcl_get_command_queue(); + + size_t &ncells = mesh->ncells; + int &levmx = mesh->levmx; + cl_mem &dev_nlft = mesh->dev_nlft; + cl_mem &dev_nrht = mesh->dev_nrht; + cl_mem &dev_nbot = mesh->dev_nbot; + cl_mem &dev_ntop = mesh->dev_ntop; + //cl_mem &dev_mpot = mesh->dev_mpot; + cl_mem &dev_i = mesh->dev_i; + cl_mem &dev_j = mesh->dev_j; + cl_mem &dev_level = mesh->dev_level; + cl_mem &dev_celltype = mesh->dev_celltype; + cl_mem &dev_levdx = mesh->dev_levdx; + cl_mem &dev_levdy = mesh->dev_levdy; + + assert(dev_H); + assert(dev_U); + assert(dev_V); + assert(dev_nlft); + assert(dev_nrht); + assert(dev_nbot); + assert(dev_ntop); + assert(dev_i); + assert(dev_j); + assert(dev_level); + //assert(dev_mpot); + //assert(dev_ioffset); + assert(dev_levdx); + assert(dev_levdy); + + icount = 0; + jcount = 0; + + size_t local_work_size = 128; + size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size; + size_t block_size = global_work_size/local_work_size; + +#ifdef HAVE_MPI + //size_t nghost_local = mesh->ncells_ghost - ncells; + + if (mesh->numpe > 1) { + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 0, sizeof(cl_int), &ncells); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 1, sizeof(cl_mem), &dev_celltype); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 2, sizeof(cl_mem), &dev_nlft); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 3, sizeof(cl_mem), &dev_nrht); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 4, sizeof(cl_mem), &dev_ntop); + 
ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 5, sizeof(cl_mem), &dev_nbot); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 6, sizeof(cl_mem), &dev_H); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 7, sizeof(cl_mem), &dev_U); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 8, sizeof(cl_mem), &dev_V); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions_local, 1, NULL, &global_work_size, &local_work_size, NULL); + + L7_Dev_Update(dev_H, L7_STATE_T, mesh->cell_handle); + L7_Dev_Update(dev_U, L7_STATE_T, mesh->cell_handle); + L7_Dev_Update(dev_V, L7_STATE_T, mesh->cell_handle); + + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 0, sizeof(cl_int), &ncells); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 1, sizeof(cl_mem), &dev_celltype); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 2, sizeof(cl_mem), &dev_nlft); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 3, sizeof(cl_mem), &dev_nrht); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 4, sizeof(cl_mem), &dev_ntop); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 5, sizeof(cl_mem), &dev_nbot); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 6, sizeof(cl_mem), &dev_H); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 7, sizeof(cl_mem), &dev_U); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 8, sizeof(cl_mem), &dev_V); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions_ghost, 1, NULL, &global_work_size, &local_work_size, NULL); + } else { + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 0, sizeof(cl_int), &ncells); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 1, sizeof(cl_mem), &dev_celltype); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 2, sizeof(cl_mem), &dev_nlft); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 3, sizeof(cl_mem), &dev_nrht); + 
ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 4, sizeof(cl_mem), &dev_ntop); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 5, sizeof(cl_mem), &dev_nbot); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 6, sizeof(cl_mem), &dev_H); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 7, sizeof(cl_mem), &dev_U); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 8, sizeof(cl_mem), &dev_V); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions, 1, NULL, &global_work_size, &local_work_size, NULL); + } +#else + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 0, sizeof(cl_int), &ncells); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 1, sizeof(cl_mem), &dev_celltype); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 2, sizeof(cl_mem), &dev_nlft); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 3, sizeof(cl_mem), &dev_nrht); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 4, sizeof(cl_mem), &dev_ntop); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 5, sizeof(cl_mem), &dev_nbot); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 6, sizeof(cl_mem), &dev_H); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 7, sizeof(cl_mem), &dev_U); + ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 8, sizeof(cl_mem), &dev_V); + ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions, 1, NULL, &global_work_size, &local_work_size, NULL); +#endif + +#ifdef BOUNDS_CHECK + { + vector nlft_tmp(mesh->ncells_ghost); + vector nrht_tmp(mesh->ncells_ghost); + vector nbot_tmp(mesh->ncells_ghost); + vector ntop_tmp(mesh->ncells_ghost); + vector level_tmp(mesh->ncells_ghost); + vector H_tmp(mesh->ncells_ghost); + ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, mesh->ncells_ghost*sizeof(cl_int), &nlft_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, mesh->ncells_ghost*sizeof(cl_int), &nrht_tmp[0], NULL); + 
ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, mesh->ncells_ghost*sizeof(cl_int), &nbot_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_TRUE, 0, mesh->ncells_ghost*sizeof(cl_int), &ntop_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_level, CL_TRUE, 0, mesh->ncells_ghost*sizeof(cl_int), &level_tmp[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_H, CL_TRUE, 0, mesh->ncells_ghost*sizeof(cl_int), &H_tmp[0], NULL); + for (uint ic=0; ic= (int)mesh->ncells_ghost) printf("%d: Warning at line %d cell %d nlft %d\n",mesh->mype,__LINE__,ic,nl); + if (level_tmp[nl] > level_tmp[ic]){ + int ntl = ntop_tmp[nl]; + if (ntl<0 || ntl>= (int)mesh->ncells_ghost) printf("%d: Warning at line %d cell %d global %d nlft %d ntop of nlft %d\n",mesh->mype,__LINE__,ic,ic+mesh->noffset,nl,ntl); + } + int nr = nrht_tmp[ic]; + if (nr<0 || nr>= (int)mesh->ncells_ghost) printf("%d: Warning at line %d cell %d nrht %d\n",mesh->mype,__LINE__,ic,nr); + if (level_tmp[nr] > level_tmp[ic]){ + int ntr = ntop_tmp[nr]; + if (ntr<0 || ntr>= (int)mesh->ncells_ghost) printf("%d: Warning at line %d cell %d ntop of nrht %d\n",mesh->mype,__LINE__,ic,ntr); + } + int nb = nbot_tmp[ic]; + if (nb<0 || nb>= (int)mesh->ncells_ghost) printf("%d: Warning at line %d cell %d nbot %d\n",mesh->mype,__LINE__,ic,nb); + if (level_tmp[nb] > level_tmp[ic]){ + int nrb = nrht_tmp[nb]; + if (nrb<0 || nrb>= (int)mesh->ncells_ghost) printf("%d: Warning at line %d cell %d nrht of nbot %d\n",mesh->mype,__LINE__,ic,nrb); + } + int nt = ntop_tmp[ic]; + if (nt<0 || nt>= (int)mesh->ncells_ghost) printf("%d: Warning at line %d cell %d ntop %d\n",mesh->mype,__LINE__,ic,nt); + if (level_tmp[nt] > level_tmp[ic]){ + int nrt = nrht_tmp[nt]; + if (nrt<0 || nrt>= (int)mesh->ncells_ghost) printf("%d: Warning at line %d cell %d nrht of ntop %d\n",mesh->mype,__LINE__,ic,nrt); + } + } + for (uint ic=0; icncells_ghost; ic++){ + if (H_tmp[ic] < 1.0) printf("%d: Warning at line %d cell %d H 
%lf\n",mesh->mype,__LINE__,ic,H_tmp[ic]); + } + } +#endif + + size_t result_size = 1; + cl_mem dev_result = ezcl_malloc(NULL, const_cast("dev_result"), &result_size, sizeof(cl_int2), CL_MEM_READ_WRITE, 0); + cl_mem dev_redscratch = ezcl_malloc(NULL, const_cast("dev_redscratch"), &block_size, sizeof(cl_int2), CL_MEM_READ_WRITE, 0); + + dev_mpot = ezcl_malloc(NULL, const_cast("dev_mpot"), &mesh->ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0); + + /* + __kernel void refine_potential + const int ncells, // 0 Total number of cells. + const int levmx, // 1 Maximum level + __global state_t *H, // 2 + __global state_t *U, // 3 + __global state_t *V, // 4 + __global const int *nlft, // 5 Array of left neighbors. + __global const int *nrht, // 6 Array of right neighbors. + __global const int *ntop, // 7 Array of top neighbors. + __global const int *nbot, // 8 Array of bottom neighbors. + __global const int *level, // 9 Array of level information. + __global const int *celltype, // 10 Array of celltype information. + __global int *mpot, // 11 Array of mesh potential information. + __global int2 *redscratch, // 12 + __global const real_t *lev_dx, // 13 + __global const real_t *lev_dy, // 14 + __global int2 *result, // 15 + __local state_t *tile, // 16 Tile size in real4. + __local int8 *itile) // 17 Tile size in int8. 
+ */ + + ezcl_set_kernel_arg(kernel_refine_potential, 0, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_refine_potential, 1, sizeof(cl_int), (void *)&levmx); + ezcl_set_kernel_arg(kernel_refine_potential, 2, sizeof(cl_mem), (void *)&dev_H); + ezcl_set_kernel_arg(kernel_refine_potential, 3, sizeof(cl_mem), (void *)&dev_U); + ezcl_set_kernel_arg(kernel_refine_potential, 4, sizeof(cl_mem), (void *)&dev_V); + ezcl_set_kernel_arg(kernel_refine_potential, 5, sizeof(cl_mem), (void *)&dev_nlft); + ezcl_set_kernel_arg(kernel_refine_potential, 6, sizeof(cl_mem), (void *)&dev_nrht); + ezcl_set_kernel_arg(kernel_refine_potential, 7, sizeof(cl_mem), (void *)&dev_ntop); + ezcl_set_kernel_arg(kernel_refine_potential, 8, sizeof(cl_mem), (void *)&dev_nbot); + ezcl_set_kernel_arg(kernel_refine_potential, 9, sizeof(cl_mem), (void *)&dev_i); + ezcl_set_kernel_arg(kernel_refine_potential,10, sizeof(cl_mem), (void *)&dev_j); + ezcl_set_kernel_arg(kernel_refine_potential,11, sizeof(cl_mem), (void *)&dev_level); + ezcl_set_kernel_arg(kernel_refine_potential,12, sizeof(cl_mem), (void *)&dev_celltype); + ezcl_set_kernel_arg(kernel_refine_potential,13, sizeof(cl_mem), (void *)&dev_levdx); + ezcl_set_kernel_arg(kernel_refine_potential,14, sizeof(cl_mem), (void *)&dev_levdy); + ezcl_set_kernel_arg(kernel_refine_potential,15, sizeof(cl_mem), (void *)&dev_mpot); + ezcl_set_kernel_arg(kernel_refine_potential,16, sizeof(cl_mem), (void *)&dev_redscratch); + ezcl_set_kernel_arg(kernel_refine_potential,17, sizeof(cl_mem), (void *)&dev_result); + ezcl_set_kernel_arg(kernel_refine_potential,18, local_work_size*sizeof(cl_state_t), NULL); + ezcl_set_kernel_arg(kernel_refine_potential,19, local_work_size*sizeof(cl_int8), NULL); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_refine_potential, 1, NULL, &global_work_size, &local_work_size, NULL); + + mesh->gpu_rezone_count2(block_size, local_work_size, dev_redscratch, dev_result); + + int count[2] = {0, 0}; + 
ezcl_enqueue_read_buffer(command_queue, dev_result, CL_TRUE, 0, sizeof(cl_int2), count, NULL); + icount = count[0]; + jcount = count[1]; + //size_t result = ncells + icount - jcount; + + //int mpot_check[ncells]; + //ezcl_enqueue_read_buffer(command_queue, dev_mpot, CL_TRUE, 0, ncells*sizeof(cl_int), mpot_check, NULL); + //for (int ic=0; ic= 2) { + gpu_timers[STATE_TIMER_CALC_MPOT] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); + } + + int my_result = mesh->gpu_refine_smooth(dev_mpot, icount, jcount); + //printf("DEBUG gpu calc refine potential %d icount %d jcount %d\n",my_result,icount,jcount); + + gpu_timers[STATE_TIMER_REFINE_POTENTIAL] += (long)(cpu_timer_stop(tstart_cpu)*1.0e9); + + return((size_t)my_result); +} +#endif + +double State::mass_sum(int enhanced_precision_sum) +{ + size_t &ncells = mesh->ncells; + int *celltype = mesh->celltype; + int *level = mesh->level; + +#ifdef HAVE_MPI + //int &mype = mesh->mype; +#endif + + struct timeval tstart_cpu; + cpu_timer_start(&tstart_cpu); + + double summer = 0.0; + double total_sum = 0.0; + + if (enhanced_precision_sum == SUM_KAHAN) { + //printf("DEBUG -- kahan_sum\n"); + double corrected_next_term, new_sum; + struct esum_type local; +#ifdef HAVE_MPI + struct esum_type global; +#endif + + local.sum = 0.0; + local.correction = 0.0; + int ic; + for (ic = 0; ic < (int)ncells; ic++) { + if (celltype[ic] == REAL_CELL) { + // Exclude boundary cells. 
+ corrected_next_term= H[ic]*mesh->lev_deltax[level[ic]]*mesh->lev_deltay[level[ic]] + local.correction; + new_sum = local.sum + local.correction; + local.correction = corrected_next_term - (new_sum - local.sum); + local.sum = new_sum; + } + } + +#ifdef HAVE_MPI + if (mesh->parallel) { + MPI_Allreduce(&local, &global, 1, MPI_TWO_DOUBLES, KNUTH_SUM, MPI_COMM_WORLD); + total_sum = global.sum + global.correction; + } else { + total_sum = local.sum + local.correction; + } + +//if(mype == 0) printf("MYPE %d: Line %d Iteration %d \t local_sum = %12.6lg, global_sum = %12.6lg\n", mype, __LINE__, mesh->m_ncycle, local.sum, global.sum); + +#else + total_sum = local.sum + local.correction; +#endif + + } else if (enhanced_precision_sum == SUM_REGULAR) { + //printf("DEBUG -- regular_sum\n"); + for (uint ic=0; ic < ncells; ic++){ + if (celltype[ic] == REAL_CELL) { + summer += H[ic]*mesh->lev_deltax[level[ic]]*mesh->lev_deltay[level[ic]]; + } + } +#ifdef HAVE_MPI + if (mesh->parallel) { + MPI_Allreduce(&summer, &total_sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + } else { + total_sum = summer; + } +#else + total_sum = summer; +#endif + } + + cpu_timers[STATE_TIMER_MASS_SUM] += cpu_timer_stop(tstart_cpu); + + return(total_sum); +} + +#ifdef HAVE_OPENCL +double State::gpu_mass_sum(int enhanced_precision_sum) +{ + struct timeval tstart_cpu; + cpu_timer_start(&tstart_cpu); + + cl_command_queue command_queue = ezcl_get_command_queue(); + + size_t &ncells = mesh->ncells; + cl_mem &dev_levdx = mesh->dev_levdx; + cl_mem &dev_levdy = mesh->dev_levdy; + cl_mem &dev_celltype = mesh->dev_celltype; + cl_mem &dev_level = mesh->dev_level; + + assert(dev_H); + assert(dev_level); + assert(dev_levdx); + assert(dev_levdy); + assert(dev_celltype); + + size_t one = 1; + cl_mem dev_mass_sum, dev_redscratch; + double gpu_mass_sum_total; + + size_t local_work_size = 128; + size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size; + size_t block_size = 
global_work_size/local_work_size; + + if (enhanced_precision_sum) { + dev_mass_sum = ezcl_malloc(NULL, const_cast("dev_mass_sum"), &one, sizeof(cl_real2_t), CL_MEM_READ_WRITE, 0); + dev_redscratch = ezcl_malloc(NULL, const_cast("dev_redscratch"), &block_size, sizeof(cl_real2_t), CL_MEM_READ_WRITE, 0); + + /* + __kernel void reduce_sum_cl( + const int isize, // 0 + __global state_t *array, // 1 Array to be reduced. + __global int *level, // 2 + __global int *levdx, // 3 + __global int *levdy, // 4 + __global int *celltype, // 5 + __global real_t *redscratch, // 6 Final result of operation. + __local real_t *tile) // 7 + */ + ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 0, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 1, sizeof(cl_mem), (void *)&dev_H); + ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 2, sizeof(cl_mem), (void *)&dev_level); + ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 3, sizeof(cl_mem), (void *)&dev_levdx); + ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 4, sizeof(cl_mem), (void *)&dev_levdy); + ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 5, sizeof(cl_mem), (void *)&dev_celltype); + ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 6, sizeof(cl_mem), (void *)&dev_mass_sum); + ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 7, sizeof(cl_mem), (void *)&dev_redscratch); + ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 8, local_work_size*sizeof(cl_real2_t), NULL); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduce_epsum_mass_stage1of2, 1, NULL, &global_work_size, &local_work_size, NULL); + + if (block_size > 1) { + /* + __kernel void reduce_sum_cl( + const int isize, // 0 + __global int *redscratch, // 1 Array to be reduced. 
+ __local real_t *tile) // 2 + */ + + ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage2of2, 0, sizeof(cl_int), (void *)&block_size); + ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage2of2, 1, sizeof(cl_mem), (void *)&dev_mass_sum); + ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage2of2, 2, sizeof(cl_mem), (void *)&dev_redscratch); + ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage2of2, 3, local_work_size*sizeof(cl_real2_t), NULL); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduce_epsum_mass_stage2of2, 1, NULL, &local_work_size, &local_work_size, NULL); + } + + struct esum_type local, global; + real2_t mass_sum; + + ezcl_enqueue_read_buffer(command_queue, dev_mass_sum, CL_TRUE, 0, 1*sizeof(cl_real2_t), &mass_sum, NULL); + + local.sum = mass_sum.s0; + local.correction = mass_sum.s1; + global.sum = local.sum; + global.correction = local.correction; +#ifdef HAVE_MPI + MPI_Allreduce(&local, &global, 1, MPI_TWO_DOUBLES, KNUTH_SUM, MPI_COMM_WORLD); +#endif + gpu_mass_sum_total = global.sum + global.correction; + } else { + dev_mass_sum = ezcl_malloc(NULL, const_cast("dev_mass_sum"), &one, sizeof(cl_real_t), CL_MEM_READ_WRITE, 0); + dev_redscratch = ezcl_malloc(NULL, const_cast("dev_redscratch"), &block_size, sizeof(cl_real_t), CL_MEM_READ_WRITE, 0); + + /* + __kernel void reduce_sum_cl( + const int isize, // 0 + __global state_t *array, // 1 Array to be reduced. + __global int *level, // 2 + __global int *levdx, // 3 + __global int *levdy, // 4 + __global int *celltype, // 5 + __global real_t *redscratch, // 6 Final result of operation. 
+ __local real_t *tile) // 7 + */ + ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 0, sizeof(cl_int), (void *)&ncells); + ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 1, sizeof(cl_mem), (void *)&dev_H); + ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 2, sizeof(cl_mem), (void *)&dev_level); + ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 3, sizeof(cl_mem), (void *)&dev_levdx); + ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 4, sizeof(cl_mem), (void *)&dev_levdy); + ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 5, sizeof(cl_mem), (void *)&dev_celltype); + ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 6, sizeof(cl_mem), (void *)&dev_mass_sum); + ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 7, sizeof(cl_mem), (void *)&dev_redscratch); + ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 8, local_work_size*sizeof(cl_real_t), NULL); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduce_sum_mass_stage1of2, 1, NULL, &global_work_size, &local_work_size, NULL); + + if (block_size > 1) { + /* + __kernel void reduce_sum_cl( + const int isize, // 0 + __global int *redscratch, // 1 Array to be reduced. 
+ __local real_t *tile) // 2 + */ + + ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage2of2, 0, sizeof(cl_int), (void *)&block_size); + ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage2of2, 1, sizeof(cl_mem), (void *)&dev_mass_sum); + ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage2of2, 2, sizeof(cl_mem), (void *)&dev_redscratch); + ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage2of2, 3, local_work_size*sizeof(cl_real_t), NULL); + + ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduce_sum_mass_stage2of2, 1, NULL, &local_work_size, &local_work_size, NULL); + } + + double local_sum, global_sum; + real_t mass_sum; + + ezcl_enqueue_read_buffer(command_queue, dev_mass_sum, CL_TRUE, 0, 1*sizeof(cl_real_t), &mass_sum, NULL); + + local_sum = mass_sum; + global_sum = local_sum; +#ifdef HAVE_MPI + MPI_Allreduce(&local_sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); +#endif + gpu_mass_sum_total = global_sum; + } + + ezcl_device_memory_delete(dev_redscratch); + ezcl_device_memory_delete(dev_mass_sum); + + gpu_timers[STATE_TIMER_MASS_SUM] += (long)(cpu_timer_stop(tstart_cpu)*1.0e9); + + return(gpu_mass_sum_total); +} +#endif + +#ifdef HAVE_OPENCL +void State::allocate_device_memory(size_t ncells) +{ + dev_H = (cl_mem)gpu_state_memory.memory_malloc(ncells, sizeof(cl_state_t), const_cast("dev_H"), DEVICE_REGULAR_MEMORY); + dev_U = (cl_mem)gpu_state_memory.memory_malloc(ncells, sizeof(cl_state_t), const_cast("dev_U"), DEVICE_REGULAR_MEMORY); + dev_V = (cl_mem)gpu_state_memory.memory_malloc(ncells, sizeof(cl_state_t), const_cast("dev_V"), DEVICE_REGULAR_MEMORY); +} +#endif + +void State::resize_old_device_memory(size_t ncells) +{ +#ifdef HAVE_OPENCL + gpu_state_memory.memory_delete(dev_H); + gpu_state_memory.memory_delete(dev_U); + gpu_state_memory.memory_delete(dev_V); + dev_H = (cl_mem)gpu_state_memory.memory_malloc(ncells, sizeof(cl_state_t), const_cast("dev_H"), DEVICE_REGULAR_MEMORY); + dev_U = (cl_mem)gpu_state_memory.memory_malloc(ncells, sizeof(cl_state_t), 
const_cast("dev_U"), DEVICE_REGULAR_MEMORY); + dev_V = (cl_mem)gpu_state_memory.memory_malloc(ncells, sizeof(cl_state_t), const_cast("dev_V"), DEVICE_REGULAR_MEMORY); +#else + // Just to block compiler warnings + if (1 == 2) printf("DEBUG -- ncells is %ld\n",ncells); +#endif +} + +#ifdef HAVE_MPI +void State::do_load_balance_local(size_t &numcells){ + mesh->do_load_balance_local(numcells, NULL, state_memory); + memory_reset_ptrs(); +} +#endif +#ifdef HAVE_OPENCL +#ifdef HAVE_MPI +void State::gpu_do_load_balance_local(size_t &numcells){ + if (mesh->gpu_do_load_balance_local(numcells, NULL, gpu_state_memory) ){ + //gpu_state_memory.memory_report(); + dev_H = (cl_mem)gpu_state_memory.get_memory_ptr("dev_H"); + dev_U = (cl_mem)gpu_state_memory.get_memory_ptr("dev_U"); + dev_V = (cl_mem)gpu_state_memory.get_memory_ptr("dev_V"); +/* + if (dev_H == NULL){ + dev_H = (cl_mem)gpu_state_memory.get_memory_ptr("dev_H_new"); + dev_U = (cl_mem)gpu_state_memory.get_memory_ptr("dev_U_new"); + dev_V = (cl_mem)gpu_state_memory.get_memory_ptr("dev_V_new"); + } + printf("DEBUG memory for proc %d dev_H is %p dev_U is %p dev_V is %p\n",mesh->mype,dev_H,dev_U,dev_V); +*/ + } +} +#endif +#endif + +static double reference_time = 0.0; + +void State::output_timing_info(int do_cpu_calc, int do_gpu_calc, double total_elapsed_time) +{ + int parallel = mesh->parallel; + + double cpu_time_compute = 0.0; + double gpu_time_compute = 0.0; + + double cpu_elapsed_time = 0.0; + double gpu_elapsed_time = 0.0; + + double cpu_mesh_time = 0.0; + double gpu_mesh_time = 0.0; + + if (do_cpu_calc) { + cpu_time_compute = get_cpu_timer(STATE_TIMER_SET_TIMESTEP) + + get_cpu_timer(STATE_TIMER_FINITE_DIFFERENCE) + + get_cpu_timer(STATE_TIMER_REFINE_POTENTIAL) + + get_cpu_timer(STATE_TIMER_REZONE_ALL) + + mesh->get_cpu_timer(MESH_TIMER_CALC_NEIGHBORS) + + mesh->get_cpu_timer(MESH_TIMER_LOAD_BALANCE) + + get_cpu_timer(STATE_TIMER_MASS_SUM) + + mesh->get_cpu_timer(MESH_TIMER_CALC_SPATIAL_COORDINATES) + + 
mesh->get_cpu_timer(MESH_TIMER_PARTITION); + cpu_elapsed_time = cpu_time_compute; + cpu_mesh_time = mesh->get_cpu_timer(MESH_TIMER_CALC_NEIGHBORS) + + get_cpu_timer(STATE_TIMER_REZONE_ALL) + + mesh->get_cpu_timer(MESH_TIMER_REFINE_SMOOTH) + + mesh->get_cpu_timer(MESH_TIMER_LOAD_BALANCE); + } + if (do_gpu_calc) { + gpu_time_compute = get_gpu_timer(STATE_TIMER_APPLY_BCS) + + get_gpu_timer(STATE_TIMER_SET_TIMESTEP) + + get_gpu_timer(STATE_TIMER_FINITE_DIFFERENCE) + + get_gpu_timer(STATE_TIMER_REFINE_POTENTIAL) + + get_gpu_timer(STATE_TIMER_REZONE_ALL) + + mesh->get_gpu_timer(MESH_TIMER_CALC_NEIGHBORS) + + mesh->get_gpu_timer(MESH_TIMER_LOAD_BALANCE) + + get_gpu_timer(STATE_TIMER_MASS_SUM) + + mesh->get_gpu_timer(MESH_TIMER_CALC_SPATIAL_COORDINATES) + + mesh->get_gpu_timer(MESH_TIMER_COUNT_BCS); + gpu_elapsed_time = get_gpu_timer(STATE_TIMER_WRITE) + gpu_time_compute + get_gpu_timer(STATE_TIMER_READ); + gpu_mesh_time = mesh->get_gpu_timer(MESH_TIMER_CALC_NEIGHBORS) + + get_gpu_timer(STATE_TIMER_REZONE_ALL) + + mesh->get_gpu_timer(MESH_TIMER_REFINE_SMOOTH) + + mesh->get_gpu_timer(MESH_TIMER_LOAD_BALANCE); + } + + if (! parallel && do_cpu_calc) reference_time = cpu_elapsed_time; + + double speedup_ratio = 0.0; + if (reference_time > 0.0){ + if (do_cpu_calc && parallel) speedup_ratio = reference_time/cpu_elapsed_time; + if (do_gpu_calc) speedup_ratio = reference_time/gpu_elapsed_time; + } + + if (do_cpu_calc) { + output_timer_block(MESH_DEVICE_CPU, cpu_elapsed_time, cpu_mesh_time, cpu_time_compute, total_elapsed_time, speedup_ratio); + } + if (do_gpu_calc) { + output_timer_block(MESH_DEVICE_GPU, gpu_elapsed_time, gpu_mesh_time, gpu_time_compute, total_elapsed_time, speedup_ratio); + } +} + +void State::output_timer_block(mesh_device_types device_type, double elapsed_time, + double mesh_time, double compute_time, double total_elapsed_time, double speedup_ratio) +{ + int mype = mesh->mype; + int parallel = mesh->parallel; + + int rank = mype; + if (! 
parallel) { + // We need to get rank info for check routines +#ifdef HAVE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &rank); +#endif + } + + if (! parallel && rank) return; + + char device_string[10]; + if (device_type == MESH_DEVICE_CPU) { + sprintf(device_string,"CPU"); + } else { + sprintf(device_string,"GPU"); + } + +#ifdef TIMING + if (rank == 0) { + printf("\n"); + printf("~~~~~~~~~~~~~~~~ Device timing information ~~~~~~~~~~~~~~~~~~\n"); + } + + if (rank == 0 && parallel) { + printf("\n%3s: Parallel timings\n\n",device_string); + } + + if (device_type == MESH_DEVICE_GPU) { + mesh->parallel_output("GPU: Write to device time was", get_gpu_timer(STATE_TIMER_WRITE), 0, "s"); + mesh->parallel_output("GPU: Read from device time was", get_gpu_timer(STATE_TIMER_READ), 0, "s"); + } + + const char *device_compute_string[2] = { + "CPU: Device compute time was", + "GPU: Device compute time was" + }; + mesh->parallel_output(device_compute_string[device_type], compute_time, 0, "s"); + + timer_output(STATE_TIMER_SET_TIMESTEP, device_type, 1); + timer_output(STATE_TIMER_FINITE_DIFFERENCE, device_type, 1); + timer_output(STATE_TIMER_REFINE_POTENTIAL, device_type, 1); + timer_output(STATE_TIMER_CALC_MPOT, device_type, 2); + mesh->timer_output(MESH_TIMER_REFINE_SMOOTH, device_type, 2); + timer_output(STATE_TIMER_REZONE_ALL, device_type, 1); + mesh->timer_output(MESH_TIMER_PARTITION, device_type, 1); + mesh->timer_output(MESH_TIMER_CALC_NEIGHBORS, device_type, 1); + if (mesh->get_calc_neighbor_type() == HASH_TABLE) { + mesh->timer_output(MESH_TIMER_HASH_SETUP, device_type, 2); + mesh->timer_output(MESH_TIMER_HASH_QUERY, device_type, 2); + if (parallel) { + mesh->timer_output(MESH_TIMER_FIND_BOUNDARY, device_type, 2); + mesh->timer_output(MESH_TIMER_PUSH_SETUP, device_type, 2); + mesh->timer_output(MESH_TIMER_PUSH_BOUNDARY, device_type, 2); + mesh->timer_output(MESH_TIMER_LOCAL_LIST, device_type, 2); + mesh->timer_output(MESH_TIMER_LAYER1, device_type, 2); + 
mesh->timer_output(MESH_TIMER_LAYER2, device_type, 2); + mesh->timer_output(MESH_TIMER_LAYER_LIST, device_type, 2); + mesh->timer_output(MESH_TIMER_COPY_MESH_DATA, device_type, 2); + mesh->timer_output(MESH_TIMER_FILL_MESH_GHOST, device_type, 2); + mesh->timer_output(MESH_TIMER_FILL_NEIGH_GHOST, device_type, 2); + mesh->timer_output(MESH_TIMER_SET_CORNER_NEIGH, device_type, 2); + mesh->timer_output(MESH_TIMER_NEIGH_ADJUST, device_type, 2); + mesh->timer_output(MESH_TIMER_SETUP_COMM, device_type, 2); + } + } else { + mesh->timer_output(MESH_TIMER_KDTREE_SETUP, device_type, 2); + mesh->timer_output(MESH_TIMER_KDTREE_QUERY, device_type, 2); + } + timer_output(STATE_TIMER_MASS_SUM, device_type, 1); + if (parallel) { + mesh->timer_output(MESH_TIMER_LOAD_BALANCE, device_type, 1); + } + mesh->timer_output(MESH_TIMER_CALC_SPATIAL_COORDINATES, device_type, 1); + if (! mesh->have_boundary) { + mesh->timer_output(MESH_TIMER_COUNT_BCS, device_type, 1); + } + if (rank == 0) printf("=============================================================\n"); + + const char *profile_string[2] = { + "Profiling: Total CPU time was", + "Profiling: Total GPU time was" + }; + mesh->parallel_output(profile_string[device_type], elapsed_time, 0, "s"); + if (elapsed_time > 600.0){ + mesh->parallel_output(" or ", elapsed_time/60.0, 0, "min"); + } + + if (rank == 0) printf("-------------------------------------------------------------\n"); + mesh->parallel_output("Mesh Ops (Neigh+rezone+smooth+balance) ",mesh_time, 0, "s"); + mesh->parallel_output("Mesh Ops Percentage ",mesh_time/elapsed_time*100.0, 0, "percent"); + if (rank == 0) printf("=============================================================\n"); + + mesh->parallel_output("Profiling: Total time was",total_elapsed_time, 0, "s"); + if (elapsed_time > 600.0){ + mesh->parallel_output(" or ",total_elapsed_time/60.0, 0, "min"); + } + + if (speedup_ratio > 0.0) { + mesh->parallel_output("Parallel Speed-up: ",speedup_ratio, 0, "Reference Serial 
CPU"); + } + + if (rank == 0) printf("=============================================================\n"); +#endif +} + +void State::timer_output(state_timer_category category, mesh_device_types device_type, int timer_level) +{ + int mype = mesh->mype; + + double local_time = 0.0; + if (device_type == MESH_DEVICE_CPU){ + local_time = get_cpu_timer(category); + } else { + local_time = get_gpu_timer(category); + } + + char string[80] = "/0"; + + if (mype == 0) { + const char *blank=" "; + + const char *device_string[2] = { + "CPU", + "GPU" + }; + + sprintf(string,"%3s: %.*s%-30.30s\t", device_string[device_type], + 2*timer_level, blank, state_timer_descriptor[category]); + } + + mesh->parallel_output(string, local_time, timer_level, "s"); +} + +#ifdef HAVE_OPENCL +void State::compare_state_gpu_global_to_cpu_global(const char* string, int cycle, uint ncells) +{ + cl_command_queue command_queue = ezcl_get_command_queue(); + + vectorH_check(ncells); + vectorU_check(ncells); + vectorV_check(ncells); + ezcl_enqueue_read_buffer(command_queue, dev_H, CL_FALSE, 0, ncells*sizeof(cl_state_t), &H_check[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_U, CL_FALSE, 0, ncells*sizeof(cl_state_t), &U_check[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_V, CL_TRUE, 0, ncells*sizeof(cl_state_t), &V_check[0], NULL); + for (uint ic = 0; ic < ncells; ic++){ + if (fabs(H[ic]-H_check[ic]) > STATE_EPS) printf("DEBUG %s at cycle %d H & H_check %d %lf %lf\n",string,cycle,ic,H[ic],H_check[ic]); + if (fabs(U[ic]-U_check[ic]) > STATE_EPS) printf("DEBUG %s at cycle %d U & U_check %d %lf %lf\n",string,cycle,ic,U[ic],U_check[ic]); + if (fabs(V[ic]-V_check[ic]) > STATE_EPS) printf("DEBUG %s at cycle %d V & V_check %d %lf %lf\n",string,cycle,ic,V[ic],V_check[ic]); + } +} +#endif + +void State::compare_state_cpu_local_to_cpu_global(State *state_global, const char* string, int cycle, uint ncells, uint ncells_global, int *nsizes, int *ndispl) +{ + state_t *H_global = state_global->H; + 
state_t *U_global = state_global->U; + state_t *V_global = state_global->V; + + vectorH_check(ncells_global); + vectorU_check(ncells_global); + vectorV_check(ncells_global); +#ifdef HAVE_MPI + MPI_Allgatherv(&H[0], ncells, MPI_STATE_T, &H_check[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD); + MPI_Allgatherv(&U[0], ncells, MPI_STATE_T, &U_check[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD); + MPI_Allgatherv(&V[0], ncells, MPI_STATE_T, &V_check[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD); +#else + // Just to block compiler warnings + if (1 == 2) printf("DEBUG -- ncells %u nsizes %d ndispl %d\n",ncells, nsizes[0],ndispl[0]); +#endif + + for (uint ic = 0; ic < ncells_global; ic++){ + if (fabs(H_global[ic]-H_check[ic]) > STATE_EPS) printf("DEBUG %s at cycle %d H & H_check %d %lf %lf\n",string,cycle,ic,H_global[ic],H_check[ic]); + if (fabs(U_global[ic]-U_check[ic]) > STATE_EPS) printf("DEBUG %s at cycle %d U & U_check %d %lf %lf\n",string,cycle,ic,U_global[ic],U_check[ic]); + if (fabs(V_global[ic]-V_check[ic]) > STATE_EPS) printf("DEBUG %s at cycle %d V & V_check %d %lf %lf\n",string,cycle,ic,V_global[ic],V_check[ic]); + } +} + +#ifdef HAVE_OPENCL +void State::compare_state_all_to_gpu_local(State *state_global, uint ncells, uint ncells_global, int mype, int ncycle, int *nsizes, int *ndispl) +{ +#ifdef HAVE_MPI + cl_command_queue command_queue = ezcl_get_command_queue(); + + state_t *H_global = state_global->H; + state_t *U_global = state_global->U; + state_t *V_global = state_global->V; + cl_mem &dev_H_global = state_global->dev_H; + cl_mem &dev_U_global = state_global->dev_U; + cl_mem &dev_V_global = state_global->dev_V; + + // Need to compare dev_H to H, etc + vectorH_save(ncells); + vectorU_save(ncells); + vectorV_save(ncells); + ezcl_enqueue_read_buffer(command_queue, dev_H, CL_FALSE, 0, ncells*sizeof(cl_state_t), &H_save[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_U, CL_FALSE, 0, ncells*sizeof(cl_state_t), &U_save[0], 
NULL); + ezcl_enqueue_read_buffer(command_queue, dev_V, CL_TRUE, 0, ncells*sizeof(cl_state_t), &V_save[0], NULL); + for (uint ic = 0; ic < ncells; ic++){ + if (fabs(H[ic]-H_save[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 1 at cycle %d H & H_save %d %lf %lf \n",mype,ncycle,ic,H[ic],H_save[ic]); + if (fabs(U[ic]-U_save[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 1 at cycle %d U & U_save %d %lf %lf \n",mype,ncycle,ic,U[ic],U_save[ic]); + if (fabs(V[ic]-V_save[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 1 at cycle %d V & V_save %d %lf %lf \n",mype,ncycle,ic,V[ic],V_save[ic]); + } + + // And compare dev_H gathered to H_global, etc + vectorH_save_global(ncells_global); + vectorU_save_global(ncells_global); + vectorV_save_global(ncells_global); + MPI_Allgatherv(&H_save[0], nsizes[mype], MPI_STATE_T, &H_save_global[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD); + MPI_Allgatherv(&U_save[0], nsizes[mype], MPI_STATE_T, &U_save_global[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD); + MPI_Allgatherv(&V_save[0], nsizes[mype], MPI_STATE_T, &V_save_global[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD); + if (mype == 0) { + for (uint ic = 0; ic < ncells_global; ic++){ + if (fabs(H_global[ic]-H_save_global[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 2 at cycle %d H_global & H_save_global %d %lf %lf \n",mype,ncycle,ic,H_global[ic],H_save_global[ic]); + if (fabs(U_global[ic]-U_save_global[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 2 at cycle %d U_global & U_save_global %d %lf %lf \n",mype,ncycle,ic,U_global[ic],U_save_global[ic]); + if (fabs(V_global[ic]-V_save_global[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 2 at cycle %d V_global & V_save_global %d %lf %lf \n",mype,ncycle,ic,V_global[ic],V_save_global[ic]); + } + } + + // And compare H gathered to H_global, etc + MPI_Allgatherv(&H[0], nsizes[mype], MPI_STATE_T, &H_save_global[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD); + 
MPI_Allgatherv(&U[0], nsizes[mype], MPI_STATE_T, &U_save_global[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD); + MPI_Allgatherv(&V[0], nsizes[mype], MPI_STATE_T, &V_save_global[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD); + if (mype == 0) { + for (uint ic = 0; ic < ncells_global; ic++){ + if (fabs(H_global[ic]-H_save_global[ic]) > STATE_EPS) printf("DEBUG finite_difference 3 at cycle %d H_global & H_save_global %d %lf %lf \n",ncycle,ic,H_global[ic],H_save_global[ic]); + if (fabs(U_global[ic]-U_save_global[ic]) > STATE_EPS) printf("DEBUG finite_difference 3 at cycle %d U_global & U_save_global %d %lf %lf \n",ncycle,ic,U_global[ic],U_save_global[ic]); + if (fabs(V_global[ic]-V_save_global[ic]) > STATE_EPS) printf("DEBUG finite_difference 3 at cycle %d V_global & V_save_global %d %lf %lf \n",ncycle,ic,V_global[ic],V_save_global[ic]); + } + } + + // Now the global dev_H_global to H_global, etc + ezcl_enqueue_read_buffer(command_queue, dev_H_global, CL_FALSE, 0, ncells_global*sizeof(cl_state_t), &H_save_global[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_U_global, CL_FALSE, 0, ncells_global*sizeof(cl_state_t), &U_save_global[0], NULL); + ezcl_enqueue_read_buffer(command_queue, dev_V_global, CL_TRUE, 0, ncells_global*sizeof(cl_state_t), &V_save_global[0], NULL); + if (mype == 0) { + for (uint ic = 0; ic < ncells_global; ic++){ + if (fabs(H_global[ic]-H_save_global[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 4 at cycle %d H_global & H_save_global %d %lf %lf \n",mype,ncycle,ic,H_global[ic],H_save_global[ic]); + if (fabs(U_global[ic]-U_save_global[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 4 at cycle %d U_global & U_save_global %d %lf %lf \n",mype,ncycle,ic,U_global[ic],U_save_global[ic]); + if (fabs(V_global[ic]-V_save_global[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 4 at cycle %d V_global & V_save_global %d %lf %lf \n",mype,ncycle,ic,V_global[ic],V_save_global[ic]); + } + } +#else + // Just to get rid of 
compiler warnings + if (1 == 2) printf("%d: DEBUG -- ncells %d ncells_global %d ncycle %d nsizes[0] %d ndispl %d state_global %p\n", + mype,ncells,ncells_global,ncycle,nsizes[0],ndispl[0],state_global); +#endif +} +#endif + +void State::print_object_info(void) +{ + printf(" ---- State object info -----\n"); + +#ifdef HAVE_OPENCL + int num_elements, elsize; + + num_elements = ezcl_get_device_mem_nelements(dev_H); + elsize = ezcl_get_device_mem_elsize(dev_H); + printf("dev_H ptr : %p nelements %d elsize %d\n",dev_H,num_elements,elsize); + num_elements = ezcl_get_device_mem_nelements(dev_U); + elsize = ezcl_get_device_mem_elsize(dev_U); + printf("dev_U ptr : %p nelements %d elsize %d\n",dev_U,num_elements,elsize); + num_elements = ezcl_get_device_mem_nelements(dev_V); + elsize = ezcl_get_device_mem_elsize(dev_V); + printf("dev_V ptr : %p nelements %d elsize %d\n",dev_V,num_elements,elsize); + num_elements = ezcl_get_device_mem_nelements(dev_mpot); + elsize = ezcl_get_device_mem_elsize(dev_mpot); + printf("dev_mpot ptr : %p nelements %d elsize %d\n",dev_mpot,num_elements,elsize); + //num_elements = ezcl_get_device_mem_nelements(dev_ioffset); + //elsize = ezcl_get_device_mem_elsize(dev_ioffset); + //printf("dev_ioffset ptr : %p nelements %d elsize %d\n",dev_ioffset,num_elements,elsize); +#endif + state_memory.memory_report(); + //printf("vector H ptr : %p nelements %ld elsize %ld\n",&H[0],H.size(),sizeof(H[0])); + //printf("vector U ptr : %p nelements %ld elsize %ld\n",&U[0],U.size(),sizeof(U[0])); + //printf("vector V ptr : %p nelements %ld elsize %ld\n",&V[0],V.size(),sizeof(V[0])); +} + +void State::print(void) +{ //printf("size is %lu %lu %lu %lu %lu\n",index.size(), i.size(), level.size(), nlft.size(), x.size()); + + if (mesh->fp == NULL) { + char filename[10]; + sprintf(filename,"out%1d",mesh->mype); + mesh->fp=fopen(filename,"w"); + } + + if (mesh->mesh_memory.get_memory_size(mesh->nlft) >= mesh->ncells_ghost){ + fprintf(mesh->fp,"%d: index global i j lev nlft 
nrht nbot ntop \n",mesh->mype); + for (uint ic=0; icncells; ic++) { + fprintf(mesh->fp,"%d: %6d %6d %4d %4d %4d %4d %4d %4d %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]); + } + for (uint ic=mesh->ncells; icncells_ghost; ic++) { + fprintf(mesh->fp,"%d: %6d %6d %4d %4d %4d %4d %4d %4d %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]); + } + } else { + fprintf(mesh->fp,"%d: index H U V i j lev\n",mesh->mype); + for (uint ic=0; icncells_ghost; ic++) { + fprintf(mesh->fp,"%d: %6d %lf %lf %lf %4d %4d %4d \n", mesh->mype,ic, H[ic], U[ic], V[ic], mesh->i[ic], mesh->j[ic], mesh->level[ic]); + } + } +} + +const int CRUX_STATE_VERSION = 102; +const int num_int_vals = 1; + +size_t State::get_checkpoint_size(void) +{ +#ifdef FULL_PRECISION + size_t nsize = mesh->ncells*3*sizeof(double); +#else + size_t nsize = mesh->ncells*3*sizeof(float); +#endif + nsize += num_int_vals*sizeof(int); + nsize += mesh->get_checkpoint_size(); + return(nsize); +} + +void State::store_checkpoint(Crux *crux) +{ + // Store mesh data first + mesh->store_checkpoint(crux); + +//#ifndef HAVE_MPI + // Load up scalar values + int int_vals[num_int_vals]; + int_vals[0] = CRUX_STATE_VERSION; + + // Add to memory database for storing checkpoint + state_memory.memory_add(int_vals, (size_t)num_int_vals, 4, "state_int_vals", RESTART_DATA | REPLICATED_DATA); + state_memory.memory_add(cpu_timers, (size_t)STATE_TIMER_SIZE, 8, "state_cpu_timers", RESTART_DATA); + state_memory.memory_add(gpu_timers, (size_t)STATE_TIMER_SIZE, 8, "state_gpu_timers", RESTART_DATA); + + crux->store_MallocPlus(state_memory); + + // Remove from database after checkpoint is stored + state_memory.memory_remove(int_vals); + state_memory.memory_remove(cpu_timers); + state_memory.memory_remove(gpu_timers); +//#endif +} + +void 
State::restore_checkpoint(Crux *crux) +{ + int storage; + // Restore mesh data first + mesh->restore_checkpoint(crux); + crux->restore_named_ints("storage", 7, &storage, 1); + + // Create memory for restoring data into + int int_vals[num_int_vals]; + + // allocate is a state method + allocate(storage); + + // Add to memory database for restoring checkpoint + state_memory.memory_add(int_vals, (size_t)num_int_vals, 4, "state_int_vals", RESTART_DATA | REPLICATED_DATA); + state_memory.memory_add(cpu_timers, (size_t)STATE_TIMER_SIZE, 8, "state_cpu_timers", RESTART_DATA); + state_memory.memory_add(gpu_timers, (size_t)STATE_TIMER_SIZE, 8, "state_gpu_timers", RESTART_DATA); + + // Restore memory database + crux->restore_MallocPlus(state_memory); + + // Check version number + if (int_vals[ 0] != CRUX_STATE_VERSION) { + printf("CRUX version mismatch for state data, version on file is %d, version in code is %d\n", + int_vals[0], CRUX_STATE_VERSION); + exit(0); + } + +#ifdef DEBUG_RESTORE_VALS + if (DEBUG_RESTORE_VALS) { + printf("\n"); + printf(" === Restored state cpu timers ===\n"); + for (int i = 0; i < STATE_TIMER_SIZE; i++){ + printf(" %-30s %lg\n",state_timer_descriptor[i], cpu_timers[i]); + } + printf(" === Restored state cpu timers ===\n"); + printf("\n"); + } +#endif + +#ifdef DEBUG_RESTORED_VALS + if (DEBUG_RESTORED_VALS) { + printf("\n"); + printf(" === Restored state gpu timers ===\n"); + for (int i = 0; i < STATE_TIMER_SIZE; i++){ + printf(" %-30s %lld\n",state_timer_descriptor[i], gpu_timers[i]); + } + printf(" === Restored state gpu_timers ===\n"); + printf("\n"); + } +#endif + + state_memory.memory_remove(int_vals); + state_memory.memory_remove(cpu_timers); + state_memory.memory_remove(gpu_timers); + + memory_reset_ptrs(); +//#endif +} + +// Added overloaded print to get mesh information to print in each cycle +// Brian Atkinson (5-29-14) +void State::print(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage) 
+{ //printf("size is %lu %lu %lu %lu %lu\n",index.size(), i.size(), level.size(), nlft.size(), x.size()); + + char filename[40]; + sprintf(filename,"iteration%d",iteration); + mesh->fp=fopen(filename,"w"); + + if(iteration_mass == 0.0){ + fprintf(mesh->fp,"Iteration = %d\t\tSimuation Time = %lf\n", iteration, simTime); + fprintf(mesh->fp,"mesh->ncells = %lu\t\tmesh->ncells_ghost = %lu\n", mesh->ncells, mesh->ncells_ghost); + fprintf(mesh->fp,"Initial Mass: %14.12lg\t\tSimulation Time: %lf\n", initial_mass, simTime); + } + else{ + double mass_diff = iteration_mass - initial_mass; + fprintf(mesh->fp,"Iteration = %d\t\tSimuation Time = %lf\n", iteration, simTime); + fprintf(mesh->fp,"mesh->ncells = %lu\t\tmesh->ncells_ghost = %lu\n", mesh->ncells, mesh->ncells_ghost); + fprintf(mesh->fp,"Initial Mass: %14.12lg\t\tIteration Mass: %14.12lg\n", initial_mass, iteration_mass); + fprintf(mesh->fp,"Mass Difference: %12.6lg\t\tMass Difference Percentage: %12.6lg%%\n", mass_diff, mass_diff_percentage); + } + + if (mesh->mesh_memory.get_memory_size(mesh->nlft) >= mesh->ncells_ghost){ + fprintf(mesh->fp,"%d: index global i j lev nlft nrht nbot ntop \n",mesh->mype); + for (uint ic=0; icncells; ic++) { + fprintf(mesh->fp,"%d: %6d %6d %4d %4d %4d %4d %4d %4d %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]); + } + for (uint ic=mesh->ncells; icncells_ghost; ic++) { + fprintf(mesh->fp,"%d: %6d %6d %4d %4d %4d %4d %4d %4d %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]); + } + } else { + fprintf(mesh->fp,"%d: index H U V i j lev\n",mesh->mype); + for (uint ic=0; icncells_ghost; ic++) { + fprintf(mesh->fp,"%d: %6d %lf %lf %lf %4d %4d %4d \n", mesh->mype,ic, H[ic], U[ic], V[ic], mesh->i[ic], mesh->j[ic], mesh->level[ic]); + } + } +} + +void State::print_local(int ncycle) +{ //printf("size is 
%lu %lu %lu %lu %lu\n",index.size(), i.size(), level.size(), nlft.size(), x.size()); + + if (mesh->fp == NULL) { + char filename[10]; + sprintf(filename,"out%1d",mesh->mype); + mesh->fp=fopen(filename,"w"); + } + + fprintf(mesh->fp,"DEBUG in print_local ncycle is %d\n",ncycle); + if (mesh->nlft != NULL){ + fprintf(mesh->fp,"%d: index H U V i j lev nlft nrht nbot ntop\n",mesh->mype); + uint state_size = state_memory.get_memory_size(H); + for (uint ic=0; icncells_ghost; ic++) { + if (ic >= state_size){ + fprintf(mesh->fp,"%d: %6d %4d %4d %4d %4d %4d %4d %4d\n", mesh->mype,ic, mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]); + } else { + fprintf(mesh->fp,"%d: %6d %lf %lf %lf %4d %4d %4d %4d %4d %4d %4d\n", mesh->mype,ic, H[ic], U[ic], V[ic], mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]); + } + } + } else { + fprintf(mesh->fp,"%d: index H U V i j lev\n",mesh->mype); + for (uint ic=0; icncells_ghost; ic++) { + fprintf(mesh->fp,"%d: %6d %lf %lf %lf %4d %4d %4d\n", mesh->mype,ic, H[ic], U[ic], V[ic], mesh->i[ic], mesh->j[ic], mesh->level[ic]); + } + } +} + +void State::print_failure_log(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage, bool got_nan){ + char filename[] = {"failure.log"}; + mesh->fp=fopen(filename,"w"); + + double mass_diff = iteration_mass - initial_mass; + if(got_nan){ + fprintf(mesh->fp,"Failed because of nan for H_sum was equal to NAN\n"); + } + else{ + fprintf(mesh->fp,"Failed because mass difference is outside of accepted percentage\n"); + } + fprintf(mesh->fp,"Iteration = %d\t\tSimuation Time = %lf\n", iteration, simTime); + fprintf(mesh->fp,"mesh->ncells = %lu\t\tmesh->ncells_ghost = %lu\n", mesh->ncells, mesh->ncells_ghost); + fprintf(mesh->fp,"Initial Mass: %14.12lg\t\tIteration Mass: %14.12lg\n", initial_mass, iteration_mass); + fprintf(mesh->fp,"Mass Difference: 
%12.6lg\t\tMass Difference Percentage: %12.6lg%%\n", mass_diff, mass_diff_percentage); + + if (mesh->mesh_memory.get_memory_size(mesh->nlft) >= mesh->ncells_ghost){ + fprintf(mesh->fp,"%d: index global i j lev nlft nrht nbot ntop \n",mesh->mype); + for (uint ic=0; icncells; ic++) { + fprintf(mesh->fp,"%d: %6d %6d %4d %4d %4d %4d %4d %4d %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]); + } + for (uint ic=mesh->ncells; icncells_ghost; ic++) { + fprintf(mesh->fp,"%d: %6d %6d %4d %4d %4d %4d %4d %4d %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]); + } + } else { + fprintf(mesh->fp,"%d: index H U V i j lev\n",mesh->mype); + for (uint ic=0; icncells_ghost; ic++) { + fprintf(mesh->fp,"%d: %6d %lf %lf %lf %4d %4d %4d \n", mesh->mype,ic, H[ic], U[ic], V[ic], mesh->i[ic], mesh->j[ic], mesh->level[ic]); + } + } +} + +void State::print_rollback_log(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage, int backup_attempt, int num_of_attempts, int error_status){ + char filename[40]; + sprintf(filename, "rollback%d.log",backup_attempt); + mesh->fp=fopen(filename,"w"); + + double mass_diff = iteration_mass - initial_mass; + if(error_status == STATUS_NAN){ + fprintf(mesh->fp,"Rolling back because of nan for H_sum was equal to NAN\n"); + } + else{ + fprintf(mesh->fp,"Rolling back because mass difference is outside of accepted percentage\n"); + } + fprintf(mesh->fp,"Rollback attempt %d of %d ---> Number of attempts left:%d\n", backup_attempt, num_of_attempts, num_of_attempts - backup_attempt); + fprintf(mesh->fp,"Iteration = %d\t\tSimuation Time = %lf\n", iteration, simTime); + fprintf(mesh->fp,"mesh->ncells = %lu\t\tmesh->ncells_ghost = %lu\n", mesh->ncells, mesh->ncells_ghost); + fprintf(mesh->fp,"Initial Mass: %14.12lg\t\tIteration 
Mass: %14.12lg\n", initial_mass, iteration_mass); + fprintf(mesh->fp,"Mass Difference: %12.6lg\t\tMass Difference Percentage: %12.6lg%%\n", mass_diff, mass_diff_percentage); + + if (mesh->mesh_memory.get_memory_size(mesh->nlft) >= mesh->ncells_ghost){ + fprintf(mesh->fp,"%d: index global i j lev nlft nrht nbot ntop \n",mesh->mype); + for (uint ic=0; icncells; ic++) { + fprintf(mesh->fp,"%d: %6d %6d %4d %4d %4d %4d %4d %4d %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]); + } + for (uint ic=mesh->ncells; icncells_ghost; ic++) { + fprintf(mesh->fp,"%d: %6d %6d %4d %4d %4d %4d %4d %4d %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]); + } + } else { + fprintf(mesh->fp,"%d: index H U V i j lev\n",mesh->mype); + for (uint ic=0; icncells_ghost; ic++) { + fprintf(mesh->fp,"%d: %6d %lf %lf %lf %4d %4d %4d \n", mesh->mype,ic, H[ic], U[ic], V[ic], mesh->i[ic], mesh->j[ic], mesh->level[ic]); + } + } +} Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/timer.h =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/timer.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2011-2012, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. 
If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * Neal Davis davis68@lanl.gov, davis68@illinois.edu + * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com + * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com + * + */ +#ifndef _TIMER_H +#define _TIMER_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +void cpu_timer_start(struct timeval *tstart_cpu); +double cpu_timer_stop(struct timeval tstart_cpu); + +#ifdef __cplusplus +} +#endif + +#endif /* _TIMER_H */ + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/timer.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/timer.c @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2011-2012, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. 
+ * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * Neal Davis davis68@lanl.gov, davis68@illinois.edu + * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com + * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com + * + */ +#include +#include +#include +#include +#include +#include +#ifdef _OPENMP +#include +#endif + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "timer.h" + +void cpu_timer_start(struct timeval *tstart_cpu){ +#ifdef _OPENMP + if ( omp_in_parallel() ) { +#pragma omp master + { + gettimeofday(tstart_cpu, NULL); + } + } else { + gettimeofday(tstart_cpu, NULL); + } +#else + gettimeofday(tstart_cpu, NULL); +#endif +} + +double cpu_timer_stop(struct timeval tstart_cpu){ + double result; + struct timeval tstop_cpu, tresult; + +#ifdef _OPENMP + if ( omp_in_parallel() ) { +#pragma omp master + { + gettimeofday(&tstop_cpu, NULL); + tresult.tv_sec = tstop_cpu.tv_sec - tstart_cpu.tv_sec; + tresult.tv_usec = tstop_cpu.tv_usec - tstart_cpu.tv_usec; + result = (double)tresult.tv_sec + (double)tresult.tv_usec*1.0e-6; + } + } else { + gettimeofday(&tstop_cpu, NULL); + tresult.tv_sec = tstop_cpu.tv_sec - tstart_cpu.tv_sec; + tresult.tv_usec = tstop_cpu.tv_usec - tstart_cpu.tv_usec; + result = (double)tresult.tv_sec + (double)tresult.tv_usec*1.0e-6; + } +#else + gettimeofday(&tstop_cpu, NULL); + tresult.tv_sec = tstop_cpu.tv_sec - tstart_cpu.tv_sec; + tresult.tv_usec = tstop_cpu.tv_usec - tstart_cpu.tv_usec; + result = (double)tresult.tv_sec + (double)tresult.tv_usec*1.0e-6; 
+#endif + return(result); +} + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/zorder.h =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/zorder.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2011-2012, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * Neal Davis davis68@lanl.gov, davis68@illinois.edu + * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com + * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com + * + */ +#ifndef _ZORDER_H +#define _ZORDER_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +void calc_zorder(int size, int *i, int *j, int *level, int levmx, int ibase, int *z_index, int *z_order); +unsigned long long index_to_bit(unsigned long long index, int lev, int levmx, int ibase); +unsigned long long twobit_to_index(unsigned long long ibit, unsigned long long jbit); +void printbits(int n); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZORDER_H */ + Index: 
MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/zorder.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/zorder.c @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2011-2012, Los Alamos National Security, LLC. + * All rights Reserved. + * + * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced + * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National + * Laboratory (LANL), which is operated by Los Alamos National Security, LLC + * for the U.S. Department of Energy. The U.S. Government has rights to use, + * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS + * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR + * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified + * to produce derivative works, such modified software should be clearly marked, + * so as not to confuse it with the version available from LANL. + * + * Additionally, redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Los Alamos National Security, LLC, Los Alamos + * National Laboratory, LANL, the U.S. Government, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL + * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * CLAMR -- LA-CC-11-094 + * This research code is being developed as part of the + * 2011 X Division Summer Workshop for the express purpose + * of a collaborative code for development of ideas in + * the implementation of AMR codes for Exascale platforms + * + * AMR implementation of the Wave code previously developed + * as a demonstration code for regular grids on Exascale platforms + * as part of the Supercomputing Challenge and Los Alamos + * National Laboratory + * + * Authors: Bob Robey XCP-2 brobey@lanl.gov + * Neal Davis davis68@lanl.gov, davis68@illinois.edu + * David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com + * Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com + * + */ + +#include +#include +#include "s7.h" +#include "zorder.h" + +#define DEBUG 0 + +void calc_zorder(int size, int *i, int *j, int *level, int levmx, int ibase, int *z_index, int *z_order) +{ unsigned long long ibit, // Bitwise representation of x-index. + jbit; // Bitwise representation of y-index. + + // Convert the indices to a bitwise representation. 
+ int ic; + for (ic = 0; ic < size; ic++) + { if (level[ic] < 0) continue; + ibit = index_to_bit(i[ic], level[ic], levmx, ibase); + jbit = index_to_bit(j[ic], level[ic], levmx, ibase); + z_index[ic] = twobit_to_index(ibit, jbit); + z_order[ic] = ic; } + + // Sort the z-ordered indices. + S7_Index_Sort(z_index, size, S7_INT, z_order); + + // Output ordered mesh information. + if (DEBUG) + { printf("orig index i j lev ibit jbit ijbit z index z order\n"); + for (ic=0; ic>= 8; // Print in groups of four. + while (step >= n) + { i >>= 8; + step >>= 8; } + + // At this point, i is the smallest power of two larger or equal to n. + while (i > 0) + { if (n & i) + printf("1"); + else + printf("0"); + i >>= 1; } } + Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CMakeLists.txt =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(CLAMR) Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/Makefile =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C++/Makefile @@ -0,0 +1,6 @@ +# MultiSource/DOE-ProxyApps-C++ Makefile: Build all subdirectories automatically + +LEVEL = ../../.. +PARALLEL_DIRS = CLAMR + +include $(LEVEL)/Makefile.programs