Index: LICENSE.TXT
===================================================================
--- LICENSE.TXT
+++ LICENSE.TXT
@@ -84,6 +84,7 @@
                     llvm-test/MultiSource/Benchmarks/ASC_Sequoia/sphot
 smg2000:            llvm-test/MultiSource/Benchmarks/ASCI_Purple/SMG2000
 XSBench:            llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C/XSBench
+CLAMR:              llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR
 Fhourstones:        llvm-test/MultiSource/Benchmarks/Fhourstones
 Fhourstones-3.1:    llvm-test/MultiSource/Benchmarks/Fhourstones-3.1
 McCat:              llvm-test/MultiSource/Benchmarks/McCat
Index: MultiSource/Benchmarks/CMakeLists.txt
===================================================================
--- MultiSource/Benchmarks/CMakeLists.txt
+++ MultiSource/Benchmarks/CMakeLists.txt
@@ -19,6 +19,7 @@
 add_subdirectory(nbench)
 add_subdirectory(sim)
 add_subdirectory(DOE-ProxyApps-C)
+add_subdirectory(DOE-ProxyApps-C++)
 
 if((NOT "${TARGET_OS}" STREQUAL "Darwin") OR (NOT "${ARCH}" STREQUAL "ARM"))
   add_subdirectory(TSVC)
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Bounds.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Bounds.h
@@ -0,0 +1,86 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ *           Other LANL authors
+ * 
+ */
+
+#include <stdbool.h>
+
+#ifndef _Bounds_
+#define _Bounds_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "Globals.h"
+
+/* Box stored as its min and max corners; Bounds.c works on the x and y parts. */
+typedef struct {
+   TVector min, max;
+} TBounds;
+
+void Bounds_Copy(TBounds* src, TBounds* dest);             /* copy *src into *dest */
+void Bounds_Infinite(TBounds* b);                          /* min = POSITIVE_INFINITY, max = NEGATIVE_INFINITY */
+void Bounds_AddBounds(TBounds* b, TBounds* add);           /* widen b so it also covers add */
+void Bounds_AddEpsilon(TBounds* b, double add);            /* min -= add, max += add on x and y */
+bool Bounds_IsOverlappingBounds(TBounds* b, TBounds* tst); /* false when apart along x or y */
+double Bounds_WidthAxis(TBounds* b, unsigned int axis);    /* max - min along XAXIS/YAXIS */
+double Bounds_CenterAxis(TBounds* b, unsigned int axis);   /* (min + max) * 0.5 along XAXIS/YAXIS */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Bounds.c
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Bounds.c
@@ -0,0 +1,127 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ *           Other LANL authors
+ * 
+ */
+#include "Bounds.h"
+
+#define MEMCPY(s,d,n,t) do { memcpy((void*)(d), (const void*)(s), (n) * sizeof(t)); } while (0) /* copy n items of type t from s to d; safe as a single statement */
+#define MIN(a,b) ((a) < (b) ? (a) : (b)) /* note: evaluates its arguments twice */
+#define MAX(a,b) ((a) > (b) ? (a) : (b)) /* note: evaluates its arguments twice */
+
+void Bounds_Copy(TBounds* src, TBounds* dest) {
+   assert(src && dest);
+   memcpy((void*)dest, (void*)src, 1 * sizeof(TBounds)); /* one whole TBounds, src -> dest */
+}
+
+void Bounds_Infinite(TBounds* b) { /* min -> POSITIVE_INFINITY, max -> NEGATIVE_INFINITY */
+   assert(b);
+   b->max.x = NEGATIVE_INFINITY;
+   b->max.y = NEGATIVE_INFINITY;
+   b->min.x = POSITIVE_INFINITY;
+   b->min.y = POSITIVE_INFINITY;
+}
+
+void Bounds_AddBounds(TBounds* b, TBounds* add) { /* widen b so it also covers add */
+   assert(b && add);
+   b->min.x = MIN(b->min.x, add->min.x);
+   b->max.x = MAX(b->max.x, add->max.x);
+   b->min.y = MIN(b->min.y, add->min.y);
+   b->max.y = MAX(b->max.y, add->max.y);
+}
+
+void Bounds_AddEpsilon(TBounds* b, double add) { /* lower min and raise max by add */
+   assert(b);
+   b->min.x -= add;
+   b->max.x += add;
+   b->min.y -= add;
+   b->max.y += add;
+}
+
+/* True unless the two boxes are separated along x or along y (touching edges count). */
+bool Bounds_IsOverlappingBounds(TBounds* b, TBounds* tst) {
+   bool apart_x, apart_y;
+   assert(b && tst);
+   apart_x = (tst->max.x < b->min.x) || (tst->min.x > b->max.x);
+   apart_y = (tst->max.y < b->min.y) || (tst->min.y > b->max.y);
+   return !(apart_x || apart_y);
+}
+
+/* Extent of b along axis (XAXIS or YAXIS); any other axis asserts and yields 0.0. */
+double Bounds_WidthAxis(TBounds* b, unsigned int axis) {
+   double width = 0.0; /* keeps the result defined when NDEBUG strips the asserts */
+
+   assert(b);
+   if(axis == XAXIS)
+      width = b->max.x - b->min.x;
+   else if(axis == YAXIS)
+      width = b->max.y - b->min.y;
+   else
+      assert(0 && "axis must be XAXIS or YAXIS");
+   return(width);
+}
+
+/* Midpoint of b along axis (XAXIS or YAXIS); any other axis asserts and yields 0.0. */
+double Bounds_CenterAxis(TBounds* b, unsigned int axis) {
+   double center = 0.0; /* keeps the result defined when NDEBUG strips the asserts */
+
+   assert(b);
+   if(axis == XAXIS)
+      center = (b->min.x + b->max.x) * 0.5;
+   else if(axis == YAXIS)
+      center = (b->min.y + b->max.y) * 0.5;
+   else
+      assert(0 && "axis must be XAXIS or YAXIS");
+   return(center);
+}
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CLAMR.reference_output
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CLAMR.reference_output
@@ -0,0 +1,19 @@
+Mass of initialized cells equal to  31290.8709635
+Iteration   0 timestep      n/a Sim Time      0.0 cells 4412 Mass Sum  31290.8709635
+Iteration 100 timestep 0.000349 Sim Time 0.045244 cells 4652 Mass Sum  31290.8709635 Mass Change            0
+Iteration 200 timestep 0.000386 Sim Time 0.080903 cells 4760 Mass Sum  31290.8709635 Mass Change            0
+Iteration 300 timestep 0.000442 Sim Time 0.121853 cells 4892 Mass Sum  31290.8709635 Mass Change            0
+Iteration 400 timestep 0.000502 Sim Time 0.169292 cells 4976 Mass Sum  31290.8709635 Mass Change -3.63798e-12
+Iteration 500 timestep 0.000614 Sim Time 0.224092 cells 5096 Mass Sum  31290.8709635 Mass Change -3.63798e-12
+Iteration 600 timestep 0.000701 Sim Time 0.288037 cells 5372 Mass Sum  31290.8709635 Mass Change -3.63798e-12
+Iteration 700 timestep 0.000787 Sim Time 0.362393 cells 5780 Mass Sum  31290.8709635 Mass Change -7.27596e-12
+Iteration 800 timestep 0.000922 Sim Time 0.449275 cells 6152 Mass Sum  31290.8709635 Mass Change -1.09139e-11
+Iteration 900 timestep 0.001131 Sim Time 0.551298 cells 6704 Mass Sum  31290.8709635 Mass Change -1.09139e-11
+Iteration 1000 timestep 0.001318 Sim Time 0.672188 cells 7208 Mass Sum  31290.8709635 Mass Change -1.09139e-11
+Using hash tables to calculate neighbors
+hash table size  bytes 278784
+Initial order is Hilbert sort.   No cycle reorder.   Local Stencil is on.
+CPU:  rezone frequency                	  17.1000	percent
+CPU:  calc neigh frequency            	  17.2000	percent
+CPU:  refine_smooth_iter per rezone   	   0.0000	
+exit 0
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeLists.txt
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeLists.txt
@@ -0,0 +1,3 @@
+set(PROG CLAMR)
+set(RUN_OPTIONS -n 64 -t 1000)
+llvm_multisource()
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Cmd.hh
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Cmd.hh
@@ -0,0 +1,306 @@
+/* Copyright 2015.  Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ *  PowerParser is a general purpose input file parser for software applications.
+ *
+ *  Authors: Chuck Wingate   XCP-2   caw@lanl.gov
+ *           Robert Robey    XCP-2   brobey@lanl.gov
+ */
+
+#ifndef CMDHHINCLUDE
+#define CMDHHINCLUDE
+
+// ***************************************************************************
+// ***************************************************************************
+// Generalized command class.
+// ***************************************************************************
+// ***************************************************************************
+
+#include <string>
+#include <deque>
+#include <vector>
+#include <map>
+#include <sstream>
+#include "Variable.hh"
+#include "Function.hh"
+
+namespace PP
+{
+using std::string;
+using std::deque;
+using std::vector;
+using std::map;
+using std::stringstream;
+
+//class Variable;
+//class Function;
+
+class Cmd
+{
+public:
+    Cmd();
+    Cmd(string s, map<string, Variable> *v, map<string, Function> *f,
+        deque<string> *lstr, int lnum, int file_lnum, string fname,
+        stringstream &serr, int &ierr);
+
+    void set_index_base(int base);
+    void set_case_sensitive(bool case_sensitive_in);
+
+    void add_word(string str, int lnum, int file_lnum, string fname);
+    void erase_word(int iw);
+    void erase_last_word();
+    void reset_name_type();
+    void delete_words(int i1, int i2);
+    void check_ppmm(stringstream &serr, int &ierr);
+    void remove_commas();
+    void handle_two_words();
+    bool check_input_end(bool kill_run, stringstream &serr, int &ierr);
+    void print_duplicate_line(int iw, stringstream &ss, int fn_width,
+                              int lnum_width, string after_lnum);
+    void get_duplicate_sizes(int iw, int &fn_width, int &lnum_width);
+
+    string get_cmd_filename(stringstream &ssfiles);
+    void handle_quotes(stringstream &serr, int &ierr);
+    void handle_exe_args(string &sout);
+    void deprecated_input01(string action, stringstream &serr, int &ierr);
+    void fatal_error(int iw, stringstream &serr, int &ierr);
+    void warning(int iw, stringstream &serr, int &ierr);
+
+    void get_bool_int(string &cname, int *array_vals, const vector<int> &size,
+                      vector<Cmd *> &dup_cmd1, vector<int> &dup_wdex1,
+                      int dup_fatal, vector<int> &dup_vals,
+                      bool skip, stringstream &serr, int &ierr);
+
+    void get_bool(string &cname, bool *array_vals, const vector<int> &size,
+                  vector<Cmd *> &dup_cmd1, vector<int> &dup_wdex1,
+                  int dup_fatal, vector<int> &dup_vals,
+                  bool skip, stringstream &serr, int &ierr);
+
+    void get_int(string &cname, int *array_vals, const vector<int> &size,
+                 vector<Cmd *> &dup_cmd1, vector<int> &dup_wdex1,
+                 int dup_fatal, vector<int> &dup_vals,
+                 bool skip, stringstream &serr, int &ierr);
+
+    void get_int(string &cname, int64_t *array_vals, const vector<int> &size,
+                 vector<Cmd *> &dup_cmd1, vector<int> &dup_wdex1,
+                 int dup_fatal, vector<int> &dup_vals,
+                 bool skip, stringstream &serr, int &ierr);
+
+    void get_real(string &cname, double *array_vals, const vector<int> &size,
+                  vector<Cmd *> &dup_cmd1, vector<int> &dup_wdex1,
+                  int dup_fatal, vector<int> &dup_vals,
+                  bool skip, stringstream &serr, int &ierr);
+
+    void get_char(string &cname, vector<string> &vstr, const vector<int> &size,
+                  bool single_char, vector<Cmd *> &dup_cmd1,
+                  vector<int> &dup_wdex1, int dup_fatal,
+                  vector<int> &dup_vals, bool skip,
+                  stringstream &serr, int &ierr);
+
+    void get_size(vector<int> &size, stringstream &serr, int &ierr);
+    void get_sizeb(vector<int> &size, stringstream &serr, int &ierr);
+
+    // Handle unary minus and plus in command lines.
+    void handle_cmd_unary_minus(stringstream &serr, int &ierr);
+    void handle_cmd_unary_plus(stringstream &serr, int &ierr);
+
+    // Handle multiplicity in command lines, i.e. a(1)=15*3.0
+    void handle_cmd_multiplicity(stringstream &serr, int &ierr);
+
+    // Handle variables.
+    bool check_for_dimension(stringstream &serr, int &ierr);
+    bool check_for_var_description(stringstream &serr, int &ierr);
+    void substitute_variables(stringstream &serr, int &ierr);
+    void set_variables(stringstream &serr, int &ierr);
+
+    // Math evaluation.
+    void math_eval(stringstream &serr, int &ierr);
+    void check_misplaced_math(stringstream &serr, int &ierr);
+
+    // Handle comments.
+    void single_line_comments();
+    void multi_line_comments(int &level);
+
+    // Handle processed flags.
+    void clear_processed();
+    void set_processed(bool ip);
+    void check_processed(bool &good, stringstream &serr, int &ierr);
+
+    // If statements.
+    void handle_if(bool &skip, deque<bool> &skip_level,
+                   deque<bool> &satisfied, 
+                   stringstream &serr, int &ierr);
+
+    // Do loops.
+    void handle_do(bool &skip, deque<int> &do_start, int &cdex,
+                   bool &end_do_loop, stringstream &serr, int &ierr);
+    bool find_matching_enddo(int &dlev, bool &stop_checking);
+
+    // Subroutines
+    void handle_subroutines(bool &skip,
+                            bool &go_to_sub, string &sub_name,
+                            bool &go_to_call,
+                            stringstream &serr, int &ierr);
+    bool find_subroutine(string &sub_name);
+    void get_call_args(vector<string> &sargs, vector<bool> &sargs_isvar,
+                       stringstream &serr, int &ierr);
+    void get_sub_args(vector<string> &sargs, vector<bool> &sargs_isvar);
+    void copy_call_args(vector<string> &sargs, vector<bool> &sargs_isvar);
+    void copy_sub_args(vector<string> &sargs, vector<bool> &sargs_isvar);
+
+    // Accessor functions.
+    string get_cmd_name() { return cmd_name; }
+    string get_cmd_type() { return cmd_type; }
+    int get_nwords() { return words.size(); }
+    string get_string(int iw) {
+        if ((int)words.size() <= iw) return "";
+        return words[iw].get_string();
+    }
+    string get_original_str() { return original_str; }
+
+    int get_line_number(int iw) { return words[iw].get_line_number(); }
+    int get_file_line_number(int iw) { return words[iw].get_file_line_number(); }
+    string get_filename(int iw) { return words[iw].get_filename(); }
+    string get_filename() { return filename; }
+    deque<string> *get_lines() { return lines; }
+
+    void set_filename(string fn) {
+        filename = fn;
+        for (int iw=0; iw<(int)words.size(); iw++) {
+            words[iw].set_filename(fn);
+        }
+    }
+
+    // True when the first word is "include"; a command with no words is not one.
+    bool is_include() { return !words.empty() && words[0].get_string() == "include"; }
+
+    // Debug
+    void print_all_words();
+    void print_all_words(stringstream &ss);
+    void print_using_words(stringstream &ss);
+    void print_using_words_fm(stringstream &ss);
+    void print_original_string(stringstream &ss);
+
+
+private:
+    // Initialization method for this class.
+    void init();
+
+    void process_string(string in_str, stringstream &serr, int &ierr);
+    bool extract_next_word(int &istart, string &str, string &word,
+                           stringstream &serr, int &ierr);
+    int find_closing_symbol(string opensym, string closesym, int i1);
+    bool handle_innermost_parens(int &i1, int &i2, int &iwres, int &nargs,
+                                 bool remp, stringstream &serr, int &ierr);
+    void evaluate_function(int iw1, int &i2, int &nargs,
+                           stringstream &serr, int &ierr);
+    void seval(int &i1, int &i2, stringstream &serr, int &ierr);
+    void handle_unary_op(int i1, int &i2, string utype,
+                         stringstream &serr, int &ierr);
+    void do_unary_op(int ip, string utype);
+    void handle_star_star();  // ** exponentiation
+    void handle_ops();
+
+    void subvar_w0(int i1, int &i2, stringstream &serr, int &ierr);
+    void subvar0(int vardex, string &varname, int increment,
+                 stringstream &serr, int &ierr);
+    bool evaluate_variable(int iw1, int &i2, int &nargs,
+                           stringstream &serr, int &ierr);
+
+    int find(int i1, int i2, string s);
+    int find_last(string s, int i1, int i2);
+    int find_any_char(int i1, int i2, string s);
+    void replace_words(int i1, int i2, Word &w);
+    void replace_words(int i1, int i2, vector<Word> &vw);
+    void merge_words(int i1, int i2);
+    bool separate_str(string &subs, string &fstr, vector<string> &vs);
+    int find_equals();
+
+    bool check_syntax(vector<int> &istart, stringstream &serr, int &ierr);
+    bool get_nvals(vector<int> &istart, const vector<int> &size,
+                   int &nvals, stringstream &serr, int &ierr);
+    void debug_print_words(string s);
+
+    void fatal_error2(stringstream &serr, int &ierr);
+    void error_dup_line(string &cname, int wdex, int cdex,
+                        vector<int> &dup_wdex1, vector<Cmd *> &dup_cmd1,
+                        vector<int> &dup_vals, const vector<int> &size,
+                        int dup_fatal, stringstream &serr, int &ierr);
+
+
+    // This is needed for telling the user what line in the input
+    // file or include file the error occurred on.
+    //
+    // line_number  The line_number corresponding to this command, this is
+    //              an index into lines and starts from 1, not 0.
+    // lines        Pointer to the deque of original lines. This contains all
+    //              the lines from the input file and any include files.
+    // file_line_number  The line number in the input file or include file.
+    // filename          The name of the input file or include file.
+    //
+    // file_line_number and filename are needed so that the user can open
+    // the file and go to the line in error.
+    int line_number, file_line_number;
+    string filename;
+    deque<string> *lines;
+    
+    // index base, generally 1 for Fortran style and 0 for C/C++, default 1
+    // int index_base; -- using static variable instead
+
+    // The original string before processing.
+    string original_str;
+
+    // Pointer to the map of variables.
+    map<string, Variable> *vmap;
+
+    // Pointer to the map of functions.
+    map<string, Function> *fmap;
+
+    // Definitions of white space, delimiters, etc.
+    string white_space;
+    string delims;
+
+    // Storage for all the words on the line.
+    deque <Word> words;
+
+    // The name and type of the command.
+    string cmd_name;
+    string cmd_type;
+
+    // Used for subroutines.
+    vector<string> call_args, sub_args;
+    vector<bool> call_args_isvar, sub_args_isvar;
+};
+
+
+} // end of PP namespace
+
+#endif
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Cmd.cc
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Cmd.cc
@@ -0,0 +1,3972 @@
+/* Copyright 2015.  Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ *  PowerParser is a general purpose input file parser for software applications.
+ *
+ *  Authors: Chuck Wingate   XCP-2   caw@lanl.gov
+ *           Robert Robey    XCP-2   brobey@lanl.gov
+ */
+
+// ***************************************************************************
+// ***************************************************************************
+// This class holds command lines broken up into words.
+// The term command is used in a general sense, it includes variable
+// assignments, do loops, usual commands, etc.
+// ***************************************************************************
+// ***************************************************************************
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <string>
+#include <vector>
+#include <deque>
+#include <sstream>
+#include <map>
+#include <algorithm>
+#include <math.h>
+#include <assert.h>
+
+#include "Parser_utils.hh"
+#include "Variable.hh"
+#include "Function.hh"
+#include "Word.hh"
+#include "Parser_math.hh"
+#include "Cmd.hh"
+
+namespace PP
+{
+using std::cout;
+using std::endl;
+using std::string;
+using std::deque;
+using std::vector;
+using std::stringstream;
+using std::pair;
+using std::ifstream;
+using std::ios;
+using std::setw;
+
+// index base, generally 1 for Fortran style and 0 for C/C++, default 1
+static int index_base = 1;
+static bool case_sensitive = false;
+
+// ===========================================================================
+// Default constructor.
+// ===========================================================================
+Cmd::Cmd()
+{
+    init();  // start from the defaults assigned in init()
+}
+
+
+// ===========================================================================
+// Constructor including map of variables.
+// ===========================================================================
+Cmd::Cmd(string s, map<string, Variable> *v, map<string, Function> *f,
+         deque<string> *lstr, int lnum, int file_lnum, string fname,
+         stringstream &serr, int &ierr)
+{
+    init();                          // defaults first, overridden below
+    lines = lstr;                    // where the command came from
+    filename = fname;
+    line_number = lnum;
+    file_line_number = file_lnum;
+    original_str = s;                // copy of the input text as given
+    vmap = v;                        // variable and function maps
+    fmap = f;
+    process_string(s, serr, ierr);   // break s into words
+}
+
+
+// ===========================================================================
+// Add a word to this cmd.
+// ===========================================================================
+void Cmd::add_word(string str, int lnum, int file_lnum, string fname)
+{
+    // The new word is handed this command's `lines` pointer.
+    words.push_back(Word(str, lnum, file_lnum, fname, lines));
+}
+
+
+// ===========================================================================
+// Erase a word from this cmd.
+// ===========================================================================
+void Cmd::erase_word(int iw)
+{
+    if (iw >= 0 && iw < (int)words.size()) words.erase(words.begin()+iw);  // out-of-range index: no-op
+}
+void Cmd::erase_last_word()
+{
+    // Nothing to drop on an empty command; begin()-1 would be undefined.
+    if (!words.empty()) words.pop_back();
+}
+
+
+// ===========================================================================
+// Remove words that are commas.
+// ===========================================================================
+void Cmd::remove_commas()
+{
+    deque<Word>::iterator it = words.begin();
+    while (it != words.end()) {
+        // erase() returns the position following the removed element.
+        if (it->is_comma()) it = words.erase(it);
+        else ++it;
+    }
+}
+
+
+// ===========================================================================
+// Initialize various private data.
+// ===========================================================================
+void Cmd::init()
+{
+    original_str = "";
+    white_space = " \t";
+    delims = " \t()[],*/+-=!#";
+    vmap = NULL;
+    fmap = NULL;
+    lines = NULL;  // was never set here, leaving Cmd() with an indeterminate pointer
+    line_number = 0;
+    file_line_number = 0;
+    filename = "";
+}
+
+
+// ===========================================================================
+// Set index base for input file indexing. 1 -- Fortran like, 0 -- Other
+//   languages
+// ===========================================================================
+void Cmd::set_index_base(int base)
+{
+    // index_base is a file-level static, so the value is shared by all Cmds.
+    index_base = base;
+}
+
+// ===========================================================================
+// Set case sensitivity for input file commands.
+// ===========================================================================
+void Cmd::set_case_sensitive(bool case_sensitive_in)
+{
+    case_sensitive = case_sensitive_in;  // file-level static, not a per-object member
+}
+
+// ===========================================================================
+// Tokenize one input string.
+// Words are pulled from in_str one at a time with extract_next_word(); each
+// is wrapped in a Word object and appended to the word list. Once the string
+// is exhausted the command name and type are recomputed.
+//
+//   in_str   The line to split into words.
+//   serr     Error text stream, forwarded to extract_next_word().
+//   ierr     Error code, forwarded to extract_next_word().
+// ===========================================================================
+void Cmd::process_string(string in_str, stringstream &serr, int &ierr)
+{
+    int cursor = 0;     // Position in in_str where the next search begins.
+    string token;       // Text of the word most recently extracted.
+
+    while (true) {
+        // The delimiter set is re-established before every extraction.
+        delims = " \t()[],*/+-=!#";
+
+        // Stop as soon as no further word can be extracted.
+        if (!extract_next_word(cursor, in_str, token, serr, ierr)) break;
+
+        // Build a Word from the raw text (this removes quotes if there are
+        // any and types the word), then append a copy to the word list.
+        Word w(token, line_number, file_line_number, filename, lines);
+        words.push_back(w);
+    }
+
+    // Set the command name and type from the words just collected.
+    reset_name_type();
+}
+
+
+// ===========================================================================
+// Given a string, str, and a starting position in
+// that string, istart, extract the next word and
+// pass it back as a string.
+//
+//   istart   In: index in str where the search begins.
+//            Out: index just past the extracted word (only updated when a
+//            word is found).
+//   str      The line being split into words. It is not modified here.
+//   word     Out: the extracted word, including any surrounding quotes.
+//   serr     Not used by the active code (only by the disabled quotes
+//            mismatch check below).
+//   ierr     Only referenced by the assert below.
+//
+// Returns true if a word was found, false if istart is out of range or only
+// white space remains.
+// ===========================================================================
+bool Cmd::extract_next_word(int &istart, string &str, string &word,
+                            stringstream &serr, int &ierr)
+{
+    // To suppress compiler warnings of unused parameters
+    //assert(serr == serr);
+    assert(ierr == ierr);
+
+    // If istart is out of bounds then there is nothing to do.
+    if (istart < 0) return false;
+    if (istart >= (int)str.size()) return false;
+
+    // Find the next non blank character.
+    int i1 = str.find_first_not_of(white_space, istart);
+
+    // If a non whitespace character was not found then there are no more
+    // words to extract.
+    // (The size_t result was narrowed to int above, so it is compared with
+    // npos narrowed the same way.)
+    if (i1 == (int)string::npos) return false;
+
+    // If the non blank character that was found is a delimiter, like
+    // ()[]+-/* ... then it needs to be a word by itself.
+    if (delims.find(str[i1], 0) != string::npos) {
+        word = str[i1];
+        istart = i1+1;
+        return true;
+    }
+
+    // At this point we have found the start of a word. The end of the
+    // word will be one of the delimiters like ()[]+=-*/spacetab ...
+    string wend = delims;
+
+    // A word delimited by quotes is handled differently. If the i1
+    // position in the string is a beginning quotes then we need to search
+    // for an ending quotes. Anything between quotes is part of the word
+    // including delimiters.
+    // The search below stops at the first matching quote character; there
+    // is no handling of escaped quotes inside the word.
+    bool quotes = false;
+    if (str[i1] == '"') {
+        quotes = true;
+        wend = "\"";
+    }
+    if (str[i1] == '\'') {
+        quotes = true;
+        wend = "\'";
+    }
+
+    // Search for the end of the word by finding the next delimiter. The
+    // delimiter is one index past the end of the word.
+    // But if the next delimiter is + or - then we have to consider that
+    // this could be a floating point number in which case we continue
+    // past the + or - to find the next delimiter.
+    int i2;
+    int i1_start = i1+1;
+    for(;;) {
+        i2 = str.find_first_of(wend, i1_start);
+
+        // If a delimiter was not found then the word extends to the end
+        // of the line.
+        if (i2 == (int)string::npos) {
+            i2 = str.size();
+            break;
+        }
+        else {
+            // Check for a floating point number (fpn). For example
+            //       1.34e+14  or -3.8E-19
+            // i2 might point to the + or - in e+14 or E-19, so we check
+            // for that case. Note that if the + or - is not found, then it
+            // could be a number like 1.e14 but then i2 would point to
+            // something after e14 and we would be ok.
+            // If we do find +e or -e, then everything in front of it needs
+            // to be a digit, if not then this is not a number.
+            // The exponent letters accepted are e, E, d and D.
+            // i2 >= i1+1 here, so str[i2-1] is always inside the word.
+            // NOTE(review): when the exponent letter is the first character
+            // of the word (e.g. "e+5"), the digit loop below runs zero
+            // times and fpn stays true, so the text is treated as one
+            // number-like word -- confirm this is intended.
+            bool fpn = false;
+            if (str[i2] == '+' || str[i2] == '-') {
+                if (str[i2-1] == 'e' || str[i2-1] == 'E' ||
+                    str[i2-1] == 'd' || str[i2-1] == 'D') {
+                    fpn = true;
+                    for (int j=i1; j<=i2-2; j++) {
+                        if (!isdigit(str[j]) && str[j] != '.') {
+                            fpn = false;
+                            break;
+                        }
+                    }
+                }
+            }
+
+            // Not part of a number: i2 is the end of the word. Otherwise
+            // resume the delimiter search just past the exponent sign.
+            if (!fpn) break;
+            i1_start = i2+1;
+        }
+    }
+
+    // If the word is quoted then it should end in quotes.
+    // We do not check for quotes matching here because at this
+    // point we might be in a comment region where quotes mismatch
+    // is allowed. We check for quotes mismatch later.
+    // When the closing quote is missing, i2 is pulled back to the last
+    // character so that the increment below leaves it at str.size().
+    if (quotes) {
+        if (i2 >= (int)str.size()) i2 = (int)str.size() - 1;
+    }
+    /*
+    if (quotes) {
+        bool missing = false;
+        if (i2 >= (int)str.size()) missing = true;
+        else if (str[i1] == '\"' && str[i2] != '\"') missing = true;
+        else if (str[i1] == '\'' && str[i2] != '\'') missing = true;
+        else if (str[i1] == '\"' && str[i2] == '\'') missing = true;
+        else if (str[i1] == '\'' && str[i2] == '\"') missing = true;
+        if (missing) {
+            fatal_error2(serr, ierr);
+            serr << "Quotes mismatch found." << endl;
+            serr << "A starting quotes must have a closing quotes." << endl;
+            serr << "Double quotes, \", must be matched with double quotes." << endl;
+            serr << "Single quotes, \', must be matched with single quotes." << endl;
+            ierr = 2;
+            return false;
+        }
+    }
+    */
+
+    // We include the quotes symbols in the word. The quote symbols will
+    // be removed elsewhere.
+    if (quotes) i2 += 1;
+
+    // The word is now delimited by i1 and i2-1, return it in word.
+    word = str.substr(i1, i2 - i1);
+
+    // Update the starting point for finding the next word.
+    istart = i2;
+
+    // A word was successfully found so return true.
+    return true;
+}
+
+
+// ===========================================================================
+// Recompute the command name and type from the first word.
+// This is needed after words have been removed. Consider the command:
+//      * lasdkj */ cmd = 5.0
+// The original command name is "*", but after the multi-line comment is
+// removed, the command name should be "cmd".
+//
+// The name is the string of the first word (lower-cased unless commands are
+// case sensitive). The type is "command" by default, "assignment" when the
+// first word is a variable, and "debug" or "internal_cmd" when the name is
+// one of the reserved names listed below. With no words at all, both the
+// name and the type are set to a single blank.
+// ===========================================================================
+void Cmd::reset_name_type()
+{
+    if (words.empty()) {
+        cmd_name = " ";
+        cmd_type = " ";
+        return;
+    }
+
+    // Reserved names; the two lists have no entries in common.
+    static const char *const debug_names[] = {
+        "parser_list_variables", "parser_list_functions",
+        "parser_print_fbuffer"
+    };
+    static const char *const internal_names[] = {
+        "if", "elseif", "endif", "do", "return", "enddo", "stop", "when",
+        "endwhen"
+    };
+    const int num_debug =
+        (int)(sizeof(debug_names) / sizeof(debug_names[0]));
+    const int num_internal =
+        (int)(sizeof(internal_names) / sizeof(internal_names[0]));
+
+    cmd_name = words[0].get_string();
+    if (!case_sensitive) {
+        transform(cmd_name.begin(), cmd_name.end(), cmd_name.begin(), tolower);
+    }
+
+    // A reserved name takes precedence over the variable/command split.
+    cmd_type = words[0].is_variable() ? "assignment" : "command";
+    for (int n = 0; n < num_debug; n++) {
+        if (cmd_name == debug_names[n]) cmd_type = "debug";
+    }
+    for (int n = 0; n < num_internal; n++) {
+        if (cmd_name == internal_names[n]) cmd_type = "internal_cmd";
+    }
+}
+
+
+// ===========================================================================
+// Given a line like
+//    include filename1 filename2 filename3 ...
+// Find the first filename that exists and return that.
+// This should only be called on the io processor.
+//
+//   ssfiles  Receives one indented line per filename that was tried, in the
+//            order tried (including the one that is returned).
+//
+// Returns the first filename that could be opened for reading, or an empty
+// string if none of them could.
+// ===========================================================================
+string Cmd::get_cmd_filename(stringstream &ssfiles)
+{
+    // Word 0 is the command itself; the candidate filenames follow it.
+    for (int i=1; i<(int)words.size(); i++) {
+        string fn = words[i].get_string();
+
+        // The quotes may still be on the word, strip them off if they are
+        // present. Each check is guarded so that an empty word, or a word
+        // that is a lone quote character, never indexes outside the string.
+        if (!fn.empty()) {
+            char last = fn[fn.size()-1];
+            if ((last == '\"') || (last == '\'')) {
+                fn.erase(fn.end() - 1);
+            }
+        }
+        if (!fn.empty()) {
+            char first = fn[0];
+            if ((first == '\"') || (first == '\'')) {
+                fn.erase(fn.begin());
+            }
+        }
+
+        ssfiles << "    " << fn << endl;
+
+        // Open the file to test if it exists. If the open failed, the
+        // stream's fail state is set and the next candidate is tried.
+        ifstream instm(fn.c_str(), ios::in);
+        instm.close();
+        if( instm.fail() ) continue;
+        return fn;
+    }
+    return "";
+}
+
+
+// ===========================================================================
+// Handle unary minus in a command line (not in math(..))
+//
+// Every "-" word must be followed by a number; the pair is then negated via
+// do_unary_op(). On a syntax error a message is written to serr, ierr is set
+// to 2 and processing of the line stops.
+// ===========================================================================
+void Cmd::handle_cmd_unary_minus(stringstream &serr, int &ierr)
+{
+    int ipstart = 0;
+    for (;;) {
+        int ip = find(ipstart, (int)words.size()-1, "-");
+
+        // If we do not find any more minus signs then we are done.
+        if (ip == -1) return;
+
+        // A minus sign that is the last word has nothing to negate, and
+        // looking at words[ip+1] would read past the end of the word list.
+        if (ip >= (int)words.size()-1) {
+            words[ip].fatal_error(serr, ierr);
+            serr << "Expected a number following the unary -, but the -"
+                " is at the end of the line." << endl;
+            ierr = 2;
+            return;
+        }
+
+        // The word after the minus sign must be a number.
+        if (!words[ip+1].is_number()) {
+            words[ip+1].fatal_error(serr, ierr);
+            serr << "Expected the object following the unary - to"
+                " be a number." << endl;
+            serr << "Instead, it was " << words[ip+1].get_string() << endl;
+            ierr = 2;
+            return;
+        }
+
+        // Actually do the negate operation.
+        do_unary_op(ip, "-");
+        ipstart = ip+1;
+    }
+}
+         
+
+// ===========================================================================
+// Handle unary plus in a command line (not in math(..))
+//
+// Every "+" word must be followed by a number; the "+" itself carries no
+// information and is deleted. On a syntax error a message is written to
+// serr, ierr is set to 2 and processing of the line stops.
+// ===========================================================================
+void Cmd::handle_cmd_unary_plus(stringstream &serr, int &ierr)
+{
+    int ipstart = 0;
+    for (;;) {
+        int ip = find(ipstart, (int)words.size()-1, "+");
+
+        // If we do not find any more plus signs then we are done.
+        if (ip == -1) return;
+
+        // A plus sign that is the last word has nothing following it, and
+        // looking at words[ip+1] would read past the end of the word list.
+        if (ip >= (int)words.size()-1) {
+            words[ip].fatal_error(serr, ierr);
+            serr << "Expected a number following the unary +, but the +"
+                " is at the end of the line." << endl;
+            ierr = 2;
+            return;
+        }
+
+        // The word after the plus sign must be a number.
+        if (!words[ip+1].is_number()) {
+            words[ip+1].fatal_error(serr, ierr);
+            serr << "Expected the object following the unary + to"
+                " be a number." << endl;
+            serr << "Instead, it was " << words[ip+1].get_string() << endl;
+            ierr = 2;
+            return;
+        }
+
+        // The + sign is not needed. After it is deleted the number occupies
+        // position ip, so the search resumes just past it.
+        delete_words(ip, ip);
+        ipstart = ip+1;
+    }
+}
+         
+
+// ===========================================================================
+// Expand the replication syntax of a command. The following is allowed:
+//      a(1) = 15*3.0
+// meaning that 3.0 is to be replicated 15 times and thus a(1)-a(15) is set
+// by this command.
+//
+// For every "*" word, the word before it must be a number (the repeat
+// count). The word after it may be anything (number, logical, string, ...);
+// it receives the count as its multiplicity and then replaces the three
+// words "count * value". An asterisk at the very start or very end of the
+// line is an error. On error a message is written to serr and ierr is set
+// to 2.
+// ===========================================================================
+void Cmd::handle_cmd_multiplicity(stringstream &serr, int &ierr)
+{
+    int search_from = 0;
+    while (true) {
+        const int last = (int)words.size() - 1;
+        const int star = find(search_from, last, "*");
+
+        // No asterisk left: the whole line has been handled.
+        if (star == -1) return;
+
+        // The asterisk needs a word on both sides.
+        if (star == 0) {
+            fatal_error2(serr, ierr);
+            serr << "Asterisk cannot be at the start of a line." << endl;
+            ierr = 2;
+            return;
+        }
+        if (star == last) {
+            words[star].fatal_error(serr, ierr);
+            serr << "Asterisk cannot be at the end of a line." << endl;
+            ierr = 2;
+            return;
+        }
+
+        // The repeat count in front of the asterisk must be a number.
+        if (!words[star-1].is_number()) {
+            words[star-1].fatal_error(serr, ierr);
+            serr << "Expected the object before the * to be a number." << endl;
+            serr << "Instead, it was " << words[star-1].get_string() << endl;
+            ierr = 2;
+            return;
+        }
+
+        // Tag the value with its repeat count, then collapse the three
+        // words into that single value. A copy is taken first because the
+        // replacement rewrites the word list.
+        const int repeat = words[star-1].get_int(serr, ierr);
+        words[star+1].set_multiplicity(repeat);
+        Word repeated = words[star+1];
+        replace_words(star-1, star+1, repeated);
+        search_from = star;
+    }
+}
+         
+
+
+// ***************************************************************************
+// ***************************************************************************
+// Functions for getting values from the commands.
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Read logical values from this command into an int array. Every word past
+// the = sign is converted to bool and stored as 1 (true) or 0 (false).
+// The result is passed back as int because of the incompatibility between
+// fortran logical and c++ bool.
+//
+// The expected commands are:
+//    cmdname = .true.                    0d
+//    cmdname(5) = false true false       1d
+//    cmdname(5,9) = true false true      2d
+//    etc.
+// The starting indices may be left out, as in
+//    cmdname = false true false
+// in which case starting indices of (1) or (1,1), etc. are supplied.
+// Commas are already gone at this point, so the 2d command is really
+//    cmdname ( 5 9 ) = true false true
+//
+// Works for any dimension 0,1,2,3,... (the dimension is size.size()).
+//
+//   cname       Command name, forwarded to error_dup_line().
+//   array_vals  Output array (a single int for 0d).
+//   size        Size of each array dimension.
+//   dup_cmd1, dup_wdex1, dup_fatal, dup_vals
+//               Duplicate-value bookkeeping, forwarded to error_dup_line().
+//   skip        If true, only check the syntax and mark the command as
+//               processed; no values are read.
+//   serr, ierr  Error text stream and error code, forwarded to the helpers.
+// ===========================================================================
+void Cmd::get_bool_int(string &cname, int *array_vals, const vector<int> &size,
+                       vector<Cmd *> &dup_cmd1, vector<int> &dup_wdex1,
+                       int dup_fatal, vector<int> &dup_vals,
+                       bool skip, stringstream &serr, int &ierr)
+{
+    const int ndim = (int)size.size();
+
+    // Check the syntax (for example an equals sign must be present) and
+    // obtain the starting indices, which are counted from the index base.
+    vector<int> start_idx(ndim, 0);
+    if (!check_syntax(start_idx, serr, ierr)) return;
+
+    // When skipping there is no need to read any values.
+    if (skip) {
+        set_processed(true);
+        return;
+    }
+
+    // Count the values past the = sign; a failure here ends the read.
+    int nvals = 0;
+    if (!get_nvals(start_idx, size, nvals, serr, ierr)) return;
+
+    // 0d: the value is taken from the third word.
+    if (ndim == 0) {
+        const bool flag = words[2].get_bool(serr, ierr);
+        *array_vals = flag ? 1 : 0;
+        return;
+    }
+
+    // 1d and up: walk the words that follow the = sign.
+    const int first_val = find_equals() + 1;
+    Parser_utils putils(index_base);
+    int dest = putils.start_dex(start_idx, size);
+    for (int iw = first_val; iw < (int)words.size(); iw++) {
+        const bool flag = words[iw].get_bool(serr, ierr);
+        const int as_int = flag ? 1 : 0;
+
+        // A word with multiplicity m fills m consecutive slots.
+        const int repeat = words[iw].get_multiplicity();
+        for (int r = 0; r < repeat; r++) {
+            error_dup_line(cname, iw, dest, dup_wdex1, dup_cmd1, dup_vals,
+                           size, dup_fatal, serr, ierr);
+            array_vals[dest] = as_int;
+            dest++;
+        }
+    }
+}
+
+// ===========================================================================
+// Read logical values from this command into a bool array. Every word past
+// the = sign is converted to bool and stored in the output array.
+//
+// The expected commands are:
+//    cmdname = .true.                    0d
+//    cmdname(5) = false true false       1d
+//    cmdname(5,9) = true false true      2d
+//    etc.
+// The starting indices may be left out, as in
+//    cmdname = false true false
+// in which case starting indices of (1) or (1,1), etc. are supplied.
+// Commas are already gone at this point, so the 2d command is really
+//    cmdname ( 5 9 ) = true false true
+//
+// Works for any dimension 0,1,2,3,... (the dimension is size.size()).
+//
+//   cname       Command name, forwarded to error_dup_line().
+//   array_vals  Output array (a single bool for 0d).
+//   size        Size of each array dimension.
+//   dup_cmd1, dup_wdex1, dup_fatal, dup_vals
+//               Duplicate-value bookkeeping, forwarded to error_dup_line().
+//   skip        If true, only check the syntax and mark the command as
+//               processed; no values are read.
+//   serr, ierr  Error text stream and error code, forwarded to the helpers.
+// ===========================================================================
+void Cmd::get_bool(string &cname, bool *array_vals, const vector<int> &size,
+                   vector<Cmd *> &dup_cmd1, vector<int> &dup_wdex1,
+                   int dup_fatal, vector<int> &dup_vals,
+                   bool skip, stringstream &serr, int &ierr)
+{
+    const int ndim = (int)size.size();
+
+    // Check the syntax (for example an equals sign must be present) and
+    // obtain the starting indices, which are counted from the index base.
+    vector<int> start_idx(ndim, 0);
+    if (!check_syntax(start_idx, serr, ierr)) return;
+
+    // When skipping there is no need to read any values.
+    if (skip) {
+        set_processed(true);
+        return;
+    }
+
+    // Count the values past the = sign; a failure here ends the read.
+    int nvals = 0;
+    if (!get_nvals(start_idx, size, nvals, serr, ierr)) return;
+
+    // 0d: the value is taken from the third word.
+    if (ndim == 0) {
+        *array_vals = words[2].get_bool(serr, ierr);
+        return;
+    }
+
+    // 1d and up: walk the words that follow the = sign.
+    const int first_val = find_equals() + 1;
+    Parser_utils putils(index_base);
+    int dest = putils.start_dex(start_idx, size);
+    for (int iw = first_val; iw < (int)words.size(); iw++) {
+        const bool flag = words[iw].get_bool(serr, ierr);
+
+        // A word with multiplicity m fills m consecutive slots.
+        const int repeat = words[iw].get_multiplicity();
+        for (int r = 0; r < repeat; r++) {
+            error_dup_line(cname, iw, dest, dup_wdex1, dup_cmd1, dup_vals,
+                           size, dup_fatal, serr, ierr);
+            array_vals[dest] = flag;
+            dest++;
+        }
+    }
+}
+
+
+// ===========================================================================
+// Read integer values from this command into an int array. Every word past
+// the = sign is converted to int and stored in the output array.
+//
+// The expected commands are:
+//    cmdname = some_int                  0d
+//    cmdname(5) = 3, 5, -15, 10          1d
+//    cmdname(5,9) = 3, 7, -20, 154       2d
+//    etc.
+// The starting indices may be left out, as in
+//    cmdname = 3, 5, -15, 10
+// in which case starting indices of (1) or (1,1), etc. are supplied.
+// Commas are already gone at this point, so the 2d command is really
+//    cmdname ( 5 9 ) = 3  7  -20  154 ...
+//
+// Works for any dimension 0,1,2,3,... (the dimension is size.size()).
+//
+//   cname       Command name, forwarded to error_dup_line().
+//   array_vals  Output array (a single int for 0d).
+//   size        Size of each array dimension.
+//   dup_cmd1, dup_wdex1, dup_fatal, dup_vals
+//               Duplicate-value bookkeeping, forwarded to error_dup_line().
+//   skip        If true, only check the syntax and mark the command as
+//               processed; no values are read.
+//   serr, ierr  Error text stream and error code, forwarded to the helpers.
+// ===========================================================================
+void Cmd::get_int(string &cname, int *array_vals, const vector<int> &size,
+                  vector<Cmd *> &dup_cmd1, vector<int> &dup_wdex1,
+                  int dup_fatal, vector<int> &dup_vals,
+                  bool skip, stringstream &serr, int &ierr)
+{
+    const int ndim = (int)size.size();
+
+    // Check the syntax (for example an equals sign must be present) and
+    // obtain the starting indices, which are counted from the index base.
+    vector<int> start_idx(ndim, 0);
+    if (!check_syntax(start_idx, serr, ierr)) return;
+
+    // When skipping there is no need to read any values.
+    if (skip) {
+        set_processed(true);
+        return;
+    }
+
+    // Count the values past the = sign; a failure here ends the read.
+    int nvals = 0;
+    if (!get_nvals(start_idx, size, nvals, serr, ierr)) return;
+
+    // 0d: the value is taken from the third word.
+    if (ndim == 0) {
+        *array_vals = words[2].get_int(serr, ierr);
+        return;
+    }
+
+    // 1d and up: walk the words that follow the = sign.
+    const int first_val = find_equals() + 1;
+    Parser_utils putils(index_base);
+    int dest = putils.start_dex(start_idx, size);
+    for (int iw = first_val; iw < (int)words.size(); iw++) {
+        const int value = words[iw].get_int(serr, ierr);
+
+        // A word with multiplicity m fills m consecutive slots.
+        const int repeat = words[iw].get_multiplicity();
+        for (int r = 0; r < repeat; r++) {
+            error_dup_line(cname, iw, dest, dup_wdex1, dup_cmd1, dup_vals,
+                           size, dup_fatal, serr, ierr);
+            array_vals[dest] = value;
+            dest++;
+        }
+    }
+}
+
+
+// ===========================================================================
+// Read int64_t values from this command into an int64_t array. Every word
+// past the = sign is converted to int64_t and stored in the output array.
+//
+// The expected commands are:
+//    cmdname = some_int                  0d
+//    cmdname(5) = 3, 5, -15, 10          1d
+//    cmdname(5,9) = 3, 7, -20, 154       2d
+//    etc.
+// The starting indices may be left out, as in
+//    cmdname = 3, 5, -15, 10
+// in which case starting indices of (1) or (1,1), etc. are supplied.
+// Commas are already gone at this point, so the 2d command is really
+//    cmdname ( 5 9 ) = 3  7  -20  154 ...
+//
+// Works for any dimension 0,1,2,3,... (the dimension is size.size()).
+//
+//   cname       Command name, forwarded to error_dup_line().
+//   array_vals  Output array (a single int64_t for 0d).
+//   size        Size of each array dimension.
+//   dup_cmd1, dup_wdex1, dup_fatal, dup_vals
+//               Duplicate-value bookkeeping, forwarded to error_dup_line().
+//   skip        If true, only check the syntax and mark the command as
+//               processed; no values are read.
+//   serr, ierr  Error text stream and error code, forwarded to the helpers.
+// ===========================================================================
+void Cmd::get_int(string &cname, int64_t *array_vals, const vector<int> &size,
+                  vector<Cmd *> &dup_cmd1, vector<int> &dup_wdex1,
+                  int dup_fatal, vector<int> &dup_vals,
+                  bool skip, stringstream &serr, int &ierr)
+{
+    const int ndim = (int)size.size();
+
+    // Check the syntax (for example an equals sign must be present) and
+    // obtain the starting indices, which are counted from the index base.
+    vector<int> start_idx(ndim, 0);
+    if (!check_syntax(start_idx, serr, ierr)) return;
+
+    // When skipping there is no need to read any values.
+    if (skip) {
+        set_processed(true);
+        return;
+    }
+
+    // Count the values past the = sign; a failure here ends the read.
+    int nvals = 0;
+    if (!get_nvals(start_idx, size, nvals, serr, ierr)) return;
+
+    // 0d: the value is taken from the third word.
+    if (ndim == 0) {
+        *array_vals = words[2].get_int64_t(serr, ierr);
+        return;
+    }
+
+    // 1d and up: walk the words that follow the = sign.
+    const int first_val = find_equals() + 1;
+    Parser_utils putils(index_base);
+    int dest = putils.start_dex(start_idx, size);
+    for (int iw = first_val; iw < (int)words.size(); iw++) {
+        const int64_t value = words[iw].get_int64_t(serr, ierr);
+
+        // A word with multiplicity m fills m consecutive slots.
+        const int repeat = words[iw].get_multiplicity();
+        for (int r = 0; r < repeat; r++) {
+            error_dup_line(cname, iw, dest, dup_wdex1, dup_cmd1, dup_vals,
+                           size, dup_fatal, serr, ierr);
+            array_vals[dest] = value;
+            dest++;
+        }
+    }
+}
+
+// ===========================================================================
+// Read real values from this command into a double array. Every word past
+// the = sign is converted to double and stored in the output array.
+//
+// The expected commands are:
+//    cmdname = some_double                         0d
+//    cmdname(5) = 3.0, 35, -15e20, 10.154          1d
+//    cmdname(5,9) = 3.0, 35, -15e20, 10.154        2d
+//    etc.
+// The starting indices may be left out, as in
+//    cmdname = 3.0, 35, -15e20, 10.154
+// in which case starting indices of (1) or (1,1), etc. are supplied.
+// Commas are already gone at this point, so the 2d command is really
+//    cmdname ( 5 9 ) = 3.0  35  -15e20  10.154  ...
+//
+// Works for any dimension 0,1,2,3,... (the dimension is size.size()).
+//
+//   cname       Command name, forwarded to error_dup_line().
+//   array_vals  Output array (a single double for 0d).
+//   size        Size of each array dimension.
+//   dup_cmd1, dup_wdex1, dup_fatal, dup_vals
+//               Duplicate-value bookkeeping, forwarded to error_dup_line().
+//   skip        If true, only check the syntax and mark the command as
+//               processed; no values are read.
+//   serr, ierr  Error text stream and error code, forwarded to the helpers.
+// ===========================================================================
+void Cmd::get_real(string &cname, double *array_vals, const vector<int> &size,
+                   vector<Cmd *> &dup_cmd1, vector<int> &dup_wdex1,
+                   int dup_fatal, vector<int> &dup_vals,
+                   bool skip, stringstream &serr, int &ierr)
+{
+    const int ndim = (int)size.size();
+
+    // Check the syntax (for example an equals sign must be present) and
+    // obtain the starting indices, which are counted from the index base.
+    vector<int> start_idx(ndim, 0);
+    if (!check_syntax(start_idx, serr, ierr)) return;
+
+    // When skipping there is no need to read any values.
+    if (skip) {
+        set_processed(true);
+        return;
+    }
+
+    // Count the values past the = sign; a failure here ends the read.
+    int nvals = 0;
+    if (!get_nvals(start_idx, size, nvals, serr, ierr)) return;
+
+    // 0d: the value is taken from the third word. No duplicate bookkeeping
+    // is done here because duplicate scalar commands are handled
+    // differently from array commands.
+    if (ndim == 0) {
+        *array_vals = words[2].get_double(serr, ierr);
+        return;
+    }
+
+    // 1d and up: walk the words that follow the = sign.
+    const int first_val = find_equals() + 1;
+    Parser_utils putils(index_base);
+    int dest = putils.start_dex(start_idx, size);
+    for (int iw = first_val; iw < (int)words.size(); iw++) {
+        const double value = words[iw].get_double(serr, ierr);
+
+        // A word with multiplicity m fills m consecutive slots.
+        const int repeat = words[iw].get_multiplicity();
+        for (int r = 0; r < repeat; r++) {
+            error_dup_line(cname, iw, dest, dup_wdex1, dup_cmd1, dup_vals,
+                           size, dup_fatal, serr, ierr);
+            array_vals[dest] = value;
+            dest++;
+        }
+    }
+}
+
+
+// ===========================================================================
+// Read character values from this command into a vector of strings. Every
+// word past the = sign is stored, as a string, in the output vector.
+//
+// The expected commands are:
+//    cmdname = q                                   0d single character
+//    cmdname = char_string                         0d character string
+//    cmdname(3) = "May" "the", "force", "be"       1d array of strings
+//    cmdname(5,9) = "11" "21" "31"                 2d
+//    etc.
+// The starting indices may be left out, as in
+//    cmdname = "May" "the", "force", "be"
+// in which case starting indices of (1) or (1,1), etc. are supplied.
+// Commas are already gone at this point, so the 2d command is really
+//    cmdname ( 5 9 ) = "11" "21" "31"
+//
+// Works for any dimension 0,1,2,3,... (the dimension is size.size()).
+//
+//   cname        Command name, forwarded to error_dup_line().
+//   vstr         Output vector; it is indexed, not grown, so the caller
+//                must have sized it already.
+//   size         Size of each array dimension.
+//   single_char  For 0d only: true to read a single character, false to
+//                read a character string.
+//   dup_cmd1, dup_wdex1, dup_fatal, dup_vals
+//                Duplicate-value bookkeeping, forwarded to error_dup_line().
+//   skip         If true, only check the syntax and mark the command as
+//                processed; no values are read.
+//   serr, ierr   Error text stream and error code, forwarded to the helpers.
+// ===========================================================================
+void Cmd::get_char(string &cname, vector<string> &vstr, const vector<int> &size,
+                   bool single_char, vector<Cmd *> &dup_cmd1,
+                   vector<int> &dup_wdex1, int dup_fatal,
+                   vector<int> &dup_vals, bool skip,
+                   stringstream &serr, int &ierr)
+{
+    const int ndim = (int)size.size();
+
+    // Check the syntax (for example an equals sign must be present) and
+    // obtain the starting indices, which are counted from the index base.
+    vector<int> start_idx(ndim, 0);
+    if (!check_syntax(start_idx, serr, ierr)) return;
+
+    // When skipping there is no need to read any values.
+    if (skip) {
+        set_processed(true);
+        return;
+    }
+
+    // Count the values past the = sign; a failure here ends the read.
+    int nvals = 0;
+    if (!get_nvals(start_idx, size, nvals, serr, ierr)) return;
+
+    // 0d: the value is taken from the third word, either as a single
+    // character or as a whole string.
+    if (ndim == 0) {
+        if (single_char) {
+            vstr[0] = words[2].get_single_char(serr, ierr);
+        }
+        else {
+            vstr[0] = words[2].get_stringp();
+        }
+        return;
+    }
+
+    // 1d and up: walk the words that follow the = sign.
+    // get_stringp is the same as get_string except get_stringp also marks
+    // the word as being processed.
+    const int first_val = find_equals() + 1;
+    Parser_utils putils(index_base);
+    int dest = putils.start_dex(start_idx, size);
+    for (int iw = first_val; iw < (int)words.size(); iw++) {
+        const string text = words[iw].get_stringp();
+
+        // A word with multiplicity m fills m consecutive slots.
+        const int repeat = words[iw].get_multiplicity();
+        for (int r = 0; r < repeat; r++) {
+            error_dup_line(cname, iw, dest, dup_wdex1, dup_cmd1, dup_vals,
+                           size, dup_fatal, serr, ierr);
+            vstr[dest] = text;
+            dest++;
+        }
+    }
+}
+
+
+// ===========================================================================
+// Get sizes of arrays, this works for dimensions 1,2,3,...
+//
+// The size vector contains the sizes (or bounds) of each array dimension.
+// It is assumed in this routine that all but the last size is known (this
+// is input) and that this routine will determine the last size. See the
+// get_sizeb function where a different assumption is made.
+//
+// Suppose, for example, we have a 3d array called a3d which is dimensioned
+// a3d(5,3,:). The first two dimensions are known, 5 and 3, the last dimension
+// is unknown and will be determined by this routine.
+//
+// size is a vector of ints of size 3, with elements 5,3,? where the ? is
+// to be determined.
+//
+// Note that this routine is called in a loop over all the lines in the
+// input which is why we set the size using maximum.
+//
+//   size        In: the known leading sizes. Out: the last entry is raised
+//               if this command reaches further than the current value.
+//   serr, ierr  Error text stream and error code. On an internal error
+//               (no dimensions, or a leading size of zero) a message is
+//               written and ierr is set to 2.
+// ===========================================================================
+void Cmd::get_size(vector<int> &size, stringstream &serr, int &ierr)
+{
+    // Get the dimension of the array, 1,2,3,...
+    int dim = (int)size.size();
+
+    // There has to be a last dimension to determine; with dim == 0 the
+    // accesses to element dim-1 below would be out of bounds.
+    if (dim < 1) {
+        fatal_error2(serr, ierr);
+        serr << "Cmd.cc, get_size, internal error." << endl;
+        serr << "dim < 1, dim=" << dim << endl << endl;
+        ierr = 2;
+        return;
+    }
+
+    // Check syntax, also sets istart.
+    // istart   Position in array where we start filling it.
+    //          Example command might be a3d(3,2,2) = ...  In this case istart
+    //          would be a vector of length 3 containing 3,2,2
+    //          Note that istart starts from the index base (1 is Fortran
+    //          style, 0 is the C/C++ convention; see set_index_base).
+    vector<int> istart(dim,0);
+    if (!check_syntax(istart, serr, ierr)) return;
+
+    // Get the number of values past the = sign.
+    int nvals = 0;
+    vector<int> size0(dim,0);
+    if (!get_nvals(istart, size0, nvals, serr, ierr)) return;
+
+    // Number of values needed to advance the last index by one, i.e. the
+    // product of all the leading sizes.
+    int sm = 1;
+    for (int i=0; i<dim-1; i++) {
+        sm *= size[i];
+    }
+
+    // A leading size of zero would make the division below divide by zero.
+    if (sm == 0) {
+        fatal_error2(serr, ierr);
+        serr << "Cmd.cc, get_size, internal error." << endl;
+        serr << "A leading array dimension has size 0." << endl << endl;
+        ierr = 2;
+        return;
+    }
+
+    // Set the size: the largest last index reached by this command, kept
+    // only if it exceeds what earlier commands already required.
+    int maxval = istart[dim-1] + (nvals-1)/sm;
+    if (maxval > size[dim-1]) {
+        size[dim-1] = maxval;
+    }
+}
+
+
+// ===========================================================================
+// This is a special purpose routine to get sizes for certain 2d arrays.
+//
+// Suppose we have the following input
+//      mults(1,1) = 0. 0.  1. 5.  6. 9.
+//      mults(1,2) = 3. 5.  8. 9.  10. 11. 20. 10
+//      mults(1,3) = 30. 5. 38. 3.
+// In this case we don't know the size of either of the array dimensions, and
+// of course the user does not know the size either and thus cannot somehow
+// merge the above two lines. 
+//
+// The purpose of this function is to obtain sizes for both the array
+// dimensions so memory allocation of the array can be done.
+//
+// The size vector contains the sizes (or bounds) of each array dimensions.
+// For the above example, this function would determine size[0] to be 8 and
+// size[1] to be 3. 8 is just the max of the number of values put in per
+// entry and 3 is just the max of the second index.
+//
+// Note that this routine is called in a loop over all the lines in the
+// input which is why we set the size using maximum.
+//
+// This routine only works for 2d arrays.
+// ===========================================================================
+void Cmd::get_sizeb(vector<int> &size, stringstream &serr, int &ierr)
+{
+    // Only rank-2 size vectors are supported; anything else is reported as
+    // an internal error.
+    const int rank = (int)size.size();
+    if (rank != 2) {
+        fatal_error2(serr, ierr);
+        serr << "Cmd.cc, get_sizeb, internal error." << endl;
+        serr << "dim != 2, dim=" << rank << endl << endl;
+        ierr = 2;
+        return;
+    }
+
+    // Validate the line and pick up the starting indices written on the
+    // left of the = sign (counted from index_base). Give up if the syntax
+    // check rejects the line.
+    vector<int> first(rank,0);
+    bool syntax_ok = check_syntax(first, serr, ierr);
+    if (!syntax_ok) return;
+
+    // Count the values on the right of the = sign. Passing an all-zero size
+    // vector makes get_nvals return right after counting.
+    vector<int> no_bounds(rank,0);
+    int count = 0;
+    bool counted = get_nvals(first, no_bounds, count, serr, ierr);
+    if (!counted) return;
+
+    // First extent: the furthest position reached along dimension one.
+    int reach = first[0] + count - 1;
+    if (size[0] < reach) {
+        size[0] = reach;
+    }
+
+    // Second extent: the largest second index seen so far.
+    int second = first[1];
+    if (size[1] < second) {
+        size[1] = second;
+    }
+}
+
+
+// ===========================================================================
+// Check command syntax for any dimension array. The expected command is:
+//    cmdname = .true.                          0d
+//    cmdname(5) = 1, 3, -4                     1d
+//    cmdname(3,4) = 1.e19, 23., -45.           2d
+//    etc.
+//
+// We also allow
+//    cmdname = "May" "the", "force", "be"
+// and we will supply the starting indices of (1) or (1,1), etc.
+//
+// Note that at this point, the commas have been removed so the 2d command
+// is actually
+//    cmdname ( 3 4 ) = 1.e19 23. -45. 
+// ===========================================================================
+bool Cmd::check_syntax(vector<int> &istart, stringstream &serr, int &ierr)
+{
+    // Returns false when the line has too few words or a start index ends up
+    // below index_base; the other checks only report and set ierr = 2.
+    // The array dimension (0,1,2,3,...) is the length of istart.
+    int dim = (int)istart.size();
+
+    bool skip_check = false;   // true for the index-free form, cmdname = v1 v2 ...
+    if (dim > 0) {
+        int ieqt = find_equals();
+        if (ieqt == 1) skip_check = true;
+    }
+
+    // Must be at least nw_min words on the line.
+    int nw_min = 3;
+    int nw_min_wc = 3;   // NOTE(review): _wc presumably = count including the removed commas - confirm
+    if (dim > 0 && (!skip_check)) {
+        nw_min = 6 + dim - 1;
+        nw_min_wc = nw_min + dim - 1;
+    }
+    if ((int)words.size() < nw_min) {
+        fatal_error2(serr, ierr);
+        serr << "Expected number words in this line >= " << nw_min_wc << endl;
+        serr << "Actual number words = " << words.size() << endl << endl;
+        ierr = 2;
+        // If there aren't enough words on the line, then it is hopeless.
+        return false;
+    }
+
+    // Word at index ieq must be an = sign. ieq_wc is the number quoted in
+    // the message.
+    int ieq = 1;
+    int ieq_wc = 2;
+    if (dim > 0 && (!skip_check)) {
+        ieq = 4 + dim -1;
+        ieq_wc = ieq + 1 + dim - 1;
+    }
+    if (words[ieq].get_string() != "=") {
+        words[ieq].fatal_error(serr, ierr);
+        serr << "Expected an equals sign for symbol " << ieq_wc << endl;
+        serr << "Instead symbol " << ieq_wc << " is: " <<
+            words[ieq].get_string() << endl << endl;
+        ierr = 2;
+    }   // no return here: the remaining checks still run
+
+    // The value must not have any multiplicity, i.e. be just a single value.
+    // This only applies to 0d, values for arrays can have multiplicity.
+    if (dim == 0) {
+        if (words[2].get_multiplicity() != 1) {
+            words[2].fatal_error(serr, ierr);
+            serr << "Multiplicity not equal 1 for " << words[2].get_string() << endl;
+            serr << "Multiplicity is: " << words[2].get_multiplicity() << endl << endl;
+            ierr = 2;
+        }
+    }
+
+    // Nothing more to check for 0d.
+    if (dim == 0) return true;
+
+    if (!skip_check) {
+        // Word at index 1 must be a "(".
+        if (words[1].get_string() != "(") {
+            words[1].fatal_error(serr, ierr);
+            serr << "Expected an open parenthesis ,(, following the command name"
+                " in this line," << endl;
+            serr << "For example: " << cmd_name << "(...) = ..." << endl;
+            serr << "Instead found: " << words[1].get_string() << endl << endl;
+            ierr = 2;
+        }
+
+        // There must be a closing parenthesis right after the dim indices.
+        int irp = 3 + dim - 1;
+        if (words[irp].get_string() != ")") {
+            words[irp].fatal_error(serr, ierr);
+            serr << "Expected a close parenthesis ,), following the array indices"
+                " in this line," << endl;
+            serr << "For example: " << cmd_name << "(...) = ..." << endl;
+            serr << "Instead found: " << words[irp].get_string() << endl << endl;
+            ierr = 2;
+        }
+    }
+
+    // istart   Position in array_vals where we start filling it.
+    //          Note that istart starts from index base (default 1, Fortran style)
+    //          Use set_index_base_zero for C/C++ index convention
+    if (skip_check) {
+        // No indices were written, so every dimension starts at index_base.
+        for (int i=0; i<dim; i++) {
+            istart[i] = index_base;
+        }
+    }
+    else {
+        int ierr2 = 0;   // a bad index is noted here and folded into ierr after the loop
+        for (int i=0; i<dim; i++) {
+            int iloc = 2 + i;
+            istart[i] = words[iloc].get_int(serr, ierr);
+            if (ierr < 2  &&  istart[i] < index_base) {
+                words[iloc].fatal_error(serr, ierr);
+                serr << "The index for the array must be an integer that is >= " << index_base << endl;
+                serr << "Integer includes numbers like 3, 3., 3.0, but not 3.5" << endl;
+                serr << "The index input is: " << istart[i] << endl << endl;
+                ierr2 = 2;
+            }
+        }
+        if (ierr2 == 2) ierr = 2;
+    }
+
+    for (int i=0; i<dim; i++) {
+        if (istart[i] < index_base) return false;
+    }
+    return true;
+}
+
+
+// ===========================================================================
+// Get the number of values (nvals) past the = sign.
+// Check that nvals does not exceed array size.
+// Also mark the words up to and including the = sign as processed.
+//
+// Works for any array dimension = 0,1,2,3,...
+// ===========================================================================
+bool Cmd::get_nvals(vector<int> &istart, const vector<int> &size,
+                    int &nvals, stringstream &serr, int &ierr)
+{
+    // istart  Start position given on the line; its length is the array
+    //         dimension (0,1,2,3,...).
+    // size    Array extents. size[0] == 0 means "count only": nvals is
+    //         returned without the bounds check or the processed marking.
+    // nvals   Output: sum of the multiplicities of the words past the = sign
+    //         (always 1 for 0d).
+    // serr    Receives the text of any error reported here.
+    // ierr    Set to 2 when an error is reported.
+    // Returns false only if ierr is 2 once the bounds check has been done;
+    // the 0d and count-only paths always return true.
+
+    int nvals_cur;
+    // Get the array dimension, 0,1,2,3,...
+    int dim = (int)istart.size();
+
+    // 0d is a special case: exactly one value, and the first two words are
+    // marked as processed.
+    if (dim == 0) {
+        nvals = 1;
+        words[0].set_processed(true);
+        words[1].set_processed(true);
+        return true;
+    }
+
+    // Index of word after equals sign (find_equals() is assumed to return
+    // the index of the = word - TODO confirm).
+    int ieqp1 = find_equals() + 1;
+
+    // nvals    Number of values after the = sign.
+    nvals = 0;
+    for (int i=ieqp1; i<(int)words.size(); i++) {
+        nvals_cur = words[i].get_multiplicity();
+        // A non-positive multiplicity is reported, but the loop keeps going
+        // and the value is still added to the total.
+        if( nvals_cur <= 0 ){
+            fatal_error2(serr, ierr);
+            serr << "Count must be positive [" << nvals_cur << "]" << endl;
+            ierr = 2;
+        }
+        nvals += nvals_cur;
+    }
+
+    // This is for the get size function. We just want nvals and do not want
+    // to do the check or marking as processed.
+    if (size[0] == 0) return true;
+
+    // Get the max size of the array, i.e. the product of all the extents.
+    int maxvals = size[0];
+    for (int i=2; i<=dim; i++) {
+        maxvals *= size[i-1];
+    }
+
+    // Find the excess, i.e. the max array position the user is trying to
+    // fill compared with the max size allowed.
+    // NOTE(review): start_dex presumably returns the zero-based linear offset
+    // of istart within the array; the hand-written form it replaced was
+    // istart[0]-1 + (istart[1]-1)*size[0] + ... - confirm in Parser_utils.
+    Parser_utils putils(index_base);
+    int ix = putils.start_dex(istart, size);
+    int excess = ix + nvals - maxvals;
+
+    // Check that the number of values input by the user does not exceed
+    // the array size.
+    if (excess > 0) {
+        fatal_error2(serr, ierr);
+        serr << "Maximum number of values allowed = " << maxvals << endl;
+        serr << "(for multi-dimension arrays this max number is" << endl;
+        serr << "     max_dim1 * max_dim2 * ...)" << endl;
+        serr << "This command exceeds that value by " <<
+             excess << endl << endl;
+        ierr = 2;
+    }
+    
+    // If fatal errors, then do not attempt further processing.
+    if (ierr == 2) return false;
+
+    // Mark every word before ieqp1, i.e. up to and including the = sign,
+    // as processed.
+    for (int i=0; i<ieqp1; i++) {
+        words[i].set_processed(true);
+    }
+
+    return true;
+}
+
+
+
+// ***************************************************************************
+// ***************************************************************************
+// Handle variables.
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Check for the user defining variable dimensions for multi-dimensional
+// arrays.
+//
+// This function works for any dimension, 0,1,2,3,...
+// ===========================================================================
+bool Cmd::check_for_dimension(stringstream &serr, int &ierr)
+{
+    // A dimension line needs at least the variable name and the keyword;
+    // checking the count first keeps words[0] and words[1] in range.
+    if (words.size() < 2) return false;
+    string varname = words[0].get_string();
+    if (!words[0].is_variable()) return false;
+
+    // A line holding an = sign is an assignment, not a dimension statement.
+    for (int i=0; i<(int)words.size(); i++) {
+        if (words[i].get_string() == "=") return false;
+    }
+    if (words[1].get_string() != "dimension") return false;
+
+    // Find the variable; if not found, create it and keep the iterator that
+    // insert hands back, so p always refers to a real entry below.
+    map<string, Variable>::iterator p = vmap->find(varname);
+    if (p == vmap->end()) {
+        Variable v(varname);
+        p = vmap->insert(pair<string, Variable>(v.get_varname(), v)).first;
+    }
+
+    // Extract the bounds from the line: skip the punctuation and stop at the
+    // closing parenthesis.
+    vector<int> bounds;
+    for (int i=2; i<(int)words.size(); i++) {
+        if (words[i].get_string() == "(") continue;
+        if (words[i].is_comma())          continue;
+        if (words[i].get_string() == ":") continue;
+        if (words[i].get_string() == ")") break;
+        // Get the bounds, note that this also makes sure it is an integer.
+        bounds.push_back(words[i].get_int(serr, ierr));
+    }
+
+    // Actually set the bounds for the variable.
+    int lnum = words[0].get_line_number();
+    int file_lnum = words[0].get_file_line_number();
+    string fname = words[0].get_filename();
+    p->second.set_bounds(bounds, lnum, file_lnum, fname,
+                         lines, serr, ierr);
+
+    // The line was a dimension statement, whether or not errors were found.
+    return true;
+}
+
+
+// ===========================================================================
+// Check for the command
+//    variable_description variable_name description
+// If found, then set the description for the variable.
+// Create the variable if necessary.
+// ===========================================================================
+bool Cmd::check_for_var_description(stringstream &serr, int &ierr)
+{
+    // Lines that do not start with the keyword are not handled here.
+    if (!(words[0].get_string() == "variable_description")) return false;
+
+    // From this point the line is treated as a variable_description command:
+    // true is returned even when an error is reported.
+    // Exactly three words are required.
+    if (words.size() != 3) {
+        words[0].fatal_error(serr, ierr);
+        serr << "The variable_description command must have 3 words on the"
+            " line" << endl;
+        serr << "First word =  variable_description" << endl;
+        serr << "Second word =  name of the variable" << endl;
+        serr << "Third word =  description (usually some phrase in quotes)" << endl;
+        serr << "This command has " << words.size() <<
+            " words instead of 3 words." << endl;
+        ierr = 2;
+        return true;
+    }
+
+    // Word 1 has to be a variable name.
+    string varname = words[1].get_string();
+    if (!words[1].is_variable()) {
+        words[0].fatal_error(serr, ierr);
+        serr << "Expected a variable name as word 2" << endl;
+        serr << "Variable names must begin with the $ character." << endl;
+        serr << "This variable name does not begin with a $ character." << endl;
+        serr << "Note that putting quotes around a variable name makes it" << endl;
+        serr << "a string, not a variable." << endl;
+        serr << "Variable name = " << varname << endl;
+        ierr = 2;
+        return true;
+    }
+
+    // Make sure the variable exists in the map (creating it when missing),
+    // then look it up.
+    if (vmap->find(varname) == vmap->end()) {
+        Variable fresh(varname);
+        vmap->insert(pair<string, Variable>(fresh.get_varname(), fresh));
+    }
+    map<string, Variable>::iterator entry = vmap->find(varname);
+
+    // Cannot change pre-defined variables.
+    if (entry->second.is_pre_defined()) {
+        words[0].fatal_error(serr, ierr);
+        serr << "Cannot change the description for a pre-defined"
+             " variable" << endl;
+        serr << "Variable name = " << varname << " is pre-defined." << endl;
+        ierr = 2;
+        return true;
+    }
+
+    // Word 2 is the description text; store it on the variable.
+    string vardes = words[2].get_string();
+    entry->second.set_description(vardes);
+
+    return true;
+}
+
+
+// ===========================================================================
+// Go through each word on the line (skipping the first word when the line
+// contains an equals sign), and replace each variable with its value.
+// This is for scalar variables only.
+// ===========================================================================
+void Cmd::substitute_variables(stringstream &serr, int &ierr)
+{
+    // Word 0 is left alone when the line holds an = sign anywhere, so the
+    // scan then begins at word 1; otherwise every word is scanned.
+    bool has_equals = false;
+    for (int k=0; k<(int)words.size() && !has_equals; k++) {
+        has_equals = (words[k].get_string() == "=");
+    }
+    int first = has_equals ? 1 : 0;
+    int last = (int)words.size()-1;
+    subvar_w0(first, last, serr, ierr);
+}
+
+
+// ===========================================================================
+// Scan words i1 through i2 inclusive, replace any variables found with
+// their value.
+// If the variable is followed by ++ or --, handle that also.
+// This is for scalar variables only.
+// ===========================================================================
+void Cmd::subvar_w0(int i1, int &i2, stringstream &serr, int &ierr)
+{
+    // i2 shrinks whenever a ++ or -- word is deleted, so it is re-read each pass.
+    for (int pos=i1; pos<=i2; pos++) {
+        string name = words[pos].get_string();
+        if (!words[pos].is_variable()) continue;
+        // A ++ or -- right after the variable (inside the range) bumps it.
+        int step = 0;
+        if (pos < i2) {
+            string follower = words[pos+1].get_string();
+            if (follower == "++") step = 1;
+            if (follower == "--") step = -1;
+        }
+        subvar0(pos, name, step, serr, ierr);
+        if (step == 0) continue;
+        delete_words(pos+1,pos+1);
+        i2 -= 1;
+    }
+}
+
+
+// ===========================================================================
+// Given a variable name, varname, and its index in the words array, vardex,
+// replace it with its value.
+// This is for scalar variables only.
+// ===========================================================================
+void Cmd::subvar0(int vardex, string &varname, int increment,
+                  stringstream &serr, int &ierr)
+{
+    // Using a variable that has not been defined yet is a fatal error.
+    map<string, Variable>::iterator hit = vmap->find(varname);
+    if (hit == vmap->end()) {
+        words[vardex].fatal_error(serr, ierr);
+        serr << "Attempted to use a variable before it was defined."
+             << endl;
+        serr << "Undefined variable = " << varname << endl;
+        ierr = 2;
+        return;
+    }
+
+    // Where the word came from, passed along to the Variable calls.
+    int lnum = words[vardex].get_line_number();
+    int file_lnum = words[vardex].get_file_line_number();
+    string fname = words[vardex].get_filename();
+
+    // Scalars use an empty index list; the value is read before any ++/-- bump.
+    vector<int> no_index;
+    string svalue = hit->second.get_var_value(no_index, words[vardex].get_string(),
+                                              lnum, file_lnum, fname,
+                                              lines, serr, ierr);
+    if (increment != 0) {
+        hit->second.bump_var(no_index, increment, lnum, file_lnum, fname,
+                             lines, serr, ierr);
+    }
+    words[vardex].set_value(svalue);
+}
+
+
+// ===========================================================================
+// Store the variable value(s), define if needed.
+// Examples:
+//    $radius = 3.0                         0d
+//    $radius(1) = 3.0 4. 5.6e19            1d
+//    $radius(3,4) = 3.0 4. 5.6e19 4 5 9    2d
+//    ...
+//
+// This function works for any dimension, 0,1,2,3,...
+// ===========================================================================
+void Cmd::set_variables(stringstream &serr, int &ierr)
+{
+    // Lines without an = sign, or whose first word is not a variable, are ignored.
+    int ieq = -1;
+    for (int i=0; i<(int)words.size(); i++) {
+        if (words[i].get_string() == "=") {
+            ieq = i;
+            break;
+        }
+    }
+
+    // If an equals sign was not found on the line, then this is not a
+    // variable assignment.
+    if (ieq == -1) return;
+
+    // If the first character of the first word is not a $, then this is not
+    // a variable assignment.
+    string vname = words[0].get_string();
+    if (!words[0].is_variable()) return;
+
+    // Define a few common things.
+    int lnum = words[0].get_line_number();
+    int file_lnum = words[0].get_file_line_number();
+    string fname = words[0].get_filename();
+    vector<string> valvec;
+
+    int dim = 0;
+    if (ieq >= 4) dim = ieq - 3;
+    // dim comes from where the = sign sits: $v ( i1 ... idim ) = puts it at index dim+3.
+
+    // Do some checking.
+    if (dim == 0) {
+        // Must be 3 words in the line (for example: $radius = 3.0)
+        if (words.size() != 3) {
+            fatal_error2(serr, ierr);
+            serr << "Expected number words in this line = 3" << endl;
+            serr << "Actual number words = " << words.size() << endl << endl;
+            ierr = 2;
+            // If there aren't enough words on the line, then it is hopeless.
+            if (words.size() < 3) return;
+        }
+
+        // The value must not have any multiplicity, i.e. be just a single value.
+        // This only applies to 0d, values for arrays can have multiplicity.
+        if (words[2].get_multiplicity() != 1) {
+            words[2].fatal_error(serr, ierr);
+            serr << "Multiplicity not equal 1 for " << words[2].get_string() << endl;
+            serr << "Multiplicity is: " << words[2].get_multiplicity() << endl << endl;
+            ierr = 2;
+        }
+    }
+
+    if (dim > 0) {
+        int nw_min = dim + 5;
+        int nw_min_wc = nw_min + dim - 1;
+        if ((int)words.size() < nw_min) {
+            fatal_error2(serr, ierr);
+            serr << "Expected number of symbols in this line >= " << nw_min_wc << endl;
+            serr << "Actual number of symbols is less than expected." << endl << endl;
+            ierr = 2;
+            // If there aren't enough words on the line, then it is hopeless.
+            return;
+        }
+
+        // Word at index 1 must be a "(".
+        if (words[1].get_string() != "(") {
+            words[1].fatal_error(serr, ierr);
+            serr << "Expected an open parenthesis ,(, following the variable name"
+                " in this line," << endl;
+            serr << "For example: " << vname << "(...) = ..." << endl;
+            serr << "Instead found: " << words[1].get_string() << endl << endl;
+            ierr = 2;
+            return;
+        }
+
+        // There must be a closing parenthesis right after the dim indices.
+        int irp = 3 + dim - 1;
+        if (words[irp].get_string() != ")") {
+            words[irp].fatal_error(serr, ierr);
+            serr << "Expected a close parenthesis ,), following the array indices"
+                " in this line," << endl;
+            serr << "For example: " << vname << "(...) = ..." << endl;
+            serr << "Instead found: " << words[irp].get_string() << endl << endl;
+            ierr = 2;
+            return;
+        }
+    }
+
+    // Store the values in a vector, each one repeated by its multiplicity.
+    for (int i=ieq+1; i<(int)words.size(); i++) {
+        int imult = words[i].get_multiplicity();
+        string s = words[i].get_string();
+        for (int j=1; j<=imult; j++) {
+            valvec.push_back(s);
+        }
+    }
+
+    // Store the array indices in a vector.
+    vector<int> istart(dim,0);
+    int ierr2 = 0;
+    for (int d=0; d<dim; d++) {
+        istart[d] = words[d+2].get_int(serr, ierr);
+        if (istart[d] <= 0) {   // NOTE(review): tests <= 0 although the message quotes index_base - confirm
+            words[d+2].fatal_error(serr, ierr);
+            serr << "The index for the array must be an integer that is >= " << index_base << endl;
+            serr << "Integer includes numbers like 3, 3., 3.0, but not 3.5" << endl;
+            serr << "The index input is: " << istart[d] << endl << endl;
+            ierr2 = 2;
+        }
+    }
+    if (ierr2 == 2) {
+        ierr = 2;
+        return;
+    }
+
+    // Find the variable name in the variable map.
+    map<string, Variable>::iterator p;
+    p = vmap->find(vname);
+
+    // If the variable is found in the variable map, then replace
+    // its value with the new value. If the variable is not found
+    // in the variable map, then add it as a new variable.
+    if (p != vmap->end()) {
+        p->second.set_var_value(istart, valvec, lnum, file_lnum, fname,
+                                lines, serr, ierr);
+    }
+    else {
+        Variable v(vname, istart, valvec, lnum, file_lnum, fname, lines,
+                   serr, ierr);
+        vmap->insert(pair<string, Variable>(v.get_varname(), v));
+    }
+}
+
+
+
+// ===========================================================================
+// Evaluate a variable.
+// We have a word followed by multiple arguments. Find out if the word is
+// a variable, use the arguments to get the variable value, replace the
+// variable and arguments with its value.
+//
+// This function works for any dimension, 0,1,2,3,...
+// ===========================================================================
+bool Cmd::evaluate_variable(int iw1, int &i2, int &nargs,
+                            stringstream &serr, int &ierr)
+{
+    // If there is no map of variables, then we do nothing.
+    if (vmap == NULL) return false;
+
+    // Do nothing if the word is not a variable (begins with $).
+    if (!words[iw1].is_variable()) return false;
+
+    // Get the variable name. Every path from here on returns true, error or not.
+    string varname = words[iw1].get_string();
+
+    // Find the variable.
+    map<string, Variable>::iterator p;
+    p = vmap->find(varname);
+
+    // The variable was not found: report it along with the names that are
+    // currently defined.
+    if (p == vmap->end()) {
+        words[iw1].fatal_error(serr, ierr);
+        serr << "Trying to use a variable before it is defined." << endl;
+        serr << "Undefined variable = " << varname << endl;
+        serr << "The list of defined variables (at this point) is:" << endl;
+        for (p=vmap->begin(); p!=vmap->end(); p++) {
+            serr << p->second.get_varname() << endl;
+        }
+        ierr = 2;
+        return true;
+    }
+
+    // The variable was found, do the evaluation and replace the words.
+    // The nargs index words are the ones at iw1+1 .. iw1+nargs.
+    // Check to see if all the variable arguments have a value.
+    bool has_value = true;
+    for (int i=0; i<nargs; i++) {
+        int j = iw1 + 1 + i;
+        if (!words[j].is_number()) {
+            has_value = false;
+            words[j].fatal_error(serr, ierr);
+            serr << "Expected a number for variable index " <<
+                i+1 << endl;
+            serr << "Instead found:   " << words[j].get_string() << endl;
+            ierr = 2;
+        }
+    }
+    if (!has_value) return true;
+
+    // Check to see if all the variable arguments are integer.
+    bool all_int = true;
+    for (int i=0; i<nargs; i++) {
+        int j = iw1 + 1 + i;
+        if (!words[j].is_integer()) {
+            all_int = false;
+            words[j].fatal_error(serr, ierr);
+            serr << "Expected an integer for variable index " <<
+                i+1 << endl;
+            serr << "Instead found:   " << words[j].get_string() << endl;
+            ierr = 2;
+        }
+    }
+    if (!all_int) return true;
+
+    // All the indices have values and are ints.
+    // Load all the arguments into a vector of ints.
+    vector<int> vdex;
+    for (int i=0; i<nargs; i++) {
+        int j = iw1 + 1 + i;
+        int iv = words[j].get_int();
+        vdex.push_back(iv);
+    }
+
+    // Check for ++ or -- following the variable; the candidate word must lie
+    // within both i2 and the line.
+    int increment = 0;
+    int ippmm = iw1 + nargs + 1;
+    if (ippmm <= i2 && ippmm <(int)words.size()) {
+        string sppmm = words[ippmm].get_string();
+        if (sppmm == "++") increment = 1;
+        if (sppmm == "--") increment = -1;
+    }
+
+    // Evaluate the variable and replace the words.
+    // Works in any dimensionality.
+    int ln = words[iw1].get_line_number();
+    int file_ln = words[iw1].get_file_line_number();
+    string fname = words[iw1].get_filename();
+    string result = p->second.get_var_value(vdex, varname, ln, file_ln,
+                                            fname, lines, serr, ierr);
+    // Apply a trailing ++ or -- only after the value has been read.
+    if (increment != 0) p->second.bump_var(vdex, increment, ln, file_ln,
+                                           fname, lines, serr, ierr);
+    // replace_words presumably collapses words iw1..iw1+nargs into w - confirm.
+    Word w(result, ln, file_ln, fname, lines);
+    replace_words(iw1, iw1+nargs, w);
+    i2 -= nargs;   // the caller's end index shrinks with the line
+
+    // If the variable was followed by a ++ or --, then remove the
+    // ++ or -- since it has been used.
+    if (increment != 0) {
+        delete_words(iw1+1, iw1+1);
+        i2 -= 1;
+    }
+
+    return true;
+}
+
+
+
+
+
+
+// ***************************************************************************
+// ***************************************************************************
+// Math evaluation.
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Math evaluation driver.
+// ===========================================================================
+void Cmd::math_eval(stringstream &serr, int &ierr)
+{
+    // Combine * * into **, i.e. form the exponentiation operator.
+    handle_star_star();
+
+    // Ops like .and., .eq., ... can at this point be part of larger words,
+    // they need to be extracted as individual words.
+    handle_ops();
+
+    // Nothing to evaluate on an empty line; this also keeps the words[0]
+    // access below in range.
+    if (words.empty()) return;
+
+    // ieq is where find_any_char reports the = sign; it is forced to -1 for
+    // an if command so the left-of-equals rule further down never applies.
+    int ieq = find_any_char(0, (int)words.size()-1, "=");
+
+    bool ifcmd = false;
+    if (words[0].get_string() == "if") {
+        ifcmd = true;
+        ieq = -1;
+    }
+
+    for (int i=0; i<(int)words.size(); i++) {
+        if (words[i].get_string() == "(") {
+            if (ifcmd && i>1) continue;
+            if (i > (int)words.size()-2) {
+                // The ( is the last word. It may also be the only word, so
+                // the word before it is printed only when there is one.
+                int nw = (int)words.size();
+                words[i].fatal_error(serr, ierr);
+                serr << "Expected (...)" << endl;
+                serr << "Found ";
+                if (nw >= 2) serr << words[nw-2].get_string();
+                serr << words[nw-1].get_string() << endl;
+                ierr = 2;
+                return;
+            }
+
+            // The starting index of the math expression.
+            int istart = i+1;
+
+            // Find the ending index of the math expression.
+            int iclose = find_closing_symbol("(", ")", istart);
+            if (iclose == -1) {
+                words[i+1].fatal_error(serr, ierr);
+                serr << "Did not find a closing parenthesis, ), in"
+                    " math expression" << endl;
+                serr << "Check for unbalanced parentheses in math expression." << endl;
+                ierr = 2;
+                return;
+            }
+            int iend = iclose - 1;
+
+            // Handle the nested parentheses one set at a time until none is
+            // left. After each one, the word just before the result is tried
+            // as a variable (where allowed) and otherwise as a function.
+            int iwres = 0;
+            int nargs = 0;
+            for (;;) {
+                if (!handle_innermost_parens(istart, iend, iwres, nargs, true,
+                                             serr, ierr)) break;
+                bool isvar = false;
+                bool doit = true;
+                if (iwres <= 0) doit = false;
+                if (cmd_type == "assignment" && iwres==1) doit = false;
+                if (doit) {
+                    isvar = evaluate_variable(iwres-1, iend, nargs, serr, ierr);
+                }
+                if ((iwres > 0) && (!isvar)) {
+                    evaluate_function(iwres-1, iend, nargs, serr, ierr);
+                }
+            }
+
+            // Now set ihip1,2 to the original set of parens and handle those.
+            // This will do a math eval inside the parens, handle multiple arguments
+            // and possibly remove the parens.
+            // We do not remove the parens if this is a command line or an assignment
+            // line and we are to the left of the equals because a lot of checks
+            // depend on the parens being there.
+            int ihip1 = istart-1;
+            int ihip2 = iend + 1;
+            bool remp = true;
+            if (ieq>ihip2 && ihip1==1) remp = false;
+            handle_innermost_parens(ihip1, ihip2, iwres, nargs, remp,
+                                    serr, ierr);
+
+            // Handle the case of a variable array, i.e. evaluate and replace
+            // the variable array reference. We of course do not do this for
+            // an assignment statement where we are to the left of the equals.
+            bool doit = true;
+            if (iwres <= 0) doit = false;
+            if (cmd_type == "assignment" && iwres==1) doit = false;
+            if (doit) {
+                // We use ihip3 to account for ++ or -- following a
+                // variable array reference.
+                int ihip3 = ihip2 + 1;
+                evaluate_variable(iwres-1, ihip3, nargs, serr, ierr);
+            }
+
+            // A function is not evaluated here, outside of the parens: for
+            // now all math is inside parens.
+        }  // if find (
+    } // End of loop through all words on the line.
+}
+
+
+// ===========================================================================
+// Handle the innermost set of parentheses.
+// The return value is false if parens were not found or if they were
+// unbalanced. If parens were found and handled then true is returned.
+// iwres is an output quantity and is the location of the resultant word.
+// ===========================================================================
+bool Cmd::handle_innermost_parens(int &i1, int &i2, int &iwres, int &nargs,
+                                  bool remp, stringstream &serr, int &ierr)
+{
+    // iwres stays -1 (meaningless) unless an opening paren is found below.
+    iwres = -1;
+
+    // Look for the opening paren to process with find_last over i1..i2.
+    // A line without any "(" is not an error; we simply return false.
+    int ip1 = find_last("(", i1, i2);
+    if (ip1 == -1) return false;
+
+    // After evaluation, the resultant word will be at ip1.
+    iwres = ip1;
+    // ipstart is the first word of the current argument; nargs counts them.
+    int ipstart = ip1 + 1;
+    bool done = false;
+    nargs = 1;
+    for (;;) {
+        int ip2 = find_any_char(ipstart, i2, ",)");
+        if (ip2 == -1) {
+            words[ipstart].fatal_error(serr, ierr);
+            serr << "Did not find a closing parenthesis, ), in"
+                " math expression" << endl;
+            serr << "Check for unbalanced parentheses in math expression." << endl;
+            ierr = 2;
+            return false;
+        }
+        // Note what the delimiter was before it is possibly deleted below.
+        if (words[ip2].get_string() == ")") done = true;
+        if (words[ip2].is_comma()) nargs += 1;
+        // With remp set the delimiter is dropped, so the range shrinks by one.
+        if (remp) {
+            delete_words(ip2, ip2);
+            i2 -= 1;
+        }
+        int ip21a = ip2 - 1;    // last word of this argument before seval
+        int ip21 = ip2 - 1;     // same index, but updated in place by seval
+        seval(ipstart, ip21, serr, ierr);
+        i2 -= ip21a - ip21;     // shrink i2 by the number of words seval removed
+
+        if (done) break;
+        // Advance one word; presumably seval left a single word per argument.
+        ipstart += 1;
+    }
+
+    // Delete the leading paren (only when the caller asked for removal).
+    if (remp) {
+        delete_words(ip1, ip1);
+        i2 -= 1;
+    }
+
+    return true;
+}
+
+
+// ===========================================================================
+// Simple evaluation of a series of words i1 to i2 inclusive.
+// ===========================================================================
+void Cmd::seval(int &i1, int &i2, stringstream &serr, int &ierr)
+{
+    Parser_math pmath;
+    // Pre-passes over words i1..i2: subvar_w0, then unary minus and plus.
+    subvar_w0(i1, i2, serr, ierr);
+    handle_unary_op(i1, i2, "-", serr, ierr);
+    handle_unary_op(i1, i2, "+", serr, ierr);
+    // Precedence table. The loop below only applies levels 6 down to 0.
+    //     Level   Operators
+    //     -----   -----------------------
+    //       8     ()
+    //       7     ++ --
+    //       6     **
+    //       5     *  /
+    //       4     +  -
+    //       3     .gt. .ge. .lt. .le. .eq. .ne.
+    //       2     .not.
+    //       1     .and.
+    //       0     .or.
+    for (int level=6; level>=0; level--) {
+        for (int i=i1; i<=i2; i+=1) {
+            if (words[i].is_operator(level)) {
+                int ln = words[i].get_line_number();
+                int file_ln = words[i].get_file_line_number();
+                string fname = words[i].get_filename();
+                Word w("", ln, file_ln, fname, lines);
+                // w is handed to pmath below and then replaces the operands.
+                string op_type = words[i].get_op_type();
+                if (op_type == "arithmetic")
+                    pmath.do_op(i-1, i, i+1, words, w, serr, ierr);
+                if (op_type == "relational")
+                    pmath.do_op_relational(i-1, i, i+1, words, w, serr, ierr);
+                if (op_type == "logical" && level == 2)   // .not. is unary
+                    pmath.do_op_not(i, i+1, words, w, serr, ierr);
+                if (op_type == "logical" && level != 2)
+                    pmath.do_op_logical(i-1, i, i+1, words, w, serr, ierr);
+
+                // Unary .not. replaces 2 words with w; binary operators replace 3.
+                if (level == 2) {
+                    replace_words(i, i+1, w);
+                    i2 -= 1;
+                }
+                else {
+                    replace_words(i-1, i+1, w);
+                    i2 -= 2;
+                    i -= 1;     // i is now where w sits; the loop's i+=1 moves past it
+                }
+                continue;
+            }
+        }
+    }
+    // On return i2 has been lowered by the number of words removed here.
+}
+
+
+// ===========================================================================
+// Handle every unary plus or minus sign found in words i1 through ipend.
+// utype is either "+" or "-". ipend drops by one per unary sign removed.
+// ===========================================================================
+void Cmd::handle_unary_op(int i1, int &ipend, string utype,
+                          stringstream &serr, int &ierr)
+{
+    int ipstart = i1;
+    for (;;) {
+        int ip = find(ipstart, ipend, utype);
+
+        // If we do not find any more plus/minus signs then we are done.
+        if (ip == -1) return;
+
+        // Fatal error if the plus/minus sign is the last word on the line.
+        if (ip >= ((int)words.size()-1)) {
+            words[ip].fatal_error(serr, ierr);
+            serr << "Found a " << utype << " sign at the end of a line." << endl;
+            serr << "Expected something to the right of the  " << utype
+                 << " sign to operate on." << endl;
+            ierr = 2;
+            return;
+        }
+
+        // Past the check above, words[ip+1] is a valid index.
+        // If the plus/minus sign is the first word on the line, then it has
+        // to be a unary op. The word after the plus/minus sign must be a
+        // number or variable.
+        // This should never happen because we are always inside (...)
+        // and will never be word 0, still we should be general and take care
+        // of this case.
+        if (ip == 0) {
+            if (!words[ip+1].is_number()) {
+                words[ip+1].fatal_error(serr, ierr);
+                serr << "Expected the object following the unary " << utype
+                     << " to be a number." << endl;
+                serr << "Instead, it was " << words[ip+1].get_string() << endl;
+                ierr = 2;
+                return;
+            }
+
+            // Apply the unary op; the sign word goes away, hence ipend -= 1.
+            do_unary_op(ip, utype);
+            ipstart = ip+1;
+            ipend -= 1;
+            continue;
+        }
+
+        // Check to see if the +/- is a binary op. If so, then nothing needs
+        // to be done with this +/- sign, binary ops are handled elsewhere.
+        if (words[ip-1].is_number() &&
+            (words[ip+1].is_number() || words[ip+1].get_string() == "-" ||
+             words[ip+1].get_string() == "+")
+            ) {
+            ipstart = ip+1;
+            continue;
+        }
+
+        // Check to see if the +/- is a unary op.
+        if (!words[ip-1].is_number() && words[ip+1].is_number()) {
+            do_unary_op(ip, utype);
+            ipstart = ip+1;
+            ipend -= 1;
+            continue;
+        }
+
+        // Neither neighbor is a number: report the bad operand and give up.
+        if (!words[ip-1].is_number() &&
+            !words[ip+1].is_number()) {
+            words[ip-1].fatal_error(serr, ierr);
+            serr << "Expected the object following the unary " << utype << " to"
+                " be a number." << endl;
+            serr << "Instead, it was " << words[ip+1].get_string() << endl;
+            ierr = 2;
+            return;
+        }
+        // Only case left: a number precedes the sign, no number or sign follows.
+        words[ip].fatal_error(serr, ierr);
+        serr << "Unknown error with unary " << utype << endl;
+        serr << "Error with words: " << endl;
+        serr << words[ip].get_string() << words[ip+1].get_string() << endl;
+        ierr = 2;
+        return;
+    }
+}
+
+
+
+// ===========================================================================
+// Do a unary operation.
+// The minus sign is at word ip and the word to be negated is at word ip+1.
+// After negation, both words get replaced by the new negated word.
+// If the unary op is plus then all we need to do is get rid of the + sign.
+// ===========================================================================
+void Cmd::do_unary_op(int ip, string utype)
+{
+    // Unary plus leaves the value untouched: just drop the sign itself.
+    if (utype == "+") {
+        delete_words(ip, ip);
+    }
+    // Unary minus: negate a copy of the operand and let that single word
+    // replace the sign/operand pair. A non-numeric operand is left alone.
+    else if (utype == "-" && words[ip+1].is_number()) {
+        const int ioperand = ip + 1;
+        Word negated = words[ioperand];
+        negated.negate_value();
+        replace_words(ip, ioperand, negated);
+    }
+    // Any other utype is silently ignored.
+}
+
+
+
+// ===========================================================================
+// Check to see that all ++ and -- have been handled and removed.
+// ===========================================================================
+void Cmd::check_ppmm(stringstream &serr, int &ierr)
+{
+    // Any ++ or -- still on the line at this point is reported as misplaced.
+    for (int iw = 0; iw < (int)words.size(); ++iw) {
+        const string token = words[iw].get_string();
+        if (token != "++" && token != "--") continue;
+        words[iw].fatal_error(serr, ierr);
+        serr << "Misplaced " << token << " operator." << endl
+             << "++ and -- operators must follow a variable or " << endl
+             << "an element of an array variable. " << endl;
+        ierr = 2;
+    }
+}
+
+
+// ===========================================================================
+// Evaluate a function.
+// ===========================================================================
+void Cmd::evaluate_function(int iw1, int &i2, int &nargs,
+                            stringstream &serr, int &ierr)
+{
+    // iw1   - index of the word expected to hold the function name.
+    // i2    - caller's end index; lowered by nargs when words are replaced.
+    // nargs - number of argument words following the function name.
+    // Problems are reported on serr and flagged by setting ierr to 2.
+
+    // Nothing can be evaluated without a function map, and a word that is
+    // not a string cannot name a function. Both cases return silently.
+    if (fmap == NULL || !words[iw1].is_string()) return;
+
+    // Look the word up in the function map.
+    string s = words[iw1].get_string();
+    map<string, Function>::iterator p = fmap->find(s);
+
+    // Unknown name: report it along with every function that is known.
+    if (p == fmap->end()) {
+        words[iw1].fatal_error(serr, ierr);
+        serr << "Expected a function" << endl;
+        serr << "Instead found:   " << words[iw1].get_string() << endl;
+        serr << "The list of known functions is:" << endl;
+        map<string, Function>::iterator known;
+        for (known = fmap->begin(); known != fmap->end(); known++) {
+            serr << known->second.get_name() << endl;
+        }
+        ierr = 2;
+        return;
+    }
+
+
+    // Location information shared by every result word built below.
+    int ln = words[iw1].get_line_number();
+    int file_ln = words[iw1].get_file_line_number();
+    string fname = words[iw1].get_filename();
+
+    // The function word and its nargs argument words are replaced by one
+    // result word, which is why i2 is lowered by nargs each time.
+    const int iwlast = iw1 + nargs;
+
+
+    // "defined" reports whether its argument names an existing variable.
+    // NOTE(review): vmap is dereferenced without the NULL test fmap gets.
+    if (s == "defined") {
+        string varname = words[iw1+1].get_string();
+        map<string, Variable>::iterator pv = vmap->find(varname);
+        string result = (pv == vmap->end()) ? "false" : "true";
+        Word w(result, ln, file_ln, fname, lines);
+        replace_words(iw1, iwlast, w);
+        i2 -= nargs;
+        return;
+    }
+
+
+    // String functions - string arguments, string results.
+    if (p->second.get_type() == "string") {
+        // Collect the argument words, in order, as strings.
+        vector<string> vs;
+        for (int j = iw1 + 1; j <= iw1 + nargs; j++) {
+            vs.push_back(words[j].get_string());
+        }
+
+        // Evaluate the function and substitute its result.
+        string result = p->second.evaluate(vs, serr, ierr, ln, file_ln,
+                                           fname, lines);
+        Word w(result, ln, file_ln, fname, lines);
+        replace_words(iw1, iwlast, w);
+        i2 -= nargs;
+        // No return here: the type test below is still evaluated.
+    }
+
+
+    // Real functions - double arguments, double results.
+    if (p->second.get_type() == "real") {
+        // Every argument must be a number. All offenders are reported
+        // before giving up.
+        bool has_value = true;
+        for (int i = 0; i < nargs; i++) {
+            int j = iw1 + 1 + i;
+            if (words[j].is_number()) continue;
+            has_value = false;
+            words[j].fatal_error(serr, ierr);
+            serr << "Expected a number for function argument number " <<
+                i+1 << endl;
+            serr << "Instead found:   " << words[j].get_string() << endl;
+            ierr = 2;
+        }
+        if (!has_value) return;
+
+        // Collect the argument values, in order, as doubles.
+        vector<double> vd;
+        for (int j = iw1 + 1; j <= iw1 + nargs; j++) {
+            vd.push_back(words[j].get_double());
+        }
+
+        // Evaluate the function and substitute its result.
+        double result = p->second.evaluate(vd, serr, ierr, ln, file_ln,
+                                       fname, lines);
+        Word w(result, ln, file_ln, fname, lines);
+        replace_words(iw1, iwlast, w);
+        i2 -= nargs;
+    }
+}
+
+
+
+// ===========================================================================
+// When two "*" characters are together, assume that is the exponentiation
+// operator, "**", and replace both "*"'s with "**".
+// ===========================================================================
+void Cmd::handle_star_star()
+{
+    // Scan adjacent word pairs; the word count is re-read after each merge.
+    for (int iw = 0; iw + 1 < (int)words.size(); ++iw) {
+        if (words[iw].get_string() != "*" || words[iw+1].get_string() != "*") continue;
+        int line_no = words[iw].get_line_number();
+        int file_line_no = words[iw].get_file_line_number();
+        string file_name = words[iw].get_filename();
+        string power = "**";
+        Word power_op(power, line_no, file_line_no, file_name, lines);
+        replace_words(iw, iw+1, power_op);
+    }
+}
+
+
+// ===========================================================================
+// The parser does not automatically separate operators like .eq., .ne., etc.
+// For example, the phrase a.eq.b will be one word when it should be 3 words.
+// This routine finds those cases and splits the one word into multiple words.
+// ===========================================================================
+void Cmd::handle_ops()
+{
+    // For every word: if it contains one of the operators below, replace the
+    // word with the pieces that separate_str produces for it.
+
+    // Dotted operators that may be glued to their operands inside one word.
+    // On each pass over a word the first operator in this list that matches
+    // is the one that gets used.
+    static const char *const op_names[] = {
+        ".eq.", ".ne.", ".gt.", ".ge.", ".lt.", ".le.",
+        ".hgeq.", ".hgne.", ".hggt.", ".hgge.", ".hglt.", ".hgle.",
+        ".not.", ".and.", ".or."
+    };
+    const int nops = (int)(sizeof(op_names) / sizeof(op_names[0]));
+    vector<string> subs(op_names, op_names + nops);
+
+    // The word count is re-read on every pass because replacing a word by
+    // its pieces can change the number of words.
+    for (int iw = 0; iw < (int)words.size(); iw++) {
+        string fstr = words[iw].get_string();
+
+        // Try the operators in list order and stop at the first one that
+        // separate_str reports as found in this word.
+        for (int iop = 0; iop < (int)subs.size(); iop++) {
+            vector<string> pieces;
+            if (!separate_str(subs[iop], fstr, pieces)) continue;
+
+            // Turn every piece into a word that inherits the location of
+            // the word it came from.
+            vector<Word> new_words;
+            for (int k = 0; k < (int)pieces.size(); k++) {
+                int lnum = words[iw].get_line_number();
+                int file_lnum = words[iw].get_file_line_number();
+                string fname = words[iw].get_filename();
+                Word w(pieces[k], lnum, file_lnum, fname, lines);
+                new_words.push_back(w);
+            }
+
+            // Swap the original word for its pieces, then step back one so
+            // that the outer loop's increment lands on this position again
+            // and the new words get examined as well.
+            replace_words(iw, iw, new_words);
+            iw--;
+            break;
+        }
+    }
+}
+
+
+// ===========================================================================
+// After the line has mostly been processed, check for any misplaced math
+// operations. For example, the following line
+//    xcenter = 1.0 + 2.0
+// has a misplaced math op in it, i.e. it should be in parentheses
+//    xcenter = (1.0 + 2.0)
+// ===========================================================================
+void Cmd::check_misplaced_math(stringstream &serr, int &ierr)
+{
+    // Any operator word still on the line at this stage is reported.
+    for (int iw = 0; iw < (int)words.size(); ++iw) {
+        if (!words[iw].is_operator()) continue;
+        words[iw].fatal_error(serr, ierr);
+        serr << "Misplaced math operation." << endl
+             << "All math operations must be inside parentheses." << endl;
+        ierr = 2;
+    }
+}
+
+
+// ***************************************************************************
+// ***************************************************************************
+// Handle if/elseif/else/endif
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Handle if/elseif/else/endif statements.
+// ===========================================================================
+void Cmd::handle_if(bool &skip, deque<bool> &skip_level,
+                    deque<bool> &satisfied, stringstream &serr, int &ierr)
+{
+    // skip       - output: true when the caller should not execute this line.
+    // skip_level - per open block if: true while its lines are being skipped.
+    // satisfied  - per open block if: true once one of its branches was taken.
+    // Syntax errors are written to serr and flagged by setting ierr to 2.
+
+    // If's can be nested to any level, the number of levels is determined
+    // by the size of skip_level, the size of satisfied would also work here.
+    int nlevels = (int)skip_level.size();
+
+    // endif/else/elseif index level nlevels-1, which is -1 outside any if.
+    string kw = words[0].get_string();
+    if (nlevels == 0 && (kw == "endif" || kw == "else" || kw == "elseif")) {
+        words[0].fatal_error(serr, ierr);
+        serr << "Found " << kw << " without a matching if statement." << endl;
+        ierr = 2;
+        skip = true;
+        return;
+    }
+
+    // The endif statement ends a block if.
+    if (kw == "endif") {
+        if ((int)words.size() > 1) {
+            words[1].fatal_error(serr, ierr);
+            serr << "The endif (or end if) statement should not have "
+                 "anything else on the line." << endl;
+            serr << "Found other words on the line." << endl;
+            ierr = 2;
+        }
+
+        // The if level has ended, just erase it.
+        skip_level.erase(skip_level.begin()+nlevels-1);
+        satisfied.erase(satisfied.begin()+nlevels-1);
+        skip = true;
+        return;
+    }
+
+    // Else statement.
+    if (kw == "else") {
+        if ((int)words.size() > 1) {
+            words[1].fatal_error(serr, ierr);
+            serr << "The else statement should not have "
+                 "anything else on the line." << endl;
+            serr << "Found other words on the line." << endl;
+            ierr = 2;
+        }
+
+        // If the if has been satisfied before this else, then just
+        // skip the else block. Otherwise the if will be satisfied and
+        // we do not skip the else block.
+        if (satisfied[nlevels-1]) {
+            skip_level[nlevels-1] = true;
+        }
+        else {
+            satisfied[nlevels-1] = true;
+            skip_level[nlevels-1] = false;
+        }
+        skip = true;
+        return;
+    }
+
+    // If any level is in skip mode, then we will skip this line.
+    // This is mostly for non if related lines, but the skip flag is
+    // used below.
+    skip = false;
+    for (int n=0; n<nlevels; n++) {
+        if (skip_level[n]) skip = true;
+    }
+
+    // Process the elseif statement.
+    if (kw == "elseif") {
+        if (satisfied[nlevels-1]) {
+            skip_level[nlevels-1] = true;
+        }
+        else {
+            // If we are in skip mode at a higher level, then we can ignore this
+            // elseif.
+            if (skip && !skip_level[nlevels-1]) return;
+
+            // Do some syntax checking.
+            int wsize = (int)words.size();
+
+            if (wsize > 1 && words[1].get_string() != "(") {
+                words[1].fatal_error(serr, ierr);
+                serr << "Expected an open parentheses, (, following " <<
+                      words[0].get_string() << endl;
+                serr << "Instead found: " << words[1].get_string() << endl;
+                ierr = 2;
+            }
+
+            int nw = wsize-2;
+            if (nw >= 0 && words[nw].get_string() != ")") {
+                words[nw].fatal_error(serr, ierr);
+                serr << "Expected a close parentheses, ), as the next to last "
+                    "symbol on the line." << endl;
+                serr << "Instead found: " << words[nw].get_string() << endl;
+                ierr = 2;
+            }
+
+            nw = wsize-1;
+            if (nw >= 0 && words[nw].get_string() != "then") {
+                words[nw].fatal_error(serr, ierr);
+                serr << "Expected then as the last word on the line." << endl;
+                serr << "Instead found: " << words[nw].get_string() << endl;
+                ierr = 2;
+            }
+
+            // Evaluate the conditional.
+            math_eval(serr, ierr);
+
+            // The value of the conditional is read from word 1, so that
+            // word has to exist (a bare elseif would index past the end).
+            if ((int)words.size() < 2) {
+                words[0].fatal_error(serr, ierr);
+                serr << "Expected a conditional following " << kw << endl;
+                ierr = 2;
+                skip = true;
+                return;
+            }
+
+            if (words[1].get_bool(serr, ierr)) {
+                satisfied[nlevels-1] = true;
+                skip_level[nlevels-1] = false;
+            }
+            else {
+                skip_level[nlevels-1] = true;
+            }
+        }
+
+        // Set skip to skip the elseif statement.
+        skip = true;
+        return;
+    }
+
+    if (kw == "if") {
+        // If we are in skip mode at a higher level, then we can ignore this
+        // if. Only a block if (last word is then) has an endif that pops
+        // the level again, so a single line if must not push one.
+        if (skip) {
+            if (words[(int)words.size()-1].get_string() == "then") {
+                skip_level.push_back(true);
+                satisfied.push_back(true);
+            }
+            return;
+        }
+
+        // Do some syntax checking.
+        int wsize = (int)words.size();
+
+        if (wsize > 1 && words[1].get_string() != "(") {
+            words[1].fatal_error(serr, ierr);
+            serr << "Expected an open parentheses, (, following " <<
+                words[0].get_string() << endl;
+            serr << "Instead found: " << words[1].get_string() << endl;
+            ierr = 2;
+        }
+
+        // Evaluate the conditional.
+        math_eval(serr, ierr);
+
+        // Words 1 and 2 are read below, so both have to exist.
+        if ((int)words.size() < 3) {
+            words[0].fatal_error(serr, ierr);
+            serr << "Expected then or a command following the if "
+                 "conditional." << endl;
+            ierr = 2;
+            skip = true;
+            return;
+        }
+
+        // Single line if: when true, words 0 and 1 are deleted and the
+        // caller is told not to skip what remains of the line.
+        if (words[2].get_string() != "then") {
+            skip = !words[1].get_bool(serr, ierr);
+            if (!skip) {
+                delete_words(0,1);
+                reset_name_type();
+            }
+            return;
+        }
+
+        // Multi-block if: the new level is skipped unless the test is true.
+        bool cond = words[1].get_bool(serr, ierr);
+        skip_level.push_back(!cond);
+        satisfied.push_back(cond);
+        skip = true;
+        return;
+    }
+}
+
+
+// ***************************************************************************
+// ***************************************************************************
+// Handle do loops
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Handle do loops.
+// ===========================================================================
+void Cmd::handle_do(bool &skip, deque<int> &do_start, int &cdex,
+                    bool &end_do_loop, stringstream &serr, int &ierr)
+{
+    // skip        - output: set to true for lines the caller must not execute.
+    // do_start    - value of cdex at the do line of every open loop.
+    // cdex        - index of the current line; set to (do line - 1) to jump back.
+    // end_do_loop - output: set to true when the innermost loop is finished.
+
+    // Do's can be nested to any level, the number of levels is determined
+    // by the size of do_start.
+    int nlevels = (int)do_start.size();
+
+    // enddo and cycle jump back to the innermost do line. Outside of a loop
+    // there is none and do_start[nlevels-1] would be an out-of-range access.
+    string kw = words[0].get_string();
+    if (nlevels == 0 && (kw == "enddo" || kw == "cycle")) {
+        words[0].fatal_error(serr, ierr);
+        serr << "Found " << kw << " outside of a do loop." << endl;
+        ierr = 2;
+        skip = true;
+        return;
+    }
+
+    // End of do loop, go back to do line.
+    if (kw == "enddo") {
+        if ((int)words.size() > 1) {
+            words[1].fatal_error(serr, ierr);
+            serr << "The enddo (or end do) statement should not have "
+                 "anything else on the line." << endl;
+            serr << "Found other words on the line." << endl;
+            ierr = 2;
+        }
+
+        cdex = do_start[nlevels-1] - 1;
+        skip = true;
+        return;
+    }
+
+    // Cycle command encountered.
+    if (kw == "cycle") {
+        if ((int)words.size() > 1) {
+            words[1].fatal_error(serr, ierr);
+            serr << "The cycle statement should not have "
+                 "anything else on the line." << endl;
+            serr << "Found other words on the line." << endl;
+            ierr = 2;
+        }
+
+        cdex = do_start[nlevels-1] - 1;
+        skip = true;
+        return;
+    }
+
+    // Break out of the do loop.
+    if (kw == "exit") {
+        if ((int)words.size() > 1) {
+            words[1].fatal_error(serr, ierr);
+            serr << "The exit statement should not have "
+                 "anything else on the line." << endl;
+            serr << "Found other words on the line." << endl;
+            ierr = 2;
+        }
+
+        end_do_loop = true;
+        return;
+    }
+
+    if (kw == "do") {
+        // Evaluate any math expressions on the do line.
+        math_eval(serr, ierr);
+
+        // Replace any simple variables on the line with their values.
+        // Of course, do not replace the loop variable.
+        int ieq = -1;
+        for (int i=0; i<(int)words.size(); i++) {
+            if (words[i].get_string() == "=") {
+                ieq = i;
+                break;
+            }
+        }
+        if (ieq >= 0) {
+            int nw1 = (int)words.size()-1;
+            subvar_w0(ieq+1, nw1, serr, ierr);
+        }
+
+        // Handle unary minus
+        handle_cmd_unary_minus(serr, ierr);
+
+        // Number of words on the line after math evaluation.
+        int nwords = (int)words.size();
+
+        // Get the loop variable name.
+        string do_varname = "$i";
+        bool isvar = true;
+        if (nwords>1) {
+            do_varname = words[1].get_string();
+            if (!words[1].is_variable()) isvar = false;
+        }
+
+        // Expecting 6 or 8 words, i.e. "do $i = 1 , 10"
+        if (nwords < 6) {
+            words[0].fatal_error(serr, ierr);
+            serr << "Expected at least 6 words on this line after any math evaluations."
+                 << endl;
+            serr << "For example, " << endl
+                 << "    do " << do_varname << " = 1 , 10" << endl;
+            serr << "Instead found " << nwords << " words on the line." << endl;
+            serr << "The line (after any math evaluations have been done) is:" << endl;
+            serr << "    ";
+            for (int iw=0; iw<nwords; iw++) {
+                serr << words[iw].get_string() << " ";
+            }
+            serr << endl;
+            ierr = 2;
+            return;
+        }
+
+        // Variables must begin with a $
+        if (!isvar) {
+            words[0].fatal_error(serr, ierr);
+            serr << "Loop variable names must begin with a $ sign." << endl;
+            serr << "Note that putting quotes around a variable name makes it" << endl;
+            serr << "a string, not a variable." << endl;
+            serr << "Instead found: " << do_varname << endl;
+            ierr = 2;
+            return;
+        }
+
+        // Word 3 must be an = sign.
+        if (words[2].get_string() != "=") {
+            words[2].fatal_error(serr, ierr);
+            serr << "The third word must be an = sign." << endl;
+            serr << "Instead found: " << words[2].get_string() << endl;
+            ierr = 2;
+            return;
+        }
+
+        // Word 5 must be a comma.
+        if (!words[4].is_comma()) {
+            words[4].fatal_error(serr, ierr);
+            serr << "The fifth word must be a comma." << endl;
+            serr << "Instead found: " << words[4].get_string() << endl;
+            ierr = 2;
+            return;
+        }
+
+        // The get_int functions generate an error if the values are
+        // not integer.
+        int i1 = words[3].get_int(serr, ierr);
+        int i2 = words[5].get_int(serr, ierr);
+
+        // Get the step (increment) if specified.
+        int istep = 1;
+        if (nwords > 7) istep = words[7].get_int(serr, ierr);
+        string s1 = words[3].get_string();
+
+        // True when we came back to the do line of the innermost open loop.
+        bool do_continue = false;
+        if (nlevels > 0 && do_start[nlevels-1] == cdex) do_continue = true;
+
+        if (do_continue) {  // This do has already been encountered.
+            // Find the variable in the list of variables, increment it, test for
+            // ending the loop, and store the incremented value.
+            map<string, Variable>::iterator p;
+            p = vmap->find(do_varname);
+            if (p != vmap->end()) {
+                string do_var_value = p->second.get_var_value();
+                int lnum = words[0].get_line_number();
+                int file_lnum = words[0].get_file_line_number();
+                string fname = words[0].get_filename();
+                // Built with fname, this line's file name, like set_var_value below.
+                Word w(do_var_value, lnum, file_lnum, fname, lines);
+                int ival = w.get_int(serr, ierr);
+                ival += istep;
+                if (istep >= 0 && ival > i2) {
+                    end_do_loop = true;
+                    return;
+                }
+                if (istep < 0 && ival < i2) {
+                    end_do_loop = true;
+                    return;
+                }
+                stringstream ss;
+                ss << ival;
+                string sval = ss.str();
+                vector<string> valvec;
+                valvec.push_back(sval);
+                vector<int> istart(0,0);
+                p->second.set_var_value(istart, valvec, lnum, file_lnum, fname,
+                                        lines, serr, ierr);
+            }
+            else {
+                words[1].fatal_error(serr, ierr);
+                serr << "The loop variable, " << do_varname <<
+                    " was not found in the variable list." << endl;
+                serr << "This should not happen, possible code bug?" << endl;
+                ierr = 2;
+                return;
+            }
+        }
+        else {  // A new do loop has been encountered.
+            do_start.push_back(cdex);
+
+            // It is possible that we don't execute the do loop at all.
+            if (istep >= 0 && i1 > i2) {
+                end_do_loop = true;
+                return;
+            }
+            if (istep < 0 && i1 < i2) {
+                end_do_loop = true;
+                return;
+            }
+
+            // Store the loop variable, create it if necessary.
+            vector<int> istart(0,0);
+            int lnum = words[0].get_line_number();
+            int file_lnum = words[0].get_file_line_number();
+            string fname = words[0].get_filename();
+            vector<string> valvec;
+            valvec.push_back(s1);
+            map<string, Variable>::iterator p;
+            p = vmap->find(do_varname);
+            if (p != vmap->end()) {
+                p->second.set_var_value(istart, valvec, lnum, file_lnum, fname,
+                                        lines, serr, ierr);
+            }
+            else {
+                Variable v(do_varname, istart, valvec, lnum, file_lnum, fname,
+                           lines, serr, ierr);
+                vmap->insert(pair<string, Variable>(v.get_varname(), v));
+            }
+        }
+        skip = true;
+        return;
+    }
+}
+
+
+
+// ===========================================================================
+// One step of the search for the enddo that closes a do statement.
+//
+// dlev is the current do-nesting depth and is updated in place: a nested
+// "do" raises it and an inner "enddo" lowers it. The function returns true
+// only when an "enddo" is seen while dlev is 1, i.e. the matching enddo.
+//
+// stop_checking is set when a "subroutine" or "endsubroutine" statement is
+// reached, since that marks the end of main or of the current subroutine.
+// ===========================================================================
+bool Cmd::find_matching_enddo(int &dlev, bool &stop_checking)
+{
+    const string keyword = words[0].get_string();
+
+    if (keyword == "enddo") {
+        // At depth 1 this enddo closes the do we started from.
+        if (dlev == 1) return true;
+        --dlev;
+    }
+    else if (keyword == "do") {
+        ++dlev;
+    }
+    else if (keyword == "subroutine" || keyword == "endsubroutine") {
+        // Leaving main (subroutine) or the current subroutine
+        // (endsubroutine): tell the caller to give up the search.
+        stop_checking = true;
+    }
+
+    return false;
+}
+
+
+
+// ***************************************************************************
+// ***************************************************************************
+// Subroutines
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Handle subroutine related statements on this line.
+//
+//   call name ...            sub_name is set to name and go_to_sub to true.
+//   endsubroutine / return   go_to_call is set to true.
+//
+// Any other line leaves all outputs untouched. A call statement without a
+// subroutine name is reported through serr and ierr is set to 2.
+// The skip argument is part of the interface but is not used here.
+// ===========================================================================
+void Cmd::handle_subroutines(bool &skip, bool &go_to_sub, string &sub_name,
+                             bool &go_to_call, stringstream &serr, int &ierr)
+{
+    // Explicitly mark the unused parameter; unlike assert(skip == skip) this
+    // also silences the warning when NDEBUG is defined.
+    (void)skip;
+
+    // Nothing to look at on an empty line.
+    if (words.empty()) return;
+
+    const string keyword = words[0].get_string();
+
+    if (keyword == "call") {
+        // The subroutine name must follow the call keyword, do not index
+        // past the end of the words when it is missing.
+        if ((int)words.size() < 2) {
+            fatal_error2(serr, ierr);
+            serr << "A call statement must be followed by a subroutine name."
+                 << endl;
+            ierr = 2;
+            return;
+        }
+        sub_name = words[1].get_string();
+        go_to_sub = true;
+        return;
+    }
+
+    if (keyword == "endsubroutine" || keyword == "return") {
+        go_to_call = true;
+        return;
+    }
+}
+
+
+// ===========================================================================
+// Return true when this line is the statement
+//      subroutine sub_name ...
+// i.e. it has at least two words, the first is "subroutine" and the second
+// is the requested name.
+// ===========================================================================
+bool Cmd::find_subroutine(string &sub_name)
+{
+    return words.size() >= 2 &&
+           words[0].get_string() == "subroutine" &&
+           words[1].get_string() == sub_name;
+}
+
+
+// ===========================================================================
+// Collect the arguments of a call statement, expected to look like
+//      call subname ( arg1, arg2, ...)
+//
+// Each argument string is appended to sargs and its is_variable() flag to
+// sargs_isvar. The same values replace the contents of the class members
+// call_args and call_args_isvar.
+//
+// The words on the line are edited while the arguments are extracted, but
+// they are put back before returning, so the line itself is unchanged.
+// ===========================================================================
+void Cmd::get_call_args(vector<string> &sargs, vector<bool> &sargs_isvar,
+                        stringstream &serr, int &ierr)
+{
+    // Keep a copy of the line so that it can be restored at the end.
+    deque<Word> saved_words(words);
+
+    // Drop "call" and the subroutine name.
+    erase_word(0);
+    erase_word(0);
+
+    // Drop the surrounding parentheses, if present.
+    if (!words.empty() && words[0].get_string() == "(") {
+        erase_word(0);
+    }
+    if (!words.empty() && words.back().get_string() == ")") {
+        erase_last_word();
+    }
+
+    // Evaluate the math so that each argument collapses to one word.
+    // Variables are not evaluated here, so the result is a mix of
+    // variables and numbers.
+    if (!words.empty()) {
+        math_eval(serr, ierr);
+        int last = (int)words.size() - 1;
+        handle_unary_op(0, last, "-", serr, ierr);
+        last = (int)words.size() - 1;
+        handle_unary_op(0, last, "+", serr, ierr);
+    }
+
+    // Everything that is not a comma is an argument. Hand it back to the
+    // caller and remember it in the class as well.
+    call_args.clear();
+    call_args_isvar.clear();
+    const int nwords = (int)words.size();
+    for (int k = 0; k < nwords; ++k) {
+        Word &w = words[k];
+        if (w.is_comma()) continue;
+        sargs.push_back(w.get_string());
+        sargs_isvar.push_back(w.is_variable());
+        call_args.push_back(w.get_string());
+        call_args_isvar.push_back(w.is_variable());
+    }
+
+    // Put the original words back.
+    words.swap(saved_words);
+}
+
+
+// ===========================================================================
+// Collect the dummy arguments of a subroutine statement, expected to be
+//      subroutine subname ( arg1, arg2, ...)
+//
+// With that layout the arguments sit at word indices 3, 5, 7, ... with
+// commas (and finally the closing paren) in between. Each argument string
+// is appended to sargs and its is_variable() flag to sargs_isvar; the same
+// values replace the contents of the class members sub_args and
+// sub_args_isvar.
+//
+// An empty argument list, "subroutine subname ( )", yields no arguments.
+// ===========================================================================
+void Cmd::get_sub_args(vector<string> &sargs, vector<bool> &sargs_isvar)
+{
+    sub_args.clear();
+    sub_args_isvar.clear();
+    for (int i=3; i<(int)words.size(); i+=2) {
+        // For an empty argument list the word at index 3 is the closing
+        // paren, it must not be recorded as an argument.
+        if (words[i].get_string() == ")") break;
+
+        sargs.push_back(words[i].get_string());
+        sargs_isvar.push_back(words[i].is_variable());
+        sub_args.push_back(words[i].get_string());
+        sub_args_isvar.push_back(words[i].is_variable());
+    }
+}
+
+
+// ===========================================================================
+// Accessor functions for the calling and subroutine arguments.
+//
+// copy_call_args appends the stored call arguments (call_args) and their
+// is-variable flags (call_args_isvar) to the vectors supplied by the caller.
+// The output vectors are not cleared first.
+// ===========================================================================
+void Cmd::copy_call_args(vector<string> &sargs, vector<bool> &sargs_isvar)
+{
+    const size_t nargs = call_args.size();
+    sargs.reserve(sargs.size() + nargs);
+    sargs_isvar.reserve(sargs_isvar.size() + nargs);
+    for (size_t k = 0; k != nargs; ++k) {
+        sargs.push_back(call_args[k]);
+        sargs_isvar.push_back(call_args_isvar[k]);
+    }
+}
+
+// Append the stored subroutine arguments (sub_args) and their is-variable
+// flags (sub_args_isvar) to the vectors supplied by the caller. The output
+// vectors are not cleared first.
+void Cmd::copy_sub_args(vector<string> &sargs, vector<bool> &sargs_isvar)
+{
+    const size_t nargs = sub_args.size();
+    sargs.reserve(sargs.size() + nargs);
+    sargs_isvar.reserve(sargs_isvar.size() + nargs);
+    for (size_t k = 0; k != nargs; ++k) {
+        sargs.push_back(sub_args[k]);
+        sargs_isvar.push_back(sub_args_isvar[k]);
+    }
+}
+
+
+
+
+// ***************************************************************************
+// ***************************************************************************
+// Handle comments.
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Strip a single line comment from the words on this line.
+//
+// A comment starts at the first "!" or "#" word, or at two consecutive "/"
+// words, and runs to the end of the line. Everything from the comment
+// start onwards is removed.
+// ===========================================================================
+void Cmd::single_line_comments()
+{
+    // The scan stops one short of the end because the "//" test needs to
+    // look at the following word.
+    const int nwords = (int)words.size();
+    for (int k = 0; k < nwords - 1; ++k) {
+        const string tok = words[k].get_string();
+
+        bool starts_comment = (tok == "!" || tok == "#");
+        if (!starts_comment && tok == "/") {
+            starts_comment = (words[k+1].get_string() == "/");
+        }
+
+        if (starts_comment) {
+            words.erase(words.begin() + k, words.end());
+            break;
+        }
+    }
+
+    // The scan above never looks at the last word on its own, so a
+    // trailing "!" or "#" is removed here.
+    if (words.empty()) return;
+    const string last = words.back().get_string();
+    if (last == "!" || last == "#") {
+        words.pop_back();
+    }
+}
+
+
+// ===========================================================================
+// Remove multi-line comments, i.e. everything between the word pairs
+// "/" "*" and "*" "/", from the words on this line.
+//
+// level is the comment nesting depth carried from line to line: it is the
+// depth on entry (greater than 0 when the line starts inside a comment)
+// and is updated to the depth at the end of the line. If the line ends
+// inside a comment, everything from the start of that comment to the end
+// of the line is removed.
+//
+// A closing "*/" that has no matching opening "/*" is reported on cout and
+// otherwise left alone; it neither removes words nor changes level.
+// ===========================================================================
+void Cmd::multi_line_comments(int &level)
+{
+    // Index of the first word of the outermost open comment on this line.
+    // When the line starts inside a comment that is the first word.
+    int istart = (level > 0) ? 0 : -1;
+
+    // An explicit index is used, rather than a for loop, because erasing a
+    // comment moves the following words down to istart and scanning has to
+    // resume exactly there. Otherwise a comment that directly follows
+    // another one would be missed.
+    int i = 0;
+    while (i < (int)words.size()-1) {
+        const string cur = words[i].get_string();
+        const string nxt = words[i+1].get_string();
+
+        if (cur == "/" && nxt == "*") {
+            if (level == 0) istart = i;
+            level += 1;
+            i += 2;
+            continue;
+        }
+
+        if (cur == "*" && nxt == "/") {
+            if (level == 0) {
+                // There is no open comment, so there is nothing to erase
+                // and the level must not become negative.
+                cout << "Error in line " << line_number << " umatched */" << endl;
+                i += 2;
+                continue;
+            }
+
+            // Remove everything from the start of the open comment through
+            // the closing "*/". If we are still inside an outer comment,
+            // istart keeps pointing at the first word still commented out.
+            words.erase(words.begin()+istart, words.begin()+i+2);
+            level -= 1;
+            i = istart;
+            continue;
+        }
+
+        i += 1;
+    }
+
+    // The line ends inside a comment, remove the rest of the line.
+    if (level > 0) {
+        words.erase(words.begin()+istart, words.end());
+    }
+}
+
+
+// ***************************************************************************
+// ***************************************************************************
+// Miscellaneous
+// ***************************************************************************
+// ***************************************************************************
+
+
+// ===========================================================================
+// Forward the quote handling to every word on the line, in order. Each
+// word's handle_quotes does the checking for that word and reports
+// problems through serr and ierr.
+// ===========================================================================
+void Cmd::handle_quotes(stringstream &serr, int &ierr)
+{
+    for (deque<Word>::iterator w = words.begin(); w != words.end(); ++w) {
+        w->handle_quotes(serr, ierr);
+    }
+}
+
+
+
+// ===========================================================================
+// Turn execution line arguments into input file lines.
+//
+// The execution line arguments are expected to be of the form
+//     -v r1=5 -v somevar = 14
+// and have already been parsed into words. A "-" word followed by "v" or
+// "l" starts a group that runs up to the next such pair or to the end of
+// the words. Each group becomes one line of sout, with its words separated
+// (and followed) by a single space.
+//
+// For a -v group a $ is put in front of the first word, since the shell
+// does not allow the user to type it; a -l group is copied as is.
+// The parser can then add sout to the top of the user input file.
+// ===========================================================================
+void Cmd::handle_exe_args(string &sout)
+{
+    stringstream out;
+    const int nwords = (int)words.size();
+
+    for (int i = 0; i < nwords - 1; ++i) {
+        // Look for the start of a group, -v or -l.
+        if (words[i].get_string() != "-") continue;
+        const string flag = words[i+1].get_string();
+        if (flag != "v" && flag != "l") continue;
+        const bool is_var = (flag == "v");
+
+        // Copy the words after the flag up to the next -v or -l.
+        const int first = i + 2;
+        for (int j = first; j < nwords; ++j) {
+            if (j < nwords - 1 && words[j].get_string() == "-") {
+                const string nxt = words[j+1].get_string();
+                if (nxt == "v" || nxt == "l") break;
+            }
+
+            string text = words[j].get_string();
+            if (is_var && j == first) {
+                text.insert(text.begin(), '$');
+            }
+            out << text << " ";
+        }
+        out << endl;
+    }
+
+    sout = out.str();
+}
+
+
+// ===========================================================================
+// Consider the following input
+//     2.0, 3.0 e15, -7.0
+// The issue is with the middle two words, "3.0 e15", the old parser ignored
+// the space and treated this as one word, 3.0e15. The new parser treats it
+// as two words.
+//
+// This should have been treated as an input error by the old parser but was
+// not, so now we have to deal with it.
+//
+// This routine detects this situation and allows the calling code to deal
+// with it according to the action input, allowed action values are:
+//
+//    ignore      Treat it as two words and silently continue.
+//    fix         Merge the two words into one word, as the old parser did.
+//    error       Generate a fatal error, force the user to fix it.
+//
+// Any other action value does nothing. Only lines whose cmd_type is
+// "command" are examined. For the error action the message is written to
+// serr, ierr is set to 2, and the scan of the line continues.
+// ===========================================================================
+void Cmd::deprecated_input01(string action, stringstream &serr, int &ierr)
+{
+    //if (cmd_name != "depcmd01") return;
+
+    if (cmd_type != "command") return;
+
+    // The loop stops two short of the end so that words[i+1] and words[i+2]
+    // (the optional sign and the first number) always exist. The size is
+    // re-read every iteration because the fix action shortens the line.
+    for (int i=0; i<(int)words.size()-2; i++) {
+        // A comma must be found first.
+        if (!words[i].is_comma()) continue;
+
+        // There could be a unary plus or minus on the next number, if so
+        // then skip it. At this point, the unary plus and minus have not
+        // been merged with their number.
+        int in1 = i+1;
+        string spm = words[i+1].get_string();
+        string s1;
+        if (spm == "+" || spm == "-") {
+            s1 = spm;
+            in1 += 1;
+        }
+        // s1 is the digits part as the user wrote it, sign included. It is
+        // only used in the error message below.
+        s1 += words[in1].get_string();
+
+        // in1 is where the first number is, it needs to be a number.
+        if (!words[in1].is_number()) continue;
+
+        // in2 is where the second number is, it needs to be a number.
+        int in2 = in1 + 1;
+        if (in2 > (int)words.size() - 1) break;
+        if (!words[in2].is_number()) continue;
+        // s2 is the second word without its first character (the prepended
+        // 1 described below, once the checks that follow have passed). It is
+        // only used in the error message below.
+        string s2 = words[in2].get_string();
+        s2.erase(s2.begin());
+
+        // The first character of the second number should be an e or E.
+        // But at this point we have already detected this and prepended
+        // a 1 on to the word. So the first character should be 1 and the
+        // second character should be e or E
+        char c30 = words[in2].get_string()[0];
+        if (c30 != '1') continue;
+
+        char c31 = words[in2].get_string()[1];
+        if (c31 != 'e' && c31 != 'E') continue;
+
+        // The next word, if present should be a comma.
+        int ic2 = in2 + 1;
+        if (ic2 <= (int)words.size()-1) {
+            if (!words[ic2].is_comma()) continue;
+        }
+
+        // A deprecated input has been found, ignore it, fix it, or
+        // generate a fatal error.
+        if (action == "ignore") continue;
+        if (action == "fix") {
+            // Drop the first character of the exponent word (the prepended
+            // 1) and then join the digits and the exponent.
+            // NOTE(review): presumably merge_words(in1, in2) leaves a single
+            // word at in1 - confirm against its definition.
+            words[in2].erase_char(0);
+            merge_words(in1, in2);
+            continue;
+        }
+        if (action == "error") {
+            words[in1].fatal_error(serr, ierr);
+            serr << "Possible error, detected the following" << endl;
+            serr << "   comma digits space exponent comma" << endl;
+            serr << "The digits and exponent are separated by one or more spaces,"
+                 << endl;
+            serr << "this is not allowed for a single number." << endl;
+            serr << "The digits are: " << s1 << "  and the exponent is: "
+                 << s2 << endl;
+            serr << "If this is one number, then remove the space." << endl;
+            serr << "If this is two numbers, then put a comma between the"
+                 << " digits and exponent."<< endl;
+            serr << "This error (and the same error in subsequent lines) can"
+                 << endl;
+            serr << "be controlled with the following input file command"
+                 << endl;
+            serr << "and arguments (put before the lines with errors)" << endl;
+            serr << "      depcmd_dse argument" << endl;
+            serr << "where argument has one of the following values:" << endl;
+            serr << "      fix     Silently remove the space, merge into one number" << endl;
+            serr << "      ignore  Silently treat as two numbers" << endl;
+            serr << "      error   Generate fatal error (default)" << endl;
+            serr << "While the default is to generate an error, if the" << endl;
+            serr << "command name is matdef, then the default is fix." << endl;
+            ierr = 2;
+            continue;
+        }
+    }
+}
+
+
+
+// ===========================================================================
+// Fatal error
+// This is mainly meant to be called from some other class that does not
+// know about words.
+//
+// Writes the fatal error header for word iw to serr: the line number in
+// the file, the text of the line, and the file name. ierr is part of the
+// interface but is not modified, the caller sets the error code.
+// ===========================================================================
+void Cmd::fatal_error(int iw, stringstream &serr, int &ierr)
+{
+    // Explicitly mark the unused parameter; unlike assert(ierr == ierr) this
+    // also silences the warning when NDEBUG is defined.
+    (void)ierr;
+
+    Word &w = words[iw];
+    int lnum = w.get_line_number();
+    int file_lnum = w.get_file_line_number();
+    string fname = w.get_filename();
+
+    serr << endl;
+    serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl;
+    serr << "    " << (*lines)[lnum-1] << endl;
+    serr << "in file: " << fname << endl;
+}
+
+// ===========================================================================
+// This is meant to be called from within this class.
+//
+// Writes the fatal error header for this line to serr, using the line
+// number, file line number and file name stored in the class. ierr is part
+// of the interface but is not modified, the caller sets the error code.
+// ===========================================================================
+void Cmd::fatal_error2(stringstream &serr, int &ierr)
+{
+    // Explicitly mark the unused parameter; unlike assert(ierr == ierr) this
+    // also silences the warning when NDEBUG is defined.
+    (void)ierr;
+
+    serr << endl;
+    serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl;
+    serr << "    " << (*lines)[line_number-1] << endl;
+    serr << "in file: " << filename << endl;
+}
+
+
+// ===========================================================================
+// Warning
+//
+// Writes the warning header for word iw to serr: the line number in the
+// file, the text of the line, and the file name. ierr is part of the
+// interface but is not modified.
+// ===========================================================================
+void Cmd::warning(int iw, stringstream &serr, int &ierr)
+{
+    // Explicitly mark the unused parameter; unlike assert(ierr == ierr) this
+    // also silences the warning when NDEBUG is defined.
+    (void)ierr;
+
+    Word &w = words[iw];
+    int lnum = w.get_line_number();
+    int file_lnum = w.get_file_line_number();
+    string fname = w.get_filename();
+
+    serr << endl;
+    serr << "*** WARNING in line " << file_lnum << ":" << endl;
+    serr << "    " << (*lines)[lnum-1] << endl;
+    serr << "in file: " << fname << endl;
+}
+
+
+// ===========================================================================
+// Print one line of a duplicate lines warning to ss. The output is the
+// file name of word iw in a field of width fn_width, its file line number
+// in a field of width lnum_width, the after_lnum string, and finally the
+// text of the line. No end of line is written.
+// ===========================================================================
+void Cmd::print_duplicate_line(int iw, stringstream &ss, int fn_width,
+                               int lnum_width, string after_lnum)
+{
+    Word &w = words[iw];
+    ss << setw(fn_width) << w.get_filename();
+    ss << setw(lnum_width) << w.get_file_line_number() << after_lnum;
+    ss << (*lines)[w.get_line_number()-1];
+}
+
+
+// ===========================================================================
+// Field widths used to line up the duplicate lines warnings.
+//     fn_width     Number of characters in the file name of word iw.
+//     lnum_width   Number of digits in the file line number of word iw,
+//                  at least 1 and at most 7.
+// ===========================================================================
+void Cmd::get_duplicate_sizes(int iw, int &fn_width, int &lnum_width)
+{
+    Word &w = words[iw];
+    const int file_lnum = w.get_file_line_number();
+    const string fname = w.get_filename();
+
+    fn_width = (int)fname.size();
+
+    // Count digits by comparing against 10, 100, ... and stop at 7 digits.
+    int ndigits = 1;
+    for (int bound = 10; ndigits < 7 && file_lnum >= bound; bound *= 10) {
+        ++ndigits;
+    }
+    lnum_width = ndigits;
+}
+
+
+// ===========================================================================
+// Keep track of how often an array element has been set and complain when
+// it is set more than once.
+//
+// Look at the places where this function is called to understand the
+// following indices.
+//     wdex = i    Index into the words array.
+//     cdex = k    C index in the output array.
+//
+// The first time element cdex is set, this command and wdex are stored in
+// dup_cmd1 and dup_wdex1 so that a later message can point at the first
+// setting. dup_vals[cdex] counts the settings.
+//
+// dup_fatal selects the reaction to a duplicate:
+//     0    Nothing is reported.
+//     1    Warning, ierr is set to 1.
+//     2    Fatal error, ierr is set to 3.
+// ===========================================================================
+void Cmd::error_dup_line(string &cname, int wdex, int cdex,
+                         vector<int> &dup_wdex1, vector<Cmd *> &dup_cmd1,
+                         vector<int> &dup_vals, const vector<int> &size,
+                         int dup_fatal, stringstream &serr, int &ierr)
+{
+    // First setting of this element, remember where it came from.
+    if (dup_vals[cdex] == 0) {
+        dup_cmd1[cdex] = this;
+        dup_wdex1[cdex] = wdex;
+    }
+    ++dup_vals[cdex];
+
+    // Nothing to report when checking is off or this is the first setting.
+    if (dup_fatal == 0 || dup_vals[cdex] <= 1) return;
+
+    const bool is_fatal = (dup_fatal == 2);
+    const bool is_warning = (dup_fatal == 1);
+
+    Cmd *first_cmd = dup_cmd1[cdex];
+    int first_wdex = dup_wdex1[cdex];
+
+    // The dimension of the array, 0,1,2,3,...
+    const int ndim = (int)size.size();
+
+    if (is_fatal) words[wdex].fatal_error(serr, ierr);
+    if (is_warning) words[wdex].warning(serr, ierr);
+
+    // Turn the C index back into one index per dimension for the message.
+    int nelem = 1;
+    for (int d = 0; d < ndim; ++d) {
+        nelem *= size[d];
+    }
+    vector<int> indices(ndim, 0);
+    Parser_utils putils(index_base);
+    putils.reverse_dex(cdex, nelem, indices, size);
+
+    serr << "A duplicate value has been specified for: " << cname << "(";
+    for (int d = 0; d < ndim; ++d) {
+        serr << indices[d];
+        if (d < ndim-1) serr << ",";
+    }
+    serr << ") = " << words[wdex].get_string() << endl;
+
+    // Show where the element was set the first time.
+    serr << "This array element was first specified in line "
+         << first_cmd->get_file_line_number(first_wdex) << endl;
+    string first_fname = first_cmd->get_filename(first_wdex);
+    serr << "    " << (*lines)[first_cmd->get_line_number(first_wdex)-1]
+         << endl;
+    serr << "in file: " << first_fname << endl;
+
+    if (is_fatal) {
+        serr << "This fatal error can be turned into a warning with the command "
+             << endl << "    duplicate_array_values = warning" << endl;
+    }
+    if (is_warning) {
+        serr << "This warning can be turned into a fatal error with the command "
+             << endl << "    duplicate_array_values = fatal" << endl;
+    }
+    serr << "Duplicate array value checking can be turned off totally with"
+         << endl << "    duplicate_array_values = none" << endl;
+    serr << "This is not recommended since you will lose the opportunity"
+         << endl << "to check for legimate errors in your input." << endl;
+
+    ierr = is_fatal ? 3 : 1;
+}
+
+
+// ===========================================================================
+// Combine word pairs that belong together into one word.
+//
+// Anywhere on the line, "+" "+" becomes "++" and "-" "-" becomes "--",
+// unless the pair is followed by a word for which is_numvar() is true.
+//
+// At the start of the line the following commands may be written as two
+// words and are combined into one:
+//     end do          -> enddo
+//     end subroutine  -> endsubroutine
+//     end when        -> endwhen
+//     end if          -> endif
+//     else if         -> elseif
+// ===========================================================================
+void Cmd::handle_two_words()
+{
+    // Handle + + -> ++ over the whole line first, then - - -> --
+    static const char *const signs[2] = { "+", "-" };
+    for (int k = 0; k < 2; ++k) {
+        const string sign = signs[k];
+
+        for (int i = 0; i < (int)words.size()-1; ++i) {
+            if (words[i].get_string() != sign) continue;
+            if (words[i+1].get_string() != sign) continue;
+
+            // Combine when the pair ends the line, or when the word
+            // after the pair is not a number or variable.
+            const bool at_end = (i == (int)words.size()-2);
+            if (!at_end && words[i+2].is_numvar()) continue;
+
+            int op_lnum = words[i].get_line_number();
+            int op_file_lnum = words[i].get_file_line_number();
+            string op_fname = words[i].get_filename();
+            string op = sign + sign;
+            Word w(op, op_lnum, op_file_lnum, op_fname, lines);
+            replace_words(i, i+1, w);
+        }
+    }
+
+    // The rest of these have at least two words on the line,
+    // like "end subroutine".
+    if ((int)words.size() < 2) return;
+
+    // The combined word inherits its location from the first word.
+    int lnum = words[0].get_line_number();
+    int file_lnum = words[0].get_file_line_number();
+    string fname = words[0].get_filename();
+
+    // First word, second word, combined word.
+    static const char *const pairs[][3] = {
+        { "end",  "do",         "enddo"         },
+        { "end",  "subroutine", "endsubroutine" },
+        { "end",  "when",       "endwhen"       },
+        { "end",  "if",         "endif"         },
+        { "else", "if",         "elseif"        }
+    };
+    const int npairs = (int)(sizeof(pairs) / sizeof(pairs[0]));
+
+    for (int k = 0; k < npairs; ++k) {
+        if (words[0].get_string() == pairs[k][0] &&
+            words[1].get_string() == pairs[k][1]) {
+            string s = pairs[k][2];
+            Word w(s, lnum, file_lnum, fname, lines);
+            replace_words(0, 1, w);
+            // The first word is now the combined word, which matches
+            // none of the remaining pairs.
+            break;
+        }
+    }
+}
+
+
+// ===========================================================================
+// Check for end of input. There are several ways user input ends:
+//    End of file
+//    Encounter a stop command
+//    Encounter a fatal error command.
+//
+// Returns true when the first word on the line is "stop" or "fatal_error",
+// false otherwise. For fatal_error the location of the line and the user
+// supplied message are written to serr and ierr is set to 2.
+// The kill_run argument is part of the interface but is not used here.
+// ===========================================================================
+bool Cmd::check_input_end(bool kill_run, stringstream &serr, int &ierr)
+{
+    // Explicitly mark the unused parameter; unlike
+    // assert(kill_run == kill_run) this also silences the warning when
+    // NDEBUG is defined.
+    (void)kill_run;
+
+    const string keyword = words[0].get_string();
+    if (keyword == "stop") return true;
+    if (keyword != "fatal_error") return false;
+
+    int lnum = words[0].get_line_number();
+    int file_lnum = words[0].get_file_line_number();
+    string fname = words[0].get_filename();
+    string s = (*lines)[lnum-1];
+
+    serr << endl;
+    serr << "*** User has issued a fatal_error command in line "
+         << file_lnum << ":" << endl;
+    serr << "    " << s << endl;
+    serr << "in file: " << fname << endl;
+    serr << endl << "The user supplied fatal_error message is: " << endl;
+    serr << "    ";
+
+    // The message is whatever follows the command on the original line:
+    // skip 12 characters from the first f, i.e. the 11 characters of
+    // "fatal_error" plus the separator after it.
+    int msg_start = (int)s.find("f", 0) + 12;
+    if (msg_start < (int)s.size()) {
+        serr << s.substr(msg_start);
+    }
+    serr << endl;
+
+    ierr = 2;
+    return true;
+}
+
+
+
+
+// ***************************************************************************
+// ***************************************************************************
+// Operations on the deque of words.
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Return the index of the first word in the range i1 through i2 inclusive
+// whose string is exactly s, or -1 if there is no such word.
+// ===========================================================================
+int Cmd::find(int i1, int i2, string s)
+{
+    int i = i1;
+    while (i <= i2 && words[i].get_string() != s) {
+        ++i;
+    }
+    return (i <= i2) ? i : -1;
+}
+
+
+// ===========================================================================
+// Return the index of the first word in the range i1 through i2 inclusive
+// that consists of exactly one character and whose character occurs in s.
+// Returns -1 if there is no such word.
+// ===========================================================================
+int Cmd::find_any_char(int i1, int i2, string s)
+{
+    for (int i = i1; i <= i2; ++i) {
+        const string w = words[i].get_string();
+        // Only a one character word can be equal to a character of s.
+        if (w.size() == 1 && s.find(w[0]) != string::npos) {
+            return i;
+        }
+    }
+    return -1;
+}
+
+
+// ===========================================================================
+// Return the index of the last word in the range i1 through i2 inclusive
+// whose string is exactly s, or -1 if there is no such word.
+// ===========================================================================
+int Cmd::find_last(string s, int i1, int i2)
+{
+    // Walk backwards, the first hit is the last occurrence.
+    for (int i = i2; i >= i1; --i) {
+        if (words[i].get_string() == s) return i;
+    }
+    return -1;
+}
+
+
+// ===========================================================================
+// Split fstr around the first occurrence of subs.
+//
+// On success vs holds, in order, the text to the left of subs (only if
+// there is any), subs itself, and the text to the right of subs (only if
+// there is any), and true is returned.
+//
+// If subs does not occur in fstr, or fstr is exactly subs, there is
+// nothing to separate: vs is left empty and false is returned.
+// ===========================================================================
+bool Cmd::separate_str(string &subs, string &fstr, vector<string> &vs)
+{
+    vs.clear();
+
+    const string::size_type pos = fstr.find(subs);
+    if (pos == string::npos || fstr == subs) return false;
+
+    // Left part.
+    if (pos > 0) {
+        vs.push_back(fstr.substr(0, pos));
+    }
+
+    // The sub string itself.
+    vs.push_back(subs);
+
+    // Right part, everything after the end of the sub string.
+    const string::size_type tail = pos + subs.size();
+    if (tail < fstr.size()) {
+        vs.push_back(fstr.substr(tail));
+    }
+
+    return true;
+}
+
+
+// ===========================================================================
+// Find the location of the closing symbol that matches the opening symbol.
+// Symbol examples are (), [], {}
+// We assume that the opening symbol has been found and we are starting the
+// search after the opening symbol location.
+// Nesting is handled, for example, (...(...(...)...)...)
+// ===========================================================================
+int Cmd::find_closing_symbol(string opensym, string closesym, int i1)
+{
+    // depth counts opening symbols met since i1 that are still unclosed; a
+    // closing symbol met at depth zero pairs with the caller's opening symbol.
+    int depth = 0;
+    for (int iw = i1; iw < (int)words.size(); ++iw) {
+        const string token = words[iw].get_string();
+        if (token == opensym) {
+            ++depth;
+        }
+        else if (token == closesym) {
+            if (depth == 0) return iw;
+            --depth;
+        }
+    }
+    return -1;
+}
+
+
+// ===========================================================================
+// Delete words i1 through i2 inclusive from the deque.
+// ===========================================================================
+void Cmd::delete_words(int i1, int i2)
+{
+    // erase() takes a half-open range, hence the +1 to include word i2.
+    words.erase(words.begin() + i1, words.begin() + i2 + 1);
+}
+
+
+// ===========================================================================
+// Replace words i1 through i2 inclusive with word w.
+// ===========================================================================
+void Cmd::replace_words(int i1, int i2, Word &w)
+{
+    // Once the old range is gone, index i1 is where it used to start.
+    delete_words(i1, i2);
+    words.insert(words.begin() + i1, w);
+}
+
+
+// ===========================================================================
+// Replace words i1 through i2 inclusive with all the words in vector vw.
+// ===========================================================================
+void Cmd::replace_words(int i1, int i2, vector<Word> &vw)
+{
+    // Drop the old range, then splice all of vw in at position i1 with one
+    // range insert. The words keep their vw order and the tail of the
+    // deque is shifted once instead of once per inserted word.
+    delete_words(i1, i2);
+    words.insert(words.begin() + i1, vw.begin(), vw.end());
+}
+
+
+// ===========================================================================
+// Merge words i1 through i2 inclusive into one word located at i1, remove
+// words i1+1 through i2 inclusive.
+// ===========================================================================
+void Cmd::merge_words(int i1, int i2)
+{
+    // The merged word inherits line numbers and file name from word i1.
+    int line_no = words[i1].get_line_number();
+    int file_line_no = words[i1].get_file_line_number();
+    string file_name = words[i1].get_filename();
+
+    // Its text is word i1's string followed by those of words i1+1..i2.
+    string merged_text = words[i1].get_string();
+    for (int iw = i1 + 1; iw <= i2; ++iw) merged_text += words[iw].get_string();
+
+    Word merged(merged_text, line_no, file_line_no, file_name, lines);
+    replace_words(i1, i2, merged);
+}
+
+
+// ===========================================================================
+// Find the equals sign on the line.
+// ===========================================================================
+int Cmd::find_equals()
+{
+    // Return the index of the first word that is exactly "=", or -1 when
+    // the line contains no such word.
+    const int nwords = (int)words.size();
+    for (int iw = 0; iw < nwords; ++iw) {
+        if (words[iw].get_string() == "=") return iw;
+    }
+
+    return -1;
+}
+
+
+
+// ***************************************************************************
+// ***************************************************************************
+// Handle processed flags.
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Set all the processed flags in the line to be false.
+// ===========================================================================
+void Cmd::clear_processed()
+{
+    // Clearing is just setting every flag to false, so reuse the loop in
+    // set_processed() instead of repeating it here.
+    set_processed(false);
+}
+
+void Cmd::set_processed(bool ip)
+{
+    // Stamp the same processed flag, ip, on every word of the line.
+    deque<Word>::iterator it = words.begin();
+    for (; it != words.end(); ++it) it->set_processed(ip);
+}
+
+
+
+// ===========================================================================
+// Check processed flags for each word.
+// ===========================================================================
+void Cmd::check_processed(bool &good, stringstream &serr, int &ierr)
+{
+    // Reports, through good/serr/ierr, every word that was not processed.
+    //   good  set to false when anything is unprocessed, else untouched.
+    //   serr  receives the error text.
+    //   ierr  set to 2 when anything is unprocessed, else untouched.
+
+    // A line with no words has nothing to check. Without this guard the
+    // words[0] access below would index an empty deque.
+    if (words.empty()) return;
+
+    // If none of the words have been processed, print the entire line as a
+    // single error. This saves the user from having to wade through an
+    // error print for every word on the line.
+    bool any_processed = false;
+    for (int i=0; i<(int)words.size(); i++) {
+        any_processed = words[i].get_processed();
+        if (any_processed) break;
+    }
+    if (!any_processed) {
+        good = false;
+        words[0].fatal_error(serr, ierr);
+        serr << "This line has not been processed." << endl;
+        ierr = 2;
+        return;
+    }
+
+    // At least one word on the line has been processed, so flag each
+    // unprocessed word individually.
+    for (int i=0; i<(int)words.size(); i++) {
+        if (words[i].get_processed()) continue;
+        good = false;
+        words[i].fatal_error(serr, ierr);
+        serr << "A word on this line has not been processed." << endl;
+        serr << "Not proccessed word = " << words[i].get_string() << endl;
+        ierr = 2;
+    }
+}
+
+
+// ***************************************************************************
+// ***************************************************************************
+// Debug
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Print all the words on the line mainly for debugging.
+// Output is to a stringstream so the calling method can decide what to do
+// with the output - send it to the screen, use it for testing, whatever.
+// ===========================================================================
+void Cmd::print_all_words()
+{
+    stringstream report;      // filled in by the stringstream overload
+    print_all_words(report);
+    cout << report.str();     // then copied to standard output
+}
+
+void Cmd::print_all_words(stringstream &ss)
+{
+    ss << "*** Command name = " << cmd_name << endl;   // header line
+    for (deque<Word>::iterator it = words.begin(); it != words.end(); ++it) {
+        stringstream type_text;     // receives what print_type() writes
+        it->print_type(type_text);
+        ss << it->get_string() << " " << type_text.str() << endl;
+    }
+    ss << endl;                     // blank line ends the listing
+}
+
+
+// ===========================================================================
+// Print the words on one line, each followed by a space (for debugging).
+// Output is to a stringstream so the calling method can decide what to do
+// with the output - send it to the screen, use it for testing, whatever.
+// ===========================================================================
+void Cmd::print_using_words(stringstream &ss)
+{
+    // The flag given to get_print_string() is false for word 0 only.
+    for (int iw = 0; iw < (int)words.size(); ++iw) {
+        bool quote_flag = (iw != 0);
+        ss << words[iw].get_print_string(quote_flag) << " ";
+    }
+}
+
+// ===========================================================================
+// Another version of printing all the words on the line.
+// This version is mainly for printing out the final cmds buffer.
+//
+// The output is formatted, commas are put back in, spaces are handled better,
+// if a line is too long (see nctot_max), it is split into more than one line.
+//
+// For example, suppose the words on a line were
+//     a4d ( 1 1 1 2 ) = -3.4 4.7 5.2 4.6e19
+// spaces are used to delimit the words, but it is not very readable. This
+// routine will print the above line as
+//     a4d(1, 1, 1, 2) = -3.4, 4.7, 5.2, 4.6e19
+// This has the added advantage that spaces can be eliminated and a compact
+// form can be achieved.
+//
+// Another example is the line
+//    strinsert_cmd01 = Use The Force
+// The string "Use The Force" is actually one word, even though it appears to
+// be three words. This routine prints this correctly as
+//    strinsert_cmd01 = "Use The Force"
+// ===========================================================================
+void Cmd::print_using_words_fm(stringstream &ss)
+{
+    // An empty line prints nothing. Without this guard the inner loop would
+    // never run, done would stay false, and the outer loop would not end.
+    if (words.empty()) return;
+    int nctot_max = 75;   // a line is split before a word that would pass this
+    int istart = 0;       // index of the first word of the current output line
+    int ieq = -1;         // index of the latest "=" found, -1 if none
+    int ip1 = -1;         // index of the latest "(" found, -1 if none
+    int ip2 = -1;         // index of the latest ")" found (one-word lookahead)
+    for (;;) {
+        if (istart > 0) ss << "     ";   // indent continuation lines
+        int nctot = 0;
+        bool done = false;
+        for (int i=istart; i<(int)words.size(); i++) {
+            bool enc_quote = (i != 0);
+            string s = words[i].get_print_string(enc_quote);
+            if (s == "=") ieq = i;
+            if (s == "(") ip1 = i;
+            if (s == ")") ip2 = i;
+            string sp = "";   // print string of the next word, "" at the end
+            if (i<(int)words.size()-1) {
+                sp = words[i+1].get_print_string(enc_quote);
+            }
+            if (sp == ")") ip2 = i+1;
+            int nc = (int)s.size();
+            if ((i>istart) && (nc+1+nctot > nctot_max)) {
+                istart = i;   // word i starts the next output line
+                break;
+            }
+            if (i >= (int)words.size()-1) {
+                done = true;
+                ss << s;
+            }
+            else {
+                // Choose what follows word i: " ", ", " or nothing.
+                string endstr = " ";
+                int endinc = 1;
+                if (ieq > -1 && i > ieq) {
+                    endstr = ", ";
+                    endinc = 2;
+                }
+                if (ip1 > -1 && (ip2 == -1 || i < ip2)) {
+                    if (i > ip1 && (ip2 == -1 || i < ip2-1)) {
+                        endstr = ", ";
+                        endinc = 2;
+                    }
+                }
+                if (endstr == " ") {
+                    if (s == "(") {
+                        endstr = "";
+                        endinc = 0;
+                    }
+                    if (i == ip2-1) {
+                        endstr = "";
+                        endinc = 0;
+                    }
+                    if (i == 0 && sp == "(") {
+                        endstr = "";
+                        endinc = 0;
+                    }
+                }
+                ss << s << endstr;
+                nctot += nc + endinc;
+            }
+        }
+        if (done) break;
+        ss << endl;
+    }
+}
+
+
+// ===========================================================================
+// Print the original command before processing, mainly for debugging.
+// Output is to a stringstream so the calling method can decide what to do
+// with the output - send it to the screen, use it for testing, whatever.
+// ===========================================================================
+void Cmd::print_original_string(stringstream &ss)
+{
+    ss << original_str;   // appended as is; no newline is added here
+}
+
+
+// ===========================================================================
+// This is mainly for debugging this class. It prints on all procs.
+// ===========================================================================
+void Cmd::debug_print_words(string s)
+{
+    // First the caller's label on its own line, then all the words on one
+    // indented line, each followed by a space.
+    cout << s << endl << "    ";
+    for (int iw = 0; iw < (int)words.size(); ++iw) {
+        bool quote_flag = (iw != 0);
+        cout << words[iw].get_print_string(quote_flag) << " ";
+    }
+    cout << endl;
+}
+
+
+
+
+
+
+} // End of the PP namespace
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Comm.hh
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Comm.hh
@@ -0,0 +1,93 @@
+/* Copyright 2015.  Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ *  PowerParser is a general purpose input file parser for software applications.
+ *
+ *  Authors: Chuck Wingate   XCP-2   caw@lanl.gov
+ *           Robert Robey    XCP-2   brobey@lanl.gov
+ */
+
+#ifndef COMMHHINCLUDE
+#define COMMHHINCLUDE
+
+#ifdef __MPI__
+#define HAVE_MPI
+#endif
+
+#ifdef HAVE_MPI
+#define MPI_NO_CPPBIND
+#include "mpi.h"
+#endif
+
+namespace PP {
+
+class Comm
+{
+ public:
+
+    // Set up / tear down; Comm.cc shows what these do when MPI is enabled.
+    Comm();
+    ~Comm();
+
+    // Broadcast count chars in buffer from rank 0 (no-op on one process).
+    void broadcast(char *buffer, int count);
+    // Abort the run with error code 1.
+    void global_abort_parser();
+
+    // Rank of the calling process (mype).
+    int getProcRank(void) const { return mype; }
+
+    // Total number of processes (npes).
+    int getNumProcs(void) const { return npes; }
+
+    // Rank used as the I/O process (iope).
+    int getIORank(void) const { return iope; }
+
+    // True when the calling process is the I/O process.
+    bool isIOProc(void) const { return mype == iope; }
+
+ private:
+    // Set to 1 by the constructor when it calls MPI_Init itself; the
+    // destructor then calls MPI_Finalize. Only meaningful in MPI builds.
+    int init_flag;
+
+    // Number of processes, rank of this process, rank of the I/O process.
+    int npes, mype, iope;
+
+    //Comm(const Comm&);
+    //Comm& operator=(const Comm&);
+};
+
+
+} // End of PP namespace
+
+#endif
+
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Comm.cc
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Comm.cc
@@ -0,0 +1,117 @@
+/* Copyright 2015.  Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ *  PowerParser is a general purpose input file parser for software applications.
+ *
+ *  Authors: Chuck Wingate   XCP-2   caw@lanl.gov
+ *           Robert Robey    XCP-2   brobey@lanl.gov
+ */
+
+#include <cstdlib>
+#include <assert.h>
+#include "Comm.hh"
+
+namespace PP {
+
+
+// ===========================================================================
+// Constructor
+// ===========================================================================
+Comm::Comm()
+{
+    npes = 1;
+    mype = 0;
+    iope = 0;
+
+    // Set unconditionally so it is never indeterminate in a non-MPI build.
+    init_flag = 0;
+
+#ifdef HAVE_MPI
+    // No command line is available here; argc is 0 to match the NULL argv.
+    int argc = 0;
+    char **argv = NULL;
+    // Initialize MPI only if nobody has done so already, and remember that
+    // we did: the destructor finalizes MPI only when init_flag is set.
+    int init_check;
+    MPI_Initialized(&init_check);
+    if (! init_check) {
+       init_flag = 1;
+       MPI_Init(&argc, &argv);
+    }
+
+    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
+    MPI_Comm_size(MPI_COMM_WORLD, &npes );
+    MPI_Comm_rank(MPI_COMM_WORLD, &mype );
+#endif
+}
+
+// ===========================================================================
+// Destructor
+// ===========================================================================
+Comm::~Comm()
+{
+#ifdef HAVE_MPI
+   // MPI is finalized here only if this object's constructor started it.
+   if (! init_flag) return;
+
+   init_flag = 0;
+   MPI_Finalize();
+#endif
+}
+
+// ===========================================================================
+// Broadcast
+// ===========================================================================
+void Comm::broadcast(char *buffer, int count)
+{
+   // The void casts silence unused-parameter warnings in non-MPI builds;
+   // unlike an assert they still do so when NDEBUG is defined.
+   (void)buffer;
+   (void)count;
+   if (npes == 1) return;   // single process: nobody to send to
+#ifdef HAVE_MPI
+   MPI_Bcast(buffer, count, MPI_CHAR, 0, MPI_COMM_WORLD);
+#endif
+}
+
+// ===========================================================================
+// Error handling
+// ===========================================================================
+void Comm::global_abort_parser()
+{
+#ifdef HAVE_MPI
+   MPI_Abort(MPI_COMM_WORLD, 1);   // ask MPI to abort MPI_COMM_WORLD, code 1
+#endif
+   std::exit(1);                   // non-MPI build, or MPI_Abort returned
+}
+// ===========================================================================
+} // End of PP namespace
+
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Function.hh
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Function.hh
@@ -0,0 +1,120 @@
+/* Copyright 2015.  Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ *  PowerParser is a general purpose input file parser for software applications.
+ *
+ *  Authors: Chuck Wingate   XCP-2   caw@lanl.gov
+ *           Robert Robey    XCP-2   brobey@lanl.gov
+ */
+
+#ifndef FUNCTIONHHINCLUDE
+#define FUNCTIONHHINCLUDE
+
+// ***************************************************************************
+// ***************************************************************************
+// This class holds information about a function. It is mostly for use with
+// the parser.
+// ***************************************************************************
+// ***************************************************************************
+
+#include <string>
+#include <sstream>
+#include <vector>
+#include <deque>
+
+namespace PP
+{
+using std::string;
+using std::deque;
+using std::stringstream;
+using std::vector;
+
+enum FuncType {FUNC_};   // NOTE(review): not used in this header - confirm it is needed
+
+//class ErrorState;
+
+class Function
+{
+// Describes one named function and evaluates it on real or string arguments.
+public:
+    Function();
+    Function(string nme, bool ext, int na, string ftype,  string fdes);
+
+    // Evaluate a real function: double arguments in vd, double result.
+    double evaluate(vector<double> &vd, stringstream &serr, int &ierr,
+                    int line_number, int file_line_number,
+                    string filename, deque<string> *lines);
+    // Evaluate a string function: string arguments in vs, string result.
+    string evaluate(vector<string> &vs, stringstream &serr, int &ierr,
+                    int line_number, int file_line_number,
+                    string filename, deque<string> *lines);
+
+    // Accessor methods; const so they can be used on a const Function.
+    string get_name()        const { return name; }
+    int    get_num_args()    const { return nargs; }
+    string get_description() const { return description; }
+    string get_type()        const { return type; }
+
+private:
+
+    void name_err(stringstream &serr, int &ierr,
+                  int line_number, int file_line_number,
+                  string filename, deque<string> *lines);
+
+    void args_mismatch_err(int nargs_found, int nargs_expected,
+                           stringstream &serr, int &ierr,
+                           int line_number, int file_line_number,
+                           string filename, deque<string> *lines);
+
+    // The name of the function.
+    string name;
+
+    // Whether the function is external or internal. External functions
+    // are C++ functions like sin(), log(), ... Internal functions
+    // are defined within the input to the parser (this feature is not
+    // implemented yet).
+    bool external;
+
+    // The number of arguments for the function.
+    int nargs;
+
+    // A text description of the function.
+    string description;
+
+    // The type of function. Allowed types are:
+    //     real    double arguments, double result (cos, sin, log, ...)
+    //     string  string arguments, string results (strlen, strcat, ...)
+    string type;
+};
+
+
+} // End of the PP namespace
+
+#endif
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Function.cc
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Function.cc
@@ -0,0 +1,324 @@
+/* Copyright 2015.  Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ *  PowerParser is a general purpose input file parser for software applications.
+ *
+ *  Authors: Chuck Wingate   XCP-2   caw@lanl.gov
+ *           Robert Robey    XCP-2   brobey@lanl.gov
+ */
+
+// ***************************************************************************
+// ***************************************************************************
+// This class holds information about a function. It is mostly for use with
+// the parser.
+// ***************************************************************************
+// ***************************************************************************
+#include <iostream>
+#include <iomanip>
+#include <string>
+#include <sstream>
+#include <vector>
+#include <deque>
+#include <cctype>
+#include <cmath>
+
+#include "stdio.h"
+#include "stdlib.h"
+
+#include "Function.hh"
+
+namespace PP
+{
+using std:: string;
+using std::cout;
+using std::endl;
+using std::deque;
+using std::stringstream;
+using std::setprecision;
+using std::vector;
+
+
+// ===========================================================================
+// Default constructor.
+// ===========================================================================
+Function::Function()
+    : name("__NO_NAME_GIVEN__"),   // placeholder until a real name is given
+      external(true),
+      nargs(1),
+      description(" "),
+      type("real")
+{
+}
+
+
+// ===========================================================================
+// Most used constructor for functions.
+// ===========================================================================
+Function::Function(string nme, bool ext, int na, string ftype, string fdes)
+    : name(nme),
+      external(ext),
+      nargs(na),
+      description(fdes),   // listed in member declaration order
+      type(ftype)
+{
+}
+
+
+// ===========================================================================
+// Evaluate the function. This is for the case that the arguments all have
+// values (double type values) and the function can be evaluated to a double.
+// ===========================================================================
+double Function::evaluate(vector<double> &vd, stringstream &serr, int &ierr,
+                          int line_number, int file_line_number,
+                          string filename, deque<string> *lines)
+{
+    // Evaluates the math function called name on the arguments in vd.
+    //   vd                argument values; vd.size() must equal nargs.
+    //   serr, ierr        on error, text is appended to serr and ierr is
+    //                     set to 2.
+    //   line_number       1-based index into *lines of the line echoed in
+    //                     error messages.
+    //   file_line_number  line number quoted in error messages.
+    //   filename          file name quoted in error messages.
+    // Returns the function value, or 0. after an error has been reported.
+    const int nsupplied = (int)vd.size();
+    if (nsupplied != nargs) {
+        args_mismatch_err(nsupplied, nargs, serr, ierr, line_number,
+                          file_line_number, filename, lines);
+        return 0.;
+    }
+
+    // Functions with one argument.
+    if (nargs == 1) {
+        const double x = vd[0];
+        // acos is only accepted for arguments in [-1, 1].
+        if (name == "acos") {
+            if (x < -1. || x > 1.) {
+                serr << endl;
+                serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl;
+                serr << "    " << (*lines)[line_number-1] << endl;
+                serr << "in file: " << filename << endl;
+                serr << "Argument to acos is out of bounds." << endl;
+                serr << "Argument = " << x << endl;
+                serr << "This must be between -1. and 1." << endl;
+                ierr = 2;
+                return 0.;
+            }
+            return acos(x);
+        }
+
+        // asin is only accepted for arguments in [-1, 1].
+        if (name == "asin") {
+            if (x < -1. || x > 1.) {
+                serr << endl;
+                serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl;
+                serr << "    " << (*lines)[line_number-1] << endl;
+                serr << "in file: " << filename << endl;
+                serr << "Argument to asin is out of bounds." << endl;
+                serr << "Argument = " << x << endl;
+                serr << "This must be between -1. and 1." << endl;
+                ierr = 2;
+                return 0.;
+            }
+            return asin(x);
+        }
+
+        // The remaining one-argument functions map straight to <cmath>.
+        if (name == "atan")  return atan(x);
+        if (name == "ceil")  return ceil(x);
+        if (name == "cos")   return cos(x);
+        if (name == "cosh")  return cosh(x);
+        if (name == "exp")   return exp(x);
+        if (name == "fabs")  return fabs(x);
+        if (name == "floor") return floor(x);
+        if (name == "log")   return log(x);
+        if (name == "log10") return log10(x);
+        if (name == "sin")   return sin(x);
+        if (name == "sinh")  return sinh(x);
+        if (name == "sqrt")  return sqrt(x);
+        if (name == "tan")   return tan(x);
+        if (name == "tanh")  return tanh(x);
+    }
+
+    // Functions with two arguments.
+    if (nargs == 2) {
+        const double a = vd[0];
+        const double b = vd[1];
+
+        if (name == "atan2") return atan2(a, b);
+        if (name == "fmod")  return fmod(a, b);
+
+        // When the comparison is false (a tie included) the result is b.
+        if (name == "max") return (a > b) ? a : b;
+        if (name == "min") return (a < b) ? a : b;
+
+        // pow is only accepted here for a strictly positive base.
+        if (name == "pow") {
+            if (a <= 0.) {
+                serr << endl;
+                serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl;
+                serr << "    " << (*lines)[line_number-1] << endl;
+                serr << "in file: " << filename << endl;
+                serr << "First argument (base) to pow is out of bounds." << endl;
+                serr << "Argument = " << a << endl;
+                serr << "This must be greater than 0." << endl;
+                ierr = 2;
+                return 0.;
+            }
+            return pow(a, b);
+        }
+    }
+
+    // Reaching this point means name is not a known function with this
+    // many arguments; callers are expected to have validated the name.
+    name_err(serr, ierr, line_number, file_line_number, filename, lines);
+    return 0.;
+}
+
+
+// ===========================================================================
+// Evaluate the function. This is for string functions.
+// ===========================================================================
+string Function::evaluate(vector<string> &vs, stringstream &serr, int &ierr,
+                          int line_number, int file_line_number,
+                          string filename, deque<string> *lines)
+{
+    // Evaluates the string function called name on the arguments in vs and
+    // returns the result as a string. On any error, text is appended to
+    // serr, ierr is set to 2 and "" is returned. Character positions taken
+    // from the arguments are 1-based and are converted to 0-based here.
+    // The number of args supplied must equal the number of args needed.
+    int nvs = (int)vs.size();
+    if (nvs != nargs) {
+        args_mismatch_err(nvs, nargs, serr, ierr, line_number,
+                          file_line_number, filename, lines);
+        return "";
+    }
+
+    // Functions with one argument.
+    if (nargs == 1) {
+        string s1 = vs[0];
+        if (name == "strlen") {
+            stringstream ss;
+            ss << (int)s1.size();
+            return ss.str();
+        }
+
+        if (name == "strtrim") {
+            // Remove trailing blanks and tabs. A string made up entirely
+            // of whitespace (or an empty one) is returned unchanged.
+            string::size_type iend = s1.find_last_not_of(" \t");
+            if (iend != string::npos) s1.erase(iend+1);
+            return s1;
+        }
+    }
+
+    // Functions with two arguments.
+    if (nargs == 2) {
+        if (name == "strcat") return vs[0] + vs[1];
+    }
+
+    // Functions with three arguments: a string, a start position, one more.
+    if (nargs == 3) {
+        string s1 = vs[0];
+        string s3 = vs[2];
+        int i1 = atoi(vs[1].c_str()) - 1;   // minus 1 to get c index
+
+        if (name == "strerase" || name == "strinsert" || name == "strsubstr") {
+            // erase/insert/substr throw std::out_of_range for a start past
+            // the end (a negative one wraps); report a parser error instead.
+            if (i1 < 0 || i1 > (int)s1.size()) {
+                serr << endl;
+                serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl;
+                serr << "    " << (*lines)[line_number-1] << endl;
+                serr << "in file: " << filename << endl;
+                serr << "Start position for " << name << " is out of bounds." << endl;
+                serr << "Position = " << i1+1 << endl;
+                serr << "This must be between 1 and " << s1.size()+1 << "." << endl;
+                ierr = 2;
+                return "";
+            }
+        }
+
+        if (name == "strerase") {
+            int i2 = atoi(s3.c_str()) - 1;
+            s1.erase(i1, i2-i1+1);
+            return s1;
+        }
+        if (name == "strinsert") {
+            s1.insert(i1, s3);
+            return s1;
+        }
+        if (name == "strsubstr") return s1.substr(i1, atoi(s3.c_str()));
+    }
+
+    // Reaching this point means name is not a known string function.
+    name_err(serr, ierr, line_number, file_line_number, filename, lines);
+    return "";
+}
+
+
+// ===========================================================================
+// Name not recognized error.
+// ===========================================================================
+void Function::name_err(stringstream &serr, int &ierr,
+                        int line_number, int file_line_number,
+                        string filename, deque<string> *lines)
+{
+    serr << endl
+         << "*** FATAL ERROR in line " << file_line_number << ":" << endl
+         << "    " << (*lines)[line_number-1] << endl   // echo the input line
+         << "in file: " << filename << endl
+         << "** Math function fatal error **" << endl
+         << "Name not recognized as a function." << endl
+         << "Name = " << name << endl;
+    ierr = 2;
+}
+
+
+// ===========================================================================
+// Number of args mismatch error.
+// ===========================================================================
+void Function::args_mismatch_err(int nargs_found, int nargs_expected,
+                                 stringstream &serr, int &ierr,
+                                 int line_number, int file_line_number,
+                                 string filename, deque<string> *lines)
+{
+    serr << endl
+         << "*** FATAL ERROR in line " << file_line_number << ":" << endl
+         << "    " << (*lines)[line_number-1] << endl   // echo the input line
+         << "in file: " << filename << endl
+         << "For function " << name << endl
+         << "Number of args expected = " << nargs_expected << endl
+         << "Number of args found = " << nargs_found << endl;
+    ierr = 2;
+}
+
+} // End of the PP namespace
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Globals.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Globals.h
@@ -0,0 +1,100 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ *           Other LANL authors
+ * 
+ */
+#ifndef _Globals_
+#define _Globals_
+
+/* Standard headers are included before the extern "C" block opens: C++ allows
+ * a standard header only outside any declaration, linkage specs included. */
+//#define NDEBUG 1
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Coincidence tolerance, typed double. */
+#define ENTITY_COINCIDENCE_TOLERANCE      ((double)1.0E-5)
+
+/* Step, typed long, by which KDTree.c sizes and grows its element array. */
+#define KDTREE_ELEMENT_BLOCKING_SIZE      ((long)1024)
+
+/* Large finite magnitudes standing in for infinity (not IEEE infinities). */
+#define POSITIVE_INFINITY (+1.0E+64)
+#define NEGATIVE_INFINITY (-1.0E+64)
+
+/* Axis selectors. */
+#define XAXIS ((unsigned long)0)
+#define YAXIS ((unsigned long)1)
+
+/* A pair of double coordinates. */
+typedef struct {
+   double x, y;
+} TVector;
+
+/* Exchange a and b via a temporary of type t; a and b are evaluated twice. */
+#ifndef SWAP
+#define SWAP(a,b,t) {t h; h = a; a = b; b = h; }
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/KDTree.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/KDTree.h
@@ -0,0 +1,147 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ *           Other LANL authors
+ * 
+ * 
+ *  Implements a 2-dimensional k-D tree. One begins to use the k-D tree by
+ *  adding the bounding box of geometric "elements" to the tree structure
+ *  through a call to "KDTree_AddElement". Every element should be of the same
+ *  type, but could be a single point, a line segment, triangles, etc. Once
+ *  all the element bounding boxes have been added, the user of the structure
+ *  may make queries against the tree. The actual tree is constructed lazily
+ *  when an actual query occurs on the structure.
+ *
+ *  This version only has one query -- intersection of a box with the elements
+ *  and a set of "candidate" elements are returned. The candidates are identified
+ *  by an index number (0, ...) signifying the order in which the element was
+ *  added to the tree. It is up to the calling code to do additional processing
+ *  based on the type of element being used to determine "real" intersections.
+ *
+ *  The process of actually building the tree takes "n log n" time. Queries 
+ *  take "log n" time.
+ *
+ */
+
+#ifndef _KDTree_
+#define _KDTree_
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+/* Globals.h includes standard C headers, so it is pulled in before extern "C" opens. */
+#include "Globals.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+#include "Bounds.h"
+
+/* Half tags used by the weighted circle queries in KDTree.c. */
+#define LEFT_HALF   0
+#define RIGHT_HALF  1
+#define BOTTOM_HALF 0
+#define TOP_HALF    1
+
+/* A 2-D k-D tree: the element list plus the lazily built node arrays. */
+typedef struct {
+   TBounds extent;                       /* grown by every added element */
+   int elements_num, elements_allocated; /* used / allocated slots of elements */
+   TBounds* elements;                    /* element boxes in insertion order */
+   bool tree_built;                      /* set by KDTree_CreateTree */
+   int tree_size;                        /* node slots: 2 * elements_num */
+   TBounds* tree_safety_boxes;           /* bounding box of each node */
+   int * tree_link;                      /* > 0: first child; <= 0: -(element index) */
+} TKDTree;
+
+extern void KDTree_Initialize(TKDTree *t);
+extern void KDTree_Finalize(TKDTree *t);
+extern void KDTree_Destroy(TKDTree* t);
+extern void KDTree_AddElement(TKDTree* t, TBounds* add);
+extern void KDTree_CreateTree(TKDTree* t);
+/* Indices of the elements whose box overlaps box; their count goes to *result_num. */
+extern void KDTree_QueryBoxIntersect(TKDTree* t,
+                                     int* result_num, int* result_indicies,
+                                     TBounds* box);
+
+/* Cells with an edge crossed by the circle of radius circ_radius about the origin. */
+void KDTree_QueryCircleIntersect_Double(TKDTree* t,
+                                 int* result_num, int* result_indicies,
+                                 double circ_radius, int ncells,
+                                 double *x, double *dx, double *y, double *dy);
+void KDTree_QueryCircleIntersect_Float(TKDTree* t,
+                                 int* result_num, int* result_indicies,
+                                 double circ_radius, int ncells,
+                                 float *x, float *dx, float *y, float *dy);
+/* Same selection, additionally storing one weight per reported cell in weight[]. */
+void KDTree_QueryCircleIntersectWeighted_Double(TKDTree* t,
+                                 int* result_num, int* result_indicies, double *weight,
+                                 double circ_radius, int ncells,
+                                 double *x, double *dx, double *y, double *dy);
+void KDTree_QueryCircleIntersectWeighted_Float(TKDTree* t,
+                                 int* result_num, int* result_indicies, double *weight,
+                                 double circ_radius, int ncells,
+                                 float *x, float *dx, float *y, float *dy);
+
+void KDTree_QueryCircleInterior_Double(TKDTree* t,
+                                 int* result_num, int* result_indicies,
+                                 double circ_radius, int ncells,
+                                 double *x, double *dx, double *y, double *dy);
+void KDTree_QueryCircleInterior_Float(TKDTree* t,
+                                 int* result_num, int* result_indicies,
+                                 double circ_radius, int ncells,
+                                 float *x, float *dx, float *y, float *dy);
+#ifdef __cplusplus
+}
+#endif
+#endif
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/KDTree.c
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/KDTree.c
@@ -0,0 +1,712 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ *           Other LANL authors
+ * 
+ */
+#include <math.h>
+#include "KDTree.h"
+
+#define MALLOC(n,t) ((t*)(malloc((n) * sizeof(t))))   /* n objects of type t; n is parenthesized so expressions like a + b size correctly */
+#define REALLOC(p,n,t) ((t*)(realloc((void*)(p), (n) * sizeof(t))))   /* resize p to n objects of type t */
+#define FREE(p) { if (p) free(p); }   /* expands to a braced block, not an expression */
+/* Selection-style partial sort of idx[0..num-1]; keys are Bounds_CenterAxis(&t->elements[idx[.]], cut_direction). */
+static void median_sort(TKDTree* t,
+                        unsigned int cut_direction, int k, int num, int* idx)
+{
+   int left, mid, right, a, i, j;
+   /* Each pass partitions idx[left..right] around a pivot, then narrows the range toward position k. */
+   for (left = 0, right = num - 1; (right - left) > 1;) {
+      mid = (left + right) / 2;
+      SWAP(idx[mid], idx[left + 1], int);
+      if(Bounds_CenterAxis(&(t->elements[idx[left + 1]]), cut_direction) >
+         Bounds_CenterAxis(&(t->elements[idx[right]]), cut_direction))
+         SWAP(idx[left + 1], idx[right], int);
+      if(Bounds_CenterAxis(&(t->elements[idx[left]]), cut_direction) >
+         Bounds_CenterAxis(&(t->elements[idx[right]]), cut_direction))
+         SWAP(idx[left], idx[right], int);
+      if(Bounds_CenterAxis(&(t->elements[idx[left + 1]]), cut_direction) >
+         Bounds_CenterAxis(&(t->elements[idx[left]]), cut_direction))
+         SWAP(idx[left + 1], idx[left], int);   /* now key[left+1] <= key[left] <= key[right] */
+      a = idx[left];   /* pivot element index: the middle of the three keys ordered above */
+      i = left + 1;
+      j = right;
+      while (1) {
+         for (i++;   /* advance i while its key is below the pivot key */
+              Bounds_CenterAxis(&(t->elements[idx[i]]), cut_direction) <
+                Bounds_CenterAxis(&(t->elements[a]), cut_direction);
+              i++);
+         for (j--;   /* retreat j while its key is above the pivot key */
+              Bounds_CenterAxis(&(t->elements[idx[j]]), cut_direction) >
+              Bounds_CenterAxis(&(t->elements[a]), cut_direction);
+              j--);
+         if(j < i)
+            break;
+         SWAP(idx[i], idx[j], int);
+      }
+      idx[left] = idx[j];   /* move the pivot into slot j */
+      idx[j] = a;
+      if(j >= k)
+         right = j - 1;
+      if(j <= k)
+         left = i;
+   }
+   if(((right - left) ==1) &&   /* two entries left: put them in key order */
+      (Bounds_CenterAxis(&(t->elements[idx[right]]), cut_direction) <
+       Bounds_CenterAxis(&(t->elements[idx[left]]), cut_direction)))
+      SWAP(idx[right], idx[left], int);
+}
+
+void KDTree_Initialize(TKDTree* t)
+{
+   /* Puts *t into its empty starting state: extent reset with Bounds_Infinite,
+      one block of element storage with nothing in it, and no tree built. */
+   assert(t);
+   Bounds_Infinite(&(t->extent));
+   t->tree_link = NULL;
+   t->tree_safety_boxes = NULL;
+   t->tree_size = 0;
+   t->tree_built = false;
+   /* Element list: zero used slots out of KDTREE_ELEMENT_BLOCKING_SIZE */
+   t->elements_num = 0;
+   t->elements_allocated = KDTREE_ELEMENT_BLOCKING_SIZE;
+   t->elements = MALLOC(t->elements_allocated, TBounds);
+   assert(t->elements);
+}
+/* Frees the element array of *t; every other field, the elements pointer value included, is left as it was. */
+void KDTree_Finalize(TKDTree* t)
+{
+   free(t->elements);   /* NOTE(review): tree_safety_boxes and tree_link are not freed here, unlike KDTree_Destroy -- confirm this is intended */
+}
+
+void KDTree_Destroy(TKDTree* t)
+{
+   /* Releases every buffer owned by *t and leaves it empty: counters zeroed,
+      pointers NULL, tree_built false, extent reset through Bounds_Infinite. */
+   assert(t);
+   /* Tree arrays first (free accepts NULL, so no guard is needed) ... */
+   free(t->tree_link);
+   free(t->tree_safety_boxes);
+   t->tree_link = NULL;
+   t->tree_safety_boxes = NULL;
+   t->tree_size = 0;
+   t->tree_built = false;
+   /* ... then the element list and the extent */
+   free(t->elements);
+   t->elements = NULL;
+   t->elements_num = t->elements_allocated = 0;
+   Bounds_Infinite(&(t->extent));
+}
+
+
+
+void KDTree_AddElement(TKDTree* t, TBounds* add)
+{
+   /* Appends a copy of *add to the element list and folds it into t->extent.
+      A tree built earlier is discarded first, so the next query rebuilds it. */
+   assert(t && add);
+   if(t->tree_built) {
+      FREE(t->tree_link);
+      FREE(t->tree_safety_boxes);
+      t->tree_safety_boxes = NULL;
+      t->tree_link = NULL;
+      t->tree_size = 0;
+      t->tree_built = false;
+   }
+   /* Out of room: grow the element array by one more block */
+   if(t->elements_allocated == t->elements_num) {
+      t->elements_allocated += KDTREE_ELEMENT_BLOCKING_SIZE;
+      t->elements = REALLOC(t->elements, t->elements_allocated, TBounds);
+      assert(t->elements);
+   }
+   Bounds_AddBounds(&(t->extent), add);
+   Bounds_Copy(add, &(t->elements[t->elements_num]));
+   t->elements_num += 1;
+}
+
+void KDTree_CreateTree(TKDTree* t)
+{
+   /* Builds tree_safety_boxes and tree_link from the current element list,
+      iteratively with an explicit stack.  Does nothing if already built. */
+   unsigned int i;
+   int next_node, stack_ptr, min, mid, max, parent, cut_direction;
+   double width, max_width;
+   int *stack, *idx;
+   assert(t);
+   /* If the tree is already built, we don't have to do anything */
+   if(t->tree_built)
+      return;
+   /* With no elements there is nothing to allocate or link */
+   if(t->elements_num > 0) {
+      /* Allocate the k-D tree memory */
+      t->tree_size = 2 * t->elements_num;
+      t->tree_safety_boxes = MALLOC(t->tree_size, TBounds);
+      t->tree_link = MALLOC(t->tree_size, int);
+      /* Create and initialize temporary arrays */
+      next_node = 0;
+      stack_ptr = 0;
+      stack = MALLOC(3 * t->tree_size, int);
+      idx = MALLOC(t->elements_num, int);
+      for (i = 0; (int)i <  t->elements_num; i++) {
+         idx[i] = i;
+      }
+      /* Setup the root node of the tree and put it on the stack */
+      stack[stack_ptr++] = 0;                   /* Node Number in the Tree */
+      stack[stack_ptr++] = 0;                   /* Element Span Minimum */
+      stack[stack_ptr++] = t->elements_num - 1; /* Element Span Maximum */
+      Bounds_Copy(&(t->extent), &(t->tree_safety_boxes[0]));
+      next_node++;
+      /* Construct k-D tree by setting up each pair of child nodes */
+      while (stack_ptr) {
+         /* Pop the top entry off the stack */
+         max = stack[--stack_ptr];
+         min = stack[--stack_ptr];
+         parent = stack[--stack_ptr];
+         /* If the current node should be a leaf node, make it one */
+         if ((max - min) == 0) {
+            Bounds_Copy(&(t->elements[idx[min]]), &(t->tree_safety_boxes[parent]));
+            t->tree_link[parent] = - idx[min];
+            continue;
+         }
+         /* Select optimum cutting direction for the parent node's safety box */
+         cut_direction = -1;
+         max_width = NEGATIVE_INFINITY;
+         for (i = 0; i < 2; i++) {
+            width = Bounds_WidthAxis(&(t->tree_safety_boxes[parent]), i);
+            if(width > max_width) {
+               max_width = width;
+               cut_direction = i;
+            }
+         }
+         assert(cut_direction >= 0);
+         /* Do a median sort of the elements under the parent node. The sort key
+            is the center point of the element bounding boxes along the selected
+            cutting direction. */
+         mid = (min + max) /2;
+         median_sort(t, (unsigned int)cut_direction, mid - min, max - min + 1, &(idx[min]));
+         /* Give the parent a reference to its two children */
+         t->tree_link[parent] = next_node;
+         /* Add the "left" child to the tree and the stack */
+         stack[stack_ptr++] = next_node;  /* Node Number in the Tree */
+         stack[stack_ptr++] = min;        /* Element Span Minimum */
+         stack[stack_ptr++] = mid;        /* Element Span Maximum */
+         Bounds_Infinite(&(t->tree_safety_boxes[next_node]));
+         for (i = min; (int)i <= mid; i++) {
+            Bounds_AddBounds(&(t->tree_safety_boxes[next_node]),
+                             &(t->elements[idx[i]]));
+         }
+         next_node++;
+         /* Add the "right" child; its safety box covers only its own span mid+1..max */
+         stack[stack_ptr++] = next_node;  /* Node Number in the Tree */
+         stack[stack_ptr++] = mid + 1;    /* Element Span Minimum */
+         stack[stack_ptr++] = max;        /* Element Span Maximum */
+         Bounds_Infinite(&(t->tree_safety_boxes[next_node]));
+         for (i = mid + 1; (int)i <= max; i++) {
+            Bounds_AddBounds(&(t->tree_safety_boxes[next_node]),
+                             &(t->elements[idx[i]]));
+         }
+         next_node++;
+      }
+      /* Destroy the temporary arrays */
+      FREE(stack);
+      FREE(idx);
+   }
+   /* Mark the tree "built" */
+   t->tree_built = true;
+}
+
+void KDTree_QueryBoxIntersect(TKDTree* t,
+                              int* result_num, int* result_indicies,
+                              TBounds* box)
+{
+   /* Writes to result_indicies the insertion index of every element whose
+      bounding box overlaps box, and the number found to *result_num.  No
+      bound is checked: result_indicies must hold up to t->elements_num ints. */
+   int stack_ptr, node;
+   TBounds sb;
+   int* stack;
+   assert(t && result_num && result_indicies && box);
+   /* Build the k-D tree if necessary */
+   if(!t->tree_built)
+      KDTree_CreateTree(t);
+   /* No results yet */
+   *result_num = 0;
+   /* An empty tree has no node arrays at all (tree_size is 0 and the node
+      pointers are NULL), so there is no root to visit: report zero hits. */
+   if(t->tree_size == 0)
+      return;
+   /* Create the temporary stack array */
+   stack_ptr = 0;
+   stack = MALLOC(t->tree_size, int);
+   assert(stack);
+   /* Put the root node of the tree onto the stack */
+   stack[stack_ptr++] = 0;
+   /* Search the k-D tree until the stack is empty */
+   while (stack_ptr) {
+      /* Pop the top entry off the stack */
+      node = stack[--stack_ptr];
+      /* Work on a copy of the node's safety box */
+      Bounds_Copy(&(t->tree_safety_boxes[node]), &sb);
+      /* If the query box doesn't intersect this node's safety box, we are done
+         visiting the node and should continue with the next node */
+      if(!Bounds_IsOverlappingBounds(&sb, box))
+         continue;
+      /* If the current node is a leaf node, add it to the collision list. If
+         the current node is an interior node, add its children to the stack. */
+      if(t->tree_link[node] <= 0) {
+         result_indicies[*result_num] = - t->tree_link[node];
+         (*result_num)++;
+      }
+      else {
+         stack[stack_ptr++] = t->tree_link[node];
+         stack[stack_ptr++] = t->tree_link[node] + 1;
+      }
+   }
+   /* Destroy the temporary stack array */
+   FREE(stack);
+}
+
+void KDTree_QueryCircleIntersect_Double(TKDTree* t,
+                                 int* result_num, int* result_indicies,
+                                 double circ_radius, int ncells,
+                                 double *x, double *dx, double *y, double *dy)
+{
+   /* Lists the cells whose outline is crossed by the circle of radius
+      circ_radius centred on the origin.
+
+      Cell c has the corners (x[c], y[c]), (x[c]+dx[c], y[c]),
+      (x[c]+dx[c], y[c]+dy[c]) and (x[c], y[c]+dy[c]).  It is listed when
+      circ_radius lies strictly between the origin distances of two corners
+      that are consecutive in that order (the last pairs with the first).
+
+      Only indices returned by KDTree_QueryBoxIntersect for the square
+      [-circ_radius, circ_radius] in both axes are tested; they are used
+      directly as subscripts into x, dx, y and dy.  The scratch buffer for
+      them holds ncells ints.  Output: result_indicies[0 .. *result_num - 1]. */
+   assert(t && result_num && result_indicies && circ_radius);
+   if(!t->tree_built)
+      KDTree_CreateTree(t);
+
+   /* Coarse pass: candidates from the circle's bounding square */
+   int ncand;
+   int *cand = (int *)malloc(ncells*sizeof(int));
+   TBounds square;
+   square.min.x = -circ_radius;
+   square.min.y = -circ_radius;
+   square.max.x =  circ_radius;
+   square.max.y =  circ_radius;
+   KDTree_QueryBoxIntersect(t, &ncand, cand, &square);
+
+   /* Fine pass: keep the candidates with at least one crossed edge */
+   *result_num = 0;
+   for (int c = 0; c < ncand; ++c) {
+      const int cell = cand[c];
+      /* Squares of the two x and the two y corner coordinates */
+      const double xa = pow(x[cell],          2.0);
+      const double xb = pow(x[cell]+dx[cell], 2.0);
+      const double ya = pow(y[cell],          2.0);
+      const double yb = pow(y[cell]+dy[cell], 2.0);
+      /* Origin distance of each corner, in the order given above */
+      const double r1 = sqrt(xa + ya);
+      const double r2 = sqrt(xb + ya);
+      const double r3 = sqrt(xb + yb);
+      const double r4 = sqrt(xa + yb);
+
+      const int crossed =
+         (r1 > circ_radius && r2 < circ_radius) || (r1 < circ_radius && r2 > circ_radius) ||
+         (r2 > circ_radius && r3 < circ_radius) || (r2 < circ_radius && r3 > circ_radius) ||
+         (r3 > circ_radius && r4 < circ_radius) || (r3 < circ_radius && r4 > circ_radius) ||
+         (r4 > circ_radius && r1 < circ_radius) || (r4 < circ_radius && r1 > circ_radius);
+      if (crossed) {
+         result_indicies[*result_num] = cell;
+         (*result_num)++;
+      }
+   }
+
+   free(cand);
+}
+
+void KDTree_QueryCircleIntersect_Float(TKDTree* t,
+                                 int* result_num, int* result_indicies,
+                                 double circ_radius, int ncells,
+                                 float *x, float *dx, float *y, float *dy)
+{
+   /* Float-coordinate variant of the circle/cell-outline query: lists the
+      cells whose outline is crossed by the circle of radius circ_radius
+      centred on the origin.
+
+      Cell c has the corners (x[c], y[c]), (x[c]+dx[c], y[c]),
+      (x[c]+dx[c], y[c]+dy[c]) and (x[c], y[c]+dy[c]); the sums are formed
+      from the float inputs and then handed to pow.  A cell is listed when
+      circ_radius lies strictly between the origin distances of two corners
+      that are consecutive in that order (the last pairs with the first).
+
+      Candidates come from KDTree_QueryBoxIntersect over the circle's bounding
+      square; the scratch buffer for them holds ncells ints. */
+   assert(t && result_num && result_indicies && circ_radius);
+   if(!t->tree_built)
+      KDTree_CreateTree(t);
+
+   /* Coarse pass: candidates from the circle's bounding square */
+   int ncand;
+   int *cand = (int *)malloc(ncells*sizeof(int));
+   TBounds square;
+   square.min.x = -circ_radius;
+   square.min.y = -circ_radius;
+   square.max.x =  circ_radius;
+   square.max.y =  circ_radius;
+   KDTree_QueryBoxIntersect(t, &ncand, cand, &square);
+
+   /* Fine pass: keep the candidates with at least one crossed edge */
+   *result_num = 0;
+   for (int c = 0; c < ncand; ++c) {
+      const int cell = cand[c];
+      /* Squares of the two x and the two y corner coordinates */
+      const double xa = pow(x[cell],          2.0);
+      const double xb = pow(x[cell]+dx[cell], 2.0);
+      const double ya = pow(y[cell],          2.0);
+      const double yb = pow(y[cell]+dy[cell], 2.0);
+      /* Origin distance of each corner, in the order given above */
+      const double r1 = sqrt(xa + ya);
+      const double r2 = sqrt(xb + ya);
+      const double r3 = sqrt(xb + yb);
+      const double r4 = sqrt(xa + yb);
+
+      const int crossed =
+         (r1 > circ_radius && r2 < circ_radius) || (r1 < circ_radius && r2 > circ_radius) ||
+         (r2 > circ_radius && r3 < circ_radius) || (r2 < circ_radius && r3 > circ_radius) ||
+         (r3 > circ_radius && r4 < circ_radius) || (r3 < circ_radius && r4 > circ_radius) ||
+         (r4 > circ_radius && r1 < circ_radius) || (r4 < circ_radius && r1 > circ_radius);
+      if (crossed) {
+         result_indicies[*result_num] = cell;
+         (*result_num)++;
+      }
+   }
+
+   free(cand);
+}
+
+void KDTree_QueryCircleIntersectWeighted_Double(TKDTree* t,
+                                 int* result_num, int* result_indicies, double *weight,
+                                 double circ_radius, int ncells,
+                                 double *x, double *dx, double *y, double *dy)
+{
+   /* Same cell selection as KDTree_QueryCircleIntersect_Double, with a weight
+      stored for every reported cell: weight[k] goes with result_indicies[k].
+
+      Distances from the origin to the four corners of a cell:
+         rad1: (x, y)          rad2: (x+dx, y)
+         rad4: (x, y+dy)       rad3: (x+dx, y+dy)
+      An edge counts as cut when circ_radius lies strictly between the
+      distances of its two end corners.
+
+      The weight is (circ_radius - radA) / (radB - radA) for one diagonal
+      corner pair (A, B), chosen from the signs of x + 0.5*dx and y + 0.5*dy.
+
+      weight[] is written for reported cells only, so it needs no more slots
+      than result_indicies.  The scratch candidate list holds ncells ints. */
+   assert(t && result_num && result_indicies && circ_radius);
+   /* Build the k-D tree if necessary */
+   if(!t->tree_built)
+      KDTree_CreateTree(t);
+
+   /* Candidate cells: whatever the box query returns for the square
+      [-circ_radius, circ_radius] x [-circ_radius, circ_radius] */
+   int nez;
+   int *ind=(int *)malloc(ncells*sizeof(int));
+   TBounds box;
+   box.min.x = -circ_radius;
+   box.max.x =  circ_radius;
+   box.min.y = -circ_radius;
+   box.max.y =  circ_radius;
+   KDTree_QueryBoxIntersect(t, &nez, ind, &box);
+
+   /* Nothing reported yet */
+   *result_num = 0;
+
+   double rad1, rad2, rad3, rad4;
+   int cuts_bottom, cuts_top, cuts_left, cuts_right;
+   int vertical_half, horizontal_half;
+   int ii;
+   for (int i=0; i<nez; ++i){
+      ii = ind[i];
+      /* Corner distances, numbered as in the table above */
+      rad1 = sqrt( pow(x[ii],       2.0) + pow(y[ii],       2.0) );
+      rad2 = sqrt( pow(x[ii]+dx[ii],2.0) + pow(y[ii],       2.0) );
+      rad3 = sqrt( pow(x[ii]+dx[ii],2.0) + pow(y[ii]+dy[ii],2.0) );
+      rad4 = sqrt( pow(x[ii]       ,2.0) + pow(y[ii]+dy[ii],2.0) );
+
+      /* One flag per edge: rad1-rad2, rad2-rad3, rad3-rad4, rad4-rad1 */
+      cuts_bottom = (circ_radius < rad1 && circ_radius > rad2 ) ||
+                    (circ_radius > rad1 && circ_radius < rad2 );
+      cuts_right  = (circ_radius < rad2 && circ_radius > rad3 ) ||
+                    (circ_radius > rad2 && circ_radius < rad3 );
+      cuts_top    = (circ_radius < rad3 && circ_radius > rad4 ) ||
+                    (circ_radius > rad3 && circ_radius < rad4 );
+      cuts_left   = (circ_radius < rad4 && circ_radius > rad1 ) ||
+                    (circ_radius > rad4 && circ_radius < rad1 );
+
+      /* A cell the circle does not cut is not reported and gets no weight;
+         skipping here keeps weight[] untouched beyond the reported entries. */
+      if (!(cuts_bottom || cuts_top || cuts_left || cuts_right))
+         continue;
+
+      /* Half tags from the signs of x + 0.5*dx and y + 0.5*dy */
+      horizontal_half = (x[ii]+0.5*dx[ii] > 0.0) ? RIGHT_HALF : LEFT_HALF;
+      vertical_half   = (y[ii]+0.5*dy[ii] > 0.0) ? TOP_HALF   : BOTTOM_HALF;
+
+      /* Each tag is 0 or 1, so the four cases below cover every combination */
+      if        (horizontal_half == RIGHT_HALF && vertical_half == TOP_HALF) { /* quadrant 1 */
+         weight[*result_num] = (circ_radius - rad1)/(rad3-rad1);
+      } else if (horizontal_half == LEFT_HALF  && vertical_half == TOP_HALF) { /* quadrant 2 */
+         weight[*result_num] = (circ_radius - rad2)/(rad4-rad2);
+      } else if (horizontal_half == LEFT_HALF  && vertical_half == BOTTOM_HALF) { /* quadrant 3 */
+         weight[*result_num] = (circ_radius - rad3)/(rad1-rad3);
+      } else { /* quadrant 4: RIGHT_HALF and BOTTOM_HALF */
+         weight[*result_num] = (circ_radius - rad4)/(rad2-rad4);
+      }
+
+      /* Report the cell; its weight is already in the matching slot */
+      result_indicies[*result_num] = ind[i];
+      (*result_num)++;
+   } // for
+
+   free(ind);
+}
+
+/* Find the cells cut by the circle of radius circ_radius centred on the
+ * origin and compute a weight for each of them.
+ *
+ * t               -- k-D tree to query; it is built here if not yet built
+ * result_num      -- out: number of entries written to the result arrays
+ * result_indicies -- out: indices of the cells the circle cuts
+ * weight          -- out: weight[k] belongs to result_indicies[k]
+ * circ_radius     -- circle radius; asserted to be non-zero
+ * ncells          -- number of ints allocated for the candidate list
+ * x, dx, y, dy    -- per-cell corner (x, y) and extents (dx, dy)
+ *
+ * A cell is reported when circ_radius lies strictly between the origin
+ * distances of the two end points of at least one of its edges. If the
+ * candidate list cannot be allocated, *result_num is 0 on return.
+ */
+void KDTree_QueryCircleIntersectWeighted_Float(TKDTree* t,
+                                 int* result_num, int* result_indicies, double *weight,
+                                 double circ_radius, int ncells, 
+                                 float *x, float *dx, float *y, float *dy)
+{
+   assert(t && result_num && result_indicies && circ_radius);
+   /* Build the k-D tree if necessary */
+   if (!t->tree_built) {
+      KDTree_CreateTree(t);
+   }
+
+   *result_num = 0;
+
+   /* Candidate cells come from a box query over the circle's bounding square */
+   int *ind = (int *)malloc(ncells*sizeof(int));
+   if (ind == NULL) {
+      /* No buffer for the box query to fill: report no cells */
+      return;
+   }
+
+   TBounds box;
+   box.min.x = -circ_radius;
+   box.max.x =  circ_radius;
+   box.min.y = -circ_radius;
+   box.max.y =  circ_radius;
+
+   int nez;
+   KDTree_QueryBoxIntersect(t, &nez, ind, &box);
+
+   for (int i = 0; i < nez; ++i) {
+      int ii = ind[i];
+      /* Origin distance of the corners (x,y), (x+dx,y), (x+dx,y+dy), (x,y+dy) */
+      double rad1 = sqrt( pow(x[ii],       2.0) + pow(y[ii],       2.0) );
+      double rad2 = sqrt( pow(x[ii]+dx[ii],2.0) + pow(y[ii],       2.0) );
+      double rad3 = sqrt( pow(x[ii]+dx[ii],2.0) + pow(y[ii]+dy[ii],2.0) );
+      double rad4 = sqrt( pow(x[ii]       ,2.0) + pow(y[ii]+dy[ii],2.0) );
+
+      /* An edge is cut when the radius falls strictly between its end points */
+      int cuts_bottom = (circ_radius < rad1 && circ_radius > rad2) ||
+                        (circ_radius > rad1 && circ_radius < rad2);
+      int cuts_right  = (circ_radius < rad2 && circ_radius > rad3) ||
+                        (circ_radius > rad2 && circ_radius < rad3);
+      int cuts_top    = (circ_radius < rad3 && circ_radius > rad4) ||
+                        (circ_radius > rad3 && circ_radius < rad4);
+      int cuts_left   = (circ_radius < rad4 && circ_radius > rad1) ||
+                        (circ_radius > rad4 && circ_radius < rad1);
+      /* Cells the circle misses are skipped before weight[] is touched */
+      if (!(cuts_bottom || cuts_top || cuts_left || cuts_right)) continue;
+
+      /* Classify the cell by the sign of its centre coordinates */
+      int horizontal_half = 0;
+      int vertical_half   = 0;
+      if (x[ii]+0.5*dx[ii] > 0.0) horizontal_half = RIGHT_HALF;
+      if (y[ii]+0.5*dy[ii] > 0.0) vertical_half   = TOP_HALF;
+
+      /* Fraction of the way from one diagonal corner radius to the other */
+      if        (horizontal_half == RIGHT_HALF && vertical_half == TOP_HALF) { /* quadrant 1 */
+         weight[*result_num] = (circ_radius - rad1)/(rad3-rad1);
+      } else if (horizontal_half == LEFT_HALF  && vertical_half == TOP_HALF) { /* quadrant 2 */
+         weight[*result_num] = (circ_radius - rad2)/(rad4-rad2);
+      } else if (horizontal_half == LEFT_HALF  && vertical_half == BOTTOM_HALF) { /* quadrant 3 */
+         weight[*result_num] = (circ_radius - rad3)/(rad1-rad3);
+      } else if (horizontal_half == RIGHT_HALF && vertical_half == BOTTOM_HALF) { /* quadrant 4 */
+         weight[*result_num] = (circ_radius - rad4)/(rad2-rad4);
+      } else {
+         weight[*result_num] = 0.5;
+      }
+      result_indicies[*result_num] = ii;
+      (*result_num)++;
+   }
+   free(ind);
+}
+
+/* Select the cells with at least one corner strictly inside the circle of
+ * radius circ_radius (asserted non-zero) centred on the origin.
+ * On return result_indicies[0 .. *result_num-1] holds the selected cells.
+ * ncells is the number of ints allocated for the candidate list; if that
+ * allocation fails, *result_num is 0 and result_indicies is untouched.
+ * x, dx, y, dy give each cell's corner (x, y) and extents (dx, dy). */
+void KDTree_QueryCircleInterior_Double(TKDTree* t,
+                            int* result_num, int* result_indicies,
+                            double circ_radius, int ncells, 
+                            double *x, double *dx, double *y, double *dy)
+{
+   assert(t && result_num && result_indicies && circ_radius);
+   /* Build the k-D tree if necessary */
+   if (!t->tree_built) {
+      KDTree_CreateTree(t);
+   }
+   *result_num = 0;
+
+   /* Candidate cells come from a box query over the circle's bounding square */
+   int *ind = (int *)malloc(ncells*sizeof(int));
+   if (ind == NULL) {
+      /* No buffer for the box query to fill: report no cells */
+      return;
+   }
+   TBounds box;
+   box.min.x = -circ_radius;
+   box.max.x =  circ_radius;
+   box.min.y = -circ_radius;
+   box.max.y =  circ_radius;
+   int nez;
+   KDTree_QueryBoxIntersect(t, &nez, ind, &box);
+
+   for (int i = 0; i < nez; ++i) {
+      int ii = ind[i];
+      /* Origin distance of the corners (x,y), (x+dx,y), (x+dx,y+dy), (x,y+dy) */
+      double rad1 = sqrt( pow(x[ii],       2.0) + pow(y[ii],       2.0) );
+      double rad2 = sqrt( pow(x[ii]+dx[ii],2.0) + pow(y[ii],       2.0) );
+      double rad3 = sqrt( pow(x[ii]+dx[ii],2.0) + pow(y[ii]+dy[ii],2.0) );
+      double rad4 = sqrt( pow(x[ii]       ,2.0) + pow(y[ii]+dy[ii],2.0) );
+      if (circ_radius > rad1 || circ_radius > rad2 ||
+          circ_radius > rad3 || circ_radius > rad4) {
+         result_indicies[*result_num] = ii;
+         (*result_num)++;
+      }
+   }
+   free(ind);
+}
+
+/* Select the cells with at least one corner strictly inside the circle of
+ * radius circ_radius (asserted non-zero) centred on the origin.
+ * On return result_indicies[0 .. *result_num-1] holds the selected cells.
+ * ncells is the number of ints allocated for the candidate list; if that
+ * allocation fails, *result_num is 0 and result_indicies is untouched.
+ * x, dx, y, dy give each cell's corner (x, y) and extents (dx, dy). */
+void KDTree_QueryCircleInterior_Float(TKDTree* t,
+                            int* result_num, int* result_indicies,
+                            double circ_radius, int ncells, 
+                            float *x, float *dx, float *y, float *dy)
+{
+   assert(t && result_num && result_indicies && circ_radius);
+   /* Build the k-D tree if necessary */
+   if (!t->tree_built) {
+      KDTree_CreateTree(t);
+   }
+   *result_num = 0;
+
+   /* Candidate cells come from a box query over the circle's bounding square */
+   int *ind = (int *)malloc(ncells*sizeof(int));
+   if (ind == NULL) {
+      /* No buffer for the box query to fill: report no cells */
+      return;
+   }
+   TBounds box;
+   box.min.x = -circ_radius;
+   box.max.x =  circ_radius;
+   box.min.y = -circ_radius;
+   box.max.y =  circ_radius;
+   int nez;
+   KDTree_QueryBoxIntersect(t, &nez, ind, &box);
+
+   for (int i = 0; i < nez; ++i) {
+      int ii = ind[i];
+      /* Origin distance of the corners (x,y), (x+dx,y), (x+dx,y+dy), (x,y+dy) */
+      double rad1 = sqrt( pow(x[ii],       2.0) + pow(y[ii],       2.0) );
+      double rad2 = sqrt( pow(x[ii]+dx[ii],2.0) + pow(y[ii],       2.0) );
+      double rad3 = sqrt( pow(x[ii]+dx[ii],2.0) + pow(y[ii]+dy[ii],2.0) );
+      double rad4 = sqrt( pow(x[ii]       ,2.0) + pow(y[ii]+dy[ii],2.0) );
+      if (circ_radius > rad1 || circ_radius > rad2 ||
+          circ_radius > rad3 || circ_radius > rad4) {
+         result_indicies[*result_num] = ii;
+         (*result_num)++;
+      }
+   }
+   free(ind);
+}
+
+
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/LICENSE
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/LICENSE
@@ -0,0 +1,41 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  CLAMR -- LA-CC-11-094
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ */
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/MallocPlus.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/MallocPlus.h
@@ -0,0 +1,812 @@
+/*
+ *  Copyright (c) 2011-2013, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ * 
+ */
+#ifndef MALLOCPLUS_H_
+#define MALLOCPLUS_H_
+
+#include <map>
+#include <string>
+#include <string.h>
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#define HOST_REGULAR_MEMORY     0x00000
+#define HOST_MANAGED_MEMORY     0x00001
+#define DEVICE_REGULAR_MEMORY   0x00002
+#define INDEX_ARRAY_MEMORY      0x00004
+#define LOAD_BALANCE_MEMORY     0x00008
+#define RESTART_DATA            0x00010
+#define REPLICATED_DATA         0x00020
+#define DISTRIBUTED_INT_DATA    0x00040
+#define DISTRIBUTED_DOUBLE_DATA 0x00080
+
+#if defined(HAVE_MPI)
+#include "mpi.h"
+#if defined(HAVE_J7)
+#include "j7/j7.h"
+#endif
+#endif
+
+using namespace std;
+
+/****************************************************************//**
+ * \brief
+ * Record describing one allocation tracked in the MallocPlus database
+ *******************************************************************/
+struct malloc_plus_memory_entry {
+   void   *mem_ptr;       //!< pointer to the tracked memory buffer
+   size_t  mem_capacity;  //!< allocated capacity of the buffer; it may
+                          //!< exceed the number of elements in use so
+                          //!< that resizing can be handled internally
+                          //!< (units presumably elements -- confirm)
+   size_t *mem_nelem;     //!< element counts (presumably one per dimension)
+   size_t  mem_ndims;     //!< number of dimensions
+   size_t  mem_elsize;    //!< size of one element in bytes
+   int     mem_flags;     //!< special-handling flags (presumably the *_MEMORY / *_DATA bits above)
+   char   *mem_name;      //!< name of memory entry
+};
+
+//! Strict ordering of C strings by content (strcmp), not by pointer value.
+struct cmp_str {
+   // const-qualified so the comparator can be invoked through a const object
+   bool operator()(char const *a, char const *b) const
+   {
+      return strcmp(a, b) < 0;
+   }
+};
+
+/****************************************************************//**
+ * MallocPlus class
+ *    Provide an enhanced memory allocation package with database
+ *    of allocations, sizes and contiguous memory allocations for
+ *    multi-dimensional arrays
+ *******************************************************************/
+class MallocPlus {
+//protected:
+public:
+   map<string, malloc_plus_memory_entry*> memory_name_dict; //!< Dictionary entries by name
+   map<void*, malloc_plus_memory_entry*> memory_ptr_dict; //!< Dictionary entries by pointer
+
+#if defined(HAVE_MPI) && defined(HAVE_J7)
+private:
+   J7 *j7;
+#endif
+   
+public:
+   // if we have MPI support enable these routines. they only really do anything
+   // if we also have j7 support, but that's okay; we don't want ifdefs all
+   // over.
+#if defined(HAVE_MPI)
+   // parallel initialization routine
+   void pinit(MPI_Comm smComm, std::size_t memPoolSize);
+   // parallel finalization routine
+   void pfini(void);
+#endif
+
+/****************************************************************//**
+ * \brief
+ * Allocates memory for a 1D array and put entry in database
+ *
+ * **Parameters**
+ * * size_t nelem -- number of elements in array
+ * * size_t elsize -- element size in bytes
+ * * const char *name -- name of array
+ * * int flags -- flags for special handling, default is 0
+ *
+ * Typical Usage
+ *
+ *     double *density = my_mem->memory_malloc(ncells, sizeof(double),
+ *                       "Density");
+ *******************************************************************/
+   void *memory_malloc(size_t nelem, size_t elsize, const char *name, int flags=0);
+
+/****************************************************************//**
+ * \brief
+ * Duplicate memory and add new entry into database
+ *
+ * **Parameters**
+ * * void *malloc_mem_ptr -- memory pointer to duplicate
+ * * const char *addname -- new name for variable
+ *
+ * Typical Usage
+ *
+ *     double *new_density = my_mem->memory_duplicate(density, "Density_new");
+ *******************************************************************/
+   void *memory_duplicate(void *malloc_mem_ptr, const char *addname);
+
+/****************************************************************//**
+ * \brief
+ * Reallocates memory for memory pointer in database
+ *
+ * **Parameters**
+ * * size_t nelem -- number of elements for new allocation
+ * * void *malloc_mem_ptr -- memory pointer to reallocate
+ *
+ * Typical Usage
+ *
+ *     double *density = my_mem->memory_realloc(new_ncells, density);
+ *******************************************************************/
+   void *memory_realloc(size_t nelem, void *malloc_mem_ptr);
+
+/****************************************************************//**
+ * \brief
+ * Reallocates memory for named entry in database
+ *
+ * **Parameters**
+ * * size_t nelem -- number of elements for new allocation
+ * * const char *name -- named entry to reallocate
+ *
+ * Typical Usage
+ *
+ *     double *density = my_mem->memory_realloc(new_ncells, "Density");
+ *******************************************************************/
+   void *memory_realloc(size_t nelem, const char *name);
+
+/****************************************************************//**
+ * \brief
+ * Request memory buffer capacity reallocation for memory pointer in database.
+ * This only changes the capacity for managed memory and does not change
+ * the current number of elements registered for the array.
+ *
+ * **Parameters**
+ * * size_t capacity -- capacity in number of elements for reallocation
+ * * void *malloc_mem_ptr -- memory pointer to reallocate more capacity
+ *
+ * Typical Usage
+ *
+ *     double *density = my_mem->memory_request(new_capacity, density);
+ *******************************************************************/
+   void *memory_request(size_t new_capacity, void *malloc_mem_ptr);
+
+/****************************************************************//**
+ * \brief
+ * Request memory buffer capacity reallocation for named entry in database.
+ * This only changes the capacity for managed memory and does not change
+ * the current number of elements registered for the array.
+ *
+ * **Parameters**
+ * * size_t capacity -- capacity in number of elements for reallocation
+ * * const char *name -- named entry in database
+ *
+ * Typical Usage
+ *
+ *     double *density = my_mem->memory_request(new_capacity, "Density");
+ *******************************************************************/
+   void *memory_request(size_t new_capacity, const char *name);
+
+/****************************************************************//**
+ * \brief
+ * Reallocates memory for all arrays in the database. Element size stays
+ * the same.
+ *
+ * **Parameters**
+ * * size_t nelem -- number of elements for new allocation
+ *
+ * Typical Usage
+ *
+ *     my_mem->memory_realloc_all(new_ncells);
+ *******************************************************************/
+   void memory_realloc_all(size_t nelem);
+
+/****************************************************************//**
+ * \brief
+ * Requests capacity reallocation for all arrays in the database. Element
+ * size and number of elements stays the same. The maximum memory
+ * capacity is increased.
+ *
+ * **Parameters**
+ * * size_t capacity -- number of elements for new allocation
+ *
+ * Typical Usage
+ *
+ *     my_mem->memory_request_all(new_capacity);
+ *******************************************************************/
+   void memory_request_all(size_t new_capacity);
+
+/****************************************************************//**
+ * \brief
+ * Replaces a database entry with another database entry, effectively
+ * renaming the entry and deallocating the memory for the old entry
+ * and removing the other database entry. Both the return and the 
+ * first argument old memory pointer gets reset to the new memory
+ * location.
+ *
+ * **Parameters**
+ * * void *malloc_mem_ptr_old -- memory pointer to entry to replace
+ * * void * const malloc_mem_ptr_new -- memory pointer to entry to
+ *       put in place of old memory
+ *
+ * Typical Usage
+ *
+ *     double *density_new = (double *)my_mem->memory_malloc(new_ncells,
+ *                           sizeof(double), "Density_new");
+ *       ... lots of calculations of density_new from density (old) ...
+ *     density = (double *)my_mem->memory_replace(density, density_new);
+ *******************************************************************/
+   void *memory_replace(void *malloc_mem_ptr_old, void * const malloc_mem_ptr_new);
+
+/****************************************************************//**
+ * \brief
+ * Swaps a database entry with another database entry, effectively
+ * renaming both entries. The new pointers are returned in the
+ * two arguments.
+ *
+ * **Parameters**
+ * * void *malloc_mem_ptr_old -- memory pointer to swap
+ * * void *malloc_mem_ptr_new -- memory pointer to swap
+ *
+ * Typical Usage
+ *
+ *     int *level_old = (int *)my_mem->memory_malloc(new_ncells,
+ *                           sizeof(int), "level_old");
+ *     my_mem->memory_swap(&level, &level_old);
+ *       ... lots of calculations of level from level_old ...
+ *     my_mem->memory_delete(level_old);
+ *******************************************************************/
+   void memory_swap(int **malloc_mem_ptr_old, int **malloc_mem_ptr_new);
+
+/****************************************************************//**
+ * \brief
+ * Swaps a database entry with another database entry, effectively
+ * renaming both entries. The new pointers are returned in the
+ * two arguments.
+ *
+ * **Parameters**
+ * * void *malloc_mem_ptr_old -- memory pointer to swap
+ * * void *malloc_mem_ptr_new -- memory pointer to swap
+ *
+ * Typical Usage
+ *
+ *     float *density_old = (float *)my_mem->memory_malloc(new_ncells,
+ *                           sizeof(float), "Density_old");
+ *     my_mem->memory_swap(&density, &density_old);
+ *       ... lots of calculations of density from density_old ...
+ *     my_mem->memory_delete(density_old);
+ *******************************************************************/
+   void memory_swap(float **malloc_mem_ptr_old, float **malloc_mem_ptr_new);
+
+/****************************************************************//**
+ * \brief
+ * Swaps a database entry with another database entry, effectively
+ * renaming both entries. The new pointers are returned in the
+ * two arguments.
+ *
+ * **Parameters**
+ * * void *malloc_mem_ptr_old -- memory pointer to swap
+ * * void *malloc_mem_ptr_new -- memory pointer to swap
+ *
+ * Typical Usage
+ *
+ *     double *density_old = (double *)my_mem->memory_malloc(new_ncells,
+ *                           sizeof(double), "Density_old");
+ *     my_mem->memory_swap(&density, &density_old);
+ *       ... lots of calculations of density from density_old ...
+ *     my_mem->memory_delete(density_old);
+ *******************************************************************/
+   void memory_swap(double **malloc_mem_ptr_old, double **malloc_mem_ptr_new);
+
+/****************************************************************//**
+ * \brief
+ * Adds an entry for an already allocated array into the database
+ *
+ * **Parameters**
+ * * void *malloc_mem_ptr -- memory pointer to add
+ * * size_t nelem -- number of elements in array
+ * * size_t elsize -- element size in bytes
+ * * const char *name -- name of array
+ * * int flags -- flags for special handling, default is 0
+ *
+ * Typical Usage
+ *
+ *     double *density = my_mem->memory_add(density, ncells, sizeof(double),
+ *                       "Density");
+ *******************************************************************/
+   void *memory_add(void *malloc_mem_ptr, size_t nelem, size_t elsize,
+      const char *name, int flags=0);
+
+   void *memory_add(void *malloc_mem_ptr, int ndim, size_t *nelem, size_t elsize,
+      const char *name, int flags=0);
+
+/****************************************************************//**
+ * \brief
+ * Reorders all of the arrays in the database by the indices in the
+ * iorder array. The reorder does the following:
+ *     tmp[ic] = density[iorder[ic]];
+ *     SWAP_PTR(tmp, density);
+ * Note that the pointer value will change during the operation and
+ * will be returned in the return value.
+ *
+ * **Parameters**
+ * * double *malloc_mem_ptr -- memory pointer to entry to reorder
+ * * int *iorder -- index array for reordering
+ *
+ * Typical Usage
+ *
+ *     double *density = my_mem->memory_reorder(density, iorder);
+ *******************************************************************/
+   double *memory_reorder(double *malloc_mem_ptr, int *iorder);
+
+/****************************************************************//**
+ * \brief
+ * Reorders all of the arrays in the database by the indices in the
+ * iorder array. The reorder does the following:
+ *     tmp[ic] = density[iorder[ic]];
+ *     SWAP_PTR(tmp, density);
+ * Note that the pointer value will change during the operation and
+ * will be returned in the return value.
+ *
+ * **Parameters**
+ * * float *malloc_mem_ptr -- memory pointer to entry to reorder
+ * * int *iorder -- index array for reordering
+ *
+ * Typical Usage
+ *
+ *     float *density = my_mem->memory_reorder(density, iorder);
+ *******************************************************************/
+   float *memory_reorder(float *malloc_mem_ptr, int *iorder);
+
+/****************************************************************//**
+ * \brief
+ * Reorders a single array in the database by the indices in the
+ * iorder array. The reorder does the following:
+ *     tmp[ic] = level[iorder[ic]];
+ *     SWAP_PTR(tmp, level);
+ * Note that the pointer value will change during the operation and
+ * will be returned in the return value.
+ *
+ * **Parameters**
+ * * int *malloc_mem_ptr -- memory pointer to entry to reorder
+ * * int *iorder -- index array for reordering
+ *
+ * Typical Usage
+ *
+ *     int *level = my_mem->memory_reorder(level, iorder);
+ *******************************************************************/
+   int *memory_reorder(int *malloc_mem_ptr, int *iorder);
+
+/****************************************************************//**
+ * \brief
+ * Reorders an index array in the database by the indices in the
+ * iorder array and reindexes the array by the inverse order given
+ * by inv_iorder. The reorder does the following:
+ *     tmp[ic] = inv_iorder[level[iorder[ic]]];
+ *     SWAP_PTR(tmp, level);
+ * Note that the pointer value will change during the operation and
+ * will be returned in the return value.
+ *
+ * **Parameters**
+ * * int *malloc_mem_ptr -- memory pointer to entry to reorder
+ * * int *iorder -- index array for reordering
+ * * int *inv_order -- inverse index array for reordering
+ *
+ * Typical Usage
+ *
+ *     int *level = my_mem->memory_reorder_indexarray(level, iorder, inv_iorder);
+ *******************************************************************/
+   int *memory_reorder_indexarray(int *malloc_mem_ptr, int *iorder, int *inv_iorder);
+
+/****************************************************************//**
+ * \brief
+ * Reorders all of the arrays in the database by the indices in the
+ * iorder array. The reorder does the following:
+ *     tmp[ic] = density[iorder[ic]];
+ *     SWAP_PTR(tmp, density);
+ * Note that the pointer value will change during the operation and
+ * must be retrieved from the database.
+ *
+ * **Parameters**
+ * * int *iorder -- index array for reordering
+ *
+ * Typical Usage
+ *
+ *     my_mem->memory_reorder_all(iorder);
+ *******************************************************************/
+   void memory_reorder_all(int *iorder);
+
+/****************************************************************//**
+ * \brief
+ * Prints out a report of all the arrays in the database.
+ *
+ * Typical Usage
+ *
+ *     my_mem->memory_report();
+ *******************************************************************/
+   void memory_report(void);
+
+/****************************************************************//**
+ * \brief
+ * Deallocates memory for a memory pointer in the database and removes the
+ * entry from the database.
+ *
+ * Typical Usage
+ *
+ *     my_mem->memory_delete(density);
+ *******************************************************************/
+   void *memory_delete(void *malloc_mem_ptr);
+
+/****************************************************************//**
+ * \brief
+ * Deallocates memory for a named entry in the database and removes the
+ * entry from the database.
+ *
+ * Typical Usage
+ *
+ *     my_mem->memory_delete("Density");
+ *******************************************************************/
+   void *memory_delete(const char *name);
+
+/****************************************************************//**
+ * \brief
+ * Deallocates memory for all arrays in the database.
+ *
+ * Typical Usage
+ *
+ *     my_mem->memory_delete_all();
+ *******************************************************************/
+   void memory_delete_all(void);
+
+/****************************************************************//**
+ * \brief
+ * Removes the entry for a memory pointer from the database. (This does
+ * not delete the memory).
+ *
+ * Typical Usage
+ *
+ *     my_mem->memory_remove(density);
+ *******************************************************************/
+   void memory_remove(void *malloc_mem_ptr);
+
+/****************************************************************//**
+ * \brief
+ * Removes the entry for a named entry from the database. (This does
+ * not delete the memory).
+ *
+ * Typical Usage
+ *
+ *     my_mem->memory_remove("Density");
+ *******************************************************************/
+   void memory_remove(const char *name);
+
+/****************************************************************//**
+ * \brief
+ * Gets initial memory pointer from database for iterating over the
+ * entries and processing each.
+ *
+ * Typical Usage
+ *
+ *     for (void *mem_ptr = my_mem.memory_begin(); mem_ptr!=NULL;
+ *          mem_ptr = my_mem.memory_next() ){
+ *        ... process entries ...
+ *     }
+ *******************************************************************/
+   void *memory_begin(void);
+
+/****************************************************************//**
+ * \brief
+ * Gets next memory pointer from database for iterating over the
+ * entries and processing each. Note that there is an implied caching
+ * of the current memory pointer in MallocPlus.
+ *
+ * Typical Usage
+ *
+ *     for (void *mem_ptr = my_mem.memory_begin(); mem_ptr!=NULL;
+ *          mem_ptr = my_mem.memory_next() ){
+ *        ... process entries ...
+ *     }
+ *******************************************************************/
+   void *memory_next(void);
+
+/****************************************************************//**
+ * \brief
+ * Gets initial memory pointer from database for iterating over the
+ * entries and processing each.
+ *
+ * Typical Usage
+ *
+ *     for (void *mem_ptr = my_mem.memory_by_name_begin(); mem_ptr!=NULL;
+ *          mem_ptr = my_mem.memory_by_name_next() ){
+ *        ... process entries ...
+ *     }
+ *******************************************************************/
+   void *memory_by_name_begin(void);
+
+/****************************************************************//**
+ * \brief
+ * Gets next memory pointer from database for iterating over the
+ * entries and processing each. Note that there is an implied caching
+ * of the current memory pointer in MallocPlus.
+ *
+ * Typical Usage
+ *
+ *     for (void *mem_ptr = my_mem.memory_by_name_begin(); mem_ptr!=NULL;
+ *          mem_ptr = my_mem.memory_by_name_next() ){
+ *        ... process entries ...
+ *     }
+ *******************************************************************/
+   void *memory_by_name_next(void);
+
+/****************************************************************//**
+ * \brief
+ * Gets initial memory entry from database for iterating over the
+ * entries and processing each.
+ *
+ * Typical Usage
+ *
+ *     malloc_plus_memory_entry *memory_item;
+ *     for (memory_item = my_mem.memory_entry_begin(); 
+ *          memory_item != my_mem.memory_entry_end();
+ *          memory_item = my_mem.memory_entry_next() ){
+ *        ... process entries ...
+ *     }
+ *******************************************************************/
+   malloc_plus_memory_entry *memory_entry_begin(void);
+
+/****************************************************************//**
+ * \brief
+ * Gets next memory entry from database for iterating over the
+ * entries and processing each.
+ *
+ * Typical Usage
+ *
+ *     malloc_plus_memory_entry *memory_item;
+ *     for (memory_item = my_mem.memory_entry_begin(); 
+ *          memory_item != my_mem.memory_entry_end();
+ *          memory_item = my_mem.memory_entry_next() ){
+ *        ... process entries ...
+ *     }
+ *******************************************************************/
+   malloc_plus_memory_entry *memory_entry_next(void);
+
+/****************************************************************//**
+ * \brief
+ * Gets the end-of-iteration memory entry value from database, against
+ * which a loop over the entries is terminated.
+ *
+ * Typical Usage
+ *
+ *     malloc_plus_memory_entry *memory_item;
+ *     for (memory_item = my_mem.memory_entry_begin(); 
+ *          memory_item != my_mem.memory_entry_end();
+ *          memory_item = my_mem.memory_entry_next() ){
+ *        ... process entries ...
+ *     }
+ *******************************************************************/
+   malloc_plus_memory_entry *memory_entry_end(void);
+
+/****************************************************************//**
+ * \brief
+ * Gets initial memory entry from name database for iterating over the
+ * entries and processing each.
+ *
+ * Typical Usage
+ *
+ *     malloc_plus_memory_entry *memory_item;
+ *     for (memory_item = my_mem.memory_entry_by_name_begin(); 
+ *          memory_item != my_mem.memory_entry_by_name_end();
+ *          memory_item = my_mem.memory_entry_by_name_next() ){
+ *        ... process entries ...
+ *     }
+ *******************************************************************/
+   malloc_plus_memory_entry *memory_entry_by_name_begin(void);
+
+/****************************************************************//**
+ * \brief
+ * Gets next memory entry from name database for iterating over the
+ * entries and processing each.
+ *
+ * Typical Usage
+ *
+ *     malloc_plus_memory_entry *memory_item;
+ *     for (memory_item = my_mem.memory_entry_by_name_begin(); 
+ *          memory_item != my_mem.memory_entry_by_name_end();
+ *          memory_item = my_mem.memory_entry_by_name_next() ){
+ *        ... process entries ...
+ *     }
+ *******************************************************************/
+   malloc_plus_memory_entry *memory_entry_by_name_next(void);
+
+/****************************************************************//**
+ * \brief
+ * Gets the end-of-iteration memory entry value from name database,
+ * against which a loop over the entries is terminated.
+ *
+ * Typical Usage
+ *
+ *     malloc_plus_memory_entry *memory_item;
+ *     for (memory_item = my_mem.memory_entry_by_name_begin(); 
+ *          memory_item != my_mem.memory_entry_by_name_end();
+ *          memory_item = my_mem.memory_entry_by_name_next() ){
+ *        ... process entries ...
+ *     }
+ *******************************************************************/
+   malloc_plus_memory_entry *memory_entry_by_name_end(void);
+
+/****************************************************************//**
+ * \brief
+ * Get number of elements for a memory pointer in the database.
+ *
+ * **Parameters**
+ * * void *malloc_mem_ptr -- memory pointer for entry in the database
+ * 
+ * Typical Usage
+ *
+ *     size_t nsize = my_mem->get_memory_size(density);
+ *******************************************************************/
+   size_t get_memory_size(void *malloc_mem_ptr);
+
+/****************************************************************//**
+ * \brief
+ * Get the memory capacity in number of elements for a memory pointer
+ * in the database. Memory capacity is the overallocated size of the
+ * array in schemes where memory is managed internally to reduce
+ * the number of reallocations.
+ *
+ * **Parameters**
+ * * void *malloc_mem_ptr -- memory pointer for entry in the database
+ * 
+ * Typical Usage
+ *
+ *     size_t var_capacity = my_mem->get_memory_capacity(density);
+ *******************************************************************/
+   size_t get_memory_capacity(void *malloc_mem_ptr);
+
+/****************************************************************//**
+ * \brief
+ * Get the element size for a memory pointer in the database.
+ *
+ * **Parameters**
+ * * void *malloc_mem_ptr -- memory pointer for entry in the database
+ * 
+ * Typical Usage
+ *
+ *     int elsize = my_mem->get_memory_elemsize(density);
+ *******************************************************************/
+   int get_memory_elemsize(void *malloc_mem_ptr);
+
+/****************************************************************//**
+ * \brief
+ * Get name for a memory pointer in the database.
+ *
+ * **Parameters**
+ * * void *malloc_mem_ptr -- memory pointer for entry in the database
+ * 
+ * Typical Usage
+ *
+ *     const char *var_name = my_mem->get_memory_name(density);
+ *******************************************************************/
+   const char *get_memory_name(void *malloc_mem_ptr);
+
+/****************************************************************//**
+ * \brief
+ * Get memory pointer for a named entry from the database.
+ *
+ * **Parameters**
+ * * const char *name -- name of entry in the database
+ * 
+ * Typical Usage
+ *
+ *     density = my_mem->get_memory_ptr("Density");
+ *******************************************************************/
+   void *get_memory_ptr(const char *name);
+
+/****************************************************************//**
+ * \brief
+ * Set a memory attribute for a memory pointer in the database.
+ *
+ * **Parameters**
+ * * void *malloc_mem_ptr -- memory pointer of entry in the database
+ * * int attribute -- flag to set for entry
+ *
+ * Typical Usage
+ *
+ *     my_mem->set_memory_attribute(density, HOST_MANAGED_MEMORY);
+ *******************************************************************/
+   void set_memory_attribute(void *malloc_mem_ptr, int attribute);
+
+/****************************************************************//**
+ * \brief
+ * Clear memory attribute for a memory pointer in the database.
+ *
+ * **Parameters**
+ * * void *malloc_mem_ptr -- memory pointer of entry in the database
+ * * int attribute -- flag to clear for entry
+ *
+ * Typical Usage
+ *
+ *     my_mem->clear_memory_attribute(density, HOST_MANAGED_MEMORY);
+ *******************************************************************/
+   void clear_memory_attribute(void *malloc_mem_ptr, int attribute);
+
+/****************************************************************//**
+ * \brief
+ * Get memory attributes for a memory pointer in the database. Returns
+ * the flag field.
+ *
+ * **Parameters**
+ * * void *malloc_mem_ptr -- memory pointer of entry in the database
+ *
+ * Typical Usage
+ *
+ *     int flag = my_mem->get_memory_flags(density);
+ *******************************************************************/
+   int  get_memory_flags(void *malloc_mem_ptr);
+
+/****************************************************************//**
+ * \brief
+ * Checks the setting for a memory attribute for a memory pointer in
+ * the database. Returns true for set and false for unset.
+ *
+ * **Parameters**
+ * * void *malloc_mem_ptr -- memory pointer of entry in the database
+ * * int attribute -- flag to check setting for entry
+ *
+ * Typical Usage
+ *
+ *     bool flag = my_mem->check_memory_attribute(density, HOST_MANAGED_MEMORY);
+ *******************************************************************/
+   bool  check_memory_attribute(void *malloc_mem_ptr, int attribute);
+};
+
+extern "C" {  // C-linkage declarations; their definitions are not part of this header
+  MallocPlus *MallocPlus_new();
+           
+  void MallocPlus_memory_report(MallocPlus *mem_object);
+
+  void MallocPlus_memory_add(MallocPlus *mem_object, void *dbleptr,
+    size_t nelem, size_t elsize, char *name, unsigned long long flags);
+  void MallocPlus_memory_add_nD(MallocPlus *mem_object, void *dbleptr,
+    int ndim, size_t *nelem, size_t elsize, char *name, unsigned long long flags);
+}
+
+#endif // ifndef MALLOCPLUS_H_
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/MallocPlus.cpp
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/MallocPlus.cpp
@@ -0,0 +1,1227 @@
+/*
+ *  Copyright (c) 2011-2014, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+
+// SKG TODO op realloc (similar to managed)
+
+#undef HAVE_OPENCL
+
+#include "MallocPlus.h"
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <algorithm>
+#include <queue>
+#include <string.h>
+#ifdef HAVE_OPENCL
+#include "ezcl/ezcl.h"
+#endif
+
+#ifndef DEBUG
+#define DEBUG 0
+#endif
+#define WARNING_SUPPRESSION 0
+
+#ifdef HAVE_CL_DOUBLE
+#ifdef HAVE_OPENCL
+typedef cl_double2  cl_real2;
+#endif
+#else
+#ifdef HAVE_OPENCL
+typedef cl_float2   cl_real2;
+#endif
+#endif
+
+#ifndef MIN
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#endif
+
+#ifndef SWAP_PTR
+#define SWAP_PTR(xnew,xold,xtmp) (xtmp=xnew, xnew=xold, xold=xtmp)
+#endif
+
+typedef unsigned int uint;
+map<void *,malloc_plus_memory_entry*>::iterator it_save, it_end;
+map<string, malloc_plus_memory_entry*, cmp_str>::iterator it_save_by_name, it_end_by_name;
+
+#if defined(HAVE_MPI)
+void
+MallocPlus::pinit(MPI_Comm smComm, std::size_t memPoolSize)
+{   // Creates the J7 allocator object when built with HAVE_J7; otherwise does nothing.
+#if defined(HAVE_J7)
+    try {
+        j7 = new J7(smComm, memPoolSize);
+    }
+    catch(...) {
+        std::cerr << "*** pinit failure ***" << std::endl;
+        throw;   // report, then propagate to the caller
+    }
+#else  // parameters are unused in this configuration; mark them used without a printf
+    (void)memPoolSize;
+    (void)smComm;
+#endif
+}
+
+void
+MallocPlus::pfini(void)
+{   // Counterpart of pinit(): deletes the J7 object; the body is empty without HAVE_J7.
+#if defined(HAVE_J7)
+    try {
+        delete j7;
+        j7 = NULL;   // clear the member once the object is gone
+    }
+    catch(...) {
+        std::cerr << "*** pfini failure ***" << std::endl;
+        throw;       // report, then propagate to the caller
+    }
+#endif
+}
+#endif // if defined(HAVE_MPI)
+
+// Allocates storage for nelem elements of elsize bytes, registers it in both the name
+// and pointer dictionaries, and returns the pointer. HOST_MANAGED_MEMORY reserves room
+// for 2*nelem elements; DEVICE_REGULAR_MEMORY yields NULL in builds without OpenCL.
+void *MallocPlus::memory_malloc(size_t nelem, size_t elsize, const char *name, int flags){
+   malloc_plus_memory_entry *memory_item = (malloc_plus_memory_entry *)malloc(sizeof(malloc_plus_memory_entry));
+   memory_item->mem_nelem    = (size_t *)malloc(1*sizeof(size_t));
+   memory_item->mem_nelem[0] = nelem;
+   memory_item->mem_ndims    = 1;
+   memory_item->mem_elsize   = elsize;
+   memory_item->mem_flags    = flags;
+   // Defaults, so no field is left uninitialized when a branch below is compiled out
+   memory_item->mem_capacity = 0;
+   memory_item->mem_ptr      = NULL;
+
+   // allocate memory on the accelerator if flag is set
+   if ((flags & DEVICE_REGULAR_MEMORY) != 0){
+#ifdef HAVE_OPENCL
+      cl_context context = ezcl_get_context();
+      memory_item->mem_capacity = nelem;
+      memory_item->mem_ptr      = ezcl_device_memory_malloc(context, NULL, name, nelem, elsize, CL_MEM_READ_WRITE, 0);
+#endif
+   }
+   // Managed memory allocates extra space and expands as necessary to reduce allocations
+   else if ((flags & HOST_MANAGED_MEMORY) != 0){
+      memory_item->mem_capacity = 2 * nelem;
+      memory_item->mem_ptr      = malloc(2* nelem*elsize);
+   }
+#ifdef HAVE_J7
+   // experimental shared memory allocation
+   else if (flags & LOAD_BALANCE_MEMORY) {
+      memory_item->mem_capacity = nelem;
+      memory_item->mem_ptr      = j7->memAlloc(nelem * elsize);
+   }
+#endif
+   // Just regular memory allocation
+   else {
+      memory_item->mem_capacity = nelem;
+      memory_item->mem_ptr      = malloc(nelem*elsize);
+   }
+
+   memory_item->mem_name = strdup(name); // Mallocs memory for copy
+   // Insert entry into dictionary -- two versions, one by name and another by pointer address
+   memory_name_dict.insert(std::pair<string, malloc_plus_memory_entry*>(name, memory_item) );
+   memory_ptr_dict.insert(std::pair<void*, malloc_plus_memory_entry*>(memory_item->mem_ptr, memory_item) );
+
+   if (DEBUG) printf("MALLOC_PLUS_MEMORY_MALLOC: DEBUG -- malloc plus memory pointer for :%s: is %p nelements %ld elsize is %ld\n",memory_item->mem_name,memory_item->mem_ptr,memory_item->mem_nelem[0],memory_item->mem_elsize);
+   return(memory_item->mem_ptr);
+}
+
+// Resizes the entry registered under malloc_mem_ptr to nelem elements and returns its
+// (possibly moved) pointer, or NULL when the pointer is not in the database. Managed
+// entries are reallocated only when nelem exceeds their reserved capacity.
+void *MallocPlus::memory_realloc(size_t nelem, void *malloc_mem_ptr){
+   map <void *, malloc_plus_memory_entry*>::iterator it = memory_ptr_dict.find(malloc_mem_ptr);
+   void *mem_ptr=NULL;
+
+   if (it != memory_ptr_dict.end() ){
+      // "second" is the value stored under the key: a pointer to the memory entry data structure
+      malloc_plus_memory_entry *memory_item = it->second;
+      if (DEBUG) printf("MALLOC_PLUS_MEMORY_REALLOC: DEBUG -- reallocated memory pointer %p\n",memory_item->mem_ptr);
+
+      // memory pointer will probably change, so delete the pointer dictionary entry; the named
+      //    entry keeps referring to the same memory entry data structure, which is updated in place
+      memory_ptr_dict.erase(it);
+
+      if (memory_item->mem_flags & HOST_MANAGED_MEMORY){
+         // Check to see if memory needs to be expanded
+         if (nelem > memory_item->mem_capacity) {
+            // Need to realloc memory. Allocate extra for growth of array.
+            mem_ptr=realloc(memory_item->mem_ptr, 2*nelem*memory_item->mem_elsize);
+            memory_item->mem_capacity = 2*nelem;
+            memory_item->mem_nelem[0] = nelem;
+            memory_item->mem_ptr      = mem_ptr;
+         } else {
+            // Capacity suffices: update the count and return the existing pointer, not NULL
+            memory_item->mem_nelem[0] = nelem;
+            mem_ptr                   = memory_item->mem_ptr;
+         }
+      }
+#ifdef HAVE_J7
+      else if (memory_item->mem_flags & LOAD_BALANCE_MEMORY) {
+         mem_ptr = j7->memRealloc(memory_item->mem_ptr, nelem * memory_item->mem_elsize);
+         memory_item->mem_capacity = nelem;
+         memory_item->mem_nelem[0] = nelem;
+         memory_item->mem_ptr      = mem_ptr;
+      }
+#endif
+      else {
+         mem_ptr=realloc(memory_item->mem_ptr, nelem*memory_item->mem_elsize);
+         memory_item->mem_capacity = nelem;
+         memory_item->mem_nelem[0] = nelem;
+         memory_item->mem_ptr      = mem_ptr;
+      }
+
+      // Put the pointer entry back into the dictionary
+      memory_ptr_dict.insert(std::pair<void*, malloc_plus_memory_entry*>(memory_item->mem_ptr, memory_item) );
+   } else {
+      if (DEBUG) printf("Warning -- memory pointer %p not found\n",malloc_mem_ptr);
+   }
+
+   return(mem_ptr);
+}
+
+// Name-keyed variant of memory_realloc: resizes the entry called name to nelem elements
+// and returns its (possibly moved) pointer, or NULL when no such entry exists.
+void *MallocPlus::memory_realloc(size_t nelem, const char *name){
+   map <string, malloc_plus_memory_entry*>::iterator it = memory_name_dict.find(name);
+   void *mem_ptr=NULL;
+
+   if (it != memory_name_dict.end() ){
+      malloc_plus_memory_entry *memory_item = it->second;
+      if (DEBUG) printf("MALLOC_PLUS_MEMORY_REALLOC: DEBUG -- "
+                           "reallocated memory pointer %p\n",memory_item->mem_ptr);
+
+      // Need the iterator for the pointer entry too; erasing end() would be undefined, so check it
+      map <void *, malloc_plus_memory_entry*>::iterator it_ptr = memory_ptr_dict.find(memory_item->mem_ptr);
+      if (it_ptr != memory_ptr_dict.end()) memory_ptr_dict.erase(it_ptr);
+
+      if (memory_item->mem_flags & HOST_MANAGED_MEMORY) {
+         // Check to see if memory needs to be expanded
+         if (nelem > memory_item->mem_capacity) {
+            // Need to realloc memory. Allocate extra for growth of array.
+            mem_ptr=realloc(memory_item->mem_ptr, 2*nelem*memory_item->mem_elsize);
+            memory_item->mem_capacity = 2*nelem;
+            memory_item->mem_nelem[0] = nelem;
+            memory_item->mem_ptr      = mem_ptr;
+         } else {
+            // Capacity suffices: update the count and return the existing pointer, not NULL
+            memory_item->mem_nelem[0] = nelem;
+            mem_ptr                   = memory_item->mem_ptr;
+         }
+      }
+#ifdef HAVE_J7
+      else if (memory_item->mem_flags & LOAD_BALANCE_MEMORY) {
+         mem_ptr = j7->memRealloc(memory_item->mem_ptr, nelem * memory_item->mem_elsize);
+         memory_item->mem_capacity = nelem;
+         memory_item->mem_nelem[0] = nelem;
+         memory_item->mem_ptr      = mem_ptr;
+      }
+#endif
+      else {
+         mem_ptr=realloc(memory_item->mem_ptr, nelem*memory_item->mem_elsize);
+         memory_item->mem_capacity = nelem;
+         memory_item->mem_nelem[0] = nelem;
+         memory_item->mem_ptr      = mem_ptr;
+      }
+
+      memory_ptr_dict.insert(std::pair<void*, malloc_plus_memory_entry*>(memory_item->mem_ptr, memory_item) );
+   } else {
+      if (DEBUG) printf("Warning -- memory named %s not found\n",name);
+   }
+
+   return(mem_ptr);
+}
+
+// Sets the capacity of the entry registered under malloc_mem_ptr to new_capacity elements.
+// Returns the pointer realloc hands back, or NULL when the pointer is not in the database.
+void *MallocPlus::memory_request(size_t new_capacity, void *malloc_mem_ptr){
+   map <void *, malloc_plus_memory_entry*>::iterator entry_it = memory_ptr_dict.find(malloc_mem_ptr);
+   if (entry_it == memory_ptr_dict.end() ){
+      if (DEBUG) printf("Warning -- memory pointer %p not found\n",malloc_mem_ptr);
+      return(NULL);
+   }
+
+   malloc_plus_memory_entry *entry = entry_it->second;
+   if (DEBUG) printf("MALLOC_PLUS_MEMORY_REQUEST: DEBUG -- reallocated memory pointer %p\n",entry->mem_ptr);
+   // The key may change, so take the entry out and re-insert it under the resized pointer
+   memory_ptr_dict.erase(entry_it);
+   void *resized = realloc(entry->mem_ptr, new_capacity*entry->mem_elsize);
+   entry->mem_capacity = new_capacity;
+   entry->mem_ptr      = resized;
+   memory_ptr_dict.insert(std::pair<void*, malloc_plus_memory_entry*>(resized, entry) );
+   return(resized);
+}
+
+// Sets the capacity of the named entry to new_capacity elements, primarily for managed memory.
+// Returns the pointer realloc hands back, or NULL when no entry carries that name.
+void *MallocPlus::memory_request(size_t new_capacity, const char *name){
+   map <string, malloc_plus_memory_entry*>::iterator name_it = memory_name_dict.find(name);
+   if (name_it == memory_name_dict.end() ){
+      if (DEBUG) printf("Warning -- memory named %s not found\n",name);
+      return(NULL);
+   }
+
+   malloc_plus_memory_entry *entry = name_it->second;
+   if (DEBUG) printf("MALLOC_PLUS_MEMORY_REQUEST: DEBUG -- reallocated memory pointer %p\n",entry->mem_ptr);
+
+   // Only the pointer-keyed dictionary is refreshed; the name entry keeps pointing at entry
+   map <void *, malloc_plus_memory_entry*>::iterator ptr_it = memory_ptr_dict.find(entry->mem_ptr);
+   memory_ptr_dict.erase(ptr_it);
+   void *resized = realloc(entry->mem_ptr, new_capacity*entry->mem_elsize);
+   entry->mem_capacity = new_capacity;
+   entry->mem_ptr      = resized;
+   memory_ptr_dict.insert(std::pair<void*, malloc_plus_memory_entry*>(resized, entry) );
+   return(resized);
+}
+
+// Resizes every entry in the database to nelem elements, re-keying the pointer dictionary
+// as buffers move. Managed entries are reallocated only when nelem exceeds their capacity,
+// and then to exactly nelem elements.
+void MallocPlus::memory_realloc_all(size_t nelem){
+   // Need a copy of the dictionary since we will be modifying while being used
+   map <void *, malloc_plus_memory_entry*> memory_ptr_dict_old = memory_ptr_dict;
+
+   // Need iterators to both new and old; new will be modified during the loop
+   map<void *, malloc_plus_memory_entry*>::iterator it_old;
+   map<void *, malloc_plus_memory_entry*>::iterator it_new;
+   void *mem_ptr=NULL;
+   for ( it_old=memory_ptr_dict_old.begin(); it_old != memory_ptr_dict_old.end(); it_old++){
+      // Get the memory entry for the old dictionary
+      malloc_plus_memory_entry *memory_item = it_old->second;
+      // Remove the entry from the live dictionary since its pointer key will probably
+      //   change. The dictionary by name does not need to be updated
+      it_new = memory_ptr_dict.find(memory_item->mem_ptr);
+      memory_ptr_dict.erase(it_new);
+
+      if (memory_item->mem_flags & HOST_MANAGED_MEMORY) {
+         if (nelem > memory_item->mem_capacity) {
+            mem_ptr=realloc(memory_item->mem_ptr, nelem*memory_item->mem_elsize);
+            if (DEBUG) printf("MALLOC_PLUS_MEMORY_REALLOC_ALL: DEBUG -- reallocated memory pointer %p new pointer %p\n",memory_item->mem_ptr,mem_ptr);
+            memory_item->mem_capacity = nelem;
+            memory_item->mem_nelem[0] = nelem;
+            memory_item->mem_ptr      = mem_ptr;
+         } else {
+            memory_item->mem_nelem[0] = nelem;
+         }
+      }
+#ifdef HAVE_J7
+      else if (memory_item->mem_flags & LOAD_BALANCE_MEMORY) {
+         mem_ptr = j7->memRealloc(memory_item->mem_ptr, nelem * memory_item->mem_elsize);
+         memory_item->mem_capacity = nelem;
+         memory_item->mem_nelem[0] = nelem;
+         memory_item->mem_ptr      = mem_ptr;
+      }
+#endif
+      else {
+         mem_ptr=realloc(memory_item->mem_ptr, nelem*memory_item->mem_elsize);
+         if (DEBUG) printf("MALLOC_PLUS_MEMORY_REALLOC_ALL: DEBUG -- reallocated memory pointer %p new pointer %p\n",memory_item->mem_ptr,mem_ptr);
+         memory_item->mem_capacity = nelem;
+         memory_item->mem_nelem[0] = nelem;
+         memory_item->mem_ptr      = mem_ptr;
+      }
+
+      // Re-insert under the entry's own pointer: the local mem_ptr is stale when a managed entry was resized in place
+      memory_ptr_dict.insert(std::pair<void*, malloc_plus_memory_entry*>(memory_item->mem_ptr, memory_item) );
+   }
+}
+
+// Applies a capacity of new_capacity elements to every entry in the database.
+void MallocPlus::memory_request_all(size_t new_capacity){
+   typedef map<void *, malloc_plus_memory_entry*> ptr_dict_t;
+
+   // Walk a snapshot: the live dictionary is re-keyed inside the loop
+   ptr_dict_t snapshot = memory_ptr_dict;
+   for (ptr_dict_t::iterator snap_it = snapshot.begin(); snap_it != snapshot.end(); ++snap_it){
+      malloc_plus_memory_entry *entry = snap_it->second;
+
+      // Remove the entry under its current key before the pointer can move
+      memory_ptr_dict.erase(memory_ptr_dict.find(entry->mem_ptr));
+
+      void *resized = realloc(entry->mem_ptr, new_capacity*entry->mem_elsize);
+      if (DEBUG) printf("MALLOC_PLUS_MEMORY_REQUEST_ALL: DEBUG -- reallocated memory pointer %p new pointer %p\n",entry->mem_ptr,resized);
+      entry->mem_capacity = new_capacity;
+      entry->mem_ptr      = resized;
+
+      memory_ptr_dict.insert(std::pair<void*, malloc_plus_memory_entry*>(resized, entry) );
+   }
+}
+
+// Registers a buffer that the caller allocated itself as a one-dimensional entry of nelem elements
+void *MallocPlus::memory_add(void *malloc_mem_ptr, size_t nelem, size_t elsize, const char *name, int flags){
+   malloc_plus_memory_entry *entry = (malloc_plus_memory_entry *)malloc(sizeof(malloc_plus_memory_entry));
+   // Describe the caller's buffer; the capacity is recorded as equal to the element count
+   entry->mem_ptr      = malloc_mem_ptr;
+   entry->mem_flags    = flags;
+   entry->mem_elsize   = elsize;
+   entry->mem_ndims    = 1;
+   entry->mem_nelem    = (size_t *)malloc(1*sizeof(size_t));
+   entry->mem_nelem[0] = nelem;
+   entry->mem_capacity = nelem;
+   entry->mem_name     = strdup(name); // mallocs memory
+
+   memory_ptr_dict.insert(std::pair<void *, malloc_plus_memory_entry*>(malloc_mem_ptr, entry) );
+   memory_name_dict.insert(std::pair<string, malloc_plus_memory_entry*>(name, entry) );
+   if (DEBUG) printf("MALLOC_PLUS_MEMORY_ADD: DEBUG -- added memory pointer for %s is %p\n",name,malloc_mem_ptr);
+   return(malloc_mem_ptr);
+}
+
+// Registers a caller-allocated buffer as an ndim-dimensional entry; nelem holds one extent per dimension
+void *MallocPlus::memory_add(void *malloc_mem_ptr, int ndim, size_t *nelem, size_t elsize, const char *name, int flags){
+   malloc_plus_memory_entry *entry = (malloc_plus_memory_entry *)malloc(sizeof(malloc_plus_memory_entry));
+   entry->mem_ptr      = malloc_mem_ptr;
+   entry->mem_flags    = flags;
+   entry->mem_elsize   = elsize;
+   entry->mem_capacity = 0;   // the multi-dimensional form records no capacity
+   entry->mem_ndims    = ndim;
+
+   // Store a private copy of the extents rather than the caller's array
+   entry->mem_nelem    = (size_t *)malloc(ndim*sizeof(size_t));
+   for (int idim=0; idim<ndim; idim++) entry->mem_nelem[idim] = nelem[idim];
+   entry->mem_name     = strdup(name); // mallocs memory
+
+   memory_ptr_dict.insert(std::pair<void *, malloc_plus_memory_entry*>(malloc_mem_ptr, entry) );
+   memory_name_dict.insert(std::pair<string, malloc_plus_memory_entry*>(name, entry) );
+   if (DEBUG) printf("MALLOC_PLUS_MEMORY_ADD: DEBUG -- added memory pointer for %s is %p\n",name,malloc_mem_ptr);
+
+   return(malloc_mem_ptr);
+}
+
+// Gathers the array through iorder (new[ic] = old[iorder[ic]]) into a fresh buffer, frees the
+// old one and re-keys the entry. Returns the new pointer, or the input if it is not registered.
+double *MallocPlus::memory_reorder(double *malloc_mem_ptr, int *iorder){
+   map <void *, malloc_plus_memory_entry*>::iterator it = memory_ptr_dict.find(malloc_mem_ptr);
+   if (it != memory_ptr_dict.end() ){
+      malloc_plus_memory_entry *memory_item = it->second;
+      double *ptr;
+      memory_ptr_dict.erase(it);
+      if (DEBUG) printf("Found memory item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name);
+      double *tmp = (double *)malloc(memory_item->mem_nelem[0]*memory_item->mem_elsize);
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+      for (uint ic = 0; ic < memory_item->mem_nelem[0]; ic++){
+         tmp[ic] = malloc_mem_ptr[iorder[ic]];
+      }
+      SWAP_PTR(malloc_mem_ptr, tmp, ptr);   // malloc_mem_ptr now names the new buffer, tmp the old one
+      free(tmp);
+      memory_item->mem_ptr      = malloc_mem_ptr;
+      // The new buffer holds exactly mem_nelem[0] elements, so any previously reserved slack is gone
+      memory_item->mem_capacity = memory_item->mem_nelem[0];
+      memory_ptr_dict.insert(std::pair<void *, malloc_plus_memory_entry*>(malloc_mem_ptr, memory_item) );
+   } else {
+      if (DEBUG) printf("Warning -- memory pointer %p not found\n",malloc_mem_ptr);
+   }
+
+   return(malloc_mem_ptr);
+}
+
+// Gathers the array through iorder (new[ic] = old[iorder[ic]]) into a fresh buffer, frees the
+// old one and re-keys the entry. Returns the new pointer, or the input if it is not registered.
+float *MallocPlus::memory_reorder(float *malloc_mem_ptr, int *iorder){
+   map <void *, malloc_plus_memory_entry*>::iterator it = memory_ptr_dict.find(malloc_mem_ptr);
+   if (it != memory_ptr_dict.end() ){
+      malloc_plus_memory_entry *memory_item = it->second;
+      float *ptr;
+      memory_ptr_dict.erase(it);
+      if (DEBUG) printf("Found memory item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name);
+      float *tmp = (float *)malloc(memory_item->mem_nelem[0]*memory_item->mem_elsize);
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+      for (uint ic = 0; ic < memory_item->mem_nelem[0]; ic++){
+         tmp[ic] = malloc_mem_ptr[iorder[ic]];
+      }
+      SWAP_PTR(malloc_mem_ptr, tmp, ptr);   // malloc_mem_ptr now names the new buffer, tmp the old one
+      free(tmp);
+      memory_item->mem_ptr      = malloc_mem_ptr;
+      // The new buffer holds exactly mem_nelem[0] elements, so any previously reserved slack is gone
+      memory_item->mem_capacity = memory_item->mem_nelem[0];
+      memory_ptr_dict.insert(std::pair<void*, malloc_plus_memory_entry*>(malloc_mem_ptr, memory_item) );
+   } else {
+      if (DEBUG) printf("Warning -- memory pointer %p not found\n",malloc_mem_ptr);
+   }
+
+   return(malloc_mem_ptr);
+}
+
+// Gathers the array through iorder (new[ic] = old[iorder[ic]]) into a fresh buffer, frees the
+// old one and re-keys the entry. Returns the new pointer, or the input if it is not registered.
+int *MallocPlus::memory_reorder(int *malloc_mem_ptr, int *iorder){
+   map <void *, malloc_plus_memory_entry*>::iterator it = memory_ptr_dict.find(malloc_mem_ptr);
+   if (it != memory_ptr_dict.end() ){
+      malloc_plus_memory_entry *memory_item = it->second;
+      int *ptr;
+      memory_ptr_dict.erase(it);
+      if (DEBUG) printf("Found memory item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name);
+      int *tmp = (int *)malloc(memory_item->mem_nelem[0]*memory_item->mem_elsize);
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+      for (uint ic = 0; ic < memory_item->mem_nelem[0]; ic++){
+         tmp[ic] = malloc_mem_ptr[iorder[ic]];
+      }
+      SWAP_PTR(malloc_mem_ptr, tmp, ptr);   // malloc_mem_ptr now names the new buffer, tmp the old one
+      free(tmp);
+      memory_item->mem_ptr      = malloc_mem_ptr;
+      // The new buffer holds exactly mem_nelem[0] elements, so any previously reserved slack is gone
+      memory_item->mem_capacity = memory_item->mem_nelem[0];
+      memory_ptr_dict.insert(std::pair<void*, malloc_plus_memory_entry*>(malloc_mem_ptr, memory_item) );
+   } else {
+      if (DEBUG) printf("Warning -- memory pointer %p not found\n",malloc_mem_ptr);
+   }
+
+   return(malloc_mem_ptr);
+}
+
+// Reorders an array whose values are themselves indices: new[ic] = inv_iorder[old[iorder[ic]]].
+// Returns the new buffer (the old one is freed), or the input pointer if it is not registered.
+int *MallocPlus::memory_reorder_indexarray(int *malloc_mem_ptr, int *iorder, int *inv_iorder){
+   map <void *, malloc_plus_memory_entry*>::iterator it = memory_ptr_dict.find(malloc_mem_ptr);
+   if (it != memory_ptr_dict.end() ){
+      malloc_plus_memory_entry *memory_item = it->second;
+      int *ptr;
+      if (DEBUG) printf("Found memory_item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name);
+      memory_ptr_dict.erase(it);
+      int *tmp = (int *)malloc(memory_item->mem_nelem[0]*memory_item->mem_elsize);
+      for (uint ic = 0; ic < memory_item->mem_nelem[0]; ic++){
+         tmp[ic] = inv_iorder[malloc_mem_ptr[iorder[ic]]];
+      }
+      SWAP_PTR(malloc_mem_ptr, tmp, ptr);   // malloc_mem_ptr now names the new buffer, tmp the old one
+      free(tmp);
+      memory_item->mem_ptr      = malloc_mem_ptr;
+      memory_item->mem_capacity = memory_item->mem_nelem[0];   // new buffer has no reserved slack
+      memory_ptr_dict.insert(std::pair<void*, malloc_plus_memory_entry*>(malloc_mem_ptr, memory_item) );
+   } else {
+      if (DEBUG) printf("Warning -- memory pointer %p not found\n",malloc_mem_ptr);
+   }
+   return(malloc_mem_ptr);
+}
+
+// Reorder every registered array with the permutation iorder and re-key each
+// entry in the pointer dictionary under its new buffer.
+//   * entries with flag bit 0x100 set are treated as int index arrays: the
+//     values are permuted and remapped through the inverse permutation
+//     (NOTE(review): 0x100 presumably is the index-array flag -- confirm);
+//   * entries with an element size of 8 are permuted as double;
+//   * all other entries are permuted as float.
+// Entries that cannot be found in the live dictionary are skipped instead of
+// erasing an end() iterator. The former memory_replace() calls were removed:
+// they ran after the old key had been erased and with an unregistered new
+// buffer, so they could never find both entries and only returned NULL.
+void MallocPlus::memory_reorder_all(int *iorder){
+   // Walk a snapshot because the live dictionary is re-keyed inside the loop
+   map <void *, malloc_plus_memory_entry*> memory_ptr_dict_old = memory_ptr_dict;
+   map <void *, malloc_plus_memory_entry*>::iterator it_old;
+   vector<int> inv_iorder;
+
+   for ( it_old=memory_ptr_dict_old.begin(); it_old != memory_ptr_dict_old.end(); it_old++){
+      malloc_plus_memory_entry *memory_item = it_old->second;
+
+      map <void *, malloc_plus_memory_entry*>::iterator it = memory_ptr_dict.find(memory_item->mem_ptr);
+      if (it == memory_ptr_dict.end()) {
+         if (DEBUG) printf("Warning -- memory pointer %p not found\n",memory_item->mem_ptr);
+         continue;
+      }
+      memory_ptr_dict.erase(it);
+
+      void *old_ptr = memory_item->mem_ptr;
+      void *new_ptr = malloc(memory_item->mem_nelem[0]*memory_item->mem_elsize);
+
+      if (memory_item->mem_flags & 0x100) {
+         // Build (or grow) the inverse permutation lazily, only when an index array needs it
+         if (inv_iorder.size() < memory_item->mem_nelem[0]) {
+            inv_iorder.resize(memory_item->mem_nelem[0]);
+            for (int ic = 0; ic < (int)memory_item->mem_nelem[0]; ic++){
+               inv_iorder[iorder[ic]] = ic;
+            }
+         }
+         int *src = (int *)old_ptr;
+         int *dst = (int *)new_ptr;
+         for (uint ic = 0; ic < memory_item->mem_nelem[0]; ic++){
+            dst[ic] = inv_iorder[src[iorder[ic]]];
+         }
+      } else if (memory_item->mem_elsize == 8){
+         double *src = (double *)old_ptr;
+         double *dst = (double *)new_ptr;
+         for (uint ic = 0; ic < memory_item->mem_nelem[0]; ic++){
+            dst[ic] = src[iorder[ic]];
+         }
+      } else {
+         float *src = (float *)old_ptr;
+         float *dst = (float *)new_ptr;
+         for (uint ic = 0; ic < memory_item->mem_nelem[0]; ic++){
+            dst[ic] = src[iorder[ic]];
+         }
+      }
+
+      // Release the stale buffer and register the entry under the reordered one
+      free(old_ptr);
+      memory_item->mem_ptr = new_ptr;
+      memory_ptr_dict.insert(std::pair<void*, malloc_plus_memory_entry*>(new_ptr, memory_item) );
+   }
+}
+
+// Write the per-dimension element counts of an entry into buf as
+// "n0, n1, ...". At most bufsize bytes are written (including the terminating
+// NUL); the list is truncated when it does not fit.
+static void format_nelem_list(const malloc_plus_memory_entry *memory_item, char *buf, size_t bufsize){
+   if (bufsize == 0) return;
+   buf[0] = '\0';
+
+   int nchars = snprintf(buf, bufsize, "%lu", memory_item->mem_nelem[0]);
+   size_t used = (nchars > 0) ? (size_t)nchars : 0;
+
+   // snprintf returns the length it wanted to write, so used >= bufsize means truncation
+   for (uint i = 1; i < memory_item->mem_ndims && used < bufsize; i++){
+      nchars = snprintf(buf + used, bufsize - used, ", %lu", memory_item->mem_nelem[i]);
+      if (nchars < 0) break;
+      used += (size_t)nchars;
+   }
+}
+
+// Print one line per entry of the pointer dictionary followed by one line per
+// entry of the name dictionary: key, name, pointer, number of dimensions,
+// element counts, element size, flags and capacity.
+void MallocPlus::memory_report(void){
+   char nelemstring[80];
+   map<void *, malloc_plus_memory_entry*>::iterator it_ptr;
+
+   for ( it_ptr=memory_ptr_dict.begin(); it_ptr != memory_ptr_dict.end(); it_ptr++){
+      malloc_plus_memory_entry *memory_item = it_ptr->second;
+
+      printf("MallocPlus ptr  %p: name %10s ptr %p dims %lu nelem (",
+            it_ptr->first,memory_item->mem_name,memory_item->mem_ptr,memory_item->mem_ndims);
+
+      format_nelem_list(memory_item, nelemstring, sizeof(nelemstring));
+      printf("%12s",nelemstring);
+
+      printf(") elsize %lu flags %d capacity %lu\n",
+            memory_item->mem_elsize,memory_item->mem_flags,memory_item->mem_capacity);
+   }
+
+   map<string, malloc_plus_memory_entry*>::iterator it_name;
+
+   for ( it_name=memory_name_dict.begin(); it_name != memory_name_dict.end(); it_name++){
+      malloc_plus_memory_entry *memory_item = it_name->second;
+
+      printf("MallocPlus name %14s: name %10s ptr %p dims %lu nelem (",
+            it_name->first.c_str(),memory_item->mem_name,memory_item->mem_ptr,memory_item->mem_ndims);
+
+      format_nelem_list(memory_item, nelemstring, sizeof(nelemstring));
+      printf("%12s",nelemstring);
+
+      printf(") elsize %lu flags %d capacity %lu\n",
+            memory_item->mem_elsize,memory_item->mem_flags,memory_item->mem_capacity);
+   }
+}
+
+// Free the memory registered under malloc_mem_ptr and remove its entry from
+// both dictionaries. Device memory is released through
+// ezcl_device_memory_delete (HAVE_OPENCL), load-balance memory through
+// j7->memFree (HAVE_J7), everything else through free(). Always returns NULL
+// so callers can write ptr = memory_delete(ptr).
+void *MallocPlus::memory_delete(void *malloc_mem_ptr){
+   map <void *, malloc_plus_memory_entry*>::iterator it = memory_ptr_dict.find(malloc_mem_ptr);
+
+   if (it != memory_ptr_dict.end()){
+      malloc_plus_memory_entry *memory_item = it->second;
+
+      if (DEBUG) printf("MALLOC_PLUS_MEMORY_REMOVE: DEBUG -- removed memory pointer %p\n",memory_item->mem_ptr);
+
+      if ((memory_item->mem_flags & DEVICE_REGULAR_MEMORY) != 0){
+#ifdef HAVE_OPENCL
+         ezcl_device_memory_delete(memory_item->mem_ptr);
+#endif
+      }
+#ifdef HAVE_J7
+      else if (memory_item->mem_flags & LOAD_BALANCE_MEMORY) {
+         j7->memFree(memory_item->mem_ptr);
+      }
+#endif
+      else {
+         free(memory_item->mem_ptr);
+      }
+
+      memory_ptr_dict.erase(it);
+
+      // The name may be missing from the name dictionary; erasing end() is
+      // undefined behavior, so only erase an entry that was actually found
+      map <string, malloc_plus_memory_entry*>::iterator it_name = memory_name_dict.find(memory_item->mem_name);
+      if (it_name != memory_name_dict.end()) memory_name_dict.erase(it_name);
+
+      free(memory_item->mem_nelem);
+      free(memory_item->mem_name);
+      free(memory_item);
+   } else {
+      if (DEBUG) printf("Warning -- memory pointer %p not found\n",malloc_mem_ptr);
+   }
+
+   return(NULL);
+}
+
+// Free the memory registered under the given name and remove its entry from
+// both dictionaries. Device memory is released through
+// ezcl_device_memory_delete (HAVE_OPENCL), load-balance memory through
+// j7->memFree (HAVE_J7), everything else through free(). Always returns NULL.
+void *MallocPlus::memory_delete(const char *name){
+   map <string, malloc_plus_memory_entry*>::iterator it = memory_name_dict.find(name);
+
+   if (it != memory_name_dict.end()){
+      malloc_plus_memory_entry *memory_item = it->second;
+
+      if (DEBUG) printf("MALLOC_PLUS_MEMORY_REMOVE: DEBUG -- removed memory pointer %p\n",memory_item->mem_ptr);
+
+      // Unregister first, while mem_ptr is still a live pointer value. Only
+      // erase a pointer entry that was actually found: erasing end() is
+      // undefined behavior.
+      memory_name_dict.erase(it);
+      map <void *, malloc_plus_memory_entry*>::iterator it_ptr = memory_ptr_dict.find(memory_item->mem_ptr);
+      if (it_ptr != memory_ptr_dict.end()) memory_ptr_dict.erase(it_ptr);
+
+      if ((memory_item->mem_flags & DEVICE_REGULAR_MEMORY) != 0){
+#ifdef HAVE_OPENCL
+         ezcl_device_memory_delete(memory_item->mem_ptr);
+#endif
+      }
+#ifdef HAVE_J7
+      else if (memory_item->mem_flags & LOAD_BALANCE_MEMORY) {
+         j7->memFree(memory_item->mem_ptr);
+      }
+#endif
+      else {
+         free(memory_item->mem_ptr);
+      }
+
+      free(memory_item->mem_nelem);
+      free(memory_item->mem_name);
+      free(memory_item);
+   } else {
+      if (DEBUG) printf("Warning -- memory named %s not found\n",name);
+   }
+
+   return(NULL);
+}
+
+// Free every registered allocation together with its bookkeeping record, then
+// empty both dictionaries. The release path per entry mirrors memory_delete:
+// device memory through ezcl_device_memory_delete (HAVE_OPENCL), load-balance
+// memory through j7->memFree (HAVE_J7), everything else through free().
+void MallocPlus::memory_delete_all(void){
+   map <void *, malloc_plus_memory_entry*>::iterator it;
+
+   // The dictionary itself is not modified inside the loop, so it can be
+   // walked directly; it is cleared in one go afterwards
+   for ( it=memory_ptr_dict.begin(); it != memory_ptr_dict.end(); it++){
+      malloc_plus_memory_entry *memory_item = it->second;
+
+      if (DEBUG) printf("MALLOC_PLUS_MEMORY_REMOVE: DEBUG -- removed memory pointer %p name %s\n",memory_item->mem_ptr,memory_item->mem_name);
+
+      if ((memory_item->mem_flags & DEVICE_REGULAR_MEMORY) != 0){
+#ifdef HAVE_OPENCL
+         ezcl_device_memory_delete(memory_item->mem_ptr);
+#endif
+      }
+#ifdef HAVE_J7
+      else if (memory_item->mem_flags & LOAD_BALANCE_MEMORY) {
+         j7->memFree(memory_item->mem_ptr);
+      }
+#endif
+      else {
+         free(memory_item->mem_ptr);
+      }
+
+      free(memory_item->mem_nelem);
+      free(memory_item->mem_name);
+      free(memory_item);
+   }
+
+   memory_ptr_dict.clear();
+   memory_name_dict.clear();
+}
+
+// For memory that was allocated by the host and added to the database with the
+// memory_add function. This is the corresponding routine to delete the
+// dictionary entries (pointer and name) and the bookkeeping record.
+// The memory itself is not freed.
+void MallocPlus::memory_remove(void *malloc_mem_ptr){
+   map <void *, malloc_plus_memory_entry*>::iterator it = memory_ptr_dict.find(malloc_mem_ptr);
+
+   if (it != memory_ptr_dict.end()){
+      malloc_plus_memory_entry *memory_item = it->second;
+
+      if (DEBUG) printf("MALLOC_PLUS_MEMORY_REMOVE: DEBUG -- removed memory pointer %p\n",memory_item->mem_ptr);
+      memory_ptr_dict.erase(it);
+
+      // Only erase a name entry that was actually found: erasing end() is undefined behavior
+      map <string, malloc_plus_memory_entry*>::iterator it_name = memory_name_dict.find(memory_item->mem_name);
+      if (it_name != memory_name_dict.end()) memory_name_dict.erase(it_name);
+
+      free(memory_item->mem_nelem);
+      free(memory_item->mem_name);
+      free(memory_item);
+   } else {
+      if (DEBUG) printf("Warning -- memory pointer %p not found\n",malloc_mem_ptr);
+   }
+}
+
+// Remove the entry registered under the given name from both dictionaries and
+// release its bookkeeping record. The memory itself is not freed.
+void MallocPlus::memory_remove(const char *name){
+   map <string, malloc_plus_memory_entry*>::iterator it = memory_name_dict.find(name);
+
+   if (it != memory_name_dict.end()){
+      malloc_plus_memory_entry *memory_item = it->second;
+
+      if (DEBUG) printf("MALLOC_PLUS_MEMORY_REMOVE: DEBUG -- removed memory pointer %p\n",memory_item->mem_ptr);
+      memory_name_dict.erase(it);
+
+      // Only erase a pointer entry that was actually found: erasing end() is undefined behavior
+      map <void *, malloc_plus_memory_entry*>::iterator it_ptr = memory_ptr_dict.find(memory_item->mem_ptr);
+      if (it_ptr != memory_ptr_dict.end()) memory_ptr_dict.erase(it_ptr);
+
+      free(memory_item->mem_nelem);
+      free(memory_item->mem_name);
+      free(memory_item);
+   } else {
+      if (DEBUG) printf("Warning -- memory named %s not found\n",name);
+   }
+}
+
+// Start a walk over the pointer dictionary and return the memory pointer of
+// its first entry. Returns NULL when the dictionary is empty, the same value
+// memory_next uses to signal the end of the walk.
+void *MallocPlus::memory_begin(void){
+   it_save = memory_ptr_dict.begin();
+   // An empty dictionary has begin() == end(), which must not be dereferenced
+   if (it_save == memory_ptr_dict.end()) return(NULL);
+
+   malloc_plus_memory_entry *memory_item = it_save->second;
+   return(memory_item->mem_ptr);
+}
+
+// Advance the walk started by memory_begin and return the memory pointer of
+// the next entry, or NULL once the end of the pointer dictionary is reached.
+// Calling it again after the end has been reached keeps returning NULL.
+void *MallocPlus::memory_next(void){
+   // Incrementing an iterator that already equals end() is undefined behavior
+   if (it_save != memory_ptr_dict.end()) it_save++;
+
+   if (it_save != memory_ptr_dict.end()){
+      malloc_plus_memory_entry *memory_item = it_save->second;
+
+      if (DEBUG) printf("Found memory item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name);
+      return(memory_item->mem_ptr);
+   } else {
+      if (DEBUG) printf("Warning -- memory not found\n");
+      return(NULL);
+   }
+}
+
+// Start a walk over the name dictionary and return the memory pointer of its
+// first entry. Returns NULL when the dictionary is empty, the same value
+// memory_by_name_next uses to signal the end of the walk.
+void *MallocPlus::memory_by_name_begin(void){
+   it_save_by_name = memory_name_dict.begin();
+   // An empty dictionary has begin() == end(), which must not be dereferenced
+   if (it_save_by_name == memory_name_dict.end()) return(NULL);
+
+   // Read through the by-name iterator that was just positioned, not it_save
+   malloc_plus_memory_entry *memory_item = it_save_by_name->second;
+   return(memory_item->mem_ptr);
+}
+
+// Advance the walk started by memory_by_name_begin and return the memory
+// pointer of the next entry, or NULL once the end of the name dictionary is
+// reached. Calling it again after the end has been reached keeps returning NULL.
+void *MallocPlus::memory_by_name_next(void){
+   // Incrementing an iterator that already equals end() is undefined behavior.
+   // The member iterator is used directly so no local iterator type has to be
+   // spelled out (the former local named a different map type).
+   if (it_save_by_name != memory_name_dict.end()) it_save_by_name++;
+
+   if (it_save_by_name != memory_name_dict.end()){
+      malloc_plus_memory_entry *memory_item = it_save_by_name->second;
+
+      if (DEBUG) printf("Found memory item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name);
+      return(memory_item->mem_ptr);
+   } else {
+      if (DEBUG) printf("Warning -- memory not found\n");
+      return(NULL);
+   }
+}
+
+// Start a walk over the pointer dictionary and return its first entry record.
+// Returns NULL when the dictionary is empty, which is also the value
+// memory_entry_end() returns.
+malloc_plus_memory_entry* MallocPlus::memory_entry_begin(void){
+   it_save = memory_ptr_dict.begin();
+   // An empty dictionary has begin() == end(), which must not be dereferenced
+   if (it_save == memory_ptr_dict.end()) return(NULL);
+
+   malloc_plus_memory_entry *memory_item = it_save->second;
+   return(memory_item);
+}
+
+// Advance the walk started by memory_entry_begin and return the next entry
+// record, or NULL once the end of the pointer dictionary is reached. Calling
+// it again after the end has been reached keeps returning NULL.
+malloc_plus_memory_entry* MallocPlus::memory_entry_next(void){
+   // Incrementing an iterator that already equals end() is undefined behavior
+   if (it_save == memory_ptr_dict.end()) return(NULL);
+
+   it_save++;
+   if (it_save == memory_ptr_dict.end()) return(NULL);
+   malloc_plus_memory_entry *memory_item = it_save->second;
+   if (DEBUG) printf("Found memory item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name);
+   return(memory_item);
+}
+
+// End-of-walk sentinel for the entry walk over the pointer dictionary: always NULL.
+malloc_plus_memory_entry* MallocPlus::memory_entry_end(void){
+   malloc_plus_memory_entry *end_marker = NULL;
+   return end_marker;
+}
+
+// Start a walk over the name dictionary and return its first entry record.
+// Returns NULL when the dictionary is empty, which is also the value
+// memory_entry_by_name_end() returns.
+malloc_plus_memory_entry* MallocPlus::memory_entry_by_name_begin(void){
+   it_save_by_name = memory_name_dict.begin();
+   // An empty dictionary has begin() == end(), which must not be dereferenced
+   if (it_save_by_name == memory_name_dict.end()) return(NULL);
+
+   malloc_plus_memory_entry *memory_item = it_save_by_name->second;
+   return(memory_item);
+}
+
+// Advance the walk started by memory_entry_by_name_begin and return the next
+// entry record, or NULL once the end of the name dictionary is reached.
+// Calling it again after the end has been reached keeps returning NULL.
+malloc_plus_memory_entry* MallocPlus::memory_entry_by_name_next(void){
+   // Incrementing an iterator that already equals end() is undefined behavior
+   if (it_save_by_name == memory_name_dict.end()) return(NULL);
+
+   it_save_by_name++;
+   if (it_save_by_name == memory_name_dict.end()) return(NULL);
+   malloc_plus_memory_entry *memory_item = it_save_by_name->second;
+   if (DEBUG) printf("Found memory item ptr %p name %s\n",memory_item->mem_ptr,memory_item->mem_name);
+   return(memory_item);
+}
+
+// End-of-walk sentinel for the entry walk over the name dictionary: always NULL.
+malloc_plus_memory_entry* MallocPlus::memory_entry_by_name_end(void){
+   malloc_plus_memory_entry *end_marker = NULL;
+   return end_marker;
+}
+
+// Return the first-dimension element count (mem_nelem[0]) recorded for a
+// registered pointer, or 0 when the pointer is not registered.
+size_t MallocPlus::get_memory_size(void *malloc_mem_ptr){
+   map <void *, malloc_plus_memory_entry*>::iterator found = memory_ptr_dict.find(malloc_mem_ptr);
+
+   if (found == memory_ptr_dict.end()){
+      if (DEBUG) printf("Warning -- memory not found\n");
+      return(0);
+   }
+
+   malloc_plus_memory_entry *entry = found->second;
+   if (DEBUG) printf("Found memory item ptr %p name %s\n",entry->mem_ptr,entry->mem_name);
+   return(entry->mem_nelem[0]);
+}
+
+// Return the element size (mem_elsize) recorded for a registered pointer,
+// converted to int, or 0 when the pointer is not registered.
+int MallocPlus::get_memory_elemsize(void *malloc_mem_ptr){
+   map <void *, malloc_plus_memory_entry*>::iterator found = memory_ptr_dict.find(malloc_mem_ptr);
+
+   if (found == memory_ptr_dict.end()){
+      if (DEBUG) printf("Warning -- memory not found\n");
+      return(0);
+   }
+
+   malloc_plus_memory_entry *entry = found->second;
+   if (DEBUG) printf("Found memory item ptr %p name %s\n",entry->mem_ptr,entry->mem_name);
+   return(entry->mem_elsize);
+}
+
+// Return the attribute flags (mem_flags) recorded for a registered pointer,
+// or 0 when the pointer is not registered.
+int MallocPlus::get_memory_flags(void *malloc_mem_ptr){
+   map <void *, malloc_plus_memory_entry*>::iterator found = memory_ptr_dict.find(malloc_mem_ptr);
+
+   if (found == memory_ptr_dict.end()){
+      if (DEBUG) printf("Warning -- memory not found\n");
+      return(0);
+   }
+
+   malloc_plus_memory_entry *entry = found->second;
+   if (DEBUG) printf("Found memory item ptr %p name %s attribute %d\n",entry->mem_ptr,entry->mem_name,entry->mem_flags);
+   return(entry->mem_flags);
+}
+
+// Return the capacity (mem_capacity) recorded for a registered pointer, or 0
+// when the pointer is not registered.
+size_t MallocPlus::get_memory_capacity(void *malloc_mem_ptr){
+   map <void *, malloc_plus_memory_entry*>::iterator found = memory_ptr_dict.find(malloc_mem_ptr);
+
+   if (found == memory_ptr_dict.end()){
+      if (DEBUG) printf("Warning -- memory not found\n");
+      return(0);
+   }
+
+   malloc_plus_memory_entry *entry = found->second;
+   if (DEBUG) printf("Found memory item ptr %p name %s\n",entry->mem_ptr,entry->mem_name);
+   return(entry->mem_capacity);
+}
+
+// Return the name (mem_name) recorded for a registered pointer, or NULL when
+// the pointer is not registered. The string is owned by the entry record.
+const char * MallocPlus::get_memory_name(void *malloc_mem_ptr){
+   map <void *, malloc_plus_memory_entry*>::iterator found = memory_ptr_dict.find(malloc_mem_ptr);
+
+   if (found == memory_ptr_dict.end()){
+      if (DEBUG) printf("Warning -- memory not found\n");
+      return(NULL);
+   }
+
+   malloc_plus_memory_entry *entry = found->second;
+   if (DEBUG) printf("Found memory item ptr %p name %s\n",entry->mem_ptr,entry->mem_name);
+   return(entry->mem_name);
+}
+
+// Make the entry registered under malloc_mem_ptr_old take over the buffer
+// registered under malloc_mem_ptr_new. The old buffer is released (device
+// memory through ezcl_device_memory_replace, load-balance memory through
+// j7->memFree, otherwise free()), the old entry keeps its name but adopts the
+// new entry's element count, capacity, element size and flags, and the new
+// entry's record is destroyed. Returns the surviving entry's memory pointer,
+// or NULL when either pointer is not registered. When both arguments refer
+// to the same entry nothing is changed and its pointer is returned.
+void *MallocPlus::memory_replace(void *malloc_mem_ptr_old, void * const malloc_mem_ptr_new){
+   map <void *, malloc_plus_memory_entry*>::iterator it_old = memory_ptr_dict.find(malloc_mem_ptr_old);
+   map <void *, malloc_plus_memory_entry*>::iterator it_new = memory_ptr_dict.find(malloc_mem_ptr_new);
+
+   if (it_old == memory_ptr_dict.end() || it_new == memory_ptr_dict.end() ){
+      if (DEBUG) printf("Warning -- memory not found\n");
+      return(NULL);
+   }
+
+   malloc_plus_memory_entry *memory_item_old = it_old->second;
+   malloc_plus_memory_entry *memory_item_new = it_new->second;
+
+   // Replacing an entry with itself would erase the same iterator twice and
+   // free the buffer that is supposed to survive
+   if (it_old == it_new) return(memory_item_old->mem_ptr);
+
+   // erase the entries in the pointer dictionary
+   memory_ptr_dict.erase(it_new);
+   memory_ptr_dict.erase(it_old);
+
+   // erase the entries in the named dictionary; each lookup is checked because
+   // erasing end() is undefined behavior
+   map <string, malloc_plus_memory_entry*>::iterator it_name_new = memory_name_dict.find(memory_item_new->mem_name);
+   if (it_name_new != memory_name_dict.end()) memory_name_dict.erase(it_name_new);
+   map <string, malloc_plus_memory_entry*>::iterator it_name_old = memory_name_dict.find(memory_item_old->mem_name);
+   if (it_name_old != memory_name_dict.end()) memory_name_dict.erase(it_name_old);
+
+   if (DEBUG) printf("Found memory item ptr_old %p name %s ptr_new %p name %s\n",memory_item_old->mem_ptr,memory_item_old->mem_name,memory_item_new->mem_ptr,memory_item_new->mem_name);
+
+   if ((memory_item_old->mem_flags & DEVICE_REGULAR_MEMORY) != 0){
+#ifdef HAVE_OPENCL
+      if (DEBUG) printf("Deleting device memory name %s pointer %p\n",memory_item_old->mem_name,memory_item_old->mem_ptr);
+      ezcl_device_memory_replace(&memory_item_old->mem_ptr, &memory_item_new->mem_ptr);
+#endif
+   }
+#ifdef HAVE_J7
+   // Test the flags of the old entry (there is no variable named memory_item here)
+   else if (memory_item_old->mem_flags & LOAD_BALANCE_MEMORY) {
+      j7->memFree(memory_item_old->mem_ptr);
+      memory_item_old->mem_ptr      = memory_item_new->mem_ptr;
+   }
+#endif
+   else {
+      free(memory_item_old->mem_ptr);
+      memory_item_old->mem_ptr      = memory_item_new->mem_ptr;
+   }
+
+   memory_item_old->mem_nelem[0] = memory_item_new->mem_nelem[0];
+   memory_item_old->mem_capacity = memory_item_new->mem_capacity;
+   memory_item_old->mem_elsize   = memory_item_new->mem_elsize;
+   memory_item_old->mem_flags    = memory_item_new->mem_flags;
+   malloc_mem_ptr_old = (void *)malloc_mem_ptr_new;
+   free(memory_item_new->mem_nelem);
+   free(memory_item_new->mem_name);
+   free(memory_item_new);
+
+   // Register the surviving entry again under the new pointer and its own name
+   memory_ptr_dict.insert(std::pair<void*, malloc_plus_memory_entry*>(malloc_mem_ptr_old, memory_item_old) );
+   memory_name_dict.insert(std::pair<string, malloc_plus_memory_entry*>(memory_item_old->mem_name, memory_item_old) );
+
+   return(memory_item_old->mem_ptr);
+}
+
+// Exchange two registered int arrays: on return *malloc_mem_ptr_old holds the
+// buffer that was passed in *malloc_mem_ptr_new and vice versa, and the two
+// entries have exchanged names so that each name follows the caller's
+// variable. Nothing happens (apart from a DEBUG warning) when either pointer
+// is not registered, and swapping an array with itself is a no-op.
+void MallocPlus::memory_swap(int **malloc_mem_ptr_old, int **malloc_mem_ptr_new){
+   map <void *, malloc_plus_memory_entry*>::iterator it_old = memory_ptr_dict.find(*malloc_mem_ptr_old);
+   map <void *, malloc_plus_memory_entry*>::iterator it_new = memory_ptr_dict.find(*malloc_mem_ptr_new);
+
+   if (it_old == memory_ptr_dict.end() || it_new == memory_ptr_dict.end() ){
+      if (DEBUG) printf("Warning -- memory not found\n");
+      return;
+   }
+
+   // Both sides are the same entry: erasing the same iterator twice below
+   // would be undefined behavior, and there is nothing to exchange
+   if (it_old == it_new) return;
+
+   // Swap the memory entries during the retrieval
+   malloc_plus_memory_entry *memory_item_new = it_old->second;
+   malloc_plus_memory_entry *memory_item_old = it_new->second;
+
+   if (DEBUG) printf("Found memory item ptr_old %p name %s ptr_new %p name %s\n",memory_item_old->mem_ptr,memory_item_old->mem_name,memory_item_new->mem_ptr,memory_item_new->mem_name);
+
+   char *mem_name_tmp        = memory_item_old->mem_name;
+   memory_item_old->mem_name = memory_item_new->mem_name;
+   memory_item_new->mem_name = mem_name_tmp;
+
+   // Re-register the ptr entries
+   memory_ptr_dict.erase(it_old);
+   memory_ptr_dict.erase(it_new);
+
+   memory_ptr_dict.insert(std::pair<void *, malloc_plus_memory_entry*>(memory_item_old->mem_ptr, memory_item_old) );
+   memory_ptr_dict.insert(std::pair<void *, malloc_plus_memory_entry*>(memory_item_new->mem_ptr, memory_item_new) );
+
+   // Re-register the named entries; each lookup is checked because erasing
+   // end() is undefined behavior
+   map <string, malloc_plus_memory_entry*>::iterator it_name = memory_name_dict.find(memory_item_old->mem_name);
+   if (it_name != memory_name_dict.end()) memory_name_dict.erase(it_name);
+   it_name = memory_name_dict.find(memory_item_new->mem_name);
+   if (it_name != memory_name_dict.end()) memory_name_dict.erase(it_name);
+
+   memory_name_dict.insert(std::pair<string, malloc_plus_memory_entry*>(memory_item_old->mem_name, memory_item_old) );
+   memory_name_dict.insert(std::pair<string, malloc_plus_memory_entry*>(memory_item_new->mem_name, memory_item_new) );
+
+   // memory items have been swapped, so return the new pointers
+   *malloc_mem_ptr_old = (int *)memory_item_old->mem_ptr;
+   *malloc_mem_ptr_new = (int *)memory_item_new->mem_ptr;
+}
+
+// Exchange two registered float arrays: on return *malloc_mem_ptr_old holds
+// the buffer that was passed in *malloc_mem_ptr_new and vice versa, and the
+// two entries have exchanged names so that each name follows the caller's
+// variable. Nothing happens (apart from a DEBUG warning) when either pointer
+// is not registered, and swapping an array with itself is a no-op.
+void MallocPlus::memory_swap(float **malloc_mem_ptr_old, float **malloc_mem_ptr_new){
+   map <void *, malloc_plus_memory_entry*>::iterator it_old = memory_ptr_dict.find(*malloc_mem_ptr_old);
+   map <void *, malloc_plus_memory_entry*>::iterator it_new = memory_ptr_dict.find(*malloc_mem_ptr_new);
+
+   if (it_old == memory_ptr_dict.end() || it_new == memory_ptr_dict.end() ){
+      if (DEBUG) printf("Warning -- memory not found\n");
+      return;
+   }
+
+   // Both sides are the same entry: erasing the same iterator twice below
+   // would be undefined behavior, and there is nothing to exchange
+   if (it_old == it_new) return;
+
+   // Swap the memory entries during the retrieval
+   malloc_plus_memory_entry *memory_item_new = it_old->second;
+   malloc_plus_memory_entry *memory_item_old = it_new->second;
+
+   if (DEBUG) printf("Found memory item ptr_old %p name %s ptr_new %p name %s\n",memory_item_old->mem_ptr,memory_item_old->mem_name,memory_item_new->mem_ptr,memory_item_new->mem_name);
+
+   char *mem_name_tmp        = memory_item_old->mem_name;
+   memory_item_old->mem_name = memory_item_new->mem_name;
+   memory_item_new->mem_name = mem_name_tmp;
+
+   // Re-register the ptr entries
+   memory_ptr_dict.erase(it_old);
+   memory_ptr_dict.erase(it_new);
+
+   memory_ptr_dict.insert(std::pair<void *, malloc_plus_memory_entry*>(memory_item_old->mem_ptr, memory_item_old) );
+   memory_ptr_dict.insert(std::pair<void *, malloc_plus_memory_entry*>(memory_item_new->mem_ptr, memory_item_new) );
+
+   // Re-register the named entries; each lookup is checked because erasing
+   // end() is undefined behavior
+   map <string, malloc_plus_memory_entry*>::iterator it_name = memory_name_dict.find(memory_item_old->mem_name);
+   if (it_name != memory_name_dict.end()) memory_name_dict.erase(it_name);
+   it_name = memory_name_dict.find(memory_item_new->mem_name);
+   if (it_name != memory_name_dict.end()) memory_name_dict.erase(it_name);
+
+   memory_name_dict.insert(std::pair<string, malloc_plus_memory_entry*>(memory_item_old->mem_name, memory_item_old) );
+   memory_name_dict.insert(std::pair<string, malloc_plus_memory_entry*>(memory_item_new->mem_name, memory_item_new) );
+
+   // memory items have been swapped, so return the new pointers
+   *malloc_mem_ptr_old = (float *)memory_item_old->mem_ptr;
+   *malloc_mem_ptr_new = (float *)memory_item_new->mem_ptr;
+}
+
+// Exchange two registered double arrays: on return *malloc_mem_ptr_old holds
+// the buffer that was passed in *malloc_mem_ptr_new and vice versa, and the
+// two entries have exchanged names so that each name follows the caller's
+// variable. Nothing happens (apart from a DEBUG warning) when either pointer
+// is not registered, and swapping an array with itself is a no-op.
+void MallocPlus::memory_swap(double **malloc_mem_ptr_old, double **malloc_mem_ptr_new){
+   map <void *, malloc_plus_memory_entry*>::iterator it_old = memory_ptr_dict.find(*malloc_mem_ptr_old);
+   map <void *, malloc_plus_memory_entry*>::iterator it_new = memory_ptr_dict.find(*malloc_mem_ptr_new);
+
+   if (it_old == memory_ptr_dict.end() || it_new == memory_ptr_dict.end() ){
+      if (DEBUG) printf("Warning -- memory not found\n");
+      return;
+   }
+
+   // Both sides are the same entry: erasing the same iterator twice below
+   // would be undefined behavior, and there is nothing to exchange
+   if (it_old == it_new) return;
+
+   // Swap the memory entries during the retrieval
+   malloc_plus_memory_entry *memory_item_new = it_old->second;
+   malloc_plus_memory_entry *memory_item_old = it_new->second;
+
+   if (DEBUG) printf("Found memory item ptr_old %p name %s ptr_new %p name %s\n",memory_item_old->mem_ptr,memory_item_old->mem_name,memory_item_new->mem_ptr,memory_item_new->mem_name);
+
+   char *mem_name_tmp        = memory_item_old->mem_name;
+   memory_item_old->mem_name = memory_item_new->mem_name;
+   memory_item_new->mem_name = mem_name_tmp;
+
+   // Re-register the ptr entries
+   memory_ptr_dict.erase(it_old);
+   memory_ptr_dict.erase(it_new);
+
+   memory_ptr_dict.insert(std::pair<void *, malloc_plus_memory_entry*>(memory_item_old->mem_ptr, memory_item_old) );
+   memory_ptr_dict.insert(std::pair<void *, malloc_plus_memory_entry*>(memory_item_new->mem_ptr, memory_item_new) );
+
+   // Re-register the named entries; each lookup is checked because erasing
+   // end() is undefined behavior
+   map <string, malloc_plus_memory_entry*>::iterator it_name = memory_name_dict.find(memory_item_old->mem_name);
+   if (it_name != memory_name_dict.end()) memory_name_dict.erase(it_name);
+   it_name = memory_name_dict.find(memory_item_new->mem_name);
+   if (it_name != memory_name_dict.end()) memory_name_dict.erase(it_name);
+
+   // Same pair type as the int and float overloads
+   memory_name_dict.insert(std::pair<string, malloc_plus_memory_entry*>(memory_item_old->mem_name, memory_item_old) );
+   memory_name_dict.insert(std::pair<string, malloc_plus_memory_entry*>(memory_item_new->mem_name, memory_item_new) );
+
+   // memory items have been swapped, so return the new pointers
+   *malloc_mem_ptr_old = (double *)memory_item_old->mem_ptr;
+   *malloc_mem_ptr_new = (double *)memory_item_new->mem_ptr;
+}
+
+// Allocate a second array with the same first-dimension element count,
+// element size and flags as the registered array malloc_mem_ptr, registered
+// under the name addname. Only the allocation is made here via memory_malloc;
+// this function does not copy the contents of the source array.
+// Returns the new pointer, or NULL when malloc_mem_ptr is not registered.
+void *MallocPlus::memory_duplicate(void *malloc_mem_ptr, const char *addname){
+   map <void *, malloc_plus_memory_entry*>::iterator found = memory_ptr_dict.find(malloc_mem_ptr);
+
+   if (found == memory_ptr_dict.end()){
+      if (DEBUG) printf("Warning -- memory not found\n");
+      return(NULL);
+   }
+
+   malloc_plus_memory_entry *source = found->second;
+
+   // The memory_malloc will add the database entry
+   void *mem_ptr_dup = memory_malloc(source->mem_nelem[0], source->mem_elsize, addname, source->mem_flags);
+   return(mem_ptr_dup);
+}
+
+// Look an allocation up by name and return its memory pointer, or NULL when
+// no entry with that name is registered.
+void *MallocPlus::get_memory_ptr(const char *name){
+   map <string, malloc_plus_memory_entry*>::iterator found = memory_name_dict.find(name);
+
+   if (found == memory_name_dict.end()){
+      if (DEBUG) printf("Warning -- memory not found\n");
+      return(NULL);
+   }
+
+   malloc_plus_memory_entry *entry = found->second;
+   if (DEBUG) printf("Found memory item ptr %p name %s\n",entry->mem_ptr,entry->mem_name);
+   return(entry->mem_ptr);
+}
+
+// Report whether any bit of attribute is set in the flags of a registered
+// pointer. Unlike the other queries, an unregistered pointer is fatal: an
+// error is printed and the program exits with status 1.
+bool MallocPlus::check_memory_attribute(void *malloc_mem_ptr, int attribute){
+   map <void *, malloc_plus_memory_entry*>::iterator found = memory_ptr_dict.find(malloc_mem_ptr);
+
+   if (found == memory_ptr_dict.end()){
+      printf("Error -- memory not found\n");
+      exit(1);
+   }
+
+   malloc_plus_memory_entry *entry = found->second;
+   if (DEBUG) printf("Found memory item ptr %p name %s attribute %d\n",entry->mem_ptr,entry->mem_name,entry->mem_flags);
+
+   return (entry->mem_flags & attribute) != 0;
+}
+
+// OR the bits of attribute into the flags of a registered pointer. An
+// unregistered pointer is ignored (a warning is printed when DEBUG is on).
+void MallocPlus::set_memory_attribute(void *malloc_mem_ptr, int attribute){
+   map <void *, malloc_plus_memory_entry*>::iterator found = memory_ptr_dict.find(malloc_mem_ptr);
+
+   if (found == memory_ptr_dict.end()){
+      if (DEBUG) printf("Warning -- memory not found\n");
+      return;
+   }
+
+   malloc_plus_memory_entry *entry = found->second;
+   if (DEBUG) printf("Found memory item ptr %p name %s attribute %d\n",entry->mem_ptr,entry->mem_name,entry->mem_flags);
+   entry->mem_flags |= attribute;
+}
+
+// Clear the bits of attribute in the flags of a registered pointer. With
+// DEBUG on, the flags are printed before and after the change. An
+// unregistered pointer is ignored (a warning is printed when DEBUG is on).
+void MallocPlus::clear_memory_attribute(void *malloc_mem_ptr, int attribute){
+   map <void *, malloc_plus_memory_entry*>::iterator found = memory_ptr_dict.find(malloc_mem_ptr);
+
+   if (found == memory_ptr_dict.end()){
+      if (DEBUG) printf("Warning -- memory not found\n");
+      return;
+   }
+
+   malloc_plus_memory_entry *entry = found->second;
+   if (DEBUG) printf("Found memory item ptr %p name %s attribute %d\n",entry->mem_ptr,entry->mem_name,entry->mem_flags);
+   entry->mem_flags &= ~attribute;
+   if (DEBUG) printf("Found memory item ptr %p name %s attribute %d\n",entry->mem_ptr,entry->mem_name,entry->mem_flags);
+}
+
+// Wrappers with C linkage that forward to MallocPlus member functions on an
+// explicitly passed object.
+extern "C" {
+   // Create a MallocPlus object with operator new and hand back its address.
+   MallocPlus *MallocPlus_new(){
+      MallocPlus *mem_object = new MallocPlus;
+      return mem_object;
+   }
+
+   // Forward to MallocPlus::memory_report().
+   void MallocPlus_memory_report(MallocPlus *mem_object) {
+      mem_object->memory_report();
+   }
+
+   // Forward to the memory_add overload that takes a single element count.
+   void MallocPlus_memory_add(MallocPlus *mem_object, void *dbleptr, size_t nelem,
+       size_t elsize, char *name, unsigned long long flags){
+      mem_object->memory_add(dbleptr, nelem, elsize, name, flags);
+   }
+
+   // Forward to the memory_add overload that takes a dimension count and an
+   // array of per-dimension element counts.
+   void MallocPlus_memory_add_nD(MallocPlus *mem_object, void *dbleptr, int ndim, size_t *nelem,
+       size_t elsize, char *name, unsigned long long flags){
+      mem_object->memory_add(dbleptr, ndim, nelem, elsize, name, flags);
+   }
+}
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Parser_math.hh
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Parser_math.hh
@@ -0,0 +1,85 @@
+/* Copyright 2015.  Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ *  PowerParser is a general purpose input file parser for software applications.
+ *
+ *  Authors: Chuck Wingate   XCP-2   caw@lanl.gov
+ *           Robert Robey    XCP-2   brobey@lanl.gov
+ */
+
+#ifndef PARSERMATHHHINCLUDE
+#define PARSERMATHHHINCLUDE
+
+// ***************************************************************************
+// ***************************************************************************
+// This class collects various parser math functions.
+// There are two reasons to have this class:
+// 1. To keep the command processing class from getting too big.
+// 2. Some of these functions are used in more than one class.
+// ***************************************************************************
+// ***************************************************************************
+
+#include <string>
+#include <sstream>
+#include <vector>
+#include <deque>
+
+namespace PP
+{
+using std::deque;
+using std::string;
+using std::stringstream;
+using std::vector;
+
+
+// Home of the parser's math helpers. Each operation receives integer
+// positions, the word deque wq and three output references: the result word
+// wres, the error stream serr and the error code ierr.
+// NOTE(review): Word is neither declared nor included in this header, so it
+// presumably has to be declared before this header is included -- confirm.
+class Parser_math
+{
+public:
+    Parser_math();
+
+    void do_op(int i1, int i2, int i3,
+               deque <Word> &wq, Word &wres,
+               stringstream &serr, int &ierr);
+
+    void do_op_relational(int i1, int i2, int i3,
+                          deque <Word> &wq, Word &wres,
+                          stringstream &serr, int &ierr);
+
+    void do_op_logical(int i1, int i2, int i3,
+                       deque <Word> &wq, Word &wres,
+                       stringstream &serr, int &ierr);
+
+    // Takes only two positions (i2, i3), unlike the operations above.
+    void do_op_not(int i2, int i3,
+                   deque <Word> &wq, Word &wres,
+                   stringstream &serr, int &ierr);
+};
+
+
+} // End of the PP namespace
+
+#endif
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Parser_math.cc
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Parser_math.cc
@@ -0,0 +1,326 @@
+/* Copyright 2015.  Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ *  PowerParser is a general purpose input file parser for software applications.
+ *
+ *  Authors: Chuck Wingate   XCP-2   caw@lanl.gov
+ *           Robert Robey    XCP-2   brobey@lanl.gov
+ */
+
+// ***************************************************************************
+// ***************************************************************************
+// This class collects various parser math functions.
+//
+// There are two reasons to have this class:
+//     1. To keep the command processing class from getting too big.
+//     2. Some of these functions are used in more than one class.
+// ***************************************************************************
+// ***************************************************************************
+
+#include <iostream>
+#include <iomanip>
+#include <string>
+#include <sstream>
+#include <vector>
+#include <deque>
+#include <math.h>
+
+#include "Word.hh"
+#include "Parser_math.hh"
+
+namespace PP
+{
+using std:: string;
+using std::cout;
+using std::endl;
+using std::stringstream;
+using std::setprecision;
+using std::vector;
+using std::deque;
+
+
+// ===========================================================================
+// Default constructor.
+// ===========================================================================
+Parser_math::Parser_math()
+{   // Empty body: the Parser_math declaration has no data members to set up.
+}
+
+
+// ===========================================================================
+// Do a single arithmetic binary operation involving 3 words. i1, on the left,
+// is supposed to be a number, i2 is the operator, and i3, on the right, is
+// supposed to be a number.
+// The result is put in word wres.
+// ===========================================================================
+void Parser_math::do_op(int i1, int i2, int i3, deque<Word> &wq, Word &wres,
+                        stringstream &serr, int &ierr)
+{
+    // Evaluates "<left> <operator> <right>" for the operators +, -, *, / and **
+    // and stores the numeric outcome in wres.
+    //
+    //   i1, i3  positions in wq of the left and right operand words
+    //   i2      position in wq of the operator word
+    //   wres    receives the result; it is also given a value on failure
+    //   serr    stream that collects the diagnostic text
+    //   ierr    passed to Word::fatal_error, then set to 2, on every failure
+    // Any other operator string leaves the result at 0 without reporting.
+    Word &left = wq[i1];
+    Word &oper = wq[i2];
+    Word &right = wq[i3];
+
+    // Nothing can be computed unless both operands are numbers.
+    if (!(left.is_number() && right.is_number())) {
+        oper.fatal_error(serr, ierr);
+        serr << "Expected some number " << oper.get_string()
+             << " some number" << endl;
+        serr << "But did not find a number, instead found" << endl;
+        serr << left.get_string() << oper.get_string()
+             << right.get_string() << endl;
+        ierr = 2;
+        wres.set_value(0.);
+        return;
+    }
+
+    const double lhs = left.get_double();
+    const string symbol = oper.get_string();
+    const double rhs = right.get_double();
+    double value = 0.;
+
+    if (symbol == "+") {
+        value = lhs + rhs;
+    }
+    else if (symbol == "-") {
+        value = lhs - rhs;
+    }
+    else if (symbol == "*") {
+        value = lhs * rhs;
+    }
+    else if (symbol == "**") {
+        if (lhs == 0.) {
+            // A zero base gives 0 for every non-negative exponent ...
+            if (rhs >= 0.) {
+                wres.set_value(0.);
+                return;
+            }
+            // ... and is rejected for a negative one.
+            if (rhs < 0.) {
+                oper.fatal_error(serr, ierr);
+                serr << "Trying to exponentiate 0 to a negative power." << endl;
+                serr << "Base = " << lhs << "  Exponent = " << rhs << endl;
+                ierr = 2;
+                wres.set_value(0.);
+                return;
+            }
+        }
+        if (lhs < 0. && !right.is_integer()) {
+            oper.fatal_error(serr, ierr);
+            serr << "Trying to exponentiate a negative number to a non-integer power." << endl;
+            serr << "Base = " << lhs << "  Exponent = " << rhs << endl;
+            ierr = 2;
+            wres.set_value(0.);
+            return;
+        }
+        value = pow(lhs, rhs);
+    }
+    else if (symbol == "/") {
+        if (rhs == 0.) {
+            // Division by zero is an error; 0/0 reports 0, x/0 reports 1.e30.
+            value = (lhs == 0.) ? 0. : 1.e30;
+            oper.fatal_error(serr, ierr);
+            serr << "Divide by 0." << endl;
+            serr << "Numerator = " << lhs << "  Denominator = " << rhs << endl;
+            ierr = 2;
+            wres.set_value(value);
+            return;
+        }
+        value = lhs / rhs;
+    }
+    // The % operator is deliberately not implemented: it is too much like the
+    // Fortran % that references components of a Fortran structure.
+    wres.set_value(value);
+}
+
+
+// ===========================================================================
+// Do a single relational binary operation involving 3 words.
+// Relational operators include .eq., .ne., .le., ...
+// The result is either true or false and is put in word wres.
+// ===========================================================================
+void Parser_math::do_op_relational(int i1, int i2, int i3, deque<Word> &wq,
+                                   Word &wres, stringstream &serr, int &ierr)
+{
+    // Evaluates "<left> <relational operator> <right>" and stores true or
+    // false in wres. Two numbers are compared as doubles, two logical words
+    // accept only .eq. and .ne., and any other pair is compared as strings.
+    // Ordering two logical words, or pairing a logical or numeric word with a
+    // word of another kind, is an error: Word::fatal_error is called on the
+    // operator word, a message goes to serr, ierr becomes 2, wres is false.
+    const string left_text = wq[i1].get_string();
+    const string rel = wq[i2].get_string();
+    const string right_text = wq[i3].get_string();
+    const bool left_logical = wq[i1].is_bool();
+    const bool right_logical = wq[i3].is_bool();
+    // Logical values have no ordering.
+    if (left_logical && right_logical) {
+        const bool ordering = (rel == ".gt." || rel == ".ge." ||
+                               rel == ".lt." || rel == ".le.");
+        if (ordering) {
+            wq[i2].fatal_error(serr, ierr);
+            serr << "Does not make sense to compare logical values" << endl;
+            serr << " with greater than or less than" << endl;
+            serr << "    " << left_text << " " << rel << " " << right_text << endl;
+            ierr = 2;
+            wres.set_value(false);
+            return;
+        }
+    }
+
+    // Exactly one of the operands is logical.
+    if (left_logical != right_logical) {
+        wq[i2].fatal_error(serr, ierr);
+        serr << "Does not make sense to compare logical and" << endl;
+        serr << " non-logical values" << endl;
+        serr << "    " << left_text << " " << rel << " " << right_text << endl;
+        ierr = 2;
+        wres.set_value(false);
+        return;
+    }
+
+    // Exactly one of the operands is numeric.
+    const bool left_numeric = wq[i1].is_number();
+    const bool right_numeric = wq[i3].is_number();
+    if (left_numeric != right_numeric) {
+        wq[i2].fatal_error(serr, ierr);
+        serr << "Does not make sense to compare numerical and" << endl;
+        serr << " non-numerical values" << endl;
+        serr << "    " << left_text << " " << rel << " " << right_text << endl;
+        ierr = 2;
+        wres.set_value(false);
+        return;
+    }
+
+    bool outcome = false;
+    if (left_numeric && right_numeric) {
+        const double a = wq[i1].get_double();
+        const double b = wq[i3].get_double();
+        if      (rel == ".gt.") outcome = a >  b;
+        else if (rel == ".ge.") outcome = a >= b;
+        else if (rel == ".lt.") outcome = a <  b;
+        else if (rel == ".le.") outcome = a <= b;
+        else if (rel == ".eq.") outcome = a == b;
+        else if (rel == ".ne.") outcome = a != b;
+    }
+    else if (left_logical && right_logical) {
+        const bool a = wq[i1].get_bool(serr, ierr);
+        const bool b = wq[i3].get_bool(serr, ierr);
+        if      (rel == ".eq.") outcome = a == b;
+        else if (rel == ".ne.") outcome = a != b;
+    }
+    else {
+        // Neither numeric nor logical: plain string comparison.
+        if      (rel == ".gt.") outcome = left_text >  right_text;
+        else if (rel == ".ge.") outcome = left_text >= right_text;
+        else if (rel == ".lt.") outcome = left_text <  right_text;
+        else if (rel == ".le.") outcome = left_text <= right_text;
+        else if (rel == ".eq.") outcome = left_text == right_text;
+        else if (rel == ".ne.") outcome = left_text != right_text;
+    }
+    wres.set_value(outcome);
+}
+
+// ===========================================================================
+// Do the .not. operation, this is different from all the others in that
+// .not. is a unary operator, the others are binary ops.
+// The result is either true or false and is put in word wres.
+// ===========================================================================
+void Parser_math::do_op_not(int i2, int i3, deque<Word> &wq,
+                            Word &wres, stringstream &serr, int &ierr)
+{
+    // Applies the unary .not. operator and stores the negated value in wres.
+    //   i2    position in wq of the .not. operator word (used for reporting)
+    //   i3    position in wq of the operand, which must be a logical word
+    //   serr  stream that collects the diagnostic text
+    //   ierr  passed to Word::fatal_error and Word::get_bool; set to 2 when
+    //         the operand is not logical, in which case wres becomes false
+    if (!wq[i3].is_bool()) {
+        wq[i2].fatal_error(serr, ierr);
+        serr << "The word following the .not. operator must be"
+             " true or false." << endl;
+        serr << "Instead the word following .not. is "
+             << wq[i3].get_string() << endl;
+        ierr = 2;
+        wres.set_value(false);
+        return;
+    }
+    const bool operand = wq[i3].get_bool(serr, ierr);
+    wres.set_value(!operand);
+}
+
+
+// ===========================================================================
+// Do a single logical binary operation involving 3 words.
+// The binary logical operators are .and. and .or.
+// The result is either true or false and is put in word wres.
+// ===========================================================================
+void Parser_math::do_op_logical(int i1, int i2, int i3, deque<Word> &wq,
+                                Word &wres, stringstream &serr, int &ierr)
+{
+    // Evaluates "<left> .and. <right>" or "<left> .or. <right>" and stores the
+    // outcome in wres; any other operator string stores false.
+    //   i1, i3  positions in wq of the two operands, both must be logical
+    //   i2      position in wq of the operator word
+    //   serr    stream that collects the diagnostic text
+    //   ierr    set to 2 when an operand is not logical (wres becomes false)
+    const string left_text = wq[i1].get_string();
+    const string logic_op = wq[i2].get_string();
+    const string right_text = wq[i3].get_string();
+    if (!(wq[i1].is_bool() && wq[i3].is_bool())) {
+        wq[i2].fatal_error(serr, ierr);
+        serr << "The operator is " << logic_op << endl;
+        serr << "The two operands (on the left and right of the operator)" << endl;
+        serr << "must be logical values (true or false)." << endl;
+        serr << "    " << left_text << " " << logic_op << " " << right_text << endl;
+        ierr = 2;
+        wres.set_value(false);
+        return;
+    }
+
+    const bool lhs = wq[i1].get_bool(serr, ierr);
+    const bool rhs = wq[i3].get_bool(serr, ierr);
+    bool outcome = false;
+    if      (logic_op == ".and.") outcome = lhs && rhs;
+    else if (logic_op == ".or.")  outcome = lhs || rhs;
+    wres.set_value(outcome);
+}
+
+
+
+} // End of the PP namespace
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Parser_utils.hh
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Parser_utils.hh
@@ -0,0 +1,80 @@
+/* Copyright 2015.  Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ *  PowerParser is a general purpose input file parser for software applications.
+ *
+ *  Authors: Chuck Wingate   XCP-2   caw@lanl.gov
+ *           Robert Robey    XCP-2   brobey@lanl.gov
+ */
+
+#ifndef PARSERUTILSHHINCLUDE
+#define PARSERUTILSHHINCLUDE
+
+// ***************************************************************************
+// ***************************************************************************
+// This class collects various low level utilities for the parser.
+// ***************************************************************************
+// ***************************************************************************
+
+#include <string>
+#include <sstream>
+#include <vector>
+#include <deque>
+
+namespace PP
+{
+using std::string;
+using std::stringstream;
+using std::vector;
+using std::deque;
+
+
+class Parser_utils
+{
+public:
+    // base is the first valid array index: 1 for Fortran style, 0 for C/C++.
+    Parser_utils(int base);
+    // Flatten the multi-dimensional indices in istart to a 0-based 1d offset.
+    int start_dex(vector<int> &istart, const vector<int> &size);
+
+    // Inverse of start_dex: fill istart with the indices of 1d offset icdex.
+    void reverse_dex(int icdex, int nvals, vector<int> &istart,
+                     const vector<int> &size);
+
+    // Write rows to ss as a table with aligned columns and centered headers.
+    void print_strings(vector< vector<string> > rows, int n_header_rows,
+                       int offset, int col_spacing, int line_len_max,
+                       stringstream &ss);
+};
+
+
+} // End of the PP namespace
+
+#endif
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Parser_utils.cc
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Parser_utils.cc
@@ -0,0 +1,329 @@
+/* Copyright 2015.  Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ *  PowerParser is a general purpose input file parser for software applications.
+ *
+ *  Authors: Chuck Wingate   XCP-2   caw@lanl.gov
+ *           Robert Robey    XCP-2   brobey@lanl.gov
+ */
+
+// ***************************************************************************
+// ***************************************************************************
+// This class collects various low level utilities for the parser.
+// ***************************************************************************
+// ***************************************************************************
+
+#include <iostream>
+#include <iomanip>
+#include <string>
+#include <sstream>
+#include <vector>
+#include <deque>
+
+#include "Parser_utils.hh"
+
+namespace PP
+{
+using std:: string;
+using std::cout;
+using std::endl;
+using std::stringstream;
+using std::setprecision;
+using std::vector;
+using std::deque;
+using std::setw;
+
+static int index_base = 1; // First valid array index: 1 (Fortran style) by default, 0 for C/C++ style.
+
+// ===========================================================================
+// Constructor. Stores base in the file-level index_base, so the value is
+// shared by every Parser_utils object rather than kept per instance.
+// ===========================================================================
+Parser_utils::Parser_utils(int base)
+{
+   index_base = base;
+}
+
+
+// ===========================================================================
+// Given an array command like
+//     cmd(5,3) = 1.0, 3.0, -5.0
+// find the starting position in a 1d array.
+//
+// The indices in cmd are referenced from index_base (1, i.e. fortran, in the
+// examples below) while the 1d array is referenced from 0 (C++).
+//
+// This function works for any dimension array, 0,1,2,3,...
+//
+// Example 1: Consider a 1d command
+//     cmd(5) = 1.0, 3.4
+// We start filling the 1d fortran array at position 5 and put in two values.
+// Subtract 1 to reference from 0, so this function returns 4.
+//
+// Example 2: Consider the 2d command above:
+//     cmd(5,3) = 1.0, 3.0, -5.0
+// We also need to know that the max size of the first dimension is say 7.
+// Since in fortran, the first index varies fastest, the fortran 1d index
+// would be
+//     5 + (3-1)*7 = 19
+// Subtract 1 to reference from 0, thus the return value is 18.
+//
+// The istart vector contains the indices, for example 2 this would be 5 and
+// 3. The size vector contains the max size of each dimension, for example 2
+// this would be 7 and whatever for the second dimension.
+// ===========================================================================
+int Parser_utils::start_dex(vector<int> &istart, const vector<int> &size)
+{
+    // Converts the per-dimension indices in istart (each counted from
+    // index_base) into one 0-based offset, first index varying fastest:
+    //     offset = sum over d of (istart[d] - index_base) * size[0]*...*size[d-1]
+    // Only size[0] .. size[dim-2] are read. istart is not modified.
+    const int ndim = (int)istart.size();
+    // A scalar (no indices) always maps to offset 0.
+    if (ndim == 0) return 0;
+
+    int offset = 0;
+    for (int d = 0; d < ndim; ++d) {
+        // Scale this dimension's 0-based index by the extents below it.
+        int term = istart[d] - index_base;
+        for (int lower = 0; lower < d; ++lower) {
+            term *= size[lower];
+        }
+        offset += term;
+    }
+    return offset;
+}
+    
+
+// ===========================================================================
+// This is the reverse of the start_dex function above.
+// Given the 1d index, icdex (from 0 to nvals-1), find the corresponding
+// multi dimensional fortran indices (each starting from 1).
+//
+// Example 1: Consider a 1d array
+//     var1d(1) = 1 3 5 9 -4 -5 6
+// Suppose the user inputs icdex=3, corresponding to array value 9.
+// This 1d case is very simple, all we do is add 1 to icdex to get a reference
+// from 1, thus returning 4.
+//
+// Example 2: Consider a 2d array
+//     $var2d(1,1) = 11. 21. 31.   12. 22. 32.   13. 23. 33.
+// Where the max of the first dimension is 3. Suppose the user specifies
+// icdex = 5, this corresponds to array value 32. The two indices returned
+// would be 3,2 (referenced from 1).
+//
+// The istart vector contains the output indices, for example 2 this would be 3
+// and 2. The size vector contains the max size of each dimension, for example
+// 2 this would be 3 and whatever for the second dimension. The number of
+// elements in size is normally dim-1, but it does not hurt if it has dim
+// elements (in which case the last element is not referenced or used).
+// ===========================================================================
+void Parser_utils::reverse_dex(int icdex,
+                               int nvals,
+                               vector<int> &istart,
+                               const vector<int> &size)
+{
+    // Inverse of start_dex: fills istart with the per-dimension indices (each
+    // counted from index_base) whose flattened 0-based offset equals icdex.
+    //
+    //   icdex   0-based 1d offset to locate
+    //   nvals   upper bound on the number of positions tried after the first
+    //   istart  in: only its length (the dimension) is used; out: the indices
+    //   size    extent of each dimension; the last dimension is never read
+    //
+    // If no position within nvals steps matches, istart is left at the last
+    // position tried.
+    const int dim = (int)istart.size();
+
+    // Nothing to do for scalars.
+    if (dim == 0) return;
+
+    // Begin at the first element: every index equal to index_base.
+    for (int i=0; i<dim; i++) {
+        istart[i] = index_base;
+    }
+    if (start_dex(istart, size) == icdex) return;
+
+    // Step through the positions in storage order (first index fastest) until
+    // the offset matches. This linear search is inefficient, but dim is a
+    // small integer, so the cost does not really matter.
+    for (int step=0; step<nvals; step++) {
+        for (int i=0; i<dim; i++) {
+            // The last valid index of dimension i is size[i]-1+index_base.
+            // Comparing against size[i] alone is only right for index_base 1;
+            // with index_base 0 it would step one past the end before wrapping.
+            const bool wrap = (i < dim-1) &&
+                              (istart[i] == size[i] - 1 + index_base);
+            if (!wrap) {
+                istart[i] += 1;
+                break;
+            }
+            istart[i] = index_base;  // wrap and carry into dimension i+1
+        }
+        if (start_dex(istart, size) == icdex) return;
+    }
+}
+
+
+
+// ***************************************************************************
+// ***************************************************************************
+// Utilities for printing to the screen (or to a file).
+// ***************************************************************************
+// ***************************************************************************
+
+
+
+// ===========================================================================
+
+
+//                    Line                                                        
+//      Filename     Number                         Command                       
+//    ------------   ------   ------------------------------------------------
+//         test.in        4   some_logical_cmd = false
+//         test.in       28   some_logical_cmd = false
+//         test.in      170   some_logical_cmd = false
+//         test.in      175   some_logical_cmd = true     //No space betwe ...
+//    test_inc1.in        2   some_logical_cmd = true
+//    test_inc3.in        2   some_logical_cmd = true
+//                             
+//         test.in      442   strinsert_cmd01 = "test duplicates commands"
+//         test.in      456   strinsert_cmd01 = "test duplicates commands"
+
+
+
+// Given header rows of strings and several data rows of strings, where each
+// row consists of the same number of columns, list all the rows with the
+// columns lined up and the headers centered on the columns.
+// ===========================================================================
+void Parser_utils::print_strings(vector< vector<string> > rows, int n_header_rows,
+                                 int offset, int col_spacing, int line_len_max,
+                                 stringstream &ss)
+{
+    // Writes rows to ss as a table: columns lined up, header rows centered
+    // over their column, and a line of dashes after the header rows.
+    //
+    //   rows           table cells; rows[0] fixes the column count and every
+    //                  row must have at least that many cells
+    //   n_header_rows  number of leading rows treated as headers
+    //   offset         spaces written before the first column
+    //   col_spacing    spaces written before every other column
+    //   line_len_max   if > 0, lines longer than this get the tail of their
+    //                  last cell replaced by " ..."
+    //   ss             receives the output
+    //
+    // An empty table prints nothing; without this guard rows[0] and cspace[0]
+    // below would be accessed out of bounds.
+    if (rows.empty() || rows[0].empty()) return;
+
+    int ncol = (int)rows[0].size();
+    // Find the max number of characters in each column for all the rows.
+    vector<int> maxc(ncol, 0);
+    for (int row=0; row<(int)rows.size(); row++) {
+        for (int c=0; c<ncol; c++) {
+            int len = (int)rows[row][c].size();
+            if (len > maxc[c]) maxc[c] = len;
+        }
+    }
+
+    // Each column starts out as wide as its widest cell.
+    vector<int> col_width(maxc);
+    // Spaces before each column; the first column is indented by offset.
+    vector<int> cspace(ncol, col_spacing);
+    cspace[0] = offset;
+
+    // Limit the lines to a max length.
+    if (line_len_max > 0) {
+        int line_len = 0;
+        for (int c=0; c<ncol; c++) {
+            line_len += cspace[c] + col_width[c];
+        }
+        int excess_c = line_len - line_len_max;
+        if (excess_c > 0) {
+            // Only the last column gives up width.
+            col_width[ncol-1] -= excess_c;
+            for (int row=0; row<(int)rows.size(); row++) {
+                // Length of this row if its last cell were written in full.
+                int len = 0;
+                for (int c=0; c<ncol-1; c++) len += cspace[c] + col_width[c];
+                len += cspace[ncol-1] + (int)rows[row][ncol-1].size();
+                if (len <= line_len_max) continue;
+
+                // Drop the overflow plus 4 more characters from the end of
+                // the last cell, then append the 4-character " ..." marker.
+                string &s = rows[row][ncol-1];
+                int ec = len - line_len_max;
+                int start = (int)s.size() - ec - 4;
+                if (start < 0) start = 0;
+                int nc = ec+4;
+                if (nc > (int)s.size()) nc = (int)s.size();
+                s.erase(start, nc);
+                s += " ...";
+            }
+        }
+    }
+
+    // Write the rows.
+    for (int row=0; row<(int)rows.size(); row++) {
+        // Insert the line of dashes after the header rows.
+        if (row == n_header_rows) {
+            for (int c=0; c<ncol; c++) {
+                for (int i=0; i<cspace[c]; i++) ss << " ";
+                for (int i=0; i<col_width[c]; i++) ss << "-";
+            }
+            ss << endl;
+        }
+
+        for (int c=0; c<ncol; c++) {
+            // Use nc to center the headers, but not center the data.
+            int nc = maxc[c];
+            if (row < n_header_rows) nc = (int)rows[row][c].size();
+            int nsp_left = 0;
+            int nsp_right = 0;
+            int dsp = col_width[c] - nc;
+            if (dsp > 0) {
+                nsp_left = dsp/2;
+                nsp_right = col_width[c] - nsp_left - nc;
+            }
+            for (int i=0; i<cspace[c]; i++) ss << " ";
+            for (int i=0; i<nsp_left; i++) ss << " ";
+            // Data cells are right-justified to the column width, except in
+            // the last column; header cells are written as they are.
+            if (row >= n_header_rows && c < ncol-1) ss << setw(maxc[c]);
+            ss << rows[row][c];
+            for (int i=0; i<nsp_right; i++) ss << " ";
+        }
+        ss << endl;
+    }
+}
+
+
+
+} // End of the PP namespace
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/PowerParser.hh
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/PowerParser.hh
@@ -0,0 +1,717 @@
+/* Copyright 2015.  Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ *  PowerParser is a general purpose input file parser for software applications.
+ *
+ *  Authors: Chuck Wingate   XCP-2   caw@lanl.gov
+ *           Robert Robey    XCP-2   brobey@lanl.gov
+ */
+
+// ***************************************************************************
+// ***************************************************************************
+// Provide a class that parses text files into lines and words.
+// ***************************************************************************
+// ***************************************************************************
+#ifndef PARSEHHINCLUDE
+#define PARSEHHINCLUDE
+
+// Cmd.hh must be fully included: with the PGI compiler, the deque<Cmd>
+// declaration did not work with only a forward declaration
+// ("class Cmd;").
+#include "Comm.hh"
+#include <sstream>
+#include <fstream>
+#include "Word.hh"
+#include "Cmd.hh"
+#include "Restartblock.hh"
+#include "Whenthen.hh"
+
+/****************************************************************//**
+ * PP is the namespace for PowerParser. Example:
+ *
+ *     using namespace PP;
+ *******************************************************************/
+namespace PP
+{
+using std::ofstream;
+using std::streambuf;
+
+/****************************************************************//**
+ * PowerParser class 
+ *    Provide a class that parses text files into lines and words.
+ *******************************************************************/
+class PowerParser
+{
+
+public:
+
+    ofstream fileout;
+    streambuf *coutbuf;
+
+    // Constructors, destructors and drivers.
+/****************************************************************//**
+ * \brief
+ * Constructor with no arguments
+ *
+ * Typical Usage   
+ *
+ *     PowerParser parse;
+ *           or
+ *     PowerParser *parse = new PowerParser();
+ *******************************************************************/
+    PowerParser(void);
+
+/****************************************************************//**
+ * \brief
+ * Constructor -- with input filename in string format
+ *
+ * **Parameters**
+ * * string filename[in] -- the input file. The file will be
+ *    read in, broadcast, and then parsed
+ *
+ * Typical Usage
+ *
+ *     string fin("simfile.in");
+ *     PowerParser parse(fin);
+ *           or
+ *     string fin("simfile.in");
+ *     PowerParser *parse = new PowerParser(fin);
+ *******************************************************************/
+    PowerParser(string filename);
+
+/****************************************************************//**
+ * \brief
+ *  Constructor -- with input filename in char array format
+ *
+ *  **Parameters**
+ *  * const char *filename[in] -- the input file. The file will be
+ *    read in, broadcast, and then parsed
+ *
+ * Typical Usage
+ *
+ *     PowerParser parse("simfile.in");
+ *           or
+ *     PowerParser *parse = new PowerParser("simfile.in");
+ *******************************************************************/
+    PowerParser(const char *filename);
+
+/****************************************************************//**
+ * \brief
+ *  Destructor with no arguments
+ *
+ * Typical Usage   
+ *
+ *     delete parse;
+ *******************************************************************/
+    ~PowerParser(void);
+
+    void dictionary_add(char *name, double value, bool pred, char *vdesc);
+    void dictionary_env_add(char *name, bool pred);
+
+/****************************************************************//**
+ * \brief
+ * Reads the file in on the IO processor, broadcasts the string
+ * to all the other processors, then parses the string.
+ *
+ * **Parameters**
+ * * string filename -- the input file
+ *
+ * Typical Usage
+ *
+ *     string fin("simfile.in");
+ *     PowerParser parse;    // not "parse()", which would declare a function
+ *     parse.parse_file(fin);
+ *******************************************************************/
+    void parse_file(string filename);
+
+/****************************************************************//**
+ * \brief
+ * Reads the file in on the IO processor, broadcasts the string
+ * to all the other processors, then parses the string.
+ *
+ * **Parameters**
+ * * const char *filename -- the input file
+ *
+ * Typical Usage
+ *
+ *     PowerParser parse;    // not "parse()", which would declare a function
+ *     parse.parse_file("simfile.in");
+ *******************************************************************/
+    void parse_file(const char *filename);
+
+/****************************************************************//**
+ * \brief
+ * Given a multi-line string on every processor, parse it into cmds
+ * and words. After calling this function, the parser is ready for use.
+ *******************************************************************/
+    void parse_string(string filename, string s_in);
+
+/****************************************************************//**
+ * \brief
+ * The input file(s) has been read and put into commands. Now do the
+ * compilation phase.
+ *******************************************************************/
+    void compile_buffer(int &return_value);
+
+/****************************************************************//**
+ * \brief
+ * Handle the execution line arguments
+ *******************************************************************/
+    void handle_exe_args(string other_argggs);
+
+/****************************************************************//**
+ * \brief
+ * Clear out the parser and re-init
+ *******************************************************************/
+    void clear_and_init();
+
+/****************************************************************//**
+ * Store the execution line arguments (oargs) and the file name (fname).
+ *******************************************************************/
+    void store_exe_args(string &oargs, string &fname) {
+        this->other_args = oargs;  this->file_name = fname;
+    }
+
+/****************************************************************//**
+ * Copy the stored execution line arguments and file name to the caller.
+ *******************************************************************/
+    void get_exe_args(string &oargs, string &fname) {
+        oargs = this->other_args;  fname = this->file_name;
+    }
+
+/****************************************************************//**
+ * \brief
+ * String version of the driver for getting boolean values as integers.
+ * This works for arrays of any dimension, 0,1,2,3,...
+ *
+ * **Parameters**
+ * * string &cname -- key word in input file
+ * * int *cvalue -- variable to set in simulation code
+ * * const vector<int> &size = vector<int>() -- sizes of array,
+ *      (default is an empty vector, for a scalar).
+ * * bool skip = false -- skip setting variable, (default is false)
+ *
+ * Typical Usage
+ *
+ *     for scalars
+ *        string InputName("OutputGraphics");
+ *        int iflag = 0;
+ *        parse.get_bool_int(InputName, &iflag);
+ *     or for arrays
+ *        string InputName("OutputGraphicsTypes");
+ *        int iflags[2] = {0, 0};
+ *        vector<int> size = {2};
+ *        parse.get_bool_int(InputName, &iflags[0], size);
+ *******************************************************************/
+    void get_bool_int(string &cname,
+                      int *cvalue,
+                      const vector<int> &size = vector<int>(), // optional argument
+                      bool skip = false);                      // optional argument
+
+/****************************************************************//**
+ * \brief
+ * String version of the driver for getting boolean values.
+ * This works for arrays of any dimension, 0,1,2,3,...
+ *
+ * **Parameters**
+ * * string &cname -- key word in input file
+ * * bool *cvalue -- variable to set in simulation code
+ * * const vector<int> &size = vector<int>() -- sizes of array,
+ *      (default is an empty vector, for a scalar).
+ * * bool skip = false -- skip setting variable, (default is false)
+ *
+ * Typical Usage
+ *
+ *     for scalars
+ *        string InputName("OutputGraphics");
+ *        bool iflag = false;
+ *        parse.get_bool(InputName, &iflag);
+ *     or for arrays
+ *        string InputName("OutputGraphicsTypes");
+ *        bool iflags[2] = {false, false};
+ *        vector<int> size = {2};
+ *        parse.get_bool(InputName, &iflags[0], size);
+ *******************************************************************/
+    void get_bool(string &cname,
+                  bool *cvalue,
+                  const vector<int> &size = vector<int>(),     // optional argument
+                  bool skip = false);                          // optional argument
+
+/****************************************************************//**
+ * \brief
+ * String version of the driver for getting integer values.
+ * This works for arrays of any dimension, 0,1,2,3,...
+ *
+ * **Parameters**
+ * * string &cname -- key word in input file
+ * * T *cvalue -- variable to set in simulation code. T can be
+ *      either standard int or long long int
+ * * const vector<int> &size = vector<int>() -- sizes of array,
+ *      (default is an empty vector, for a scalar).
+ * * bool skip = false -- skip setting variable, (default is false)
+ *
+ * Typical Usage
+ *
+ *     for scalars
+ *        string InputName("Num_Cycles");  int ivalue = 0;
+ *        parse.get_int(InputName, &ivalue);
+ *     or for arrays
+ *        string InputName("Dimensions");  int ivalue[2] = {0, 0};
+ *        vector<int> size = {2};
+ *        parse.get_int(InputName, &ivalue[0], size);
+ *******************************************************************/
+    template< typename T >
+    void get_int(string &cname,
+                 T *cvalue,
+                 const vector<int> &size = vector<int>(),      // optional argument
+                 bool skip = false);                           // optional argument
+
+/****************************************************************//**
+ * \brief
+ * String version of the driver for getting real values.
+ * This works for arrays of any dimension, 0,1,2,3,...
+ *
+ * **Parameters**
+ * * string &cname -- key word in input file
+ * * double *cvalue -- variable to set in simulation code.
+ * * const vector<int> &size = vector<int>() -- sizes of array,
+ *      (default is an empty vector, for a scalar).
+ * * bool skip = false -- skip setting variable, (default is false)
+ *
+ * Typical Usage
+ *
+ *     for scalars
+ *        string InputName("TimeStop");  double rvalue = 0;
+ *        parse.get_real(InputName, &rvalue);
+ *     or for arrays
+ *        string InputName("DumpTimes");  double rvalues[2] = {0.0, 0.0};
+ *        vector<int> size = {2};
+ *        parse.get_real(InputName, &rvalues[0], size);
+ *******************************************************************/
+    void get_real(string &cname,
+                  double *cvalue,
+                  const vector<int> &size = vector<int>(),     // optional argument
+                  bool skip = false);                          // optional argument
+
+/****************************************************************//**
+ *******************************************************************/
+    void get_char(string &cname,
+                  vector<string> &vstr,
+                  const vector<int> &size = vector<int>(),     // optional argument
+                  bool single_char = false,                    // optional argument
+                  bool skip = false);                          // optional argument
+
+    // These are convenience functions that accept the variable name as a char
+    // array so the calls are simpler. They convert the cname to a string and
+    // call the string versions above.
+
+/****************************************************************//**
+ * \brief
+ * Char array version of the driver for getting boolean values as integers.
+ * This works for arrays of any dimension, 0,1,2,3,...
+ *
+ * **Parameters**
+ * * const char *cname -- key word in input file
+ * * int *cvalue -- variable to set in simulation code
+ * * const vector<int> &size = vector<int>() -- sizes of array,
+ *      (default is an empty vector, for a scalar).
+ * * bool skip = false -- skip setting variable, (default is false)
+ *
+ * Typical Usage
+ *
+ *     for scalars
+ *        int iflag = 0;
+ *        parse.get_bool_int("OutputGraphics", &iflag);
+ *     or for arrays
+ *        int iflags[2] = {0, 0};
+ *        vector<int> size = {2};
+ *        parse.get_bool_int("OutputGraphicsTypes", &iflags[0], size);
+ *******************************************************************/
+    void get_bool_int(const char *cname,
+                      int *cvalue,
+                      const vector<int> &size = vector<int>(), // optional argument
+                      bool skip = false);                      // optional argument
+
+/****************************************************************//**
+ * \brief
+ * Char array version of the driver for getting boolean values.
+ * This works for arrays of any dimension, 0,1,2,3,...
+ *
+ * **Parameters**
+ * * const char *cname -- key word in input file
+ * * bool *cvalue -- variable to set in simulation code
+ * * const vector<int> &size = vector<int>() -- sizes of array,
+ *      (default is an empty vector, for a scalar).
+ * * bool skip = false -- skip setting variable, (default is false)
+ *
+ * Typical Usage
+ *
+ *     for scalars
+ *        bool iflag = false;
+ *        parse.get_bool("OutputGraphics", &iflag);
+ *     or for arrays
+ *        bool iflags[2] = {false, false};
+ *        vector<int> size = {2};
+ *        parse.get_bool("OutputGraphicsTypes", &iflags[0], size);
+ *******************************************************************/
+    void get_bool(const char *cname,
+                  bool *cvalue,
+                  const vector<int> &size = vector<int>(),     // optional argument
+                  bool skip = false);                          // optional argument
+
+/****************************************************************//**
+ * \brief
+ * Char array version of the driver for getting integer values.
+ * This works for arrays of any dimension, 0,1,2,3,...
+ *
+ * **Parameters**
+ * * const char *cname -- key word in input file
+ * * T *cvalue -- variable to set in simulation code. T can be
+ *      either standard int or long long int
+ * * const vector<int> &size = vector<int>() -- sizes of array,
+ *      (default is an empty vector, for a scalar).
+ * * bool skip = false -- skip setting variable, (default is false)
+ *
+ * Typical Usage
+ *
+ *     for scalars
+ *        int ivalue = 0;
+ *        parse.get_int("Num_Cycles", &ivalue);
+ *     or for arrays
+ *        int ivalue[2] = {0, 0};
+ *        vector<int> size = {2};
+ *        parse.get_int("Dimensions", &ivalue[0], size);
+ *******************************************************************/
+    template< typename T >
+    void get_int(const char *cname,
+                 T *cvalue,
+                 const vector<int> &size = vector<int>(),      // optional argument
+                 bool skip = false);                           // optional argument
+
+/****************************************************************//**
+ * \brief
+ * Char array version of the driver for getting real values.
+ * This works for arrays of any dimension, 0,1,2,3,...
+ *
+ * **Parameters**
+ * * const char *cname -- key word in input file
+ * * double *cvalue -- variable to set in simulation code.
+ * * const vector<int> &size = vector<int>() -- sizes of array,
+ *      (default is an empty vector, for a scalar).
+ * * bool skip = false -- skip setting variable, (default is false)
+ *
+ * Typical Usage
+ *
+ *     for scalars
+ *        double rvalue = 0;
+ *        parse.get_real("TimeStop", &rvalue);
+ *     or for arrays
+ *        double rvalue[2] = {0.0, 0.0};
+ *        vector<int> size = {2};
+ *        parse.get_real("DumpTimes", &rvalue[0], size);
+ *******************************************************************/
+    void get_real(const char *cname,
+                  double *cvalue,
+                  const vector<int> &size = vector<int>(),     // optional argument
+                  bool skip = false);                          // optional argument
+
+/****************************************************************//**
+ *******************************************************************/
+    void get_char(const char *cname,
+                  vector<string> &vstr,
+                  const vector<int> &size = vector<int>(),     // optional argument
+                  bool single_char = false,                    // optional argument
+                  bool skip = false);                          // optional argument
+
+
+/****************************************************************//**
+ * \brief
+ * Driver for getting array sizes.
+ *******************************************************************/
+    void get_size(string &cname, vector<int> &size);
+
+/****************************************************************//**
+ * \brief
+ * Driver for getting array sizes. Version to get all sizes
+ *******************************************************************/
+    void get_sizeb(string &cname, vector<int> &size);
+
+
+/****************************************************************//**
+ * \brief
+ * Check if the input command, cname, appears in the final, parsed user input.
+ *
+ * The two outputs are in_input and in_whenthen,
+ *    in_input     command is in (or not) the main part of the input, i.e.
+ *                 everything except the when...then statements.
+ *    in_whenthen  command is in (or not) at least one when...then statement.
+ *******************************************************************/
+    void cmd_in_input(string &cname, bool &in_input, bool &in_whenthen);
+
+/****************************************************************//**
+ * \brief
+ * Set the processed flag for all words for all commands that match cname.
+ * The value to set the processed flag to is bval.
+ * This sets the processed flag for commands in the final buffer and in the
+ * when...then final buffers.
+ *******************************************************************/
+    void cmd_set_processed(string &cname, bool bval);
+
+/****************************************************************//**
+ * \brief
+ * Check all processed flags on every command. If any word on any command
+ * has not been processed, then that is a fatal error.
+ *******************************************************************/
+    void check_processed(bool &good);
+
+/****************************************************************//**
+ * \brief
+ * If commands appear more than once in the input file(s), print a warning
+ * to the user.
+ *******************************************************************/
+    void check_duplicates();
+
+
+/****************************************************************//**
+ * \brief
+ * Echo user input to a stringstream.
+ *******************************************************************/
+    void echo_input_start();
+
+/****************************************************************//**
+ * \brief
+ * Echo user input to a stringstream.
+ *******************************************************************/
+    void echo_input_ss(stringstream &ssinp);
+
+/****************************************************************//**
+ * Get a line from the ssfout stringstream. (low-level function)
+ *******************************************************************/
+    bool get_ssfout_line(string &sline);
+
+    // Communications object from the infrastructure.
+/****************************************************************//**
+ * \brief
+ *  Holds internal comm class for PowerParser. Comm is initialized
+ *  automatically and will use an already initialized MPI or 
+ *  initialize it itself.  This is meant to be for use internal to
+ *  the package, but developers can get the number of processors
+ *  and rank with
+ *
+ *      int mype = parse->comm->getProcRank();
+ *      int npes = parse->comm->getNumProcs();
+ *******************************************************************/
+    Comm *comm;
+
+/****************************************************************//**
+ *******************************************************************/
+    void list_funcs_start();
+
+/****************************************************************//**
+ *******************************************************************/
+    void list_vars_start();
+
+/****************************************************************//**
+ *******************************************************************/
+    void list_cmdsf_start();
+
+/****************************************************************//**
+ *******************************************************************/
+    void list_wt_cmdsf_start();
+
+    void process_error_global(int &return_value);
+
+
+    void rb_check(vector<string> &code_varnames,
+                  vector<string> &code_values,
+                  vector<int> &vv_active, int *rbci,
+                  int *rb_ntriggered, int *rb_triggered_indices);
+    int  get_rb_num_varnames();
+    void get_rb_varnames(vector<string> &rb_varnames_vstr);
+    void get_num_rb(int *rbnum) { *rbnum = static_cast<int>(this->restartblocks.size()); }  // count of restartblocks
+    void set_num_rb(int rbnum)  { this->nrb_on_dump = rbnum; }  // records rbnum in nrb_on_dump
+    void get_rb_names(vector<string> &rb_names_vstr);
+    void set_rb_names(vector<string> &rb_names_vstr);
+    void get_rb_aflags(int *rb_aflags);
+    void set_rb_aflags(int *rb_aflags, int rb_num);
+    void get_rb_satsize(int *rb_satsize);
+    void set_rb_satsize(int rb_satsize);
+    void get_rb_satprb(int *rb_satprb);
+    void set_rb_satprb(int *rb_satprb, int rb_num);
+    void get_rb_sat(int *rb_sat);
+    void set_rb_sat(int *rb_sat, int rb_satsize);
+    void list_rb();
+    void list_rb_start();
+    void list_rb_ss(stringstream &ssc);
+    void list_rb1_start(int *rb);
+    void list_rb1_ss(stringstream &ssc, int *rbp);
+    void list_one_rb_ss(stringstream &ssc, int rb);
+
+
+    void get_num_whenthen(int *wtnum) { *wtnum = static_cast<int>(this->whenthens.size()); }  // count of whenthens
+    void wt_check(int wtn, vector<string> &code_varnames,
+                  vector<string> &code_values,
+                  vector<int> &vv_active, int *wtci);
+    void wt_set_cmdsfp(int wtn);
+    void wt_reset();
+    void wt_casize(int wtn, int *wt_casize);
+    void wt_carray(int wtn, char *wt_ca, int wt_casize);
+
+    void wt_satsize(int wtn, int *wt_satsize);
+    void wt_getsat(int wtn, int *wt_sat, int wt_satsize);
+    void wt_setsat(int wtn, int *wt_sat, int wt_satsize);
+    void wt_getprocessed(int wtn, int *wtp);
+    void wt_setprocessed(int wtn, int wtp);
+    void wt_getseq(int wtn, int *wtseq);
+    void wt_setseq(int wtn, int wtseq);
+
+    void chars_to_vstr(char *chars_1d, vector<string> &vstr,
+                       int nv, int nchar);
+    void vstr_to_chars(char *chars_1d, vector<string> &vstr,
+                       int nv, int nchar);
+
+    void ListIncludeFiles();
+    int NumIncludeFiles();
+    string GetIncludeFile(int);
+
+
+
+private:
+
+    void init();
+    int  process_dav_cmd();
+    void check_dup_scalar(int wtn, bool &found_any);
+    void set_dup_row(vector<string> &row, Cmd &cmdi, int iw);
+    void remove_dup_scalar(int wtn);
+    void read_into_string(string filename, string &s_in);
+    void broadcast_buffer(string &s_in);
+    bool get_line_from_string(string &strn, string &sout, int &current_pos);
+    bool get_sc_line_from_string(string &strn, string &sout, int &current_pos);
+    void store_line_strings(string &s_in);
+    void eliminate_white_space(string &sline);
+    void cmd_set_reprocessed(bool bval);
+    int  process_error_return_int(stringstream &serr, int &ierr);
+    void process_error(stringstream &serr, int &ierr);
+
+    void list_vars(string lv1, string lv2, string var_to_list);
+    void list_vars_ss(string lv1, string lv2, string var_to_list,
+                      stringstream &ssvars);
+
+    void list_funcs(string lf1, string lf2);
+    void list_funcs_ss(string lf1, string lf2, stringstream &ssfunc);
+
+    void list_cmdsf(string lc1, string lc2);
+    void list_cmdsf_ss(string lc1, string lc2,
+                       stringstream &ssc);
+    void list_wt_cmdsf();
+    void list_wt_cmdsf_ss(stringstream &ssc);
+
+    void print_strings(vector< vector<string> > rows, int n_header_rows,
+                       int offset, int col_spacing, int line_len_max,
+                       stringstream &ss);
+    bool end_do_loop(int &i, deque<int> &do_start,
+                     stringstream &serr, int &ierr);
+    void end_do_ret(int &i, deque<int> &do_start,
+                    stringstream &serr, int &ierr);
+    void check_enddo(deque<int> &do_start, stringstream &serr, int &ierr);
+    int  jump_to_call(int &i, deque<int> &icall, deque<int> &isub,
+                      stringstream &serr, int &ierr);
+    int  jump_to_sub(int &i, string &sub_name,
+                     stringstream &serr, int &ierr);
+    void print_line(int i);
+    void print_line(Cmd &cmd);
+
+    // Store exe line arguments.
+    string other_args, file_name;
+
+    // A double ended queue for storing the original lines. This is
+    // before the lines get turned into Cmds. 
+    // line_number is an index into cmd_strings, note that it starts
+    // from 1, not 0.
+    deque<string> cmd_strings;
+    int line_number;
+
+    // Define a map for a set of variables.
+    map<string, Variable> vmap;
+    
+    // Maintain a list of included files
+
+    std::map<int,string> IncludeFiles;
+
+    // Define a map for the functions.
+    map<string, Function> fmap;
+
+    // A double ended queue for storing the commands.
+    deque<Cmd> cmds;
+    deque<Cmd> cmdsf;
+    deque<Cmd> *cmdsfp;
+
+    // Store cmd names that have been processed, used for clearing and
+    // recreating the parser.
+    deque<string> processed_cmd_names;
+
+    // Related to writing output to a fortran file.
+    int ssfout_current_pos;
+    stringstream ssfout;
+
+    // Used for storing the list of pre-defined variables to be printed
+    // out later.
+    stringstream pre_defined_varss;
+
+    // Used for storing multiple errors and processing them later.
+    stringstream serr_global;
+    int ierr_global;
+
+    // The execution line arguments are put in this string.
+    string exe_args_str;
+
+    // The when ... then objects.
+    deque<Whenthen> whenthens;
+
+    // Restart blocks.
+    deque<Restartblock> restartblocks;
+    int nrb_on_dump;
+    deque<string> bnames_on_dump;
+    deque<bool> baflags_on_dump;
+    int satsize_on_dump;
+    deque<bool> rbsat_on_dump;
+    deque<int> rbsatprb_on_dump;
+
+    // Flag for whether duplicate array values will be none, fatal, or
+    // a warning, determined by the duplicate_array_values command.
+    //    dup_fatal = 0     Turn off duplicate array value checking
+    //    dup_fatal = 1     Duplicate array value checking is a warning
+    //    dup_fatal = 2     Duplicate array value checking is a fatal error
+    int dup_fatal;
+};
+
+} // end of PP namespace
+
+#endif
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/PowerParser.cc
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/PowerParser.cc
@@ -0,0 +1,3269 @@
+/* Copyright 2015.  Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ *  PowerParser is a general purpose input file parser for software applications.
+ *
+ *  Authors: Chuck Wingate   XCP-2   caw@lanl.gov
+ *           Robert Robey    XCP-2   brobey@lanl.gov
+ */
+
+// ***************************************************************************
+// ***************************************************************************
+// Provide a class that parses text files into lines and words.
+// ***************************************************************************
+// ***************************************************************************
+
+#include "PowerParser.hh"
+#include "Parser_utils.hh"
+#include "Variable.hh"
+#include "Function.hh"
+#include <cctype>
+#include <cstdio>
+#include <unistd.h>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <limits>
+#include <algorithm>
+#include <stdint.h>
+#include <string.h>
+#include <cstdlib>
+#include <assert.h>
+
+namespace PP
+{
+using std::cout;
+using std::endl;
+using std::string;
+using std::ifstream;
+using std::ios;
+using std::deque;
+using std::map;
+using std::pair;
+using std::vector;
+using std::stringstream;
+using std::setw;
+using std::setprecision;
+using std::numeric_limits;
+
+static int index_base = 1;
+static bool case_sensitive = false;
+
+// ===========================================================================
+// Various constructors.
+// ===========================================================================
+// Default constructor: sets up an empty parser; no input is read yet.
+PowerParser::PowerParser()
+{
+    comm = new Comm();
+    init();             // Init vars, setup functions, ...
+    nrb_on_dump = 0;    // Zero the on-dump restart block count.
+    coutbuf = NULL;     // cout has not been redirected.
+}
+
+// Construct a parser and immediately parse the named input file.
+PowerParser::PowerParser(string filename)
+{
+    comm = new Comm();
+    init();                     // Init vars, setup functions, ...
+    nrb_on_dump = 0;
+    coutbuf = NULL;             // Set before parsing; never left indeterminate.
+    parse_file(filename);       // Parse the file.
+}
+
+// Construct a parser from a C-string file name and parse that file.
+// filename must point to a valid, NUL-terminated string; it is copied into
+// a std::string before it is used.
+PowerParser::PowerParser(const char *filename)
+{
+    comm = new Comm();
+    init();                     // Init vars, setup functions, ...
+    nrb_on_dump = 0;
+    coutbuf = NULL;             // Set before parsing; never left indeterminate.
+    parse_file(string(filename));   // Parse the file.
+}
+
+// ===========================================================================
+// Destructor
+// ===========================================================================
+PowerParser::~PowerParser()
+{
+    fileout.close();
+    // Put cout's original stream buffer back if output was redirected.
+    if (coutbuf) cout.rdbuf(coutbuf);
+    delete comm;
+    cmd_strings.clear();
+    vmap.clear();
+    fmap.clear();
+    cmds.clear();
+    cmdsf.clear();
+    whenthens.clear();
+    restartblocks.clear();
+    pre_defined_varss.str("");
+}
+
+// ===========================================================================
+// Parse a file. The basic strategy is to read the file into a string on the
+// io processor, broadcast the string to all the other processors, then parse
+// the string.
+// ===========================================================================
+void PowerParser::parse_file(string filename)
+{
+    // Step 1: copy the whole file, end of line characters included, into
+    // one string. Only the io processor actually reads the file into the
+    // string.
+    string file_text;
+    read_into_string(filename, file_text);
+
+    // Step 2: broadcast the text to all the other processors. After this
+    // broadcast, all the processors should hold the same buffer string.
+    broadcast_buffer(file_text);
+
+    // Step 3: parse the text. Once this is done, the parser is ready to be
+    // used by the application code.
+    parse_string(filename, file_text);
+}
+
+// C-string convenience overload: forwards to parse_file(string).
+void PowerParser::parse_file(const char *filename)
+{
+    parse_file(string(filename));
+}
+
+int PowerParser::NumIncludeFiles()
+{
+    return static_cast<int>(IncludeFiles.size());   // Include files recorded so far.
+}
+
+string PowerParser::GetIncludeFile(int i)
+{   // Name recorded under index i (0-based), or "" when there is none.
+    std::map<int,string>::const_iterator entry = IncludeFiles.find(i);
+    return (entry != IncludeFiles.end()) ? entry->second : string("");
+}
+
+// Print the number of include files and each file name to stderr.
+void PowerParser::ListIncludeFiles()
+{
+    const int num_include = NumIncludeFiles();
+    std::cerr << "Number of include files = " << num_include << "\n";
+    for (int i = 0; i < num_include; ++i) {
+        // The label used to carry a stray "<< " inside the string literal.
+        std::cerr << "Include file " << i << " = " << GetIncludeFile(i) << "\n";
+    }
+}
+
+// ===========================================================================
+// Given a multi-line string on every processor, parse it into cmds and words.
+// After calling this function, the parser is ready for use.
+// ===========================================================================
+void PowerParser::parse_string(string filename, string buffer)
+{
+    // filename: name handed to every Cmd and printed in error messages (a
+    //           file name, "exe_args_tmp" or "execution line arguments").
+    // buffer:   the complete multi-line text to turn into commands.
+    // Get command lines from the buffer and store them as strings.
+    int current_pos = 0;
+    string sline1 = "";
+    string sline = "";
+    int file_line_number = 0;
+    bool exe_args_inserted = false;
+    for (;;) {
+        // Get the next line from the buffer. No processing is done, just
+        // get each line. This does, however, remove the end of line
+        // characters (either \r\n or only \n) from the string.
+        if (!get_line_from_string(buffer, sline1, current_pos)) break;
+        line_number += 1;
+        file_line_number += 1;
+
+        // Store the line without any processing. This is done so that a
+        // fortran routine can get each original line and echo it to an
+        // output file.
+        cmd_strings.push_back(sline1);
+
+        // The line, sline1, may be composed of sub-lines separated by
+        // semicolons. Loop through the line extracting each semicolon
+        // separated sub-line and process it.
+        int current_sc_pos = 0;
+        for (;;) {
+            if (!get_sc_line_from_string(sline1, sline, current_sc_pos)) break;
+
+            // Get rid of leading and trailing blanks and tabs. A sub-line
+            // that is empty afterwards is not turned into a command.
+            eliminate_white_space(sline);
+            if (sline.empty()) continue;
+
+            // Turn the line into a command. This creates the words.
+            stringstream serr;
+            int ierr = 0;
+            Cmd cmd(sline, &vmap, &fmap, &cmd_strings,
+                    line_number, file_line_number, filename, serr, ierr);
+            process_error(serr, ierr);
+
+            // Parser-mode commands. They switch the index base or the case
+            // sensitivity and are still stored as ordinary commands below.
+            if (cmd.get_string(0) == "set_index_base_zero") {
+                // C/C++ index convention
+                cmd.set_index_base(0);
+                Variable v(0);
+                index_base = 0;
+            }
+            if (cmd.get_string(0) == "set_index_base_one") {
+                // Fortran index convention
+                cmd.set_index_base(1);
+                Variable v(1);
+                index_base = 1;
+            }
+            if (cmd.get_string(0) == "set_case_sensitive") {
+                cmd.set_case_sensitive(true);
+                case_sensitive = true;
+            }
+            if (cmd.get_string(0) == "set_case_insensitive") {
+                cmd.set_case_sensitive(false);
+                case_sensitive = false;
+            }
+            if (cmd.get_string(0) == "put_exe_args_here") {
+                // Parse the execution line arguments at this point.
+                if (exe_args_str != "") {
+                    parse_string("execution line arguments", exe_args_str);
+                    exe_args_inserted = true;
+                }
+            }
+            else if (cmd.is_include()) {
+                // Only the io processor asks the command for the include
+                // file name; the name is then broadcast and recorded.
+                string fname = "";
+                stringstream ssfiles;
+                if (comm->isIOProc()) {
+                    fname = cmd.get_cmd_filename(ssfiles);
+                }
+                broadcast_buffer(fname);
+                int isize = IncludeFiles.size();
+                IncludeFiles[isize] = fname;
+                if (fname == "") {
+                    stringstream serr_inc;
+                    serr_inc << endl;
+                    serr_inc << "*** FATAL ERROR in line " << file_line_number << ":" << endl;
+                    serr_inc << "    " << cmd_strings[line_number-1] << endl;
+                    serr_inc << "in file: " << filename << endl;
+                    serr_inc << "Could not open include file." << endl;
+                    serr_inc << "The name of the file and any alternates are:" << endl;
+                    serr_inc << ssfiles.str() << endl;
+                    int ierr_inc = 2;
+                    process_error(serr_inc, ierr_inc);
+                    return;
+                }
+                parse_file(fname);
+            }
+            else {
+                cmds.push_back(cmd);
+            }
+        }
+    }
+    // process inserted command line args if not parsing just the args
+    if (filename != "execution line arguments" &&
+        filename != "exe_args_tmp" &&
+        exe_args_str != "") {
+        // if inserting manually, remove the tmp insertion at the beginning
+        if (exe_args_inserted) {
+            for (int i=0; i<(int)cmds.size(); i++) {
+                if (cmds[i].get_filename() == "exe_args_tmp") {
+                    cmds.erase(cmds.begin()+i);
+                    i -= 1;
+                }
+            }
+        }
+        // change the file name to the real name for args
+        else {
+            for (int i=0; i<(int)cmds.size(); i++) {
+                if (cmds[i].get_filename() == "exe_args_tmp") {
+                    cmds[i].set_filename("execution line arguments");
+                }
+            }
+        }
+    }
+}
+
+
+// ===========================================================================
+// Handle the execution line arguments.
+// ===========================================================================
+void PowerParser::handle_exe_args(string other_args)
+{
+    // Nothing to do when no argument text was supplied.
+    if (other_args.empty()) return;
+
+    // Build a command from the raw argument text.
+    stringstream arg_err;
+    int arg_ierr = 0;
+    Cmd arg_cmd(other_args, &vmap, &fmap, &cmd_strings, 1, 1, "", arg_err, arg_ierr);
+    process_error(arg_err, arg_ierr);
+
+    // Hand exe_args_str to the command (presumably to be filled in). When
+    // the command has no words, the string is emptied and nothing is parsed.
+    exe_args_str = "";
+    arg_cmd.handle_exe_args(exe_args_str);
+    if (arg_cmd.get_nwords() != 0) parse_string("exe_args_tmp", exe_args_str);
+    else                           exe_args_str = "";
+}
+
+
+// ===========================================================================
+// Clear out the parser and re-init.
+// ===========================================================================
+void PowerParser::clear_and_init()
+{
+    // comm does not need to be reset.
+    //
+    // Everything produced by parsing and compiling is thrown away: the
+    // original lines, variables, functions, raw and final commands,
+    // when...then objects, restart blocks and the pre-defined variable
+    // listing.
+    cmd_strings.clear();
+    vmap.clear();
+    fmap.clear();
+    cmds.clear();
+    cmdsf.clear();
+    whenthens.clear();
+    restartblocks.clear();
+    pre_defined_varss.str("");
+
+    // Do not clear out the restart block info from the dump (for example
+    // bnames_on_dump and baflags_on_dump) since the whole point of doing
+    // this function is to be able to reset the parser with the restart
+    // block info from the dump.
+    //
+    // NOTE(review): IncludeFiles is not cleared here either - confirm that
+    // this is intended.
+    init();     // Do the initialization again.
+}
+
+// ===========================================================================
+// Echo user input to a stringstream.
+// ===========================================================================
+void PowerParser::echo_input_start()
+{
+    ssfout.str("");             // Discard any earlier echo.
+    ssfout_current_pos = 0;     // Reset the current position.
+    echo_input_ss(ssfout);      // Refill with the stored input lines.
+}
+void PowerParser::echo_input_ss(stringstream &ssinp)
+{
+    // Only the io processor echoes; one stored input line per output line.
+    if (!comm->isIOProc()) return;
+    deque<string>::iterator line = cmd_strings.begin();
+    for (; line != cmd_strings.end(); ++line) ssinp << *line << endl;
+}
+
+
+// ===========================================================================
+// The input file(s) has been read and put into commands. Now do the
+// compilation phase.
+// ===========================================================================
+void PowerParser::compile_buffer(int &return_value)
+{
+    // Compile the commands collected by parse_string() into the final
+    // command list (cmdsf) and the when...then command lists.
+    //
+    // return_value (output): set to 0 on entry, then to the error level
+    // reported by a failing step. A level above 1 is treated as fatal and
+    // ends the compilation early.
+
+    // At this point, the list of variables only contains the pre-defined
+    // parser variables, thus if we list the variables at this point we will
+    // have a list of only the pre-defined variables. This is stored in a
+    // stringstream to be printed later.
+    string lv1 = "********** List of pre-defined parser variables";
+    string lv2 = "********** End list of pre-defined parser variables";
+    list_vars_ss(lv1, lv2, "", pre_defined_varss);
+
+    int return_local;
+
+    return_local = -1;
+    return_value = 0;
+
+    // Handle single line (! and //) comments and multi line
+    // comments (/* ... */)
+    // The level variable is used for nested multi line comments.
+    int level = 0;
+    for (int i=0; i<(int)cmds.size(); i++) {
+        cmds[i].single_line_comments();
+        cmds[i].multi_line_comments(level);
+    }
+
+    // Check for matching quotes and remove them.
+    int ierr = 0;
+    stringstream serr;
+    for (int i=0; i<(int)cmds.size(); i++) {
+        cmds[i].handle_quotes(serr, ierr);
+    }
+    return_local = process_error_return_int(serr, ierr);
+    return_value = return_local;
+
+    if (return_local > 0) {
+       cout << "handle quotes gave error " << ierr << endl;
+       if (return_local > 1) return;
+    }
+
+    // Remove empty lines.
+    for (int i=0; i<(int)cmds.size(); i++) {
+        if (cmds[i].get_nwords() == 0) {
+            cmds.erase(cmds.begin()+i);
+            i -= 1;
+            continue;
+        }
+    }
+
+    // Handle continuation lines (ending in & or ,).
+    // Continuation lines are merged into one long (possibly very long)
+    // line.
+    for (int i=(int)cmds.size()-1; i>=0; i--) {
+        int nw1 = cmds[i].get_nwords();
+        if (cmds[i].get_string(nw1-1) == "&" ||
+            cmds[i].get_string(nw1-1) == ",") {
+            if (cmds[i].get_string(nw1-1) == "&")
+                cmds[i].erase_word(nw1-1);
+            // A continuation marker on the very last command has nothing
+            // to merge with; do not index past the end of cmds.
+            if (i+1 >= (int)cmds.size()) continue;
+            int nw2 = cmds[i+1].get_nwords();
+            for (int j=0; j<nw2; j++) {
+                cmds[i].add_word(cmds[i+1].get_string(j),
+                                 cmds[i+1].get_line_number(j),
+                                 cmds[i+1].get_file_line_number(j),
+                                 cmds[i+1].get_filename(j)
+                                 );
+            }
+            cmds.erase(cmds.begin()+i+1);
+        }
+    }
+
+
+    // Reset the command name and type. Consider the following command:
+    //      * lasdkj */ cmd = 5.0
+    // The original command name is "*", but after the multi-line comment is
+    // removed, the command name should be "cmd".
+    for (int i=0; i<(int)cmds.size(); i++) {
+        cmds[i].reset_name_type();
+    }
+
+
+    // Handle all the variable dimension statements.
+    ierr = 0;
+    serr.str("");
+    for (int i=0; i<(int)cmds.size(); i++) {
+        if (cmds[i].check_for_dimension(serr, ierr)) {
+            cmds.erase(cmds.begin()+i);
+            i--;
+            continue;
+        }            
+    }
+    return_local = process_error_return_int(serr, ierr);
+    // Report the level to the caller as the other steps do, so that a
+    // fatal dimension error does not return a stale return_value.
+    if (return_local > return_value) return_value = return_local;
+    if (return_local > 0) {
+       cout << "handle variable dimension statement has error " << ierr << endl;
+       if (return_local > 1) return;
+    }
+
+    // Combine things like "end if" into one word, i.e. "endif".
+    for (int i=0; i<(int)cmds.size(); i++) {
+        cmds[i].handle_two_words();
+    }
+
+    // Handle the case of a space between digits and the e for reals.
+    // For example, in the following,
+    //     1.0, 2.3 e14, -5.6
+    // there is a space between 2.3 and e14 which should most likely
+    // be treated as a single number, 2.3e14.
+    ierr = 0;
+    serr.str("");
+    string action = "error";
+    bool action_set = false;
+    for (int i=0; i<(int)cmds.size(); i++) {
+        if (cmds[i].get_cmd_name() == "depcmd_dse") {
+            action = cmds[i].get_string(1);
+            action_set = true;
+            cmds.erase(cmds.begin()+i);
+            i--;
+            continue;
+        }
+        if (cmds[i].get_cmd_name() == "matdef") {
+            if (!action_set) action = "fix";
+        }
+        cmds[i].deprecated_input01(action, serr, ierr);
+    }
+
+    return_local = process_error_return_int(serr, ierr);
+    return_value = return_local;
+    if (return_local > 0) {
+       cout << "handle space between digits has error " << ierr << endl;
+       if (return_local > 1) return;
+    }
+
+
+    // This is the main loop where most everything is done.
+    bool print_final_buffer = false;
+    deque<bool> skip_level;
+    deque<bool> satisfied;
+    deque<int> do_start;
+    string sub_name = "";
+    deque<int> icall, isub;
+    bool skip_sub = false;
+    int nwhen = 0;
+    int when_level = 0;
+    bool single_line_when = false;
+    int nrb = 0;                       // Number of restart blocks
+    bool single_line_rb = false;       // Flag for single line restart blocks
+    bool skiprb = false;               // Flag for skipping cmds in restart block
+    for (int i=0; i<(int)cmds.size(); i++) {
+        // Work with cmdi, so that cmds will be available for do loops.
+        Cmd cmdi = cmds[i];
+        //print_line(cmdi);
+
+        if (cmdi.get_cmd_name() == "parser_redirect_to_file") {
+            string fname;
+            int nw = cmdi.get_nwords();
+            if (nw > 1) {
+               fname = cmdi.get_string(1);
+            } else {
+               fname = "parser.out";
+            }
+            if (comm->isIOProc()) {
+               //cout << "DEBUG fname is " << fname << endl;
+               //cout << "Redirecting output to file" << endl;
+               cout.flush();
+               coutbuf = cout.rdbuf();
+               fileout.open(fname.c_str());
+               cout.rdbuf(fileout.rdbuf());
+               //cout << "Start of output to file" << endl;
+            }
+
+            continue;
+        }
+
+        // Handle restart_block commands.
+        if (cmdi.get_string(0) == "restart_block") {
+            Restartblock rb(nrb, cmdi, skiprb, single_line_rb, 
+                            bnames_on_dump, baflags_on_dump,
+                            rbsatprb_on_dump, rbsat_on_dump,
+                            serr, ierr);
+            restartblocks.push_back(rb);
+
+            for (int rbi=0; rbi<(int)restartblocks.size(); rbi++) {
+                string rbi_name = restartblocks[rbi].get_name();
+                for (int rbj=rbi+1; rbj<(int)restartblocks.size(); rbj++) {
+                    if (rbi_name == restartblocks[rbj].get_name()) {
+                        cmdi.fatal_error(0, serr, ierr);
+                        serr << "Restart block names must be unique." << endl;
+                        serr << "Non unique name = " << rbi_name << endl;
+                        ierr = 2;
+                    }
+                }
+            }
+
+            if (single_line_rb && skiprb) {
+                skiprb = false;
+                continue;
+            }
+            bool cflag = true;
+            if (single_line_rb && !skiprb) cflag = false;
+            if (cflag) continue;
+        }
+        if (cmdi.get_string(0) == "end_restart_block") {
+            skiprb = false;
+            continue;
+        }
+        if (skiprb) continue;
+
+
+        if (skip_sub) {
+            if (cmdi.get_string(0) == "endsubroutine") {
+                skip_sub = false;
+                //cout << "endsubroutine found, turning skip_sub to false" << endl;
+            }
+            continue;
+        }
+
+        // Stop right away once a fatal error (level above 1) is recorded.
+        return_local = ierr;
+        return_value = return_local;
+        if (return_local > 0) {
+           if (return_local > 1) return;
+        }
+     
+
+
+        // List variables, functions, etc.
+        if (cmdi.get_cmd_name() == "parser_list_variables") {
+            string lv1 = "********** Debugging: list variable names and values "
+                "available in input file.";
+            string lv2 = "********** Debugging END: list variable names and values";
+            string var_to_list = "";
+            int nw = cmdi.get_nwords();
+            if (nw > 1) var_to_list = cmdi.get_string(1);
+            if (comm->isIOProc()) cout << endl;
+            list_vars(lv1, lv2, var_to_list);
+            if (comm->isIOProc()) cout << endl;
+            continue;
+        }
+        if (cmdi.get_cmd_name() == "parser_list_functions") {
+            string lf1 = "********** Debugging: list function names available in input file.";
+            string lf2 = "********** Debugging END: list function names.";
+            if (comm->isIOProc()) cout << endl;
+            list_funcs(lf1, lf2);
+            if (comm->isIOProc()) cout << endl;
+            continue;
+        }
+        if (cmdi.get_cmd_name() == "parser_print_fbuffer") {
+            print_final_buffer = true;
+            continue;
+        }
+
+        // Handle when ... then commands.
+        string wtcmd = cmdi.get_string(0);
+        if (wtcmd == "when" || wtcmd == "whenever") {
+            bool skipwhen = true;
+            bool ever_flag = false;
+            if (wtcmd == "whenever") ever_flag = true;
+            Whenthen wt(nwhen, cmdi, skipwhen, single_line_when, ever_flag, serr, ierr);
+            when_level += 1;
+            whenthens.push_back(wt);
+            if (skipwhen) continue;
+        }
+        if (cmdi.get_string(0) == "endwhen") {
+            when_level -= 1;
+            if (when_level < 0) {
+                cmdi.fatal_error(0, serr, ierr);
+                serr << "Extra endwhen (or end when) found with no matching "
+                        "when command." << endl;
+                serr << "Make sure every when command has one and only one"
+                     << endl << "matching endwhen command." << endl;
+                ierr = 2;
+            }
+            return_local = ierr;
+            if (return_local > return_value) {
+               return_value = return_local;
+            }
+            if (return_local > 0) {
+                 cout << "handle endwhen " << ierr << endl;
+                 return;
+             }
+             continue;
+        }
+
+
+        // Handle if/elseif/else/endif statements.
+        bool skip = false;
+        cmdi.handle_if(skip, skip_level, satisfied, serr, ierr);
+        if (skip) continue;
+
+        // Handle do loops. Note that we terminate if there is an error to
+        // avoid the possibility of an infinite loop.
+        int cdex = i;
+        bool end_do = false;
+        int ierr2 = 0;
+        cmdi.handle_do(skip, do_start, cdex, end_do, serr, ierr2);
+        if (ierr2 == 2) {
+            ierr = 2;
+            return_local = ierr;
+            if (return_local > return_value) {
+               return_value = return_local;
+            }
+            if (return_local > 0) {
+                 cout << "handle do " << ierr << endl;
+             }
+            break;
+        }
+        if (end_do) {
+            if (!end_do_loop(i, do_start, serr, ierr)) break;
+            continue;
+        }
+        i = cdex;
+        if (skip) continue;
+
+        // Handle call/subroutines.
+        bool go_to_sub = false;
+        bool go_to_call = false;
+        cmdi.handle_subroutines(skip, go_to_sub, sub_name, go_to_call,
+                                serr, ierr);
+        if (go_to_call) {
+            end_do_ret(i, do_start, serr, ierr);
+            return_local = jump_to_call(i, icall, isub, serr, ierr);
+            if (return_local > return_value) {
+               return_value = return_local;
+            }
+            if (return_local > 0) {
+                 cout << "jump_to_call error " << ierr << endl;
+             }
+            continue;
+        }
+
+        if (go_to_sub) {
+            icall.push_back(i);
+            return_local = jump_to_sub(i, sub_name, serr, ierr);
+            isub.push_back(i);
+            if (return_local > return_value) {
+               return_value = return_local;
+            }
+            if (return_local > 0) {
+                 cout << "jump_to_sub error " << ierr << endl;
+             }
+            continue;
+        }
+
+        if (cmdi.get_string(0) == "subroutine") {
+            //cout << "subroutine found!!!, turning skip_sub to true" << endl;
+            skip_sub = true;
+            continue;
+        }
+
+        // Check for a variable description command. If found we set the description,
+        // then go to the next line.
+        if (cmdi.check_for_var_description(serr, ierr)) continue;
+
+        // Stop if we hit a stop command or a fatal_error command
+        bool kill_run = false;
+        if (cmdi.check_input_end(kill_run, serr, ierr)) {
+            // Killing the calculation will be done, for example, if the user
+            // issues a fatal_error command.
+            if (kill_run) {
+                return_local = process_error_return_int(serr, ierr);
+               if (return_local > return_value) {
+                   return_value = return_local;
+               }
+               if (return_local > 0) {
+                    cout << "handle input end " << ierr << endl;
+                    if (return_local > 1) return;
+                }
+            }
+            
+            // Clear out all do's so we don't get an error about unmatched
+            // do/enddo.
+            do_start.clear();
+            break;
+        }
+
+        cmdi.math_eval(serr, ierr);
+        cmdi.substitute_variables(serr, ierr);   // Sub vars not in math expressions.
+        cmdi.check_ppmm(serr, ierr);             // All ++, -- should be gone.
+        cmdi.remove_commas();
+        cmdi.handle_cmd_unary_minus(serr, ierr);
+        cmdi.handle_cmd_unary_plus(serr, ierr);
+        cmdi.handle_cmd_multiplicity(serr, ierr);
+        cmdi.check_misplaced_math(serr, ierr);
+        cmdi.set_variables(serr, ierr);
+
+        // Copy the command to the final commands deque.
+        if (cmdi.get_cmd_type() == "command") {
+            if (when_level > 0) {
+                // NOTE(review): assumes nwhen >= 1 whenever when_level > 0 - confirm.
+                whenthens[nwhen-1].add_cmdf(cmdi);
+                if (single_line_when) {
+                    when_level -= 1;
+                    single_line_when = false;
+                }
+            }
+            else {
+                cmdsf.push_back(cmdi);
+            }
+        }
+    }
+    // Print error messages and terminate if fatal.
+    return_local = process_error_return_int(serr, ierr);
+    if (return_local > return_value) {
+        return_value = return_local;
+    }
+    if (return_local > 0) {
+        return_value = return_local;
+        if (return_local > 1) return;
+    }
+
+    // Check that an enddo was found for every do.
+    check_enddo(do_start, serr, ierr);
+
+    // Print error messages and terminate if fatal.
+    return_local = process_error_return_int(serr, ierr);
+    if (return_local > return_value) {
+        return_value = return_local;
+    }
+    if (return_local > 0) {
+        return_value = return_local;
+        if (comm->isIOProc()) {
+           cout << "handle enddo is wrong with err " << ierr << endl;
+        }
+        if (return_local > 1) return;
+    }
+
+    // Set the processed flag in every word in every command to be false.
+    // At the end of parsing, if any word has not been processed in some way,
+    // then that is a fatal error.
+    for (int i=0; i<(int)cmdsf.size(); i++) {
+        cmdsf[i].clear_processed();
+    }
+
+    // If this is a recreate of the parser, then there might be some commands 
+    // that have already been processed, set these.
+    cmd_set_reprocessed(true);
+
+    // Check and print duplicate scalar commands.
+    // Remove duplicate scalar commands.
+    // Process the duplicate_array_values command.
+    return_local = process_dav_cmd();
+    if (return_local > return_value) {
+        return_value = return_local;
+    }
+    if (return_local > 0) {
+        if (comm->isIOProc()) {
+           cout << "Checked for  duplicate arrays and error is " << return_local << endl;
+        }
+    }
+    //check_duplicates();
+
+    // Debug: print each of the final commands to the screen.
+    if (print_final_buffer) {
+        if (comm->isIOProc()) {
+            cout << "********************************************************************************\n"
+                 << "********** Echo final parser buffer, this is what the code uses to set internal \n" 
+                 << "********** code variables." << endl;
+            list_cmdsf("", "");
+            cout << "********** End of echo final parser buffer.\n" 
+                 << "********************************************************************************\n\n" 
+                 << endl;
+
+            cout << "********************************************************************************\n"
+                 << "********** Echo final when...then parser buffers, this is what the code uses to \n"
+                 << "********** set internal code variables when processing when...then commands." << endl;
+            list_wt_cmdsf();
+            cout << "********** End of echo final when...then parser buffers.\n"
+                 << "********************************************************************************\n\n" 
+                 << endl;
+
+            cout << "********************************************************************************\n"
+                 << "********** Echo restart block information." << endl;
+            list_rb();
+            cout << "********** End of echo restart block information.\n"
+                 << "********************************************************************************\n\n"
+                 << endl;
+        }
+    }
+
+    // Return to the calling program.
+
+    return;
+}
+
+
+// ===========================================================================
+// A "endsubroutine" or "return" command has been found. Jump back to the call
+// statement. This sets the loop index i so that we end up on the line after
+// the call.
+// ===========================================================================
+int PowerParser::jump_to_call(int &i, deque<int> &icall, deque<int> &isub,
+                         stringstream &serr, int &ierr)
+{
+    // Leaves the subroutine that is currently executing.
+    //
+    //   i      in/out: index of the current command; it is replaced by the
+    //          index of the call line taken from the back of icall.
+    //   icall  stack of call-line indices; its last entry is removed.
+    //   isub   stack of subroutine-line indices; its last entry is removed
+    //          once the arguments have been copied back.
+    //   serr   stream that collects the error text.
+    //   ierr   set to 2 whenever an error is reported here.
+    //
+    // Returns the largest value handed back by process_error_return_int(),
+    // or 0 when no error was reported.
+    int return_value = 0;
+    int return_local = 0;
+
+    // With an empty stack there is no line to go back to. Reading the back
+    // of an empty deque is undefined behavior, so stop right after the
+    // error has been reported.
+    if (icall.empty() || isub.empty()) {
+        cmds[i].fatal_error(0, serr, ierr);
+        if (icall.empty()) {
+            serr << "icall size = 0, this should never happen." << endl;
+        }
+        else {
+            serr << "isub size = 0, this should never happen." << endl;
+        }
+        ierr = 2;
+        return_local = process_error_return_int(serr, ierr);
+        if (return_local > return_value) return_value = return_local;
+        if (return_local > 0) {
+            cout << "jump_to_call icall  error " << ierr << endl;
+        }
+        return return_value;
+    }
+    i = icall.back();
+    icall.pop_back();
+
+    // Arguments as stored on the call line and on the subroutine line.
+    vector<string> call_args;
+    vector<bool> call_args_isvar;
+    cmds[i].copy_call_args(call_args, call_args_isvar);
+    int idex_sub = isub.back();
+    vector<string> sub_args;
+    vector<bool> sub_args_isvar;
+    cmds[idex_sub].copy_sub_args(sub_args, sub_args_isvar);
+
+    // Never index past the end of the shortest argument list.
+    int nargs = (int)sub_args.size();
+    if ((int)call_args.size() < nargs) nargs = (int)call_args.size();
+    if ((int)call_args_isvar.size() < nargs) nargs = (int)call_args_isvar.size();
+
+    for (int j=0; j<nargs; j++) {
+        string sub_var = sub_args[j];
+        string call_var = call_args[j];
+
+        map<string, Variable>::iterator psub;
+        psub = vmap.find(sub_var);
+        if (psub == vmap.end()) {
+            cmds[i].fatal_error(0, serr, ierr);
+            serr << "Subroutine argument not found." << endl;
+            serr << "This should not happen." << endl;
+            ierr = 2;
+            return_local = process_error_return_int(serr, ierr);
+            if (return_local > return_value) return_value = return_local;
+            if (return_local > 0) {
+                cout << "jump_to_call Subroutine argument not found " << endl;
+                if (return_local > 1) return return_value;
+            }
+            // psub is vmap.end(): there is no value to compare or copy back.
+            continue;
+        }
+        string sub_value = psub->second.get_var_value();
+
+        if (!call_args_isvar[j]) {
+            // A fixed value was passed in, the dummy argument must still
+            // hold that value.
+            if (sub_value != call_var) {
+                cmds[i].fatal_error(0, serr, ierr);
+                cmds[idex_sub].fatal_error(0, serr, ierr);
+                serr << "The calling argument, argument number " << j+1
+                     << ", (after any math eval) is " << call_var << endl;
+                serr << "The corrseponding subroutine dummy argument, "
+                     << sub_var << ", has"
+                     << " the value of " << sub_value << endl;
+                serr << "These are different and should not be." << endl;
+                serr << "The calling argument is not a variable and thus"
+                     " is fixed and cannot be changed." << endl;
+                serr << "The dummy argument was changed in the subroutine," << endl;
+                serr << "thus you are trying to change a fixed quantity which"
+                     " is not allowed." << endl;
+                ierr = 2;
+                return_local = process_error_return_int(serr, ierr);
+                if (return_local > return_value) return_value = return_local;
+                if (return_local > 0) {
+                    cout << "jump_to_call subroutine arguments errors " << endl;
+                    if (return_local > 1) return return_value;
+                }
+            }
+        }
+        else {
+            // A variable was passed in, copy the dummy argument back to it.
+            map<string, Variable>::iterator pcall;
+            pcall = vmap.find(call_var);
+            if (pcall != vmap.end()) {
+                pcall->second = psub->second;
+                pcall->second.set_temporary(false);
+                pcall->second.set_varname(call_var);
+            }
+            else {
+                cmds[i].fatal_error(0, serr, ierr);
+                serr << "Calling argument not found." << endl;
+                serr << "This should not happen." << endl;
+                ierr = 2;
+                return_local = process_error_return_int(serr, ierr);
+                if (return_local > return_value) return_value = return_local;
+                if (return_local > 0) {
+                    cout << "jump_to_call calling argument not found " << endl;
+                    if (return_local > 1) return return_value;
+                }
+            }
+        }
+    }
+
+    // Erase the temporary variables in a single pass. erase(p++) advances
+    // the iterator before the erased element is invalidated.
+    map<string, Variable>::iterator p = vmap.begin();
+    while (p != vmap.end()) {
+        if (p->second.is_temporary()) vmap.erase(p++);
+        else ++p;
+    }
+
+    // Remove the index to the subroutine line.
+    isub.pop_back();
+
+    return return_value;
+}  // End of jump_to_call
+
+
+// ===========================================================================
+// A "call" command has been found. Find the subroutine it is trying to call
+// and set the loop index, i, to the subroutine line so we will end up on the
+// line after the subroutine.
+// ===========================================================================
+int PowerParser::jump_to_sub(int &i, string &sub_name,
+                        stringstream &serr, int &ierr)
+{
+    // Enters the subroutine named sub_name.
+    //   i         in/out: index of the call line on entry; on success it is
+    //             set to the index of the subroutine line.
+    //   sub_name  name of the subroutine to look for.
+    //   serr      stream that collects the error text.
+    //   ierr      set to 2 whenever an error is reported here.
+    //
+    // Returns the largest value handed back by process_error_return_int(),
+    // or 0 when no error was reported.
+    int return_value = 0;
+    int return_local = 0;
+
+    // Find the line index, cdex, for the subroutine.
+    int cdex = -1;
+    for (int j=0; j<(int)cmds.size(); j++) {
+        if (cmds[j].find_subroutine(sub_name)) {
+            cdex = j;
+            break;
+        }
+    }
+
+    // Without a subroutine line nothing below can work (cmds[cdex] would be
+    // indexed with -1), so stop right after the error has been reported.
+    if (cdex == -1) {
+        cmds[i].fatal_error(0, serr, ierr);
+        serr << "Subroutine " << sub_name << " not found." << endl;
+        ierr = 2;
+        return_local = process_error_return_int(serr, ierr);
+        if (return_local > return_value) return_value = return_local;
+        if (return_local > 0) {
+            cout << "Subroutine name not found " << ierr << endl;
+        }
+        return return_value;
+    }
+
+    // Get the calling arguments. This will potentially be a mix
+    // of variable names and numbers. This does not modify the words
+    // in cmds[i], but it does store the call arguments in cmds[i].
+    vector<string> call_args;
+    vector<bool> call_args_isvar;
+    cmds[i].get_call_args(call_args, call_args_isvar, serr, ierr);
+
+    // Get the subroutine arguments.
+    // This does not modify the words in cmds[cdex], but it does store
+    // the subroutine arguments in cmds[cdex]
+    vector<string> sub_args;
+    vector<bool> sub_args_isvar;
+    cmds[cdex].get_sub_args(sub_args, sub_args_isvar);
+
+    // Error checking.
+    int ncall_args = (int)call_args.size();
+    int nsub_args = (int)sub_args.size();
+    if (ncall_args != nsub_args) {
+        cmds[i].fatal_error(0, serr, ierr);
+        cmds[cdex].fatal_error(0, serr, ierr);
+        serr << "Number of calling arguments = " << ncall_args << endl;
+        serr << "Number of subroutine arguments = " << nsub_args << endl;
+        serr << "These must be the same." << endl;
+        ierr = 2;
+        return_local = process_error_return_int(serr, ierr);
+        if (return_local > return_value) return_value = return_local;
+        if (return_local > 0) {
+            cout << "Arguments in subroutine and in calling are different " << ierr << endl;
+            if (return_local > 1) return return_value;
+        }
+    }
+
+    for (int j=0; j<(int)sub_args.size(); j++) {
+        if (!sub_args_isvar[j]) {
+            cmds[cdex].fatal_error(0, serr, ierr);
+            serr << "Subroutine dummy arguments must always be variables." << endl;
+            serr << "Argument " << j+1 << ", " << sub_args[j] << ", "
+                 << "is not a variable." << endl;
+            serr << "Remember that variables always begin with a"
+                " dollar sign, $" << endl;
+            serr << "Note that putting quotes around a variable name makes it" << endl;
+            serr << "a string, not a variable." << endl;
+            ierr = 2;
+            return_local = process_error_return_int(serr, ierr);
+            if (return_local > return_value) return_value = return_local;
+            if (return_local > 0) {
+                cout << "Dummy arguments must be variables " << ierr << endl;
+                if (return_local > 1) return return_value;
+            }
+        }
+    }
+
+    // Define new, temporary variables for the subroutine dummy arguments.
+    // Set their values to the call values. Only argument positions present
+    // on both sides are used, so a reported but non-fatal count mismatch
+    // cannot index past the end of call_args.
+    int nargs = nsub_args;
+    if (ncall_args < nargs) nargs = ncall_args;
+    if ((int)call_args_isvar.size() < nargs) nargs = (int)call_args_isvar.size();
+    for (int j=0; j<nargs; j++) {
+        string sub_vname = sub_args[j];
+        string call_vname = call_args[j];
+
+        // Find the subroutine variable name in the variable map.
+        map<string, Variable>::iterator psub;
+        psub = vmap.find(sub_vname);
+
+        if (psub != vmap.end()) {
+            cmds[cdex].fatal_error(0, serr, ierr);
+            serr << "Argument " << j+1 << ", " << sub_args[j] << ", "
+                 << "is both a global variable and a dummy subroutine argument." << endl;
+            serr << "This is not allowed, dummy subroutine arguments "
+                "cannot also be" << endl;
+            serr << "global variables." << endl;
+            ierr = 2;
+            return_local = process_error_return_int(serr, ierr);
+            if (return_local > return_value) return_value = return_local;
+            if (return_local > 0) {
+                cout << "Dummy argument cannot be global variable " << ierr << endl;
+                if (return_local > 1) return return_value;
+            }
+        }
+        else {
+            // A calling argument that is a variable is copied wholesale,
+            // which passes in its value together with arrays and whatever
+            // other characteristics it has. Any other calling argument just
+            // becomes the value of a new, temporary variable.
+            if (call_args_isvar[j]) {
+                map<string, Variable>::iterator pcall;
+                pcall = vmap.find(call_vname);
+                if (pcall != vmap.end()) {
+                    Variable v = pcall->second;
+                    v.set_varname(sub_vname);
+                    v.set_temporary(true);
+                    vmap.insert(pair<string, Variable>(v.get_varname(), v));
+                }
+                else {
+                    // The calling argument is written as a variable but was
+                    // never defined, so there is no value to pass in.
+                    cmds[i].fatal_error(0, serr, ierr);
+                    serr << "Calling argument " << j+1 << ", " << call_vname
+                         << ", is not a defined variable." << endl;
+                    ierr = 2;
+                    return_local = process_error_return_int(serr, ierr);
+                    if (return_local > return_value) return_value = return_local;
+                    if (return_local > 0) {
+                        cout << "Calling argument variable not defined " << ierr << endl;
+                        if (return_local > 1) return return_value;
+                    }
+                }
+            }
+            else {
+                vector<int> istart(0,0);
+                vector<string> valvec;
+                valvec.push_back(call_vname);
+                int lnum = cmds[cdex].get_line_number(0);
+                int file_lnum = cmds[cdex].get_file_line_number(0);
+                string fname = cmds[cdex].get_filename(0);
+                Variable v(sub_vname, istart, valvec, lnum, file_lnum,
+                           fname, &cmd_strings, serr, ierr);
+                v.set_temporary(true);
+                vmap.insert(pair<string, Variable>(v.get_varname(), v));
+            }
+        }
+    }
+
+    // Set the loop index to the index of the subroutine so we
+    // will end up at the line after the subroutine line.
+    i = cdex;
+    return return_value;
+}  // End of jump_to_sub
+
+
+
+
+// ===========================================================================
+// End a do loop. This happens when a do loop has gone through all its
+// iterations or when an exit statement is encountered.
+// Basically, find the matching enddo  and continue after that statement.
+// ===========================================================================
+bool PowerParser::end_do_loop(int &i, deque<int> &do_start,
+                        stringstream &serr, int &ierr)
+{
+    // Skips forward to the enddo that closes the innermost active do loop.
+    //
+    //   i         in/out: current command index; moved to the enddo line
+    //             when one is found, otherwise left alone.
+    //   do_start  stack of do-statement indices; the last entry is dropped
+    //             on success.
+    //   serr      stream that collects the error text.
+    //   ierr      set to 2 when no enddo is found.
+    //
+    // Returns true when the enddo was found, false otherwise.
+
+    // Scan the commands after line i. find_matching_enddo() raises its
+    // second argument when the scan has to stop, i.e. when we are in main
+    // and hit a subroutine statement, or in a subroutine and hit an
+    // endsubroutine statement.
+    int enddo_line = -1;
+    int depth = 1;
+    int k = i+1;
+    while (k < (int)cmds.size()) {
+        bool hit_boundary = false;
+        const bool matched = cmds[k].find_matching_enddo(depth, hit_boundary);
+        if (matched) enddo_line = k;
+        if (matched || hit_boundary) break;
+        k++;
+    }
+
+    // A do statement without its enddo is a fatal error.
+    if (enddo_line == -1) {
+        if (!do_start.empty()) cmds[do_start.back()].fatal_error(0, serr, ierr);
+        serr << "No enddo found for do statement." << endl;
+        ierr = 2;
+        const int code = process_error_return_int(serr, ierr);
+        if (code > 0) cout << "Enddo not found " << endl;
+        return false;
+    }
+
+    // This do loop is finished: forget it and resume right after the enddo.
+    if (!do_start.empty()) do_start.pop_back();
+    i = enddo_line;
+    return true;
+} // End end_do_loop
+
+
+// ===========================================================================
+// A return statement in a subroutine has been encountered. We need to handle
+// the do loops before returning to the call statement, otherwise the code
+// will complain about do loops without enddo statements.
+// This routine searches from the return statement to the endsubroutine
+// statement, finds any free enddo's and erases the corresponding references
+// to the do statements.
+// ===========================================================================
+void PowerParser::end_do_ret(int &i, deque<int> &do_start,
+                       stringstream &serr, int &ierr)
+{
+    // Drops one do_start entry for each free enddo found between line i and
+    // the point where find_matching_enddo() asks the scan to stop.
+    //
+    //   i         index of the line where the scan starts (not modified).
+    //   do_start  stack of do-statement indices, entries are removed from
+    //             the back.
+    //   serr      not used.
+    //   ierr      only referenced by the assert below.
+
+    // To suppress compiler warnings of unused parameters
+    //assert(serr == serr);
+    assert(ierr == ierr);
+
+    int scan_from = i;
+    while (true) {
+        // Look for the next enddo; every search restarts with a level of 1.
+        // The scan has to stop when we are in main and hit a subroutine
+        // statement, or in a subroutine and hit an endsubroutine statement.
+        int depth = 1;
+        bool found = false;
+        bool hit_boundary = false;
+        for (int k=scan_from; k<(int)cmds.size(); k++) {
+            found = cmds[k].find_matching_enddo(depth, hit_boundary);
+            if (found) scan_from = k+1;
+            if (found || hit_boundary) break;
+        }
+
+        // No further enddo, or the scan was told to stop: nothing more to
+        // remove.
+        if (!found || hit_boundary) return;
+
+        // That do loop is finished, forget the innermost reference to it.
+        if (!do_start.empty()) do_start.pop_back();
+    }
+} // End end_do_ret
+
+
+
+
+// ===========================================================================
+// Check that an enddo was found for every do.
+// ===========================================================================
+void PowerParser::check_enddo(deque<int> &do_start, stringstream &serr, int &ierr)
+{
+    // Every index still held in do_start is a do statement without an enddo.
+    for (deque<int>::const_iterator it = do_start.begin(); it != do_start.end(); ++it) {
+        cmds[*it].fatal_error(0, serr, ierr);
+        serr << "No enddo found for do statement." << endl;
+        ierr = 2;
+    }
+}
+
+
+// ===========================================================================
+// Check all processed flags on every command. If any word on any command
+// has not been processed, then that is a fatal error.
+// ===========================================================================
+void PowerParser::check_processed(bool &good)
+{
+    // Calls check_processed() on every command of the buffer cmdsfp points
+    // to; whatever the commands report is then passed to process_error().
+    int ierr = 0;
+    stringstream serr;
+
+    for (int i=0; i<(int)cmdsfp->size(); i++) {
+        (*cmdsfp)[i].check_processed(good, serr, ierr);
+    }
+    process_error(serr, ierr);
+}
+
+
+// ===========================================================================
+// Process the duplicate array values command.
+// ===========================================================================
+int PowerParser::process_dav_cmd()
+{
+    // Looks through the buffer cmdsfp points to for duplicate_array_values
+    // commands and sets dup_fatal from the word at index 2:
+    //     none -> 0,  warning -> 1,  fatal -> 2
+    // Returns dup_fatal as soon as a command sets it to 1 or 2, the code
+    // from process_error_return_int() (never below 0) after an unknown
+    // value, and 0 otherwise.
+    // Note that duplicate array values are processed when the calls are made
+    // from the host code to actually extract information from the final
+    // buffer.
+    const string cmd_name = "duplicate_array_values";
+    for (int k=0; k<(int)cmdsfp->size(); k++) {
+        string word0 = (*cmdsfp)[k].get_string(0);
+        if (word0 != cmd_name) continue;
+        (*cmdsfp)[k].set_processed(true);
+        string setting = (*cmdsfp)[k].get_string(2);
+        const bool is_none    = (setting == "none");
+        const bool is_warning = (setting == "warning");
+        const bool is_fatal   = (setting == "fatal");
+        if (!is_none && !is_warning && !is_fatal) {
+            int ierr = 0;
+            stringstream serr;
+            (*cmdsfp)[k].fatal_error(0, serr, ierr);
+            serr << "The value for the duplicate_array_values command must" << endl <<
+                "be either none, warning, or fatal" << endl;
+            ierr = 2;
+            cout << "The value for the duplicate_array_values command must" << endl;
+            cout << "be either none, warning, or fatal" << endl;
+            int code = process_error_return_int(serr, ierr);
+            if (code > 0) {
+                cout << "Duplicate array values not recognized " << ierr << endl;
+            }
+            return (code > 0) ? code : 0;
+        }
+        dup_fatal = is_fatal ? 2 : (is_warning ? 1 : 0);
+        if (dup_fatal > 0) return dup_fatal;
+    }
+    return 0;
+}
+
+
+// ===========================================================================
+// If commands appear more than once in the input file(s), print a warning
+// to the user.
+// ===========================================================================
+void PowerParser::check_duplicates()
+{
+    // Works on the main final buffer and on every when...then buffer, in
+    // two passes:
+    //   1. check_dup_scalar() prints a warning table for the scalar
+    //      commands that appear more than once in a buffer,
+    //   2. remove_dup_scalar() then removes the earlier instances so that
+    //      only the last one stays in the buffer.
+    // A buffer index of -1 selects the main buffer, 0 and up select the
+    // corresponding entry of whenthens. wt_reset() is called after each
+    // pass.
+    //
+    // If duplicate scalar commands are not found, we do not really need to
+    // pollute the output telling the user that, so the flag returned by
+    // check_dup_scalar() is not looked at here.
+    bool fany;
+    int wtn;
+
+    // Pass 1: report the duplicates.
+    wtn = -1;
+    while (wtn < (int)whenthens.size()) {
+        check_dup_scalar(wtn, fany);
+        wtn++;
+    }
+    wt_reset();
+
+    // Pass 2: remove the duplicates from the final buffers.
+    wtn = -1;
+    while (wtn < (int)whenthens.size()) {
+        remove_dup_scalar(wtn);
+        wtn++;
+    }
+    // NOTE(review): both helpers repoint cmdsfp; wt_reset() presumably
+    // restores it - confirm.
+    wt_reset();
+}
+
+// ===========================================================================
+// Check for duplicate scalar commands in the user input file.
+// Print a table of any duplicate scalar commands to stdout.
+// ===========================================================================
+void PowerParser::check_dup_scalar(int wtn, bool &found_any)
+{
+    // Looks for scalar commands that occur more than once in one buffer and
+    // prints them as a table where comm->isIOProc() is true.
+    //
+    //   wtn        buffer to check: negative for the main final buffer
+    //              (cmdsf), otherwise an index into whenthens.
+    //   found_any  output: true when at least one duplicate was found.
+    // cmdsfp is left pointing at the buffer that was checked.
+
+    // The table starts with two header rows of three columns each.
+    const char *header_text[2][3] = { {" ", "Line", " "},
+                                      {"Filename", "Number", "Command"} };
+    vector< vector<string> > table;
+    for (int r=0; r<2; r++) {
+        table.push_back(vector<string>(header_text[r], header_text[r]+3));
+    }
+    int n_header_rows = (int)table.size();
+
+    if (wtn < 0) {
+        cmdsfp = &cmdsf;
+    }
+    else {
+        cmdsfp = whenthens[wtn].get_cmdsf_ptr();
+    }
+
+    // Names whose duplicates are already in the table.
+    vector<string> listed;
+    found_any = false;
+    for (int first=0; first<(int)cmdsfp->size(); first++) {
+        string name = (*cmdsfp)[first].get_string(0);
+        string word1 = (*cmdsfp)[first].get_string(1);
+        // Commands whose second word is "(" are not looked at here.
+        if (word1 == "(") continue;
+        bool seen_before = false;
+        for (int n=0; n<(int)listed.size() && !seen_before; n++) {
+            seen_before = (name == listed[n]);
+        }
+        if (seen_before) continue;
+
+        // Add every later command with the same name. The first instance
+        // goes into the table right before its first duplicate.
+        bool has_dup = false;
+        for (int later=first+1; later<(int)cmdsfp->size(); later++) {
+            string other = (*cmdsfp)[later].get_string(0);
+            if (name != other) continue;
+            if (!has_dup) {
+                vector<string> row_first;
+                set_dup_row(row_first, (*cmdsfp)[first], 0);
+                table.push_back(row_first);
+                has_dup = true;
+            }
+            vector<string> row_later;
+            set_dup_row(row_later, (*cmdsfp)[later], 0);
+            table.push_back(row_later);
+            found_any = true;
+        }
+
+        // A row of blanks closes the group of one command name.
+        if (has_dup) {
+            listed.push_back(name);
+            table.push_back(vector<string>(3, " "));
+        }
+    }
+
+    // Print only when there is something to report, and only on the
+    // process for which comm->isIOProc() is true.
+    if (!found_any || !comm->isIOProc()) return;
+
+    cout << endl;
+    if (wtn < 0) {
+        cout << "********** WARNING: Duplicate Scalar Commands Found in User Input File" << endl;
+        cout << "  The following commands appear more than once in the user input file." << endl;
+    }
+    else {
+        cout << "********** WARNING: Duplicate Scalar Commands Found in when...then" << endl;
+        cout << "  The following commands appear more than once in the when...then." << endl;
+    }
+    cout << "  The last instance of the command will be used." << endl;
+    cout << "  Is this what you want??" << endl << endl;
+    stringstream ssout;
+    Parser_utils putils(index_base);
+    putils.print_strings(table, n_header_rows, 4, 3, 80, ssout);
+    cout << ssout.str() << endl;
+}
+
+
+// ===========================================================================
+// Helper function for check_dup_scalar.
+// The duplicate scalar commands are printed as rows with each row containing
+// the file name the duplicate command was found in, the line number, and the
+// command line itself.
+// Given the duplicate command, this function generates that row of
+// information and adds it to the row vector.
+// ===========================================================================
+void PowerParser::set_dup_row(vector<string> &row, Cmd &cmdi, int iw)
+{
+    // Appends three cells for word iw of cmdi: file name, line number within
+    // that file, and the stored text of the command line.
+    const int text_line = cmdi.get_line_number(iw);
+    stringstream file_line;
+    file_line << cmdi.get_file_line_number(iw);
+
+    row.push_back(cmdi.get_filename(iw));
+    row.push_back(file_line.str());
+    row.push_back(cmd_strings[text_line-1]);   // presumably line numbers start at 1
+}
+
+
+
+// ===========================================================================
+// Remove duplicate scalar commands in the user input file.
+// Keep only the last instance of the command.
+// ===========================================================================
+void PowerParser::remove_dup_scalar(int wtn)
+{
+    // wtn < 0 selects the main final buffer, otherwise a when...then buffer.
+    if (wtn < 0) { cmdsfp = &cmdsf; }
+    else         { cmdsfp = whenthens[wtn].get_cmdsf_ptr(); }
+    // Walk backwards so that the last instance of a command is the keeper.
+    int keep = (int)cmdsfp->size()-1;
+    while (keep >= 0) {
+        string name = (*cmdsfp)[keep].get_string(0);
+        string word1 = (*cmdsfp)[keep].get_string(1);
+        for (int probe=keep-1; word1 != "(" && probe >= 0; probe--) {
+            string other = (*cmdsfp)[probe].get_string(0);
+            if (name != other) continue;
+            cmdsfp->erase(cmdsfp->begin()+probe);
+            keep--;   // the keeper moved down one slot
+        }
+        keep--;
+    }
+}
+
+// ===========================================================================
+// Helper function to convert doubles to strings.
+// ===========================================================================
+std::string const to_string( double const x )
+{
+    std::ostringstream text;
+    text.precision(16);  text << x;   // up to 16 significant digits
+    return text.str();
+}
+
+// ===========================================================================
+// Initialize the parser (typically called by the constructors): reset the
+// bookkeeping members, register predefined variables and built-in functions.
+// ===========================================================================
+void PowerParser::init()
+{
+    line_number = 0;
+    cmdsfp = &cmdsf;   // point at the main final buffer
+    dup_fatal = 1;     // 1 is the value process_dav_cmd() uses for "warning"
+    ierr_global = 0;
+    // Predefined variables for vmap. NOTE(review): 'true' presumably flags them as predefined - confirm.
+    // make a little smaller (2.0) to avoid floating point excepting on some
+    // compilers
+    double huge_double = numeric_limits<double>::max( )/2.0;
+    Word whuge_double(huge_double, 1, 1, "", NULL);
+    Variable vhuge_double("$huge_double", whuge_double.get_string(), true, "largest double/2.0");
+    vmap.insert(pair<string, Variable>(vhuge_double.get_varname(), vhuge_double));   // map::insert keeps an existing entry
+
+    float huge_float = numeric_limits<float>::max( );
+    Word whuge_float(huge_float, 1, 1, "", NULL);
+    Variable vhuge_float("$huge_float", whuge_float.get_string(), true, "largest float");
+    vmap.insert(pair<string, Variable>(vhuge_float.get_varname(), vhuge_float));
+
+    int huge_int = numeric_limits<int>::max( );
+    Word whuge_int(huge_int, 1, 1, "", NULL);
+    Variable vhuge_int("$huge_int", whuge_int.get_string(), true, "largest integer");
+    vmap.insert(pair<string, Variable>(vhuge_int.get_varname(), vhuge_int));
+
+    double tiny_double = numeric_limits<double>::min( );   // smallest positive normalized double
+    Word wtiny_double(tiny_double, 1, 1, "", NULL);
+    Variable vtiny_double("$tiny_double", wtiny_double.get_string(), true, "tiniest double");
+    vmap.insert(pair<string, Variable>(vtiny_double.get_varname(), vtiny_double));
+
+    float tiny_float = numeric_limits<float>::min( );   // smallest positive normalized float
+    Word wtiny_float(tiny_float, 1, 1, "", NULL);
+    Variable vtiny_float("$tiny_float", wtiny_float.get_string(), true, "tiniest float");
+    vmap.insert(pair<string, Variable>(vtiny_float.get_varname(), vtiny_float));
+
+    int tiny_int = numeric_limits<int>::min( );   // for int, min() is the most negative value
+    Word wtiny_int(tiny_int, 1, 1, "", NULL);
+    Variable vtiny_int("$tiny_int", wtiny_int.get_string(), true, "tiniest integer");
+    vmap.insert(pair<string, Variable>(vtiny_int.get_varname(), vtiny_int));
+
+    int ncores_tot = comm->getNumProcs();
+    Word wncores_tot(ncores_tot, 1, 1, "", NULL);
+    Variable vncores_tot("$ncores_tot", wncores_tot.get_string(), true, "total number of cores");
+    vmap.insert(pair<string, Variable>(vncores_tot.get_varname(), vncores_tot));
+
+
+    // ***********************************************************************
+    // Define the default functions (name, flag, argument count, type tag, description).
+    Function facos("acos", true, 1, "real",
+                   "arccosine, radians, arg -1 to 1");
+    fmap.insert(pair<string, Function>(facos.get_name(), facos));
+
+    Function fasin("asin", true, 1, "real",
+                   "arcsine, radians, arg -1 to 1");
+    fmap.insert(pair<string, Function>(fasin.get_name(), fasin));
+
+    Function fatan("atan", true, 1, "real", "arctangent, returns radians");
+    fmap.insert(pair<string, Function>(fatan.get_name(), fatan));
+
+    Function fceil("ceil", true, 1, "real", "round up (smallest int >= arg)");
+    fmap.insert(pair<string, Function>(fceil.get_name(), fceil));
+
+    Function fcos("cos", true, 1, "real", "cosine, arg in radians");
+    fmap.insert(pair<string, Function>(fcos.get_name(), fcos));
+
+    Function fcosh("cosh", true, 1, "real", "hyperbolic cosine");
+    fmap.insert(pair<string, Function>(fcosh.get_name(), fcosh));
+
+    Function fexp("exp", true, 1, "real", "exponential");
+    fmap.insert(pair<string, Function>(fexp.get_name(), fexp));
+
+    Function ffabs("fabs", true, 1, "real", "absolute value of a real");
+    fmap.insert(pair<string, Function>(ffabs.get_name(), ffabs));
+
+    Function ffloor("floor", true, 1, "real",
+                    "round down (largest int <= arg)");
+    fmap.insert(pair<string, Function>(ffloor.get_name(), ffloor));
+
+    Function flog("log", true, 1, "real", "log to base e, arg>0");
+    fmap.insert(pair<string, Function>(flog.get_name(), flog));
+
+    Function flog10("log10", true, 1, "real", "log to base 10, arg>0");
+    fmap.insert(pair<string, Function>(flog10.get_name(), flog10));
+
+    Function fsin("sin", true, 1, "real", "sine, arg in radians");
+    fmap.insert(pair<string, Function>(fsin.get_name(), fsin));
+
+    Function fsinh("sinh", true, 1, "real", "hyperbolic sine");
+    fmap.insert(pair<string, Function>(fsinh.get_name(), fsinh));
+
+    Function fsqrt("sqrt", true, 1, "real", "square root (arg >= 0)");
+    fmap.insert(pair<string, Function>(fsqrt.get_name(), fsqrt));
+
+    Function ftan("tan", true, 1, "real", "tangent, arg in radians");
+    fmap.insert(pair<string, Function>(ftan.get_name(), ftan));
+
+    Function ftanh("tanh", true, 1, "real", "hyperbolic tangent");
+    fmap.insert(pair<string, Function>(ftanh.get_name(), ftanh));
+
+    Function fatan2("atan2", true, 2, "real", "arctangent, 2 args");
+    fmap.insert(pair<string, Function>(fatan2.get_name(), fatan2));
+
+    Function ffmod("fmod", true, 2, "real", "remainder of arg1/arg2");
+    fmap.insert(pair<string, Function>(ffmod.get_name(), ffmod));
+
+    Function fpow("pow", true, 2, "real", "arg1 raised to arg2 power");
+    fmap.insert(pair<string, Function>(fpow.get_name(), fpow));
+
+    Function ffmax("max", true, 2, "real", "return the greater of two args");
+    fmap.insert(pair<string, Function>(ffmax.get_name(), ffmax));
+
+    Function ffmin("min", true, 2, "real", "return the lesser of two args");
+    fmap.insert(pair<string, Function>(ffmin.get_name(), ffmin));
+
+    Function fstrlen("strlen", true, 1, "string", "number of chars in arg");
+    fmap.insert(pair<string, Function>(fstrlen.get_name(), fstrlen));
+
+    Function fstrcat("strcat", true, 2, "string", "concatenate two strings");
+    fmap.insert(pair<string, Function>(fstrcat.get_name(), fstrcat));
+
+    Function fstrerase("strerase", true, 3, "string", "erase chars from string");
+    fmap.insert(pair<string, Function>(fstrerase.get_name(), fstrerase));
+
+    Function fstrinsert("strinsert", true, 3, "string", "insert chars into string");
+    fmap.insert(pair<string, Function>(fstrinsert.get_name(), fstrinsert));
+
+    Function fstrsubstr("strsubstr", true, 3, "string", "get sub string");
+    fmap.insert(pair<string, Function>(fstrsubstr.get_name(), fstrsubstr));
+
+    Function fstrtrim("strtrim", true, 1, "string", "remove trailing whitespace");
+    fmap.insert(pair<string, Function>(fstrtrim.get_name(), fstrtrim));
+
+    Function fdefined("defined", true, 1, "logical", "is a variable defined or not");
+    fmap.insert(pair<string, Function>(fdefined.get_name(), fdefined));
+}
+
+void PowerParser::dictionary_add(char *name, double value, bool pred, char *vdesc)
+{  // insert() stores a copy, so use a stack Variable (the old `new` leaked).
+   Variable Var_entry(name, to_string(value), pred, vdesc);
+   vmap.insert(pair<string, Variable>(Var_entry.get_varname(), Var_entry));
+}
+
+void PowerParser::dictionary_env_add(char *name, bool pred)
+{
+   // Registers a Variable built from "$" + name whose value is the current
+   // setting of the environment variable `name`.  `pred` is forwarded
+   // unchanged and `name` doubles as the description argument.
+
+   // An unset environment variable is recorded with an empty value.
+   const char *getenv_p = getenv(name);
+   if( getenv_p == NULL ){
+      getenv_p = "";
+   }
+
+   // Build "$" + name in a std::string rather than an unchecked malloc'd
+   // buffer; its storage is released automatically on every exit path.
+   string varname("$");
+   varname += name;
+
+   // vmap.insert() stores a copy, so the entry can live on the stack; the
+   // former heap-allocated Variable was never deleted.  The cast keeps the
+   // char* argument type the constructor was originally called with.
+   Variable Var_entry(const_cast<char *>(varname.c_str()), getenv_p, pred, name);
+   vmap.insert(pair<string, Variable>(Var_entry.get_varname(), Var_entry));
+}
+
+
+// ===========================================================================
+// Read a file into a string.
+// This is only done on the io processor.
+// ===========================================================================
+void PowerParser::read_into_string(string filename, string &s_in)
+{
+    // Appends the contents of `filename` to s_in.  Failure to open the
+    // file, or ending up with an empty s_in, is reported through
+    // process_error with error code 2.
+
+    // Only the IO processor reads; a NULL comm never blocks the read.
+    if (comm != NULL && !comm->isIOProc()) return;
+
+    // A file name of exactly one blank means no input file was given;
+    // that is allowed and yields a single blank as the contents.
+    if (filename == " ") {
+        s_in = " ";
+        return;
+    }
+
+    // Open the input data file.
+    ifstream in_stream(filename.c_str(), ios::in);
+    if (in_stream.fail()) {
+        stringstream open_msg;
+        open_msg << endl << "*** FATAL ERROR" << endl;
+        open_msg << "Could not open input (or include) file." << endl;
+        open_msg << "The name of the file is " << filename << endl;
+        if (filename.empty()) {
+            open_msg << "(The file name is blank.)" << endl;
+        }
+        int open_err = 2;
+        process_error(open_msg, open_err);
+        return;
+    }
+
+    // Transfer the file one character at a time into the std::string, so
+    // arbitrarily long files need no manual buffer management.  The loop
+    // ends as soon as get() fails to deliver another character.
+    char next_char;
+    while (in_stream.get(next_char)) {
+        s_in += next_char;
+    }
+
+    // s_in is still empty: treat it as fatal, since an empty input file
+    // might be an indication of a full file system.
+    if ( static_cast<int>(s_in.size()) == 0 ) {
+        stringstream empty_msg;
+        empty_msg << endl << "*** FATAL ERROR" << endl;
+        empty_msg << "The name of the input file is " << filename << endl;
+        empty_msg << "This file exists, but its size is 0 bytes, (empty file)." << endl;
+        empty_msg << "Perhaps the file system is full??" << endl;
+        empty_msg << "Use a unix command like \"df -k .\" to find out how full the"
+                     " file system is." << endl;
+        int empty_err = 2;
+        process_error(empty_msg, empty_err);
+        return;
+    }
+
+    // The contents are in s_in now, so the file can be closed.
+    in_stream.close();
+}
+
+
+//+***************************************************************************
+// ***************************************************************************
+// Driver functions for getting values from the commands.
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Driver for getting boolean values as integers.
+// This works for arrays of any dimension, 0,1,2,3,...
+// ===========================================================================
+void PowerParser::get_bool_int(string &cname,
+                         int *cvalue,
+                         const vector<int> &size,
+                         bool skip)
+{
+    // cvalue is intentionally not defaulted: it is only written when a
+    // command named cname exists in the active command buffer.
+
+    // Work arrays for the duplicate-value check, one slot per array
+    // element (the product of all extents in size; 1 when size is empty).
+    int nelem = 1;
+    for (int d = 0; d < (int)size.size(); d++) {
+        nelem *= size[d];
+    }
+    vector<int> dup_vals(nelem, 0);
+    vector<Cmd *> dup_cmd1(nelem);
+    vector<int> dup_wdex1(nelem, -1);
+
+    stringstream serr;
+    int ierr = 0;
+    if (! case_sensitive) {
+       transform(cname.begin(), cname.end(), cname.begin(), tolower);
+    }
+    // Every command whose name matches contributes its values.
+    for (int c = 0; c < (int)cmdsfp->size(); c++) {
+        if (!((*cmdsfp)[c].get_cmd_name() == cname)) continue;
+        (*cmdsfp)[c].get_bool_int(cname, cvalue, size, dup_cmd1, dup_wdex1,
+                                  dup_fatal, dup_vals, skip, serr, ierr);
+        processed_cmd_names.push_back(cname);
+    }
+
+    // Report anything collected in serr; ierr == 2 leads to a global abort.
+    process_error(serr, ierr);
+}
+
+void PowerParser::get_bool_int(const char *cname, int *cvalue,
+                         const vector<int> &size, bool skip)
+{
+   // C-string convenience overload: the string& overload may lower-case
+   // the name in place, so a local std::string copy is forwarded.
+   string name_copy = cname;
+   get_bool_int(name_copy, cvalue, size, skip);
+}
+
+void PowerParser::get_bool(string &cname,
+                     bool *cvalue,
+                     const vector<int> &size,
+                     bool skip)
+{
+    // cvalue is only changed if the command is found.  The dup_* vectors
+    // (one slot per array element) are used in checking for duplicates.
+    int tot_size = 1;
+    for (int i=0; i<(int)size.size(); i++) tot_size *= size[i];
+    vector<int> dup_vals(tot_size, 0);
+    vector<Cmd *> dup_cmd1(tot_size);
+    vector<int> dup_wdex1(tot_size, -1);
+
+    int ierr = 0;
+    stringstream serr;
+    // Fold cname to lower case (in place) when the parser is case-insensitive,
+    // as the other get_* drivers in this file do before matching names.
+    if (! case_sensitive) {
+       transform(cname.begin(), cname.end(), cname.begin(), tolower);
+    }
+    for (int i=0; i<(int)cmdsfp->size(); i++) {
+        if ((*cmdsfp)[i].get_cmd_name() == cname) {
+            (*cmdsfp)[i].get_bool(cname, cvalue, size, dup_cmd1, dup_wdex1,
+                                  dup_fatal, dup_vals, skip, serr, ierr);
+            processed_cmd_names.push_back(cname);
+        }
+    }
+
+    // Process errors, global abort if ierr==2
+    process_error(serr, ierr);
+}
+
+void PowerParser::get_bool(const char *cname, bool *cvalue,
+                     const vector<int> &size, bool skip)
+{
+   // C-string convenience overload: the string& overload takes a non-const
+   // std::string reference, so a local copy of cname is forwarded.
+   string name_copy = cname;
+   get_bool(name_copy, cvalue, size, skip);
+}
+
+
+// ===========================================================================
+// Driver for getting integer values.
+// This works for arrays of any dimension, 0,1,2,3,...
+// ===========================================================================
+template< typename T >
+void PowerParser::get_int(string &cname, T *cvalue, const vector<int> &size, bool skip)
+{
+    // cvalue is intentionally not defaulted: it is only written when a
+    // command named cname exists in the active command buffer.
+
+    // Work arrays for the duplicate-value check, one slot per array
+    // element (the product of all extents in size; 1 when size is empty).
+    int nelem = 1;
+    for (int d = 0; d < (int)size.size(); d++) {
+        nelem *= size[d];
+    }
+    vector<int> dup_vals(nelem, 0);
+    vector<Cmd *> dup_cmd1(nelem);
+    vector<int> dup_wdex1(nelem, -1);
+
+    stringstream serr;
+    int ierr = 0;
+    if (! case_sensitive) {
+       transform(cname.begin(), cname.end(), cname.begin(), tolower);
+    }
+    // Every command whose name matches contributes its values.
+    for (int c = 0; c < (int)cmdsfp->size(); c++) {
+        if (!((*cmdsfp)[c].get_cmd_name() == cname)) continue;
+        (*cmdsfp)[c].get_int(cname, cvalue, size, dup_cmd1, dup_wdex1,
+                             dup_fatal, dup_vals, skip, serr, ierr);
+        processed_cmd_names.push_back(cname);
+    }
+
+    // Report anything collected in serr; ierr == 2 leads to a global abort.
+    process_error(serr, ierr);
+}
+
+//! Explicit instantiations of the string& overload of get_int.  The template
+//! is defined in this source file rather than in the header, so only the
+//! element types instantiated here are included in the library; add a line
+//! below for any further type that is needed.
+//!
+template void PowerParser::get_int<int>(
+    string &cname, int     *cvalue, const vector<int> &size, bool skip);
+template void PowerParser::get_int<int64_t>(
+    string &cname, int64_t *cvalue, const vector<int> &size, bool skip);
+
+template< typename T >
+void PowerParser::get_int(const char *cname, T *cvalue,
+                    const vector<int> &size, bool skip)
+{
+   // C-string convenience overload: the string& overload may lower-case
+   // the name in place, so a local std::string copy is forwarded.
+   string name_copy = cname;
+   get_int(name_copy, cvalue, size, skip);
+}
+
+template void PowerParser::get_int<int>(      // const char* overload, T = int
+    const char *cname, int     *cvalue, const vector<int> &size, bool skip);
+template void PowerParser::get_int<int64_t>(  // const char* overload, T = int64_t
+    const char *cname, int64_t *cvalue, const vector<int> &size, bool skip);
+
+// ===========================================================================
+// Driver for getting real values.
+// This works for arrays of any dimension, 0,1,2,3,...
+// ===========================================================================
+void PowerParser::get_real(string &cname,
+                     double *cvalue,
+                     const vector<int> &size,
+                     bool skip)
+{
+    // cvalue is intentionally not defaulted: its values are only written
+    // when a command named cname exists in the active command buffer.
+
+    // Work arrays for the duplicate-value check, one slot per array
+    // element (the product of all extents in size; 1 when size is empty).
+    int nelem = 1;
+    for (int d = 0; d < (int)size.size(); d++) {
+        nelem *= size[d];
+    }
+    vector<int> dup_vals(nelem, 0);
+    vector<Cmd *> dup_cmd1(nelem);
+    vector<int> dup_wdex1(nelem, -1);
+
+    stringstream serr;
+    int ierr = 0;
+    if (! case_sensitive) {
+       transform(cname.begin(), cname.end(), cname.begin(), tolower);
+    }
+    // Every command whose name matches contributes its values.
+    for (int c = 0; c < (int)cmdsfp->size(); c++) {
+        if (!((*cmdsfp)[c].get_cmd_name() == cname)) continue;
+        (*cmdsfp)[c].get_real(cname, cvalue, size, dup_cmd1, dup_wdex1,
+                              dup_fatal, dup_vals, skip, serr, ierr);
+        processed_cmd_names.push_back(cname);
+    }
+
+    // Report anything collected in serr; ierr == 2 leads to a global abort.
+    process_error(serr, ierr);
+}
+
+void PowerParser::get_real(const char *cname, double *cvalue,
+                     const vector<int> &size, bool skip)
+{
+   // C-string convenience overload: the string& overload may lower-case
+   // the name in place, so a local std::string copy is forwarded.
+   string name_copy = cname;
+   get_real(name_copy, cvalue, size, skip);
+}
+
+// ===========================================================================
+// Driver for getting character strings.
+// This works for arrays of any dimension, 0,1,2,3,...
+// ===========================================================================
+void PowerParser::get_char(string &cname,
+                     vector<string> &vstr,
+                     const vector<int> &size,
+                     bool single_char,
+                     bool skip)
+{
+    // vstr is intentionally not defaulted: it is only written when a
+    // command named cname exists in the active command buffer.
+
+    // Work arrays for the duplicate-value check, one slot per array
+    // element (the product of all extents in size; 1 when size is empty).
+    int nelem = 1;
+    for (int d = 0; d < (int)size.size(); d++) {
+        nelem *= size[d];
+    }
+    vector<int> dup_vals(nelem, 0);
+    vector<Cmd *> dup_cmd1(nelem);
+    vector<int> dup_wdex1(nelem, -1);
+
+    stringstream serr;
+    int ierr = 0;
+    if (! case_sensitive) {
+       transform(cname.begin(), cname.end(), cname.begin(), tolower);
+    }
+    // Every command whose name matches contributes its values.
+    for (int c = 0; c < (int)cmdsfp->size(); c++) {
+        if (!((*cmdsfp)[c].get_cmd_name() == cname)) continue;
+        (*cmdsfp)[c].get_char(cname, vstr, size, single_char, dup_cmd1,
+                              dup_wdex1, dup_fatal, dup_vals,
+                              skip, serr, ierr);
+        processed_cmd_names.push_back(cname);
+    }
+
+    // Report anything collected in serr; ierr == 2 leads to a global abort.
+    process_error(serr, ierr);
+}
+
+void PowerParser::get_char(const char *cname, vector<string> &vstr,
+                     const vector<int> &size, bool single_char, bool skip)
+{
+   // C-string convenience overload: forward a local std::string copy of
+   // cname, since the string& overload lower-cases its argument in place
+   // when the parser is case-insensitive.
+   string name_copy = cname;
+   get_char(name_copy, vstr, size, single_char, skip);
+}
+
+// ===========================================================================
+// Driver for getting array sizes.
+// ===========================================================================
+void PowerParser::get_size(string &cname, vector<int> &size)
+{
+    // Ask each command named cname in the active buffer to report its
+    // array size into `size` via get_size().  cname is lower-cased in
+    // place first when the parser is case-insensitive.
+    if (! case_sensitive) {
+       transform(cname.begin(), cname.end(), cname.begin(), tolower);
+    }
+    stringstream serr;
+    int ierr = 0;
+    for (int c = 0; c < (int)cmdsfp->size(); c++) {
+        if (!((*cmdsfp)[c].get_cmd_name() == cname)) continue;
+        (*cmdsfp)[c].get_size(size, serr, ierr);
+    }
+    process_error(serr, ierr);   // ierr == 2 triggers a global abort
+}
+
+
+// ===========================================================================
+// Driver for getting array sizes. Version to get all sizes
+// ===========================================================================
+void PowerParser::get_sizeb(string &cname, vector<int> &size)
+{
+    // Ask each command named cname in the active buffer to report its
+    // sizes into `size` via get_sizeb().  cname is lower-cased in place
+    // first when the parser is case-insensitive.
+    if (! case_sensitive) {
+       transform(cname.begin(), cname.end(), cname.begin(), tolower);
+    }
+    stringstream serr;
+    int ierr = 0;
+    for (int c = 0; c < (int)cmdsfp->size(); c++) {
+        if (!((*cmdsfp)[c].get_cmd_name() == cname)) continue;
+        (*cmdsfp)[c].get_sizeb(size, serr, ierr);
+    }
+    process_error(serr, ierr);   // ierr == 2 triggers a global abort
+}
+
+
+// ===========================================================================
+// Check if the input command, cname, appears in the final, parsed user input.
+//
+// The two outputs are in_input and in_whenthen,
+//    in_input     command is in (or not) the main part of the input, i.e.
+//                 everything except the when...then statements.
+//    in_whenthen  command is in (or not) at least one when...then statement.
+// ===========================================================================
+void PowerParser::cmd_in_input(string &cname, bool &in_input, bool &in_whenthen)
+{
+    in_input = in_whenthen = false;
+
+    // Case-insensitive parsers compare against the lower-cased name; the
+    // caller's string is modified in place.
+    if (! case_sensitive) {
+       transform(cname.begin(), cname.end(), cname.begin(), tolower);
+    }
+
+    // Search the buffer cmdsfp currently points at.
+    for (int c = 0; !in_input && c < (int)cmdsfp->size(); c++) {
+        in_input = ((*cmdsfp)[c].get_cmd_name() == cname);
+    }
+
+    // Search each when...then buffer in turn, stopping at the first hit.
+    // cmdsfp is redirected to the buffer being searched.
+    for (int w = 0; !in_whenthen && w < (int)whenthens.size(); w++) {
+        cmdsfp = whenthens[w].get_cmdsf_ptr();
+        for (int c = 0; !in_whenthen && c < (int)cmdsfp->size(); c++) {
+            in_whenthen = ((*cmdsfp)[c].get_cmd_name() == cname);
+        }
+    }
+
+    // wt_reset() points cmdsfp back at cmdsf; this happens even if cmdsfp
+    // referred to a when...then buffer on entry.
+    wt_reset();
+}
+
+
+// ===========================================================================
+// Set the processed flag for all words for all commands that match cname.
+// The value to set the processed flag to is bval.
+// This sets the processed flag for commands in the final buffer and in the
+// when...then final buffers.
+// ===========================================================================
+void PowerParser::cmd_set_processed(string &cname, bool bval)
+{
+    // cname is lower-cased in place for case-insensitive parsers.
+    if (! case_sensitive) {
+       transform(cname.begin(), cname.end(), cname.begin(), tolower);
+    }
+
+    // Pass -1 covers the buffer cmdsfp points at on entry; passes 0..n-1
+    // redirect cmdsfp to each when...then buffer before scanning it.
+    for (int w = -1; w < (int)whenthens.size(); w++) {
+        if (w >= 0) cmdsfp = whenthens[w].get_cmdsf_ptr();
+        for (int c = 0; c < (int)cmdsfp->size(); c++) {
+            if ((*cmdsfp)[c].get_cmd_name() == cname) {
+                (*cmdsfp)[c].set_processed(bval);
+            }
+        }
+    }
+
+    wt_reset();   // leave cmdsfp pointing at cmdsf again
+}
+
+
+// ===========================================================================
+// ===========================================================================
+void PowerParser::cmd_set_reprocessed(bool bval)
+{   // Apply cmd_set_processed(name, bval) to every name in processed_cmd_names.
+    for (int n = 0; n < (int)processed_cmd_names.size(); n++) {
+        string name_copy(processed_cmd_names[n]);  // callee takes string&
+        cmd_set_processed(name_copy, bval);
+    }
+}
+
+
+// ===========================================================================
+// Process errors.
+// ===========================================================================
+void PowerParser::process_error_global(int &return_value)
+{
+    // Report the error state accumulated in serr_global / ierr_global.
+    // return_value is 0 when no error code is pending; otherwise it is
+    // the result of process_error_return_int for that code.  A local copy
+    // is passed because the callee takes the code by non-const reference.
+    int pending = ierr_global;
+    if (pending != 0) {
+        return_value = process_error_return_int(serr_global, pending);
+    }
+    else {
+        return_value = 0;
+    }
+}
+
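+// ===========================================================================
+// Process one error report: defer it (ierr==3), warn, or abort (ierr==2).
+// ===========================================================================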
+void PowerParser::process_error(stringstream &serr, int &ierr)
+{
+    // Error codes handled here:
+    //   0      nothing to report; return at once.
+    //   3      deferred: append serr to serr_global, store the code in
+    //          ierr_global, and return without printing.
+    //   2      fatal: print on the IO processor, then abort globally.
+    //   other  print on the IO processor under the heading "Warnings".
+    switch (ierr) {
+    case 0:
+        return;
+    case 3:
+        serr_global << serr.str();
+        ierr_global = ierr;
+        return;
+    default:
+        break;
+    }
+
+    const bool fatal = (ierr == 2);
+    const string heading = fatal ? "Fatal errors" : "Warnings";
+
+    if (comm->isIOProc()) {
+        cout << endl;
+        cout << heading << " have been encountered while parsing the user"
+            " input file." << endl;
+        cout << "Note that often fixing the first error will also fix the"
+            " other errors." << endl;
+        cout << serr.str() << endl;
+        fflush(NULL);
+    }
+
+    if (fatal) {
+        // Every processor must quit.  Non-IO processors wait two seconds
+        // before aborting so that the IO processor has a chance to finish
+        // printing the messages above; they still call the abort
+        // themselves in case the IO processor never gets there.
+        if (!comm->isIOProc()) {
+            sleep(2);
+        }
+        comm->global_abort_parser();
+    }
+
+    // Portability note: if the library sleep() function is not portable, a
+    // busy-wait built on <time.h> can stand in for it.  Scale by
+    // CLOCKS_PER_SEC rather than assuming its value:
+    //
+    //   #include <time.h>
+    //   void wait ( int seconds )
+    //   {
+    //     clock_t endwait;
+    //     endwait = clock () + seconds * CLOCKS_PER_SEC ;
+    //     while (clock() < endwait) {}
+    //   }
+    //
+    // (A variant that simply adds a raw tick count to clock(), without
+    // the CLOCKS_PER_SEC factor, assumes the value of that constant and
+    // is therefore the weaker choice.)
+    //
+    // We might want to put this in Comm, i.e. modify global_abort.
+}
+
+
+// ===========================================================================
+int PowerParser::process_error_return_int(stringstream &serr, int &ierr)
+{
+    // Variant of process_error that never aborts: the incoming error code
+    // is handed back to the caller as the return value.
+    const int code = ierr;
+    if (code == 0) {
+        return code;
+    }
+
+    // Code 3 additionally records the message and code in the global
+    // error state and prints a notice without the IO-processor guard.
+    if (code == 3) {
+        serr_global << serr.str();
+        ierr_global = code;
+        cout << "Error encountered in process_error_return_int -- err code is " << code << endl;
+        fflush(NULL);
+    }
+
+    // The full message text is printed by the IO processor only.
+    if (comm->isIOProc()) {
+        cout << endl;
+        cout << "Error encountered while parsing the user input file -- err code is "
+             << code << endl;
+        cout << "Note that often fixing the first error will also fix the"
+            " other errors." << endl;
+        cout << serr.str() << endl;
+        cout.flush();
+        fflush(NULL);
+    }
+
+    // Note kept from the original author: if the library sleep() function
+    // is not portable, a busy-wait based on <time.h> could be used.  The
+    // version below scales by CLOCKS_PER_SEC instead of assuming its value:
+    //
+    //   #include <time.h>
+    //   void wait ( int seconds )
+    //   {
+    //     clock_t endwait;
+    //     endwait = clock () + seconds * CLOCKS_PER_SEC ;
+    //     while (clock() < endwait) {}
+    //   }
+    //
+    // A simpler variant adds a raw tick count to clock():
+    //
+    //   void sleep(unsigned int mseconds)
+    //   {
+    //      clock_t goal = mseconds + clock();
+    //      while (goal > clock());
+    //   }
+
+    return code;
+}
+
+
+//+***************************************************************************
+// ***************************************************************************
+// When...then commands
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Check if a when...then condition is satisfied.
+// ===========================================================================
+void PowerParser::wt_check(int wtn, vector<string> &code_varnames,
+                     vector<string> &code_values,
+                     vector<int> &vv_active, int *wtci)
+{   // wtn is 1-based.  *wtci == 1 after the check makes this when...then's
+    // command buffer the active one (cmdsfp).
+    const int idx = wtn - 1;
+    int ierr = 0;
+    stringstream serr;
+    whenthens[idx].check_wt(code_varnames, code_values, vv_active, wtci,
+                            serr, ierr);
+    process_error(serr, ierr);
+    if (*wtci == 1) cmdsfp = whenthens[idx].get_cmdsf_ptr();
+}
+
+
+// ===========================================================================
+// Set the commands final buffer pointer.
+// This is also done in the check routine.
+// ===========================================================================
+void PowerParser::wt_set_cmdsfp(int wtn)
+{   // wtn is 1-based: when...then number 1 is whenthens[0].
+    cmdsfp = whenthens[wtn-1].get_cmdsf_ptr();
+}
+
+
+// ===========================================================================
+// Reset the commands final buffer pointer.
+// ===========================================================================
+void PowerParser::wt_reset()
+{   // Make cmdsf (rather than any when...then buffer) the active buffer.
+    cmdsfp = &cmdsf;
+}
+
+
+// ===========================================================================
+// ===========================================================================
+void PowerParser::wt_casize(int wtn, int *wt_casize)
+{
+    // Forwards the wt_casize output pointer to get_char_array_size() of
+    // when...then number wtn (1-based).  The pointer is used, so there is
+    // no unused-parameter warning to work around.
+    whenthens[wtn-1].get_char_array_size(wt_casize);
+}
+
+
+// ===========================================================================
+// ===========================================================================
+void PowerParser::wt_carray(int wtn, char *wt_ca, int wt_casize)
+{
+    // Copies the character array of when...then number wtn (1-based) into
+    // wt_ca without appending a terminator.  wt_casize is treated as the
+    // capacity of wt_ca (NOTE(review): confirm against callers); it used to
+    // be ignored, so a longer string wrote past the caller's buffer.
+    string sc;
+    whenthens[wtn-1].get_char_array(sc);
+    const int ncopy = ((int)sc.size() < wt_casize) ? (int)sc.size() : wt_casize;
+    for (int i=0; i<ncopy; i++) wt_ca[i] = sc[i];
+}
+
+
+// ===========================================================================
+// ===========================================================================
+void PowerParser::wt_satsize(int wtn, int *wt_satsize)
+{
+    // Forwards the wt_satsize output pointer to get_satsize() of
+    // when...then number wtn (1-based).  The pointer is used, so there is
+    // no unused-parameter warning to work around.
+    whenthens[wtn-1].get_satsize(wt_satsize);
+}
+
+
+// ===========================================================================
+// ===========================================================================
+void PowerParser::wt_getsat(int wtn, int *wt_sat, int wt_satsize)
+{
+    // wt_satsize is not consulted here; the cast only silences
+    // unused-parameter warnings.  wtn is 1-based.
+    (void)wt_satsize;
+    whenthens[wtn-1].getsat(wt_sat);
+}
+
+
+// ===========================================================================
+// ===========================================================================
+void PowerParser::wt_setsat(int wtn, int *wt_sat, int wt_satsize)
+{
+    // wt_satsize is not consulted here; the cast only silences
+    // unused-parameter warnings.  wtn is 1-based.
+    (void)wt_satsize;
+    whenthens[wtn-1].setsat(wt_sat);
+}
+
+
+// ===========================================================================
+// Get and Set the processed flag for a whenthen.
+// ===========================================================================
+void PowerParser::wt_getprocessed(int wtn, int *wtp)
+{   // wtn is 1-based; wtp is handed to getprocessed() of that when...then.
+    whenthens[wtn-1].getprocessed(wtp);
+}
+
+void PowerParser::wt_setprocessed(int wtn, int wtp)
+{   // wtn is 1-based; wtp is handed to setprocessed() of that when...then.
+    whenthens[wtn-1].setprocessed(wtp);
+}
+
+
+// ===========================================================================
+// Get and Set the sequence index for a whenthen.
+// ===========================================================================
+void PowerParser::wt_getseq(int wtn, int *wtseq)
+{   // wtn is 1-based; wtseq is handed to getseq() of that when...then.
+    whenthens[wtn-1].getseq(wtseq);
+}
+
+void PowerParser::wt_setseq(int wtn, int wtseq)
+{   // wtn is 1-based; wtseq is handed to setseq() of that when...then.
+    whenthens[wtn-1].setseq(wtseq);
+}
+
+
+
+
+//+***************************************************************************
+// ***************************************************************************
+// restart_block commands
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Check if a restart block condition is satisfied.
+// ===========================================================================
+void PowerParser::rb_check(vector<string> &code_varnames,
+                     vector<string> &code_values,
+                     vector<int> &vv_active, int *rbci,
+                     int *rb_ntriggered, int *rb_triggered_indices)
+{
+    // *rb_ntriggered counts the blocks whose check reported 1, their 0-based
+    // indices go to rb_triggered_indices, and *rbci is 1 if any did, else 0.
+    int ierr = 0;
+    stringstream serr;
+    *rbci = 0;
+    *rb_ntriggered = 0;
+    for (int b = 0; b < (int)restartblocks.size(); b++) {
+        int hit = 0;
+        restartblocks[b].check_rb(code_varnames, code_values, vv_active, &hit,
+                                  serr, ierr);
+        if (hit != 1) continue;
+        *rbci = 1;
+        rb_triggered_indices[(*rb_ntriggered)++] = b;
+    }
+    process_error(serr, ierr);
+}
+
+
+// ===========================================================================
+// Get/set the restart block names
+// ===========================================================================
+void PowerParser::get_rb_names(vector<string> &rb_names_vstr)
+{   // Refill rb_names_vstr with the name of every restart block, in order.
+    rb_names_vstr.clear();
+    const int nblocks = (int)restartblocks.size();
+    for (int b = 0; b < nblocks; b++)
+        rb_names_vstr.push_back(restartblocks[b].get_name());
+}
+void PowerParser::set_rb_names(vector<string> &rb_names_vstr)
+{   // Overwrite bnames_on_dump with a copy of the caller's names, in order.
+    bnames_on_dump.clear();
+    vector<string>::const_iterator it = rb_names_vstr.begin();
+    for (; it != rb_names_vstr.end(); ++it)
+        bnames_on_dump.push_back(*it);
+}
+
+
+// ===========================================================================
+// Get/set the restart block activity flags.
+// ===========================================================================
+void PowerParser::get_rb_aflags(int *rb_aflags)
+{   // Writes get_aflag() of restart block b into rb_aflags[b] for every b.
+    int *out = rb_aflags;
+    for (int b = 0; b < (int)restartblocks.size(); b++)
+        *out++ = restartblocks[b].get_aflag();
+}
+void PowerParser::set_rb_aflags(int *rb_aflags, int rb_num)
+{   // Overwrite baflags_on_dump with the first rb_num entries of rb_aflags.
+    baflags_on_dump.clear();
+    const int *in = rb_aflags;
+    for (int left = rb_num; left > 0; left--)
+        baflags_on_dump.push_back(*in++);
+}
+
+
+// ===========================================================================
+// Get/set the restart block satsize.
+// satsize is defined as the total number of sub-conditions over all restart
+// blocks.
+// ===========================================================================
+void PowerParser::get_rb_satsize(int *rb_satsize)
+{   // Stores the sum of get_satsize() over all restart blocks in *rb_satsize.
+    int total = 0;
+    for (int b = 0; b < (int)restartblocks.size(); b++) {
+        total += restartblocks[b].get_satsize();
+    }
+    *rb_satsize = total;
+}
+
+void PowerParser::set_rb_satsize(int rb_satsize)
+{   // Record the caller-supplied satsize in satsize_on_dump.
+    satsize_on_dump = rb_satsize;
+}
+
+
+// ===========================================================================
+// Get/set the number of sub-conditions per restart block
+// ===========================================================================
+void PowerParser::get_rb_satprb(int *rb_satprb)
+{   // Writes get_satsize() of restart block b into rb_satprb[b] for every b.
+    int *out = rb_satprb;
+    for (int b = 0; b < (int)restartblocks.size(); b++)
+        *out++ = restartblocks[b].get_satsize();
+}
+
+void PowerParser::set_rb_satprb(int *rb_satprb, int rb_num)
+{   // Overwrite rbsatprb_on_dump with the first rb_num entries of rb_satprb.
+    rbsatprb_on_dump.clear();
+    const int *in = rb_satprb;
+    for (int left = rb_num; left > 0; left--)
+        rbsatprb_on_dump.push_back(*in++);
+}
+
+
+// ===========================================================================
+// Get/set the satisfied flag for each sub-condition for each restart block
+// ===========================================================================
+void PowerParser::get_rb_sat(int *rb_sat)
+{
+    // Flatten the per-sub-condition satisfied flags into rb_sat: all flags
+    // of restart block 0 first, then block 1, and so on.
+    int *out = rb_sat;
+    for (int b = 0; b < (int)restartblocks.size(); b++) {
+        for (int s = 0; s < (int)restartblocks[b].get_satsize(); s++)
+            *out++ = restartblocks[b].get_sat(s);
+    }
+}
+
+
+void PowerParser::set_rb_sat(int *rb_sat, int rb_satsize)
+{
+    // Overwrite rbsat_on_dump with rb_satsize booleans; an entry is true
+    // exactly when the corresponding rb_sat value equals 1.
+    rbsat_on_dump.clear();
+    for (int s = 0; s < rb_satsize; s++) {
+        rbsat_on_dump.push_back(rb_sat[s] == 1);
+    }
+}
+
+
+// ===========================================================================
+// Get a combined list of the restart block variable names. Note that there
+// might be more than one variable name per restart block depending on how
+// complicated the condition is.
+// ===========================================================================
+int PowerParser::get_rb_num_varnames()
+{
+    // Despite the "rb" in the name, the count covers the variable names of
+    // the restart blocks and of the when...then blocks.
+    int total = 0;
+    for (int b = 0; b < (int)restartblocks.size(); b++)
+        total += restartblocks[b].get_num_varnames();
+    for (int w = 0; w < (int)whenthens.size(); w++)
+        total += whenthens[w].get_num_varnames();
+    return total;
+}
+void PowerParser::get_rb_varnames(vector<string> &rb_varnames_vstr)
+{
+    // Refill rb_varnames_vstr with every variable name of the restart
+    // blocks followed by every variable name of the when...then blocks.
+    rb_varnames_vstr.clear();
+    for (int b = 0; b < (int)restartblocks.size(); b++) {
+        const int nnames = restartblocks[b].get_num_varnames();
+        for (int v = 0; v < nnames; v++)
+            rb_varnames_vstr.push_back(restartblocks[b].get_varname(v));
+    }
+    for (int w = 0; w < (int)whenthens.size(); w++) {
+        const int nnames = whenthens[w].get_num_varnames();
+        for (int v = 0; v < nnames; v++)
+            rb_varnames_vstr.push_back(whenthens[w].get_varname(v));
+    }
+}
+
+
+// ===========================================================================
+// Print info about restart blocks.
+// ===========================================================================
+void PowerParser::list_rb()
+{
+    // The listing is built on every processor (list_rb_ss) but only the IO
+    // processor writes it to stdout.
+    stringstream listing;
+    list_rb_ss(listing);
+    if (comm->isIOProc()) cout << listing.str();
+}
+
+void PowerParser::list_rb_start()
+{   // Rebuild ssfout with the full listing, then reset its position to 0.
+    ssfout.str(string());
+    list_rb_ss(ssfout);
+    ssfout_current_pos = 0;
+}
+
+void PowerParser::list_rb_ss(stringstream &ssc)
+{
+    const int nblocks = (int)restartblocks.size();
+
+    // With at least one restart block, echo each of them in order.
+    if (nblocks > 0) {
+        for (int b = 0; b < nblocks; ++b) list_one_rb_ss(ssc, b);
+        return;
+    }
+
+    // Otherwise just report that there is nothing to list.
+    ssc << endl << "No restart blocks have been specified." << endl << endl;
+}
+
+void PowerParser::list_rb1_start(int *rb)
+{
+    ssfout.str("");           // throw away whatever ssfout held before
+    list_rb1_ss(ssfout, rb);  // list the one block *rb, or an error message
+    ssfout_current_pos = 0;   // get_ssfout_line() reads from the top again
+}
+
+void PowerParser::list_rb1_ss(stringstream &ssc, int *rbp)
+{
+    const int index = *rbp;
+    const int nblocks = (int)restartblocks.size();
+
+    // Reject an index outside [0, nblocks) with a message naming which bound
+    // was violated; nothing else is written in that case.
+    const char *problem = NULL;
+    if (index < 0)             problem = "List restart block error: rb<0";
+    else if (index >= nblocks) problem = "List restart block error: rb>=rblen";
+    if (problem != NULL) {
+        ssc << endl << problem << endl << endl;
+        return;
+    }
+
+    list_one_rb_ss(ssc, index);
+}
+
+
+// ===========================================================================
+// List info for one restart block, index=rb
+// ===========================================================================
+void PowerParser::list_one_rb_ss(stringstream &ssc, int rb)
+{
+    // Block name, active flag, and the condition as the block lists it.
+    ssc << endl
+        << "** Echo restart block info, restart block name = "
+        << restartblocks[rb].get_name() << endl;
+    const bool is_active = (restartblocks[rb].get_aflag() == 1);
+    ssc << "    Active flag = " << (is_active ? "true" : "false") << endl;
+    ssc << "    Condition for this restart block =" << endl;
+    restartblocks[rb].list_condition("        ", "        ", ssc);
+    ssc << endl;
+    // One line per sub-condition, numbered from 1.
+    const int nsub = restartblocks[rb].get_satsize();
+    ssc << "    Number of sub-conditions = " << nsub << endl;
+    for (int k = 0; k < nsub; ++k) {
+        const char *flag = (restartblocks[rb].get_sat(k) == 1) ? "true" : "false";
+        ssc << "        For sub-condition " << k+1
+            << ", satisfied flag = " << flag << endl;
+    }
+    ssc << endl;
+}
+
+//+***************************************************************************
+// ***************************************************************************
+// Debugging commands/functions.
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Print a cmds line.
+// ===========================================================================
+void PowerParser::print_line(int i)
+{
+    if (!comm->isIOProc()) return;   // only the IO processor echoes commands
+    stringstream formatted;
+    cmds[i].print_using_words(formatted);
+    cout << formatted.str() << endl;
+}
+
+void PowerParser::print_line(Cmd &cmd)
+{
+    // Echo one command through its print_using_words() formatter. Only the
+    // IO processor writes anything; all other processors return immediately.
+    if (comm->isIOProc()) {
+        stringstream formatted;
+        cmd.print_using_words(formatted);
+        cout << formatted.str() << endl;
+    }
+}
+
+
+// ===========================================================================
+// List variables
+//
+// lv1 and lv2 are header and footer strings to delimit the list.
+//
+// var_to_list is a specific variable to list. If it is blank then all vars
+// will be listed, otherwise only the specific var will be listed.
+// ===========================================================================
+void PowerParser::list_vars(string lv1, string lv2, string var_to_list)
+{
+    // Build the table in memory first; only the IO processor prints it.
+    stringstream table;
+    list_vars_ss(lv1, lv2, var_to_list, table);
+    if (!comm->isIOProc()) return;
+    cout << table.str();
+}
+    
+void PowerParser::list_vars_start()
+{
+    ssfout.str("");                             // throw away whatever ssfout held before
+    ssfout << pre_defined_varss.str() << endl;  // text held in pre_defined_varss goes first
+    list_vars_ss("", "", "", ssfout);           // all variables, empty header/footer strings
+    ssfout_current_pos = 0;                     // get_ssfout_line() reads from the top again
+}
+
+void PowerParser::list_vars_ss(string lv1, string lv2, string var_to_list,
+                         stringstream &ssvars)
+{
+    // The table is only ever assembled on the IO processor.
+    if (!comm->isIOProc()) return;
+
+    // Header string supplied by the caller.
+    ssvars << lv1 << endl;
+
+    // Table contents: the heading row first, then one row per listed value.
+    vector< vector<string> > table;
+    {
+        vector<string> heading;
+        heading.push_back("Variable name");
+        heading.push_back("Value");
+        heading.push_back("Description");
+        table.push_back(heading);
+    }
+    int n_heading_rows = (int)table.size();
+
+    // An empty var_to_list selects every variable in vmap.
+    const bool list_everything = (var_to_list == "");
+
+    for (map<string, Variable>::iterator it = vmap.begin();
+         it != vmap.end(); ++it) {
+        const string &base_name = it->first;
+        if (!list_everything && base_name != var_to_list) continue;
+
+        Variable &var = it->second;
+        const int nvalues = var.get_nvalues();
+        const string description = var.get_description();
+        const int ndim = var.get_ndim();
+        vector<int> indices(ndim, 0);
+
+        for (int n = 0; n < nvalues; ++n) {
+            // Variables holding several values are listed one element per
+            // row, labelled name(i,j,...); a single value keeps its bare name.
+            string label = base_name;
+            if (nvalues > 1) {
+                var.get_indices(n, indices);
+                stringstream builder;
+                builder << base_name << "(";
+                for (int d = 0; d < ndim; ++d) {
+                    if (d > 0) builder << ",";
+                    builder << indices[d];
+                }
+                builder << ")";
+                label = builder.str();
+            }
+
+            vector<string> row;
+            row.push_back(label);
+            row.push_back(var.get_var_value(n));
+            row.push_back(description);
+            table.push_back(row);
+        }
+    }
+
+    // Let Parser_utils line the columns up, then add the caller's footer.
+    Parser_utils putils(index_base);
+    putils.print_strings(table, n_heading_rows, 3, 3, 85, ssvars);
+
+    ssvars << lv2 << endl;
+}
+
+
+// ===========================================================================
+// List functions.
+// ===========================================================================
+void PowerParser::list_funcs(string lf1, string lf2)
+{
+    // list_funcs_ss() fills the stream on the IO processor only, and only
+    // that processor prints it.
+    stringstream table;
+    list_funcs_ss(lf1, lf2, table);
+    if (comm->isIOProc()) cout << table.str();
+
+    // Alternate method: read the listing back one line at a time.
+    //list_funcs_start();
+    //for (;;) {
+    //    string sline;
+    //    if (!get_ssfout_line(sline)) break;
+    //    if (comm->isIOProc()) {
+    //        cout << sline << endl;
+    //    }
+    //}
+}
+
+void PowerParser::list_funcs_start()
+{
+    ssfout.str("");                 // throw away whatever ssfout held before
+    list_funcs_ss("", "", ssfout);  // function table, empty header/footer strings
+    ssfout_current_pos = 0;         // get_ssfout_line() reads from the top again
+}
+
+void PowerParser::list_funcs_ss(string lf1, string lf2, stringstream &ssfunc)
+{
+    // The table is only ever assembled on the IO processor.
+    if (!comm->isIOProc()) return;
+
+    // Header string supplied by the caller.
+    ssfunc << lf1 << endl;
+
+    // Table contents: the heading row first, then one row per function.
+    vector< vector<string> > table;
+    {
+        vector<string> heading;
+        heading.push_back("Function name");
+        heading.push_back("nargs");
+        heading.push_back("type");
+        heading.push_back("Description");
+        table.push_back(heading);
+    }
+    int n_heading_rows = (int)table.size();
+
+    for (map<string, Function>::iterator it = fmap.begin();
+         it != fmap.end(); ++it) {
+        Function &func = it->second;
+
+        // The argument count goes through a stream to become text.
+        const int nargs = func.get_num_args();
+        stringstream nargs_text;
+        nargs_text << nargs;
+
+        // Type and description are fetched as strings.
+        const string type = func.get_type();
+        const string description = func.get_description();
+
+        vector<string> row;
+        row.push_back(it->first);
+        row.push_back(nargs_text.str());
+        row.push_back(type);
+        row.push_back(description);
+        table.push_back(row);
+    }
+
+    // Let Parser_utils line the columns up, then add the caller's footer.
+    Parser_utils putils(index_base);
+    putils.print_strings(table, n_heading_rows, 3, 4, 85, ssfunc);
+    ssfunc << lf2 << endl;
+}
+
+
+
+// ===========================================================================
+// List final set of commands.
+// ===========================================================================
+void PowerParser::list_cmdsf(string lc1, string lc2)
+{
+    // Assemble the listing in memory; only the IO processor prints it.
+    stringstream listing;
+    list_cmdsf_ss(lc1, lc2, listing);
+    if (!comm->isIOProc()) return;
+    cout << listing.str();
+}
+
+void PowerParser::list_cmdsf_start()
+{
+    ssfout.str("");                 // throw away whatever ssfout held before
+    list_cmdsf_ss("", "", ssfout);  // final commands, empty header/footer strings
+    ssfout_current_pos = 0;         // get_ssfout_line() reads from the top again
+}
+
+void PowerParser::list_cmdsf_ss(string lc1, string lc2,
+                          stringstream &ssc)
+{
+    // The listing is only ever assembled on the IO processor.
+    if (!comm->isIOProc()) return;
+
+    // The caller's header and footer are written as given, no newline added.
+    ssc << lc1;
+    // The final commands, one per line.
+    for (int c = 0; c < (int)cmdsfp->size(); ++c) {
+        (*cmdsfp)[c].print_using_words_fm(ssc);
+        ssc << endl;
+    }
+    // Each when...then block is echoed between "when (" and "endwhen".
+    for (int w = 0; w < (int)whenthens.size(); ++w) {
+        ssc << "when (";
+        whenthens[w].list_condition("", "      ", ssc);
+        ssc << ") then " << endl;
+        whenthens[w].list_cmdsf_ss(ssc);
+        ssc << "endwhen" << endl;
+    }
+    ssc << lc2;
+}
+
+
+void PowerParser::list_wt_cmdsf()
+{
+    // Every processor formats the listing, but only the IO processor
+    // writes it to stdout.
+    stringstream listing;
+    list_wt_cmdsf_ss(listing);
+    if (comm->isIOProc()) cout << listing.str();
+}
+
+void PowerParser::list_wt_cmdsf_start()
+{
+    ssfout.str("");            // throw away whatever ssfout held before
+    list_wt_cmdsf_ss(ssfout);  // write the when...then listing into ssfout
+    ssfout_current_pos = 0;    // get_ssfout_line() reads from the top again
+}
+
+void PowerParser::list_wt_cmdsf_ss(stringstream &ssc)
+{
+    const int nwt = (int)whenthens.size();
+    // Nothing to echo: say so and stop.
+    if (nwt <= 0) {
+        ssc << endl << "No when...then commands have been specified."
+            << endl << endl;
+        return;
+    }
+    // Echo the condition and final commands of each block, numbered from 1.
+    for (int w = 0; w < nwt; ++w) {
+        ssc << endl
+            << "** Echo when...then final buffer, when...then number = "
+            << w+1 << endl
+            << "    Condition for this when...then =" << endl;
+        whenthens[w].list_condition("        ", "        ", ssc);
+        ssc << endl << "    Commands for this when...then =" << endl;
+        whenthens[w].list_cmdsf_ss(ssc);
+        ssc << endl;
+    }
+}
+
+
+
+//+***************************************************************************
+// ***************************************************************************
+// Low level functions.
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Get a line from the ssfout stringstream.
+// ===========================================================================
+bool PowerParser::get_ssfout_line(string &sline)
+{
+    // get_line_from_string() takes its input by non-const reference, so it
+    // is handed a copy of the buffer; it also advances ssfout_current_pos.
+    string buffer = ssfout.str();
+    return get_line_from_string(buffer, sline, ssfout_current_pos);
+}
+
+
+// ===========================================================================
+// Broadcast the buffer to all the other processors.
+// ===========================================================================
+void PowerParser::broadcast_buffer(string &s_in)
+{
+    // With no comm, or with only one processor, there is nothing to do.
+    if (comm == NULL) return;
+    if (comm->getNumProcs() == 1) return;
+
+    // Only the io processor knows the real length; everyone else starts from
+    // zero and gets the length from the first broadcast.
+    int cbuffer_len = 0;
+    if (comm->isIOProc()) cbuffer_len = (int)s_in.size();
+    comm->broadcast((char*)&cbuffer_len, sizeof(int));
+
+    // All processors allocate memory for the buffer; the io processor fills
+    // it from the string.
+    vector<char> cbuffer(cbuffer_len, 'a');
+    if (comm->isIOProc()) {
+        for (int i=0; i<cbuffer_len; i++) cbuffer[i] = s_in[i];
+    }
+
+    // The io processor broadcasts the buffer to everyone. An empty vector
+    // has no element 0 to take the address of, so an empty buffer is not
+    // sent at all. This assumes the length broadcast above left every
+    // processor with the same cbuffer_len, so that all of them skip the
+    // call together.
+    if (cbuffer_len > 0) comm->broadcast(&cbuffer[0], cbuffer_len);
+
+    // On all the other processors, copy the buffer into the string. For an
+    // empty buffer this leaves the string empty.
+    if (!comm->isIOProc()) {
+        s_in.assign(cbuffer.begin(), cbuffer.end());
+    }
+}
+
+
+
+// ===========================================================================
+// Get the next line from the buffer without any processing.
+// Starting at the current position in the buffer, current_pos, search for
+// the next \n. The output string is from the current position to the \n (but
+// does not include the \n). Also remove any \r in the string.
+// ===========================================================================
+bool PowerParser::get_line_from_string(string &strn, string &sout, int &current_pos)
+{
+    // strn         buffer to read from; it is not modified.
+    // sout         receives the line, without its \n and without any \r.
+    // current_pos  where to start reading; updated to where the next call
+    //              should start.
+    // Returns false only when current_pos is already at or past the end.
+
+    // The output always starts out empty, even when no line is left.
+    sout = "";
+
+    // Nothing is left once the position has reached the end.
+    const int total = (int)strn.size();
+    if (current_pos >= total) return false;
+
+    // The line runs up to the next \n, or to the end of the string when
+    // there is none. An unterminated last line is ok and does happen.
+    const string::size_type eol = strn.find('\n', current_pos);
+    const bool terminated = (eol != string::npos);
+    const int line_end = terminated ? (int)eol : total;
+
+    // Copy the line, ignoring any \r: some systems use \r\n instead of \n.
+    for (int pos = current_pos; pos < line_end; ++pos) {
+        const char ch = strn[pos];
+        if (ch != '\r') sout += ch;
+    }
+
+    // Continue just after the \n next time, or at the end of the string if
+    // the line was not terminated.
+    current_pos = terminated ? line_end + 1 : total;
+    return true;
+}
+
+
+// ===========================================================================
+// This is similar to the get_line_from_string() but this routine gets
+// lines from the input string that are separated by semicolons.
+// Starting at the current position in the buffer, current_pos, search for
+// the next ;. The output string is from the current position to the ; (but
+// does not include the ;).
+// ===========================================================================
+bool PowerParser::get_sc_line_from_string(string &strn, string &sout, int &current_pos)
+{
+    // Semicolon-separated counterpart of get_line_from_string().
+
+    // The output always starts out empty, even when nothing is left.
+    sout = "";
+
+    // Nothing is left once the position has reached the end.
+    const int total = (int)strn.size();
+    if (current_pos >= total) return false;
+
+    // Once a !, a # or a // has been seen, semicolons no longer end the
+    // piece. The flag is never cleared again, so this holds for the rest
+    // of the string. The marker characters themselves are still copied.
+    bool sc_disabled = false;
+
+    int pos = current_pos;
+    for (; pos < total; ++pos) {
+        const char ch = strn[pos];
+
+        if (ch == '!' || ch == '#') {
+            sc_disabled = true;
+        }
+        else if (ch == '/' && pos+1 < total && strn[pos+1] == '/') {
+            sc_disabled = true;
+        }
+
+        // A live semicolon ends the piece and is itself left out.
+        if (ch == ';' && !sc_disabled) break;
+
+        sout += ch;
+    }
+
+    // Stopping on a semicolon means continuing just past it next time. If
+    // the string ran out first, which is ok and does happen, continue at
+    // the end of the string.
+    current_pos = (pos < total) ? pos + 1 : total;
+    return true;
+}
+
+
+// ===========================================================================
+// Get rid of leading and trailing blanks and tabs.
+// ===========================================================================
+void PowerParser::eliminate_white_space(string &sline)
+{
+    // Only blanks and tabs count as white space here.
+    const string whitespace = " \t";
+
+    // Positions stay in string::size_type so that they compare against
+    // string::npos exactly, with no conversion through int on the way.
+    const string::size_type first = sline.find_first_not_of(whitespace);
+
+    // Empty, or nothing but white space: the result is the empty string.
+    if (first == string::npos) {
+        sline.erase();
+        return;
+    }
+
+    // Eliminate trailing stuff first so that "first" stays valid, then the
+    // leading stuff. A non-white character exists, so "last" is not npos.
+    const string::size_type last = sline.find_last_not_of(whitespace);
+    sline.erase(last + 1);
+    sline.erase(0, first);
+}
+
+
+// ===========================================================================
+// Convert an array of characters into a vector of C++ strings.
+//
+// chars_1d   The array of characters (input). This is composed of a sequence
+//            of strings, each one nchar long. The number of strings is nv.
+// vstr       Vector of C++ strings (output). There will be nv number of
+//            C++ strings in this vector. The length of each C++ string will
+//            vary depending on how much white space is removed.
+// nv         Number of strings in chars_1d (input).
+// nchar      Number of characters in each string in chars_1d (input).
+//
+// Why would anyone want to do this?
+// When passing strings between Fortran and C++ it is cleaner and easier to
+// pass a packed array of single characters. This routine takes that packed
+// array and converts it to something familiar to C++ developers.
+// ===========================================================================
+void PowerParser::chars_to_vstr(char *chars_1d, vector<string> &vstr,
+                          int nv, int nchar)
+{
+    // Each string is nchar characters wide. Leading and trailing spaces are
+    // removed from it and the result is appended to vstr; whatever vstr
+    // already holds is kept. Only the space character is trimmed, tabs and
+    // other characters are left alone.
+
+    // A negative width is treated like a zero width: there is nothing to
+    // copy, so each of the nv strings comes out empty.
+    if (nchar < 0) nchar = 0;
+
+    // Loop through all the strings in the array of characters.
+    for (int i=0; i<nv; i++) {
+
+        // Start of string i in the 1d array. The string is trimmed by
+        // moving two indices over the array itself, so there is no
+        // temporary buffer that would have to be freed if push_back threw.
+        const char *field = chars_1d + i * nchar;
+
+        // Remove trailing spaces. iend is one past the last character that
+        // is not a space; it reaches 0 when the string is all spaces.
+        int iend = nchar;
+        while (iend > 0 && field[iend-1] == ' ') {
+            iend--;
+        }
+
+        // Remove leading spaces. istart is the first character that is not
+        // a space, or iend when nothing is left.
+        int istart = 0;
+        while (istart < iend && field[istart] == ' ') {
+            istart++;
+        }
+
+        // Create the C++ string. A string of nothing but spaces becomes an
+        // empty string here; it is not kept as nchar spaces.
+        string s(field + istart, iend - istart);
+
+        // Add the string to the vector of strings.
+        vstr.push_back(s);
+    }
+
+}
+
+
+// ===========================================================================
+// Convert a vector of C++ strings into a packed array of characters.
+//
+// chars_1d   The array of characters (output). This is composed of a sequence
+//            of strings, each one nchar long. The number of strings is nv.
+// vstr       Vector of C++ strings (input). There will be nv number of
+//            C++ strings in this vector. The length of each C++ string will
+//            vary, whitespace is added to each C++ string to make its length
+//            nchar.
+// nv         Number of strings in chars_1d (input).
+// nchar      Number of characters in each string in chars_1d (input).
+//
+// Why would anyone want to do this?
+// When passing strings between Fortran and C++ it is cleaner and easier to
+// pass a packed array of single characters. This routine takes the vector
+// of strings and converts that to a packed character array.
+// ===========================================================================
+void PowerParser::vstr_to_chars(char *chars_1d, vector<string> &vstr,
+                          int nv, int nchar)
+{
+    // chars_1d is described as holding nv strings, so no more than nv are
+    // written even if the vector is longer. If the vector is shorter, the
+    // strings past its end are left as they are.
+    int nstr = (int)vstr.size();
+    if (nstr > nv) nstr = nv;
+
+    for (int strdex=0; strdex<nstr; strdex++) {
+        // Starting location in the 1d array of characters for each string.
+        int i1d = strdex * nchar;
+
+        // Number of characters to copy. A C++ string longer than nchar is
+        // cut off at nchar.
+        int nc = (int)vstr[strdex].size();
+        if (nc > nchar) nc = nchar;
+
+        // Copy the string into the 1d character array.
+        for (int c=0; c<nc; c++) {
+            chars_1d[i1d+c] = vstr[strdex][c];
+        }
+
+        // Pad with blanks.
+        for (int c=nc; c<nchar; c++) {
+            chars_1d[i1d+c] = ' ';
+        }
+    }
+}
+
+
+
+
+} // end of PP namespace
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Restartblock.hh
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Restartblock.hh
@@ -0,0 +1,136 @@
+/* Copyright 2015.  Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ *  PowerParser is a general purpose input file parser for software applications.
+ *
+ *  Authors: Chuck Wingate   XCP-2   caw@lanl.gov
+ *           Robert Robey    XCP-2   brobey@lanl.gov
+ */
+
+#ifndef RESTARTBLOCKHHINCLUDE
+#define RESTARTBLOCKHHINCLUDE
+
+// ***************************************************************************
+// ***************************************************************************
+// Restart Blocks
+// Run the code until a restart block condition is satisfied. Set the restart
+// block as active, write a restart dump, stop the code, and restart.
+// ***************************************************************************
+// ***************************************************************************
+
+#include <string>
+#include <deque>
+#include <vector>
+#include <map>
+#include <sstream>
+
+#include "Word.hh"
+
+namespace PP
+{
+using std::string;
+using std::deque;
+using std::vector;
+using std::map;
+using std::stringstream;
+
+class Restartblock
+{
+    // A named restart block: a condition made of subconditions, plus an active flag.
+public:
+    Restartblock();
+    Restartblock(int &nrb, Cmd &cmdi, bool &skiprb,
+                 bool &single_line_rb,
+                 deque<string> &bnames_on_dump,
+                 deque<bool> &baflags_on_dump,
+                 stringstream &serr, int &ierr);
+
+    Restartblock(int &nrb, Cmd &cmdi, bool &skiprb,
+                 bool &single_line_rb,
+                 deque<string> &bnames_on_dump,
+                 deque<bool> &baflags_on_dump,
+                 deque<int> &rbsatprb_on_dump,
+                 deque<bool> &rbsat_on_dump,
+                 stringstream &serr, int &ierr);
+
+
+    void check_rb(vector<string> &code_varnames,
+                  vector<string> &code_values,
+                  vector<int> &vv_active, int *rbci,
+                  stringstream &serr, int &ierr);
+
+    void list_condition(string offset1, string offset2,
+                        stringstream &ssc);
+    // Accessors. The ones marked const only read the block's own members.
+    string get_name() const { return name; }
+    int get_aflag() const { return active; }
+    void set_aflag(int af) { active = af; }
+    int get_satsize() const { return (int)satisfied.size(); }
+    int get_sat(int j) const { return (satisfied[j] == "true") ? 1 : 0; }
+    int get_num_varnames() const { return (int)varname.size(); }
+    string get_varname(int i) { return varname[i].get_string(); }
+    // get_varname() is left non-const: it calls Word::get_string(), and
+    // whether that is a const member cannot be seen from this header.
+private:
+
+    void add_word(Cmd &cmdi, int idex, deque<Word> &wq);
+    void add_word(Cmd &cmdi, int idex, deque<Word> &wq, string sadd);
+    void process_words(deque <Word> &words, vector<string> &code_varnames,
+                       vector<string> &code_values, vector<int> &vv_active,
+                       stringstream &serr, int &ierr);
+    void delete_words(int i1, int i2, deque <Word> &words);
+    void replace_words(int i1, int i2, deque <Word> &words, Word &w);
+
+
+    // The condition:   varname relation value  logical  varname relation value etc.
+    // Example:           time     .gt.   3.0    .and.    ncycle   .ge.    50
+    // The condition is thought of as a sequence of subconditions connected by
+    // logical operators. The above example has two subconditions connected by the
+    // .and. logical operator.
+    deque<Word>   varname;    // Host code variable name to be replaced by host code value.
+    deque<Word>   relation;   // Relation between varname and value, like .gt., .hglt., ...
+    deque<Word>   value;      // Value to compare with host code value.
+    deque<Word>   logop;      // Logical operator connecting subconditions.
+    deque<string> satisfied;  // Satisfied flag for each subcondition.
+    deque<bool>   has_got;    // Has got flag for the relation. This is true if
+                              // the relation is .hggt., .hglt., ..., false otherwise.
+
+    // The restart_block commands are processed if active is true (== 1)
+    int active;
+
+    // The name of this restart block.
+    string name;
+};
+
+
+} // end of PP namespace
+
+#endif
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Restartblock.cc
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Restartblock.cc
@@ -0,0 +1,598 @@
+/* Copyright 2015.  Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ *  PowerParser is a general purpose input file parser for software applications.
+ *
+ *  Authors: Chuck Wingate   XCP-2   caw@lanl.gov
+ *           Robert Robey    XCP-2   brobey@lanl.gov
+ */
+
+// ***************************************************************************
+// ***************************************************************************
+// Restart Blocks
+// Run the code until a restart block condition is satisfied. Set the restart
+// block as active, write a restart dump, stop the code, and restart.
+// ***************************************************************************
+// ***************************************************************************
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <string>
+#include <vector>
+#include <deque>
+#include <sstream>
+#include <map>
+#include <math.h>
+
+#include "Variable.hh"
+#include "Function.hh"
+#include "Word.hh"
+#include "Parser_math.hh"
+#include "Cmd.hh"
+#include "Restartblock.hh"
+
+namespace PP
+{
+using std::cout;
+using std::endl;
+using std::string;
+using std::deque;
+using std::vector;
+using std::stringstream;
+using std::pair;
+using std::ifstream;
+using std::ios;
+
+
+// ===========================================================================
+// Default constructor: a block without name or subconditions whose active
+// flag is -1, the value used for a block that has not been evaluated yet.
+// ===========================================================================
+Restartblock::Restartblock()
+    : active(-1)
+{}
+
+// ===========================================================================
+// Usual constructor. Parses a restart_block command such as
+//    restart_block name (time .eq. 50) then
+// or the single line form
+//    restart_block name (time .eq. 50) sizemat(2) = 0.005
+//
+//    nrb              Incremented by one for this restart block.
+//    cmdi             The restart_block command. For the single line form its
+//                     leading words are deleted (see the end of this function).
+//    skiprb           Output, false only if the dump says this block is active.
+//    single_line_rb   Output, true if the command does not end with "then".
+//    bnames_on_dump   Restart block names stored on the restart dump.
+//    baflags_on_dump  Active flag stored on the dump for each of those names.
+//    rbsatprb_on_dump Number of satisfied flags on the dump for each block.
+//    rbsat_on_dump    Satisfied flags of all the blocks on the dump, in order.
+//    serr, ierr       Error text and error flag, ierr is set to 2 on any error.
+// ===========================================================================
+Restartblock::Restartblock(int &nrb, Cmd &cmdi, bool &skiprb,
+                           bool &single_line_rb,
+                           deque<string> &bnames_on_dump,
+                           deque<bool> &baflags_on_dump,
+                           deque<int> &rbsatprb_on_dump,
+                           deque<bool> &rbsat_on_dump,
+                           stringstream &serr, int &ierr)
+{
+    active = -1;
+    nrb += 1;
+    skiprb = true;
+    single_line_rb = false;
+    int nwords = cmdi.get_nwords();
+
+    if (nwords < 8) {
+        cmdi.fatal_error(0, serr, ierr);
+        serr << "A restart block line must have at least 8 words on it (the "
+             << endl
+             << "opening and closing parentheses each count as a word)"
+             << endl;
+        serr << "This restart block command only has " << nwords <<
+            " words on it." << endl;
+        serr << "Expected something like (this has 8 words):" << endl;
+        serr << "    restart_block after5 (time .gt. 5) then" << endl;
+        serr << "Or perhaps a single line restart block like (this has 13 words):"
+             << endl;
+        serr << "    restart_block after5 (time .gt. 5) sizemat(2) = 0.005" << endl;
+        ierr = 2;
+        return;
+    }
+
+    string p2 = cmdi.get_string(2);
+    if (p2 != "(") {
+        cmdi.fatal_error(2, serr, ierr);
+        serr << "Expected an open parentheses following the restart block name.."
+             << endl;
+        serr << "Instead found " << p2 << " following the restart block name."
+             << endl;
+        serr << "The restart block command should be something like:" << endl;
+        serr << "    restart_block t_is_gt_5 (time .gt. 5) then" << endl;
+        serr << "Or perhaps a single line restart block like:" << endl;
+        serr << "    restart_block t_is_gt_5 (time .gt. 5) sizemat(2) = 0.005" << endl;
+        ierr = 2;
+        return;
+    }
+
+    // A then keyword is only allowed as the last word on the line.
+    for (int i=1; i<nwords-1; i++) {
+        string t1 = cmdi.get_string(i);
+        if (t1 == "then") {
+            cmdi.fatal_error(i, serr, ierr);
+            serr << "Found a then keyword embedded in the restart_block command."
+                 << endl;
+            serr << "If a then keyword is present it must be the last "
+                 "word on the line." << endl;
+            serr << "The restart_block command should be something like:" << endl;
+            serr << "    restart_block t_is_gt_5 (time .gt. 5) then" << endl;
+            serr << "Or perhaps a single line restart block like:" << endl;
+            serr << "    restart_block t_is_gt_5 (time .gt. 5) sizemat(2) = 0.005" << endl;
+            ierr = 2;
+            return;
+        }
+    }
+
+    // Find the closing parenthesis
+    int close_paren_dex = -1;
+    for (int i=2; i<nwords; i++) {
+        string pi = cmdi.get_string(i);
+        if (pi == "then") break;
+        if (pi == ")") {
+            close_paren_dex = i;
+            break;
+        }
+    }
+
+    if (close_paren_dex == -1) {
+        cmdi.fatal_error(0, serr, ierr);
+        serr << "Expected a close parentheses following the condition."
+             << endl;
+        serr << "Did not find a close parentheses." << endl;
+        serr << "The restart_block command should be something like:" << endl;
+        serr << "    restart_block t_is_gt_5 (time .gt. 5) then" << endl;
+        serr << "Or perhaps a single line restart block like:" << endl;
+        serr << "    restart_block t_is_gt_5 (time .gt. 5) sizemat(2) = 0.005" << endl;
+        ierr = 2;
+        return;
+    }
+
+    // nw is the number of words strictly between the two parentheses.
+    int nw = close_paren_dex - 3;
+    if ((nw+1)%4 != 0) {
+        cmdi.fatal_error(0, serr, ierr);
+        serr << "Wrong number of words in the restart_block condition."
+             << endl;
+        serr << "The number of words in this condition is " << nw << endl;
+        serr << "The number of words + 1 should be a multiple of 4." << endl;
+        serr << "The condition should be something like:" << endl;
+        serr << "    time .gt. 5" << endl;
+        serr << "This has 3 words and 3+1 is a multiple of 4." << endl;
+        serr << "Or the following is valid" << endl;
+        serr << "    time .gt. 5 .and. ncycle .ge. 10" << endl;
+        serr << "This has 7 words and 7+1 is a multiple of 4." << endl;
+        ierr = 2;
+        return;
+    }
+
+    // Split the condition into groups of varname, relation, value and logop.
+    for (int i=3; i<close_paren_dex; i+=4) {
+        add_word(cmdi, i,   varname);
+        add_word(cmdi, i+1, relation);
+        add_word(cmdi, i+2, value);
+
+        if (i+3 < close_paren_dex) add_word(cmdi, i+3, logop);
+        else                       add_word(cmdi, i+3, logop, "none");
+
+        satisfied.push_back("false");
+    }
+
+    // Check to make sure that the relation is valid.
+    for (int n=0; n<(int)varname.size(); n++) {
+        bool valid_relation = false;
+        if (relation[n].get_string() == ".hglt.") valid_relation = true;
+        if (relation[n].get_string() == ".hgle.") valid_relation = true;
+        if (relation[n].get_string() == ".hgeq.") valid_relation = true;
+        if (relation[n].get_string() == ".hgne.") valid_relation = true;
+        if (relation[n].get_string() == ".hggt.") valid_relation = true;
+        if (relation[n].get_string() == ".hgge.") valid_relation = true;
+        if (relation[n].get_string() == ".lt.") valid_relation = true;
+        if (relation[n].get_string() == ".le.") valid_relation = true;
+        if (relation[n].get_string() == ".eq.") valid_relation = true;
+        if (relation[n].get_string() == ".ne.") valid_relation = true;
+        if (relation[n].get_string() == ".gt.") valid_relation = true;
+        if (relation[n].get_string() == ".ge.") valid_relation = true;
+
+        if (!valid_relation) {
+            relation[n].fatal_error(serr, ierr);
+            serr << "Invalid restart_block relation." << endl;
+            serr << "Expected  .lt., .le., .eq., .ne., .gt., .ge." << endl;
+            serr << "Also could be .hglt., .hgle., .hgeq., .hgne., .hggt., .hgge." << endl;
+            serr << "Instead found relation:  " << relation[n].get_string() << endl;
+            ierr = 2;
+            return;
+        }
+    }
+
+    // The name of the restart block is the second word on the
+    // restart_block command.
+    name = cmdi.get_string(1);
+
+    // If this is a restart, then restart block names and active flags
+    // could be stored on the restart dump. If this restart block matches
+    // any stored on the dump, then set the active flag to what is on
+    // the dump.
+    for (int i=0; i<(int)bnames_on_dump.size(); i++) {
+        if (name == bnames_on_dump[i]) {
+            active = 0;
+            if (baflags_on_dump[i]) active = 1;
+
+            int satdex = 0;
+            for (int j=0; j<i; j++) {
+                satdex += rbsatprb_on_dump[j];
+            }
+            // The dump may hold a different number of flags than this
+            // condition has subconditions (the input can change between
+            // restarts), so never index past satisfied or rbsat_on_dump.
+            int nsat = rbsatprb_on_dump[i];
+            if (nsat > (int)satisfied.size()) nsat = (int)satisfied.size();
+            for (int j=0; j<nsat && satdex+j<(int)rbsat_on_dump.size(); j++) {
+                satisfied[j] = rbsat_on_dump[satdex+j] ? "true" : "false";
+            }
+
+            break;
+        }
+    }
+
+
+    // If this restart block is active, that means we want to process
+    // the commands in the block, therefore we set the skip flag to false.
+    if (active == 1) skiprb = false;
+
+
+    // Set the has gotten to flags.
+    for (int n=0; n<(int)varname.size(); n++) {
+        bool hg = false;
+
+        if (relation[n].get_string() == ".hglt.") {
+            hg = true;
+            relation[n].set_value(".lt.");
+        }
+        else if (relation[n].get_string() == ".hgle.") {
+            hg = true;
+            relation[n].set_value(".le.");
+        }
+        else if (relation[n].get_string() == ".hgeq.") {
+            hg = true;
+            relation[n].set_value(".eq.");
+        }
+        else if (relation[n].get_string() == ".hgne.") {
+            hg = true;
+            relation[n].set_value(".ne.");
+        }
+        else if (relation[n].get_string() == ".hggt.") {
+            hg = true;
+            relation[n].set_value(".gt.");
+        }
+        else if (relation[n].get_string() == ".hgge.") {
+            hg = true;
+            relation[n].set_value(".ge.");
+        }
+
+
+        has_got.push_back(hg);
+    }
+
+
+    // Handle single line restart_block
+    // NOTE(review): 5 is hard-coded while ")" sits at close_paren_dex, which is
+    // at least 6 and grows with each extra subcondition; confirm the meaning of
+    // Cmd::delete_words() and whether close_paren_dex should be used here.
+    if (cmdi.get_string(nwords-1) != "then") {
+        single_line_rb = true;
+        cmdi.delete_words(0, 5);
+        cmdi.reset_name_type();
+    }
+}
+
+
+// ===========================================================================
+// Append word idex of the command cmdi to wq, keeping its source location.
+// ===========================================================================
+void Restartblock::add_word(Cmd &cmdi, int idex, deque<Word> &wq)
+{
+    int line_num = cmdi.get_line_number(idex);
+    int file_line_num = cmdi.get_file_line_number(idex);
+    string file_name = cmdi.get_filename(idex);
+    deque<string> *all_lines = cmdi.get_lines();
+    wq.push_back(Word(cmdi.get_string(idex), line_num, file_line_num,
+                      file_name, all_lines));
+}
+
+void Restartblock::add_word(Cmd &cmdi, int idex, deque<Word> &wq, string sadd)
+{
+    // The stored text is sadd, the source location is that of word idex.
+    int line_num = cmdi.get_line_number(idex);
+    int file_line_num = cmdi.get_file_line_number(idex);
+    string file_name = cmdi.get_filename(idex);
+    deque<string> *all_lines = cmdi.get_lines();
+    wq.push_back(Word(sadd, line_num, file_line_num, file_name, all_lines));
+}
+
+
+// ===========================================================================
+// This is the check for when the condition is satisfied.
+//    code_varnames  Host code variable names that may appear in the condition.
+//    code_values    Current value for each name in code_varnames.
+//    vv_active      Active flag for each name in code_varnames, passed on to
+//                   process_words together with the names and values.
+//    rbci           Output, 1 if the calling code should stop the calculation
+//                   (normally to do a restart), otherwise 0.
+//    serr, ierr     Error text and error flag.
+// ===========================================================================
+void Restartblock::check_rb(vector<string> &code_varnames,
+                            vector<string> &code_values,
+                            vector<int> &vv_active, int *rbci,
+                            stringstream &serr, int &ierr)
+{
+    // *rbci is the key output result from this function
+    //     *rbci = 0    Calling code does nothing
+    //     *rbci = 1    Calling code stops calculation, normally does restart
+    *rbci = 0;
+
+    // A block without subconditions (for example a default constructed one)
+    // has nothing to evaluate, and indexing wordsf[0] below would be invalid.
+    int num_sub_cond = (int)varname.size();
+    if (num_sub_cond == 0) return;
+
+    deque<Word> wordsf;
+    bool skip_sat = false;
+    for (int n=0; n<num_sub_cond; n++) {
+        deque<Word> words;
+
+        if (satisfied[n] == "true") {
+            int ln = varname[n].get_line_number();
+            int file_ln = varname[n].get_file_line_number();
+            string fname = varname[n].get_filename();
+            deque<string> *lines = varname[n].get_lines();
+            Word w("true", ln, file_ln, fname, lines);
+            words.push_back(w);
+        }
+        else {
+            words.push_back(varname[n]);
+            words.push_back(relation[n]);
+            words.push_back(value[n]);
+
+            process_words(words, code_varnames, code_values, vv_active,
+                          serr, ierr);
+
+            if (has_got[n]) {
+                if (words[0].get_bool(serr, ierr)) {
+                    bool doit = true;
+                    if (n > 0) {
+                        if (logop[n-1].get_string() == ".andthen." && skip_sat) {
+                            doit = false;
+                        }
+                    }
+                    if (doit) satisfied[n] = "true";
+                }
+                else {
+                    skip_sat = true;
+                }
+            }
+        }
+
+        // Collect the result word of this subcondition and the operator after it.
+        wordsf.push_back(words[0]);
+        if (logop[n].get_string() == "none") break;
+        else wordsf.push_back(logop[n]);
+    }
+
+    process_words(wordsf, code_varnames, code_values, vv_active,
+                  serr, ierr);
+
+    // This is the current value of the condition that was calculated above.
+    bool b = wordsf[0].get_bool(serr, ierr);
+
+    // Here we check to see if the condition has changed, i.e. is b different
+    // from the active flag. If so, then we end the calculation.
+    // When the restart block is first created, the active flag is set to -1,
+    // this is for runs from scratch.
+    // If this is a restart, then the active flag will come from the dump and
+    // be either 0 or 1.
+    // So if active is -1 and the condition is true, then we end the calculation
+    // right away (this should not be common, but could happen).
+    //
+    // Changed on 7/2/10 - The original idea for restart blocks was that they
+    // would trigger when the condition changed from false to true. But they
+    // would also trigger when the condition changed back from true to false.
+    // This causes problems for the users when the restart block would
+    // repeatedly trigger because the condition oscillates between true and
+    // false. Therefore, change the restart blocks so they trigger once and
+    // only once (which happens when the condition first becomes true). If
+    // the users ever need a restart block that also triggers when the
+    // condition changes from true to false, then some
+    // sort of option could be put in to allow this.
+    if (b  && active == -1) { *rbci = 1; active = 1; return; }   // Trigger
+    if (b  && active ==  0) { *rbci = 1; active = 1; return; }   // Trigger
+    if (b  && active ==  1) { *rbci = 0;             return; }   // Do nothing
+    if (!b && active == -1) { *rbci = 0; active = 0; return; }   // Do nothing
+    if (!b && active ==  0) { *rbci = 0;             return; }   // Do nothing
+
+    // This is the true to false trigger that causes problems.
+    //if (!b && active ==  1) { *rbci = 1; active = 0; return; }   // Trigger
+}
+
+
+
+// ===========================================================================
+// Given a deque of words, go through them evaluating relational and logical
+// operators. The words should evaluate to one final word.
+// Words equal to a name in code_varnames are first replaced by the matching
+// code_values entry, or, if vv_active is 0 for that name, the subcondition
+// starting at the word is replaced by false. ierr is set to 2 on error.
+// ===========================================================================
+void Restartblock::process_words(deque <Word> &words, vector<string> &code_varnames,
+                                 vector<string> &code_values,
+                                 vector<int> &vv_active,
+                                 stringstream &serr, int &ierr)
+{
+    Parser_math pmath;
+    // Replace any code vars with their values.
+    int i2 = (int)words.size();
+    for (int i=0; i<i2; i++) {
+        for (int j=0; j<(int)code_varnames.size(); j++) {
+            if (words[i].get_string() == code_varnames[j]) {
+                int ln = words[i].get_line_number();
+                int file_ln = words[i].get_file_line_number();
+                string fname = words[i].get_filename();
+                deque<string> *lines = words[i].get_lines();
+                if (vv_active[j] == 0) {
+                    // Clamp the range, fewer than two words may follow.
+                    int iend = (i+2 < i2) ? i+2 : i2-1;
+                    Word wj("false", ln, file_ln, fname, lines);
+                    replace_words(i, iend, words, wj);
+                    i2 -= iend - i;
+                    break;
+                }
+                else {
+                    Word wj(code_values[j], ln, file_ln, fname, lines);
+                    words[i] = wj;
+                }
+            }
+        }
+    }
+
+    i2 = (int)words.size() - 1;
+    for (int level=6; level>=0; level--) {
+        for (int i=0; i<=i2; i+=1) {
+            if (words[i].is_operator(level)) {
+                int ln = words[i].get_line_number();
+                int file_ln = words[i].get_file_line_number();
+                string fname = words[i].get_filename();
+                deque<string> *lines = words[i].get_lines();
+                Word w("", ln, file_ln, fname, lines);
+                string op_type = words[i].get_op_type();
+                // level 2, .not., is unary and only needs the word after it,
+                // every other operator also needs the word before it.
+                int ifirst = (level == 2) ? i : i-1;
+                if (ifirst < 0 || i+1 > i2) {
+                    words[i].fatal_error(serr, ierr);
+                    serr << "restart_block condition has an operator without an operand." << endl;
+                    ierr = 2;
+                    return;
+                }
+
+                if (op_type == "relational")
+                    pmath.do_op_relational(i-1, i, i+1, words, w, serr, ierr);
+                if (op_type == "logical" && level == 2)   // .not. is unary
+                    pmath.do_op_not(i, i+1, words, w, serr, ierr);
+                if (op_type == "logical" && level != 2)
+                    pmath.do_op_logical(i-1, i, i+1, words, w, serr, ierr);
+
+                replace_words(ifirst, i+1, words, w);
+                i2 -= i+1 - ifirst;
+                i = ifirst;
+            }
+        }
+    }
+
+    // The condition has to evaluate to a single boolean value.
+    if ((int)words.size() != 1) {
+        if (!words.empty()) words[0].fatal_error(serr, ierr);
+        serr << "restart_block condition did not evaluate to a single boolean value."
+             << endl;
+        serr << "Fix the restart_block condition" << endl;
+        ierr = 2;
+    }
+}
+
+
+// ===========================================================================
+// Write the condition of this restart block to the stringstream ssc, one
+// subcondition per line. The first line is prefixed with offset1 and every
+// following line with offset2. Has gotten to relations are listed with their
+// .hg prefix. This lets the user identify the block and helps debugging.
+// ===========================================================================
+void Restartblock::list_condition(string offset1, string offset2,
+                                  stringstream &ssc)
+{
+    const int nsub = (int)varname.size();
+    for (int isub=0; isub<nsub; isub++) {
+        // Has gotten to relations get their .hg spelling for the listing.
+        string rel = relation[isub].get_string();
+        if (has_got[isub]) {
+            if      (rel == ".lt.") rel = ".hglt.";
+            else if (rel == ".le.") rel = ".hgle.";
+            else if (rel == ".eq.") rel = ".hgeq.";
+            else if (rel == ".ne.") rel = ".hgne.";
+            else if (rel == ".gt.") rel = ".hggt.";
+            else if (rel == ".ge.") rel = ".hgge.";
+        }
+
+        const string &prefix = (isub == 0) ? offset1 : offset2;
+        ssc << prefix << varname[isub].get_string() << " " << rel << " "
+            << value[isub].get_string();
+
+        // The last subcondition has no logical operator and no newline.
+        const string op = logop[isub].get_string();
+        if (op == "none") break;
+        ssc << " " << op;
+        ssc << endl;
+    }
+}
+
+
+// ===========================================================================
+// Erase the words at positions i1 to i2 (both included) from the deque.
+// ===========================================================================
+void Restartblock::delete_words(int i1, int i2, deque <Word> &words)
+{
+    deque<Word>::iterator first = words.begin() + i1;
+    words.erase(first, first + (i2 - i1 + 1));
+}
+
+
+// ===========================================================================
+// Substitute the single word w for the words at positions i1 to i2 inclusive.
+// ===========================================================================
+void Restartblock::replace_words(int i1, int i2, deque <Word> &words, Word &w)
+{
+    // Remove the old range first, then put w where the range started.
+    delete_words(i1, i2, words);
+    words.insert(words.begin() + i1, w);
+}
+
+
+
+} // End of the PP namespace
+
+
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Variable.hh
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Variable.hh
@@ -0,0 +1,130 @@
+/* Copyright 2015.  Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ *  PowerParser is a general purpose input file parser for software applications.
+ *
+ *  Authors: Chuck Wingate   XCP-2   caw@lanl.gov
+ *           Robert Robey    XCP-2   brobey@lanl.gov
+ */
+
+#ifndef VARIABLEHHINCLUDE
+#define VARIABLEHHINCLUDE
+
+// ***************************************************************************
+// ***************************************************************************
+// This class holds information about a variable.
+// ***************************************************************************
+// ***************************************************************************
+
+#include <string>
+#include <sstream>
+#include <vector>
+#include <deque>
+
+namespace PP
+{
+using std::string;
+using std::stringstream;
+using std::vector;
+using std::deque;
+
+
+
+class Variable
+{
+    // Holds the name, the values and the dimension data of one variable.
+public:
+    Variable();
+    Variable(int base);
+    Variable(string nme, string v, bool pred, string tdes);
+    Variable(string nme);
+    Variable(string nme, vector<int> &istart, vector<string> &vvec,
+             int lnum, int file_lnum, string fname, deque<string> *lines,
+             stringstream &serr, int &ierr);
+
+    // Accessor methods.
+    string get_varname() { return name; }
+    void set_varname(string s) { name = s; }
+    int get_ndim() { return ndim; }
+
+    int get_nvalues() { return (int)value.size(); }
+    // The two inline overloads index value directly, without a bounds check.
+    string get_var_value()   { return value[0]; }
+    string get_var_value(int idex)   { return value[idex]; }
+    string get_var_value(vector<int> &adex, string vname, int lnum,
+                         int file_lnum, string fname, deque<string> *lines,
+                         stringstream &serr, int &ierr);
+
+    // NOTE(review): lnum, file_lnum, fname and lines presumably locate the input line for error output; confirm in Variable.cc.
+    void set_var_value(vector<int> &istart, vector<string> &valvec,
+                       int lnum, int file_lnum, string fname,
+                       deque<string> *lines, stringstream &serr, int &ierr);
+    void bump_var(vector<int> &istart, int increment,
+                  int lnum, int file_lnum, string fname,
+                  deque<string> *lines, stringstream &serr, int &ierr);
+
+
+    void set_bounds(vector<int> &bounds, int lnum, int file_lnum,
+                    string fname, deque<string> *lines,
+                    stringstream &serr, int &ierr);
+
+    void get_indices(int icdex, vector<int> &adex);
+
+    string get_description() { return description; }
+    void set_description(string vardes) { description = vardes; }
+
+    bool is_pre_defined() { return pre_defined; }
+
+    void set_temporary(bool b) { temporary = b; }
+    bool is_temporary() { return temporary; }
+
+private:
+
+    // name         The name of the variable.
+    // value        Vector containing the values of the variable.
+    // ndim         Number of dimensions, for example var(9,3) has ndim=2
+    // maxdim       Max num for each dimension except the last.
+    // lnum_ndim    The line number where ndim was set.
+    // lnum_bounds  The line number where maxdim was set.
+    // pre_defined  Pre-defined vars cannot be redefined.
+    // description  Text description of the variable.
+    // temporary    A temporary variable.
+    string name;
+    vector<string> value;
+    int ndim, lnum_bounds, lnum_ndim;
+    vector<int> maxdim;
+    bool pre_defined, temporary;
+    string description;
+};
+
+
+} // End of the PP namespace
+
+#endif
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Variable.cc
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Variable.cc
@@ -0,0 +1,491 @@
+/* Copyright 2015.  Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ *  PowerParser is a general purpose input file parser for software applications.
+ *
+ *  Authors: Chuck Wingate   XCP-2   caw@lanl.gov
+ *           Robert Robey    XCP-2   brobey@lanl.gov
+ */
+
+// ***************************************************************************
+// ***************************************************************************
+// This class holds information about a variable. It is mostly for use with
+// the parser.
+// ***************************************************************************
+// ***************************************************************************
+
+#include <iostream>
+#include <iomanip>
+#include <string>
+#include <sstream>
+#include <vector>
+#include <deque>
+
+#include "Parser_utils.hh"
+#include "Word.hh"
+#include "Variable.hh"
+
+namespace PP
+{
+using std:: string;
+using std::cout;
+using std::endl;
+using std::stringstream;
+using std::setprecision;
+using std::vector;
+using std::deque;
+
+static int index_base = 1;
+
+
+// ===========================================================================
+// Default constructor: an unnamed variable holding one placeholder value.
+// ndim = -1 means the dimensionality has not been set yet.
+// ===========================================================================
+Variable::Variable()
+    : name("__NO_NAME_GIVEN__"),
+      value(1, string("__NO_VALUE_GIVEN__")),
+      ndim(-1),
+      lnum_bounds(0),
+      lnum_ndim(0),
+      pre_defined(false),
+      temporary(false),
+      description("")
+{}
+
+// ===========================================================================
+// Constructor to reset index base. The scalar members get the same defaults
+// as in the default constructor instead of being left uninitialized.
+// ===========================================================================
+Variable::Variable(int base) : ndim(-1), lnum_bounds(0), lnum_ndim(0),
+                               pre_defined(false), temporary(false)
+{ index_base = base; }
+
+// ===========================================================================
+// Constructor given a string as input. This constructs a scalar variable
+// (ndim = 0) whose single value is v.
+// ===========================================================================
+Variable::Variable(string nme, string v, bool pred, string tdes)
+    : name(nme),
+      value(1, v),
+      ndim(0),
+      lnum_bounds(0),
+      lnum_ndim(0),
+      pre_defined(pred),
+      temporary(false),
+      description(tdes)
+{}
+
+
+// ===========================================================================
+// Constructor for variables with no value: only the name is known, so a
+// placeholder value is stored and the dimensionality is left unset (-1).
+// ===========================================================================
+Variable::Variable(string nme)
+    : name(nme),
+      value(1, string("__NO_VALUE_GIVEN__")),
+      ndim(-1),
+      lnum_bounds(0),
+      lnum_ndim(0),
+      pre_defined(false),
+      temporary(false),
+      description("")
+{}
+
+
+// ===========================================================================
+// Constructor given a vector of strings as input. The values are stored
+// through set_var_value, which also reports any error via serr and ierr.
+// ===========================================================================
+Variable::Variable(string nme, vector<int> &istart, vector<string> &valvec,
+                   int lnum, int file_lnum, string fname, deque<string> *lines,
+                   stringstream &serr, int &ierr)
+    : name(nme),
+      ndim(-1), lnum_bounds(0), lnum_ndim(0),
+      pre_defined(false), temporary(false),
+      description("")
+{
+    // Start from an empty variable with no dimensionality and let
+    // set_var_value fix the dimensionality and store the values.
+    set_var_value(istart, valvec, lnum, file_lnum, fname, lines, serr, ierr);
+}
+
+
+// ===========================================================================
+// Store valvec in the variable, starting at the array location given by
+// istart (indices are relative to the index base, 1 unless it was reset).
+// On error a message is added to serr, ierr is set to 2 and no values are
+// stored. This function works for any dimension, 0,1,2,3,...
+// ===========================================================================
+void Variable::set_var_value(vector<int> &istart, vector<string> &valvec,
+                             int lnum, int file_lnum, string fname,
+                             deque<string> *lines, stringstream &serr, int &ierr)
+{
+    // Cannot redefine a pre-defined variable.
+    if (pre_defined) {
+        serr << endl;
+        serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl;
+        serr << "    " << (*lines)[lnum-1] << endl;
+        serr << "in file: " << fname << endl;
+        serr << "Cannot redefine a pre-defined variable." << endl;
+        ierr = 2;
+        return;
+    }
+    
+    // Set the array dimension and make sure the user is not trying to
+    // change it.
+    int ndim_new = (int)istart.size();
+    if (ndim >= 0) {
+        if (ndim != ndim_new) {
+            // Throw an error
+            serr << endl;
+            serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl;
+            serr << "    " << (*lines)[lnum-1] << endl;
+            serr << "in file: " << fname << endl;
+            serr << "Cannot redefine the dimensionality of a variable." << endl;
+            serr << "Original number of dimensions = " << ndim << endl;
+            serr << "New number of dimensions = " << ndim_new << endl;
+            if (lnum_ndim > 0) {
+                serr << "Previously set in line " << lnum_ndim << ":" << endl;
+                serr << "    " << (*lines)[lnum_ndim-1] << endl;
+            }
+            ierr = 2;
+            return;
+        }
+    }
+    else {
+        ndim = ndim_new;
+        lnum_ndim = lnum;
+    }
+
+    int bsize = (int)maxdim.size();
+
+    if (ndim == 0 && bsize > 0) {
+        serr << endl;
+        serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl;
+        serr << "    " << (*lines)[lnum-1] << endl;
+        serr << "in file: " << fname << endl;
+        serr << "Array boundaries not allowed for scalar variable." << endl;
+        if (lnum_bounds > 0) {
+            serr << "Array boundaries were set in line " << lnum_bounds << ":" << endl;
+            serr << "    " << (*lines)[lnum_bounds-1] << endl;
+        }
+        ierr = 2;
+        return;
+    }
+
+    if (ndim > 0) {
+        if (ndim != bsize+1) {
+            serr << endl;
+            serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl;
+            serr << "    " << (*lines)[lnum-1] << endl;
+            serr << "in file: " << fname << endl;
+            serr << "Number of dimensions = " << ndim << endl;
+            serr << "Number of array boundaries + 1 = " << bsize+1 << endl;
+            serr << "These should match but don't. " << endl;
+            ierr = 2;
+            return;
+        }
+    }
+
+    // Find the 1d starting position given multiple array indices.
+    Parser_utils putils(index_base);
+    int i1 = putils.start_dex(istart, maxdim);
+    if (i1 < 0) {
+        // A negative 1d position would write in front of value[0].
+        serr << endl << "*** FATAL ERROR in line " << file_lnum << ":" << endl
+             << "    " << (*lines)[lnum-1] << endl << "in file: " << fname << endl
+             << "Array index is below the lowest allowed index." << endl;
+        ierr = 2;
+        return;
+    }
+
+    // nvals is the number of values after the = sign. valvec has already
+    // been expanded to include multiplicities.
+    int nvals = (int)valvec.size();
+    // Get memory that we need, then store the array values.
+    if (i1+nvals > (int)value.size()) value.resize(i1+nvals, "");
+    for (int i=0; i<nvals; i++) value[i1+i] = valvec[i];
+}
+
+
+// ===========================================================================
+// Increment (or decrement) a variable value by an integer amount.
+// ===========================================================================
+void Variable::bump_var(vector<int> &istart, int increment,
+                        int lnum, int file_lnum, string fname,
+                        deque<string> *lines, stringstream &serr, int &ierr)
+{
+    // Find the 1d starting position given multiple array indices.
+    Parser_utils putils(index_base);
+    int i1 = putils.start_dex(istart, maxdim);
+
+    // Only an element that is already set and numeric can be bumped; anything
+    // else is reported through serr with ierr = 2 and the value is untouched.
+    if (i1 < 0 || i1 >= (int)value.size()) {
+        serr << endl << "*** FATAL ERROR in line " << file_lnum << ":" << endl
+             << "    " << (*lines)[lnum-1] << endl << "in file: " << fname << endl
+             << "Cannot increment an array element that has not been set." << endl;
+        ierr = 2;
+        return;
+    }
+    Word w1(value[i1], lnum, file_lnum, fname, lines);
+    if (!w1.is_number()) {
+        serr << endl << "*** FATAL ERROR in line " << file_lnum << ":" << endl
+             << "    " << (*lines)[lnum-1] << endl << "in file: " << fname << endl
+             << "Cannot increment a variable whose value is not a number." << endl;
+        ierr = 2;
+        return;
+    }
+    stringstream ss;   // integers stay integers, anything else goes via double
+    if (w1.is_integer()) ss << w1.get_int(serr, ierr) + increment;
+    else ss << setprecision(15) << w1.get_double(serr, ierr) + increment;
+    value[i1] = ss.str();
+}
+
+
+
+// ===========================================================================
+// The problem with multi-dimensional variable arrays is that the user has
+// to tell us the bounds on every dimension except the last one. This info
+// arrives in the bounds vector and is stored in maxdim. The bounds can be
+// set only once.
+//
+// This function works for any dimension, 0,1,2,3,...
+// ===========================================================================
+void Variable::set_bounds(vector<int> &bounds, int lnum, int file_lnum,
+                          string fname, deque<string> *lines,
+                          stringstream &serr, int &ierr)
+
+{
+    // Pre-defined variables are off limits.
+    if (pre_defined) {
+        serr << endl
+             << "*** FATAL ERROR in line " << file_lnum << ":" << endl
+             << "    " << (*lines)[lnum-1] << endl
+             << "in file: " << fname << endl
+             << "Cannot redefine a pre-defined variable." << endl;
+        ierr = 2;
+        return;
+    }
+
+    // There is one bound for every dimension except the last, hence the +1.
+    const int ndim_new = (int)bounds.size() + 1;
+
+    if (ndim < 0) {
+        // The dimensionality has not been set yet, so set it here.
+        ndim = ndim_new;
+        lnum_ndim = lnum; // Store line num for better err messages.
+    }
+    else if (ndim != ndim_new) {
+        // The user is not allowed to change the dimensionality.
+        serr << endl
+             << "*** FATAL ERROR in line " << file_lnum << ":" << endl
+             << "    " << (*lines)[lnum-1] << endl
+             << "in file: " << fname << endl
+             << "Cannot redefine the dimensionality of a variable "
+                "(set_bounds)." << endl
+             << "Original number of dimensions = " << ndim << endl
+             << "New number of dimensions = " << ndim_new << endl;
+        if (lnum_ndim > 0) {
+            serr << "Previously set in line " << lnum_ndim << ":" << endl
+                 << "    " << (*lines)[lnum_ndim-1] << endl;
+        }
+        ierr = 2;
+        return;
+    }
+
+    // The bounds can be set only once; resetting them is an error.
+    if (!maxdim.empty()) {
+        serr << endl
+             << "*** FATAL ERROR in line " << file_lnum << ":" << endl
+             << "    " << (*lines)[lnum-1] << endl
+             << "in file: " << fname << endl
+             << "The bounds on this array has already been set," << endl
+             << "cannot reset them." << endl;
+        if (lnum_bounds > 0) {
+            serr << "Previously set in line " << lnum_bounds << ":" << endl
+                 << "    " << (*lines)[lnum_bounds-1] << endl;
+        }
+        ierr = 2;
+        return;
+    }
+
+    // Remember the line num where the bounds were set; it is quoted in
+    // later err messages.
+    lnum_bounds = lnum;
+
+    // Keep a copy of the bounds. maxdim is known to be empty at this point,
+    // so a plain assignment gives the same result as clear + push_back.
+    maxdim = bounds;
+}
+
+
+// ===========================================================================
+// Given indices, in adex, get the value of the variable.
+// For example, suppose you want the value of
+//    $var2d(3,5)
+// The adex vector contains 2 numbers, 3 and 5, for the fortran indices.
+// The start_dex function is used to get the 1d index into the value array.
+// On error a message is added to serr, ierr is set to 2 and "" is returned.
+// This function works for any dimension, 0,1,2,3,...
+// ===========================================================================
+string Variable::get_var_value(vector<int> &adex, string vname, int lnum,
+                               int file_lnum, string fname, deque<string> *lines,
+                               stringstream &serr, int &ierr)
+{
+    int adex_size = (int)adex.size();
+
+    // Scalar variables, or a reference without indices, yield the first value.
+    if (ndim == 0 || adex_size == 0) return value[0];
+
+    // The adex indices and bounds indices must match.
+    if (adex_size - 1 != (int)maxdim.size()) {
+        serr << endl;
+        serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl;
+        serr << "    " << (*lines)[lnum-1] << endl;
+        serr << "in file: " << fname << endl;
+        serr << "The dimensionality of variable " << vname << endl;
+        serr << "does not match what was previously set." << endl;
+        if (lnum_bounds > 0) {
+            serr << "Previous dimensionality set in line " << lnum_bounds << ":" << endl;
+            serr << "    " << (*lines)[lnum_bounds-1] << endl;
+        }
+        ierr = 2;
+        return "";
+    }
+    
+    // Indices cannot exceed max allowed (adex is referenced from 1, fortran
+    // index). NOTE(review): index_base can be reset - confirm base 0 works.
+    for (int d=0; d<(int)maxdim.size(); d++) {
+        if (adex[d] > maxdim[d]) {
+            serr << endl;
+            serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl;
+            serr << "    " << (*lines)[lnum-1] << endl;
+            serr << "in file: " << fname << endl;
+            serr << "Variable name = " << vname << endl;
+            serr << "The value for dimension " << d+1 << " = " << adex[d] << endl;
+            serr << "This exceeds the max dimension of " << maxdim[d] << endl;
+            if (lnum_bounds > 0) {
+                serr << "The array bounds were set in line " << lnum_bounds << ":" << endl;
+                serr << "    " << (*lines)[lnum_bounds-1] << endl;
+            }
+            ierr = 2;
+        }
+    }
+
+    if (ierr == 2) return "";  // also taken when ierr was already 2 on entry
+
+    // Indices cannot be < 1. Every index is checked, including the last one.
+    for (int d=0; d<(int)adex.size(); d++) {
+        if (adex[d] < 1) {
+            serr << endl;
+            serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl;
+            serr << "    " << (*lines)[lnum-1] << endl;
+            serr << "in file: " << fname << endl;
+            serr << "Variable name = " << vname << endl;
+            serr << "Expected index greater than or equal to 1 " << endl;
+            serr << "Instead, index =  " << adex[d] << endl;
+            ierr = 2;
+        }
+    }
+
+    if (ierr == 2) return "";
+
+
+    // Using the indices in adex and the bounds for multi-d arrays, maxdim,
+    // get the 1d index into the value array.
+    Parser_utils putils(index_base);
+    int i1 = putils.start_dex(adex, maxdim);
+
+    // Check that the value array size has not been exceeded.
+    if (i1 >= (int)value.size()) {
+        serr << endl;
+        serr << "*** FATAL ERROR in line " << file_lnum << ":" << endl;
+        serr << "    " << (*lines)[lnum-1] << endl;
+        serr << "in file: " << fname << endl;
+        serr << "Variable name = " << vname << endl;
+        serr << "Exceeded array bounds. Check to make sure you are not" << endl;
+        serr << "requesting an array element you have not yet set." << endl;
+
+        // Report the indices of the last stored value next to the request.
+        vector<int> maxdex((int)adex.size(), 0);
+        get_indices((int)value.size()-1, maxdex);
+        for (int d=0; d<(int)adex.size(); d++) {
+            string s = "";
+            if (adex[d] > maxdex[d]) s = "  ERROR, max exceeded";
+            serr << "    Requested index = " << adex[d]
+                 << "  Max index = " << maxdex[d] << s << endl;
+        }
+
+        ierr = 2;
+        return "";
+    }
+
+    // Return the value.
+    return value[i1];
+}
+
+
+
+// ===========================================================================
+// Given the 1d index, icdex (starting from 0), find the corresponding
+// multi dimensional fortran indices (each starting from 1).
+//
+// Example 1: Consider a 1d array
+//     var1d(1) = 1 3 5 9 -4 -5 6
+// Suppose icdex=3, corresponding to array value 9.
+// This 1d case is very simple, all we do is add 1 to icdex to get a reference
+// from 1, thus returning 4.
+//
+// Example 2: Consider a 2d array
+//     $var2d(1,1) = 11. 21. 31.   12. 22. 32.   13. 23. 33.
+// Where the max of the first dimension is 3. Suppose the user specifies
+// icdex = 5, this corresponds to array value 32. The two indices returned
+// would be 3,2 (referenced from 1).
+//
+// The adex vector contains the output indices; for example 2 this would be
+// 3 and 2.
+//
+// This function works for any dimension, 0,1,2,3,...
+// ===========================================================================
+void Variable::get_indices(int icdex, vector<int> &adex)
+{
+    // Scalars have no indices, so adex is left alone for them. Otherwise the
+    // conversion is done by reverse_dex using the value count and maxdim.
+    if (ndim != 0) {
+        int value_count = (int)value.size();
+        Parser_utils converter(index_base);
+        converter.reverse_dex(icdex, value_count, adex, maxdim);
+    }
+}
+
+
+
+
+} // End of the PP namespace
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Whenthen.hh
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Whenthen.hh
@@ -0,0 +1,139 @@
+/* Copyright 2015.  Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ *  PowerParser is a general purpose input file parser for software applications.
+ *
+ *  Authors: Chuck Wingate   XCP-2   caw@lanl.gov
+ *           Robert Robey    XCP-2   brobey@lanl.gov
+ */
+
+#ifndef WHENTHENHHINCLUDE
+#define WHENTHENHHINCLUDE
+
+// ***************************************************************************
+// This class holds one when...then (or whenever) command: the condition,
+// made of subconditions connected by logical operators, and the commands
+// to be done when the condition is satisfied.
+// ***************************************************************************
+
+#include <string>
+#include <deque>
+#include <vector>
+#include <map>
+#include <sstream>
+
+#include "Word.hh"
+
+namespace PP
+{
+using std::string;
+using std::deque;
+using std::vector;
+using std::map;
+using std::stringstream;
+
+class Whenthen
+{
+    // One when...then command: a condition plus the commands to be done.
+public:
+    Whenthen();
+    Whenthen(int &nwhen, Cmd &cmdi, bool &skipwhen, bool &single_line_when,
+             bool eflag, stringstream &serr, int &ierr);  // eflag is stored in ever_flag
+    void add_cmdf(Cmd &cmdi);
+    void list_condition(string offset1, string offset2,
+                        stringstream &ssc);
+    void list_cmdsf_ss(stringstream &ssc);
+
+    void check_wt(vector<string> &code_varnames,
+                  vector<string> &code_values,
+                  vector<int> &vv_active,
+                  int *wtci, stringstream &serr, int &ierr);
+
+    deque<Cmd> *get_cmdsf_ptr() { return &cmdsf; }  // points at the member, not a copy
+
+    void get_char_array_size(int *ca_size);
+    void get_char_array(string &sc);
+
+    void get_satsize(int *sat_size);
+    void getsat(int *sat);
+    void setsat(int *sat);
+    void getprocessed(int *wtp);
+    void setprocessed(int wtp);
+    void getseq(int *wtseq);
+    void setseq(int wtseq);
+    int get_num_varnames() { return (int)varname.size(); }  // one name per subcondition
+    string get_varname(int i) { return varname[i].get_string(); }  // i is not range checked
+
+
+private:
+
+    void add_word(Cmd &cmdi, int idex, deque<Word> &wq);
+    void add_word(Cmd &cmdi, int idex, deque<Word> &wq, string sadd);
+    void process_words(deque <Word> &words, vector<string> &code_varnames,
+                       vector<string> &code_values,
+                       vector<int> &vv_active,
+                       stringstream &serr, int &ierr);
+    void delete_words(int i1, int i2, deque <Word> &words);
+    void replace_words(int i1, int i2, deque <Word> &words, Word &w);
+
+    // The condition:   varname relation value  logical  varname relation value etc.
+    // Example:           time     .gt.   3.0    .and.    ncycle   .ge.    50
+    // The condition is thought of as a sequence of subconditions connected by
+    // logical operators. The above example has two subconditions connected by the
+    // .and. logical operator.
+    deque<Word>   varname;    // Host code variable name to be replaced by host code value.
+    deque<Word>   relation;   // Relation between varname and value, like .gt., .hglt., ...
+    deque<Word>   value;      // Value to compare with host code value.
+    deque<Word>   logop;      // Logical operator connecting subconditions.
+    deque<string> satisfied;  // Satisfied flag for each subcondition ("false" at first).
+    deque<bool>   has_got;    // Has got flag for the relation. This is true if
+                              // the relation is .hggt., .hglt., ..., false otherwise.
+
+    // Commands to be done when the condition is satisfied.
+    deque<Cmd> cmdsf;
+
+    // The whenthen is only done once when the condition is satisfied.
+    // This flag keeps it from being done again.
+    bool processed;
+
+    // This flag is used to distinguish between the when command and the
+    // whenever command.
+    bool ever_flag;
+
+    // This is a sequence index to keep track of what order the whenthen's
+    // have been processed in. The constructors start it at -1.
+    int seqdex;
+};
+
+
+} // end of PP namespace
+
+#endif
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Whenthen.cc
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Whenthen.cc
@@ -0,0 +1,633 @@
+/* Copyright 2015.  Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ *  PowerParser is a general purpose input file parser for software applications.
+ *
+ *  Authors: Chuck Wingate   XCP-2   caw@lanl.gov
+ *           Robert Robey    XCP-2   brobey@lanl.gov
+ */
+
+// ***************************************************************************
+// ***************************************************************************
+// ***************************************************************************
+// ***************************************************************************
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <string>
+#include <vector>
+#include <deque>
+#include <sstream>
+#include <map>
+#include <math.h>
+
+#include "Variable.hh"
+#include "Function.hh"
+#include "Word.hh"
+#include "Parser_math.hh"
+#include "Cmd.hh"
+#include "Whenthen.hh"
+
+namespace PP
+{
+using std::cout;
+using std::endl;
+using std::string;
+using std::deque;
+using std::vector;
+using std::stringstream;
+using std::pair;
+using std::ifstream;
+using std::ios;
+
+
+// ===========================================================================
+// Default constructor: nothing processed yet, ever_flag cleared and no
+// sequence index assigned (-1).
+// ===========================================================================
+Whenthen::Whenthen()
+    : processed(false),
+      ever_flag(false),
+      seqdex(-1)
+{}
+
+
+// ===========================================================================
+// Usual constructor.
+// ===========================================================================
+Whenthen::Whenthen(int &nwhen, Cmd &cmdi, bool &skipwhen,
+                   bool &single_line_when, bool eflag,
+                   stringstream &serr, int &ierr)
+{
+    processed = false;
+    seqdex = -1;
+    ever_flag = eflag;
+    nwhen += 1;
+    skipwhen = true;
+    single_line_when = false;
+    int nwords = cmdi.get_nwords();
+
+    // &&&&&cw
+    //stringstream ssprint;
+    //cmdi.print_using_words(ssprint);
+    //cout << ssprint.str() << endl;
+
+    if (nwords < 7) {
+        cmdi.fatal_error(0, serr, ierr);
+        serr << "A when command line must have at least 7 words on it (the "
+             << endl
+             << "opening and closing parenthses each count as a word)"
+             << endl;
+        serr << "This when command only has " << nwords << " words on it." << endl;
+        serr << "Expected something like (this has 7 words):" << endl;
+        serr << "    when (time .gt. 5) then" << endl;
+        serr << "Or perhaps a single line when like (this has 9 words):" << endl;
+        serr << "    when (time .gt. 5) shortmodcyc = 5" << endl;
+        ierr = 2;
+        return;
+    }
+
+    string p1 = cmdi.get_string(1);
+    if (p1 != "(") {
+        cmdi.fatal_error(1, serr, ierr);
+        serr << "Expected an open parentheses following the when keyword."
+             << endl;
+        serr << "Instead found " << p1 << " following the when keyword."
+             << endl;
+        serr << "The when command should be something like:" << endl;
+        serr << "    when (time .gt. 5) then" << endl;
+        serr << "Or perhaps a single line when like:" << endl;
+        serr << "    when (time .gt. 5) shortmodcyc = 5" << endl;
+        ierr = 2;
+        return;
+    }
+
+
+    for (int i=1; i<nwords-1; i++) {
+        string t1 = cmdi.get_string(i);
+        if (t1 == "then") {
+            cmdi.fatal_error(i, serr, ierr);
+            serr << "Found a then keyword embedded in the when command."
+                 << endl;
+            serr << "If a then keyword is present it must be the last "
+                 "word on the line." << endl;
+            serr << "The when command should be something like:" << endl;
+            serr << "    when (time .gt. 5) then" << endl;
+            serr << "Or perhaps a single line when like:" << endl;
+            serr << "    when (time .gt. 5) shortmodcyc = 5" << endl;
+            ierr = 2;
+            return;
+        }
+    }
+
+
+    // Find the closing parenthesis
+    int close_paren_dex = -1;
+    for (int i=2; i<nwords; i++) {
+        string pi = cmdi.get_string(i);
+        if (pi == "then") break;
+        if (pi == ")") {
+            close_paren_dex = i;
+            break;
+        }
+    }
+
+    if (close_paren_dex == -1) {
+        cmdi.fatal_error(0, serr, ierr);
+        serr << "Expected a close parentheses following the condition."
+             << endl;
+        serr << "Did not find a close parentheses." << endl;
+        serr << "The when command should be something like:" << endl;
+        serr << "    when (time .gt. 5) then" << endl;
+        serr << "Or perhaps a single line when like:" << endl;
+        serr << "    when (time .gt. 5) shortmodcyc = 5" << endl;
+        ierr = 2;
+        return;
+    }
+
+
+    int nw = close_paren_dex - 2;
+    if ((nw+1)%4 != 0) {
+        cmdi.fatal_error(0, serr, ierr);
+        serr << "Wrong number of words in the when...then condition."
+             << endl;
+        serr << "The number of words in this condition is " << nw << endl;
+        serr << "The number of words + 1 should be a multiple of 4." << endl;
+        serr << "The condition should be something like:" << endl;
+        serr << "    time .gt. 5" << endl;
+        serr << "This has 3 words and 3+1 is a multiple of 4." << endl;
+        serr << "Or the following is valid" << endl;
+        serr << "    time .gt. 5 .and. ncycle .ge. 10" << endl;
+        serr << "This has 7 words and 7+1 is a multiple of 4." << endl;
+        ierr = 2;
+        return;
+    }
+
+    
+
+
+    for (int i=2; i<close_paren_dex; i+=4) {
+        add_word(cmdi, i,   varname);
+        add_word(cmdi, i+1, relation);
+        add_word(cmdi, i+2, value);
+
+        if (i+3 < close_paren_dex) add_word(cmdi, i+3, logop);
+        else                       add_word(cmdi, i+3, logop, "none");
+
+        satisfied.push_back("false");
+    }
+
+
+    // Check to make sure that the relation is valid.
+    for (int n=0; n<(int)varname.size(); n++) {
+        bool valid_relation = false;
+        if (relation[n].get_string() == ".hglt.") valid_relation = true;
+        if (relation[n].get_string() == ".hgle.") valid_relation = true;
+        if (relation[n].get_string() == ".hgeq.") valid_relation = true;
+        if (relation[n].get_string() == ".hgne.") valid_relation = true;
+        if (relation[n].get_string() == ".hggt.") valid_relation = true;
+        if (relation[n].get_string() == ".hgge.") valid_relation = true;
+        if (relation[n].get_string() == ".lt.") valid_relation = true;
+        if (relation[n].get_string() == ".le.") valid_relation = true;
+        if (relation[n].get_string() == ".eq.") valid_relation = true;
+        if (relation[n].get_string() == ".ne.") valid_relation = true;
+        if (relation[n].get_string() == ".gt.") valid_relation = true;
+        if (relation[n].get_string() == ".ge.") valid_relation = true;
+
+        if (!valid_relation) {
+            relation[n].fatal_error(serr, ierr);
+            serr << "Invalid when...then relation." << endl;
+            serr << "Expected  .lt., .le., .eq., .ne., .gt., .ge." << endl;
+            serr << "Also could be .hglt., .hgle., .hgeq., .hgne., .hggt., .hgge." << endl;
+            serr << "Instead found relation:  " << relation[n].get_string() << endl;
+            ierr = 2;
+            return;
+        }
+    }
+
+
+    // Set the has gotten to flags.
+    for (int n=0; n<(int)varname.size(); n++) {
+        bool hg = false;
+
+        if (relation[n].get_string() == ".hglt.") {
+            hg = true;
+            relation[n].set_value(".lt.");
+        }
+        else if (relation[n].get_string() == ".hgle.") {
+            hg = true;
+            relation[n].set_value(".le.");
+        }
+        else if (relation[n].get_string() == ".hgeq.") {
+            hg = true;
+            relation[n].set_value(".eq.");
+        }
+        else if (relation[n].get_string() == ".hgne.") {
+            hg = true;
+            relation[n].set_value(".ne.");
+        }
+        else if (relation[n].get_string() == ".hggt.") {
+            hg = true;
+            relation[n].set_value(".gt.");
+        }
+        else if (relation[n].get_string() == ".hgge.") {
+            hg = true;
+            relation[n].set_value(".ge.");
+        }
+
+
+        has_got.push_back(hg);
+    }
+
+
+    // Handle single line when...then
+    if (cmdi.get_string(nwords-1) != "then") {
+        single_line_when = true;
+        cmdi.delete_words(0, 5);
+        cmdi.reset_name_type();
+        skipwhen = false;
+    }
+}
+
+
+// ===========================================================================
+// Append a copy of word idex of cmdi, with its location info, to the deque wq.
+// ===========================================================================
+void Whenthen::add_word(Cmd &cmdi, int idex, deque<Word> &wq)
+{
+    const int line_no = cmdi.get_line_number(idex);
+    const int file_line_no = cmdi.get_file_line_number(idex);
+    const string src_file = cmdi.get_filename(idex);
+    deque<string> *const all_lines = cmdi.get_lines();
+    wq.push_back(Word(cmdi.get_string(idex), line_no, file_line_no, src_file,
+                      all_lines));
+}
+
+void Whenthen::add_word(Cmd &cmdi, int idex, deque<Word> &wq, string sadd)
+{
+    // The word's text is sadd; only the location info comes from word idex.
+    const int line_no = cmdi.get_line_number(idex);
+    const int file_line_no = cmdi.get_file_line_number(idex);
+    const string src_file = cmdi.get_filename(idex);
+    deque<string> *const all_lines = cmdi.get_lines();
+    wq.push_back(Word(sadd, line_no, file_line_no, src_file, all_lines));
+}
+
+
+
+// ===========================================================================
+// Store a copy of cmdi at the end of this whenthen's command list (cmdsf).
+// ===========================================================================
+void Whenthen::add_cmdf(Cmd &cmdi)
+{
+    cmdsf.insert(cmdsf.end(), cmdi);
+}
+
+
+// ===========================================================================
+// Evaluate the condition: *wtci is set to 1 if it is satisfied, otherwise 0.
+// ===========================================================================
+void Whenthen::check_wt(vector<string> &code_varnames,
+                        vector<string> &code_values,
+                        vector<int> &vv_active,
+                        int *wtci, stringstream &serr, int &ierr)
+{
+    *wtci = 0;
+    if (processed) return;
+    // NOTE(review): pmath is not referenced again in this function.
+    Parser_math pmath;
+    // One word per sub-condition, interleaved with the logical operators.
+    deque<Word> wordsf;
+    // Set when a has-got sub-condition evaluates to false during this call.
+    bool skip_sat = false;
+    int num_sub_cond = (int)varname.size();
+    for (int n=0; n<num_sub_cond; n++) {
+        deque<Word> words;
+        // A sub-condition already marked satisfied contributes a literal "true".
+        if (satisfied[n] == "true") {
+            int ln = varname[n].get_line_number();
+            int file_ln = varname[n].get_file_line_number();
+            string fname = varname[n].get_filename();
+            deque<string> *lines = varname[n].get_lines();
+            Word w("true", ln, file_ln, fname, lines);
+            words.push_back(w);
+        }
+        else {
+            words.push_back(varname[n]);
+            words.push_back(relation[n]);
+            words.push_back(value[n]);
+            // Evaluate the three words in place; words[0] is read afterwards.
+            process_words(words, code_varnames, code_values, vv_active,
+                          serr, ierr);
+            // Has-got: remember a true result, unless blocked by .andthen. below.
+            if (has_got[n]) {
+                if (words[0].get_bool(serr, ierr)) {
+                    bool doit = true;
+                    if (n > 0) {
+                        if (logop[n-1].get_string() == ".andthen." && skip_sat) {
+                            doit = false;
+                        }
+                    }
+                    if (doit) satisfied[n] = "true";
+                }
+                else {
+                    skip_sat = true;
+                }
+            }
+        }
+
+        wordsf.push_back(words[0]);
+        if (logop[n].get_string() == "none") break;
+        else wordsf.push_back(logop[n]);
+    }
+    // Reduce the collected results and logical operators to a single word.
+    process_words(wordsf, code_varnames, code_values, vv_active,
+                  serr, ierr);
+
+    // The output value, wtci, defaults to false (0). If the condition
+    // is satisfied then the output is true (1).
+    if (wordsf[0].get_bool(serr, ierr)) {
+        *wtci = 1;
+        if (!ever_flag) processed = true;
+        return;
+    }
+}
+
+
+
+// ===========================================================================
+// Substitute code variables in words, then evaluate the relational and logical
+// operators in place. If more than one word remains, ierr is set to 2.
+// ===========================================================================
+void Whenthen::process_words(deque <Word> &words, vector<string> &code_varnames,
+                             vector<string> &code_values,
+                             vector<int> &vv_active,
+                             stringstream &serr, int &ierr)
+{
+    Parser_math pmath;
+
+    // Replace code variable names by their values (inactive ones: see below).
+    int i2 = (int)words.size();
+    for (int i=0; i<i2; i++) {
+        for (int j=0; j<(int)code_varnames.size(); j++) {
+            if (words[i].get_string() == code_varnames[j]) {
+                int ln = words[i].get_line_number();
+                int file_ln = words[i].get_file_line_number();
+                string fname = words[i].get_filename();
+                deque<string> *lines = words[i].get_lines();
+                if (vv_active[j] == 0) {   // inactive variable
+                    Word wj("false", ln, file_ln, fname, lines);
+                    replace_words(i, i+2, words, wj);  // 3 words -> "false"
+                    i2 -= 2;
+                    break;
+                }
+                else {
+                    Word wj(code_values[j], ln, file_ln, fname, lines);
+                    words[i] = wj;   // no break: j keeps scanning
+                }
+            }
+        }
+    }
+    // Collapse operators in place, from level 6 down to level 0.
+    int i1 = 0;
+    i2 = (int)words.size() - 1;
+    for (int level=6; level>=0; level--) {
+        for (int i=i1; i<=i2; i+=1) {
+            if (words[i].is_operator(level)) {
+                int ln = words[i].get_line_number();
+                int file_ln = words[i].get_file_line_number();
+                string fname = words[i].get_filename();
+                deque<string> *lines = words[i].get_lines();
+                Word w("", ln, file_ln, fname, lines);
+                // w is passed to do_op_* and then replaces the operand words.
+                string op_type = words[i].get_op_type();
+                // NOTE(review): i-1 is -1 if a binary operator sits at i == 0.
+                if (op_type == "relational") {
+                    pmath.do_op_relational(i-1, i, i+1, words, w, serr, ierr);
+                }
+
+                if (op_type == "logical" && level == 2)   // .not. is unary
+                    pmath.do_op_not(i, i+1, words, w, serr, ierr);
+
+                if (op_type == "logical" && level != 2)
+                    pmath.do_op_logical(i-1, i, i+1, words, w, serr, ierr);
+
+                // level 2, .not., is unary and is handled differently.
+                if (level == 2) {
+                    replace_words(i, i+1, words, w);
+                    i2 -= 1;
+                }
+                else {
+                    replace_words(i-1, i+1, words, w);
+                    i2 -= 2;
+                    i -= 1;   // the result now sits at i-1
+                }
+                continue;
+            }
+        }
+    }
+    // NOTE(review): words[0] is read below even if words is empty -- confirm.
+    // The condition has to evaluate to a single boolean value.
+    if ((int)words.size() != 1) {
+        words[0].fatal_error(serr, ierr);
+        serr << "When...then condition did not evaluate to a single boolean value."
+             << endl;
+        serr << "Fix the when...then condition" << endl;
+        ierr = 2;
+    }
+}
+
+// ===========================================================================
+// Return, through ca_size, the length of the string built by get_char_array.
+// ===========================================================================
+void Whenthen::get_char_array_size(int *ca_size)
+{
+    string packed;
+    get_char_array(packed);
+    *ca_size = static_cast<int>(packed.size());
+}
+
+// ===========================================================================
+// Append the condition words and the stored command words to sc.
+// ===========================================================================
+void Whenthen::get_char_array(string &sc)
+{
+    const int num_cond = (int)varname.size();
+    for (int c=0; c<num_cond; ++c) {
+        sc.append(varname[c].get_string());
+        sc.append(relation[c].get_string());
+        sc.append(value[c].get_string());
+        sc.append(logop[c].get_string());
+        if (has_got[c]) sc.append("hasgot");
+    }
+    const int num_cmds = (int)cmdsf.size();
+    for (int c=0; c<num_cmds; ++c) {
+        const int nw = cmdsf[c].get_nwords();
+        for (int w=0; w<nw; ++w) sc += cmdsf[c].get_string(w);
+    }
+}
+
+// ===========================================================================
+// Return, through sat_size, the number of entries in the satisfied list.
+// ===========================================================================
+void Whenthen::get_satsize(int *sat_size)
+{
+    *sat_size = static_cast<int>(satisfied.size());
+}
+
+
+// ===========================================================================
+// Get and set the satisfied flags as an int array (1 = "true", 0 = "false").
+// ===========================================================================
+void Whenthen::getsat(int *sat)
+{
+    for (int k=0, nk=(int)satisfied.size(); k<nk; ++k) {
+        if      (satisfied[k] == "true")  sat[k] = 1;
+        else if (satisfied[k] == "false") sat[k] = 0;   // else: sat[k] untouched
+    }
+}
+
+void Whenthen::setsat(int *sat)
+{
+    for (int k=0, nk=(int)satisfied.size(); k<nk; ++k) {
+        if      (sat[k] == 1) satisfied[k] = "true";
+        else if (sat[k] == 0) satisfied[k] = "false";   // others: unchanged
+    }
+}
+
+
+// ===========================================================================
+// Get and set the processed flag for a whenthen, exchanged as an int.
+// ===========================================================================
+void Whenthen::getprocessed(int *wtp)
+{
+    // Report the flag as an int: 1 when processed, otherwise 0.
+    *wtp = processed ? 1 : 0;
+}
+
+void Whenthen::setprocessed(int wtp)
+{
+    // Only the value 1 marks the whenthen as processed.
+    processed = (wtp == 1);
+}
+
+
+// ===========================================================================
+// Get and set the sequence index (seqdex).
+// ===========================================================================
+void Whenthen::getseq(int *wtseq)
+{
+    (*wtseq) = this->seqdex;
+}
+
+void Whenthen::setseq(int wtseq)
+{
+    this->seqdex = wtseq;   // overwrite the stored sequence index
+}
+
+
+
+
+// ===========================================================================
+// Write the final commands of this whenthen to ssc, each one indented by
+// eight spaces and followed by a newline. This shows the user what the
+// final commands are and is also useful for debugging.
+// ===========================================================================
+void Whenthen::list_cmdsf_ss(stringstream &ssc)
+{
+    for (int c=0, nc=(int)cmdsf.size(); c<nc; ++c) {
+        ssc << "        ";
+        cmdsf[c].print_using_words_fm(ssc);
+        ssc << endl;
+    }
+}
+
+
+// ===========================================================================
+// Write the condition to ssc so the user can identify this whenthen (also
+// useful for debugging). The first sub-condition is prefixed with offset1,
+// every later one with offset2; no newline follows the last sub-condition.
+// ===========================================================================
+void Whenthen::list_condition(string offset1, string offset2,
+                              stringstream &ssc)
+{
+    const int num_cond = (int)varname.size();
+    for (int n=0; n<num_cond; ++n) {
+        // Has-got relations are stored in their plain form; show them with
+        // the ".hg" spelling again.
+        string relstr = relation[n].get_string();
+        if (has_got[n]) {
+            if      (relstr == ".lt.") relstr = ".hglt.";
+            else if (relstr == ".le.") relstr = ".hgle.";
+            else if (relstr == ".eq.") relstr = ".hgeq.";
+            else if (relstr == ".ne.") relstr = ".hgne.";
+            else if (relstr == ".gt.") relstr = ".hggt.";
+            else if (relstr == ".ge.") relstr = ".hgge.";
+        }
+
+        const string &offset = (n > 0) ? offset2 : offset1;
+        ssc << offset << varname[n].get_string() << " "
+            << relstr << " " << value[n].get_string();
+
+        // The last sub-condition carries the pseudo-operator "none".
+        const string op = logop[n].get_string();
+        if (op == "none") break;
+        ssc << " " << op;
+        ssc << endl;
+    }
+}
+
+
+// ===========================================================================
+// Erase the words at positions i1 through i2 (both inclusive) from words.
+// ===========================================================================
+void Whenthen::delete_words(int i1, int i2, deque <Word> &words)
+{
+    // erase() takes a half-open range, so the end iterator is one past i2.
+    words.erase(words.begin() + i1, words.begin() + i2 + 1);
+}
+
+
+// ===========================================================================
+// Replace the words at positions i1 through i2 (inclusive) with a copy of w.
+// ===========================================================================
+void Whenthen::replace_words(int i1, int i2, deque <Word> &words, Word &w)
+{
+    // Remove the old range first, then put w where the range used to start.
+    delete_words(i1, i2, words);
+    words.insert(words.begin() + i1, w);
+}
+
+
+
+
+
+} // End of the PP namespace
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Word.hh
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Word.hh
@@ -0,0 +1,244 @@
+/* Copyright 2015.  Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ *  PowerParser is a general purpose input file parser for software applications.
+ *
+ *  Authors: Chuck Wingate   XCP-2   caw@lanl.gov
+ *           Robert Robey    XCP-2   brobey@lanl.gov
+ */
+
+#ifndef WORDHHINCLUDE
+#define WORDHHINCLUDE
+
+// ***************************************************************************
+// ***************************************************************************
+// This class holds each word from the line.
+// ***************************************************************************
+// ***************************************************************************
+
+#include <string>
+#include <sstream>
+#include <vector>
+#include <map>
+#include <deque>
+#include <stdint.h>
+
+namespace PP
+{
+using std::string;
+using std::stringstream;
+using std::vector;
+using std::map;
+using std::deque;
+
+enum WordType {WUNKNOWN, WSTRING, INTEGER, DOUBLE, EQUALS, OPERATOR,
+               OPEN_PARENS, CLOSED_PARENS,                  // "(" and ")"
+               OPEN_SQUARE_BRACKET, CLOSED_SQUARE_BRACKET,  // "[" and "]"
+               OPEN_BRACE, CLOSED_BRACE,                    // "{" and "}"
+               COMMA, VARIABLE};           // ","; VARIABLE words begin with '$'
+
+class Word
+{
+
+public:
+    Word();
+    Word(string s);
+    Word(string s, int lnum, int file_lnum, string fname,
+         deque<string> *lstr);
+    Word(double d, int lnum, int file_lnum, string fname,
+         deque<string> *lstr);
+    Word(int ia, int lnum, int file_lnum, string fname,
+         deque<string> *lstr);
+    Word operator=(const Word &);   // NOTE(review): returns a copy, not Word&
+    Word(const Word &);
+    ~Word();
+
+    // Replace the value held by the word.
+    void set_value(double d);
+    void set_value(string s);
+    void set_value(const char *s);
+    void set_value(bool b);
+
+    // Write the word's type to a stringstream.
+    void print_type(stringstream &ss);
+
+    // Work out and store the type of the word.
+    void set_type();
+
+    // The kind of operator: arithmetic, relational, ...
+    string get_op_type() { return op_type; }
+
+    bool is_operator() { return type == OPERATOR; }
+
+    bool is_operator(int level) {
+        // True only when this word is an operator and its op_level
+        // equals the requested level.
+        return type == OPERATOR && level == op_level;
+    }
+
+    bool is_bool();
+
+    bool is_string()   { return type == WSTRING; }
+
+    bool is_integer()  { return type == INTEGER; }
+
+    // INTEGER or DOUBLE.
+    bool is_number()   { return type == INTEGER || type == DOUBLE; }
+
+    // INTEGER, DOUBLE or VARIABLE.
+    bool is_numvar()   { return type == INTEGER || type == DOUBLE ||
+                                type == VARIABLE; }
+
+    // Same test as is_number().
+    bool has_value()   { return type == INTEGER || type == DOUBLE; }
+
+    bool is_variable() { return type == VARIABLE; }
+
+    bool is_comma()    { return type == COMMA; }
+
+    /*! \brief Get the word as a string. */
+    string get_string()    { return wstr; }
+    string get_print_string(bool enc_quotes);
+    string get_stringp()   { processed = true; return wstr; }  // marks processed
+    char get_single_char(stringstream &serr, int &ierr);
+
+    /*! \brief Get the word as a float. */
+    float get_float();
+
+    /*! \brief Get the word as a double, without and with error processing. */
+    double get_double();
+    double get_double(stringstream &serr, int &ierr);
+
+    /*! \brief Get the word as an int, without and with error processing. */
+    int get_int();
+    int get_int(stringstream &serr, int &ierr);
+
+    int64_t get_int64_t();
+    int64_t get_int64_t(stringstream &serr, int &ierr);
+
+    /*! \brief Get the word as a boolean. */
+    bool get_bool(stringstream &serr, int &ierr);
+
+    /*! \brief Templated get method for get_double, get_int, etc. */
+    template< class T >
+    T get_val( T &dummy );
+
+    /*!
+     * \brief Convert string s to the type of the first argument.  Function
+     * returns the converted value as a reference and as the function
+     * result.
+     *
+     * These overloaded functions are the base for the templated accessor
+     * functions "get_val()".
+     *
+     * \param rtti - Convert the string s into the type of rtti and return it.
+     */
+    int     convertFromString ( const int     &rtti, const string &s ) const;
+    int64_t convertFromString ( const int64_t &rtti, const string &s ) const;
+    string  convertFromString ( const string  &rtti, const string &s ) const;
+    float   convertFromString ( const float   &rtti, const string &s ) const;
+    double  convertFromString ( const double  &rtti, const string &s ) const;
+    //bool    convertFromString ( const bool   &rtti, const string &s ) const;
+
+    /*! \brief Negate the word or set a flag to negate it later. */
+    void negate_value();
+
+    // Error and warning reporting.
+    void fatal_error(stringstream &serr, int &ierr);
+    void warning(stringstream &serr, int &ierr);
+
+    // Miscellaneous helpers.
+    void handle_quotes(stringstream &serr, int &ierr);
+    void erase_char(int ic);
+
+    // Plain accessors for the stored fields.
+    void   set_word(string str)    { wstr = str; }
+    void   set_processed(bool p)   { processed = p; }
+    bool   get_processed()         { return processed; }
+    int    get_line_number()       { return line_number; }
+    int    get_file_line_number()  { return file_line_number; }
+    string get_filename()          { return filename; }
+    deque<string> *get_lines()     { return lines; }
+    void   set_filename(string fn) { filename = fn; }
+    int    get_multiplicity()      { return multiplicity; }
+    void   set_multiplicity(int m) { multiplicity = m; }
+
+private:
+    void init();
+    bool check_before_e(string s, int i1, int i2);
+    bool check_after_e(string s, int i1, int i2);
+
+    // The text of the word; this is its basic storage.
+    string wstr;
+
+    // The type of word, like operator, string, variable, etc.
+    WordType type;
+
+    // Records whether this word has been processed.
+    bool processed;
+
+    // Flag to negate a variable.
+    bool negate;
+
+    // The word stands for this many repetitions of itself.
+    int multiplicity;
+
+    // If the word is an operator, then this is its level, i.e. "**" has
+    // the highest level, then "*","/", etc.
+    int op_level;
+
+    // The kind of operator: arithmetic, relational, ...
+    string op_type;
+
+    // Convert the input string to lower case.
+    void string_to_lower( string &s ) const;
+
+    // This is needed for telling the user what line in the input
+    // file or include file the error occurred on.
+    //
+    // line_number  The line_number corresponding to this command, this is
+    //              an index into lines and starts from 1, not 0.
+    // lines        Pointer to the deque of original lines. This contains all
+    //              the lines from the input file and any include files.
+    // file_line_number  The line number in the input file or include file.
+    // filename          The name of the input file or include file.
+    //
+    // file_line_number and filename are needed so that the user can open
+    // the file and go to the line in error.
+    int line_number, file_line_number;
+    string filename;
+    deque<string> *lines;
+
+};
+
+
+} // end of PP namespace
+
+#endif
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Word.cc
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/Word.cc
@@ -0,0 +1,1193 @@
+/* Copyright 2015.  Los Alamos National Security, LLC. This material was produced
+ * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
+ * for the U.S. Department of Energy. The U.S. Government has rights to use,
+ * reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ * to produce derivative works, such modified software should be clearly marked,
+ * so as not to confuse it with the version available from LANL.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * Under this license, it is required to include a reference to this work. We
+ * request that each derivative work contain a reference to LANL Copyright
+ * Disclosure C15076/LA-CC-15-054 so that this work's impact can be roughly
+ * measured.
+ *
+ * This is LANL Copyright Disclosure C15076/LA-CC-15-054
+ */
+
+/*
+ *  PowerParser is a general purpose input file parser for software applications.
+ *
+ *  Authors: Chuck Wingate   XCP-2   caw@lanl.gov
+ *           Robert Robey    XCP-2   brobey@lanl.gov
+ */
+
+// ***************************************************************************
+// ***************************************************************************
+// This class holds each word from the line.
+// ***************************************************************************
+// ***************************************************************************
+
+#include <iostream>
+#include <iomanip>
+#include <string>
+#include <sstream>
+#include <vector>
+#include <map>
+#include <deque>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#ifdef SGI
+#else
+#include <cctype>
+#endif
+
+#include "Word.hh"
+
+namespace PP
+{
+using std::string;
+using std::cout;
+using std::endl;
+//using std::isdigit;
+using std::stringstream;
+using std::setprecision;
+using std::vector;
+using std::map;
+using std::deque;
+using std::pair;
+
+
+// ===========================================================================
+// Default constructor: an empty word with every other field reset by init().
+// ===========================================================================
+Word::Word()
+    : wstr("")
+{
+    init();
+}
+
+
+// ===========================================================================
+// Construct from a string; the word type is derived from the text.
+// ===========================================================================
+Word::Word(string s)
+    : wstr(s)
+{
+    init();
+    set_type();
+}
+
+
+// ===========================================================================
+// Construct from a string, recording where in the input the word came from.
+// ===========================================================================
+Word::Word(string s, int lnum, int file_lnum, string fname,
+           deque<string> *lstr)
+    : wstr(s)
+{
+    init();                        // resets everything except wstr
+    lines = lstr;
+    filename = fname;
+    file_line_number = file_lnum;
+    line_number = lnum;
+    set_type();
+}
+
+
+// ===========================================================================
+// Construct from a double, written as text with a precision of 15 digits.
+// ===========================================================================
+Word::Word(double d, int lnum, int file_lnum, string fname,
+           deque<string> *lstr)
+{
+    stringstream text;
+    text << setprecision(15) << d;
+    wstr = text.str();
+    init();
+    lines = lstr;
+    filename = fname;
+    file_line_number = file_lnum;
+    line_number = lnum;
+    type = DOUBLE;
+}
+
+
+// ===========================================================================
+// Construct from an integer, recording where in the input it came from.
+// ===========================================================================
+Word::Word(int ia, int lnum, int file_lnum, string fname,
+           deque<string> *lstr)
+{
+    stringstream text;
+    text << ia;
+    wstr = text.str();
+    init();
+    lines = lstr;
+    filename = fname;
+    file_line_number = file_lnum;
+    line_number = lnum;
+    type = DOUBLE;   // NOTE(review): DOUBLE, not INTEGER -- confirm intended
+}
+
+
+// ===========================================================================
+/*! Assignment: copies every member; the result is returned by value. */
+// ===========================================================================
+Word Word::operator=(const Word &ws)
+{
+    if (this == &ws) return *this;
+    wstr = ws.wstr;
+    type = ws.type;
+    processed = ws.processed;
+    negate = ws.negate;
+    multiplicity = ws.multiplicity;
+    op_level = ws.op_level;
+    op_type = ws.op_type;
+    line_number = ws.line_number;
+    file_line_number = ws.file_line_number;
+    filename = ws.filename;
+    lines = ws.lines;
+    return *this;
+}
+
+
+// ===========================================================================
+/*! Copy constructor: member-wise copy (the lines pointer is copied as is). */
+// ===========================================================================
+Word::Word(const Word &ws)
+    : wstr(ws.wstr),
+      type(ws.type),
+      processed(ws.processed),
+      negate(ws.negate),
+      multiplicity(ws.multiplicity),
+      op_level(ws.op_level),
+      op_type(ws.op_type),
+      line_number(ws.line_number),
+      file_line_number(ws.file_line_number),
+      filename(ws.filename),
+      lines(ws.lines)
+{
+}
+
+
+// ===========================================================================
+// Reset every member except wstr to its default; shared by the constructors.
+// ===========================================================================
+void Word::init()
+{
+    type = WUNKNOWN;
+    op_level = -1;
+    op_type.clear();
+    processed = false;
+    negate = false;
+    multiplicity = 1;
+    lines = NULL;
+    filename.clear();
+    line_number = 0;
+    file_line_number = 0;
+}
+
+
+
+// ===========================================================================
+/*! Destructor. The body is empty: nothing is released explicitly here. */
+// ===========================================================================
+Word::~Word()
+{
+}
+
+
+// ***************************************************************************
+// ***************************************************************************
+// ***************************************************************************
+// Change the value of a word.
+// ***************************************************************************
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Store a double as text (precision 15) and re-derive the word type.
+// ===========================================================================
+void Word::set_value(double d)
+{
+    stringstream text;
+    text << setprecision(15) << d;
+    wstr = text.str();
+    set_type();
+}
+
+
+// ===========================================================================
+// Store a string as the word's text and re-derive the word type.
+// ===========================================================================
+void Word::set_value(string s)
+{
+    wstr.assign(s);
+    set_type();
+}
+
+
+// ===========================================================================
+// Set the word to a C string. This overload is what set_value("lasjdf")
+// selects; without it the literal would silently bind to set_value(bool),
+// because pointer-to-bool is preferred over the conversion to std::string.
+// ===========================================================================
+void Word::set_value(const char *s)
+{
+    wstr.assign(s);
+    set_type();
+}
+
+
+// ===========================================================================
+// Set the word to a boolean and re-derive the word type.
+// ===========================================================================
+void Word::set_value(bool b)
+{
+    // Booleans are stored as the text "true" or "false".
+    wstr = b ? "true" : "false";
+    set_type();
+}
+
+
+
+
+// ***************************************************************************
+// ***************************************************************************
+// ***************************************************************************
+// This section handles the type of the word, whether it is an operator,
+// a function, a string, etc.
+// ***************************************************************************
+// ***************************************************************************
+// ***************************************************************************
+
+
+// ===========================================================================
+/*! Set the type of word. */
+// ===========================================================================
+void Word::set_type()
+{
+   // Make sure the type is initialized. If the word is not anything else,
+   // then it is a string.
+   type = WSTRING;
+
+   // Just for convenience.
+   int len = (int)wstr.size();
+
+   // An empty word stays a string; return before indexing wstr[len-1].
+   if (len == 0) return;
+
+   // First determine if the word starts or ends with a quote. If it does,
+   // then it is a string. We do not strip off the quote symbols at this
+   // point because we might be in a comment region where the quotes do not
+   // matter. Later after the comments are stripped out, we check for
+   // matching quotes and remove them.
+   if ((wstr[0] == '\"') || (wstr[0] == '\'') ||
+       (wstr[len-1] == '\"') || (wstr[len-1] == '\'')) {
+      type = WSTRING;
+      return;
+   }
+
+   // Check for a delimiter.
+   if (wstr == "(") { type = OPEN_PARENS; return; }
+   if (wstr == ")") { type = CLOSED_PARENS; return; }
+   if (wstr == "[") { type = OPEN_SQUARE_BRACKET; return; }
+   if (wstr == "]") { type = CLOSED_SQUARE_BRACKET; return; }
+   if (wstr == "{") { type = OPEN_BRACE; return; }
+   if (wstr == "}") { type = CLOSED_BRACE; return; }
+
+   // Comma is used for a couple of things.
+   if (wstr == ",") { type = COMMA; return; }
+
+   // Variables always begin with $. Of course, if the word is in quotes it is
+   // not a variable even if it does begin with $.
+   if (wstr[0] == '$') { type = VARIABLE; return; }
+
+   // Check for an operator.
+   if (wstr == "++")    { type=OPERATOR; op_level=7; op_type="arithmetic"; return; }
+   if (wstr == "--")    { type=OPERATOR; op_level=7; op_type="arithmetic"; return; }
+
+   if (wstr == "**")    { type=OPERATOR; op_level=6; op_type="arithmetic"; return; }
+
+   // Do not implement the % operator, it is too much like the fortran %
+   // operator which is for referencing components of a fortran structure.
+   //if (wstr == "%")     { type=OPERATOR; op_level=5; op_type="arithmetic"; return; }
+   if (wstr == "*")     { type=OPERATOR; op_level=5; op_type="arithmetic"; return; }
+   if (wstr == "/")     { type=OPERATOR; op_level=5; op_type="arithmetic"; return; }
+
+   if (wstr == "+")     { type=OPERATOR; op_level=4; op_type="arithmetic"; return; }
+   if (wstr == "-")     { type=OPERATOR; op_level=4; op_type="arithmetic"; return; }
+
+   if (wstr == ".gt.")  { type=OPERATOR; op_level=3; op_type="relational"; return; }
+   if (wstr == ".ge.")  { type=OPERATOR; op_level=3; op_type="relational"; return; }
+   if (wstr == ".lt.")  { type=OPERATOR; op_level=3; op_type="relational"; return; }
+   if (wstr == ".le.")  { type=OPERATOR; op_level=3; op_type="relational"; return; }
+   if (wstr == ".eq.")  { type=OPERATOR; op_level=3; op_type="relational"; return; }
+   if (wstr == ".ne.")  { type=OPERATOR; op_level=3; op_type="relational"; return; }
+
+   if (wstr == ".hggt.")  { type=OPERATOR; op_level=3; op_type="relational"; return; }
+   if (wstr == ".hgge.")  { type=OPERATOR; op_level=3; op_type="relational"; return; }
+   if (wstr == ".hglt.")  { type=OPERATOR; op_level=3; op_type="relational"; return; }
+   if (wstr == ".hgle.")  { type=OPERATOR; op_level=3; op_type="relational"; return; }
+   if (wstr == ".hgeq.")  { type=OPERATOR; op_level=3; op_type="relational"; return; }
+   if (wstr == ".hgne.")  { type=OPERATOR; op_level=3; op_type="relational"; return; }
+
+   if (wstr == ".not.") { type=OPERATOR; op_level=2; op_type="logical";    return; }
+
+   if (wstr == ".and.") { type=OPERATOR; op_level=1; op_type="logical";    return; }
+
+   if (wstr == ".or.")  { type=OPERATOR; op_level=0; op_type="logical";    return; }
+
+   // Equals sign.
+   if (wstr == "=")  { type = EQUALS; return; }
+
+   // At this point the word is either a string or a number.
+
+   // If the word begins with a + or - sign, then it is numeric.
+   bool start_with_pm = false;
+   if (wstr[0] == '+') start_with_pm = true;
+   if (wstr[0] == '-') start_with_pm = true;
+
+   // If the word does not begin with a + or - sign or a digit, or a
+   // ., or an e or a d then it is a string. The cast is needed because
+   // isdigit is undefined for negative plain char values.
+   const char first = wstr[0];
+   if (!start_with_pm && !isdigit((unsigned char)first) && first != '.' &&
+       first != 'e' && first != 'E' && first != 'd' && first != 'D') {
+      type = WSTRING;
+      return;
+   }
+
+   // Check for all digits, i.e. an integer.
+   bool is_number = true;
+   int istart = 0;
+   if (start_with_pm) istart = 1;
+   for (int i=istart; i<(int)wstr.size(); i++) {
+      if (!isdigit((unsigned char)wstr[i])) {
+         is_number = false;
+         break;
+      }
+   }
+   if (is_number) { type = INTEGER; return; }
+
+   // Check for floating point.
+   // If there is anything in the string that is not a component of a
+   // floating point number, then that makes it a string.
+   is_number = true;
+   for (int i=0; i<(int)wstr.size(); i++) {
+      if (!isdigit((unsigned char)wstr[i]) && wstr[i]!='.' && wstr[i]!='e' &&
+          wstr[i]!='E' && wstr[i]!='d' && wstr[i]!='D' && wstr[i]!='+' &&
+          wstr[i]!='-') {
+         is_number = false;
+         break;
+      }
+   }
+   if (!is_number) { type = WSTRING; return; }
+
+   // We suspect a floating point number.
+   // At this point, everything in the string is a component of a floating
+   // point number, i.e.  "+ - digit e E d D ."
+   type = DOUBLE;
+
+   // Check that strings that start with "e E d D" really are numbers.
+   // And for numbers like e+01, e-05, etc, the atof or strtod functions do not
+   // interpret those as numbers, therefore we insert a 1 in front of the e
+   // so that atof and strtod will call it a number.
+   if (wstr[0] == 'e' || wstr[0] == 'E' || wstr[0] == 'd' || wstr[0] == 'D') {
+
+       // Check for proper syntax after the "e E d D".
+       if (!check_after_e(wstr, 1, (int)wstr.size()-1)) {
+           type = WSTRING;
+           return;
+       }
+
+       // This appears to be a number, insert the digit.
+       wstr.insert(0, "1");
+       return;
+   }
+
+   // The string appears to be a floating point number (fpn), check syntax.
+   // First, find the location of the "e E d D". Check that there can be
+   // only one "e E d D" in the string.
+   int ie = -1;
+   for (int i=0; i<(int)wstr.size(); i++) {
+       if (wstr[i] == 'e' || wstr[i] == 'E' || wstr[i] == 'd' ||
+           wstr[i] == 'D') {
+           ie = i;
+           break;
+       }
+   }
+   if (ie > -1) {
+       for (int i=ie+1; i<(int)wstr.size(); i++) {
+           if (wstr[i] == 'e' || wstr[i] == 'E' || wstr[i] == 'd' ||
+               wstr[i] == 'D') {
+               type = WSTRING;
+               return;
+           }
+       }
+   }
+
+   // Check that the characters before the "e E d D" are valid. If no
+   // "e E d D" was found then check the entire string as if it preceded
+   // an "e E d D".
+   int ic1 = 0;
+   int ic2 = (int)wstr.size() - 1;
+   if (ie > -1) {
+       ic2 = ie - 1;
+   }
+   if (!check_before_e(wstr, ic1, ic2)) {
+       type = WSTRING;
+       return;
+   }
+
+   // Check the characters after the "e E d D" as well. Without this, words
+   // such as "1e", "1e+-5" or "1e.5" would be typed DOUBLE.
+   if (ie > -1 && !check_after_e(wstr, ie+1, (int)wstr.size()-1)) {
+       type = WSTRING;
+       return;
+   }
+
+   // All other cases handled, this must be a fpn (type DOUBLE).
+   return;
+}
+
+
+// ===========================================================================
+// The input string, s, could be a floating point number (fpn). It has been
+// determined that s contains an "e E d D" located at position i2+1. Check
+// everything before the "e E d D", positions i1 through i2 inclusive to
+// verify that this is a fpn.
+//
+// It is also possible that an "e E d D" was not found in which case the
+// entire string is checked as if it preceded an "e E d D".
+//
+// Return false if this is a string
+//        true  if this could be a fpn
+// ===========================================================================
+bool Word::check_before_e(string s, int i1, int i2)
+{
+    // Accepted mantissa grammar for s[i1..i2] (both ends inclusive):
+    //
+    //     [+|-] digits [ . [digits] ]     e.g.  12   -3.   +0.25
+    //     [+|-] . digits                  e.g.  .5   -.125
+    //
+    // i.e. an optional leading sign, at most one ".", and at least one
+    // digit. Returns true if the range could be the mantissa of a fpn and
+    // false if it makes the word a string.
+
+    // If there is nothing before the "e E d D" then this still could be a fpn.
+    int size = i2 - i1 + 1;
+    if (size < 1) return true;
+
+    // The first character could be "+ -", but the remaining characters
+    // cannot be "+ -".
+    int ie1 = i1;
+    if (s[i1] == '+' || s[i1] == '-') ie1 = i1+1;
+
+    // Walk the remaining characters once. Each one must be a digit or the
+    // single optional "."; a second sign, a second "." or any other
+    // character means this is not a fpn.
+    bool point_found = false;
+    int ndigits = 0;
+    for (int i=ie1; i<=i2; i++) {
+        // isdigit requires a value representable as unsigned char; passing
+        // a negative plain char (non-ASCII input) is undefined behavior.
+        if (isdigit((unsigned char)s[i])) {
+            ndigits++;
+            continue;
+        }
+
+        // The only non-digit allowed is the first ".".
+        if (s[i] == '.' && !point_found) {
+            point_found = true;
+            continue;
+        }
+
+        // A sign in the middle, a second ".", or a foreign character.
+        return false;
+    }
+
+    // A mantissa that is only a sign and/or a point ("." "+" "-." etc.) has
+    // no value: atof and strtod do not parse it as a number, so treat it
+    // as a string rather than as a fpn.
+    if (ndigits == 0) return false;
+
+    // All other cases handled, this could be a fpn.
+    return true;
+}
+    
+
+// ===========================================================================
+// The input string, s, could be a floating point number (fpn). It has been
+// determined that s contains an "e E d D" located at position i1-1. Check
+// everything after the "e E d D", positions i1 through i2 inclusive to
+// verify that this is a fpn.
+//
+// Return false if this is a string
+//        true  if this could be a fpn
+// ===========================================================================
+bool Word::check_after_e(string s, int i1, int i2)
+{
+    // If there is nothing after the "e E d D" then that is not a number.
+    if (i2 - i1 + 1 < 1) return false;
+
+    // Skip the optional "+ -" that may directly follow the "e E d D". A
+    // sign with no digits behind it ("e+", "1.5d-") is not an exponent.
+    int ie1 = i1;
+    if (s[i1] == '+' || s[i1] == '-') ie1 = i1+1;
+    if (ie1 > i2) return false;
+
+    // Everything after the e or e+ or e- must be a digit. The cast keeps
+    // isdigit defined for negative plain char values (non-ASCII input).
+    for (int i=ie1; i<=i2; i++) {
+        if (!isdigit((unsigned char)s[i])) return false;
+    }
+
+    // All other cases handled, this could be a fpn.
+    return true;
+}
+    
+
+
+// ===========================================================================
+/*! Print the type of word. */
+// ===========================================================================
+void Word::print_type(stringstream &ss)
+{
+   // Table pairing every word type with the label that is printed for it.
+   static const struct { int code; const char *label; } labels[] = {
+      { WUNKNOWN, "unknown" }, { WSTRING, "string" },
+      { INTEGER, "integer" }, { DOUBLE, "double" },
+      { EQUALS, "=" }, { OPERATOR, "operator" },
+      { OPEN_PARENS, "(" }, { CLOSED_PARENS, ")" },
+      { OPEN_SQUARE_BRACKET, "[" }, { CLOSED_SQUARE_BRACKET, "]" },
+      { OPEN_BRACE, "{" }, { CLOSED_BRACE, "}" },
+      { COMMA, "," }, { VARIABLE, "variable" }
+   };
+
+   // Emit the label of each row matching the current type (none if unlisted).
+   for (int k = 0; k < (int)(sizeof(labels) / sizeof(labels[0])); k++)
+      if (type == labels[k].code) ss << labels[k].label;
+}
+
+
+// ===========================================================================
+// Check to see if the word is boolean (true or false)
+// ===========================================================================
+bool Word::is_bool()
+{
+    // Spellings accepted as a boolean. Matching ignores letter case, so
+    // "True", "tRue", ".FALSE." etc. all qualify.
+    static const char *const spellings[] = {
+        "true", ".true.", "false", ".false."
+    };
+    const int nspellings = (int)(sizeof(spellings) / sizeof(spellings[0]));
+
+    // Lower-case a private copy so that the word itself is left untouched.
+    string lowered( wstr );
+    string_to_lower( lowered );
+
+    // The word is boolean when the lowered copy equals one of the spellings.
+    bool found = false;
+    for (int k = 0; k < nspellings; k++) {
+        if (lowered == spellings[k]) {
+            found = true;
+            break;
+        }
+    }
+
+    return found;
+}
+
+
+// ***************************************************************************
+// ***************************************************************************
+// ***************************************************************************
+// This section provides the methods for returning the word as double,
+// int, bool, etc.
+// ***************************************************************************
+// ***************************************************************************
+// ***************************************************************************
+
+// ===========================================================================
+// Get the word as a boolean
+// ===========================================================================
+bool Word::get_bool(stringstream &serr, int &ierr)
+{
+    // Mark this word as having been processed.
+    processed = true;
+
+    // Compare against a lower-cased copy of the word so that any
+    // capitalization of the keyword (true, True, TrUe, ...) is accepted.
+    string lowered( wstr );
+    string_to_lower( lowered );
+
+    // Either keyword may be written plainly or with enclosing dots.
+    const bool is_true  = (lowered == "true")  || (lowered == ".true.");
+    const bool is_false = (lowered == "false") || (lowered == ".false.");
+
+    const bool recognized = is_true || is_false;
+
+    // A word that is neither keyword is reported as an error; its value
+    // then defaults to false.
+    if (!recognized) {
+        // The offending input line is shown only when the line buffer is
+        // attached to this word.
+        if (lines != NULL) {
+            serr << endl;
+            serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl;
+            serr << "    " << (*lines)[line_number-1] << endl;
+            serr << "in file: " << filename << endl;
+        }
+        serr << "Values on this line should be true or false "
+            "(or .true. or .false.)" << endl;
+        serr << "    (any case is fine, for example true, True, TrUe "
+            "are all ok)" << endl;
+        serr << "Instead found value: " << wstr << endl << endl;
+        ierr = 2;
+    }
+
+    // Only a recognized true keyword yields true; a false keyword and an
+    // unrecognized word both yield false.
+    bool retValue = is_true;
+
+    // Apply the negate flag if it is turned on: the value parsed above is
+    // inverted.
+    if (negate) {
+        retValue = !retValue;
+    }
+
+    return retValue;
+}
+
+
+// ===========================================================================
+// Get the word as an int, error processing version.
+// ===========================================================================
+int Word::get_int(stringstream &serr, int &ierr)
+{
+    // Mark this word as having been processed.
+    processed = true;
+
+    // The word must at least be a number.
+    if ((type != DOUBLE) && (type != INTEGER)) {
+        if (lines != NULL) {
+            serr << endl;
+            serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl;
+            serr << "    " << (*lines)[line_number-1] << endl;
+            serr << "in file: " << filename << endl;
+        }
+        serr << "Expected a numerical, integer value." << endl;
+        serr << "Instead got: " << wstr << endl << endl;
+        ierr = 2;
+        return 0;
+    }
+
+    // The word might begin with a + or - sign.
+    int istart = 0;
+    if (wstr[0] == '+' || wstr[0] == '-') istart = 1;
+
+    // We allow 2. or 2.0 for example as an integer, but not 2.3. The word
+    // must also hold at least one digit, so "." or "+." is not an integer.
+    bool dot_found = false;
+    bool is_int = true;
+    int ndigits = 0;
+    for (int i=istart; i<(int)wstr.size(); i++) {
+        if (wstr[i]=='.' ) {
+            dot_found = true;
+            continue;
+        }
+        // The cast keeps isdigit defined for negative plain char values.
+        if (!isdigit((unsigned char)wstr[i]) || (dot_found && wstr[i]!='0')) {
+            is_int = false;
+            break;
+        }
+        ndigits++;
+    }
+    if (ndigits == 0) is_int = false;
+    if (!is_int) {
+        if (lines != NULL) {
+            serr << endl;
+            serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl;
+            serr << "    " << (*lines)[line_number-1] << endl;
+            serr << "in file: " << filename << endl;
+        }
+        serr << "Expected an integer." << endl;
+        serr << "For example, 2 or 3, even 2. or 2.0 is ok." << endl;
+        serr << "Instead got: " << wstr << endl << endl;
+        ierr = 2;
+        return 0;
+    }
+
+    // Convert the validated text. atoi stops at the optional ".", so 2.
+    // and 2.0 both convert to 2.
+    int iret = atoi(wstr.c_str() );
+
+    // Apply the negate flag if it is turned on.
+    if (negate) iret *= -1;
+    return iret;
+}
+
+int64_t Word::get_int64_t(stringstream &serr, int &ierr)
+{
+    // Mark this word as having been processed.
+    processed = true;
+
+    // The word must at least be a number.
+    if ((type != DOUBLE) && (type != INTEGER)) {
+        if (lines != NULL) {
+            serr << endl;
+            serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl;
+            serr << "    " << (*lines)[line_number-1] << endl;
+            serr << "in file: " << filename << endl;
+        }
+        serr << "Expected a numerical, integer value." << endl;
+        serr << "Instead got: " << wstr << endl << endl;
+        ierr = 2;
+        return 0;
+    }
+
+    // The word might begin with a + or - sign.
+    int istart = 0;
+    if (wstr[0] == '+' || wstr[0] == '-') istart = 1;
+
+    // We allow 2. or 2.0 for example as an integer, but not 2.3. The word
+    // must also hold at least one digit, so "." or "+." is not an integer.
+    bool dot_found = false;
+    bool is_int = true;
+    int ndigits = 0;
+    for (int i=istart; i<(int)wstr.size(); i++) {
+        if (wstr[i]=='.' ) {
+            dot_found = true;
+            continue;
+        }
+        // The cast keeps isdigit defined for negative plain char values.
+        if (!isdigit((unsigned char)wstr[i]) || (dot_found && wstr[i]!='0')) {
+            is_int = false;
+            break;
+        }
+        ndigits++;
+    }
+    if (ndigits == 0) is_int = false;
+    if (!is_int) {
+        if (lines != NULL) {
+            serr << endl;
+            serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl;
+            serr << "    " << (*lines)[line_number-1] << endl;
+            serr << "in file: " << filename << endl;
+        }
+        serr << "Expected an integer." << endl;
+        serr << "For example, 2 or 3, even 2. or 2.0 is ok." << endl;
+        serr << "Instead got: " << wstr << endl << endl;
+        ierr = 2;
+        return 0;
+    }
+
+    // Convert the validated text; extraction stops at the optional ".".
+    // iret starts at 0 so that a failed extraction cannot return garbage.
+    std::stringstream sstr(wstr);
+    int64_t iret = 0;
+    sstr >> iret;
+    // Apply the negate flag if it is turned on.
+    if (negate) iret *= -1;
+    return iret;
+}
+
+// ===========================================================================
+// Get the word as a double, error processing version.
+// ===========================================================================
+double Word::get_double(stringstream &serr, int &ierr)
+{
+    // Mark this word as having been processed.
+    processed = true;
+
+    // Anything that was not classified as a number is an error.
+    const bool numeric = (type == DOUBLE) || (type == INTEGER);
+    if (!numeric) {
+        if (lines != NULL) {
+            serr << endl;
+            serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl;
+            serr << "    " << (*lines)[line_number-1] << endl;
+            serr << "in file: " << filename << endl;
+        }
+        serr << "Expected a numerical value." << endl;
+        serr << "Instead got: " << wstr << endl << endl;
+        ierr = 2;
+        return 0;
+    }
+
+    // Work on a copy; the stored word keeps its original spelling.
+    string text = wstr;
+
+    // Exponents may be written with d or D as well as e or E, for example
+    // 1.d14 or -1.38D-18. Since atof and strtod only understand e and E,
+    // every d or D of a floating point word is rewritten as e before the
+    // conversion.
+    if (type == DOUBLE) {
+        for (string::iterator c = text.begin(); c != text.end(); ++c) {
+            if (*c == 'd' || *c == 'D') *c = 'e';
+        }
+    }
+
+    // Convert the text to a double.
+    double value = atof(text.c_str());
+
+    // Apply the negate flag if it is turned on.
+    if (negate) value *= -1.0;
+    return value;
+}
+
+
+// ===========================================================================
+// Get the word as a single character, error processing version.
+// ===========================================================================
+char Word::get_single_char(stringstream &serr, int &ierr)
+{
+    // Neither argument is needed here; the casts merely mark them as
+    // intentionally unused so that no compiler warning is issued.
+    (void)serr;
+    (void)ierr;
+
+    // Mark this word as having been processed.
+    processed = true;
+    return wstr[0];
+}
+
+
+// ===========================================================================
+// Get the word as an int, no error processing version.
+// ===========================================================================
+int Word::get_int()
+{
+   int dummy = 0;   // type tag only; initialized so it is never read as garbage
+   return get_val( dummy );
+}
+
+// Get the word as an int64_t, no error processing version.
+int64_t Word::get_int64_t()
+{
+   int64_t dummy = 0;   // type tag only; initialized so it is never read as garbage
+   return get_val( dummy );
+}
+
+// ===========================================================================
+// Get the word as a float
+// ===========================================================================
+float Word::get_float()
+{
+   float dummy = 0.0f;   // type tag only; an uninitialized NaN would trip asserts
+   return get_val( dummy );
+}
+
+// ===========================================================================
+// Get the word as a double
+// ===========================================================================
+double Word::get_double()
+{
+   double dummy = 0.0;   // type tag only; an uninitialized NaN would trip asserts
+   return get_val( dummy );
+}
+
+
+// ===========================================================================
+// Get the word as a Type T. 
+// ===========================================================================
+template< class T >
+T Word::get_val( T &dummyValue )
+{
+   // dummyValue only selects T. It must not be read: callers pass it
+   // uninitialized, and an uninitialized floating point NaN would have
+   // made the former assert(dummyValue == dummyValue) abort.
+   (void)dummyValue;
+
+   // Mark this word as having been processed.
+   processed = true;
+
+   // Convert the word to the requested data type. The value-initialized
+   // tag is used for overload selection only.
+   T retValue = convertFromString( T(), wstr );
+   return retValue;
+}
+
+//! Explicit instantiation of supported template types.  If more types are
+//! needed those explicit versions must be listed here.  We are not using
+//! automatic inclusion (we would need to move the function definition into
+//! the header file for that).  The listed versions below are the only ones
+//! that will be included in the library.
+template int     Word::get_val( int&     );
+template int64_t Word::get_val( int64_t& );
+template float   Word::get_val( float&   );
+template double  Word::get_val( double&  );
+//template bool  Word::get_val( bool&    );
+template string  Word::get_val( string&  );
+
+// ===========================================================================
+// Convert from string to return type explicitly.
+// ===========================================================================
+
+//! rtti is only used for type identification (each overloaded function must
+//! have a unique signature; the return value is not part of the signature).
+
+// Convert string to integer.
+int    Word::convertFromString( const int &rtti, const string &s ) const
+{
+   // rtti only selects this overload; it is never read (may be uninitialized).
+   (void)rtti;
+
+   int iret = atoi( s.c_str() );
+   if (negate) iret *= -1;
+   return iret;
+}
+
+// Convert string to int64_t.
+int64_t Word::convertFromString( const int64_t &rtti, const string &s ) const
+{
+   // rtti only selects this overload; it is never read (may be uninitialized).
+   (void)rtti;
+
+   int64_t iret = 0;   // stays 0 if nothing can be extracted from s
+   std::stringstream( s ) >> iret;
+   if (negate) iret *= -1;
+   return iret;
+}
+
+// Convert string to string (do nothing).
+string Word::convertFromString( const string &rtti, const string &s ) const
+{
+   // rtti only selects this overload; its value is never used.
+   (void)rtti;
+
+   return s;
+}
+
+// Convert string to float.
+float Word::convertFromString( const float &rtti, const string &s ) const
+{
+    // rtti only selects this overload. It is deliberately not read: callers
+    // may pass an uninitialized float, and a NaN would fail an
+    // assert(rtti == rtti).
+    (void)rtti;
+
+    // Use a temporary string that might be modified.
+    string sm = s;
+
+    // We allow exponents using d and D in addition to e and E, for example
+    // 1.d14 or -1.38D-18. atof does not accept d or D, so replace them with
+    // e before converting.
+    for (int i=0; i<(int)sm.size(); i++) {
+        if (sm[i] == 'd' || sm[i] == 'D') sm[i] = 'e';
+    }
+
+    float f = (float)atof( sm.c_str() );
+    if (negate) f *= -1.;
+    return f;
+}
+
+// Convert string to double.
+double Word::convertFromString( const double &rtti, const string &s ) const
+{
+    // rtti only selects this overload. It is deliberately not read: callers
+    // may pass an uninitialized double, and a NaN would fail an
+    // assert(rtti == rtti).
+    (void)rtti;
+
+    // Use a temporary string that might be modified.
+    string sm = s;
+
+    // We allow exponents using d and D in addition to e and E, for example
+    // 1.d14 or -1.38D-18. atof does not accept d or D, so replace them with
+    // e before converting.
+    for (int i=0; i<(int)sm.size(); i++) {
+        if (sm[i] == 'd' || sm[i] == 'D') sm[i] = 'e';
+    }
+
+    double d = atof( sm.c_str() );
+    if (negate) d *= -1.;
+    return d;
+}
+
+
+// ***************************************************************************
+// ***************************************************************************
+// ***************************************************************************
+// Utility functions.
+// ***************************************************************************
+// ***************************************************************************
+// ***************************************************************************
+
+
+// ===========================================================================
+// Return the word as a string for printing. This is usually just the word
+// but if it has multiplicity, then include that in the return string.
+// ===========================================================================
+string Word::get_print_string(bool enc_quotes)
+{
+    // Quotes are added only when the caller asks for them, only around
+    // words of type WSTRING, and never around the literal words true and
+    // false.
+    const bool quoted = enc_quotes && (type == WSTRING) &&
+                        (wstr != "true") && (wstr != "false");
+
+    // Build the text of the word, wrapped in double quotes if required.
+    string text;
+    if (quoted) text += "\"";
+    text += wstr;
+    if (quoted) text += "\"";
+
+    // A word with a multiplicity of at most one is printed as is.
+    if (multiplicity <= 1) {
+        return text;
+    }
+
+    // Otherwise the repeat count and a * are put in front of the text,
+    // giving count*text.
+    stringstream count;
+    count << multiplicity;
+
+    string repeated = count.str() + "*" + text;
+    return repeated;
+}
+
+
+// ===========================================================================
+// If a word starts or ends with quotes, make sure the quotes match, if not
+// generate a fatal error, and then strip off the quotes.
+// ===========================================================================
+void Word::handle_quotes(stringstream &serr, int &ierr)
+{
+    // An empty word has no quotes; return before indexing into it.
+    int len = (int)wstr.size();
+    if (len == 0) return;
+
+    // Classify both ends (they coincide for a one character word).
+    const char first = wstr[0];
+    const char last  = wstr[len-1];
+    const bool open_q  = (first == '\"') || (first == '\'');
+    const bool close_q = (last  == '\"') || (last  == '\'');
+
+    // Quotes must come in pairs of the same kind. A lone quote character
+    // opens a string that is never closed, so it is a mismatch too.
+    if ((open_q || close_q) && (first != last || len < 2)) {
+        fatal_error(serr, ierr);
+        serr << "Quotes mismatch found." << endl;
+        serr << "A starting quotes must have a closing quotes." << endl;
+        serr << "Double quotes, \", must be matched with double quotes."
+             << endl;
+        serr << "Single quotes, \', must be matched with single quotes."
+             << endl;
+        ierr = 2;
+    }
+
+    // Strip the quote symbols when the word both starts and ends with one;
+    // two characters are needed so that erase never sees an empty string.
+    if (open_q && close_q && len >= 2) {
+        wstr.erase(wstr.end() - 1);
+        wstr.erase(wstr.begin());
+    }
+}
+
+
+// ===========================================================================
+// Erase a single character from the word, ic is the index of the character
+// to be erased (starting from 0).
+// ===========================================================================
+void Word::erase_char(int ic)
+{
+    if (ic < 0 || ic >= (int)wstr.size()) return;  // ignore out-of-range
+    wstr.erase(wstr.begin() + ic);
+}
+
+
+
+// ===========================================================================
+// Fatal error
+// ===========================================================================
+void Word::fatal_error(stringstream &serr, int &ierr)
+{
+    // ierr is unused. Skip the header when no line buffer is attached.
+    (void)ierr;
+    if (lines == NULL) return;
+    serr << endl;
+    serr << "*** FATAL ERROR in line " << file_line_number << ":" << endl;
+    serr << "    " << (*lines)[line_number-1] << endl;
+    serr << "in file: " << filename << endl;
+}
+
+void Word::warning(stringstream &serr, int &ierr)
+{
+    // ierr is unused. Skip the header when no line buffer is attached.
+    (void)ierr;
+    if (lines == NULL) return;
+    serr << endl;
+    serr << "*** WARNING in line " << file_line_number << ":" << endl;
+    serr << "    " << (*lines)[line_number-1] << endl;
+    serr << "in file: " << filename << endl;
+}
+
+
+// ===========================================================================
+// Negate a word.
+// ===========================================================================
+void Word::negate_value()
+{
+   // Only numeric words carry a sign in their text.
+   if (type != INTEGER && type != DOUBLE) return;
+   negate = false;
+
+   // Locate the first character after any leading blanks or tabs.
+   int len = (int)wstr.size();
+   int i = 0;
+   while (i < len && (wstr[i] == ' ' || wstr[i] == '\t')) i++;
+
+   // A leading - sign is deleted outright. Blanking it out (as was done
+   // before) left " 5", which the checked integer getters reject and
+   // which a second negation turned into "- 5", parsed as 0.
+   if (i < len && wstr[i] == '-') {
+      wstr.erase(wstr.begin() + i);
+      return;
+   }
+
+   // A leading + sign is flipped in place ("-+5" would parse as 0).
+   if (i < len && wstr[i] == '+') { wstr[i] = '-'; return; }
+
+   // No sign present, so put a - sign in front of the word.
+   wstr = "-" + wstr;
+}
+
+
+
+// ===========================================================================
+// Convert the input string to lower case.
+// ===========================================================================
+void Word::string_to_lower( string &s ) const
+{
+   // isalpha and tolower are only defined for unsigned char values (and
+   // EOF), so each character is converted through unsigned char; a
+   // negative plain char (non-ASCII byte) would be undefined behavior.
+   int len = (int)s.size();
+   for (int i=0; i<len; i++) {
+      unsigned char c = (unsigned char)s[i];
+      if (isalpha(c)) s[i] = (char)tolower(c);
+   }
+}
+
+
+
+} // End of the PP namespace
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/clamr_cpuonly.cpp
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/clamr_cpuonly.cpp
@@ -0,0 +1,832 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+
+#include <algorithm>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <vector>
+//#include "graphics/display.h"
+#include "graphics.h"
+#include "input.h"
+#include "mesh.h"
+#include "partition.h"
+#include "state.h"
+#include "timer.h"
+#include "memstats.h"
+#include "crux.h"
+#include "PowerParser.hh"
+#include "MallocPlus.h"
+#ifdef HAVE_ITTNOTIFY
+#include <ittnotify.h>
+#endif
+
+using namespace PP;
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#ifndef DEBUG
+#define DEBUG 0
+#endif
+#undef DEBUG_RESTORE_VALS
+// Smallest of three values; relies on an unqualified min() being in scope.
+#define MIN3(x,y,z) ( min( min(x,y), z) )
+// Calculation flags handed to Mesh/State calls; never reassigned in this file.
+static int do_cpu_calc = 1;
+static int do_gpu_calc = 0;
+
+typedef unsigned int uint;
+
+static bool do_display_graphics = false;
+
+#ifdef HAVE_GRAPHICS
+static double circle_radius=-1.0;
+#ifdef FULL_PRECISION
+   void (*set_display_cell_coordinates)(double *, double *, double *, double *) = &set_display_cell_coordinates_double;
+   void (*set_display_cell_data)(double *) = &set_display_cell_data_double;
+#else
+   void (*set_display_cell_coordinates)(float *, float *, float *, float *) = &set_display_cell_coordinates_float;
+   void (*set_display_cell_data)(float *) = &set_display_cell_data_float;
+#endif
+#endif
+// View mode passed to the graphics setters: 0 during setup, set to 1 in main() before the run.
+static int view_mode = 0;
+// SUM_ERROR times H_sum_initial is the default upper_mass_diff_percentage (see main()).
+#ifdef FULL_PRECISION
+#define  SUM_ERROR 2.0e-16
+   void (*set_graphics_cell_coordinates)(double *, double *, double *, double *) = &set_graphics_cell_coordinates_double;
+   void (*set_graphics_cell_data)(double *) = &set_graphics_cell_data_double;
+#else
+#define  SUM_ERROR 1.0e-8
+   void (*set_graphics_cell_coordinates)(float *, float *, float *, float *) = &set_graphics_cell_coordinates_float;
+   void (*set_graphics_cell_data)(float *) = &set_graphics_cell_data_float;
+#endif
+// Checkpoint/restart helpers defined at the end of this file.
+void store_crux_data(Crux *crux, int ncycle);
+void restore_crux_data_bootstrap(Crux *crux, char *restart_file, int rollback_counter);
+void restore_crux_data(Crux *crux);
+
+bool        restart,        //  Flag to start from a back up file; init in input.cpp::parseInput().
+            verbose,        //  Flag for verbose command-line output; init in input.cpp::parseInput().
+            localStencil,   //  Flag for use of local stencil; init in input.cpp::parseInput().
+            face_based,     //  Flag for face-based finite difference;
+            outline;        //  Flag for drawing outlines of cells; init in input.cpp::parseInput().
+int         outputInterval, //  Periodicity of output; init in input.cpp::parseInput().
+            crux_type,      //  Type of checkpoint/restart -- CRUX_NONE, CRUX_IN_MEMORY, CRUX_DISK;
+                            //  init in input.cpp::parseInput().
+            enhanced_precision_sum,//  Flag for enhanced precision sum (default true); init in input.cpp::parseInput().
+            lttrace_on,     //  Flag to turn on logical time trace package;
+            do_quo_setup,   //  Flag to turn on quo dynamic scheduling policies package;
+            levmx,          //  Maximum number of refinement levels; init in input.cpp::parseInput().
+            nx,             //  x-resolution of coarse grid; init in input.cpp::parseInput().
+            ny,             //  y-resolution of coarse grid; init in input.cpp::parseInput().
+            niter,          //  Maximum iterations; init in input.cpp::parseInput().
+            graphic_outputInterval, // Periodicity of graphic output that is saved; init in input.cpp::parseInput()
+            checkpoint_outputInterval, // Periodicity of checkpoint output that is saved; init in input.cpp::parseInput()
+            num_of_rollback_states,// Maximum number of rollback states to maintain; init in input.cpp::parseInput()
+            backup_file_num,//  Backup file number to restart simulation from; init in input.cpp::parseInput()
+            numpe,          //  Passed to partition_cells() in do_calc(); never assigned in this file (main() shadows it).
+            ndim    = 2;    //  Dimensionality of problem (2 or 3).
+double      upper_mass_diff_percentage; //  Allowed percentage difference from the initial total mass,
+                                        //  checked in do_calc(); init in input.cpp::parseInput().
+
+char *restart_file;                 //  Passed to restore_crux_data_bootstrap() in main() when restart is set
+
+static int it = 0;                  //  Counter of the non-graphics driver loop in main(); saved in checkpoints
+
+enum partition_method initial_order,  //  Initial order of mesh.
+                      cycle_reorder;  //  Order of mesh every cycle.
+static Mesh        *mesh;           //  Object containing mesh information
+static State       *state;          //  Object containing state information corresponding to mesh
+static Crux        *crux;           //  Object containing checkpoint/restart information
+static PowerParser *parse;          //  Object containing input file parsing
+
+static real_t circ_radius = 0.0;    //  Radius given to mesh->init() and state->fill_circle(); scaled by nx/128 in main()
+static int next_cp_cycle = 0;       //  Cycle at which store_crux_data() is next called
+static int next_graphics_cycle = 0; //  Cycle at which graphics output is next written
+
+//  Set up timing information.
+static struct timeval tstart, tstart_cpu, tstart_partmeas;
+
+static double H_sum_initial = 0.0;     //  Mass sum recorded in main() before the first cycle
+static double cpu_time_graphics = 0.0; //  Accumulated time spent in graphics sections
+static double cpu_time_calcs    = 0.0; //  Accumulated time spent in the burst loop of do_calc()
+static double cpu_time_partmeas = 0.0; //  Accumulated time spent in mesh->partition_measure()
+
+static int     ncycle  = 0;         //  Cycle counter, incremented by the burst loop in do_calc()
+static double  simTime = 0.0;       //  Running sum of the timesteps taken
+static double  deltaT = 0.0;        //  Most recent timestep returned by state->set_timestep()
+char total_sim_time_log[] = {"total_execution_time.log"}; //  File that receives the total execution time
+struct timeval total_exec;          //  NOTE(review): read by cpu_timer_stop() in do_calc(), but never started in this file -- confirm
+
+static int mype=0;                  //  Gates printf output (mype == 0); never assigned in this file, so always 0
+int main(int argc, char **argv) {
+   // Entry point: parse input, build or restore the mesh and state, emit initial output, then drive do_calc().
+   // Needed for code to compile correctly on the Mac
+   int numpe=-1;
+
+   //  Process command-line arguments, if any.
+   parseInput(argc, argv);
+
+#ifdef _OPENMP
+   int nt = 0;
+   int tid = 0;
+
+   nt = omp_get_max_threads();
+   tid = omp_get_thread_num();
+   if (0 == tid && mype == 0) {
+        printf("--- max num openmp threads: %d\n", nt);
+   }
+#pragma omp parallel firstprivate(nt, tid)
+   {
+      nt = omp_get_num_threads();
+      tid = omp_get_thread_num();
+
+#pragma omp master
+      if (mype == 0) {
+         printf("--- num openmp threads in parallel region: %d\n", nt);
+      }
+   }
+#endif
+
+   parse = new PowerParser();
+
+   struct timeval tstart_setup;
+   cpu_timer_start(&tstart_setup);
+
+   crux = new Crux(crux_type, num_of_rollback_states, restart);
+
+   circ_radius = 6.0;
+   //  Scale the circle appropriately for the mesh size.
+   circ_radius = circ_radius * (real_t) nx / 128.0;
+   int boundary = 1;
+   int parallel_in = 0;
+   double deltax_in = 1.0;
+   double deltay_in = 1.0;
+
+   if (restart){
+      restore_crux_data_bootstrap(crux, restart_file, 0);
+      mesh  = new Mesh(nx, ny, levmx, ndim, deltax_in, deltay_in, boundary, parallel_in, do_gpu_calc);
+      mesh->init(nx, ny, circ_radius, initial_order, do_gpu_calc);
+
+      state = new State(mesh);
+      restore_crux_data(crux);
+      mesh->proc.resize(mesh->ncells);
+      mesh->calc_distribution(numpe);
+   } else {
+      mesh = new Mesh(nx, ny, levmx, ndim, deltax_in, deltay_in, boundary, parallel_in, do_gpu_calc);
+      if (DEBUG) {
+         //if (mype == 0) mesh->print();
+
+         char filename[10];
+         // snprintf bounds the write so a multi-digit mype cannot overrun filename.
+         snprintf(filename,sizeof(filename),"out%1d",mype);
+         mesh->fp=fopen(filename,"w");
+         //mesh->print_local();
+      }
+
+      mesh->init(nx, ny, circ_radius, initial_order, do_gpu_calc);
+      state = new State(mesh);
+      state->init(do_gpu_calc);
+      mesh->proc.resize(mesh->ncells);
+      mesh->calc_distribution(numpe);
+      state->fill_circle(circ_radius, 100.0, 7.0);
+   }
+
+   size_t &ncells = mesh->ncells;
+
+   if (graphic_outputInterval > niter) next_graphics_cycle = graphic_outputInterval;
+   if (checkpoint_outputInterval > niter) next_cp_cycle = checkpoint_outputInterval;
+
+
+   //  Kahan-type enhanced precision sum implementation.
+   double H_sum = state->mass_sum(enhanced_precision_sum);
+   if (mype == 0) printf ("Mass of initialized cells equal to %14.12lg\n", H_sum);
+   H_sum_initial = H_sum;
+
+   if(upper_mass_diff_percentage < 0){
+      upper_mass_diff_percentage = H_sum_initial * SUM_ERROR;
+      //printf("Setting sum mass error to %16.8lg\n",upper_mass_diff_percentage);
+   }
+
+   double cpu_time_main_setup = cpu_timer_stop(tstart_setup);
+   #ifdef TIMING
+   mesh->parallel_output("CPU:  setup time               time was",cpu_time_main_setup, 0, "s");
+   #endif
+
+   long long mem_used = memstats_memused();
+   #ifdef MEMORY
+   if (mem_used > 0) {
+      mesh->parallel_output("Memory used      in startup ",mem_used, 0, "kB");
+      mesh->parallel_output("Memory peak      in startup ",memstats_mempeak(), 0, "kB");
+      mesh->parallel_output("Memory free      at startup ",memstats_memfree(), 0, "kB");
+      mesh->parallel_output("Memory available at startup ",memstats_memtotal(), 0, "kB");
+   }
+   #endif
+
+   if (mype == 0) {   // ncells is a size_t, hence the %zu conversions below
+      if (ncycle != 0){
+         printf("Iteration %3d timestep %lf Sim Time %lf cells %zu Mass Sum %14.12lg\n",
+            ncycle, deltaT, simTime, ncells, H_sum);
+      } else {
+         printf("Iteration   0 timestep      n/a Sim Time      0.0 cells %zu Mass Sum %14.12lg\n", ncells, H_sum);
+      }
+   }
+
+   for (int i = 0; i < MESH_COUNTER_SIZE; i++){
+      mesh->cpu_counters[i]=0;
+   }
+   for (int i = 0; i < MESH_TIMER_SIZE; i++){
+      mesh->cpu_timers[i]=0.0;
+   }
+
+   cpu_timer_start(&tstart_cpu);
+
+#ifdef HAVE_GRAPHICS
+   do_display_graphics = true;
+   set_display_mysize(ncells);
+   set_display_cell_coordinates(&mesh->x[0], &mesh->dx[0], &mesh->y[0], &mesh->dy[0]);
+   set_display_cell_data(&state->H[0]);
+   set_display_cell_proc(&mesh->proc[0]);
+
+   set_display_window((float)mesh->xmin, (float)mesh->xmax,
+                      (float)mesh->ymin, (float)mesh->ymax);
+   set_display_outline((int)outline);
+   set_display_viewmode(view_mode);
+#endif
+
+   if (ncycle == next_graphics_cycle){
+      set_graphics_outline(outline);
+      set_graphics_window((float)mesh->xmin, (float)mesh->xmax,
+                          (float)mesh->ymin, (float)mesh->ymax);
+      set_graphics_mysize(ncells);
+      set_graphics_cell_coordinates(&mesh->x[0], &mesh->dx[0],
+                                    &mesh->y[0], &mesh->dy[0]);
+      set_graphics_cell_data(&state->H[0]);
+      set_graphics_cell_proc(&mesh->proc[0]);
+      set_graphics_viewmode(view_mode);
+
+      if (mype == 0) {
+         init_graphics_output();
+         set_graphics_cell_proc(&mesh->proc[0]);
+         write_graphics_info(0,0,0.0,0,0);
+      }
+      next_graphics_cycle += graphic_outputInterval;
+   }
+
+#ifdef HAVE_GRAPHICS
+   set_display_circle_radius(circle_radius);
+   init_display(&argc, argv, "Shallow Water");
+   draw_scene();
+   //if (verbose) sleep(5);
+   sleep(2);
+
+   //  Clear superposition of circle on grid output.
+   circle_radius = -1.0;
+#endif
+   cpu_time_graphics += cpu_timer_stop(tstart_cpu);
+
+   //  Set flag to show mesh results rather than domain decomposition.
+   view_mode = 1;
+
+   if (ncycle == next_cp_cycle) store_crux_data(crux, ncycle);
+
+   cpu_timer_start(&tstart);
+#ifdef HAVE_GRAPHICS
+   set_idle_function(&do_calc);
+   start_main_loop();
+#else
+#ifdef HAVE_ITTNOTIFY
+__itt_resume();
+__SSC_MARK(0x111);
+#endif
+   for (it = ncycle; it < 10000000; it++) {
+      do_calc();
+   }
+#ifdef HAVE_ITTNOTIFY
+__itt_pause();
+__SSC_MARK(0x222);
+#endif
+#endif
+
+   return 0;
+}
+
+// Advance the simulation by one burst of cycles, check mass conservation
+// (rolling back or aborting on failure), emit output, and exit() at niter.
+extern "C" void do_calc(void)
+{  double g     = 9.80;
+   double sigma = 0.95;
+   int icount, jcount;
+   static int rollback_attempt = 0;
+   static double total_program_time = 0;
+   size_t &ncells    = mesh->ncells;  // Alias of the mesh's live cell count
+
+   vector<int>     mpot;
+
+   size_t new_ncells = 0;
+   double H_sum = -1.0;
+
+   //  Main loop.
+   int endcycle = MIN3(niter, next_cp_cycle, next_graphics_cycle);
+
+   cpu_timer_start(&tstart_cpu);
+
+   for (int nburst = ncycle % outputInterval; nburst < outputInterval && ncycle < endcycle; nburst++, ncycle++) {
+
+#ifdef _OPENMP
+#pragma omp parallel
+      {
+#endif
+         //  Calculate the real time step for the current discrete time step.
+         double mydeltaT = state->set_timestep(g, sigma); // Private variable to avoid write conflict
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+           deltaT = mydeltaT;
+           simTime += deltaT;
+#ifdef _OPENMP
+         }
+#endif
+
+         mesh->calc_neighbors(ncells);
+
+         cpu_timer_start(&tstart_partmeas); // NOTE(review): every OpenMP thread writes this shared timer -- confirm benign
+         mesh->partition_measure();
+
+#ifdef _OPENMP
+#pragma omp master
+#endif
+         cpu_time_partmeas += cpu_timer_stop(tstart_partmeas);
+
+         // Currently not working -- may need to be earlier?
+         //if (do_cpu_calc && ! mesh->have_boundary) {
+         //  state->add_boundary_cells(mesh);
+         //}
+
+         // Apply BCs is currently done as first part of gpu_finite_difference and so comparison won't work here
+
+         mesh->set_bounds(ncells);
+
+         //  Execute main kernel
+         if (face_based) {
+            state->calc_finite_difference_via_faces(deltaT);
+         } else {
+            state->calc_finite_difference(deltaT);
+         }
+
+         //  Size of arrays gets reduced to just the real cells in this call for have_boundary = 0
+         state->remove_boundary_cells();
+#ifdef _OPENMP
+      } // end parallel region
+#endif
+
+      mpot.resize(ncells);
+      new_ncells = state->calc_refine_potential(mpot, icount, jcount);
+
+      //  Resize the mesh, inserting cells where refinement is necessary.
+
+#ifdef _OPENMP
+#pragma omp parallel
+      {
+#endif
+      state->rezone_all(icount, jcount, mpot);
+
+      // Clear does not delete mpot, so have to swap with an empty vector to get
+      // it to delete the mpot memory. This is all to avoid valgrind from showing
+      // it as a reachable memory leak
+#ifdef _OPENMP
+#pragma omp master
+      {
+#endif
+      //mpot.clear();
+      vector<int>().swap(mpot);
+
+      mesh->ncells = new_ncells;
+      ncells = new_ncells;
+#ifdef _OPENMP
+      }
+#pragma omp barrier
+#endif
+      mesh->set_bounds(ncells);
+
+#ifdef _OPENMP
+#pragma omp master
+      {
+#endif
+   //cpu_timer_start(&tstart_check);
+      mesh->proc.resize(ncells);
+      if (icount)
+      {  vector<int> index(ncells);
+         mesh->partition_cells(numpe, index, cycle_reorder);
+         state->state_reorder(index);
+         state->memory_reset_ptrs();
+      }
+   //cpu_time_check += cpu_timer_stop(tstart_check);
+#ifdef _OPENMP
+      }
+#pragma omp barrier
+#endif
+
+#ifdef _OPENMP
+      } // end parallel region
+#endif
+
+   } // End burst loop
+
+   cpu_time_calcs += cpu_timer_stop(tstart_cpu);
+
+   H_sum = state->mass_sum(enhanced_precision_sum);
+
+   int error_status = STATUS_OK;
+
+   if (isnan(H_sum)) {
+      printf("Got a NAN on cycle %d\n",ncycle);
+      error_status = STATUS_NAN;
+   }
+
+   double percent_mass_diff = fabs(H_sum - H_sum_initial)/H_sum_initial * 100.0;
+   if (percent_mass_diff >= upper_mass_diff_percentage) {
+      printf("Mass difference outside of acceptable range on cycle %d percent_mass_diff %lg upper limit %lg\n",ncycle,percent_mass_diff, upper_mass_diff_percentage);
+      error_status = STATUS_MASS_LOSS;
+   }
+
+   if (error_status != STATUS_OK){
+      if (crux_type != CRUX_NONE) {
+
+         rollback_attempt++;
+         if (rollback_attempt > num_of_rollback_states) {
+            printf("Can not recover from error from back up files. Killing program...\n");
+            total_program_time = cpu_timer_stop(total_exec);
+            FILE *fp = fopen(total_sim_time_log,"w");  // Best-effort log; fopen may fail
+            if (fp != NULL) fprintf(fp,"The total execution time of the program before failure was %g seconds\n", total_program_time);
+            if (fp != NULL) fclose(fp);
+            state->print_failure_log(ncycle, simTime, H_sum_initial, H_sum, percent_mass_diff, true);
+            exit(-1);
+         }
+
+         if (graphic_outputInterval <= niter){
+            mesh->calc_spatial_coordinates(0);
+            set_graphics_mysize(ncells);
+            set_graphics_viewmode(view_mode);
+            set_graphics_cell_coordinates(&mesh->x[0], &mesh->dx[0], &mesh->y[0], &mesh->dy[0]);
+            set_graphics_cell_data(&state->H[0]);
+            set_graphics_cell_proc(&mesh->proc[0]);
+            write_graphics_info(ncycle/graphic_outputInterval,ncycle,simTime,1,rollback_attempt);
+         }
+
+         if((ncycle - (rollback_attempt)*checkpoint_outputInterval) < 0){
+            printf("Rolling simulation back to to ncycle 0\n");
+         }
+         else{
+            printf("Rolling simulation back to to ncycle %d\n", ncycle - (rollback_attempt*checkpoint_outputInterval));
+         }
+
+         state->print_rollback_log(ncycle, simTime, H_sum_initial, H_sum, percent_mass_diff, rollback_attempt, num_of_rollback_states, error_status);
+
+         int rollback_num = crux->get_rollback_number();
+
+         restore_crux_data_bootstrap(crux, NULL, rollback_num);
+         mesh->terminate();
+         state->terminate();
+         restore_crux_data(crux);
+
+
+      } else {
+         printf("failure.log has been created\n");
+         state->print_failure_log(ncycle, simTime, H_sum_initial, H_sum, percent_mass_diff, true);
+         exit(-1);
+      }
+   }
+
+   if (mype == 0 && ncycle % outputInterval == 0) {   // ncells is a size_t, hence %zu
+      printf("Iteration %3d timestep %lf Sim Time %lf cells %zu Mass Sum %14.12lg Mass Change %12.6lg\n",
+         ncycle, deltaT, simTime, ncells, H_sum, H_sum - H_sum_initial);
+   }
+
+   if (ncycle == next_cp_cycle) store_crux_data(crux, ncycle);
+
+   cpu_timer_start(&tstart_cpu);
+
+   if(do_display_graphics || ncycle == next_graphics_cycle ||
+      (ncycle >= niter && graphic_outputInterval < niter) ){
+
+      mesh->calc_spatial_coordinates(0);
+   }
+
+   if(ncycle == next_graphics_cycle){
+      set_graphics_mysize(ncells);
+      set_graphics_viewmode(view_mode);
+      set_graphics_cell_coordinates(&mesh->x[0], &mesh->dx[0], &mesh->y[0], &mesh->dy[0]);
+      set_graphics_cell_data(&state->H[0]);
+      set_graphics_cell_proc(&mesh->proc[0]);
+
+      write_graphics_info(ncycle/graphic_outputInterval,ncycle,simTime,0,0);
+      next_graphics_cycle += graphic_outputInterval;
+   }
+
+#ifdef HAVE_GRAPHICS
+   if(ncycle % outputInterval == 0){
+      if(ncycle != next_graphics_cycle){
+         set_display_mysize(ncells);
+         set_display_viewmode(view_mode);
+         set_display_cell_coordinates(&mesh->x[0], &mesh->dx[0], &mesh->y[0], &mesh->dy[0]);
+         set_display_cell_data(&state->H[0]);
+         set_display_cell_proc(NULL);
+      }
+      set_display_circle_radius(circle_radius);
+      draw_scene();
+   }
+
+#endif
+
+   cpu_time_graphics += cpu_timer_stop(tstart_cpu);
+
+   //  Output final results and timing information.
+   if (ncycle >= niter) {
+      //free_display();
+
+      if(graphic_outputInterval < niter){
+         cpu_timer_start(&tstart_cpu);
+
+#ifdef HAVE_GRAPHICS
+         set_display_viewmode(view_mode);
+         set_display_mysize(ncells);
+         set_display_cell_coordinates(&mesh->x[0], &mesh->dx[0], &mesh->y[0], &mesh->dy[0]);
+         set_display_cell_data(&state->H[0]);
+         set_display_cell_proc(NULL);
+#endif
+
+         if (mype == 0) {
+            write_graphics_info(ncycle/graphic_outputInterval,ncycle,simTime,0,0);
+         }
+         next_graphics_cycle += graphic_outputInterval;
+
+         cpu_time_graphics += cpu_timer_stop(tstart_cpu);
+      }
+
+      //  Get overall program timing.
+      double elapsed_time = cpu_timer_stop(tstart);
+
+      long long mem_used = memstats_memused();
+      #ifdef MEMORY
+      if (mem_used > 0) {
+         printf("Memory used      %lld kB\n",mem_used);
+         printf("Memory peak      %lld kB\n",memstats_mempeak());
+         printf("Memory free      %lld kB\n",memstats_memfree());
+         printf("Memory available %lld kB\n",memstats_memtotal());
+      }
+      #endif
+      state->output_timing_info(do_cpu_calc, do_gpu_calc, elapsed_time);
+      #ifdef TIMING
+      mesh->parallel_output("CPU:  calc incl part meas     time was",cpu_time_calcs,    0, "s");
+      mesh->parallel_output("CPU:  calculation only        time was",cpu_time_calcs-cpu_time_partmeas,    0, "s");
+      mesh->parallel_output("CPU:  partition measure       time was",cpu_time_partmeas, 0, "s");
+      mesh->parallel_output("CPU:  graphics                time was",cpu_time_graphics, 0, "s");
+      //mesh->parallel_output("CPU:  check                   time was",cpu_time_check,    0, "s");
+      #endif
+
+      mesh->print_partition_measure();
+      mesh->print_calc_neighbor_type();
+      mesh->print_partition_type();
+
+      printf("CPU:  rezone frequency                \t %8.4f\tpercent\n",     (double)mesh->get_cpu_counter(MESH_COUNTER_REZONE)/(double)ncycle*100.0 );
+      printf("CPU:  calc neigh frequency            \t %8.4f\tpercent\n",     (double)mesh->get_cpu_counter(MESH_COUNTER_CALC_NEIGH)/(double)ncycle*100.0 );
+      printf("CPU:  refine_smooth_iter per rezone   \t %8.4f\t\n",            (double)mesh->get_cpu_counter(MESH_COUNTER_REFINE_SMOOTH)/(double)mesh->get_cpu_counter(MESH_COUNTER_REZONE) );
+
+      mesh->terminate();
+      state->terminate();
+
+      terminate_graphics_output();
+
+      delete mesh;
+      delete state;
+      delete crux;
+      delete parse;
+
+      total_program_time = cpu_timer_stop(total_exec);
+      FILE *fp = fopen(total_sim_time_log,"w");  // Best-effort log; fopen may fail
+      if (fp != NULL) fprintf(fp,"The total execution time of the program was %g seconds\n", total_program_time);
+      if (fp != NULL) fclose(fp);
+      exit(0);
+   }  //  Complete final output.
+
+} // end do_calc
+
+const int CRUX_CLAMR_VERSION = 101;  // Layout version stored in int_vals[0] and checked on restore
+const int num_int_vals       = 15;   // Length of the bootstrap int_vals array
+const int num_double_vals    =  5;   // Length of the bootstrap double_vals array
+
+MallocPlus clamr_bootstrap_memory;   // Registry through which the bootstrap arrays are handed to Crux
+
+// Write a checkpoint for cycle ncycle: the bootstrap scalars followed by the
+// State checkpoint data. Also schedules the next checkpoint cycle.
+void store_crux_data(Crux *crux, int ncycle)
+{
+   size_t nsize = num_int_vals*sizeof(int) +
+                  num_double_vals*sizeof(double);
+   nsize += state->get_checkpoint_size();
+
+   next_cp_cycle += checkpoint_outputInterval;
+
+   int int_vals[num_int_vals];
+   int_vals[ 0] = CRUX_CLAMR_VERSION; // Version number
+   int_vals[ 1] = nx;
+   int_vals[ 2] = ny;
+   int_vals[ 3] = levmx;
+   int_vals[ 4] = ndim;
+   int_vals[ 5] = outputInterval;
+   int_vals[ 6] = enhanced_precision_sum;
+   int_vals[ 7] = niter;
+   int_vals[ 8] = it;
+   int_vals[ 9] = ncycle;
+   int_vals[10] = crux_type;
+   int_vals[11] = graphic_outputInterval;
+   int_vals[12] = checkpoint_outputInterval;
+   int_vals[13] = next_cp_cycle;
+   int_vals[14] = next_graphics_cycle;
+
+   double double_vals[num_double_vals];
+   double_vals[ 0] = circ_radius;
+   double_vals[ 1] = H_sum_initial;
+   double_vals[ 2] = simTime;
+   double_vals[ 3] = deltaT;
+   double_vals[ 4] = upper_mass_diff_percentage;
+
+   // Element sizes come from sizeof so they always agree with nsize above.
+   clamr_bootstrap_memory.memory_add(int_vals, size_t(num_int_vals), sizeof(int), "bootstrap_int_vals", RESTART_DATA);
+   clamr_bootstrap_memory.memory_add(double_vals, size_t(num_double_vals), sizeof(double), "bootstrap_double_vals", RESTART_DATA);
+
+   crux->store_begin(nsize, ncycle);
+   crux->store_MallocPlus(clamr_bootstrap_memory);
+   state->store_checkpoint(crux);
+   crux->store_end();
+
+   // The arrays are locals, so unregister them before returning.
+   clamr_bootstrap_memory.memory_remove(int_vals);
+   clamr_bootstrap_memory.memory_remove(double_vals);
+}
+
+// Begin a restore via crux->restore_begin(restart_file, rollback_counter) and
+// reload the bootstrap scalars written by store_crux_data() into the globals.
+void restore_crux_data_bootstrap(Crux *crux, char *restart_file, int rollback_counter)
+{
+   crux->restore_begin(restart_file, rollback_counter);
+
+   int int_vals[num_int_vals];
+   double double_vals[num_double_vals];
+
+   clamr_bootstrap_memory.memory_add(int_vals, size_t(num_int_vals), sizeof(int), "bootstrap_int_vals", RESTART_DATA);
+   clamr_bootstrap_memory.memory_add(double_vals, size_t(num_double_vals), sizeof(double), "bootstrap_double_vals", RESTART_DATA);
+   crux->restore_MallocPlus(clamr_bootstrap_memory);
+
+   if (int_vals[ 0] != CRUX_CLAMR_VERSION) {
+      printf("CRUX version mismatch for clamr data, version on file is %d, version in code is %d\n",
+         int_vals[0], CRUX_CLAMR_VERSION);
+      exit(-1);  // A mismatch is a failure; use the same nonzero status as do_calc()
+   }
+
+   nx                        = int_vals[ 1];
+   ny                        = int_vals[ 2];
+   levmx                     = int_vals[ 3];
+   ndim                      = int_vals[ 4];
+   outputInterval            = int_vals[ 5];
+   enhanced_precision_sum    = int_vals[ 6];
+   niter                     = int_vals[ 7];
+   it                        = int_vals[ 8];
+   ncycle                    = int_vals[ 9];
+   crux_type                 = int_vals[10];
+   graphic_outputInterval    = int_vals[11];
+   checkpoint_outputInterval = int_vals[12];
+   next_cp_cycle             = int_vals[13];
+   next_graphics_cycle       = int_vals[14];
+
+   circ_radius                = double_vals[ 0];
+   H_sum_initial              = double_vals[ 1];
+   simTime                    = double_vals[ 2];
+   deltaT                     = double_vals[ 3];
+   upper_mass_diff_percentage = double_vals[ 4];
+
+   // need to reset crux type, because initialize to none
+   // before checkpoint is read
+   crux->set_crux_type(crux_type);
+
+   clamr_bootstrap_memory.memory_remove(int_vals);
+   clamr_bootstrap_memory.memory_remove(double_vals);
+
+#ifdef DEBUG_RESTORE_VALS
+   if (DEBUG_RESTORE_VALS) {
+      const char *int_vals_descriptor[num_int_vals] = {
+         "CRUX_CLAMR_VERSION",
+         "nx",
+         "ny",
+         "levmx",
+         "ndim",
+         "outputInterval",
+         "enhanced_precision_sum",
+         "niter",
+         "it",
+         "ncycle",
+         "crux_type",
+         "graphic_outputInterval",
+         "checkpoint_outputInterval",
+         "next_cp_cycle",
+         "next_graphics_cycle"
+      };
+      printf("\n");
+      printf("       === Restored bootstrap int_vals ===\n");
+      for (int i = 0; i < num_int_vals; i++){
+         printf("       %-30s %d\n",int_vals_descriptor[i], int_vals[i]);
+      }
+      printf("       === Restored bootstrap int_vals ===\n");
+      printf("\n");
+   }
+#endif
+
+#ifdef DEBUG_RESTORE_VALS
+   if (DEBUG_RESTORE_VALS) {
+      const char *double_vals_descriptor[num_double_vals] = {
+         "circ_radius",
+         "H_sum_initial",
+         "simTime",
+         "deltaT",
+         "upper_mass_diff_percentage"
+      };
+      printf("\n");
+      printf("       === Restored bootstrap double_vals ===\n");
+      for (int i = 0; i < num_double_vals; i++){
+         printf("       %-30s %lg\n",double_vals_descriptor[i], double_vals[i]);
+      }
+      printf("       === Restored bootstrap double_vals ===\n");
+      printf("\n");
+   }
+#endif
+}
+
+void restore_crux_data(Crux *crux)
+{
+   state->restore_checkpoint(crux);  // Reload the State data from the checkpoint being restored
+
+   crux->restore_end();              // Close the restore begun in restore_crux_data_bootstrap()
+}
+
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/crux.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/crux.h
@@ -0,0 +1,114 @@
+/*
+ *  Copyright (c) 2014, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  
+ *  Authors: Brian Atkinson          bwa@g.clemson.edu
+             Bob Robey        XCP-2  brobey@lanl.gov
+ */
+
+#ifndef CRUX_H_
+#define CRUX_H_
+
+#include <stdio.h>
+#include "MallocPlus.h"
+
+// Checkpoint back ends; the value is stored in Crux::crux_type.
+enum crux_types{
+   CRUX_NONE,        // no checkpoint back end selected
+   CRUX_DISK,        // checkpoints are written to files
+   CRUX_IN_MEMORY    // checkpoints are written to in-memory buffers
+};
+
+// Checkpoint/restart helper. The store_* members write values to the
+// checkpoint opened by store_begin(); the restore_* members read values
+// back from the checkpoint opened by restore_begin().
+class Crux
+{
+   int num_of_rollback_states;   // number of rollback slots that are cycled through
+   int crux_type;                // back end in use, one of enum crux_types
+   int checkpoint_counter;       // checkpoints completed so far
+
+public:
+
+   // crux_type_in: one of enum crux_types; restart: true when restarting
+   // from an existing checkpoint.
+   Crux(int crux_type_in, int num_of_rollback_states_in, bool restart);
+   ~Crux();
+
+   // --- writing a checkpoint: store_begin(), any number of store_*, store_end() ---
+   // NOTE(review): MallocPlus is taken by value here — confirm the copy is intended.
+   void store_MallocPlus(MallocPlus memory);
+   void store_begin(size_t nsize, int ncycle);
+   void store_field_header(const char *name, int name_size);
+   void store_bools(bool *bool_vals, size_t nelem);
+   void store_ints(int *int_vals, size_t nelem);
+   void store_longs(long long *long_vals, size_t nelem);
+   void store_sizets(size_t *size_t_vals, size_t nelem);
+   void store_doubles(double *double_vals, size_t nelem);
+   void store_int_array(int *int_array, size_t nelem);
+   void store_long_array(long long *long_array, size_t nelem);
+   void store_float_array(float *float_array, size_t nelem);
+   void store_double_array(double *double_array, size_t nelem);
+   void store_replicated_int_array(int *int_array, size_t nelem);
+   void store_replicated_double_array(double *double_array, size_t nelem);
+   void store_named_ints(const char *name, int name_size, int *int_vals, size_t nelem);
+#ifdef HAVE_MPI
+   void store_distributed_int_array(int *int_array, size_t nelem, int flags);
+   void store_distributed_double_array(double *double_array, size_t nelem, int flags);
+#endif
+   void store_end(void);
+
+   // --- reading a checkpoint: restore_begin(), any number of restore_*, restore_end() ---
+   void       restore_MallocPlus(MallocPlus memory);
+   // restart_file may be NULL, in which case a rollback slot is used instead.
+   void       restore_begin(char *restart_file, int rollback_counter);
+   void       restore_field_header(char *name, int name_size);
+   void       restore_bools(bool *bool_vals, size_t nelem);
+   void       restore_ints(int *int_vals, size_t nelem);
+   void       restore_longs(long long *long_vals, size_t nelem);
+   void       restore_sizets(size_t *size_t_vals, size_t nelem);
+   void       restore_doubles(double *double_vals, size_t nelem);
+   int       *restore_int_array(int *int_array, size_t nsize);
+   long long *restore_long_array(long long *long_array, size_t nsize);
+   float     *restore_float_array(float *float_array, size_t nsize);
+   double    *restore_double_array(double *double_array, size_t nsize);
+   int       *restore_replicated_int_array(int *int_array, size_t nsize);
+   double    *restore_replicated_double_array(double *double_array, size_t nsize);
+   void      restore_named_ints(const char *name, int name_size, int *int_vals, size_t nelem);
+#ifdef HAVE_MPI
+   int       *restore_distributed_int_array(int *int_array, size_t nsize, int flags);
+   double    *restore_distributed_double_array(double *double_array, size_t nsize, int flags);
+#endif
+   void       restore_end(void);
+
+   int get_rollback_number();
+   void set_crux_type(int crux_type_in);
+
+};
+#endif // CRUX_H_
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/crux.cpp
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/crux.cpp
@@ -0,0 +1,1054 @@
+/*
+ *  Copyright (c) 2014, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  
+ *  Authors: Brian Atkinson          bwa@g.clemson.edu
+             Bob Robey        XCP-2  brobey@lanl.gov
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <algorithm>
+#include <assert.h>
+#include "PowerParser.hh"
+
+#include "crux.h"
+#include "timer.h"
+#include "fmemopen.h"
+
+#ifdef HAVE_HDF5
+#include "hdf5.h"
+#endif
+#ifdef HAVE_MPI
+#include "mpi.h"
+#endif
+
+// Compile-time timing switch; the constructor copies it into do_crux_timing
+// when checkpointing or restart is active.
+const bool CRUX_TIMING = true;
+// True while checkpoint timing is being collected and logged.
+bool do_crux_timing = false;
+
+// Values taken by restore_type.
+#define RESTORE_NONE     0
+#define RESTORE_RESTART  1
+#define RESTORE_ROLLBACK 2
+
+// DEBUG defaults to off unless the build defines it.
+#ifndef DEBUG
+#define DEBUG 0
+#endif
+// Defined unconditionally, so every "#ifdef DEBUG_RESTORE_VALS" section in
+// this file is compiled in.
+#define DEBUG_RESTORE_VALS 1
+
+using namespace std;
+using PP::PowerParser;
+// Pointers to the various objects.
+PowerParser *parse;
+
+// Directory that receives the checkpoint files and the timing log.
+char checkpoint_directory[] = "checkpoint_output";
+// Rollback slot used by the current store (cp_num) and restore (rs_num).
+int cp_num, rs_num;
+int *backup;
+// Per-slot buffers for in-memory checkpoints and their sizes in bytes.
+void **crux_data;
+size_t *crux_data_size;
+#ifdef HAVE_HDF5
+// Selects the HDF5 code paths when the build has HDF5 support.
+bool USE_HDF5 = true; //MSB
+// Identifier of the HDF5 file currently open for store or restore.
+hid_t h5_fid;
+herr_t h5err;
+// Set to true by restore_begin() when a restart file is given.
+bool is_restart = false;
+
+hid_t create_hdf5_parallel_file_plist();
+
+void map_name_to_hdf5 (const char*, int, char*, char*);
+
+void access_named_hdf5_values (const char *name, int name_size,
+                              hsize_t rank, hsize_t *cur_size, 
+                              void *values, hid_t datatype,
+                              bool store);
+#endif
+
+
+// Timing log, opened by the constructor when do_crux_timing is set.
+FILE *crux_time_fp;
+// Timer start points for the current checkpoint and the current restore.
+struct timeval tcheckpoint_time;
+struct timeval trestore_time;
+// Running totals used for the summary printed by the destructor:
+// checkpoints taken, seconds spent and bytes requested.
+int checkpoint_timing_count = 0;
+float checkpoint_timing_sum = 0.0f;
+float checkpoint_timing_size = 0.0f;
+int rollback_attempt = 0;
+// stdio streams for the checkpoint being written / being read.
+FILE *store_fp, *restore_fp;
+#ifdef HAVE_MPI
+// MPI-IO handles used in place of the stdio streams by the MPI code paths.
+static MPI_File mpi_store_fp, mpi_restore_fp;
+#endif
+// This process' rank and the number of ranks; 0 and 1 without MPI.
+static int mype = 0, npes = 1;
+
+/*
+ * Set up the checkpoint/restart object.
+ *
+ *   crux_type_in               back end to use, one of enum crux_types
+ *   num_of_rollback_states_in  number of rollback slots to cycle through
+ *   restart                    true when the run restarts from a checkpoint
+ *
+ * When checkpointing or restart is active, the checkpoint directory is
+ * created if it is missing and the timing log is opened inside it.
+ */
+Crux::Crux(int crux_type_in, int num_of_rollback_states_in, bool restart)
+{
+#ifdef HAVE_MPI
+   MPI_Comm_rank(MPI_COMM_WORLD,&mype);
+   MPI_Comm_size(MPI_COMM_WORLD,&npes);
+#endif
+
+   num_of_rollback_states = num_of_rollback_states_in;
+   crux_type = crux_type_in;
+   checkpoint_counter = 0;
+
+   if (crux_type != CRUX_NONE || restart){
+      do_crux_timing = CRUX_TIMING;
+      struct stat stat_descriptor;
+      if (stat(checkpoint_directory,&stat_descriptor) == -1){
+        mkdir(checkpoint_directory,0777);
+      }
+   }
+
+   // One buffer pointer and one byte count per rollback slot. Both start
+   // out empty so a slot that was never stored into is recognizable.
+   crux_data = (void **)malloc(num_of_rollback_states*sizeof(void *));
+   crux_data_size = (size_t *)malloc(num_of_rollback_states*sizeof(size_t));
+   for (int i = 0; i < num_of_rollback_states; i++){
+      crux_data[i] = NULL;
+      crux_data_size[i] = 0;
+   }
+
+   if (do_crux_timing){
+      char checkpointtimelog[60];
+      snprintf(checkpointtimelog,sizeof(checkpointtimelog),"%s/crux_timing.log",checkpoint_directory);
+      crux_time_fp = fopen(checkpointtimelog,"w");
+      if (crux_time_fp == NULL){
+         // Without a log there is nowhere to write timings; switch timing
+         // off so no later code writes through a NULL stream.
+         fprintf(stderr,"Warning: could not open %s; checkpoint timing disabled\n",checkpointtimelog);
+         do_crux_timing = false;
+      }
+   }
+}
+
+/*
+ * Release the rollback buffers and, when timing is enabled, print the
+ * checkpoint timing summary and close the timing log.
+ */
+Crux::~Crux()
+{
+   for (int i = 0; i < num_of_rollback_states; i++){
+      free(crux_data[i]);
+   }
+   free(crux_data);
+   free(crux_data_size);
+
+   if (do_crux_timing){
+      // The averages are only meaningful once at least one checkpoint was taken.
+      if (checkpoint_timing_count > 0) {
+         printf("CRUX checkpointing time averaged %f msec, bandwidth %f Mbytes/sec\n",
+                checkpoint_timing_sum/(float)checkpoint_timing_count*1.0e3,
+                checkpoint_timing_size/checkpoint_timing_sum*1.0e-6);
+
+         if (crux_time_fp != NULL) {
+            fprintf(crux_time_fp,"CRUX checkpointing time averaged %f msec, bandwidth %f Mbytes/sec\n",
+                   checkpoint_timing_sum/(float)checkpoint_timing_count*1.0e3,
+                   checkpoint_timing_size/checkpoint_timing_sum*1.0e-6);
+         }
+      }
+
+      // Close the log whether or not a checkpoint was taken; it is opened
+      // whenever timing is enabled.
+      if (crux_time_fp != NULL) {
+         fclose(crux_time_fp);
+         crux_time_fp = NULL;
+      }
+   }
+}
+
+/*
+ * Write every entry of `memory` that is flagged RESTART_DATA to the open
+ * checkpoint.
+ *
+ * With HDF5 each entry becomes a named dataset. Otherwise each entry is
+ * written as a fixed-width name record followed by its values; entries with
+ * an element size of 4 are written as int, all others as double.
+ */
+void Crux::store_MallocPlus(MallocPlus memory){
+
+    // Width of the name record in the non-HDF5 format; restore_MallocPlus
+    // reads back the same number of characters.
+    const int field_header_size = 30;
+
+    malloc_plus_memory_entry *memory_item;
+
+    for (memory_item = memory.memory_entry_by_name_begin();
+         memory_item != memory.memory_entry_by_name_end();
+         memory_item = memory.memory_entry_by_name_next() ){
+
+        void *mem_ptr = memory_item->mem_ptr;
+        if ((memory_item->mem_flags & RESTART_DATA) == 0) continue;
+
+        if (DEBUG) {
+            printf("MallocPlus ptr  %p: name %10s ptr %p dims %lu nelem (",
+                    mem_ptr,memory_item->mem_name,memory_item->mem_ptr,memory_item->mem_ndims);
+
+            char nelemstring[80];
+            char *str_ptr = nelemstring;
+            str_ptr += sprintf(str_ptr,"%lu", memory_item->mem_nelem[0]);
+            for (uint i = 1; i < memory_item->mem_ndims; i++){
+                str_ptr += sprintf(str_ptr,", %lu", memory_item->mem_nelem[i]);
+            }
+            printf("%12s",nelemstring);
+
+            printf(") elsize %lu flags %d capacity %lu\n",
+                    memory_item->mem_elsize,memory_item->mem_flags,memory_item->mem_capacity);
+        }
+
+#ifdef HAVE_HDF5
+        if(USE_HDF5) {
+            access_named_hdf5_values (memory_item->mem_name,
+                              strlen (memory_item->mem_name),
+                              (hsize_t) memory_item->mem_ndims,
+                              (hsize_t *) memory_item->mem_nelem,
+                              mem_ptr,
+                              memory_item->mem_elsize == 4 ?
+                              H5T_NATIVE_INT : H5T_NATIVE_DOUBLE,
+                              true);
+        } else {
+#endif
+            // Total element count over all dimensions; kept in size_t so a
+            // large array does not overflow an int.
+            size_t num_elements = 1;
+            for (uint i = 0; i < memory_item->mem_ndims; i++){
+                num_elements *= memory_item->mem_nelem[i];
+            }
+
+            // mem_name is used as a NUL-terminated string elsewhere in this
+            // function, so it may be shorter than the record. Copy it into a
+            // zero-padded record instead of writing field_header_size bytes
+            // straight from the string (strncpy pads with NULs).
+            char field_header[field_header_size];
+            strncpy(field_header, memory_item->mem_name, sizeof(field_header));
+            store_field_header(field_header,field_header_size);
+
+            if (memory_item->mem_flags & REPLICATED_DATA) {
+                if (memory_item->mem_elsize == 4){
+                    store_replicated_int_array((int *)mem_ptr, num_elements);
+                } else {
+                    store_replicated_double_array((double *)mem_ptr, num_elements);
+                }
+            } else {
+                if (memory_item->mem_elsize == 4){
+                    store_int_array((int *)mem_ptr, num_elements);
+                } else {
+                    store_double_array((double *)mem_ptr, num_elements);
+                }
+            }
+        }   // closes the HDF5 "else" when HAVE_HDF5 is defined, the loop otherwise
+#ifdef HAVE_HDF5
+    }
+#endif
+}
+
+/*
+ * Open the destination for a new checkpoint and start the checkpoint timer.
+ *
+ *   nsize   size in bytes of the checkpoint (buffer size for in-memory
+ *           checkpoints; also added to the timing statistics)
+ *   ncycle  cycle number, used in the checkpoint file name
+ *
+ * In-memory checkpoints get a fresh buffer in the current rollback slot.
+ * Disk checkpoints are created inside checkpoint_directory, and rank 0 points
+ * the slot's "backupN.crx" symlink at the new file.
+ */
+void Crux::store_begin(size_t nsize, int ncycle)
+{
+
+   int mype = 0;
+
+#ifdef HAVE_MPI
+   MPI_Comm_rank(MPI_COMM_WORLD,&mype);
+#endif
+
+   // Rollback slots are reused round-robin.
+   cp_num = checkpoint_counter % num_of_rollback_states;
+   cpu_timer_start(&tcheckpoint_time);
+
+   if(crux_type == CRUX_IN_MEMORY) {
+      if (crux_data[cp_num] != NULL) free(crux_data[cp_num]);
+      crux_data[cp_num] = (int *)malloc(nsize);
+      crux_data_size[cp_num] = nsize;
+      // Only wrap the buffer in a stream when the allocation succeeded.
+      store_fp = (crux_data[cp_num] != NULL) ? fmemopen(crux_data[cp_num], nsize, "w") : NULL;
+      if(!store_fp){
+         printf("Could not open in-memory checkpoint buffer at iteration %d\n",ncycle);
+      }
+   } else if(crux_type == CRUX_DISK) {
+      char backup_file_w_dir[60];
+      char backup_file[40];
+#ifdef HAVE_HDF5
+      if(USE_HDF5) {
+
+          hid_t plist_id = create_hdf5_parallel_file_plist();
+
+#ifdef HDF5_FF
+          if(is_restart)
+              snprintf(backup_file_w_dir,sizeof(backup_file_w_dir),"rbackup%05d.h5",ncycle);
+          else
+              snprintf(backup_file_w_dir,sizeof(backup_file_w_dir),"backup%05d.h5",ncycle);
+#else
+          snprintf(backup_file_w_dir,sizeof(backup_file_w_dir),"%s/backup%05d.h5",checkpoint_directory,ncycle);
+          snprintf(backup_file,sizeof(backup_file),"backup%05d.h5",ncycle);
+#endif
+          // H5Fcreate signals failure with a negative identifier, so test the
+          // sign rather than comparing against zero.
+          h5_fid = H5Fcreate(backup_file_w_dir, H5F_ACC_TRUNC, H5P_DEFAULT, plist_id);
+          if(h5_fid < 0) {
+              printf("HDF5: Could not write HDF5 %s at iteration %d\n",backup_file_w_dir,ncycle);
+          }
+          H5Pclose(plist_id);
+      } else {
+#endif
+          snprintf(backup_file_w_dir,sizeof(backup_file_w_dir),"%s/backup%05d.crx",checkpoint_directory,ncycle);
+          snprintf(backup_file,sizeof(backup_file),"backup%05d.crx",ncycle);
+#ifdef HAVE_MPI
+          int iret = MPI_File_open(MPI_COMM_WORLD, backup_file_w_dir, MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &mpi_store_fp);
+          if(iret != MPI_SUCCESS) {
+              printf("Could not write %s at iteration %d\n",backup_file_w_dir,ncycle);
+          }
+#else
+          store_fp = fopen(backup_file_w_dir,"w");
+          if(!store_fp){
+              printf("Could not write %s at iteration %d\n",backup_file_w_dir,ncycle);
+          }
+#endif
+          if (mype == 0) {
+              // Re-point this slot's symlink at the new file. The link target
+              // is relative because the link lives in the same directory.
+              // The result of symlink() is deliberately not reported.
+              char symlink_file[60];
+              snprintf(symlink_file,sizeof(symlink_file),"%s/backup%1d.crx",checkpoint_directory,cp_num);
+              unlink(symlink_file);
+              symlink(backup_file, symlink_file);
+          }
+      }   // closes the HDF5 "else" when HAVE_HDF5 is defined, the CRUX_DISK branch otherwise
+#ifdef HAVE_HDF5
+    }
+#endif
+   if (do_crux_timing) {
+      checkpoint_timing_size += nsize;
+   }
+}
+
+// Write a field-name record of name_size characters, starting at `name`, to
+// the open checkpoint. With HAVE_MPI the record is written with
+// MPI_File_write_shared on mpi_store_fp and followed by a barrier; otherwise
+// it is written to store_fp with fwrite.
+void Crux::store_field_header(const char *name, int name_size){
+#ifdef HAVE_MPI
+   assert(name != NULL);
+   MPI_Status status;
+   MPI_File_write_shared(mpi_store_fp, (void *)name, name_size, MPI_CHAR, &status);
+   MPI_Barrier(MPI_COMM_WORLD);
+#ifdef DEBUG_RESTORE_VALS
+   // Report how many characters the write call transferred.
+   int count;
+   MPI_Get_count(&status, MPI_CHAR, &count);
+   printf("%d:Wrote %d characters at line %d in file %s\n",mype,count,__LINE__,__FILE__);
+#endif
+
+#else
+   assert(name != NULL && store_fp != NULL);
+   // Exactly name_size bytes are written, whatever the length of the string.
+   fwrite(name,sizeof(char),name_size,store_fp);
+#endif
+}
+
+#ifdef HAVE_HDF5
+/*
+ * Create the HDF5 file-access property list used to create or open
+ * checkpoint files. With HAVE_MPI the list is additionally configured for
+ * the latest library format and for parallel access over MPI_COMM_WORLD.
+ *
+ * Returns the property list identifier; the caller closes it with H5Pclose.
+ * Failures are reported on stdout and the (possibly invalid) identifier is
+ * still returned.
+ */
+hid_t create_hdf5_parallel_file_plist()
+{
+    hid_t plist_id = H5P_DEFAULT;
+
+    if( (plist_id = H5Pcreate(H5P_FILE_ACCESS)) < 0)
+        printf("HDF5: Could not create property list \n");
+
+#ifdef HAVE_MPI
+    if( H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) < 0)
+        printf("HDF5: Could not set libver bounds \n");
+# ifdef HDF5_FF
+    H5Pset_fapl_daosm(plist_id, MPI_COMM_WORLD, MPI_INFO_NULL);
+    if(H5Pset_all_coll_metadata_ops(plist_id, true) < 0)
+        printf("HDF5: Could not set collective metadata \n");
+# else
+    H5Pset_fapl_mpio(plist_id, MPI_COMM_WORLD, MPI_INFO_NULL);
+# endif
+#endif
+    return plist_id;
+}
+
+/*
+ * Split a field name into an HDF5 group path and a dataset label.
+ *
+ *   name       NUL-terminated field name
+ *   name_size  number of leading characters of `name` searched for '_'
+ *   group      output: "/" followed by the text before the first '_', or
+ *              "/default" when no '_' is found
+ *   label      output: the text after the first '_', or the whole name when
+ *              no '_' is found
+ *
+ * The caller must supply buffers large enough for the results.
+ */
+void map_name_to_hdf5 (const char *name, int name_size,
+                        char *group,
+                        char *label)
+{
+    static const char * default_group = "default";
+    int i, j;
+    group[0] = '/';
+    // Look for the separator, but never beyond the end of the string even
+    // when name_size is larger than the string length.
+    for (i=0; i<name_size && name[i] != '\0'; i++)
+        if (name[i] == '_') break;
+    if (i < name_size && name[i] == '_') {
+        // Prefix before the separator becomes the group.
+        for (j=0; j<i; j++)
+            group[1+j] = name[j];
+        ++i;   // the label starts after the separator
+    } else {
+        for (j=0; default_group[j]; j++)
+            group[1+j] = default_group[j];
+        i=0;   // the label is the whole name
+    }
+    group[1+j] = '\0';
+    for (j=i; name[j]; j++)
+        label[j-i] = name[j];
+    label[j-i] = '\0';
+}
+
+/*
+ * Write (`store` true) or read (`store` false) one named array in the open
+ * HDF5 checkpoint file h5_fid.
+ *
+ *   name, name_size  field name; split into group and dataset names by
+ *                    map_name_to_hdf5
+ *   rank, sizes      number of dimensions and the extent of each; the array
+ *                    is handled as a flat vector of their product
+ *   values           buffer written from or read into
+ *   datatype         HDF5 datatype of the elements
+ *
+ * With more than one MPI rank the per-rank counts are gathered, the dataset
+ * holds the sum of them, and each rank accesses the slab that starts at the
+ * sum of the counts of the lower ranks. Any HDF5 failure is reported on
+ * stderr and ends the program with exit(1).
+ */
+void access_named_hdf5_values (const char *name, int name_size,
+                              hsize_t rank, hsize_t *sizes,
+                              void *values, hid_t datatype,
+                              bool store)
+{
+    size_t length = 0, count = 1, offset = 0;
+    char groupname[512], fieldname[512];
+    hid_t hid_group, hid_space, hid_mem, hid_dataset, hid_plist = H5P_DEFAULT;
+
+    map_name_to_hdf5(name, name_size, groupname, fieldname);
+    for (hsize_t i=0; i<rank; i++)
+        count *= sizes[i];
+#ifdef HAVE_MPI
+    hid_plist = H5Pcreate(H5P_DATASET_XFER);
+#   ifndef HDF5_FF
+    H5Pset_dxpl_mpio(hid_plist, H5FD_MPIO_COLLECTIVE);
+#   endif
+    if (npes > 1) {
+        // Gather every rank's element count to find this rank's offset and
+        // the total length of the dataset.
+        size_t *counts = new size_t[npes];
+        MPI_Allgather (&count, sizeof(count), MPI_BYTE,
+                       counts, sizeof *counts, MPI_BYTE,
+                       MPI_COMM_WORLD);
+        for (int i=0; i<npes; i++) {
+            if (i == mype)
+                offset = length;
+            length += counts[i];
+        }
+        delete[] counts;
+    } else {
+#endif
+        length = count;
+#ifdef HAVE_MPI
+    }
+#endif
+
+    // HDF5 takes extents as hsize_t; use properly typed copies instead of
+    // reinterpreting the size_t objects through a pointer cast.
+    hsize_t h_count = count, h_length = length, h_offset = offset;
+
+#ifndef HDF5_FF
+    if (!store || H5Lexists(h5_fid, groupname, H5P_DEFAULT))
+      hid_group = H5Gopen (h5_fid, groupname, H5P_DEFAULT);
+    else
+      hid_group = H5Gcreate (h5_fid, groupname, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+#else
+    int mpi_rank;
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
+
+    if(store) {
+        hbool_t ret;
+
+        // Only rank 0 queries the link; the answer is broadcast to the rest.
+        if(mpi_rank == 0)
+            ret = H5Lexists(h5_fid, groupname, H5P_DEFAULT);
+
+        MPI_Bcast(&ret, sizeof(hbool_t), MPI_BYTE, 0, MPI_COMM_WORLD);
+        if(!ret)
+            hid_group = H5Gcreate (h5_fid, groupname, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+        else
+            hid_group = H5Gopen (h5_fid, groupname, H5P_DEFAULT);
+    }
+    if (!store) {
+        hid_group = H5Gopen (h5_fid, groupname, H5P_DEFAULT);
+    }
+#endif
+    if (hid_group < 0) {
+      fprintf(stderr, "Unable to create group: %30s\n", groupname);
+      exit(1);
+    }
+    hid_mem = H5Screate_simple (1, &h_count, NULL);
+    hid_space = H5Screate_simple (1, &h_length, NULL);
+    if (hid_mem < 0 || hid_space < 0) {
+        fprintf(stderr, "Unable to create space\n");
+        exit(1);
+    }
+    if (store) {
+        hid_dataset = H5Dcreate (hid_group, fieldname, datatype, hid_space,
+                             H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+    } else
+      hid_dataset = H5Dopen (hid_group, fieldname, H5P_DEFAULT);
+
+    // HDF5 reports failure with a negative identifier.
+    if(hid_dataset < 0) {
+        fprintf(stderr, "Unable to access dataset %s\n", fieldname);
+        exit(1);
+    }
+
+    herr_t status;
+    status = H5Sselect_hyperslab (hid_space, H5S_SELECT_SET,
+                                 &h_offset, NULL,
+                                 &h_count, NULL);
+    if(status < 0) {
+        fprintf(stderr, "Unable to select correct hyperslab\n");
+        exit(1);
+    }
+    if (store)
+        status = H5Dwrite (hid_dataset, datatype, hid_mem, hid_space, hid_plist, values);
+    else
+      status = H5Dread (hid_dataset, datatype, hid_mem, hid_space, hid_plist, values);
+
+    // A failed transfer would otherwise leave the checkpoint or the restored
+    // state silently incomplete.
+    if(status < 0) {
+        fprintf(stderr, "Unable to %s dataset %s\n", store ? "write" : "read", fieldname);
+        exit(1);
+    }
+
+    H5Dclose (hid_dataset);
+    H5Gclose (hid_group);
+    H5Sclose (hid_space);
+    H5Sclose (hid_mem);
+#ifdef HAVE_MPI
+    H5Pclose (hid_plist);
+#endif
+}
+#endif
+
+/*
+ * Store nelem ints under the given field name: as a named HDF5 dataset when
+ * HDF5 is in use, otherwise as a name record followed by the values.
+ */
+void Crux::store_named_ints(const char *name, int name_size, int *int_vals, size_t nelem)
+{
+#ifdef HAVE_HDF5
+    if (USE_HDF5) {
+        // One-dimensional dataset whose single extent is nelem.
+        hsize_t *extent = (hsize_t *) &nelem;
+        access_named_hdf5_values (name, name_size, 1, extent,
+                                 int_vals, H5T_NATIVE_INT, true);
+        return;
+    }
+#endif
+    // Plain checkpoint format: header first, then the payload.
+    store_field_header (name, name_size);
+    store_int_array (int_vals, nelem);
+}
+
+/*
+ * Read back nelem ints stored by store_named_ints: from the named HDF5
+ * dataset when HDF5 is in use, otherwise by reading a name record of
+ * name_size characters followed by the values.
+ *
+ * In the non-HDF5 format the name record is read and discarded; it is not
+ * compared against `name`.
+ */
+void Crux::restore_named_ints(const char *name, int name_size, int *int_vals, size_t nelem)
+{
+#ifdef HAVE_HDF5
+    if (USE_HDF5) {
+        access_named_hdf5_values (name, name_size, 1, (hsize_t *) &nelem,
+                                 int_vals, H5T_NATIVE_INT, false);
+        return;
+    }
+#endif
+    char fname[512];
+    // The record is read into a fixed buffer; refuse sizes that do not fit.
+    assert(name_size >= 0 && (size_t)name_size <= sizeof(fname));
+    restore_field_header (fname, name_size);
+    restore_int_array (int_vals, nelem);
+}
+
+// Append nelem bool values to the checkpoint stream store_fp.
+void Crux::store_bools(bool *bool_vals, size_t nelem)
+{
+   assert(bool_vals != NULL && store_fp != NULL);
+   const size_t elem_bytes = sizeof(bool);
+   fwrite(bool_vals, elem_bytes, nelem, store_fp);
+}
+
+// Append nelem int values to the checkpoint stream store_fp.
+void Crux::store_ints(int *int_vals, size_t nelem)
+{
+   assert(int_vals != NULL && store_fp != NULL);
+   const size_t elem_bytes = sizeof(int);
+   fwrite(int_vals, elem_bytes, nelem, store_fp);
+}
+
+// Append nelem long long values to the checkpoint stream store_fp.
+void Crux::store_longs(long long *long_vals, size_t nelem)
+{
+   assert(long_vals != NULL && store_fp != NULL);
+   const size_t elem_bytes = sizeof(long long);
+   fwrite(long_vals, elem_bytes, nelem, store_fp);
+}
+
+// Append nelem size_t values to the checkpoint stream store_fp.
+void Crux::store_sizets(size_t *size_t_vals, size_t nelem)
+{
+   assert(size_t_vals != NULL && store_fp != NULL);
+   const size_t elem_bytes = sizeof(size_t);
+   fwrite(size_t_vals, elem_bytes, nelem, store_fp);
+}
+
+// Append nelem double values to the checkpoint stream store_fp.
+void Crux::store_doubles(double *double_vals, size_t nelem)
+{
+   assert(double_vals != NULL && store_fp != NULL);
+   const size_t elem_bytes = sizeof(double);
+   fwrite(double_vals, elem_bytes, nelem, store_fp);
+}
+
+// Write nelem ints from int_array to the open checkpoint. With HAVE_MPI the
+// data is written with MPI_File_write_shared on mpi_store_fp and followed by
+// a barrier; otherwise it is written to store_fp with fwrite.
+void Crux::store_int_array(int *int_array, size_t nelem)
+{
+#ifdef HAVE_MPI
+   assert(int_array != NULL);
+   MPI_Status status;
+   // The element count is narrowed to int for the MPI call.
+   MPI_File_write_shared(mpi_store_fp, int_array, (int)nelem, MPI_INT, &status);
+   MPI_Barrier(MPI_COMM_WORLD);
+#ifdef DEBUG_RESTORE_VALS
+   // Report how many elements the write call transferred.
+   int count;
+   MPI_Get_count(&status, MPI_INT, &count);
+   printf("%d:Wrote %d integers at line %d in file %s\n",mype,count,__LINE__,__FILE__);
+#endif
+
+#else
+   assert(int_array != NULL && store_fp != NULL);
+   fwrite(int_array,sizeof(int),nelem,store_fp);
+#endif
+}
+
+// Write nelem long long values from long_array to the checkpoint stream store_fp.
+void Crux::store_long_array(long long *long_array, size_t nelem)
+{
+   assert(long_array != NULL && store_fp != NULL);
+   const size_t elem_bytes = sizeof(long long);
+   fwrite(long_array, elem_bytes, nelem, store_fp);
+}
+
+// Write nelem float values from float_array to the checkpoint stream store_fp.
+void Crux::store_float_array(float *float_array, size_t nelem)
+{
+   assert(float_array != NULL && store_fp != NULL);
+   const size_t elem_bytes = sizeof(float);
+   fwrite(float_array, elem_bytes, nelem, store_fp);
+}
+
+// Write nelem doubles from double_array to the open checkpoint. With HAVE_MPI
+// the data is written with MPI_File_write_shared on mpi_store_fp and followed
+// by a barrier; otherwise it is written to store_fp with fwrite.
+void Crux::store_double_array(double *double_array, size_t nelem)
+{
+#ifdef HAVE_MPI
+   assert(double_array != NULL);
+   MPI_Status status;
+   // The element count is narrowed to int for the MPI call.
+   MPI_File_write_shared(mpi_store_fp, double_array, (int)nelem, MPI_DOUBLE, &status);
+   MPI_Barrier(MPI_COMM_WORLD);
+#ifdef DEBUG_RESTORE_VALS
+   // Report how many elements the write call transferred.
+   int count;
+   MPI_Get_count(&status, MPI_DOUBLE, &count);
+   printf("%d:Wrote %d doubles at line %d in file %s\n",mype,count,__LINE__,__FILE__);
+#endif
+
+#else
+   assert(double_array != NULL && store_fp != NULL);
+   fwrite(double_array,sizeof(double),nelem,store_fp);
+#endif
+}
+
+// Write nelem ints of an array flagged REPLICATED_DATA to the open
+// checkpoint. The statements are the same as in store_int_array: every rank
+// that calls this issues its own MPI_File_write_shared.
+// NOTE(review): confirm that writing replicated data once per rank is intended.
+void Crux::store_replicated_int_array(int *int_array, size_t nelem)
+{
+#ifdef HAVE_MPI
+   assert(int_array != NULL);
+   MPI_Status status;
+   // The element count is narrowed to int for the MPI call.
+   MPI_File_write_shared(mpi_store_fp, int_array, (int)nelem, MPI_INT, &status);
+   MPI_Barrier(MPI_COMM_WORLD);
+#ifdef DEBUG_RESTORE_VALS
+   // Report how many elements the write call transferred.
+   int count;
+   MPI_Get_count(&status, MPI_INT, &count);
+   printf("%d:Wrote %d integers at line %d in file %s\n",mype,count,__LINE__,__FILE__);
+#endif
+
+#else
+   assert(int_array != NULL && store_fp != NULL);
+   fwrite(int_array,sizeof(int),nelem,store_fp);
+#endif
+}
+
+// Write nelem doubles of an array flagged REPLICATED_DATA to the open
+// checkpoint. The statements are the same as in store_double_array: every
+// rank that calls this issues its own MPI_File_write_shared.
+// NOTE(review): confirm that writing replicated data once per rank is intended.
+void Crux::store_replicated_double_array(double *double_array, size_t nelem)
+{
+#ifdef HAVE_MPI
+   assert(double_array != NULL);
+   MPI_Status status;
+   // The element count is narrowed to int for the MPI call.
+   MPI_File_write_shared(mpi_store_fp, double_array, (int)nelem, MPI_DOUBLE, &status);
+   MPI_Barrier(MPI_COMM_WORLD);
+#ifdef DEBUG_RESTORE_VALS
+   // Report how many elements the write call transferred.
+   int count;
+   MPI_Get_count(&status, MPI_DOUBLE, &count);
+   printf("%d:Wrote %d doubles at line %d in file %s\n",mype,count,__LINE__,__FILE__);
+#endif
+
+#else
+   assert(double_array != NULL && store_fp != NULL);
+   fwrite(double_array,sizeof(double),nelem,store_fp);
+#endif
+}
+
+#ifdef HAVE_MPI
+/*
+ * Placeholder for writing a distributed int array. No data is written: the
+ * MPI-IO call that would do so is not enabled, so int_array is only checked
+ * for NULL and nelem and flags are unused.
+ *
+ * TODO: write the data, e.g. with
+ *   MPI_File_write_shared(mpi_store_fp, int_array, (int)nelem, MPI_INT, &status);
+ * once the distributed file layout is settled.
+ */
+void Crux::store_distributed_int_array(int *int_array, size_t nelem, int flags)
+{
+   assert(int_array != NULL);
+   (void)nelem;
+   (void)flags;
+   printf("writing crux data type 8\n");
+#ifdef DEBUG_RESTORE_VALS
+   // Nothing was written, so report zero rather than querying an MPI_Status
+   // that no MPI call has filled in.
+   int count = 0;
+   printf("%d:Wrote %d integers at line %d in file %s\n",mype,count,__LINE__,__FILE__);
+#endif
+}
+/*
+ * Placeholder for writing a distributed double array. No data is written:
+ * the MPI-IO call that would do so is not enabled, so double_array is only
+ * checked for NULL and nelem and flags are unused.
+ *
+ * TODO: write the data, e.g. with
+ *   MPI_File_write_shared(mpi_store_fp, double_array, (int)nelem, MPI_DOUBLE, &status);
+ * once the distributed file layout is settled.
+ */
+void Crux::store_distributed_double_array(double *double_array, size_t nelem, int flags)
+{
+   assert(double_array != NULL);
+   (void)nelem;
+   (void)flags;
+#ifdef DEBUG_RESTORE_VALS
+   // Nothing was written, so report zero rather than querying an MPI_Status
+   // that no MPI call has filled in.
+   int count = 0;
+   printf("%d:Wrote %d doubles at line %d in file %s\n",mype,count,__LINE__,__FILE__);
+#endif
+}
+#endif
+
+/*
+ * Finish the checkpoint started by store_begin(): close the checkpoint
+ * destination, stop the checkpoint timer, update the timing statistics and
+ * advance the checkpoint counter.
+ */
+void Crux::store_end(void)
+{
+#ifdef HAVE_HDF5
+   if(USE_HDF5) {
+      if(H5Fclose(h5_fid) != 0)
+         printf("HDF5: Could not close HDF5 file \n");
+   } else {
+#endif
+#ifdef HAVE_MPI
+      MPI_File_close(&mpi_store_fp);
+#else
+      assert(store_fp != NULL);
+      fclose(store_fp);
+      // Do not leave a dangling stream behind: a store call made after this
+      // point trips its store_fp assertion instead of writing to a closed file.
+      store_fp = NULL;
+#endif
+#ifdef HAVE_HDF5
+   }
+#endif
+
+   double checkpoint_total_time = cpu_timer_stop(tcheckpoint_time);
+
+   if (do_crux_timing){
+      // The log may be missing if it could not be opened; keep the running
+      // totals either way.
+      if (crux_time_fp != NULL) {
+         fprintf(crux_time_fp, "Total time for checkpointing was %g seconds\n", checkpoint_total_time);
+      }
+      checkpoint_timing_count++;
+      checkpoint_timing_sum += checkpoint_total_time;
+   }
+
+   checkpoint_counter++;
+}
+
+// Kind of restore in progress; one of RESTORE_NONE, RESTORE_RESTART and
+// RESTORE_ROLLBACK. Set by Crux::restore_begin().
+int restore_type = RESTORE_NONE;
+
+/*
+ * Read every entry of `memory` that is flagged RESTART_DATA back from the
+ * open checkpoint, in the order the entries are iterated by name.
+ *
+ * With HDF5 each entry is read from its named dataset. Otherwise a
+ * fixed-width name record is read and compared with the entry's name; on a
+ * mismatch an error is printed and the program exits. Entries with an
+ * element size of 4 are read as int, all others as double.
+ */
+void Crux::restore_MallocPlus(MallocPlus memory){
+    // Width of the name record written by store_MallocPlus.
+    const size_t field_header_size = 30;
+    // Zero-filled and larger than the record, so the name read into it is
+    // always NUL-terminated for the comparison and the %s below.
+    char test_name[34] = {0};
+    malloc_plus_memory_entry *memory_item;
+    for (memory_item = memory.memory_entry_by_name_begin();
+         memory_item != memory.memory_entry_by_name_end();
+         memory_item = memory.memory_entry_by_name_next() ){
+        void *mem_ptr = memory_item->mem_ptr;
+        if ((memory_item->mem_flags & RESTART_DATA) == 0) continue;
+
+        if (DEBUG) {
+            printf("MallocPlus ptr  %p: name %10s ptr %p dims %lu nelem (",
+                    mem_ptr,memory_item->mem_name,memory_item->mem_ptr,memory_item->mem_ndims);
+
+            char nelemstring[80];
+            char *str_ptr = nelemstring;
+            str_ptr += sprintf(str_ptr,"%lu", memory_item->mem_nelem[0]);
+            for (uint i = 1; i < memory_item->mem_ndims; i++){
+                str_ptr += sprintf(str_ptr,", %lu", memory_item->mem_nelem[i]);
+            }
+            printf("%12s",nelemstring);
+
+            printf(") elsize %lu flags %d capacity %lu\n",
+                    memory_item->mem_elsize,memory_item->mem_flags,memory_item->mem_capacity);
+        }
+#ifdef HAVE_HDF5
+        if(USE_HDF5) {
+            access_named_hdf5_values (memory_item->mem_name,
+                    strlen (memory_item->mem_name),
+                    (hsize_t) memory_item->mem_ndims,
+                    (hsize_t *) memory_item->mem_nelem,
+                    mem_ptr,
+                    memory_item->mem_elsize == 4 ?
+                    H5T_NATIVE_INT : H5T_NATIVE_DOUBLE, false);
+        } else {
+#endif
+            // Total element count over all dimensions; kept in size_t so a
+            // large array does not overflow an int.
+            size_t num_elements = 1;
+            for (uint i = 0; i < memory_item->mem_ndims; i++){
+                num_elements *= memory_item->mem_nelem[i];
+            }
+            restore_field_header(test_name,30);
+            // Only the stored record width takes part in the comparison, so a
+            // name that fills the whole record still matches.
+            if (strncmp(test_name,memory_item->mem_name,field_header_size) != 0) {
+                printf("ERROR in restore checkpoint for %s %s\n",test_name,memory_item->mem_name);
+#ifdef HAVE_MPI
+                MPI_Finalize();
+#endif
+                exit(-1);
+            }
+            if (memory_item->mem_flags & REPLICATED_DATA) {
+                if (memory_item->mem_elsize == 4){
+                    restore_replicated_int_array((int *)mem_ptr, num_elements);
+                } else {
+                    restore_replicated_double_array((double *)mem_ptr, num_elements);
+                }
+            } else {
+                if (memory_item->mem_elsize == 4){
+                    restore_int_array((int *)mem_ptr, num_elements);
+                } else {
+                    restore_double_array((double *)mem_ptr, num_elements);
+                }
+            }
+        }   // closes the HDF5 "else" when HAVE_HDF5 is defined, the loop otherwise
+#ifdef HAVE_HDF5
+    }
+#endif
+}
+
+/*
+ * Open the source for a restore and start the restore timer.
+ *
+ *   restart_file      checkpoint file to restart from, or NULL to roll back
+ *                     to a checkpoint taken earlier in this run
+ *   rollback_counter  selects the rollback slot (taken modulo the number of
+ *                     rollback states)
+ *
+ * Sets restore_type to RESTORE_RESTART when a restart file is given and to
+ * RESTORE_ROLLBACK for an in-memory or on-disk rollback. Open failures are
+ * reported on stdout; the function still returns.
+ */
+void Crux::restore_begin(char *restart_file, int rollback_counter)
+{
+    rs_num = rollback_counter % num_of_rollback_states;
+
+    cpu_timer_start(&trestore_time);
+
+    if (restart_file != NULL){
+        if (mype == 0) {
+            printf("\n  ================================================================\n");
+            printf(  "  Restoring state from disk file %s\n",restart_file);
+            printf(  "  ================================================================\n\n");
+        }
+#ifdef HAVE_HDF5
+        is_restart = true;
+        if (USE_HDF5) {
+            hid_t plist_id = create_hdf5_parallel_file_plist();
+
+            // H5Fopen signals failure with a negative identifier, so test
+            // the sign rather than comparing against zero.
+            h5_fid = H5Fopen(restart_file, H5F_ACC_RDWR, plist_id);
+            if(h5_fid < 0)
+                printf("HDF5: Could not restart from HDF5 file: %s\n", restart_file);
+            H5Pclose(plist_id);
+        } else {
+#endif
+#ifdef HAVE_MPI
+            int iret = MPI_File_open(MPI_COMM_WORLD, restart_file, MPI_MODE_RDONLY | MPI_MODE_UNIQUE_OPEN, MPI_INFO_NULL, &mpi_restore_fp);
+            if(iret != MPI_SUCCESS){
+                printf("Could not open restart file %s\n",restart_file);
+            }
+#else
+            restore_fp = fopen(restart_file,"r");
+            if(!restore_fp){
+                printf("Could not open restart file %s\n",restart_file);
+            }
+#endif
+#ifdef HAVE_HDF5
+        }
+#endif
+        restore_type = RESTORE_RESTART;
+    } else if(crux_type == CRUX_IN_MEMORY){
+        printf("Restoring state from memory rollback number %d rollback_counter %d\n",rs_num,rollback_counter);
+        // A slot that was never stored into has no buffer to read from.
+        if (crux_data[rs_num] != NULL) {
+            restore_fp = fmemopen(crux_data[rs_num], crux_data_size[rs_num], "r");
+        } else {
+            restore_fp = NULL;
+        }
+        if(!restore_fp){
+            printf("Could not open in-memory checkpoint for rollback number %d\n",rs_num);
+        }
+        restore_type = RESTORE_ROLLBACK;
+    } else if(crux_type == CRUX_DISK){
+        char backup_file_w_dir[60];
+
+        // Rollback goes through the per-slot symlink maintained by store_begin.
+        snprintf(backup_file_w_dir,sizeof(backup_file_w_dir),"%s/backup%d.crx",checkpoint_directory,rs_num);
+        printf("Restoring state from disk file %s rollback_counter %d\n",backup_file_w_dir,rollback_counter);
+        restore_fp = fopen(backup_file_w_dir,"r");
+        if(!restore_fp){
+            printf("Could not open restore file %s\n",backup_file_w_dir);
+        }
+        restore_type = RESTORE_ROLLBACK;
+    }
+}
+
+void Crux::restore_field_header(char *name, int name_size)
+{  // Read the next name_size characters of the restore stream into name.
+#ifdef HAVE_MPI
+   assert(name != NULL);
+   MPI_Status read_status;
+   MPI_File_read_shared(mpi_restore_fp, name, name_size, MPI_CHAR, &read_status);
+   MPI_Barrier(MPI_COMM_WORLD);
+#ifdef DEBUG_RESTORE_VALS
+   int nchars;
+   MPI_Get_count(&read_status, MPI_CHAR, &nchars);
+   printf("%d:Read %d characters at line %d in file %s\n",mype,nchars,__LINE__,__FILE__);
+#endif
+
+#else
+   // Non-MPI build: plain fread from restore_fp; a short read only produces a warning.
+   const int chars_read = fread(name,sizeof(char),name_size,restore_fp);
+   if (chars_read != name_size)
+      printf("Warning: number of elements read %d is not equal to request %d\n",chars_read,name_size);
+#endif
+}
+
+void Crux::restore_bools(bool *bool_vals, size_t nelem)
+{
+   // Read nelem bool values from restore_fp into bool_vals; a short read only warns.
+   const size_t got = fread(bool_vals,sizeof(bool),nelem,restore_fp);
+   if (got != nelem)
+      printf("Warning: number of elements read %lu is not equal to request %lu\n",got,nelem);
+}
+
+void Crux::restore_ints(int *int_vals, size_t nelem)
+{
+   // Read nelem int values from restore_fp into int_vals; a short read only warns.
+   const size_t got = fread(int_vals,sizeof(int),nelem,restore_fp);
+   if (got != nelem)
+      printf("Warning: number of elements read %lu is not equal to request %lu\n",got,nelem);
+}
+
+void Crux::restore_longs(long long *long_vals, size_t nelem)
+{
+   // Read nelem long long values from restore_fp; the element size must match the long long buffer (sizeof(long) can be 4 bytes).
+   size_t nelem_read = fread(long_vals,sizeof(long long),nelem,restore_fp);
+   if (nelem_read != nelem)
+      printf("Warning: number of elements read %lu is not equal to request %lu\n",nelem_read,nelem);
+}
+
+void Crux::restore_sizets(size_t *size_t_vals, size_t nelem)
+{
+   // Read nelem size_t values from restore_fp into size_t_vals; a short read only warns.
+   const size_t got = fread(size_t_vals,sizeof(size_t),nelem,restore_fp);
+   if (got != nelem)
+      printf("Warning: number of elements read %lu is not equal to request %lu\n",got,nelem);
+}
+
+void Crux::restore_doubles(double *double_vals, size_t nelem)
+{
+   // Read nelem double values from restore_fp into double_vals; a short read only warns.
+   const size_t got = fread(double_vals,sizeof(double),nelem,restore_fp);
+   if (got != nelem)
+      printf("Warning: number of elements read %lu is not equal to request %lu\n",got,nelem);
+}
+
+int *Crux::restore_int_array(int *int_array, size_t nelem)
+{  // Fill int_array with nelem ints from the open restore source and return int_array.
+#ifdef HAVE_MPI
+   assert(int_array != NULL);
+   MPI_Status read_status;
+   MPI_File_read_shared(mpi_restore_fp, int_array, (int)nelem, MPI_INT, &read_status);
+   MPI_Barrier(MPI_COMM_WORLD);
+#ifdef DEBUG_RESTORE_VALS
+   int nread;
+   MPI_Get_count(&read_status, MPI_INT, &nread);
+   printf("%d:Read %d integers at line %d in file %s\n",mype,nread,__LINE__,__FILE__);
+#endif
+
+#else
+   // Non-MPI build: plain fread from restore_fp; a short read only produces a warning.
+   const size_t got = fread(int_array,sizeof(int),nelem,restore_fp);
+   if (got != nelem)
+      printf("Warning: number of elements read %lu is not equal to request %lu\n",got,nelem);
+#endif
+   return int_array;
+}
+
+long long *Crux::restore_long_array(long long *long_array, size_t nelem)
+{
+   // Read nelem long long values from restore_fp into long_array and return it; a short read only warns.
+   const size_t got = fread(long_array,sizeof(long long),nelem,restore_fp);
+   if (got != nelem)
+      printf("Warning: number of elements read %lu is not equal to request %lu\n",got,nelem);
+   return long_array;
+}
+
+float *Crux::restore_float_array(float *float_array, size_t nelem)
+{
+   // Read nelem float values from restore_fp into float_array and return it; a short read only warns.
+   const size_t got = fread(float_array,sizeof(float),nelem,restore_fp);
+   if (got != nelem)
+      printf("Warning: number of elements read %lu is not equal to request %lu\n",got,nelem);
+   return float_array;
+}
+
+double *Crux::restore_double_array(double *double_array, size_t nelem)
+{  // Fill double_array with nelem doubles from the open restore source and return double_array.
+#ifdef HAVE_MPI
+   MPI_Status read_status;
+   MPI_File_read_shared(mpi_restore_fp, double_array, (int)nelem, MPI_DOUBLE, &read_status);
+   MPI_Barrier(MPI_COMM_WORLD);
+#ifdef DEBUG_RESTORE_VALS
+   int nread;
+   MPI_Get_count(&read_status, MPI_DOUBLE, &nread);
+   printf("%d:Read %d doubles at line %d in file %s\n",mype,nread,__LINE__,__FILE__);
+#endif
+
+#else
+   // Non-MPI build: plain fread from restore_fp; a short read only produces a warning.
+   const size_t got = fread(double_array,sizeof(double),nelem,restore_fp);
+   if (got != nelem)
+      printf("Warning: number of elements read %lu is not equal to request %lu\n",got,nelem);
+#endif
+   return double_array;
+}
+
+int *Crux::restore_replicated_int_array(int *int_array, size_t nelem)
+{  // Fill int_array with nelem ints from the open restore source and return int_array.
+#ifdef HAVE_MPI
+   assert(int_array != NULL);
+   MPI_Status read_status;
+   MPI_File_read_shared(mpi_restore_fp, int_array, (int)nelem, MPI_INT, &read_status);
+   MPI_Barrier(MPI_COMM_WORLD);
+#ifdef DEBUG_RESTORE_VALS
+   int nread;
+   MPI_Get_count(&read_status, MPI_INT, &nread);
+   printf("%d:Read %d integers at line %d in file %s\n",mype,nread,__LINE__,__FILE__);
+#endif
+
+#else
+   // Non-MPI build: plain fread from restore_fp; a short read only produces a warning.
+   const size_t got = fread(int_array,sizeof(int),nelem,restore_fp);
+   if (got != nelem)
+      printf("Warning: number of elements read %lu is not equal to request %lu\n",got,nelem);
+#endif
+   return int_array;
+}
+
+double *Crux::restore_replicated_double_array(double *double_array, size_t nelem)
+{  // Fill double_array with nelem doubles from the open restore source and return double_array.
+#ifdef HAVE_MPI
+   MPI_Status read_status;
+   MPI_File_read_shared(mpi_restore_fp, double_array, (int)nelem, MPI_DOUBLE, &read_status);
+   MPI_Barrier(MPI_COMM_WORLD);
+#ifdef DEBUG_RESTORE_VALS
+   int nread;
+   MPI_Get_count(&read_status, MPI_DOUBLE, &nread);
+   printf("%d:Read %d doubles at line %d in file %s\n",mype,nread,__LINE__,__FILE__);
+#endif
+
+#else
+   // Non-MPI build: plain fread from restore_fp; a short read only produces a warning.
+   const size_t got = fread(double_array,sizeof(double),nelem,restore_fp);
+   if (got != nelem)
+      printf("Warning: number of elements read %lu is not equal to request %lu\n",got,nelem);
+#endif
+   return double_array;
+}
+
+#ifdef HAVE_MPI
+int *Crux::restore_distributed_int_array(int *int_array, size_t nelem, int flags)
+{  // Stub: the distributed read below is commented out, so only a barrier runs; nelem and flags are unused.
+   assert(int_array != NULL);
+   //MPI_Datatype datatype = get_crux_datatype(DISTRIBUTED_INT_DATA);
+   MPI_Status status;   // NOTE(review): never filled in while the read stays disabled
+   //MPI_File_read_shared(mpi_restore_fp, int_array, (int)nelem, datatype, &status);
+   MPI_Barrier(MPI_COMM_WORLD);
+#ifdef DEBUG_RESTORE_VALS
+   int count;
+   MPI_Get_count(&status, MPI_INT, &count);   // NOTE(review): reads the uninitialized status above
+   printf("%d:Read %d integers at line %d in file %s\n",mype,count,__LINE__,__FILE__);
+#endif
+   // The caller's buffer is handed back without having been written by this function.
+   return(int_array);
+}
+
+double *Crux::restore_distributed_double_array(double *double_array, size_t nelem, int flags)
+{  // Stub: the distributed read below is commented out, so only a barrier runs; nelem and flags are unused.
+   //MPI_Datatype datatype = get_crux_datatype(DISTRIBUTED_DOUBLE_DATA);
+   MPI_Status status;   // NOTE(review): never filled in while the read stays disabled
+   //MPI_File_read_shared(mpi_restore_fp, double_array, (int)nelem, datatype, &status);
+   MPI_Barrier(MPI_COMM_WORLD);
+#ifdef DEBUG_RESTORE_VALS
+   int count;
+   MPI_Get_count(&status, MPI_DOUBLE, &count);   // NOTE(review): reads the uninitialized status above
+   printf("%d:Read %d doubles at line %d in file %s\n",mype,count,__LINE__,__FILE__);
+#endif
+   // The caller's buffer is handed back without having been written by this function.
+   return(double_array);
+}
+#endif
+
+void Crux::restore_end(void)
+{
+   // Stop the restore timer, log the elapsed time when timing is on, and close the restore source.
+   double restore_total_time = cpu_timer_stop(trestore_time);
+   if (do_crux_timing){
+      if (restore_type == RESTORE_RESTART) {
+         fprintf(crux_time_fp, "Total time for restore was %g seconds\n", restore_total_time);
+      } else if (restore_type == RESTORE_ROLLBACK){
+         fprintf(crux_time_fp, "Total time for rollback %d was %g seconds\n", rollback_attempt, restore_total_time);
+      }
+   }
+#ifdef HAVE_HDF5
+   if(USE_HDF5) {
+     if(H5Fclose(h5_fid) < 0) {   // H5Fclose signals failure with a negative value
+       printf("HDF5: Could not close HDF5 file!!\n");
+     }
+   } else {
+#endif
+#ifdef HAVE_MPI
+       MPI_File_close(&mpi_restore_fp);   // the handle restore_begin opened, not the checkpoint-store handle
+#else
+       assert(restore_fp != NULL);
+       fclose(restore_fp);
+#endif
+#ifdef HAVE_HDF5
+    }
+#endif
+}
+
+int Crux::get_rollback_number()
+{
+  ++rollback_attempt;   // every call is counted as one more rollback attempt
+  return checkpoint_counter % num_of_rollback_states;   // rollback slot for the current checkpoint count
+}
+
+void Crux::set_crux_type(int crux_type_in)
+{
+  this->crux_type = crux_type_in;   // stored checkpoint mode; restore_begin compares it with CRUX_IN_MEMORY / CRUX_DISK
+}
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/fmemopen.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/fmemopen.h
@@ -0,0 +1,35 @@
+#ifndef FMEMOPEN_H_
+#define FMEMOPEN_H_
+
+#include <stdio.h>   /* FILE and size_t, so this header compiles on its own */
+#if defined __cplusplus
+extern "C" {
+#endif
+/**
+ * A BSD port of the fmemopen Linux method using funopen.
+ *
+ * man docs for fmemopen:
+ * http://linux.die.net/man/3/fmemopen
+ *
+ * man docs for funopen:
+ * https://developer.apple.com/library/mac/#documentation/Darwin/Reference/ManPages/man3/funopen.3.html
+ *
+ * This method is ported from ingenuitas' python-tesseract project.
+ *
+ * You must call fclose on the returned file pointer or memory will be leaked.
+ *
+ * @param buf The data that will be used to back the FILE* methods. Must be at least
+ *            @c size bytes.
+ * @param size The size of the @c buf data.
+ * @param mode The permitted stream operation modes.
+ * @return A pointer that can be used in the fread/fwrite/fseek/fclose family of methods.
+ *         If a failure occurred NULL will be returned.
+ * @ingroup NimbusMemoryMappping
+ */
+FILE *fmemopen(void *buf, size_t size, const char *mode);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // #ifndef FMEMOPEN_H_
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/genmalloc.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/genmalloc.h
@@ -0,0 +1,98 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+
+#include <stddef.h>   /* size_t, used by the prototypes below */
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+/* memory routines: each macro forwards to its _p function and appends the caller's __FILE__ and __LINE__ */
+#define genvector(  inum, elsize) \
+      ( genvector_p(inum, elsize, __FILE__, __LINE__) )
+#define genvectorfree(  var) \
+      ( genvectorfree_p(var, __FILE__, __LINE__) )
+#define genmatrix(  jnum, inum, elsize) \
+      ( genmatrix_p(jnum, inum, elsize, __FILE__, __LINE__) )
+#define gentrimatrix(  knum, jnum, inum, elsize) \
+      ( gentrimatrix_p(knum, jnum, inum, elsize, __FILE__, __LINE__) )
+#define genmatrixfree(  var) \
+      ( genmatrixfree_p(var, __FILE__, __LINE__) )
+#define gentrimatrixfree(  var) \
+      ( gentrimatrixfree_p(var, __FILE__, __LINE__) )
+
+#define genmalloc_memory_add(  malloc_mem_ptr, size) \
+      ( genmalloc_memory_add_p(malloc_mem_ptr, size, __FILE__, __LINE__) )
+#define genmalloc_memory_remove(  malloc_mem_ptr) \
+      ( genmalloc_memory_remove_p(malloc_mem_ptr, __FILE__, __LINE__) )
+#define genmem_free_all() \
+      ( genmem_free_all_p(__FILE__, __LINE__) ) 
+
+/* Functions behind the macros above; file and line carry the call site supplied by the macro. */
+void *genvector_p(int inum, size_t elsize, const char *file, const int line);
+void genvectorfree_p(void *var, const char *file, const int line);
+void **genmatrix_p(int jnum, int inum, size_t elsize, const char *file, const int line);
+void ***gentrimatrix_p(int knum, int jnum, int inum, size_t elsize, const char *file, const int line);
+void genmatrixfree_p(void **var, const char *file, const int line);
+void gentrimatrixfree_p(void ***var, const char *file, const int line);
+
+void *genmalloc_memory_add_p(void *malloc_mem_ptr, size_t size, const char *file, const int line);
+void genmalloc_memory_remove_p(void *malloc_mem_ptr, const char *file, const int line);
+void genmem_free_all_p(const char *file, const int line);
+
+#ifdef __cplusplus
+}
+#endif
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/genmalloc.c
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/genmalloc.c
@@ -0,0 +1,281 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "genmalloc.h"
+
+#ifndef DEBUG
+#define DEBUG 0   /* nonzero enables the tracing printf calls in this file */
+#endif
+/* Element-size-specific allocators that gentrimatrix_p dispatches to. */
+double ***gentrimatrix_double_p(int knum, int jnum, int inum, const char *file, const int line);
+int ***gentrimatrix_int_p(int knum, int jnum, int inum, const char *file, const int line);
+/* Singly linked list (sys/queue.h SLIST) with one entry per tracked allocation. */
+SLIST_HEAD(slist_genmalloc_memory_head, genmalloc_memory_entry) genmalloc_memory_head = SLIST_HEAD_INITIALIZER(genmalloc_memory_head);
+struct slist_genmalloc_memory_head *genmalloc_memory_headp;   /* NOTE(review): not referenced elsewhere in this file */
+struct genmalloc_memory_entry {
+   void *mem_ptr;      /* the tracked allocation */
+   size_t mem_size;    /* size value supplied when the allocation was registered */
+   SLIST_ENTRY(genmalloc_memory_entry) genmalloc_memory_entries;   /* list linkage */
+} *genmalloc_memory_item;   /* file-scope scratch pointer shared by the add/remove/free_all routines */
+
+void *genvector_p(int inum, size_t elsize, const char *file, const int line)
+{
+   /* Zero-filled vector of inum elements, elsize bytes each, recorded in the genmalloc
+    * tracking list so genvectorfree/genmem_free_all can release it. */
+   (void)file;   /* call-site arguments are accepted for the macro interface but unused here */
+   (void)line;
+
+   const size_t nbytes = inum*elsize;
+   void *vec = calloc((size_t)inum, elsize);
+
+   genmalloc_memory_add(vec, nbytes);
+
+   return vec;
+}
+
+void genvectorfree_p(void *var, const char *file, const int line)
+{
+   /* Release a vector from genvector: frees var and drops its tracking entry. */
+   (void)file;   /* call-site arguments unused here */
+   (void)line;
+   genmalloc_memory_remove(var);
+}
+
+void **genmatrix_p(int jnum, int inum, size_t elsize, const char *file, const int line)
+{
+   /* jnum x inum zero-filled matrix stored contiguously: a table of jnum row pointers plus one
+    * data slab. Both allocations are registered with the tracking list. */
+   (void)file; (void)line;   /* call-site arguments unused here */
+   void **out;
+   size_t mem_size;
+  
+   mem_size = jnum*sizeof(void *);
+   out      = (void **)malloc(mem_size);
+   genmalloc_memory_add(out, mem_size);
+  
+   mem_size = (size_t)jnum*(size_t)inum*elsize;   /* size_t math, matching the calloc below */
+   out[0]    = (void *)calloc((size_t)jnum*(size_t)inum, elsize);
+   genmalloc_memory_add(out[0], mem_size);
+  
+   for (int i = 1; i < jnum; i++) {
+      out[i] = (char *)out[i-1] + inum*elsize;   /* step in bytes via char *; arithmetic on void * is a GNU extension */
+   }
+  
+   return (out);
+}
+
+void genmatrixfree_p(void **var, const char *file, const int line)
+{
+   /* Release a matrix from genmatrix: the data slab (var[0]) first, then the row-pointer table. */
+   (void)file;   /* call-site arguments unused here */
+   (void)line;
+   genmalloc_memory_remove(var[0]);
+   genmalloc_memory_remove(var);
+}
+
+void ***gentrimatrix_p(int knum, int jnum, int inum, size_t elsize, const char *file, const int line)
+{
+   /* Dispatch on element size: 8-byte elements use the double allocator, 4-byte elements the
+    * int allocator; any other size is reported and yields NULL. */
+   switch (elsize) {
+      case 8:
+         return (void ***)gentrimatrix_double_p(knum, jnum, inum, file, line);
+      case 4:
+         return (void ***)gentrimatrix_int_p(knum, jnum, inum, file, line);
+      default:
+         break;
+   }
+
+   printf("Error -- element size not supported in genmalloc for call at %s line %d\n",file,line);
+   return NULL;
+}
+
+double ***gentrimatrix_double_p(int knum, int jnum, int inum, const char *file, const int line)
+{
+   /* knum x jnum x inum zero-filled array of 8-byte elements in one contiguous slab, reached
+    * through a plane-pointer table and a row-pointer table; all three allocations are tracked. */
+   (void)file; (void)line;   /* call-site arguments unused here */
+   double ***tri;
+   size_t nbytes;
+   const size_t elsize = 8;
+
+   nbytes    = knum*sizeof(void **);
+   tri       = (double ***)malloc(nbytes);
+   genmalloc_memory_add(tri, nbytes);
+
+   nbytes    = knum*jnum*sizeof(void *);
+   tri[0]    = (double **) malloc(nbytes);
+   genmalloc_memory_add(tri[0], nbytes);
+
+   size_t ncells = knum*jnum*inum;
+   nbytes    = ncells*elsize;
+   tri[0][0] = (double *)calloc(ncells, elsize);
+   genmalloc_memory_add(tri[0][0], nbytes);
+
+   /* Plane k begins jnum row pointers after plane k-1, and its first row begins
+    * jnum*inum elements after the first row of plane k-1. */
+   for (int k = 1; k < knum; k++) {
+      tri[k] = tri[k-1] + jnum;
+      tri[k][0] = tri[k-1][0] + (jnum*inum);
+   }
+
+   /* Within each plane, every further row follows the previous one by inum elements. */
+   for (int k = 0; k < knum; k++) {
+      for (int j = 1; j < jnum; j++) {
+         tri[k][j] = tri[k][j-1] + inum;
+      }
+   }
+
+   return tri;
+}
+
+int ***gentrimatrix_int_p(int knum, int jnum, int inum, const char *file, const int line)
+{
+   /* knum x jnum x inum zero-filled array of 4-byte elements in one contiguous slab, reached
+    * through a plane-pointer table and a row-pointer table; all three allocations are tracked. */
+   (void)file; (void)line;   /* call-site arguments unused here */
+   int ***tri;
+   size_t nbytes;
+   const size_t elsize = 4;
+
+   nbytes    = knum*sizeof(void **);
+   tri       = (int ***)malloc(nbytes);
+   genmalloc_memory_add(tri, nbytes);
+
+   nbytes    = knum*jnum*sizeof(void *);
+   tri[0]    = (int **) malloc(nbytes);
+   genmalloc_memory_add(tri[0], nbytes);
+
+   size_t ncells = knum*jnum*inum;
+   nbytes    = ncells*elsize;
+   tri[0][0] = (int *)calloc(ncells, elsize);
+   genmalloc_memory_add(tri[0][0], nbytes);
+
+   /* Plane k begins jnum row pointers after plane k-1, and its first row begins
+    * jnum*inum elements after the first row of plane k-1. */
+   for (int k = 1; k < knum; k++) {
+      tri[k] = tri[k-1] + jnum;
+      tri[k][0] = tri[k-1][0] + (jnum*inum);
+   }
+
+   /* Within each plane, every further row follows the previous one by inum elements. */
+   for (int k = 0; k < knum; k++) {
+      for (int j = 1; j < jnum; j++) {
+         tri[k][j] = tri[k][j-1] + inum;
+      }
+   }
+
+   return tri;
+}
+
+void gentrimatrixfree_p(void ***var, const char *file, const int line)
+{
+   /* Release a 3-D array from gentrimatrix: data slab, then row-pointer table, then plane table. */
+   (void)file;   /* call-site arguments unused here */
+   (void)line;
+   genmalloc_memory_remove(var[0][0]);
+   genmalloc_memory_remove(var[0]);
+   genmalloc_memory_remove(var);
+}
+
+void *genmalloc_memory_add_p(void *malloc_mem_ptr, size_t size, const char *file, const int line){
+   /* Record malloc_mem_ptr (with its size) at the head of the tracking list and return it
+    * unchanged. file/line identify the call site in the DEBUG trace and the error message. */
+   if (SLIST_EMPTY(&genmalloc_memory_head)) SLIST_INIT(&genmalloc_memory_head);
+   genmalloc_memory_item = malloc(sizeof(struct genmalloc_memory_entry));
+   if (genmalloc_memory_item == NULL) {   /* cannot track: report instead of dereferencing NULL */
+      printf("Error -- out of memory for genmalloc tracking entry at %s line %d\n",file,line);
+      return(malloc_mem_ptr);
+   }
+   genmalloc_memory_item->mem_ptr = malloc_mem_ptr;
+   genmalloc_memory_item->mem_size = size;
+   if (DEBUG) printf("GENMALLOC_MEMORY_ADD: DEBUG -- malloc memory pointer is %p called from file %s line %d\n",malloc_mem_ptr,file,line);
+   SLIST_INSERT_HEAD(&genmalloc_memory_head, genmalloc_memory_item, genmalloc_memory_entries);
+   return(malloc_mem_ptr);
+}
+
+void genmalloc_memory_remove_p(void *malloc_mem_ptr, const char *file, const int line){
+   /* Free malloc_mem_ptr and drop its tracking entry. A pointer that is not in the list is
+    * left untouched (nothing is freed). file/line only feed the DEBUG trace. */
+   SLIST_FOREACH(genmalloc_memory_item, &genmalloc_memory_head, genmalloc_memory_entries){
+      if (genmalloc_memory_item->mem_ptr != malloc_mem_ptr) continue;
+
+      if (DEBUG) printf("GENMALLOC_MEMORY_REMOVE: DEBUG -- freeing malloc memory pointer %p called from file %s line %d\n",malloc_mem_ptr,file,line);
+      free(malloc_mem_ptr);
+      SLIST_REMOVE(&genmalloc_memory_head, genmalloc_memory_item, genmalloc_memory_entry, genmalloc_memory_entries);
+      free(genmalloc_memory_item);
+      /* The entry under the iterator is gone, so leave before SLIST_FOREACH advances. */
+      break;
+   }
+}
+
+void genmem_free_all_p(const char *file, const int line){
+   /* Drain the tracking list: free every tracked allocation together with its list entry.
+    * file/line only feed the DEBUG trace. */
+   for (;;) {
+      if (SLIST_EMPTY(&genmalloc_memory_head)) break;
+      genmalloc_memory_item = SLIST_FIRST(&genmalloc_memory_head);
+      if (DEBUG) printf("GENMEM_FREE_ALL: DEBUG -- freeing genmalloc memory %p called from file %s line %d\n",genmalloc_memory_item->mem_ptr,file,line);
+      SLIST_REMOVE_HEAD(&genmalloc_memory_head, genmalloc_memory_entries);
+      free(genmalloc_memory_item->mem_ptr);
+      free(genmalloc_memory_item);
+   }
+}
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/graphics.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/graphics.h
@@ -0,0 +1,102 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+#ifndef _GRAPHICS_H_
+#define _GRAPHICS_H_
+/* C interface of the graphics output module; the declarations are wrapped in extern "C" for C++ callers. */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+/* Graphics file type identifiers; GRAPHICS_NONE is the first enumerator (value 0). */
+enum graphics_file_type{
+   GRAPHICS_NONE,
+   GRAPHICS_DATA,
+   GRAPHICS_BMP,
+   GRAPHICS_GIF,
+   GRAPHICS_JPEG,
+   GRAPHICS_MPEG,
+   GRAPHICS_PDF,
+   GRAPHICS_PNG,
+   GRAPHICS_SVG
+};
+/* Setters and output entry points; presumably implemented in graphics.c -- TODO confirm. */
+void set_graphics_window(float graphics_xmin_in, float graphics_xmax_in,
+                         float graphics_ymin_in, float graphics_ymax_in);
+void init_graphics_output(void);
+void terminate_graphics_output(void);
+void set_graphics_viewmode(int graphics_view_mode_in);
+void set_graphics_mysize(int graphics_mysize_in);
+void set_graphics_outline(int graphics_outline_in);
+void set_graphics_cell_data_double(double *data_in);   /* double- and float-precision variants are declared in pairs */
+void set_graphics_cell_data_float(float *data_in);
+void set_graphics_cell_proc(int *graphics_proc_in);
+void set_graphics_cell_coordinates_double(double *x_in, double *dx_in,
+                                          double *y_in, double *dy_in);
+void set_graphics_cell_coordinates_float(float *x_in, float *dx_in,
+                                         float *y_in, float *dy_in);
+void write_graphics_info(int graph_num, int ncycle, double simTime,
+                         int rollback_img, int rollback_num);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/graphics.c
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/graphics.c
@@ -0,0 +1,706 @@
+/*
+ *  Copyright (c) 2011, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include "graphics.h"
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+static int magick_on = 0;   // set to 1 by init_graphics_output() for every type except GRAPHICS_NONE / GRAPHICS_DATA
+
+#ifdef HAVE_MAGICKWAND
+#include <wand/MagickWand.h>
+
+#define MAGICK_NCOLORS 1280   // entries in MagickRainbow: five 256-step ramps filled by Magick_Scale
+
+void Magick_Scale();   // fills MagickRainbow; defined at the end of this file
+
+//static int graphics_movie = 0;
+static MagickWand *magick_wand = NULL;   // created in init_graphics_output(), destroyed in terminate_graphics_output()
+static DrawingWand *draw_wand  = NULL;   // created and destroyed once per written frame
+static PixelWand *pixel_wand   = NULL;   // created and destroyed once per written frame
+
+struct MagickColorTable {   // one color triple; Magick_Scale stores values 0-255 in each field
+   int Red;
+   int Blue;
+   int Green;
+};
+
+static struct MagickColorTable MagickRainbow[MAGICK_NCOLORS];   // color lookup table indexed by magick_color
+#endif
+
+
+#define WINSIZE 800   // canvas edge length in pixels; also the numerator of the scale factors
+
+void DrawSquaresToFile(int graph_num, int ncycle, double simTime, int rollback_img, int rollback_num);
+
+/*
+ * State added so that graphics data can be written to files when CLAMR
+ * is run without interactive graphics.
+ * Brian Atkinson
+*/
+static int autoscale = 0;          // nonzero would rescale colors to the data range; never assigned in this file
+static double xconversion = 0.0;   // WINSIZE / (xmax - xmin), set by init_graphics_output()
+static double yconversion = 0.0;   // WINSIZE / (ymax - ymin), set by init_graphics_output()
+static int Ncolors = 256;          // color range used by the GRAPHICS_DATA writers
+static int iteration = 0;          // incremented after each successful data-file write; not read in this file
+
+char *graphics_directory = "graphics_output";   // directory that receives every output file
+enum graphics_file_type graphics_type; // type of graphics output; read here but not assigned in this file
+
+//static int mode = MOVE;
+
+static int width;   // assigned in init_graphics_output(); not read in this file
+static float graphics_xmin=0.0, graphics_xmax=0.0, graphics_ymin=0.0, graphics_ymax=0.0;   // window set by set_graphics_window()
+
+static int graphics_outline   = 0;   // nonzero: draw the outline pass (MagickWand path)
+static int graphics_view_mode = 0;   // 0: DrawSquaresToFile, otherwise DisplayStateToFile
+static int graphics_mysize    = 0;   // number of cells in the registered arrays
+
+enum spatial_data_type {SPATIAL_DOUBLE, SPATIAL_FLOAT};
+static int spatial_type = SPATIAL_FLOAT;   // updated by the set_graphics_cell_coordinates_* setters
+
+static double *x_double=NULL, *y_double=NULL, *dx_double=NULL, *dy_double=NULL;   // caller-owned coordinate arrays
+static float *x_float=NULL, *y_float=NULL, *dx_float=NULL, *dy_float=NULL;        // caller-owned coordinate arrays
+
+enum plot_data_type {DATA_DOUBLE, DATA_FLOAT};
+static int data_type = DATA_FLOAT;   // updated by the set_graphics_cell_data_* setters
+static double *data_double=NULL;     // caller-owned cell data (double variant)
+static float *data_float=NULL;       // caller-owned cell data (float variant)
+static int *graphics_proc=NULL;      // caller-owned per-cell integer; DrawSquaresToFile overwrites it in the MagickWand path
+
+void init_graphics_output(void){
+   // Pixels-per-unit scale factors for a WINSIZE canvas; the window bounds must already be stored.
+   yconversion = (double)WINSIZE/(graphics_ymax - graphics_ymin);
+   xconversion = (double)WINSIZE/ (graphics_xmax - graphics_xmin);
+   width = (WINSIZE / (graphics_ymax - graphics_ymin)) * (graphics_xmax - graphics_xmin);
+   // Create the output directory when stat() cannot find it.
+   struct stat dir_info;
+   if (stat(graphics_directory,&dir_info) != 0) mkdir(graphics_directory,0777);
+   // Every type other than GRAPHICS_NONE / GRAPHICS_DATA is rendered through MagickWand.
+   if (!(graphics_type == GRAPHICS_DATA || graphics_type == GRAPHICS_NONE)){
+      magick_on = 1;
+   }
+
+#ifdef HAVE_MAGICKWAND
+   if (magick_on){
+      // MagickWandGenesis() stays disabled here, exactly as before.
+      // Build the wand reused for every frame, then fill the color table.
+      magick_wand = NewMagickWand();
+      Magick_Scale();
+   }
+#endif
+}
+
+void terminate_graphics_output(void){   // does nothing unless built with HAVE_MAGICKWAND and magick_on is set
+#ifdef HAVE_MAGICKWAND
+   if (magick_on){
+      magick_wand = DestroyMagickWand(magick_wand);   // store the return value back so the old pointer is not kept
+      MagickWandTerminus();   // NOTE(review): the MagickWandGenesis() call in init_graphics_output is commented out -- confirm pairing
+   }
+#endif
+}
+
+void set_graphics_window(float graphics_xmin_in, float graphics_xmax_in, float graphics_ymin_in, float graphics_ymax_in){
+   graphics_ymax = graphics_ymax_in;   // vertical bounds; the four stores are independent of each other
+   graphics_ymin = graphics_ymin_in;
+   graphics_xmax = graphics_xmax_in;   // horizontal bounds
+   graphics_xmin = graphics_xmin_in;
+}
+void set_graphics_cell_data_double(double *data_in){
+   data_double = data_in;     // keep the caller's pointer; the array is not copied
+   data_type = DATA_DOUBLE;   // readers consult data_type to pick this array
+}
+void set_graphics_cell_data_float(float *data_in){
+   data_float = data_in;     // keep the caller's pointer; the array is not copied
+   data_type = DATA_FLOAT;   // readers consult data_type to pick this array
+}
+void set_graphics_cell_proc(int *graphics_proc_in){   // register the per-cell integer array used for coloring
+   graphics_proc = graphics_proc_in;   // pointer kept, not copied; DrawSquaresToFile may write through it
+}
+void set_graphics_cell_coordinates_double(double *x_in, double *dx_in, double *y_in, double *dy_in){
+   x_double = x_in;    // cell origin, x; pointers are kept, not copied
+   y_double = y_in;    // cell origin, y
+   dx_double = dx_in;  // cell extent, x
+   dy_double = dy_in;  // cell extent, y
+   spatial_type = SPATIAL_DOUBLE;   // record double as the active coordinate precision
+}
+void set_graphics_cell_coordinates_float(float *x_in, float *dx_in, float *y_in, float *dy_in){
+   x_float = x_in;    // cell origin, x; pointers are kept, not copied
+   y_float = y_in;    // cell origin, y
+   dx_float = dx_in;  // cell extent, x
+   dy_float = dy_in;  // cell extent, y
+   spatial_type = SPATIAL_FLOAT;   // record float as the active coordinate precision
+}
+void set_graphics_viewmode(int graphics_view_mode_in){   // choose which writer write_graphics_info() calls
+   graphics_view_mode = graphics_view_mode_in;   // 0: DrawSquaresToFile, anything else: DisplayStateToFile
+}
+void set_graphics_mysize(int graphics_mysize_in){   // set how many cells the writers iterate over
+   graphics_mysize = graphics_mysize_in;   // used as the loop bound over every registered array
+}
+void set_graphics_outline(int graphics_outline_in){   // toggle the outline pass of the MagickWand path
+   graphics_outline = graphics_outline_in;   // tested only for zero / nonzero
+}
+
+/*
+ * DrawSquaresToFile -- write one frame with each cell colored by graphics_proc[i].
+ * Image types are rendered through MagickWand (HAVE_MAGICKWAND builds only);
+ * GRAPHICS_DATA writes cell rectangles to graph#.data and cell edges to
+ * outline#.lin (rollback_num joins the names when rollback_img is nonzero).
+ * Brian Atkinson
+*/
+void DrawSquaresToFile(int graph_num, int ncycle, double simTime, int rollback_img, int rollback_num){
+#ifdef HAVE_MAGICKWAND
+   if (magick_on) {
+      draw_wand   = NewDrawingWand();
+      pixel_wand  = NewPixelWand();
+
+      MagickSetSize(magick_wand,WINSIZE,WINSIZE);
+      MagickSetColorspace(magick_wand,sRGBColorspace);
+      MagickReadImage(magick_wand,"xc:white");
+      // Scale and shift simulation coordinates onto the canvas; the y scale is negated.
+      DrawSetViewbox(draw_wand, 0, 0, WINSIZE, WINSIZE);
+      DrawScale(draw_wand, xconversion, -yconversion);
+      DrawTranslate(draw_wand, -graphics_xmin, graphics_ymin);
+      // graphics_proc[] is overwritten with i/npart; npart is clamped so fewer than 16 cells cannot divide by zero.
+      int npart = (graphics_mysize >= 16) ? graphics_mysize/16 : 1;
+      for (int i=0; i<graphics_mysize; i++){
+         graphics_proc[i] = i/npart;
+      }
+
+      int magick_step = MAGICK_NCOLORS/(graphics_proc[graphics_mysize-1]+1);
+
+      if (graphics_outline) {
+         PixelGetBlack(pixel_wand);
+
+         DrawSetStrokeColor(draw_wand,pixel_wand);
+         DrawSetStrokeWidth(draw_wand,0.01);
+         DrawSetStrokeAntialias(draw_wand,1);
+         DrawSetStrokeOpacity(draw_wand,1);
+      }
+      // Only coordinates are read below, so the arrays are chosen by spatial_type rather than data_type.
+      if (spatial_type == SPATIAL_DOUBLE){
+
+         for(int i = 0; i < graphics_mysize; i++) {
+            int magick_color = graphics_proc[i]*magick_step;
+            char cstring[40];
+            sprintf(cstring,"rgba(%d,%d,%d,%d)",MagickRainbow[magick_color].Red,
+                                                MagickRainbow[magick_color].Green,
+                                                MagickRainbow[magick_color].Blue,120);
+            PixelSetColor(pixel_wand, cstring);
+
+
+            DrawSetFillColor(draw_wand, pixel_wand);
+
+            DrawRectangle(draw_wand, x_double[i],              y_double[i],
+                                     x_double[i]+dx_double[i], y_double[i]+dy_double[i]);
+/*
+           printf("DEBUG -- i %d magick_color %d magick_step %d graphics_proc %d cstring %s corners %lg %lg %lg %lg\n",
+               i,magick_color,magick_step,graphics_proc[i],cstring,
+               x_double[i],              y_double[i],
+               x_double[i]+dx_double[i], y_double[i]+dy_double[i]);
+*/
+         }
+
+         if (graphics_outline) {
+            PixelSetColor(pixel_wand,"black");
+            DrawSetStrokeColor(draw_wand,pixel_wand);
+            DrawSetStrokeWidth(draw_wand,0.01);
+            // Connect consecutive cell centers with line segments, starting at cell 0.
+            double xold = x_double[0]+0.5*dx_double[0];
+            double yold = y_double[0]+0.5*dy_double[0];
+
+            for(int i = 0; i < graphics_mysize; i++) {
+               char cstring[40];
+               sprintf(cstring,"%d",i);
+
+               double xnew = x_double[i]+0.5*dx_double[i];
+               double ynew = y_double[i]+0.5*dy_double[i];
+
+               DrawLine(draw_wand, xold, yold, xnew, ynew);
+
+               xold = xnew;
+               yold = ynew;
+            }
+         }
+
+/*
+         // Set up a 12 point black font 
+         PixelSetColor(pixel_wand,"black");
+         DrawSetFillColor(draw_wand,pixel_wand);
+         DrawSetFont (draw_wand, "Courier" ) ;
+         DrawSetFontSize(draw_wand,0.01);
+         DrawSetStrokeColor(draw_wand,pixel_wand);
+         DrawSetStrokeWidth(draw_wand,0.01);
+         DrawSetTextDirection(draw_wand, RightToLeftDirection);
+         DrawSetTextAlignment(draw_wand, CenterAlign);
+         DrawSetTextAntialias(draw_wand,MagickTrue);
+
+         for(int i = 1; i < graphics_mysize; i++) {
+            char cstring[40];
+            sprintf(cstring,"%d",i);
+
+
+            DrawAnnotation(draw_wand, x_double[i]+0.5*dx_double[i], y_double[i]+0.5*dy_double[i], cstring);
+         }
+*/
+
+      } else {
+
+         for(int i = 0; i < graphics_mysize; i++) {
+            int magick_color = graphics_proc[i]*magick_step;
+            char cstring[40];
+            sprintf(cstring,"rgba(%d,%d,%d,%d)",MagickRainbow[magick_color].Red,
+                                                MagickRainbow[magick_color].Green,
+                                                MagickRainbow[magick_color].Blue,120);
+            PixelSetColor(pixel_wand, cstring);
+
+            DrawSetFillColor(draw_wand, pixel_wand);
+
+            DrawRectangle(draw_wand, x_float[i],             y_float[i],
+                                     x_float[i]+dx_float[i], y_float[i]+dy_float[i]);
+         }
+
+         if (graphics_outline) {
+            PixelSetColor(pixel_wand,"black");
+            DrawSetStrokeColor(draw_wand,pixel_wand);
+            DrawSetStrokeWidth(draw_wand,0.01);
+            // Connect consecutive cell centers with line segments, starting at cell 0.
+            float xold = x_float[0]+0.5*dx_float[0];
+            float yold = y_float[0]+0.5*dy_float[0];
+
+            for(int i = 0; i < graphics_mysize; i++) {
+               char cstring[40];
+               sprintf(cstring,"%d",i);
+
+               float xnew = x_float[i]+0.5*dx_float[i];
+               float ynew = y_float[i]+0.5*dy_float[i];
+
+               DrawLine(draw_wand, xold, yold, xnew, ynew);
+
+               xold = xnew;
+               yold = ynew;
+            }
+         }
+
+      }
+
+      MagickDrawImage(magick_wand, draw_wand);
+      // The extension follows graphics_type; magick_on guarantees one of these cases matches.
+      char filename[50];
+      char graphics_file_extension[10];
+      if (graphics_type == GRAPHICS_BMP)  strcpy(graphics_file_extension,".bmp");
+      if (graphics_type == GRAPHICS_GIF)  strcpy(graphics_file_extension,".gif");
+      if (graphics_type == GRAPHICS_JPEG) strcpy(graphics_file_extension,".jpeg");
+      if (graphics_type == GRAPHICS_MPEG) strcpy(graphics_file_extension,".mpeg");
+      if (graphics_type == GRAPHICS_PDF)  strcpy(graphics_file_extension,".pdf");
+      if (graphics_type == GRAPHICS_PNG)  strcpy(graphics_file_extension,".png");
+      if (graphics_type == GRAPHICS_SVG)  strcpy(graphics_file_extension,".svg");
+      snprintf(filename,sizeof(filename),"%s/graph%05d%s", graphics_directory, graph_num, graphics_file_extension);
+      MagickWriteImage(magick_wand, filename);
+      //MagickDisplayImage(magick_wand, "x:");
+
+      draw_wand = DestroyDrawingWand(draw_wand);
+      pixel_wand = DestroyPixelWand(pixel_wand);
+   }
+#endif
+
+   if (graphics_type == GRAPHICS_DATA){
+      int i, color;
+      int step = Ncolors/(graphics_proc[graphics_mysize-1]+1);
+      int xloc, xwid, yloc, ywid;
+      int xloc1, xloc2, yloc1, yloc2;
+      char filename[50], filename2[50];
+      // snprintf bounds every name to its buffer; long directory names are truncated instead of overflowing.
+      if(rollback_img){
+         snprintf(filename,sizeof(filename),"%s/graph%dcp%05d.data", graphics_directory, graph_num, rollback_num);
+         snprintf(filename2,sizeof(filename2),"%s/outline%dcp%05d.lin",graphics_directory, graph_num, rollback_num);
+      }
+      else{
+         snprintf(filename,sizeof(filename),"%s/graph%05d.data", graphics_directory, graph_num);
+         snprintf(filename2,sizeof(filename2),"%s/outline%05d.lin",graphics_directory, graph_num);
+      }
+      FILE *fp = fopen(filename,"w");
+      FILE *fp2 = fopen(filename2,"w");
+      if(fp && fp2){
+         fprintf(fp,"%d,%lf\n",ncycle,simTime);
+         if (spatial_type == SPATIAL_DOUBLE){
+            for(i = 0; i < graphics_mysize; i++) {
+               xloc = (int)((x_double[i]-graphics_xmin)*xconversion);
+               xwid = (int)((x_double[i]+dx_double[i]-graphics_xmin)*xconversion-xloc);
+               yloc = (int)((graphics_ymax-(y_double[i]+dy_double[i]))*yconversion);
+               ywid = (int)((graphics_ymax-y_double[i])*yconversion);
+               ywid -= yloc;
+               color = graphics_proc[i]*step;
+               //fprintf(fp,"%d,%d,%d,%d,%f\n",xloc,yloc,xwid,ywid,data[i]);
+               fprintf(fp,"%d,%d,%d,%d,%d\n",xloc,yloc,xwid,ywid,color);
+
+               xloc1 = (int)((x_double[i]-graphics_xmin)*xconversion);
+               xloc2 = (int)((x_double[i]+dx_double[i]-graphics_xmin)*xconversion);
+               yloc1 = (int)((graphics_ymax-y_double[i])*yconversion);
+               yloc2 = (int)((graphics_ymax-(y_double[i]+dy_double[i]))*yconversion);
+               fprintf(fp2,"%d,%d,%d,%d\n",xloc1,yloc2,xloc2,yloc2);
+               fprintf(fp2,"%d,%d,%d,%d\n",xloc1,yloc1,xloc2,yloc1);
+               fprintf(fp2,"%d,%d,%d,%d\n",xloc1,yloc1,xloc1,yloc2);
+               fprintf(fp2,"%d,%d,%d,%d\n",xloc2,yloc1,xloc2,yloc2);
+            }
+         } else {
+            for(i = 0; i < graphics_mysize; i++) {
+               xloc = (int)((x_float[i]-graphics_xmin)*xconversion);
+               xwid = (int)((x_float[i]+dx_float[i]-graphics_xmin)*xconversion-xloc);
+               yloc = (int)((graphics_ymax-(y_float[i]+dy_float[i]))*yconversion);
+               ywid = (int)((graphics_ymax-y_float[i])*yconversion);
+               ywid -= yloc;
+               color = graphics_proc[i]*step;
+               //fprintf(fp,"%d,%d,%d,%d,%f\n",xloc,yloc,xwid,ywid,data[i]);
+               fprintf(fp,"%d,%d,%d,%d,%d\n",xloc,yloc,xwid,ywid,color);
+
+               xloc1 = (int)((x_float[i]-graphics_xmin)*xconversion);
+               xloc2 = (int)((x_float[i]+dx_float[i]-graphics_xmin)*xconversion);
+               yloc1 = (int)((graphics_ymax-y_float[i])*yconversion);
+               yloc2 = (int)((graphics_ymax-(y_float[i]+dy_float[i]))*yconversion);
+               fprintf(fp2,"%d,%d,%d,%d\n",xloc1,yloc2,xloc2,yloc2);
+               fprintf(fp2,"%d,%d,%d,%d\n",xloc1,yloc1,xloc2,yloc1);
+               fprintf(fp2,"%d,%d,%d,%d\n",xloc1,yloc1,xloc1,yloc2);
+               fprintf(fp2,"%d,%d,%d,%d\n",xloc2,yloc1,xloc2,yloc2);
+            }
+         }
+         fclose(fp);
+         fclose(fp2);
+         iteration++;
+      }
+      else{
+            // Name the file that failed; when both failed only the first is named, as before.
+            printf("Could not create %s in DrawSqaures\n", (fp == NULL) ? filename : filename2);
+            // Close whichever stream did open so a partial failure no longer leaks it.
+            if(fp != NULL) fclose(fp);
+            if(fp2 != NULL) fclose(fp2);
+            // Nothing was written and the iteration counter is left unchanged.
+      }
+   }
+}
+
+/*
+ * DisplayStateToFile -- write one frame with each cell colored by its data value,
+ * mapped from [scaleMin, scaleMax] (0 to 25 unless autoscale is set) onto the
+ * color table. Image types are rendered through MagickWand (HAVE_MAGICKWAND
+ * builds only); GRAPHICS_DATA writes graph#.data and outline#.lin text files.
+ * Brian Atkinson
+*/
+void DisplayStateToFile(int graph_num, int ncycle, double simTime, int rollback_img, int rollback_num){
+#ifdef HAVE_MAGICKWAND
+   if (magick_on) {
+      double scaleMax = 25.0, scaleMin = 0.0;
+
+      draw_wand   = NewDrawingWand();
+      pixel_wand  = NewPixelWand();
+
+      MagickSetSize(magick_wand,WINSIZE,WINSIZE);
+      MagickSetColorspace(magick_wand,sRGBColorspace);
+      MagickReadImage(magick_wand,"xc:white");
+      // Scale and shift simulation coordinates onto the canvas; the y scale is negated.
+      DrawSetViewbox(draw_wand, 0, 0, WINSIZE, WINSIZE);
+      DrawScale(draw_wand, xconversion, -yconversion);
+      DrawTranslate(draw_wand, -graphics_xmin, graphics_ymin);
+      // With autoscale set, the color range becomes the min/max of the registered data.
+      if (autoscale) {
+         scaleMax=-1.0e30;
+         scaleMin=1.0e30;
+         if (data_type == DATA_DOUBLE){
+            for(int i = 0; i<graphics_mysize; i++) {
+               if (data_double[i] > scaleMax) scaleMax = data_double[i];
+               if (data_double[i] < scaleMin) scaleMin = data_double[i];
+            }
+         } else {
+            for(int i = 0; i<graphics_mysize; i++) {
+               if (data_float[i] > scaleMax) scaleMax = data_float[i];
+               if (data_float[i] < scaleMin) scaleMin = data_float[i];
+            }
+         }
+      }
+
+      int magick_step = MAGICK_NCOLORS/(scaleMax - scaleMin);
+
+      if (graphics_outline) {
+         PixelGetBlack(pixel_wand);
+
+         DrawSetStrokeColor(draw_wand,pixel_wand);
+         DrawSetStrokeWidth(draw_wand,0.01);
+         DrawSetStrokeAntialias(draw_wand,1);
+         DrawSetStrokeOpacity(draw_wand,1);
+      }
+      // Geometry arrays follow spatial_type; the value arrays inside each loop follow data_type.
+      if (spatial_type == SPATIAL_DOUBLE){
+         // NOTE(review): (int) binds to the difference, so the fraction is dropped before scaling -- confirm intent.
+         for(int i = 0; i < graphics_mysize; i++) {
+            int magick_color;
+            if (data_type == DATA_DOUBLE){
+               magick_color = (int)(data_double[i]-scaleMin)*magick_step;
+            } else {
+               magick_color = (int)(data_float[i]-scaleMin)*magick_step;
+            }
+            magick_color = MAGICK_NCOLORS-magick_color;
+            if (magick_color < 0) {
+               magick_color=0;
+            }
+            if (magick_color >= MAGICK_NCOLORS) magick_color = MAGICK_NCOLORS-1;
+
+            char cstring[40];
+            sprintf(cstring,"rgba(%d,%d,%d,%d)",MagickRainbow[magick_color].Red,
+                                                MagickRainbow[magick_color].Green,
+                                                MagickRainbow[magick_color].Blue,120);
+            PixelSetColor(pixel_wand, cstring);
+
+            DrawSetFillColor(draw_wand, pixel_wand);
+
+            DrawRectangle(draw_wand, x_double[i],              y_double[i],
+                                     x_double[i]+dx_double[i], y_double[i]+dy_double[i]);
+/*
+           printf("DEBUG -- i %d magick_color %d magick_step %d graphics_proc %d cstring %s corners %lg %lg %lg %lg\n",
+               i,magick_color,magick_step,graphics_proc[i],cstring,
+               x_double[i],              y_double[i],
+               x_double[i]+dx_double[i], y_double[i]+dy_double[i]);
+*/
+         }
+      } else {
+
+         for(int i = 0; i < graphics_mysize; i++) {
+            int magick_color;
+            if (data_type == DATA_DOUBLE){
+               magick_color = (int)(data_double[i]-scaleMin)*magick_step;
+            } else {
+               magick_color = (int)(data_float[i]-scaleMin)*magick_step;
+            }
+            magick_color = MAGICK_NCOLORS-magick_color;
+            if (magick_color < 0) {
+               magick_color=0;
+            }
+            if (magick_color >= MAGICK_NCOLORS) magick_color = MAGICK_NCOLORS-1;
+
+            char cstring[40];
+            sprintf(cstring,"rgba(%d,%d,%d,%d)",MagickRainbow[magick_color].Red,
+                                                MagickRainbow[magick_color].Green,
+                                                MagickRainbow[magick_color].Blue,120);
+            PixelSetColor(pixel_wand, cstring);
+
+            DrawSetFillColor(draw_wand, pixel_wand);
+
+            DrawRectangle(draw_wand, x_float[i],             y_float[i],
+                                     x_float[i]+dx_float[i], y_float[i]+dy_float[i]);
+         }
+      }
+
+      MagickDrawImage(magick_wand, draw_wand);
+      // The extension follows graphics_type; magick_on guarantees one of these cases matches.
+      char filename[50];
+      char graphics_file_extension[10];
+      if (graphics_type == GRAPHICS_BMP)  strcpy(graphics_file_extension,".bmp");
+      if (graphics_type == GRAPHICS_GIF)  strcpy(graphics_file_extension,".gif");
+      if (graphics_type == GRAPHICS_JPEG) strcpy(graphics_file_extension,".jpeg");
+      if (graphics_type == GRAPHICS_MPEG) strcpy(graphics_file_extension,".mpeg");
+      if (graphics_type == GRAPHICS_PDF)  strcpy(graphics_file_extension,".pdf");
+      if (graphics_type == GRAPHICS_PNG)  strcpy(graphics_file_extension,".png");
+      if (graphics_type == GRAPHICS_SVG)  strcpy(graphics_file_extension,".svg");
+      snprintf(filename,sizeof(filename),"%s/graph%05d%s", graphics_directory, graph_num, graphics_file_extension);
+      MagickWriteImage(magick_wand, filename);
+      //MagickDisplayImage(magick_wand, "x:");
+
+      draw_wand = DestroyDrawingWand(draw_wand);
+      pixel_wand = DestroyPixelWand(pixel_wand);
+   }
+#endif
+   if (graphics_type == GRAPHICS_DATA){
+      double scaleMax = 25.0, scaleMin = 0.0;
+      int i;
+      int color;
+      char filename[50], filename2[50];
+      // snprintf bounds every name to its buffer; long directory names are truncated instead of overflowing.
+      if(rollback_img){
+         snprintf(filename,sizeof(filename),"%s/graph%dcp%05d.data", graphics_directory, graph_num, rollback_num);
+         snprintf(filename2,sizeof(filename2),"%s/outline%dcp%05d.lin",graphics_directory, graph_num, rollback_num);
+      }
+      else{
+         snprintf(filename,sizeof(filename),"%s/graph%05d.data", graphics_directory, graph_num);
+         snprintf(filename2,sizeof(filename2),"%s/outline%05d.lin",graphics_directory, graph_num);
+      }
+      FILE *fp = fopen(filename,"w");
+      FILE *fp2 = fopen(filename2,"w");
+      if(fp && fp2){
+         fprintf(fp,"%d,%lf\n",ncycle,simTime);
+         if (autoscale) {
+            scaleMax=-1.0e30;
+            scaleMin=1.0e30;
+            if (data_type == DATA_DOUBLE){
+               for(i = 0; i<graphics_mysize; i++) {
+                  if (data_double[i] > scaleMax) scaleMax = data_double[i];
+                  if (data_double[i] < scaleMin) scaleMin = data_double[i];
+               }
+            } else {
+               for(i = 0; i<graphics_mysize; i++) {
+                  if (data_float[i] > scaleMax) scaleMax = data_float[i];
+                  if (data_float[i] < scaleMin) scaleMin = data_float[i];
+               }
+            }
+         }
+         // NOTE(review): below, (int) truncates (data - scaleMin) before the multiply by step -- confirm intent.
+         double step = Ncolors/(scaleMax - scaleMin);
+         int xloc, xwid, yloc, ywid;
+         int xloc1, xloc2, yloc1, yloc2;
+         for(i = 0; i < graphics_mysize; i++) {
+            if (data_type == DATA_DOUBLE){
+               color = (int)(data_double[i]-scaleMin)*step;
+            } else {
+               color = (int)(data_float[i]-scaleMin)*step;
+            }
+            color = Ncolors-color;
+            if (color < 0) {
+               color=0;
+            }
+            if (color >= Ncolors) color = Ncolors-1;
+            // The color came from the data arrays above; the geometry below follows spatial_type.
+            if (spatial_type == SPATIAL_DOUBLE){
+               xloc = (int)((x_double[i]-graphics_xmin)*xconversion);
+               xwid = (int)((x_double[i]+dx_double[i]-graphics_xmin)*xconversion-xloc);
+               yloc = (int)((graphics_ymax-(y_double[i]+dy_double[i]))*yconversion);
+               ywid = (int)((graphics_ymax-y_double[i])*yconversion);
+               ywid -= yloc;
+               //fprintf(fp,"%d,%d,%d,%d,%f\n",xloc,yloc,xwid,ywid,data[i]);
+               fprintf(fp,"%d,%d,%d,%d,%d\n",xloc,yloc,xwid,ywid,color);
+
+               xloc1 = (int)((x_double[i]-graphics_xmin)*xconversion);
+               xloc2 = (int)((x_double[i]+dx_double[i]-graphics_xmin)*xconversion);
+               yloc1 = (int)((graphics_ymax-y_double[i])*yconversion);
+               yloc2 = (int)((graphics_ymax-(y_double[i]+dy_double[i]))*yconversion);
+               fprintf(fp2,"%d,%d,%d,%d\n",xloc1,yloc2,xloc2,yloc2);
+               fprintf(fp2,"%d,%d,%d,%d\n",xloc1,yloc1,xloc2,yloc1);
+               fprintf(fp2,"%d,%d,%d,%d\n",xloc1,yloc1,xloc1,yloc2);
+               fprintf(fp2,"%d,%d,%d,%d\n",xloc2,yloc1,xloc2,yloc2);
+            } else {
+               xloc = (int)((x_float[i]-graphics_xmin)*xconversion);
+               xwid = (int)((x_float[i]+dx_float[i]-graphics_xmin)*xconversion-xloc);
+               yloc = (int)((graphics_ymax-(y_float[i]+dy_float[i]))*yconversion);
+               ywid = (int)((graphics_ymax-y_float[i])*yconversion);
+               ywid -= yloc;
+               //fprintf(fp,"%d,%d,%d,%d,%f\n",xloc,yloc,xwid,ywid,data[i]);
+               fprintf(fp,"%d,%d,%d,%d,%d\n",xloc,yloc,xwid,ywid,color);
+
+               xloc1 = (int)((x_float[i]-graphics_xmin)*xconversion);
+               xloc2 = (int)((x_float[i]+dx_float[i]-graphics_xmin)*xconversion);
+               yloc1 = (int)((graphics_ymax-y_float[i])*yconversion);
+               yloc2 = (int)((graphics_ymax-(y_float[i]+dy_float[i]))*yconversion);
+               fprintf(fp2,"%d,%d,%d,%d\n",xloc1,yloc2,xloc2,yloc2);
+               fprintf(fp2,"%d,%d,%d,%d\n",xloc1,yloc1,xloc2,yloc1);
+               fprintf(fp2,"%d,%d,%d,%d\n",xloc1,yloc1,xloc1,yloc2);
+               fprintf(fp2,"%d,%d,%d,%d\n",xloc2,yloc1,xloc2,yloc2);
+            }
+         }
+         fclose(fp);
+         fclose(fp2);
+         iteration++;
+     }
+     else{
+        // Name the file that failed; when both failed only the first is named, as before.
+        printf("Could not open %s in DisplayStateToFile\n", (fp == NULL) ? filename : filename2);
+        // Close whichever stream did open so a partial failure no longer leaks it.
+        if(fp != NULL) fclose(fp);
+        if(fp2 != NULL) fclose(fp2);
+        // Nothing was written and the iteration counter is left unchanged.
+     }
+  }
+}
+
+void write_graphics_info(int graph_num, int ncycle, double simTime, int rollback_img, int rollback_num){
+   if (graphics_view_mode != 0) {   // nonzero view mode: color cells by their data value
+      DisplayStateToFile(graph_num, ncycle, simTime, rollback_img, rollback_num);
+   } else {                         // view mode 0: color cells by graphics_proc
+      DrawSquaresToFile(graph_num, ncycle, simTime, rollback_img, rollback_num);
+   }
+}
+
+#ifdef HAVE_MAGICKWAND
+void Magick_Scale() {
+   // Fill the 1280-entry MagickRainbow table as five consecutive 256-step ramps.
+   // One pass writes the same step of every ramp; up counts 0..255, down counts 255..0.
+   int up, down;
+   for (up=0; up<256; up++) {
+      down = 255-up;
+      // entries 0-255: green rises while blue stays full
+      MagickRainbow[      up].Red   = 0;
+      MagickRainbow[      up].Green = up;
+      MagickRainbow[      up].Blue  = 255;
+      // entries 256-511: blue falls while green stays full
+      MagickRainbow[ 256+up].Red   = 0;
+      MagickRainbow[ 256+up].Green = 255;
+      MagickRainbow[ 256+up].Blue  = down;
+      // entries 512-767: red rises while green stays full
+      MagickRainbow[ 512+up].Red   = up;
+      MagickRainbow[ 512+up].Green = 255;
+      MagickRainbow[ 512+up].Blue  = 0;
+      // entries 768-1023: green falls while red stays full
+      MagickRainbow[ 768+up].Red   = 255;
+      MagickRainbow[ 768+up].Green = down;
+      MagickRainbow[ 768+up].Blue  = 0;
+      // entries 1024-1279: blue rises while red stays full
+      MagickRainbow[1024+up].Red   = 255;
+      MagickRainbow[1024+up].Green = 0;
+      MagickRainbow[1024+up].Blue  = up;
+   }
+}
+#endif
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/hash.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/hash.h
@@ -0,0 +1,108 @@
+// Uses LANL Copyright Disclosure C14043/LA-CC-14-003
+
+#ifndef _HASH_H
+#define _HASH_H
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#ifdef HAVE_OPENCL
+#include "ezcl/ezcl.h"
+#endif
+
+enum choose_hash_method
+{  METHOD_UNSET = 0,            //  0: no method chosen yet
+   PERFECT_HASH,                //  1: perfect hash
+   LINEAR,                      //  2: linear hash
+   QUADRATIC,                   //  3: quadratic hash
+   PRIME_JUMP  };               //  4: prime_jump hash
+
+typedef unsigned int uint;     // shorthand used for sizes, cell indices and report levels in this API
+typedef unsigned long ulong;   // type of every hashkey parameter; its width is platform-dependent
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+int *compact_hash_init(int ncells, uint isize, uint jsize, uint report_level);   // resets the collision counters, records report_level and isize, picks a hash method
+
+#ifdef _OPENMP
+   int *compact_hash_init_openmp(int ncells, uint isize, uint jsize, uint report_level);   // presumably the OpenMP counterpart -- definition not visible here
+#endif
+
+int get_hash_method(void);             // returns the hash method currently in effect
+long long get_hash_table_size(void);   // NOTE(review): the visible part of hash.c defines get_hashtablesize() instead -- confirm the intended name
+
+void write_hash_perfect(uint ic, ulong hashkey, int *hash);   // one writer per hash method, plus report_level_1..3 variants of each compact method
+void write_hash_linear(uint ic, ulong hashkey, int *hash);
+void write_hash_linear_report_level_1(uint ic, ulong hashkey, int *hash);
+void write_hash_linear_report_level_2(uint ic, ulong hashkey, int *hash);
+void write_hash_linear_report_level_3(uint ic, ulong hashkey, int *hash);
+void write_hash_quadratic(uint ic, ulong hashkey, int *hash);
+void write_hash_quadratic_report_level_1(uint ic, ulong hashkey, int *hash);
+void write_hash_quadratic_report_level_2(uint ic, ulong hashkey, int *hash);
+void write_hash_quadratic_report_level_3(uint ic, ulong hashkey, int *hash);
+void write_hash_primejump(uint ic, ulong hashkey, int *hash);
+void write_hash_primejump_report_level_1(uint ic, ulong hashkey, int *hash);
+void write_hash_primejump_report_level_2(uint ic, ulong hashkey, int *hash);
+void write_hash_primejump_report_level_3(uint ic, ulong hashkey, int *hash);
+extern void (*write_hash)(uint ic, ulong hashkey, int *hash); // defined in hash.c; presumably pointed at one of the writers above -- TODO confirm
+
+#ifdef _OPENMP   // OpenMP builds only: writers mirroring the serial write_hash_* set
+   void write_hash_linear_openmp(uint ic, ulong hashkey, int *hash);
+   void write_hash_linear_openmp_report_level_1(uint ic, ulong hashkey, int *hash);
+   void write_hash_linear_openmp_report_level_2(uint ic, ulong hashkey, int *hash);
+   void write_hash_linear_openmp_report_level_3(uint ic, ulong hashkey, int *hash);
+   void write_hash_quadratic_openmp(uint ic, ulong hashkey, int *hash);
+   void write_hash_quadratic_openmp_report_level_1(uint ic, ulong hashkey, int *hash);
+   void write_hash_quadratic_openmp_report_level_2(uint ic, ulong hashkey, int *hash);
+   void write_hash_quadratic_openmp_report_level_3(uint ic, ulong hashkey, int *hash);
+   void write_hash_primejump_openmp(uint ic, ulong hashkey, int *hash);
+   void write_hash_primejump_openmp_report_level_1(uint ic, ulong hashkey, int *hash);
+   void write_hash_primejump_openmp_report_level_2(uint ic, ulong hashkey, int *hash);
+   void write_hash_primejump_openmp_report_level_3(uint ic, ulong hashkey, int *hash);
+   extern void (*write_hash)(uint ic, ulong hashkey, int *hash); // defined in hash.c; repeats the unconditional declaration earlier in this header
+#endif
+
+int read_hash_perfect(ulong hashkey, int *hash);   // one reader per hash method, plus report_level_1..3 variants of each compact method
+int read_hash_linear(ulong hashkey, int *hash);
+int read_hash_linear_report_level_1(ulong hashkey, int *hash);
+int read_hash_linear_report_level_2(ulong hashkey, int *hash);
+int read_hash_linear_report_level_3(ulong hashkey, int *hash);
+int read_hash_quadratic(ulong hashkey, int *hash);
+int read_hash_quadratic_report_level_1(ulong hashkey, int *hash);
+int read_hash_quadratic_report_level_2(ulong hashkey, int *hash);
+int read_hash_quadratic_report_level_3(ulong hashkey, int *hash);
+int read_hash_primejump(ulong hashkey, int *hash);
+int read_hash_primejump_report_level_1(ulong hashkey, int *hash);
+int read_hash_primejump_report_level_2(ulong hashkey, int *hash);
+int read_hash_primejump_report_level_3(ulong hashkey, int *hash);
+extern int (*read_hash)(ulong hashkey, int *hash); // defined in hash.c; presumably pointed at one of the readers above -- TODO confirm
+
+void compact_hash_delete(int *hash);   // presumably releases a table returned by compact_hash_init -- definition not visible here
+
+void write_hash_collision_report(void);   // reporting entry points; hash.c keeps write/read collision counters and running sums
+void read_hash_collision_report(void);
+void final_hash_collision_report(void);
+
+const char *get_hash_kernel_source_string(void);   // NOTE(review): hash.c includes the kernel .inc files only under HAVE_OPENCL -- confirm behavior without it
+void hash_lib_init(void);        // definition not visible in this chunk
+void hash_lib_terminate(void);   // definition not visible in this chunk
+
+#ifdef HAVE_OPENCL   // GPU entry points; cl_mem comes from ezcl/ezcl.h, included earlier in this header under the same guard
+cl_mem gpu_compact_hash_init(ulong ncells, int imaxsize, int jmaxsize, int gpu_hash_method, uint hash_report_level_in,
+   ulong *gpu_hash_table_size, ulong *hashsize, cl_mem *dev_hash_header_in);   // the three pointer parameters are presumably outputs -- TODO confirm
+cl_mem gpu_get_hash_header(void);
+void gpu_compact_hash_delete(cl_mem dev_hash, cl_mem dev_hash_header);
+#endif
+int read_dev_hash(int hash_method, ulong hash_table_size, ulong AA, ulong BB, ulong hashkey, int *hash);   // declared outside the HAVE_OPENCL guard, so available in every build
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif // _HASH_H
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/hash.c
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/hash.c
@@ -0,0 +1,1269 @@
+//#if defined __INTEL_COMPILER
+
+#include <stdio.h>
+#define __USE_XOPEN
+#include <stdlib.h>
+#include "hash.h"
+#include "genmalloc.h"
+#ifdef HAVE_OPENCL
+#include "hashlib_kern.inc"
+#include "hashlib_source_kern.inc"
+#endif
+
+static ulong AA; // multiplier of the hash function (hashkey*AA+BB)%prime%hashtablesize; drawn in compact_hash_init
+static ulong BB; // offset of the same hash function; drawn in compact_hash_init
+static ulong prime=4294967291; // modulus of the hash function (2^32 - 5)
+static uint hashtablesize; // number of slots in the current table
+static uint hash_stride; // set to isize; used by the level-3 traces to split a key into ii/jj
+static uint hash_ncells; // writes counted by the reporting write_hash variants
+static uint write_hash_collisions; // collisions accumulated by the reporting write_hash variants
+static uint read_hash_collisions; // collisions accumulated by the reporting read_hash variants
+static double write_hash_collisions_runsum = 0.0;
+static double read_hash_collisions_runsum = 0.0;
+static uint write_hash_collisions_count = 0;
+static uint read_hash_collisions_count = 0;
+static uint hash_report_level = 2; // default; overwritten by compact_hash_init
+static uint hash_queries; // lookups counted by the reporting read_hash variants
+static int hash_method = METHOD_UNSET; // chosen in compact_hash_init
+static uint hash_jump_prime = 41; // prime-jump step is 1+hashkey%hash_jump_prime
+static double hash_mult = 3.0; // compact table size is ncells*hash_mult slots
+
+size_t hash_header_size = 16;
+
+#ifdef HAVE_OPENCL
+cl_mem dev_hash_header = NULL;
+#endif
+
+float mem_opt_factor; // when != 1.0, rescales the memory-ratio threshold in compact_hash_init
+
+int   choose_hash_method = METHOD_UNSET; // when set, overrides hash_method in compact_hash_init
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+
+int (*read_hash)(ulong, int *); // lookup routine selected by compact_hash_init
+void (*write_hash)(uint, ulong, int *); // insert routine selected by compact_hash_init
+
+/* Report the hash method currently held in the file-level hash_method. */
+int get_hash_method(void)
+{ return hash_method; }
+
+/* Report the current table size in slots, widened to long long. */
+long long get_hashtablesize(void)
+{ return (long long)hashtablesize; }
+
+// Build the hash table for ncells cells over an isize x jsize key space, select the read/write routines, return the table.
+int *compact_hash_init(int ncells, uint isize, uint jsize, uint report_level){
+   hash_ncells = 0;
+   write_hash_collisions = 0;
+   read_hash_collisions = 0;
+   hash_queries = 0;
+   hash_report_level = report_level;
+   hash_stride = isize;
+   int *hash = NULL;
+   if (choose_hash_method != METHOD_UNSET) hash_method = choose_hash_method;
+   // An explicit choose_hash_method (above) overrides whatever method is currently set.
+   uint compact_hash_size = (uint)((double)ncells*hash_mult);
+   uint perfect_hash_size = (uint)(isize*jsize);
+   // With no method selected: perfect hash while its size ratio to the compact table stays below the factor.
+   if (hash_method == METHOD_UNSET){
+      float hash_mem_factor = 20.0;
+      float hash_mem_ratio = (double)perfect_hash_size/(double)compact_hash_size;
+      if (mem_opt_factor != 1.0) hash_mem_factor /= (mem_opt_factor*0.2);
+      hash_method = (hash_mem_ratio < hash_mem_factor) ? PERFECT_HASH : QUADRATIC;
+
+      if (hash_report_level >= 2) printf("DEBUG hash_method %d hash_mem_ratio %f hash_mem_factor %f mem_opt_factor %f perfect_hash_size %u compact_hash_size %u\n",
+         hash_method,hash_mem_ratio,hash_mem_factor,mem_opt_factor,perfect_hash_size,compact_hash_size);
+   }
+
+   int do_compact_hash = (hash_method == PERFECT_HASH) ? 0 : 1;
+
+   if (hash_report_level >= 2) printf("DEBUG do_compact_hash %d hash_method %d perfect_hash_size %u compact_hash_size %u\n",
+      do_compact_hash,hash_method,perfect_hash_size,compact_hash_size);
+
+   if (do_compact_hash) {
+      hashtablesize = compact_hash_size;
+      AA = (ulong)(1.0+(double)(prime-1)*drand48());
+      BB = (ulong)(0.0+(double)(prime-1)*drand48());
+      if (AA > prime-1 || BB > prime-1) exit(EXIT_FAILURE); // out-of-range factors are an error, so do not report success
+      if (hash_report_level > 1) printf("Factors AA %lu BB %lu\n",AA,BB);
+      // Compact layout: slot i uses hash[2*i] for the key (-1 when empty) and hash[2*i+1] for the cell number.
+      hash = (int *)genvector(2*hashtablesize,sizeof(int));
+      for (uint ii = 0; ii<2*hashtablesize; ii+=2){
+         hash[ii] = -1;
+      }
+      // Select the probing routines that match the method and the reporting level.
+      if (hash_method == LINEAR){
+         if (hash_report_level == 0){
+            read_hash  = read_hash_linear;
+            write_hash = write_hash_linear;
+         } else if (hash_report_level == 1){
+            read_hash  = read_hash_linear_report_level_1;
+            write_hash = write_hash_linear_report_level_1;
+         } else if (hash_report_level == 2){
+            read_hash  = read_hash_linear_report_level_2;
+            write_hash = write_hash_linear_report_level_2;
+         } else if (hash_report_level == 3){
+            read_hash  = read_hash_linear_report_level_3;
+            write_hash = write_hash_linear_report_level_3;
+         }
+      } else if (hash_method == QUADRATIC) {
+         if (hash_report_level == 0){
+            read_hash  = read_hash_quadratic;
+            write_hash = write_hash_quadratic;
+         } else if (hash_report_level == 1){
+            read_hash  = read_hash_quadratic_report_level_1;
+            write_hash = write_hash_quadratic_report_level_1;
+         } else if (hash_report_level == 2){
+            read_hash  = read_hash_quadratic_report_level_2;
+            write_hash = write_hash_quadratic_report_level_2;
+         } else if (hash_report_level == 3){
+            read_hash  = read_hash_quadratic_report_level_3;
+            write_hash = write_hash_quadratic_report_level_3;
+         }
+      } else if (hash_method == PRIME_JUMP) {
+         if (hash_report_level == 0){
+            read_hash  = read_hash_primejump;
+            write_hash = write_hash_primejump;
+         } else if (hash_report_level == 1){
+            read_hash  = read_hash_primejump_report_level_1;
+            write_hash = write_hash_primejump_report_level_1;
+         } else if (hash_report_level == 2){
+            read_hash  = read_hash_primejump_report_level_2;
+            write_hash = write_hash_primejump_report_level_2;
+         } else if (hash_report_level == 3){
+            read_hash  = read_hash_primejump_report_level_3;
+            write_hash = write_hash_primejump_report_level_3;
+         }
+      }
+   } else {
+      hashtablesize = perfect_hash_size;
+      // Perfect layout: one int per key, -1 when empty.
+      hash = (int *)genvector(hashtablesize,sizeof(int));
+      for (uint ii = 0; ii<hashtablesize; ii++){
+         hash[ii] = -1;
+      }
+
+      read_hash  = read_hash_perfect;
+      write_hash = write_hash_perfect;
+   }
+
+   if (hash_report_level >= 2) {
+      printf("Hash table size %u perfect hash table size %u memory savings %d by percentage %lf\n",
+        hashtablesize,isize*jsize,(int)isize*(int)jsize-(int)hashtablesize,
+        (double)hashtablesize/(double)(isize*jsize) * 100.0);
+   }
+
+   return(hash);
+}
+
+#ifdef _OPENMP
+// OpenMP variant of compact_hash_init: the master thread does the setup between barriers, the fill loops are shared.
+int *compact_hash_init_openmp(int ncells, uint isize, uint jsize, uint report_level){
+   static int *hash = NULL;
+   static float hash_mem_factor;
+   static float hash_mem_ratio;
+   static int do_compact_hash;
+   static uint compact_hash_size;
+   static uint perfect_hash_size;
+
+#pragma omp barrier
+#pragma omp master
+   {
+
+      hash_ncells = 0;
+      write_hash_collisions = 0;
+      read_hash_collisions = 0;
+      hash_queries = 0;
+      hash_report_level = report_level;
+      hash_stride = isize;
+
+      if (choose_hash_method != METHOD_UNSET) hash_method = choose_hash_method;
+
+      compact_hash_size = (uint)((double)ncells*hash_mult);
+      perfect_hash_size = (uint)(isize*jsize);
+
+      if (hash_method == METHOD_UNSET){
+         hash_mem_factor = 20.0;
+         hash_mem_ratio = (double)perfect_hash_size/(double)compact_hash_size;
+         if (mem_opt_factor != 1.0) hash_mem_factor /= (mem_opt_factor*0.2);
+         hash_method = (hash_mem_ratio < hash_mem_factor) ? PERFECT_HASH : QUADRATIC;
+         // (perfect hash while the ratio is below the factor, quadratic probing otherwise)
+
+         if (hash_report_level >= 2) printf("DEBUG hash_method %d hash_mem_ratio %f hash_mem_factor %f mem_opt_factor %f perfect_hash_size %u compact_hash_size %u\n",
+            hash_method,hash_mem_ratio,hash_mem_factor,mem_opt_factor,perfect_hash_size,compact_hash_size);
+      }
+
+      do_compact_hash = (hash_method == PERFECT_HASH) ? 0 : 1;
+
+      if (hash_report_level >= 2) printf("DEBUG do_compact_hash %d hash_method %d perfect_hash_size %u compact_hash_size %u\n",
+         do_compact_hash,hash_method,perfect_hash_size,compact_hash_size);
+
+   } // end omp master
+#pragma omp barrier
+
+   if (do_compact_hash) {
+#pragma omp master
+      {
+         hashtablesize = compact_hash_size;
+         // drand48() is not reseeded here, so AA and BB follow the generator's current state
+         AA = (ulong)(1.0+(double)(prime-1)*drand48());
+         BB = (ulong)(0.0+(double)(prime-1)*drand48());
+         if (AA > prime-1 || BB > prime-1) exit(EXIT_FAILURE); // out-of-range factors are an error, so do not report success
+         if (hash_report_level > 1) printf("Factors AA %lu BB %lu\n",AA,BB);
+
+         hash = (int *)genvector(2*hashtablesize,sizeof(int));
+      } // end omp master
+#pragma omp barrier
+
+#pragma omp for
+      for (uint ii = 0; ii<hashtablesize; ii++){
+         hash[2*ii] = -1;
+      }
+
+#pragma omp master
+      {
+         if (hash_method == LINEAR){
+            if (hash_report_level == 0){
+               read_hash  = read_hash_linear;
+               write_hash = write_hash_linear_openmp;
+            } else if (hash_report_level == 1){
+               read_hash  = read_hash_linear_report_level_1;
+               write_hash = write_hash_linear_openmp_report_level_1;
+            } else if (hash_report_level == 2){
+               read_hash  = read_hash_linear_report_level_2;
+               write_hash = write_hash_linear_openmp_report_level_2;
+            } else if (hash_report_level == 3){
+               read_hash  = read_hash_linear_report_level_3;
+               write_hash = write_hash_linear_openmp_report_level_3;
+            }
+         } else if (hash_method == QUADRATIC) {
+            if (hash_report_level == 0){
+               read_hash  = read_hash_quadratic;
+               write_hash = write_hash_quadratic_openmp;
+            } else if (hash_report_level == 1){
+               read_hash  = read_hash_quadratic_report_level_1;
+               write_hash = write_hash_quadratic_openmp_report_level_1;
+            } else if (hash_report_level == 2){
+               read_hash  = read_hash_quadratic_report_level_2;
+               write_hash = write_hash_quadratic_openmp_report_level_2;
+            } else if (hash_report_level == 3){
+               read_hash  = read_hash_quadratic_report_level_3;
+               write_hash = write_hash_quadratic_openmp_report_level_3;
+            }
+         } else if (hash_method == PRIME_JUMP) {
+            if (hash_report_level == 0){
+               read_hash  = read_hash_primejump;
+               write_hash = write_hash_primejump_openmp;
+            } else if (hash_report_level == 1){
+               read_hash  = read_hash_primejump_report_level_1;
+               write_hash = write_hash_primejump_openmp_report_level_1;
+            } else if (hash_report_level == 2){
+               read_hash  = read_hash_primejump_report_level_2;
+               write_hash = write_hash_primejump_openmp_report_level_2;
+            } else if (hash_report_level == 3){
+               read_hash  = read_hash_primejump_report_level_3;
+               write_hash = write_hash_primejump_openmp_report_level_3;
+            }
+         }
+      } // end omp master
+#pragma omp barrier
+
+   } else {
+
+#pragma omp master
+      {
+         hashtablesize = perfect_hash_size;
+
+         hash = (int *)genvector(hashtablesize,sizeof(int));
+      } // end omp master
+#pragma omp barrier
+
+#pragma omp for
+      for (uint ii = 0; ii<hashtablesize; ii++){
+         hash[ii] = -1;
+      }
+
+#pragma omp master
+      {
+         read_hash  = read_hash_perfect;
+         write_hash = write_hash_perfect;
+      } // end omp master
+#pragma omp barrier
+   }
+
+#pragma omp master
+   {
+      if (hash_report_level >= 2) {
+         printf("Hash table size %u perfect hash table size %u memory savings %d by percentage %lf\n",
+           hashtablesize,isize*jsize,(int)isize*(int)jsize-(int)hashtablesize, // signed: the table may exceed the perfect size
+           (double)hashtablesize/(double)(isize*jsize) * 100.0); // scaled to a percentage, as in compact_hash_init
+      }
+   }
+#pragma omp barrier
+
+   return(hash);
+}
+#endif
+
+/* Perfect hash insert: the key is used directly as the index of the cell number. */
+void write_hash_perfect(uint ic, ulong hashkey, int *hash)
+{ hash[hashkey] = ic; }
+
+/* Insert (hashkey, ic) with linear probing: advance one slot at a time, wrapping at hashtablesize. */
+void write_hash_linear(uint ic, ulong hashkey, int *hash){
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+   while (hash[2*slot] != -1 && hash[2*slot] != (int)hashkey) slot = (slot+1)%hashtablesize;
+   // Even index holds the key, odd index the cell number.
+   hash[2*slot] = hashkey;
+   hash[2*slot+1] = ic;
+}
+
+/* Linear-probing insert that also counts the write (hash_ncells) and every collision. */
+void write_hash_linear_report_level_1(uint ic, ulong hashkey, int *hash){
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+   hash_ncells++;
+   while (hash[2*slot] != -1 && hash[2*slot] != (int)hashkey){
+      write_hash_collisions++;
+      slot = (slot+1)%hashtablesize;
+   }
+   hash[2*slot] = hashkey;
+   hash[2*slot+1] = ic;
+}
+
+/* Level-2 linear-probing insert: bumps hash_ncells once and write_hash_collisions per occupied slot passed. */
+void write_hash_linear_report_level_2(uint ic, ulong hashkey, int *hash){
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+   hash_ncells++;
+   while (hash[2*slot] != -1 && hash[2*slot] != (int)hashkey){
+      write_hash_collisions++;
+      slot = (slot+1)%hashtablesize;
+   }
+   hash[2*slot] = hashkey;
+   hash[2*slot+1] = ic;
+}
+
+/* Linear-probing insert that prints the first slot and each following slot to stdout and counts collisions. */
+void write_hash_linear_report_level_3(uint ic, ulong hashkey, int *hash){
+   int ncollide = 0;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+
+   hash_ncells++;
+   printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",ncollide,ic,slot,hash[2*slot],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+   while (hash[2*slot] != -1 && hash[2*slot] != (int)hashkey){
+      int nextslot = slot+1;
+      nextslot = nextslot%hashtablesize;
+      printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",ncollide,ic,nextslot,hash[2*nextslot],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+      ncollide++;
+      slot = (slot+1)%hashtablesize;
+   }
+   write_hash_collisions += ncollide;
+   hash[2*slot] = hashkey;
+   hash[2*slot+1] = ic;
+}
+
+/* Insert (hashkey, ic) with quadratic probing: the k-th retry advances the slot by k*k. */
+void write_hash_quadratic(uint ic, ulong hashkey, int *hash){
+   int ntry = 0;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+   while (hash[2*slot] != -1 && hash[2*slot] != (int)hashkey){
+      ntry++;
+      slot = (slot+ntry*ntry)%hashtablesize;
+   }
+   hash[2*slot] = hashkey;
+   hash[2*slot+1] = ic;
+}
+
+/* Quadratic-probing insert that also counts the write and adds its retries to write_hash_collisions. */
+void write_hash_quadratic_report_level_1(uint ic, ulong hashkey, int *hash){
+   int ntry = 0;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+   hash_ncells++;
+   while (hash[2*slot] != -1 && hash[2*slot] != (int)hashkey){
+      ntry++;
+      slot = (slot+ntry*ntry)%hashtablesize;
+   }
+   write_hash_collisions += ntry;
+   hash[2*slot] = hashkey;
+   hash[2*slot+1] = ic;
+}
+
+/* Level-2 quadratic-probing insert: bumps hash_ncells and adds the retry count to write_hash_collisions. */
+void write_hash_quadratic_report_level_2(uint ic, ulong hashkey, int *hash){
+   int ntry = 0;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+   hash_ncells++;
+   while (hash[2*slot] != -1 && hash[2*slot] != (int)hashkey){
+      ntry++;
+      slot = (slot+ntry*ntry)%hashtablesize;
+   }
+   write_hash_collisions += ntry;
+   hash[2*slot] = hashkey;
+   hash[2*slot+1] = ic;
+}
+
+/* Quadratic-probing insert that prints the first slot and each next slot to stdout and counts collisions. */
+void write_hash_quadratic_report_level_3(uint ic, ulong hashkey, int *hash){
+   int ntry = 0;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+
+   hash_ncells++;
+   printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",ntry,ic,slot,hash[2*slot],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+   while (hash[2*slot] != -1 && hash[2*slot] != (int)hashkey){
+      ntry++;
+      int nextslot = slot+ntry*ntry;
+      nextslot = nextslot%hashtablesize;
+      printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",ntry,ic,nextslot,hash[2*nextslot],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+      slot = (slot+ntry*ntry)%hashtablesize;
+   }
+   write_hash_collisions += ntry;
+   hash[2*slot] = hashkey;
+   hash[2*slot+1] = ic;
+}
+
+/* Insert (hashkey, ic) with a key-dependent jump: the k-th retry advances the slot by k*jump. */
+void write_hash_primejump(uint ic, ulong hashkey, int *hash){
+   int ntry = 0;
+   uint jump = 1+hashkey%hash_jump_prime;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+   while (hash[2*slot] != -1 && hash[2*slot] != (int)hashkey){
+      ntry++;
+      slot = (slot+ntry*jump)%hashtablesize;
+   }
+   hash[2*slot] = hashkey;
+   hash[2*slot+1] = ic;
+}
+
+/* Prime-jump insert that also counts the write and adds its retries to write_hash_collisions. */
+void write_hash_primejump_report_level_1(uint ic, ulong hashkey, int *hash){
+   int ntry = 0;
+   uint jump = 1+hashkey%hash_jump_prime;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+   hash_ncells++;
+   while (hash[2*slot] != -1 && hash[2*slot] != (int)hashkey){
+      ntry++;
+      slot = (slot+ntry*jump)%hashtablesize;
+   }
+   write_hash_collisions += ntry;
+   hash[2*slot] = hashkey;
+   hash[2*slot+1] = ic;
+}
+
+/* Level-2 prime-jump insert: bumps hash_ncells and adds the retry count to write_hash_collisions. */
+void write_hash_primejump_report_level_2(uint ic, ulong hashkey, int *hash){
+   int ntry = 0;
+   uint jump = 1+hashkey%hash_jump_prime;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+   hash_ncells++;
+   while (hash[2*slot] != -1 && hash[2*slot] != (int)hashkey){
+      ntry++;
+      slot = (slot+ntry*jump)%hashtablesize;
+   }
+   write_hash_collisions += ntry;
+   hash[2*slot] = hashkey;
+   hash[2*slot+1] = ic;
+}
+
+/* Prime-jump insert that prints the first slot and each next slot to stdout and counts collisions. */
+void write_hash_primejump_report_level_3(uint ic, ulong hashkey, int *hash){
+   int icount = 0;
+   uint hashloc;
+
+   uint jump = 1+hashkey%hash_jump_prime;
+   hash_ncells++;
+   hashloc = (hashkey*AA+BB)%prime%hashtablesize;
+   printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,ic,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+   for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != -1 && hash[2*hashloc]!= (int)hashkey; hashloc+=(icount*jump),hashloc = hashloc%hashtablesize){
+      icount++;
+      int hashloctmp = hashloc+icount*jump; // the slot the loop moves to next, not simply hashloc+1
+      hashloctmp = hashloctmp%hashtablesize;
+      printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,ic,hashloctmp,hash[2*hashloctmp],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+   }
+   write_hash_collisions += icount;
+   hash[2*hashloc] = hashkey;
+   hash[2*hashloc+1] = ic;
+}
+
+#ifdef _OPENMP
+/* Linear-probing insert for OpenMP builds: an empty slot (-1) is claimed with an atomic compare-and-swap. */
+void write_hash_linear_openmp(uint ic, ulong hashkey, int *hash){
+   const int try_limit = 1000;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+   int ntry = 1;
+
+   // The swap returns the key previously in the slot; -1 means the slot was empty and now holds hashkey.
+   int seen = __sync_val_compare_and_swap(&hash[2*slot], -1, hashkey);
+
+   while (seen != hashkey && seen != -1 && ntry < try_limit){
+      slot = (slot+1)%hashtablesize;
+      seen = __sync_val_compare_and_swap(&hash[2*slot], -1, hashkey);
+      ntry++;
+   }
+
+   // The cell number is stored only when probing stopped before reaching the try limit.
+   if (ntry < try_limit) hash[2*slot+1] = ic;
+}
+
+/* OpenMP linear-probing insert (compare-and-swap on the key slot) that also updates the write counters atomically. */
+void write_hash_linear_openmp_report_level_1(uint ic, ulong hashkey, int *hash){
+   int icount = 0;
+   uint hashloc = (hashkey*AA+BB)%prime%hashtablesize;
+
+   int MaxTries = 1000;
+
+   int old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey);
+
+   for (icount = 1; old_key != hashkey && old_key != -1 && icount < MaxTries; icount++){
+      hashloc++;
+      hashloc %= hashtablesize;
+
+      old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey);
+      // icount advances only in the for-statement; a second increment here counted every try twice
+   }
+
+   if (icount < MaxTries) hash[2*hashloc+1] = ic;
+
+#pragma omp atomic
+   write_hash_collisions += icount;
+#pragma omp atomic
+   hash_ncells++;
+}
+
+/* Level-2 OpenMP linear-probing insert: compare-and-swap on the key slot, counters updated atomically. */
+void write_hash_linear_openmp_report_level_2(uint ic, ulong hashkey, int *hash){
+   int icount = 0;
+   uint hashloc = (hashkey*AA+BB)%prime%hashtablesize;
+
+   int MaxTries = 1000;
+
+   int old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey);
+
+   for (icount = 1; old_key != hashkey && old_key != -1 && icount < MaxTries; icount++){
+      hashloc++;
+      hashloc %= hashtablesize;
+
+      old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey);
+      // icount advances only in the for-statement; a second increment here counted every try twice
+   }
+
+   if (icount < MaxTries) hash[2*hashloc+1] = ic;
+
+#pragma omp atomic
+   write_hash_collisions += icount;
+#pragma omp atomic
+   hash_ncells++;
+}
+
+/* Level-3 OpenMP linear-probing insert: prints each probed slot, claims the slot by compare-and-swap, updates counters atomically. */
+void write_hash_linear_openmp_report_level_3(uint ic, ulong hashkey, int *hash){
+   int icount = 0;
+   uint hashloc = (hashkey*AA+BB)%prime%hashtablesize;
+   printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,ic,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+
+   int MaxTries = 1000;
+
+   int old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey);
+
+   for (icount = 1; old_key != hashkey && old_key != -1 && icount < MaxTries; icount++){
+      hashloc++;
+      hashloc %= hashtablesize;
+      printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,ic,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+
+      old_key = __sync_val_compare_and_swap(&hash[2*hashloc], -1, hashkey);
+      // icount advances only in the for-statement; a second increment here counted every try twice
+   }
+
+   if (icount < MaxTries) hash[2*hashloc+1] = ic;
+
+#pragma omp atomic
+   write_hash_collisions += icount;
+#pragma omp atomic
+   hash_ncells++;
+}
+
+/* Quadratic-probing insert for OpenMP builds: an empty slot (-1) is claimed with an atomic compare-and-swap. */
+void write_hash_quadratic_openmp(uint ic, ulong hashkey, int *hash){
+   const int try_limit = 1000;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+   int ntry = 1;
+
+   // The swap returns the key previously in the slot; -1 means the slot was empty and now holds hashkey.
+   int seen = __sync_val_compare_and_swap(&hash[2*slot], -1, hashkey);
+
+   while (seen != hashkey && seen != -1 && ntry < try_limit){
+      slot = (slot+ntry*ntry)%hashtablesize;
+      seen = __sync_val_compare_and_swap(&hash[2*slot], -1, hashkey);
+      ntry++;
+   }
+
+   // The cell number is stored only when probing stopped before reaching the try limit.
+   if (ntry < try_limit) hash[2*slot+1] = ic;
+}
+
+/* OpenMP quadratic-probing insert (compare-and-swap on the key slot) that also updates the write counters atomically. */
+void write_hash_quadratic_openmp_report_level_1(uint ic, ulong hashkey, int *hash){
+   const int try_limit = 1000;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+   int ntry = 1;
+
+   // The swap returns the key previously in the slot; -1 means the slot was empty and now holds hashkey.
+   int seen = __sync_val_compare_and_swap(&hash[2*slot], -1, hashkey);
+
+   while (seen != hashkey && seen != -1 && ntry < try_limit){
+      slot = (slot+ntry*ntry)%hashtablesize;
+      seen = __sync_val_compare_and_swap(&hash[2*slot], -1, hashkey);
+      ntry++;
+   }
+
+   // The cell number is stored only when probing stopped before reaching the try limit.
+   if (ntry < try_limit) hash[2*slot+1] = ic;
+
+#pragma omp atomic
+   write_hash_collisions += ntry;
+#pragma omp atomic
+   hash_ncells++;
+}
+
+/* Level-2 OpenMP quadratic-probing insert: compare-and-swap on the key slot, counters updated atomically. */
+void write_hash_quadratic_openmp_report_level_2(uint ic, ulong hashkey, int *hash){
+   const int try_limit = 1000;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+   int ntry = 1;
+
+   // The swap returns the key previously in the slot; -1 means the slot was empty and now holds hashkey.
+   int seen = __sync_val_compare_and_swap(&hash[2*slot], -1, hashkey);
+
+   while (seen != hashkey && seen != -1 && ntry < try_limit){
+      slot = (slot+ntry*ntry)%hashtablesize;
+      seen = __sync_val_compare_and_swap(&hash[2*slot], -1, hashkey);
+      ntry++;
+   }
+
+   // The cell number is stored only when probing stopped before reaching the try limit.
+   if (ntry < try_limit) hash[2*slot+1] = ic;
+
+#pragma omp atomic
+   write_hash_collisions += ntry;
+#pragma omp atomic
+   hash_ncells++;
+}
+
+/* Level-3 OpenMP quadratic-probing insert: prints each probed slot, claims by compare-and-swap, updates counters atomically. */
+void write_hash_quadratic_openmp_report_level_3(uint ic, ulong hashkey, int *hash){
+   const int try_limit = 1000;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+   int ntry = 0;
+   printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",ntry,ic,slot,hash[2*slot],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+
+   // The swap returns the key previously in the slot; -1 means the slot was empty and now holds hashkey.
+   int seen = __sync_val_compare_and_swap(&hash[2*slot], -1, hashkey);
+
+   ntry = 1;
+   while (seen != hashkey && seen != -1 && ntry < try_limit){
+      slot = (slot+ntry*ntry)%hashtablesize;
+      printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",ntry,ic,slot,hash[2*slot],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+      seen = __sync_val_compare_and_swap(&hash[2*slot], -1, hashkey);
+      ntry++;
+   }
+
+   if (ntry < try_limit) hash[2*slot+1] = ic;
+
+#pragma omp atomic
+   write_hash_collisions += ntry;
+#pragma omp atomic
+   hash_ncells++;
+}
+
+/* Prime-jump insert for OpenMP builds: an empty slot (-1) is claimed with an atomic compare-and-swap. */
+void write_hash_primejump_openmp(uint ic, ulong hashkey, int *hash){
+   const int try_limit = 1000;
+   uint jump = 1+hashkey%hash_jump_prime;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+   int ntry = 1;
+
+   // The swap returns the key previously in the slot; -1 means the slot was empty and now holds hashkey.
+   int seen = __sync_val_compare_and_swap(&hash[2*slot], -1, hashkey);
+
+   while (seen != hashkey && seen != -1 && ntry < try_limit){
+      slot = (slot+ntry*jump)%hashtablesize;
+      seen = __sync_val_compare_and_swap(&hash[2*slot], -1, hashkey);
+      ntry++;
+   }
+
+   // The cell number is stored only when probing stopped before reaching the try limit.
+   if (ntry < try_limit) hash[2*slot+1] = ic;
+}
+
+/* OpenMP prime-jump insert (compare-and-swap on the key slot) that also updates the write counters atomically. */
+void write_hash_primejump_openmp_report_level_1(uint ic, ulong hashkey, int *hash){
+   const int try_limit = 1000;
+   uint jump = 1+hashkey%hash_jump_prime;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+   int ntry = 1;
+
+   // The swap returns the key previously in the slot; -1 means the slot was empty and now holds hashkey.
+   int seen = __sync_val_compare_and_swap(&hash[2*slot], -1, hashkey);
+
+   while (seen != hashkey && seen != -1 && ntry < try_limit){
+      slot = (slot+ntry*jump)%hashtablesize;
+      seen = __sync_val_compare_and_swap(&hash[2*slot], -1, hashkey);
+      ntry++;
+   }
+
+   // The cell number is stored only when probing stopped before reaching the try limit.
+   if (ntry < try_limit) hash[2*slot+1] = ic;
+
+#pragma omp atomic
+   write_hash_collisions += ntry;
+#pragma omp atomic
+   hash_ncells++;
+}
+
+/* Level-2 OpenMP prime-jump insert: compare-and-swap on the key slot, counters updated atomically. */
+void write_hash_primejump_openmp_report_level_2(uint ic, ulong hashkey, int *hash){
+   const int try_limit = 1000;
+   uint jump = 1+hashkey%hash_jump_prime;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+   int ntry = 1;
+
+   // The swap returns the key previously in the slot; -1 means the slot was empty and now holds hashkey.
+   int seen = __sync_val_compare_and_swap(&hash[2*slot], -1, hashkey);
+
+   while (seen != hashkey && seen != -1 && ntry < try_limit){
+      slot = (slot+ntry*jump)%hashtablesize;
+      seen = __sync_val_compare_and_swap(&hash[2*slot], -1, hashkey);
+      ntry++;
+   }
+
+   // The cell number is stored only when probing stopped before reaching the try limit.
+   if (ntry < try_limit) hash[2*slot+1] = ic;
+
+#pragma omp atomic
+   write_hash_collisions += ntry;
+#pragma omp atomic
+   hash_ncells++;
+}
+
+/* Level-3 OpenMP prime-jump insert: prints each probed slot, claims by compare-and-swap, updates counters atomically. */
+void write_hash_primejump_openmp_report_level_3(uint ic, ulong hashkey, int *hash){
+   const int try_limit = 1000;
+   uint jump = 1+hashkey%hash_jump_prime;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+   int ntry = 0;
+   printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",ntry,ic,slot,hash[2*slot],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+
+   // The swap returns the key previously in the slot; -1 means the slot was empty and now holds hashkey.
+   int seen = __sync_val_compare_and_swap(&hash[2*slot], -1, hashkey);
+
+   ntry = 1;
+   while (seen != hashkey && seen != -1 && ntry < try_limit){
+      slot = (slot+ntry*jump)%hashtablesize;
+      printf("%d: cell %d hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",ntry,ic,slot,hash[2*slot],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+      seen = __sync_val_compare_and_swap(&hash[2*slot], -1, hashkey);
+      ntry++;
+   }
+
+   if (ntry < try_limit) hash[2*slot+1] = ic;
+
+#pragma omp atomic
+   write_hash_collisions += ntry;
+#pragma omp atomic
+   hash_ncells++;
+}
+#endif
+
+/* Perfect hash lookup: the key is the index, so return whatever is stored at hash[hashkey]. */
+int read_hash_perfect(ulong hashkey, int *hash)
+{ return hash[hashkey]; }
+
+/* Look up hashkey with linear probing; returns the stored cell number, or -1 if an empty slot is reached. */
+int read_hash_linear(ulong hashkey, int *hash){
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+
+   // Step one slot at a time until the key or an empty slot (-1) is found.
+   while (hash[2*slot] != (int)hashkey && hash[2*slot] != -1){
+      slot = (slot+1)%hashtablesize;
+   }
+
+   // An empty slot means the key is absent.
+   return (hash[2*slot] != -1) ? hash[2*slot+1] : -1;
+}
+
+/* Linear-probing lookup that also counts the query and adds its misses to read_hash_collisions. */
+int read_hash_linear_report_level_1(ulong hashkey, int *hash){
+   int nmiss = 0;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+
+   hash_queries++;
+   while (hash[2*slot] != (int)hashkey && hash[2*slot] != -1){
+      nmiss++;
+      slot = (slot+1)%hashtablesize;
+   }
+   read_hash_collisions += nmiss;
+
+   return (hash[2*slot] != -1) ? hash[2*slot+1] : -1;
+}
+
+/* Linear-probing lookup with counters; terminates the program with a failure status after more than 1000 collisions. */
+int read_hash_linear_report_level_2(ulong hashkey, int *hash){
+   int max_collisions_allowed = 1000;
+   int hashval = -1;
+   uint hashloc;
+   int icount=0;
+
+   hash_queries++;
+   for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc++,hashloc = hashloc%hashtablesize){
+      icount++;
+      if (icount > max_collisions_allowed) {
+         printf("Error -- too many read hash collisions\n");
+         exit(EXIT_FAILURE); // this is an error path, so the exit status must not signal success
+      }
+   }
+   read_hash_collisions += icount;
+   if (hash[2*hashloc] != -1) hashval = hash[2*hashloc+1]; // -1 (empty slot) means the key is absent
+   return(hashval);
+}
+
+/* Linear-probing lookup that prints each probed slot; exits with a failure status after more than 1000 collisions. */
+int read_hash_linear_report_level_3(ulong hashkey, int *hash){
+   int max_collisions_allowed = 1000;
+   int hashval = -1;
+   uint hashloc;
+   int icount=0;
+
+   hash_queries++;
+   hashloc = (hashkey*AA+BB)%prime%hashtablesize;
+   printf("%d: hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+   for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc++,hashloc = hashloc%hashtablesize){
+      icount++;
+      uint hashloctmp = hashloc+1;
+      hashloctmp = hashloctmp%hashtablesize;
+      printf("%d: hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloctmp,hash[2*hashloctmp],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+      if (icount > max_collisions_allowed) {
+         printf("Error -- too many read hash collisions\n");
+         exit(EXIT_FAILURE); // this is an error path, so the exit status must not signal success
+      }
+   }
+   read_hash_collisions += icount;
+   if (hash[2*hashloc] != -1) hashval = hash[2*hashloc+1]; // -1 (empty slot) means the key is absent
+   return(hashval);
+}
+
+/* Look up hashkey with quadratic probing; returns the stored cell number, or -1 if an empty slot is reached. */
+int read_hash_quadratic(ulong hashkey, int *hash){
+   int ntry = 0;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+
+   while (hash[2*slot] != (int)hashkey && hash[2*slot] != -1){
+      ntry++;
+      slot = (slot+ntry*ntry)%hashtablesize;
+   }
+
+   return (hash[2*slot] != -1) ? hash[2*slot+1] : -1;
+}
+
+/* Quadratic-probing lookup that also counts the query and adds its retries to read_hash_collisions. */
+int read_hash_quadratic_report_level_1(ulong hashkey, int *hash){
+   int ntry = 0;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+
+   hash_queries++;
+   while (hash[2*slot] != (int)hashkey && hash[2*slot] != -1){
+      ntry++;
+      slot = (slot+ntry*ntry)%hashtablesize;
+   }
+   read_hash_collisions += ntry;
+
+   return (hash[2*slot] != -1) ? hash[2*slot+1] : -1;
+}
+
+/* Quadratic-probing lookup with counters; terminates the program with a failure status after more than 1000 collisions. */
+int read_hash_quadratic_report_level_2(ulong hashkey, int *hash){
+   int max_collisions_allowed = 1000;
+   int hashval = -1;
+   uint hashloc;
+   int icount=0;
+
+   hash_queries++;
+   for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*icount),hashloc = hashloc%hashtablesize){
+      icount++;
+      if (icount > max_collisions_allowed) {
+         printf("Error -- too many read hash collisions\n");
+         exit(EXIT_FAILURE); // this is an error path, so the exit status must not signal success
+      }
+   }
+   read_hash_collisions += icount;
+   if (hash[2*hashloc] != -1) hashval = hash[2*hashloc+1]; // -1 (empty slot) means the key is absent
+   return(hashval);
+}
+
+/* Quadratic-probing lookup that prints each probed slot; exits with a failure status after more than 1000 collisions. */
+int read_hash_quadratic_report_level_3(ulong hashkey, int *hash){
+   int max_collisions_allowed = 1000;
+   int hashval = -1;
+   uint hashloc;
+   int icount=0;
+
+   hash_queries++;
+   hashloc = (hashkey*AA+BB)%prime%hashtablesize;
+   printf("%d: hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+   for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*icount),hashloc = hashloc%hashtablesize){
+      icount++;
+      uint hashloctmp = hashloc+icount*icount; // the slot the loop moves to next, not simply hashloc+1
+      hashloctmp = hashloctmp%hashtablesize;
+      printf("%d: hashloc is %d hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloctmp,hash[2*hashloctmp],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+      if (icount > max_collisions_allowed) {
+         printf("Error -- too many read hash collisions\n");
+         exit(EXIT_FAILURE); // this is an error path, so the exit status must not signal success
+      }
+   }
+   read_hash_collisions += icount;
+   if (hash[2*hashloc] != -1) hashval = hash[2*hashloc+1]; // -1 (empty slot) means the key is absent
+   return(hashval);
+}
+
+/* Prime-jump probe lookup without statistics; returns hash[2*slot+1] for the key or -1. */
+int read_hash_primejump(ulong hashkey, int *hash){
+   int nprobes = 0;
+   uint stride = 1+hashkey%hash_jump_prime;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+   /* Keys sit at even indices; stop on a match or on a key entry of -1. */
+   while (hash[2*slot] != (int)hashkey && hash[2*slot] != -1) {
+      nprobes++;
+      slot += (nprobes*stride);
+      slot = slot%hashtablesize;
+   }
+   return((hash[2*slot] != -1) ? hash[2*slot+1] : -1);
+}
+
+/* Prime-jump probe lookup that also counts the query and its collisions (report level 1). */
+int read_hash_primejump_report_level_1(ulong hashkey, int *hash){
+   int nprobes = 0;
+   uint stride = 1+hashkey%hash_jump_prime;
+   uint slot = (hashkey*AA+BB)%prime%hashtablesize;
+   hash_queries++;
+   /* Keys sit at even indices; stop on a match or on a key entry of -1. */
+   while (hash[2*slot] != (int)hashkey && hash[2*slot] != -1) {
+      nprobes++;
+      slot += (nprobes*stride);
+      slot = slot%hashtablesize;
+   }
+   read_hash_collisions += nprobes;
+   return((hash[2*slot] != -1) ? hash[2*slot+1] : -1);
+}
+
+/* Prime-jump probe lookup with collision accounting (report level 2).
+ * Returns hash[2*slot+1] for the matching key or -1; exits after more than 1000 probes. */
+int read_hash_primejump_report_level_2(ulong hashkey, int *hash){
+   int max_collisions_allowed = 1000;
+   int hashval = -1;
+   uint hashloc;
+   int icount=0;
+   uint jump = 1+hashkey%hash_jump_prime;
+   hash_queries++;
+   for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*jump),hashloc = hashloc%hashtablesize){
+      icount++;
+      if (icount > max_collisions_allowed) {
+         printf("Error -- too many read hash collisions\n");
+         exit(1); /* fatal error: must not report success to the caller */
+      }
+   }
+   read_hash_collisions += icount;
+   if (hash[2*hashloc] != -1) hashval = hash[2*hashloc+1];
+   return(hashval);
+}
+
+/* Prime-jump probe lookup (report level 3): traces the start slot and every slot probed
+ * after it, counts collisions, and exits with an error after more than 1000 probes. */
+int read_hash_primejump_report_level_3(ulong hashkey, int *hash){
+   int max_collisions_allowed = 1000;
+   int hashval = -1;
+   uint hashloc;
+   int icount=0;
+   uint jump = 1+hashkey%hash_jump_prime;
+   hash_queries++;
+   hashloc = (hashkey*AA+BB)%prime%hashtablesize;
+   printf("%d: hashloc is %u hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+   for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*jump),hashloc = hashloc%hashtablesize){
+      icount++;
+      uint hashloctmp = hashloc+(icount*jump); /* the slot the loop step visits next */
+      hashloctmp = hashloctmp%hashtablesize;
+      printf("%d: hashloc is %u hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloctmp,hash[2*hashloctmp],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+      if (icount > max_collisions_allowed) {
+         printf("Error -- too many read hash collisions\n");
+         exit(1); /* fatal error: must not report success to the caller */
+      }
+   }
+   read_hash_collisions += icount;
+   if (hash[2*hashloc] != -1) hashval = hash[2*hashloc+1];
+   return(hashval);
+}
+
+void compact_hash_delete(int *hash){ /* frees a hash table and resets the module state */
+   read_hash = NULL;             /* clear the module-level read_hash */
+   genvectorfree(hash);          /* int* converts to void* without a cast */
+   hash_method = METHOD_UNSET;   /* no hash method is selected afterwards */
+}
+
+/* Accumulates (level 1) or prints (level >= 2) write collisions per cell; no-op for perfect hash. */
+void write_hash_collision_report(void){
+   if (hash_method == PERFECT_HASH || hash_report_level < 1) return;
+   double per_cell = (double)write_hash_collisions/(double)hash_ncells;
+   if (hash_report_level == 1) {
+      write_hash_collisions_runsum += per_cell;
+      write_hash_collisions_count++;
+   } else printf("Write hash collision report -- collisions per cell %lf, collisions %d cells %d\n",per_cell,write_hash_collisions,hash_ncells);
+}
+
+/* Accumulates (level 1) or prints and resets (level >= 2) read collisions per query. */
+void read_hash_collision_report(void){
+   if (hash_method == PERFECT_HASH || hash_report_level < 1) return;
+   double per_query = (double)read_hash_collisions/(double)hash_queries;
+   if (hash_report_level == 1) {
+      read_hash_collisions_runsum += per_query;
+      read_hash_collisions_count++;
+      return;
+   }
+   printf("Read hash collision report -- collisions per cell %lf, collisions %d cells %d\n",per_query,read_hash_collisions,hash_queries);
+   hash_queries = read_hash_collisions = 0; /* counters restart only at level >= 2 */
+}
+
+/* Prints hashtablesize*sizeof(int) and, at report level >= 1, the mean write/read collision rates. */
+void final_hash_collision_report(void){
+   printf("hash table size  bytes %lu\n",(unsigned long)(hashtablesize*sizeof(int))); /* unsigned value needs %lu */
+   if (hash_report_level >= 1 && read_hash_collisions_count > 0)
+      printf("Final hash collision report -- write/read collisions per cell %lf/%lf\n",write_hash_collisions_runsum/(double)write_hash_collisions_count,read_hash_collisions_runsum/(double)read_hash_collisions_count);
+}
+
+#ifdef HAVE_OPENCL
+/* Accessor for the hashlib_source_kern_source string (kernel source text, judging by the name). */
+const char *get_hash_kernel_source_string(void){
+   return hashlib_source_kern_source;
+}
+#endif
+
+#ifdef HAVE_OPENCL
+/* Kernel handle for "hash_init_cl"; assigned by hash_lib_init(). */
+static cl_kernel kernel_hash_init;
+/* Creates a program from hashlib_kern_source and keeps its "hash_init_cl" kernel. */
+void hash_lib_init(void){
+   /* NOTE(review): get_hash_kernel_source_string() returns hashlib_source_kern_source -- confirm both names exist. */
+   cl_program hash_program =
+      ezcl_create_program_wsource(ezcl_get_context(), NULL, hashlib_kern_source); /* NULL: no defines */
+
+   kernel_hash_init = ezcl_create_kernel_wprogram(hash_program, "hash_init_cl");
+   ezcl_program_release(hash_program);   /* only the kernel handle is kept */
+}
+
+void hash_lib_terminate(void){ /* releases the kernel handle assigned in hash_lib_init() */
+   ezcl_kernel_release(kernel_hash_init);
+}
+
+cl_mem gpu_compact_hash_init(ulong ncells, int imaxsize, int jmaxsize, int gpu_hash_method, uint hash_report_level_in,
+   ulong *gpu_hashtablesize, ulong *hashsize, cl_mem *dev_hash_header_in)
+{  /* Sizes and allocates the device hash buffer plus its header, enqueues hash_init_cl on it, returns the buffer. */
+   hash_report_level = hash_report_level_in;
+   /* Candidate sizes: compact = ncells*hash_mult entries, perfect = imaxsize*jmaxsize entries. */
+   uint gpu_compact_hash_size = (uint)((double)ncells*hash_mult);
+   uint gpu_perfect_hash_size = (uint)(imaxsize*jmaxsize);
+   /* With no method requested, pick PERFECT_HASH while perfect/compact stays below the memory factor. */
+   if (gpu_hash_method == METHOD_UNSET) {
+      float gpu_hash_mem_factor = 20.0;
+      float gpu_hash_mem_ratio = (double)gpu_perfect_hash_size/(double)gpu_compact_hash_size;
+      if (mem_opt_factor != 1.0) gpu_hash_mem_factor /= (mem_opt_factor*0.2);
+      gpu_hash_method = (gpu_hash_mem_ratio < gpu_hash_mem_factor) ? PERFECT_HASH : QUADRATIC;
+   }
+
+   int gpu_do_compact_hash = (gpu_hash_method == PERFECT_HASH) ? 0 : 1;
+   /* AA/BB stay at 1/0 for the perfect hash and are drawn with drand48() for a compact hash. */
+   ulong gpu_AA = 1;
+   ulong gpu_BB = 0;
+   if (gpu_do_compact_hash){
+      (*gpu_hashtablesize) = gpu_compact_hash_size;
+      gpu_AA = (ulong)(1.0+(double)(prime-1)*drand48());
+      gpu_BB = (ulong)(0.0+(double)(prime-1)*drand48());
+      //if ( gpu_AA > prime-1 || gpu_BB > prime-1) exit(0);
+      (*hashsize) = 2*gpu_compact_hash_size; /* two ints per entry */
+   } else {
+      (*gpu_hashtablesize) = gpu_perfect_hash_size;
+      (*hashsize) = gpu_perfect_hash_size;
+   }
+
+   hashtablesize = (*hashsize); /* module-level hashtablesize now holds the int count, not the entry count */
+
+   const uint TILE_SIZE = 128;
+
+   cl_command_queue command_queue = ezcl_get_command_queue();
+   /* The header holds four words: method, table size, AA, BB; it is staged on the host, then copied. */
+   cl_mem dev_hash = ezcl_malloc(NULL, "dev_hash", hashsize, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+   ulong *gpu_hash_header = (ulong *)genvector(hash_header_size, sizeof(ulong));
+   gpu_hash_header[0] = (ulong)gpu_hash_method; 
+   gpu_hash_header[1] =        (*gpu_hashtablesize);
+   gpu_hash_header[2] =        gpu_AA;
+   gpu_hash_header[3] =        gpu_BB;
+   dev_hash_header = ezcl_malloc(NULL, "dev_hash_header", &hash_header_size, sizeof(cl_ulong),  CL_MEM_READ_WRITE, 0);
+   ezcl_enqueue_write_buffer(command_queue, dev_hash_header, CL_TRUE, 0, hash_header_size*sizeof(cl_ulong), &gpu_hash_header[0], NULL);
+
+   genvectorfree(gpu_hash_header);
+
+   (*dev_hash_header_in) = dev_hash_header; /* the same handle stays in the file-scope dev_hash_header */
+   /* Global work size is hashsize rounded up to a multiple of the local size (at most TILE_SIZE). */
+   size_t hash_local_work_size  = MIN((*hashsize), TILE_SIZE);
+   size_t hash_global_work_size = (((*hashsize)+hash_local_work_size - 1) /hash_local_work_size) * hash_local_work_size;
+   /* NOTE(review): arg 0 passes a ulong* with sizeof(cl_int); that reads the low word only on little-endian hosts -- confirm. */
+   ezcl_set_kernel_arg(kernel_hash_init, 0, sizeof(cl_int),  (void *)hashsize);
+   ezcl_set_kernel_arg(kernel_hash_init, 1, sizeof(cl_mem),  (void *)&dev_hash);
+   ezcl_enqueue_ndrange_kernel(command_queue, kernel_hash_init,   1, NULL, &hash_global_work_size, &hash_local_work_size, NULL);
+
+   return(dev_hash);
+}
+
+void gpu_compact_hash_delete(cl_mem dev_hash, cl_mem dev_hash_header){ /* frees both device buffers */
+   ezcl_device_memory_delete(dev_hash);
+   ezcl_device_memory_delete(dev_hash_header); /* parameter shadows the file-scope dev_hash_header, which is not cleared */
+   hash_method = METHOD_UNSET; /* NOTE(review): gpu_compact_hash_init() never sets hash_method -- confirm this reset is intended */
+}
+
+cl_mem gpu_get_hash_header(void){ /* returns the header buffer last stored by gpu_compact_hash_init() */
+   return(dev_hash_header);
+}
+#endif
+
+/* Looks up hashkey in a table described by explicit parameters (hash_method, hashtablesize,
+ * AA, BB shadow the module globals). Statistics and tracing follow the global hash_report_level.
+ * PERFECT_HASH returns hash[hashkey]; otherwise returns hash[2*slot+1] for the key, or -1. */
+int read_dev_hash(int hash_method, ulong hashtablesize, ulong AA, ulong BB, ulong hashkey, int *hash){
+   int max_collisions_allowed = 1000;
+   int hashval = -1;
+   uint hashloc;
+   int icount=0;
+   if (hash_method == PERFECT_HASH) return(hash[hashkey]);
+   if (hash_method == LINEAR) {
+      if (hash_report_level == 0) {
+         for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc++,hashloc = hashloc%hashtablesize){
+            icount++;
+         }
+      } else if (hash_report_level == 1) {
+         hash_queries++;
+         for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc++,hashloc = hashloc%hashtablesize){
+            icount++;
+         }
+         read_hash_collisions += icount;
+      } else if (hash_report_level == 2) {
+         hash_queries++;
+         for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc++,hashloc = hashloc%hashtablesize){
+            icount++;
+            if (icount > max_collisions_allowed) {
+               printf("Error -- too many read hash collisions\n");
+               exit(1); /* fatal error: must not report success */
+            }
+         }
+         read_hash_collisions += icount;
+      } else if (hash_report_level == 3) {
+         hash_queries++;
+         hashloc = (hashkey*AA+BB)%prime%hashtablesize;
+         printf("%d: hashloc is %u hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+         for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc++,hashloc = hashloc%hashtablesize){
+            icount++;
+            uint hashloctmp = hashloc+1; /* linear probing: the next slot is the neighbour */
+            hashloctmp = hashloctmp%hashtablesize;
+            printf("%d: hashloc is %u hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloctmp,hash[2*hashloctmp],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+            if (icount > max_collisions_allowed) {
+               printf("Error -- too many read hash collisions\n");
+               exit(1); /* fatal error: must not report success */
+            }
+         }
+         read_hash_collisions += icount;
+      } else {
+         printf("Error -- Illegal value of hash_report_level %d\n",hash_report_level);
+         exit(1);
+      }
+   } else if (hash_method == QUADRATIC) {
+      if (hash_report_level == 0) {
+         for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*icount),hashloc = hashloc%hashtablesize){
+            icount++;
+         }
+      } else if (hash_report_level == 1) {
+         hash_queries++;
+         for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*icount),hashloc = hashloc%hashtablesize){
+            icount++;
+         }
+         read_hash_collisions += icount;
+      } else if (hash_report_level == 2) {
+         hash_queries++;
+         for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*icount),hashloc = hashloc%hashtablesize){
+            icount++;
+            if (icount > max_collisions_allowed) {
+               printf("Error -- too many read hash collisions\n");
+               exit(1); /* fatal error: must not report success */
+            }
+         }
+         read_hash_collisions += icount;
+      } else if (hash_report_level == 3) {
+         hash_queries++;
+         hashloc = (hashkey*AA+BB)%prime%hashtablesize;
+         printf("%d: hashloc is %u hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+         for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*icount),hashloc = hashloc%hashtablesize){
+            icount++;
+            uint hashloctmp = hashloc+(icount*icount); /* the slot the quadratic step visits next */
+            hashloctmp = hashloctmp%hashtablesize;
+            printf("%d: hashloc is %u hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloctmp,hash[2*hashloctmp],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+            if (icount > max_collisions_allowed) {
+               printf("Error -- too many read hash collisions\n");
+               exit(1); /* fatal error: must not report success */
+            }
+         }
+         read_hash_collisions += icount;
+      } else {
+         printf("Error -- Illegal value of hash_report_level %d\n",hash_report_level);
+         exit(1);
+      }
+   } else if (hash_method == PRIME_JUMP) {
+      uint jump = 1+hashkey%hash_jump_prime;
+      if (hash_report_level == 0) {
+         for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*jump),hashloc = hashloc%hashtablesize){
+            icount++;
+         }
+      } else if (hash_report_level == 1) {
+         hash_queries++;
+         for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*jump),hashloc = hashloc%hashtablesize){
+            icount++;
+         }
+         read_hash_collisions += icount;
+      } else if (hash_report_level == 2) {
+         hash_queries++;
+         for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*jump),hashloc = hashloc%hashtablesize){
+            icount++;
+            if (icount > max_collisions_allowed) {
+               printf("Error -- too many read hash collisions\n");
+               exit(1); /* fatal error: must not report success */
+            }
+         }
+         read_hash_collisions += icount;
+      } else if (hash_report_level == 3) {
+         hash_queries++;
+         hashloc = (hashkey*AA+BB)%prime%hashtablesize;
+         printf("%d: hashloc is %u hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloc,hash[2*hashloc],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+         for (hashloc = (hashkey*AA+BB)%prime%hashtablesize; hash[2*hashloc] != (int)hashkey && hash[2*hashloc] != -1; hashloc+=(icount*jump),hashloc = hashloc%hashtablesize){
+            icount++;
+            uint hashloctmp = hashloc+(icount*jump); /* the slot the prime-jump step visits next */
+            hashloctmp = hashloctmp%hashtablesize;
+            printf("%d: hashloc is %u hash[2*hashloc] = %d hashkey %lu ii %lu jj %lu\n",icount,hashloctmp,hash[2*hashloctmp],hashkey,hashkey%hash_stride,hashkey/hash_stride);
+            if (icount > max_collisions_allowed) {
+               printf("Error -- too many read hash collisions\n");
+               exit(1); /* fatal error: must not report success */
+            }
+         }
+         read_hash_collisions += icount;
+      } else {
+         printf("Error -- Illegal value of hash_report_level %d\n",hash_report_level);
+         exit(1);
+      }
+   } else {
+      printf("Error -- Illegal value of hash_method %d\n",hash_method);
+      exit(1);
+   }
+
+   if (hash[2*hashloc] != -1) hashval = hash[2*hashloc+1];
+   return(hashval);
+}
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/hsfc.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/hsfc.h
@@ -0,0 +1,86 @@
+/* ---------------------------------------------------------------------
+Author:     H. Carter Edwards 
+            hcedwar@sandia.gov
+
+Copyright:  Copyright (C) 1997   H. Carter Edwards
+            Graduate Student
+            University of Texas
+
+Re-release: Copyright (C) 2011-2012   H. Carter Edwards
+
+Purpose:    Domain partitioning based upon Hilbert Space-Filling Curve
+            ordering.
+
+License:    Re-release under the less-restrictive CLAMR software terms.
+            Permitted by email with H. Carter Edwards on 9/13/2011
+
+Disclaimer:
+
+    These routines comes with ABSOLUTELY NO WARRANTY;
+    This is free software, and you are welcome to redistribute it
+    under certain conditions. See License terms in file 'LICENSE'.
+--------------------------------------------------------------------- */
+
+/*----------------------------------------------------------------------
+Description:
+  Inverse of the Hilbert Space-Filling Curve Map from a 2D or 3D
+domain to the 1D domain.  Two different 2D and 3D domains are
+supported.
+
+For the routines 'hsfc2d' and 'hsfc3d' the 2D and 3D domains are
+defined as follows.
+Note that
+  *     0   is the minimum value of an unsigned integer
+  *   ~(0u) is the maximum value of an unsigned integer - all bits set
+thus the 2D and 3D domains are
+  *   [0,~(0u)] x [0,~(0u)]
+  *   [0,~(0u)] x [0,~(0u)] x [0,~(0u)]
+respectively.
+
+For the routines 'fhsfc2d' and 'fhsfc3d' the 2D and 3D domains are
+defined as:
+  *   [0.0,1.0] x [0.0,1.0]
+  *   [0.0,1.0] x [0.0,1.0] x [0.0,1.0]
+respectively.
+
+The 1D domain is a multiword (array of unsigned integers) key.
+This key is essentially an unsigned integer of an arbitrary
+number of bits.  The most significant bit is the leading bit
+of the first (0th) word of the key.  The least significant
+bit is the trailing bit of the last word.
+
+----------------------------------------------------------------------*/
+
+#ifndef __HILBERT_SPACE_FILLING_CURVE_MAPPING__
+#define __HILBERT_SPACE_FILLING_CURVE_MAPPING__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void hsfc2d(
+  unsigned   coord[] , /* IN: Normalized integer 2D coordinate (2 words) */
+  unsigned   nkey ,    /* IN: Word length of key; at most 2 words are used */
+  unsigned   key[] );  /* OUT: space-filling curve key, most significant word first */
+
+extern void hsfc3d(
+  unsigned   coord[] , /* IN: Normalized integer 3D coordinate (3 words) */
+  unsigned   nkey ,    /* IN: Word length of 'key'; at most 3 words are used */
+  unsigned   key[] );  /* OUT: space-filling curve key, most significant word first */
+
+extern void fhsfc2d(
+  double     coord[] , /* IN: Normalized floating point 2D coordinate, each in [0.0,1.0] */
+  unsigned   nkey ,    /* IN: Word length of key; passed on to hsfc2d */
+  unsigned   key[] );  /* OUT: space-filling curve key */
+
+extern void fhsfc3d(
+  double     coord[] , /* IN: Normalized floating point 3D coordinate, each in [0.0,1.0] */
+  unsigned   nkey ,    /* IN: Word length of key; passed on to hsfc3d */
+  unsigned   key[] );  /* OUT: space-filling curve key */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/hsfc.c
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/hsfc.c
@@ -0,0 +1,279 @@
+/* ---------------------------------------------------------------------
+Author:     H. Carter Edwards 
+            hcedwar@sandia.gov
+
+Copyright:  Copyright (C) 1997   H. Carter Edwards
+            Graduate Student
+            University of Texas
+
+Re-release: Copyright (C) 2011-2012   H. Carter Edwards
+
+Purpose:    Domain partitioning based upon Hilbert Space-Filling Curve
+            ordering.
+
+License:    Re-release under the less-restrictive CLAMR software terms.
+            Permitted by email with H. Carter Edwards on 9/13/2011
+
+Disclaimer:
+
+    These routines comes with ABSOLUTELY NO WARRANTY;
+    This is free software, and you are welcome to redistribute it
+    under certain conditions. See License terms in file 'LICENSE'.
+--------------------------------------------------------------------- */
+
+/*----------------------------------------------------------------------
+Description:
+  Inverse of the Hilbert Space-Filling Curve Map from a 2D or 3D
+domain to the 1D domain.  Two different 2D and 3D domains are
+supported.
+
+For the routines 'hsfc2d' and 'hsfc3d' the 2D and 3D domains are
+defined as follows.
+Note that
+  *     0   is the minimum value of an unsigned integer
+  *   ~(0u) is the maximum value of an unsigned integer - all bits set
+thus the 2D and 3D domains are
+  *   [0,~(0u)] x [0,~(0u)]
+  *   [0,~(0u)] x [0,~(0u)] x [0,~(0u)]
+respectively.
+
+For the routines 'fhsfc2d' and 'fhsfc3d' the 2D and 3D domains are
+defined as:
+  *   [0.0,1.0] x [0.0,1.0]
+  *   [0.0,1.0] x [0.0,1.0] x [0.0,1.0]
+respectively.
+
+The 1D domain is a multiword (array of unsigned integers) key.
+This key is essentially an unsigned integer of an arbitrary
+number of bits.  The most significant bit is the leading bit
+of the first (0th) word of the key.  The least significant
+bit is the trailing bit of the last word.
+
+----------------------------------------------------------------------*/
+
+#include <stdlib.h>
+#include <limits.h>
+
+/* Bits per unsigned word */
+
+#define MaxBits ( sizeof(unsigned) * CHAR_BIT )
+
+/*--------------------------------------------------------------------*/
+/* 2D Hilbert Space-filling curve: encodes an integer coordinate pair as a curve key */
+
+void hsfc2d(
+  unsigned   coord[] , /* IN: Normalized integer coordinates */
+  unsigned   nkey ,    /* IN: Word length of key */
+  unsigned   key[] )   /* OUT: space-filling curve key */
+{
+  static int init = 0 ;                    /* nonzero once gray_inv[] is filled */
+  static unsigned char gray_inv[ 2 * 2 ] ; /* gray_inv[ gray[k] ] == k */
+
+  const unsigned NKey  = ( 2 < nkey ) ? 2 : (nkey) ; /* at most 2 key words are used */
+  const unsigned NBits = ( MaxBits * NKey ) / 2 ;    /* bits taken from each coordinate */
+
+  unsigned i ;
+  unsigned char order[2+2] ; /* [0..1]: bit position of each coordinate; [2..3]: swap scratch */
+  unsigned char reflect ;    /* mask XORed into the 2-bit cell before the Gray lookup */
+  
+  /* GRAY coding: table built on the first call; NOTE(review): the static init is unsynchronized */
+
+  if ( ! init ) {
+    unsigned char gray[ 2 * 2 ] ;
+    register unsigned k ;
+    register unsigned j ;
+
+    gray[0] = 0 ;
+    for ( k = 1 ; k < sizeof(gray) ; k <<= 1 ) { /* mirror the first k codes and set bit k */
+      for ( j = 0 ; j < k ; j++ ) gray[k+j] = k | gray[k-(j+1)] ;
+    }
+    for ( k = 0 ; k < sizeof(gray) ; k++ ) gray_inv[ gray[k] ] = k ;
+    init = 1 ;
+  }
+
+  /* Zero out the key */
+
+  for ( i = 0 ; i < NKey ; ++i ) key[i] = 0 ;
+
+  order[0] = 0 ;
+  order[1] = 1 ;
+  reflect = ( 0 << 0 ) | ( 0 ); /* start with no reflection */
+
+  for ( i = 1 ; i <= NBits ; i++ ) {  /* one pass per coordinate bit, most significant first */
+    const unsigned s = MaxBits - i ;  /* bit of the coordinates examined in this pass */
+    const unsigned c = gray_inv[ reflect ^ (
+      ( ( ( coord[0] >> s ) & 01 ) << order[0] ) |
+      ( ( ( coord[1] >> s ) & 01 ) << order[1] ) ) ];
+     
+    const unsigned off   = 2 * i ;                   /* Bit offset */
+    const unsigned which = off / MaxBits ;           /* Which word to update */
+    const unsigned shift = MaxBits - off % MaxBits ; /* Which bits to update */
+
+    /* Set the two bits */
+
+    if ( shift == MaxBits ) { /* Word boundary */
+      key[ which - 1 ] |= c ; /* the pair fills the low two bits of the previous word */
+    }
+    else {
+      key[ which ] |= c << shift ;
+    }
+
+    /* Determine the recursive quadrant */
+
+    switch( c ) {
+    case 3:
+      reflect ^= 03 ; /* flip both bits, then fall through to the swap below */
+    case 0:
+      order[2+0] = order[0] ; /* swap order[0] and order[1] via the scratch slots */
+      order[2+1] = order[1] ;
+      order[0] = order[2+1] ;
+      order[1] = order[2+0] ;
+      break ;
+    }
+  }
+}
+
+/*--------------------------------------------------------------------*/
+/* 3D Hilbert Space-filling curve: encodes an integer coordinate triple as a curve key */
+
+void hsfc3d(
+  unsigned   coord[] , /* IN: Normalized integer coordinates */
+  unsigned   nkey ,    /* IN: Word length of 'key' */
+  unsigned   key[] )   /* OUT: space-filling curve key */
+{
+  static int init = 0 ;                    /* nonzero once gray_inv[] is filled */
+  static unsigned char gray_inv[ 2*2*2 ] ; /* gray_inv[ gray[k] ] == k */
+
+  const unsigned NKey  = ( 3 < nkey ) ? 3 : (nkey) ; /* at most 3 key words are used */
+  const unsigned NBits = ( MaxBits * NKey ) / 3 ;    /* bits taken from each coordinate */
+
+  unsigned i ;
+  unsigned char axis[3+3] ; /* [0..2]: (coordinate index << 1) | invert flag; [3..5]: scratch */
+  
+  /* GRAY coding: table built on the first call */
+
+  if ( ! init ) {
+    unsigned char gray[ 2*2*2 ] ;
+    register unsigned k ;
+    register unsigned j ;
+
+    gray[0] = 0 ;
+    for ( k = 1 ; k < sizeof(gray) ; k <<= 1 ) { /* mirror the first k codes and set bit k */
+      for ( j = 0 ; j < k ; j++ ) gray[k+j] = k | gray[k-(j+1)] ;
+    }
+    for ( k = 0 ; k < sizeof(gray) ; k++ ) gray_inv[ gray[k] ] = k ;
+    init = 1 ;
+  }
+
+  /* Zero out the key */
+
+  for ( i = 0 ; i < NKey ; ++i ) key[i] = 0 ;
+
+  axis[0] = 0 << 1 ;
+  axis[1] = 1 << 1 ;
+  axis[2] = 2 << 1 ;
+
+  for ( i = 1 ; i <= NBits ; i++ ) {  /* one pass per coordinate bit, most significant first */
+    const unsigned s = MaxBits - i ;  /* bit of the coordinates examined in this pass */
+    const unsigned c = gray_inv[
+      (((( coord[ axis[0] >> 1 ] >> s ) ^ axis[0] ) & 01 ) << 0 ) |
+      (((( coord[ axis[1] >> 1 ] >> s ) ^ axis[1] ) & 01 ) << 1 ) |
+      (((( coord[ axis[2] >> 1 ] >> s ) ^ axis[2] ) & 01 ) << 2 ) ];
+    unsigned n ;
+
+    /* Set the 3bits */
+
+    for ( n = 0 ; n < 3 ; ++n ) {
+      const unsigned bit   = 01 & ( c >> ( 2 - n ) );  /* Bit value  */
+      const unsigned off   = 3 * i + n ;               /* Bit offset */
+      const unsigned which = off / MaxBits ;           /* Which word */
+      const unsigned shift = MaxBits - off % MaxBits ; /* Which bits */
+
+      if ( MaxBits == shift ) { /* Word boundary */
+        key[ which - 1 ] |= bit ;
+      }
+      else if ( which < NKey ) { /* the last offsets reach word NKey: never write past the key */
+        key[ which ] |= bit << shift ;
+      }
+    }
+
+    /* Determine the recursive quadrant */
+
+    axis[3+0] = axis[0] ;
+    axis[3+1] = axis[1] ;
+    axis[3+2] = axis[2] ;
+
+    switch( c ) {
+    case 0:
+      axis[0] = axis[3+2];
+      axis[1] = axis[3+1];
+      axis[2] = axis[3+0];
+      break ;
+    case 1:
+      axis[0] = axis[3+0];
+      axis[1] = axis[3+2];
+      axis[2] = axis[3+1];
+      break ;
+    case 2:
+      axis[0] = axis[3+0];
+      axis[1] = axis[3+1];
+      axis[2] = axis[3+2];
+      break ;
+    case 3:
+      axis[0] = axis[3+2] ^ 01 ;
+      axis[1] = axis[3+0] ^ 01 ;
+      axis[2] = axis[3+1];
+      break ;
+    case 4:
+      axis[0] = axis[3+2];
+      axis[1] = axis[3+0] ^ 01 ;
+      axis[2] = axis[3+1] ^ 01 ;
+      break ;
+    case 5:
+      axis[0] = axis[3+0];
+      axis[1] = axis[3+1];
+      axis[2] = axis[3+2];
+      break ;
+    case 6:
+      axis[0] = axis[3+0];
+      axis[1] = axis[3+2] ^ 01 ;
+      axis[2] = axis[3+1] ^ 01 ;
+      break ;
+    case 7:
+      axis[0] = axis[3+2] ^ 01 ;
+      axis[1] = axis[3+1];
+      axis[2] = axis[3+0] ^ 01 ;
+      break ;
+    default:
+      exit(-1);
+    }
+  }
+}
+
+/*--------------------------------------------------------------------*/
+
+/* Floating-point front end to hsfc2d(): scales each coordinate by ~(0u) before encoding. */
+void fhsfc2d(
+  double     coord[] , /* IN: Normalized floating point coordinates */
+  unsigned   nkey ,    /* IN: Word length of key */
+  unsigned   key[] )   /* OUT: space-filling curve key */
+{
+  const double scale = ~(0u);
+  unsigned d , icoord[2] ;
+  for ( d = 0 ; d < 2 ; ++d ) icoord[d] = coord[d] * scale ;
+  hsfc2d( icoord , nkey , key );
+}
+
+/* Floating-point front end to hsfc3d(): scales each coordinate by ~(0u)
+ * (the largest unsigned value) before encoding. */
+void fhsfc3d(
+  double     coord[] , /* IN: Normalized floating point coordinates */
+  unsigned   nkey ,    /* IN: Word length of key */
+  unsigned   key[] )   /* OUT: space-filling curve key */
+{
+  const double scale = ~(0u);
+  unsigned d , icoord[3] ;
+  for ( d = 0 ; d < 3 ; ++d ) icoord[d] = coord[d] * scale ;
+  hsfc3d( icoord , nkey , key );
+}
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/hsfcsort.c
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/hsfcsort.c
@@ -0,0 +1,268 @@
+/* ---------------------------------------------------------------------
+Author:     H. Carter Edwards 
+            hcedwar@sandia.gov
+
+Copyright:  Copyright (C) 1997   H. Carter Edwards
+            Graduate Student
+            University of Texas
+
+Re-release: Copyright (C) 2011-2012   H. Carter Edwards
+
+Purpose:    Domain partitioning based upon Hilbert Space-Filling Curve
+            ordering.
+
+License:    Re-release under the less-restrictive CLAMR software terms.
+            Permitted by email with H. Carter Edwards on 9/13/2011
+
+Disclaimer:
+
+    These routines comes with ABSOLUTELY NO WARRANTY;
+    This is free software, and you are welcome to redistribute it
+    under certain conditions. See License terms in file 'LICENSE'.
+--------------------------------------------------------------------- */
+
+#include <limits.h>
+#include <stdlib.h>
+
+#include "hsfc.h"
+
+/*--------------------------------------------------------------------*/
+/* Make it callable from FORTRAN:
+ *   Interface types: INTEGER and REAL*8
+ */
+
+void hsfc2sort(
+               const int      N ,     /* IN: Number of points */
+               const double * X ,     /* IN: array of X-Coordinates */
+               const double * Y ,     /* IN: array of Y-Coordinates */
+               const int      ibase,  /* 0 for C and 1 for Fortran */
+               int          * Info ,  /* OUT: (1 <= LDInfo) [ HSFC ordering ]
+                                 (2 <= LDInfo) [ HSFC index, #1 ]
+                                 (3 <= LDInfo) [ HSFC index, #2 ] */
+               int            LDInfo /* IN:  Leading dimension of Info */
+               );
+
+/*--------------------------------------------------------------------*/
+
+#define MaxBits ( sizeof(unsigned) * CHAR_BIT )
+
+#define NBITC     (32)  /* 32 Bits per coordinate, resolve data at 2^31 */
+#define NKEY(ND)  ((NBITC * ND + MaxBits - 1) / MaxBits)
+
+/*--------------------------------------------------------------------*/
+
+/* qsort comparator on the first unsigned word of each record; yields -1, 0 or 1. */
+static int ui1comp( const void * const I1 , const void * const I2 )
+{
+  const unsigned a = *(const unsigned *)I1 ;
+  const unsigned b = *(const unsigned *)I2 ;
+  return ( a > b ) - ( a < b ) ;
+}
+
+/* qsort comparator: lexicographic order on the first two unsigned words of each record. */
+static int ui2comp( const void * const I1 , const void * const I2 )
+{
+  const unsigned * const a = (const unsigned *)I1 ;
+  const unsigned * const b = (const unsigned *)I2 ;
+  if ( a[0] != b[0] ) return ( a[0] < b[0] ) ? -1 : 1 ;
+  if ( a[1] != b[1] ) return ( a[1] < b[1] ) ? -1 : 1 ;
+  return 0 ;
+}
+
+/*--------------------------------------------------------------------*/
+
+/* qsort comparator: lexicographic order on the first three unsigned words of each record. */
+static int ui3comp( const void * const I1 , const void * const I2 )
+{
+  const unsigned * const a = (const unsigned *)I1 ;
+  const unsigned * const b = (const unsigned *)I2 ;
+  int w ;
+  for ( w = 0 ; w < 3 ; ++w ) {
+    if ( a[w] != b[w] ) return ( a[w] < b[w] ) ? -1 : 1 ;
+  }
+  return 0 ;
+}
+
+/* Number of key words compared by uiNcomp(); set around the qsort() call that uses it. */
+static int N_uiNcomp = 0 ;
+
+/* qsort comparator: lexicographic order on the first N_uiNcomp unsigned words of each record. */
+static int uiNcomp( const void * const I1 , const void * const I2 )
+{
+  const unsigned * const a = (const unsigned *)I1 ;
+  const unsigned * const b = (const unsigned *)I2 ;
+  int i = 0 ;
+  /* Skip the common prefix: advance while the words are EQUAL (the old test was inverted). */
+  while ( i < N_uiNcomp && a[i] == b[i] ) ++i ;
+  return ( i < N_uiNcomp ) ? ( ( a[i] < b[i] ) ? -1 : 1 ) : 0 ;
+}
+
+/*--------------------------------------------------------------------*/
+
+/* Orders N points along the 2D Hilbert curve. Coordinates are scaled by ~(0u), so they
+ * are expected in [0,1]. Info[k*LDInfo] gets the index (plus ibase) of the k-th point in
+ * curve order; Info[k*LDInfo+1..] get key words of input point k when LDInfo allows. */
+void hsfc2sort(
+  const int      N ,     /* IN: Number of points */
+  const double * X ,     /* IN: array of X-Coordinates */
+  const double * Y ,     /* IN: array of Y-Coordinates */
+  const int      ibase,  /* 0 for C and 1 for Fortran */
+        int    * Info ,  /* OUT: (1 <= LDInfo) [ HSFC ordering ]
+                                   (2 <= LDInfo) [ HSFC index, #1 ]
+                                   (3 <= LDInfo) [ HSFC index, #2 ] */
+        int      LDInfo )/* IN:  Leading dimension of Info */
+{
+  const double imax = ((double) ~(0u)) ;
+
+  const unsigned ldinfo = LDInfo ;
+  const unsigned long long npt    = N ;
+  const unsigned nkey   = NKEY(2) ;
+  const unsigned ldT    = nkey + 1 ;  /* key words followed by the point index */
+
+  unsigned * T ;
+  int i , ii , it ;
+
+  /* A non-positive N would become a huge unsigned count in npt: nothing to do. */
+  if ( N <= 0 ) return ;
+
+  T = (unsigned *) malloc( sizeof(unsigned) * ldT * npt );
+  if ( T == NULL ) exit(-1) ; /* out of memory: stop instead of writing through NULL */
+
+  /* Fill SFC table */
+
+  for ( i = it = 0 ; (unsigned long long)i < npt ; ++i , it += ldT ) {
+    unsigned coord[2] ;
+
+    coord[0] = X[i] * imax ;
+    coord[1] = Y[i] * imax ;
+
+    hsfc2d( coord , nkey , T + it );
+    T[it+nkey] = i ;
+  }
+
+  /* SFC Key output */
+
+  if ( 2 < ldinfo && 1 < nkey ) {
+    for ( ii = 1, it = 0, i = 0 ; (unsigned long long)i < npt ; ++i, ii += ldinfo, it += ldT ) {
+      Info[ii]   = T[it];
+      Info[ii+1] = T[it+1];
+    }
+  }
+  else if ( 1 < ldinfo ) {
+    for ( ii = 1, it = 0 ,i = 0 ; (unsigned long long)i < npt ; ++i, ii += ldinfo, it += ldT ) {
+      Info[ii] = T[it] ;
+    }
+  }
+
+  /* Sort */
+
+  switch ( nkey ) {
+  case 0: break ;
+  case 1: qsort( T , npt , sizeof(unsigned) * ldT , ui1comp ); break ;
+  case 2: qsort( T , npt , sizeof(unsigned) * ldT , ui2comp ); break ;
+  case 3: qsort( T , npt , sizeof(unsigned) * ldT , ui3comp ); break ;
+  default:
+    N_uiNcomp = nkey ;
+    qsort( T , npt , sizeof(unsigned) * ldT , uiNcomp );
+    N_uiNcomp = 0 ;
+    break ;
+  }
+
+  /* Rows of T are now in curve order; the last word of each row is the original index. */
+  for (ii = 0, i = 0, it = nkey ; (unsigned long long)i < npt ; ++i, ii += ldinfo, it += ldT) {
+    Info[ii] = T[it] + ibase; /* 1 -- FORTRAN convention, 0 -- C */
+  }
+
+  free( (void *) T );
+}
+
+/*--------------------------------------------------------------------*/
+
+/* Orders N points along the 3D Hilbert curve. Coordinates are scaled by ~(0u), so they
+ * are expected in [0,1]. Info[k*LDInfo] gets the index (plus ibase) of the k-th point in
+ * curve order; Info[k*LDInfo+1..] get key words of input point k when LDInfo allows.
+ * Each row of the work table has nkey key words followed by the point index. */
+void hsfc3sort(
+  const int      N ,     /* IN: Number of points */
+  const double * X ,     /* IN: array of X-Coordinates */
+  const double * Y ,     /* IN: array of Y-Coordinates */
+  const double * Z ,     /* IN: array of Z-Coordinates */
+  const int      ibase , /* IN: offset added to each output index (0 for C, 1 for Fortran) */
+        int    * Info ,  /* OUT: (1 <= LDInfo) [ HSFC ordering ]
+                                   (2 <= LDInfo) [ HSFC index, #1 ]
+                                   (3 <= LDInfo) [ HSFC index, #2 ]
+                                   (4 <= LDInfo) [ HSFC index, #3 ] */
+        int      LDInfo )/* IN:  Leading dimension of Info */
+{
+  const double imax = ((double) ~(0u)) ;
+
+  const unsigned ldinfo = LDInfo ;
+  const unsigned long long npt    = N ;
+  const unsigned nkey   = NKEY(3) ;
+  const unsigned ldT    = nkey + 1 ;
+
+  unsigned * T ;
+  int i , ii , it ;
+
+  /* A non-positive N would become a huge unsigned count in npt: nothing to do. */
+  if ( N <= 0 ) return ;
+
+  T = (unsigned *) malloc( sizeof(unsigned) * ldT * npt );
+  if ( T == NULL ) exit(-1) ; /* out of memory: stop instead of writing through NULL */
+
+  /* Fill SFC table */
+
+  for ( i = it = 0 ; (unsigned long long)i < npt ; ++i , it += ldT ) {
+    unsigned coord[3] ;
+
+    coord[0] = X[i] * imax ;
+    coord[1] = Y[i] * imax ;
+    coord[2] = Z[i] * imax ;
+
+    hsfc3d( coord , nkey , T + it );
+    T[it+nkey] = i ;
+  }
+
+  /* SFC Key output */
+
+  if ( 3 < ldinfo && 2 < nkey ) {
+    for ( ii = 1, it = 0, i = 0 ; (unsigned long long)i < npt ; ++i, ii += ldinfo, it += ldT ) {
+      Info[ii]   = T[it];
+      Info[ii+1] = T[it+1];
+      Info[ii+2] = T[it+2];
+    }
+  }
+  else if ( 2 < ldinfo && 1 < nkey ) {
+    for ( ii = 1, it = 0, i = 0 ; (unsigned long long)i < npt ; ++i, ii += ldinfo, it += ldT ) {
+      Info[ii]   = T[it];
+      Info[ii+1] = T[it+1];
+    }
+  }
+  else if ( 1 < ldinfo ) {
+    for ( ii = 1, it = 0 ,i = 0 ; (unsigned long long)i < npt ; ++i, ii += ldinfo, it += ldT ) {
+      Info[ii] = T[it] ;
+    }
+  }
+
+  /* Sort */
+
+  switch ( nkey ) {
+  case 0: break ;
+  case 1: qsort( T , npt , sizeof(unsigned) * ldT , ui1comp ); break ;
+  case 2: qsort( T , npt , sizeof(unsigned) * ldT , ui2comp ); break ;
+  case 3: qsort( T , npt , sizeof(unsigned) * ldT , ui3comp ); break ;
+  default:
+    N_uiNcomp = nkey ;
+    qsort( T , npt , sizeof(unsigned) * ldT , uiNcomp );
+    N_uiNcomp = 0 ;
+    break ;
+  }
+
+  /* Rows of T are now in curve order; the last word of each row is the original index. */
+  for (ii = 0, i = 0, it = nkey ; (unsigned long long)i < npt ; ++i, ii += ldinfo, it += ldT) {
+    Info[ii] = T[it] + ibase ; /* 1 -- FORTRAN convention, 0 -- C */
+  }
+
+  free( (void *) T );
+}
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/input.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/input.h
@@ -0,0 +1,70 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ *  
+ *  This file and the associated header is based on a file from the capablanca
+ *  project available under the MIT open-source license.  As author of that code,
+ *  I, Neal Davis, permit repurposing and redistribution for CLAMR under the New
+ *  BSD License used above.
+ *      http://code.google.com/p/capablanca/
+ */
+#ifndef _INPUT_H
+#define	_INPUT_H
+
+void outputHelp();      /* Print the command-line usage summary to standard output. */
+void outputVersion();   /* Print the program name and version on one line. */
+void parseInput(const int argc, char** argv);   /* Reset the run options to their defaults, then apply the options in argv. */
+
+#endif	/* _INPUT_H */
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/input.cpp
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/input.cpp
@@ -0,0 +1,513 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ *  
+ *  This file and the associated header is based on a file from the capablanca
+ *  project available under the MIT open-source license.  As author of that code,
+ *  I, Neal Davis, permit repurposing and redistribution for CLAMR under the New
+ *  BSD License used above.
+ *      http://code.google.com/p/capablanca/
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "state.h"
+#include "partition.h"
+#include "mesh.h"
+#include "hash.h"
+#include "crux.h"
+//#include "graphics/display.h"
+#include "graphics.h"
+
+#include <fstream>
+#include <iostream>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <sys/stat.h>
+#include <limits.h>
+
+#define OUTPUT_INTERVAL 100   //  Default for outputInterval (overridden by -i).
+#define COARSE_GRID_RES 128   //  Default for both nx and ny (overridden by -n).
+#define MAX_TIME_STEP 3000    //  Default for niter (overridden by -t).
+
+using namespace std;
+
+//  Global variables.
+char progName[12];      //  Program name; parseInput() stores "clamr" here.
+char progVers[8];       //  Program version; copied from PACKAGE_VERSION when that macro is defined.
+
+//  External global variables (defined elsewhere; parseInput() assigns the ones noted below).
+extern bool verbose,                    //  -V sets true; default false
+            localStencil,               //  default true; -p picks true/false per ordering name
+            outline,                    //  default true; -o sets false
+            face_based,                 //  -f sets true; default false
+            dynamic_load_balance_on,    //  -D sets true when HAVE_LTTRACE is defined
+            h5_spoutput,                //  not assigned anywhere in this file
+            restart;                    //  -R sets true; default false
+extern int  outputInterval,             //  -i; default OUTPUT_INTERVAL
+            crux_type,                  //  -c => CRUX_DISK, -C => CRUX_IN_MEMORY; default CRUX_NONE
+            enhanced_precision_sum,     //  -r; default SUM_KAHAN
+            tmax,                       //  not assigned anywhere in this file
+            levmx,                      //  -l; default 1
+            nx,                         //  -n; default COARSE_GRID_RES
+            ny,                         //  always set equal to nx by -n
+            niter,                      //  -t; default MAX_TIME_STEP
+            measure_type,               //  -m; default NO_PARTITION_MEASURE
+            lttrace_on,                 //  -d / -D, only when HAVE_LTTRACE is defined
+            do_quo_setup,               //  -q, only when HAVE_QUO is defined
+            calc_neighbor_type,         //  -N; default HASH_TABLE
+	    choose_hash_method,         //  -e; default METHOD_UNSET
+            initial_order,              //  -P; default HILBERT_SORT
+            graphic_outputInterval,     //  -g; default INT_MAX
+            graphics_type,              //  -G (or -g when still GRAPHICS_NONE); default GRAPHICS_NONE
+            checkpoint_outputInterval,  //  -c / -C; default INT_MAX
+            neighbor_remap,             //  default true; -z sets false
+            num_of_rollback_states,     //  -b; default 2
+            cycle_reorder;              //  -p; default ORIGINAL_ORDER
+extern float
+            mem_opt_factor;             //  -M; default 1.0
+extern double
+            upper_mass_diff_percentage; //  -u; default -1.0
+
+extern char* restart_file;              //  -R stores the file name token here; default NULL
+
+void outputHelp()   //  Print the usage summary; option arguments shown here match what parseInput() consumes.
+{   cout << "CLAMR is an experimental adaptive mesh refinement code for the GPU." << endl
+         #ifdef PACKAGE_VERSION
+         << "Version is " << PACKAGE_VERSION << endl << endl
+         #endif
+         << "Usage:  " << progName << " [options]..." << endl
+         << "  -b <B>            Number of rollback images, disk or in memory (default 2);" << endl
+         << "  -c <C>            Checkpoint to disk at interval specified;" << endl
+         << "  -C <C>            Checkpoint to memory at interval specified;" << endl
+         << "  -d                turn on LTTRACE;" << endl
+         << "  -D                turn on dynamic load balancing using LTTRACE;" << endl
+         << "  -e <E>            force hash_method, ie linear, quadratic..." <<endl
+         << "      \"perfect\"" << endl
+         << "      \"linear\"" << endl
+         << "      \"quadratic\"" << endl
+         << "      \"prime_jump\"" << endl
+         << "  -f                face-based finite difference;" << endl
+         << "  -g <g>            specify I step between saving graphics information for post processing;" << endl
+         << "  -G <G>            specify graphics file type for post processing;" << endl
+         << "      \"bmp\"" << endl
+         << "      \"gif\"" << endl
+         << "      \"jpeg\"" << endl
+         << "      \"mpeg\"" << endl
+         << "      \"pdf\"" << endl
+         << "      \"png\"" << endl
+         << "      \"svg\"" << endl
+         << "      \"data\"" << endl
+         << "  -h                display this help message;" << endl
+         << "  -i <I>            specify I steps between output files;" << endl
+         << "  -l <l>            max number of levels;" << endl
+         << "  -M <M>            memory optimization factor 1.0 <= M <=100.0 (default 1.0 -- represents 1/20 perfect hash);" << endl
+         << "  -m <m>            specify partition measure type;" << endl
+         << "      \"with_duplicates\"" << endl
+         << "      \"without_duplicates\"" << endl
+         << "  -N <n>            specify calc neighbor type;" << endl
+         << "      \"hash_table\"" << endl
+         << "      \"kdtree\"" << endl
+         << "  -n <N>            specify coarse grid resolution of NxN;" << endl
+         << "  -o                turn off outlines;" << endl
+         << "  -P <P>            specify initial order P;" << endl
+         << "      \"original_order\"" << endl
+         << "      \"hilbert_sort\"" << endl
+         << "      \"hilbert_partition\"" << endl
+         << "      \"z_order\"" << endl
+         << "  -p <p>            specify ordering P every cycle;" << endl
+         << "      \"original_order\"" << endl
+         << "      \"hilbert_sort\"" << endl
+         << "      \"hilbert_partition\"" << endl
+         << "      \"local_hilbert\"" << endl
+         << "      \"local_fixed\"" << endl
+         << "      \"z_order\"" << endl
+         << "  -q                turn on quo;" << endl
+         << "  -r <r>            specify sum type: \"regular_sum\" or \"kahan_sum\" (the default);" << endl
+         << "  -R <R>            restart simulation from the backup file specified;" << endl
+         << "  -s <s>            specify space-filling curve method S;" << endl
+         << "  -S                write out double precision data as single precision;" << endl
+         << "  -T                execute with TVD;" << endl
+         << "  -t <t>            specify T time steps to run;" << endl
+         << "  -u <u>            allowed percentage of difference between total mass between iterations." << endl
+         << "                    the default value for this parameter is 2.6e-13;" << endl
+         << "  -V                use verbose output;" << endl
+         << "  -v                display version information." << endl
+         << "  -z                force recalculation of neighbors." << endl; }
+
+void outputVersion()   //  Print "<program name> <version>" on one line, then flush.
+{   std::cout << progName << ' ' << progVers << std::endl; }
+
+/*  optionValue: return the first token of argv[i] (split on delims) and advance
+ *  i; exit with a diagnostic when option -opt has no value left in argv.
+ */
+static char* optionValue(const int argc, char** argv, int& i, const char* delims, const char opt)
+{   char *tok = (i < argc) ? strtok(argv[i++], delims) : NULL;
+    if (tok == NULL)
+    {   cout << "Missing value for option -" << opt << endl;
+        exit(EXIT_FAILURE); }
+    return tok; }
+
+/*  parseInput(const int argc, char** argv)
+ *
+ *  Interpret the command line input: reset every option to its default, then
+ *  apply the options in argv; val[0] of each option token selects the option.
+ */
+void parseInput(const int argc, char** argv)
+{   strcpy(progName, "clamr");
+    #ifdef PACKAGE_VERSION
+    strncpy(progVers, PACKAGE_VERSION, sizeof(progVers) - 1);   //  bounded: progVers is a fixed-size array
+    progVers[sizeof(progVers) - 1] = '\0';
+    #endif
+
+    //  Set variables to defaults, which may be overridden by CLI.
+    verbose                 = false;
+    localStencil            = true;
+    outline                 = true;
+#ifdef HAVE_LTTRACE
+    lttrace_on              = 0;
+#endif
+#ifdef HAVE_QUO
+    do_quo_setup            = 0;
+#endif
+    dynamic_load_balance_on = false;
+    crux_type               = CRUX_NONE;
+    face_based              = false;
+    restart                 = false;
+    restart_file            = NULL;
+    outputInterval          = OUTPUT_INTERVAL;
+    nx                      = COARSE_GRID_RES;
+    ny                      = COARSE_GRID_RES;
+    niter                   = MAX_TIME_STEP;
+    neighbor_remap          = true;
+    //  Partition measurement stays off unless -m selects a measure.
+    measure_type            = NO_PARTITION_MEASURE;
+    calc_neighbor_type      = HASH_TABLE;
+    choose_hash_method      = METHOD_UNSET;
+    initial_order           = HILBERT_SORT;
+    cycle_reorder           = ORIGINAL_ORDER;
+    graphic_outputInterval  = INT_MAX;
+    graphics_type           = GRAPHICS_NONE;
+    checkpoint_outputInterval = INT_MAX;
+    num_of_rollback_states  = 2;
+    levmx                   = 1;
+    mem_opt_factor          = 1.0;
+    upper_mass_diff_percentage = -1.0;
+    enhanced_precision_sum  = SUM_KAHAN;
+
+    char   *val;
+    if (argc > 1)
+    {   int i = 1;
+        val = strtok(argv[i++], " ,.-");
+        while (val != NULL){
+            switch (val[0]){
+               case 'b':     //  Number of rollback images, disk or in memory (default 2)
+                    val = (i < argc) ? strtok(argv[i++], " ,") : NULL;   //  the value may be absent
+                    if (val == NULL || atoi(val) < 1){
+                        printf("backup number must be at least 1, setting to default value 2\n");
+                        num_of_rollback_states = 2;
+                    }
+                    else{
+                        num_of_rollback_states = atoi(val);
+                    }
+                    break;
+                case 'c':   //  Checkpoint to disk at interval specified
+                    val = optionValue(argc, argv, i, " ,.-", 'c');
+                    checkpoint_outputInterval = atoi(val);
+                    crux_type = CRUX_DISK;
+                    break;
+
+                case 'C':   //  Checkpoint to memory at interval specified
+                    val = optionValue(argc, argv, i, " ,.-", 'C');
+                    checkpoint_outputInterval = atoi(val);
+                    crux_type = CRUX_IN_MEMORY;
+                    break;
+
+                case 'd':   //  Turn on lttrace.
+                            //  This is provided as a separate option to measure
+                            //  the overhead of having lttrace on.
+#ifdef HAVE_LTTRACE
+                    lttrace_on = 1;
+#endif
+                    break;
+
+                case 'D':   //  Turn on dynamic load balancing.
+                            //  This forces on lttrace.
+#ifdef HAVE_LTTRACE
+                    lttrace_on = 1;
+                    dynamic_load_balance_on = true;
+#endif
+                    break;
+
+                case 'e':   //  hash method specified.
+                    val = optionValue(argc, argv, i, " ,", 'e');
+                    if (! strcmp(val,"perfect") ) {
+                       choose_hash_method = PERFECT_HASH;
+                    } else if (! strcmp(val,"linear") ) {
+                       choose_hash_method = LINEAR;
+                    } else if (! strcmp(val,"quadratic") ) {
+                       choose_hash_method = QUADRATIC;
+                    } else if (! strcmp(val,"prime_jump") ) {
+                       choose_hash_method = PRIME_JUMP;
+                    }
+                    break;
+
+                case 'f':   // Use face-based finite difference
+                    face_based = true;
+                    break;
+
+                case 'g':   //  Save graphics data to files during simulation.
+                    val = optionValue(argc, argv, i, " ,.-", 'g');
+                    graphic_outputInterval = atoi(val);
+                    if (graphics_type == GRAPHICS_NONE) graphics_type = GRAPHICS_DATA;
+                    break;
+
+                case 'G':   //  Graphics data file type.
+                    val = optionValue(argc, argv, i, " ,.-", 'G');
+                    if (! strcmp(val,"none") ) {
+                       graphics_type = GRAPHICS_NONE;
+                       graphic_outputInterval  = INT_MAX;
+                    } else if (! strcmp(val,"data") ) {
+                       graphics_type = GRAPHICS_DATA;
+#ifdef HAVE_MAGICKWAND
+                    } else if (! strcmp(val,"bmp") ) {
+                       graphics_type = GRAPHICS_BMP;
+                    } else if (! strcmp(val,"gif") ) {
+                       graphics_type = GRAPHICS_GIF;
+                    } else if (! strcmp(val,"jpeg") ) {
+                       graphics_type = GRAPHICS_JPEG;
+                    } else if (! strcmp(val,"mpeg") ) {
+                       graphics_type = GRAPHICS_MPEG;
+                    } else if (! strcmp(val,"pdf") ) {
+                       graphics_type = GRAPHICS_PDF;
+                    } else if (! strcmp(val,"png") ) {
+                       graphics_type = GRAPHICS_PNG;
+                    } else if (! strcmp(val,"svg") ) {
+                       graphics_type = GRAPHICS_SVG;
+#endif
+                    } else {
+                       printf("Unrecognized option for graphics file type %s\n",val);
+                       exit(-1);
+                    }
+                    break;
+
+                case 'h':   //  Output help.
+                    outputHelp();
+                    cout.flush();
+                    exit(EXIT_SUCCESS);
+                    break;
+
+                case 'i':   //  Output interval specified.
+                    val = optionValue(argc, argv, i, " ,.-", 'i');
+                    outputInterval = atoi(val);
+                    break;
+
+                case 'l':   //  max level specified.
+                    val = optionValue(argc, argv, i, " ,", 'l');
+                    levmx = atoi(val);
+                    break;
+
+                case 'M':   //  memory optimization factor
+                    val = optionValue(argc, argv, i, " ,", 'M');
+                    mem_opt_factor = atof(val);
+                    break;
+
+                case 'm':   //  partition measure specified.
+                    val = optionValue(argc, argv, i, " ,", 'm');
+                    if (! strcmp(val,"no_partition_measure") ) {
+                       measure_type = NO_PARTITION_MEASURE;
+                    } else if (! strcmp(val,"with_duplicates") ) {
+                       measure_type = WITH_DUPLICATES;
+                    } else if (! strcmp(val,"without_duplicates") ) {
+                       measure_type = WITHOUT_DUPLICATES;
+                    } else if (! strcmp(val,"cvalue") ) {
+                       measure_type = CVALUE;
+                    } else if (! strcmp(val,"cstarvalue") ) {
+                       measure_type = CSTARVALUE;
+                    }
+                    break;
+
+                case 'N':   //  calc neighbor type specified.
+                    val = optionValue(argc, argv, i, " ,", 'N');
+                    if (! strcmp(val,"hash_table") ) {
+                       calc_neighbor_type = HASH_TABLE;
+                    } else if (! strcmp(val,"kdtree") ) {
+                       calc_neighbor_type = KDTREE;
+                    }
+                    break;
+
+                case 'n':   //  Domain grid resolution specified.
+                    val = optionValue(argc, argv, i, " ,", 'n');
+                    nx = atoi(val);
+                    ny = nx;
+                    break;
+
+                case 'o':   //  Turn off outlines on mesh drawing.
+                    outline = false;
+                    break;
+
+                case 'P':   //  Initial order specified.
+                    val = optionValue(argc, argv, i, " ,", 'P');
+                    if (! strcmp(val,"original_order") ) {
+                       initial_order = ORIGINAL_ORDER;
+                    } else if (! strcmp(val,"hilbert_sort") ) {
+                       initial_order = HILBERT_SORT;
+                    } else if (! strcmp(val,"hilbert_partition") ) {
+                       initial_order = HILBERT_PARTITION;
+                    } else if (! strcmp(val,"z_order") ) {
+                       initial_order = ZORDER;
+                    }
+                    break;
+
+                case 'p':   //  Per-cycle reordering specified.
+                    val = optionValue(argc, argv, i, " ,", 'p');
+                    if (! strcmp(val,"original_order") ) {
+                       cycle_reorder = ORIGINAL_ORDER;
+                       localStencil = false;
+                    } else if (! strcmp(val,"hilbert_sort") ) {
+                       cycle_reorder = HILBERT_SORT;
+                       localStencil = false;
+                    } else if (! strcmp(val,"hilbert_partition") ) {
+                       cycle_reorder = HILBERT_PARTITION;
+                       localStencil = false;
+                    } else if (! strcmp(val,"local_hilbert") ) {
+                       cycle_reorder = ORIGINAL_ORDER;
+                       localStencil = true;
+                    } else if (! strcmp(val,"local_fixed") ) {
+                       cycle_reorder = ORIGINAL_ORDER;
+                       localStencil = false;
+                    } else if (! strcmp(val,"z_order") ) {
+                       cycle_reorder = ZORDER;
+                       localStencil = false;
+                    }
+                    break;
+
+                case 'q':   //  turn on quo package.
+#ifdef HAVE_QUO
+                    do_quo_setup = 1;
+#endif
+                    break;
+
+                case 'r':   //  Summation method: "regular_sum" or "kahan_sum".
+                    val = optionValue(argc, argv, i, " ,", 'r');
+                    if (! strcmp(val,"regular_sum") ) {
+                       enhanced_precision_sum = SUM_REGULAR;
+                    } else if (! strcmp(val,"kahan_sum") ) {
+                       enhanced_precision_sum = SUM_KAHAN;
+                    } else {
+                       printf("Error with sum argument %s\n",val);
+                       exit(EXIT_FAILURE);
+                    }
+                    break;
+
+                case 'R':  //  Restart application from last checkpoint
+                    restart = true;
+                    restart_file = optionValue(argc, argv, i, " ,", 'R');
+
+#ifndef HDF5_FF
+                    struct stat stat_descriptor;
+                    if (stat(restart_file,&stat_descriptor) == -1){
+                       printf("Error -- restart file %s does not exist\n",restart_file);
+                       exit(EXIT_FAILURE);
+                    }
+#endif
+                    break;
+
+                case 's':   //  Space-filling curve method specified (default HILBERT_SORT).
+                //  Add different problem setups such as sloped wave in x, y and diagonal directions to help check algorithm
+                    //  HILBERT_SORT
+                    break;
+
+                case 'T':   //  TVD inclusion specified.
+                    break;
+
+                case 't':   //  Number of time steps specified.
+                    val = optionValue(argc, argv, i, " ,.-", 't');
+                    niter = atoi(val);
+                    break;
+
+                case 'u':   //  Allowed percentage of difference in mass per iteration
+                    val = optionValue(argc, argv, i, " ,", 'u');
+                    upper_mass_diff_percentage = atof(val);
+                    break;
+
+                case 'V':   //  Verbose output desired.
+                    verbose = true;
+                    break;
+
+                case 'v':   //  Version.
+                    outputVersion();
+                    cout.flush();
+                    exit(EXIT_SUCCESS);
+                    break;
+
+                case 'z':  // Neighbor remap -- default is true, -z sets to false
+                    neighbor_remap = false;
+                    break;
+
+                default:    //  Unknown parameter encountered.
+                    cout << "⚠ Unknown input parameter " << val << endl;
+                    outputHelp();
+                    cout.flush();
+                    exit(EXIT_FAILURE);
+                    break; }
+
+            val = (i < argc) ? strtok(argv[i++], " ,.-") : NULL;   //  never index past argv[argc-1]
+        }
+     }
+}
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/memstats.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/memstats.h
@@ -0,0 +1,74 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+#ifndef _MEMSTATS_H
+#define _MEMSTATS_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+long long memstats_memused();    /* presumably memory currently in use by the process; units not visible here -- TODO confirm in memstats.c */
+long long memstats_mempeak();    /* presumably peak memory use of the process -- TODO confirm in memstats.c */
+long long memstats_memfree();    /* presumably free memory on the system -- TODO confirm in memstats.c */
+long long memstats_memtotal();   /* presumably total memory on the system -- TODO confirm in memstats.c */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* _MEMSTATS_H */
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/memstats.c
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/memstats.c
@@ -0,0 +1,347 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+#include <sys/time.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <string.h>
+
+#ifdef __APPLE_CC__
+#include <sys/sysctl.h>
+#include <mach/mach_host.h>
+#include <mach/task.h>
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "memstats.h"
+
+pid_t pid;
+FILE *stat_fp = NULL, *meminfo_fp = NULL;
+
+/*
+ * memstats_memused -- resident memory of the calling process.
+ *
+ * Apple builds: returns the resident_size field that task_info() reports for
+ * the current task (the original authors rejected host_statistics() because
+ * it counts memory used by every process, not just this one).
+ * Other builds: scans /proc/<pid>/status for the "VmRSS" entry and returns
+ * its numeric value, which that file reports in kB.
+ *
+ * Returns -1 when the status file cannot be opened and 0 when no VmRSS
+ * entry is found.
+ *
+ * NOTE(review): the Apple branch returns resident_size unscaled, whereas the
+ * Apple branches of memstats_memfree/memtotal divide by 1024 -- confirm units.
+ */
+long long memstats_memused(){
+   long long mem_current=0;
+#ifdef __APPLE_CC__
+   struct task_basic_info t_info;
+   mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT;
+   task_info(mach_task_self(), TASK_BASIC_INFO, (task_info_t)&t_info, &t_info_count);
+
+   mem_current = t_info.resident_size;
+#else
+   char proc_stat_file[50];
+   char line[140];
+   char *key;
+   char *value;
+   int memdebug = 0;
+
+   /* Build the path up front so the warning below never prints an
+      uninitialized buffer when the stream was already open. */
+   pid = getpid();
+   snprintf(proc_stat_file, sizeof(proc_stat_file), "/proc/%d/status", (int)pid);
+
+   if (!stat_fp){
+      stat_fp = fopen(proc_stat_file, "r");
+      if (!stat_fp){
+         return(-1);
+      }
+   } else {
+      /* Stream left open by an earlier call: restart from the first line.
+         rewind() replaces the old fflush()/fseek() pair, whose error paths
+         passed the -1 return code to strerror() instead of errno. */
+      rewind(stat_fp);
+   }
+
+   /* Loop on fgets() itself rather than feof(): at end of file or on a read
+      error fgets() returns NULL and there is no line to tokenize. */
+   while (fgets(line, sizeof(line), stat_fp)){
+      key = strtok(line, ":");
+      if (key && !strcmp(key, "VmRSS")) {
+         /* Treat tabs as separators too: the value follows a tab and is only
+            space-padded while it is narrower than the column, so splitting
+            on spaces alone loses values that fill the column. */
+         value = strtok(NULL, " \t\n");
+         if (value) {
+            mem_current = atoll(value); // Size is in kB
+         }
+         if (memdebug) {
+            printf("VmRSS %lld\n",mem_current);
+         }
+         break;
+      }
+   }
+   if (ferror(stat_fp)){
+      printf("Warning: Error in reading %s for memory stats\n",proc_stat_file);
+   }
+
+   fclose(stat_fp);
+   stat_fp = NULL;
+#endif
+
+   return(mem_current);
+}
+
+/*
+ * memstats_mempeak -- peak resident memory of the calling process.
+ *
+ * Scans /proc/<pid>/status for the "VmHWM" entry and returns its numeric
+ * value, which that file reports in kB.  There is no Apple-specific branch.
+ *
+ * Returns -1 when the status file cannot be opened and 0 when no VmHWM
+ * entry is found.
+ */
+long long memstats_mempeak(){
+   char proc_stat_file[50];
+   char line[140];
+   char *key, *value;
+   int memdebug = 0;
+   long long mem_current=0;
+
+   /* Build the path up front so the warning below never prints garbage. */
+   pid = getpid();
+   snprintf(proc_stat_file, sizeof(proc_stat_file), "/proc/%d/status", (int)pid);
+
+   if (!stat_fp){
+      stat_fp = fopen(proc_stat_file, "r");
+      if (!stat_fp){
+         return(-1);
+      }
+   } else {
+      /* Stream left open by an earlier call: restart from the first line. */
+      rewind(stat_fp);
+   }
+
+   /* Loop on fgets() itself rather than feof(): at end of file or on a read
+      error fgets() returns NULL and there is no line to tokenize. */
+   while (fgets(line, sizeof(line), stat_fp)){
+      key = strtok(line, ":");
+      if (key && !strcmp(key, "VmHWM")) {
+         /* Tabs are separators too, so a value that fills its column parses. */
+         value = strtok(NULL, " \t\n");
+         if (value) {
+            mem_current = atoll(value); // Size is in kB
+         }
+         if (memdebug) {
+            printf("VmHWM %lld\n",mem_current);
+         }
+         break;
+      }
+   }
+   if (ferror(stat_fp)){
+      printf("Warning: Error in reading %s for memory stats\n",proc_stat_file);
+   }
+
+   fclose(stat_fp);
+   stat_fp = NULL;
+
+   return(mem_current);
+}
+
+#define TIMER_ONEK 1024
+/*
+ * memstats_memfree -- free system memory in kB.
+ *
+ * Apple builds: free page count from host_statistics64() multiplied by the
+ * host page size and divided by 1024.
+ * Other builds: numeric value of the "MemFree" entry in /proc/meminfo.
+ *
+ * Returns -1 when the value cannot be obtained.
+ */
+long long memstats_memfree(){
+   long long freemem = -1;
+#ifdef __APPLE_CC__
+   vm_size_t page_size;
+   mach_port_t mach_port;
+   vm_statistics64_data_t vmstat;
+   /* The 64-bit structure needs the matching 64-bit flavor and count. */
+   mach_msg_type_number_t count = HOST_VM_INFO64_COUNT;
+
+   mach_port = mach_host_self();
+   if (host_page_size(mach_port, &page_size) == KERN_SUCCESS &&
+       host_statistics64(mach_port, HOST_VM_INFO64, (host_info64_t) &vmstat, &count) == KERN_SUCCESS){
+      freemem = vmstat.free_count*page_size/1024;
+   }
+#else
+   int memdebug = 0;
+   char buf[260];
+   char *key, *value;
+
+   if (!meminfo_fp){
+      meminfo_fp = fopen("/proc/meminfo", "r");
+      if (!meminfo_fp){
+         printf("fopen failed: \n");
+         return(-1);
+      }
+   } else {
+      /* Stream left open by an earlier call: restart from the first line.
+         rewind() replaces the old fflush()/fseek() pair, whose error paths
+         passed the -1 return code to strerror() instead of errno. */
+      rewind(meminfo_fp);
+   }
+
+   while (fgets(buf, sizeof(buf), meminfo_fp)) {
+      key = strtok(buf, ":");
+      if (!key) {
+         continue;   /* no token on this line */
+      }
+      if (memdebug){
+         printf("p: |%s|\n",key);
+      }
+      if (!strcmp(key, "MemFree")) {
+         value = strtok(NULL, " \t\n");
+         if (value) {   /* atoll(NULL) would be undefined */
+            freemem = atoll(value); // in kB
+         }
+         break;
+      }
+   }
+
+   fclose(meminfo_fp);
+   meminfo_fp = NULL;
+#endif
+
+   return(freemem);
+}
+
+/*
+ * memstats_memtotal -- total system memory in kB.
+ *
+ * Apple builds: sysctl() value for CTL_HW/HW_MEMSIZE divided by
+ * 1024.
+ * Other builds: numeric value of the "MemTotal" entry in /proc/meminfo.
+ *
+ * On the /proc path the shared meminfo_fp stream is opened on entry and
+ * closed again before returning.
+ *
+ * Returns -1 when the value cannot be obtained.
+ */
+long long memstats_memtotal(){
+   long long totalmem = -1;
+#ifdef __APPLE_CC__
+   int mib[2];
+   size_t length = sizeof(totalmem);
+
+   mib[0] = CTL_HW;
+   mib[1] = HW_MEMSIZE;
+   /* Only scale the result when sysctl() actually filled it in; the
+      previous code divided an uninitialized value on failure. */
+   if (sysctl(mib, 2, &totalmem, &length, NULL, 0) == 0){
+      totalmem /= 1024;
+   } else {
+      totalmem = -1;
+   }
+#else
+   int memdebug = 0;
+   char buf[260];
+   char *key;
+   char *value;
+
+   if (!meminfo_fp){
+      meminfo_fp = fopen("/proc/meminfo", "r");
+      if (!meminfo_fp){
+         printf("fopen failed: \n");
+         return(-1);
+      }
+   } else {
+      /* Stream left open by an earlier call: restart from the first line.
+         rewind() replaces the old fflush()/fseek() pair, whose error paths
+         passed the -1 return code to strerror() instead of errno. */
+      rewind(meminfo_fp);
+   }
+
+   /* Loop on fgets() itself: it returns NULL at end of file or on a read
+      error, which ends the scan without a separate feof() test. */
+   while (fgets(buf, sizeof(buf), meminfo_fp)) {
+      key = strtok(buf, ":");
+      if (!key) {
+         continue;   /* no token on this line */
+      }
+      if (memdebug){
+         printf("p: |%s|\n",key);
+      }
+      if (!strcmp(key, "MemTotal")) {
+         value = strtok(NULL, " \t\n");
+         if (value) {   /* atoll(NULL) would be undefined */
+            totalmem = atoll(value); // in kB
+         }
+         break;
+      }
+   }
+
+   fclose(meminfo_fp);
+   meminfo_fp = NULL;
+#endif
+
+   return(totalmem);
+}
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/mesh.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/mesh.h
@@ -0,0 +1,711 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+#ifndef MESH_H_
+#define MESH_H_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "MallocPlus.h"
+#include <string>
+#include <stdio.h>
+#include <vector>
+#include <math.h>
+#include "KDTree.h"
+#include "crux.h"
+#include "partition.h"
+#ifdef HAVE_OPENCL
+#include "ezcl/ezcl.h"
+#endif
+
+#if !defined(FULL_PRECISION) && !defined(MIXED_PRECISION) && !defined(MINIMUM_PRECISION) // no precision mode chosen by the build: default to FULL_PRECISION
+#define FULL_PRECISION
+#endif
+#ifdef NO_CL_DOUBLE // NO_CL_DOUBLE overrides any other choice and forces MINIMUM_PRECISION
+#undef  FULL_PRECISION
+#undef  MIXED_PRECISION
+#define MINIMUM_PRECISION
+#endif
+
+#if defined(MINIMUM_PRECISION) // real_t and spatial_t are both float
+   typedef float real_t; // this is used for intermediate calculations
+   typedef float spatial_t; // for spatial variables
+#ifdef HAVE_OPENCL
+   typedef cl_float cl_real_t; // for intermediate gpu physics state variables
+   typedef cl_float cl_spatial_t;
+#endif
+#ifdef HAVE_MPI
+   #define MPI_REAL_T MPI_FLOAT // for MPI communication for physics state variables
+   #define MPI_SPATIAL_T MPI_FLOAT
+#endif
+
+#elif defined(MIXED_PRECISION) // intermediate values calculated high precision and stored as floats
+   typedef double real_t;
+   typedef float spatial_t; // for spatial variables
+#ifdef HAVE_OPENCL
+   typedef cl_double cl_real_t; // for intermediate gpu physics state variables
+   typedef cl_float cl_spatial_t;
+#endif
+#ifdef HAVE_MPI
+   #define MPI_REAL_T MPI_DOUBLE
+   #define MPI_SPATIAL_T MPI_FLOAT
+#endif
+
+#elif defined(FULL_PRECISION) // real_t and spatial_t are both double
+   typedef double real_t;
+   typedef double spatial_t; // for spatial variables
+#ifdef HAVE_OPENCL
+   typedef cl_double cl_real_t; // for intermediate gpu physics state variables
+   typedef cl_double cl_spatial_t;
+#endif
+#ifdef HAVE_MPI
+   #define MPI_REAL_T MPI_DOUBLE
+   #define MPI_SPATIAL_T MPI_DOUBLE
+#endif
+#endif
+
+#define TILE_SIZE 128
+
+#define SWAP_PTR(xnew,xold,xtmp) (xtmp=xnew, xnew=xold, xold=xtmp) // exchanges xnew and xold via xtmp; arguments are not parenthesized, so pass plain lvalues
+#define MIN(a,b) ((a) < (b) ? (a) : (b)) // smaller of a and b; the selected argument is evaluated twice
+#define MAX(a,b) ((a) > (b) ? (a) : (b)) // larger of a and b; the selected argument is evaluated twice
+
+typedef unsigned int uint;
+
+//float mem_opt_factor = 1.0;
+
+enum boundary                   //  Cell-type codes: +1 for a real cell, negative values for boundary ghost cells.
+{  REAL_CELL      =  1,         //  Denotes cell type of real cell.
+   LEFT_BOUNDARY  = -1,         //  Denotes left boundary ghost cell.
+   RIGHT_BOUNDARY = -2,         //  Denotes right boundary ghost cell.
+   BOTTOM_BOUNDARY= -3,         //  Denotes bottom boundary ghost cell.
+   TOP_BOUNDARY   = -4,         //  Denotes top boundary ghost cell.
+   FRONT_BOUNDARY = -5,         //  Denotes front boundary ghost cell.
+   BACK_BOUNDARY  = -6 };       //  Denotes back boundary ghost cell.
+
+enum dimensionality             //  Each enumerator's value equals the number of dimensions it names.
+{  ONE_DIMENSIONAL   = 1,       // Dimensionality based at 1 for clarity.
+   TWO_DIMENSIONAL,
+   THREE_DIMENSIONAL};
+
+enum orientation                //  Quadrant labels, numbered 0..3 in the order listed.
+{  SW,                          //  SW quadrant.
+   NW,                          //  NW quadrant.
+   NE,                          //  NE quadrant.
+   SE };                        //  SE quadrant.
+
+enum neighbor_calc              //  Names the two neighbor-calculation methods.
+{  HASH_TABLE,                  //  Hash Table.
+   KDTREE };                    //  kD-tree.
+
+enum mesh_timers                // Indices into Mesh::cpu_timers[] and Mesh::gpu_timers[].
+{
+   MESH_TIMER_COUNT_BCS,
+   MESH_TIMER_CALC_NEIGHBORS,
+   MESH_TIMER_HASH_SETUP,
+   MESH_TIMER_HASH_QUERY,
+   MESH_TIMER_FIND_BOUNDARY,
+   MESH_TIMER_PUSH_SETUP,
+   MESH_TIMER_PUSH_BOUNDARY,
+   MESH_TIMER_LOCAL_LIST,
+   MESH_TIMER_LAYER1,
+   MESH_TIMER_LAYER2,
+   MESH_TIMER_LAYER_LIST,
+   MESH_TIMER_COPY_MESH_DATA,
+   MESH_TIMER_FILL_MESH_GHOST,
+   MESH_TIMER_FILL_NEIGH_GHOST,
+   MESH_TIMER_SET_CORNER_NEIGH,
+   MESH_TIMER_NEIGH_ADJUST,
+   MESH_TIMER_SETUP_COMM,
+   MESH_TIMER_KDTREE_SETUP,
+   MESH_TIMER_KDTREE_QUERY,
+   MESH_TIMER_REFINE_SMOOTH,
+   MESH_TIMER_REZONE_ALL,
+   MESH_TIMER_PARTITION,
+   MESH_TIMER_CALC_SPATIAL_COORDINATES,
+   MESH_TIMER_LOAD_BALANCE,
+   MESH_TIMER_SIZE              // Number of timers; sizes the timer arrays, so it must stay last.
+};
+
+enum mesh_counters              // Indices into Mesh::cpu_counters[] and Mesh::gpu_counters[].
+{
+   MESH_COUNTER_REZONE,
+   MESH_COUNTER_REFINE_SMOOTH,
+   MESH_COUNTER_CALC_NEIGH,
+   MESH_COUNTER_LOAD_BALANCE,
+   MESH_COUNTER_SIZE            // Number of counters; sizes the counter arrays and mesh_counter_descriptor[], so it must stay last.
+};
+
+//#ifdef DEBUG_RESTORE_VALS
+static const char *mesh_counter_descriptor[MESH_COUNTER_SIZE] = { // One label per mesh_counters entry, in enum order.
+   "mesh_counter_rezone",
+   "mesh_counter_refine_smooth",
+   "mesh_counter_calc_neigh",
+   "mesh_counter_load_balance"
+};
+//#endif
+
+typedef enum mesh_timers   mesh_timer_category;
+typedef enum mesh_counters mesh_counter_category;
+
+enum mesh_device_types          // Device selector; taken as the device_type argument of Mesh::timer_output().
+{
+   MESH_DEVICE_CPU,
+   MESH_DEVICE_GPU
+};
+
+typedef mesh_device_types mesh_device_type;
+
+using namespace std;
+
+/****************************************************************//**
+ * Mesh class
+ *    Contains the cell-based adaptive mesh refinement
+ *    (AMR) object with its data and methods.
+ *******************************************************************/
+class Mesh
+{
+
+public:
+   int ndim;                    //!<  Dimensionality of mesh (2 or 3).
+
+   MallocPlus mesh_memory;
+   MallocPlus gpu_mesh_memory;
+
+#ifdef HAVE_OPENCL
+   string defines;
+#endif
+
+   double    cpu_timers[MESH_TIMER_SIZE];
+   long long gpu_timers[MESH_TIMER_SIZE];
+
+   int    cpu_counters[MESH_COUNTER_SIZE];
+   int    gpu_counters[MESH_COUNTER_SIZE];
+
+   bool           do_rezone,
+                  gpu_do_rezone;
+
+   int            mype,
+                  numpe,
+                  parallel,
+                  cell_handle,
+                  noffset;
+
+   int            *lowerBound_Global,
+                  *upperBound_Global;
+
+   float          mem_factor;
+
+   double         offtile_ratio_local;
+   int            offtile_local_count;
+
+   vector<int>    corners_i,
+                  corners_j;
+
+   vector<int>    nsizes,
+                  ndispl;
+
+   FILE          *fp;
+
+   TKDTree        tree;         //!<  k-D tree for neighbor search.
+   vector<int>    proc;
+   vector<int>    lev_ibegin,   //!<  Lowest x-index in use at specified level of refinement.
+                  lev_iend,     //!<  Highest x-index in use at specified level of refinement.
+                  lev_jbegin,   //!<  Lowest y-index in use at specified level of refinement.
+                  lev_jend,     //!<  Highest y-index in use at specified level of refinement.
+                  lev_kbegin,   //!<  Lowest z-index in use at specified level of refinement.
+                  lev_kend,     //!<  Highest z-index in use at specified level of refinement.
+                  levtable;     //!<  Powers of two to simplify i,j calculations
+   vector<real_t> lev_deltax,   //!<  Grid spacing along x-axis at specified level of refinement.
+                  lev_deltay,   //!<  Grid spacing along y-axis at specified level of refinement.
+                  lev_deltaz;   //!<  Grid spacing along z-axis at specified level of refinement.
+   int            levmx,        //!<  Maximum level of refinement allowed.
+                  have_boundary,//!<  Mesh includes boundary cells, else creates on the fly
+                  ibase,        //!<  Index basis for arrays (0 for C, 1 for Fortran).
+                  imin,         //!<  Lowest x-index in use.
+                  imax,         //!<  Highest x-index in use.
+                  jmin,         //!<  Lowest y-index in use.
+                  jmax,         //!<  Highest y-index in use.
+                  kmin,         //!<  Lowest z-index in use.
+                  kmax;         //!<  Highest z-index in use.
+   size_t         ncells,       //!<  Number of cells in mesh.
+                  ncells_global, //!<  Global number of cells for parallel runs
+                  ncells_ghost; //!<  Number of cells in mesh with ghost cells.
+   real_t         xmin,         //!<  Lowest x-coordinate in use.
+                  xmax,         //!<  Highest x-coordinate in use.
+                  ymin,         //!<  Lowest y-coordinate in use.
+                  ymax,         //!<  Highest y-coordinate in use.
+                  zmin,         //!<  Lowest z-coordinate in use.
+                  zmax,         //!<  Highest z-coordinate in use.
+                  xcentermin,   //!<  Center of minimum x cell
+                  xcentermax,   //!<  Center of maximum x cell
+                  ycentermin,   //!<  Center of minimum y cell
+                  ycentermax,   //!<  Center of maximum y cell
+                  zcentermin,   //!<  Center of minimum z cell
+                  zcentermax,   //!<  Center of maximum z cell
+                  deltax,       //!<  Grid spacing along x-axis.
+                  deltay,       //!<  Grid spacing along y-axis.
+                  deltaz;       //!<  Grid spacing along z-axis.
+
+   vector<int>    index;        //!<  1D ordered index of mesh elements.
+
+                                 //  mesh state data
+   int            *i,            //!<  1D array of mesh element x-indices.
+                  *j,            //!<  1D array of mesh element y-indices.
+                  *k,            //!<  1D array of mesh element z-indices.
+                  *level,        //!<  1D array of mesh element refinement levels.
+                                 //!<  derived data from mesh state data
+                  *celltype,     //!<  1D ordered index of mesh element cell types (ghost or real).
+                  *nlft,         //!<  1D ordered index of mesh element left neighbors.
+                  *nrht,         //!<  1D ordered index of mesh element right neighbors.
+                  *nbot,         //!<  1D ordered index of mesh element bottom neighbors.
+                  *ntop,         //!<  1D ordered index of mesh element top neighbors.
+                  *nfrt,         //!<  1D ordered index of mesh element front neighbors.
+                  *nbak;         //!<  1D ordered index of mesh element back neighbors.
+
+   vector<spatial_t> x,          //!<  1D ordered index of mesh element x-coordinates.
+                     dx,         //!<  1D ordered index of mesh element x-coordinate spacings.
+                     y,          //!<  1D ordered index of mesh element y-coordinates.
+                     dy,         //!<  1D ordered index of mesh element y-coordinate spacings.
+                     z,          //!<  1D ordered index of mesh element z-coordinates.
+                     dz;         //!<  1D ordered index of mesh element z-coordinate spacings.
+
+#ifdef HAVE_OPENCL
+   cl_mem         dev_ioffset;
+
+   cl_mem         dev_celltype,       
+                  dev_i,       
+                  dev_j,       
+                  dev_level,       
+                  dev_nlft,       
+                  dev_nrht,       
+                  dev_nbot,       
+                  dev_ntop;       
+
+   cl_mem         dev_levdx,    // corresponds to lev_deltax
+                  dev_levdy,    // corresponds to lev_deltay
+                  dev_levibeg,
+                  dev_leviend,
+                  dev_levjbeg,
+                  dev_levjend,
+                  dev_levtable; //
+
+   cl_mem         dev_corners_i,
+                  dev_corners_j;
+#endif
+
+   int nxface;
+   int nyface;
+
+   vector<int> xface_i;
+   vector<int> xface_j;
+   vector<int> xface_level;
+   vector<int> map_xface2cell_lower;
+   vector<int> map_xface2cell_upper;
+
+   vector<int> map_xcell2face_left1;
+   vector<int> map_xcell2face_left2;
+   vector<int> map_xcell2face_right1;
+   vector<int> map_xcell2face_right2;
+
+   vector<int> ixmin_level;
+   vector<int> ixmax_level;
+   vector<int> jxmin_level;
+   vector<int> jxmax_level;
+   vector<int> ixadjust;
+   vector<int> jxadjust;
+
+   vector<int> yface_i;
+   vector<int> yface_j;
+   vector<int> yface_level;
+   vector<int> map_yface2cell_lower;
+   vector<int> map_yface2cell_upper;
+
+   vector<int> map_ycell2face_bot1;
+   vector<int> map_ycell2face_bot2;
+   vector<int> map_ycell2face_top1;
+   vector<int> map_ycell2face_top2;
+
+   vector<int> iymin_level;
+   vector<int> iymax_level;
+   vector<int> jymin_level;
+   vector<int> jymax_level;
+   vector<int> iyadjust;
+   vector<int> jyadjust;
+
+   //   Public constructors.
+   Mesh(FILE *fin, int *numpe);
+   Mesh(int nx, int ny, int levmx_in, int ndim_in, double deltax_in, double deltay_in, int boundary, int parallel_in, int do_gpu_calc);
+
+   //   Member functions.
+   void init(int nx, int ny, real_t circ_radius, partition_method initial_order, int do_gpu_calc);
+   void terminate(void);
+
+   void set_bounds(int n);
+   void get_bounds(int& lowerBound, int& upperBound);
+
+/****************************************************************//**
+ * @name Memory routines
+ *******************************************************************/
+///@{
+
+/****************************************************************//**
+ * \brief
+ * Allocates the basic mesh memory, i, j, and level, using the MallocPlus
+ * memory database.
+ *
+ * **Parameters**
+ * * size_t ncells -- number of cells in the mesh
+ *
+ * Typical Usage
+ *
+ *     mesh.allocate(ncells);
+ *******************************************************************/
+   void allocate(size_t ncells);
+
+   void resize(size_t new_ncells);
+   void memory_reset_ptrs(void);
+   void resize_old_device_memory(size_t ncells);
+///@}
+
+/* inline "macros" */
+
+///@{
+/****************************************************************//**
+ * \brief
+ * Boundary cell tests
+ *******************************************************************/
+   int  is_lower_boundary(int *iv, int *lev_begin, int ic)    { return (iv[ic] < lev_begin[level[ic]]); }
+   int  is_upper_boundary(int *iv, int *lev_end,   int ic)    { return (iv[ic] > lev_end[level[ic]]); }
+
+   int  is_left_boundary(int ic)    { return (i[ic] < lev_ibegin[level[ic]]); }
+   int  is_right_boundary(int ic)   { return (i[ic] > lev_iend[  level[ic]]); }
+   int  is_bottom_boundary(int ic)  { return (j[ic] < lev_jbegin[level[ic]]); }
+   int  is_top_boundary(int ic)     { return (j[ic] > lev_jend[  level[ic]]); }
+   int  is_front_boundary(int ic)   { return (k[ic] < lev_kbegin[level[ic]]); }
+   int  is_back_boundary(int ic)    { return (k[ic] > lev_kend[  level[ic]]); }
+///@}
+
+///@{
+/****************************************************************//**
+ * \brief
+ * Tests for positioning in set of 4 cells
+ *******************************************************************/
+   int is_lower(int i)  { return(i % 2 == 0); }
+   int is_upper(int i)  { return(i % 2 == 1); }
+
+   int is_lower_left(int i, int j)  { return(i % 2 == 0 && j % 2 == 0); }
+   int is_lower_right(int i, int j) { return(i % 2 == 1 && j % 2 == 0); }
+   int is_upper_left(int i, int j)  { return(i % 2 == 0 && j % 2 == 1); }
+   int is_upper_right(int i, int j) { return(i % 2 == 1 && j % 2 == 1); }
+///@}
+
+///@{
+/****************************************************************//**
+ * \brief
+ * Level tests
+ *******************************************************************/
+   int is_same_level_or_coarser(int nn, int nz) { return(level[nn] <= level[nz]); }
+   int is_coarser(int nn, int nz)               { return(level[nn] <  level[nz]); }
+   int is_finer(int nn, int nz)                 { return(level[nn] >  level[nz]); }
+   int is_same_level(int nn, int nz)            { return(level[nn] == level[nz]); }
+///@}
+
+/* accessor routines */
+   double get_cpu_timer(mesh_timer_category category)       {return(cpu_timers[category]); };
+   /* Convert nanoseconds to seconds */
+   double get_gpu_timer(mesh_timer_category category)       {return((double)(gpu_timers[category])*1.0e-9); };
+
+   void parallel_output(const char *string, double    local_value, int output_level, const char *units);
+   void parallel_output(const char *string, long long local_value, int output_level, const char *units);
+   void parallel_output(const char *string, int       local_value, int output_level, const char *units);
+   void timer_output(mesh_timer_category category, mesh_device_types device_type, int timer_level);
+
+   int get_cpu_counter(mesh_counter_category category)      {return(cpu_counters[category]); };
+   int get_gpu_counter(mesh_counter_category category)      {return(gpu_counters[category]); };
+
+   int get_calc_neighbor_type(void);
+
+   void print_partition_measure(void);
+   void print_calc_neighbor_type(void);
+   void print_partition_type(void);
+/* end accessor routines */
+
+/* Debugging, internal, or not used yet */
+#ifdef HAVE_OPENCL
+   int gpu_count_BCs();
+#endif
+   void kdtree_setup(void);
+   void partition_measure(void);
+   void partition_cells(int numpe,
+                   vector<int> &order,
+                   enum partition_method method);
+   void calc_distribution(int numpe);
+   void calc_symmetry(vector<int> &dsym,
+                  vector<int> &xsym,
+                  vector<int> &ysym);
+
+/* End of debugging, internal, or not used yet */
+
+   //void calc_face_list_test(double *H);
+   void calc_face_list(void);
+   void calc_face_list_wmap(void);
+   void calc_face_list_wbidirmap(void);
+   void calc_face_list_clearmaps(void);
+
+   int **get_xface_flag(int lev, bool print_output=0);
+   int **get_yface_flag(int lev, bool print_output=0);
+   void get_flat_grid(int lev, int ***zone_flag, int ***zone_cell);
+
+///@{
+/****************************************************************//**
+ * \brief
+ * Calculate neighbors
+ *
+ * **Parameters**
+ *
+ *  Input -- from within the object
+ *    i, j, level
+ *  Output -- in the object
+ *    nlft, nrht, nbot, ntop arrays
+ *******************************************************************/
+   void calc_neighbors(int ncells);
+   void calc_neighbors_local(void);
+#ifdef HAVE_OPENCL
+   void gpu_calc_neighbors(void);
+   void gpu_calc_neighbors_local(void);
+#endif
+   //   TODO:  Not created yet; overloading for 3D mesh support. (davis68)
+   void calc_neighbors(vector<int> &nlft,
+                  vector<int> &nrht,
+                  vector<int> &nbot,
+                  vector<int> &ntop,
+                  vector<int> &nfrt,
+                  vector<int> &nbak,
+                  vector<int> index);
+///@}
+
+///@{
+/****************************************************************//**
+ * \brief
+ * Calculate rezone count
+ *
+ * **Parameters**
+ *
+ *  Input
+ *    mpot -- potential mesh refinement
+ *    ioffset -- write offset for each cell
+ *  Output
+ *    result -- cell count
+ *******************************************************************/
+   int  rezone_count(vector<int> mpot, int &icount, int &jcount);
+#ifdef HAVE_OPENCL
+   void gpu_rezone_count2(size_t block_size, size_t local_work_size, cl_mem dev_redscratch, cl_mem &dev_result);
+   void gpu_rezone_count(size_t block_size, size_t local_work_size, cl_mem dev_redscratch, cl_mem &dev_result);
+   void gpu_rezone_scan(size_t block_size, size_t local_work_size, cl_mem dev_ioffset, cl_mem &dev_result);
+#endif
+///@}
+
+///@{
+/****************************************************************//**
+ * \brief
+ * Refine Smooth -- smooths jump in refinement level so that only a 1 to 2 jump occurs
+ *
+ *  **Parameters**
+ *
+ *  Input/Output
+ *    mpot -- potential mesh refinement array, 1 is refine and -1 coarsen
+ *    ioffset -- write offset for each cell to account for new cells
+ *    result -- refinement count
+ *******************************************************************/
+   size_t refine_smooth(vector<int> &mpot, int &icount, int &jcount);
+#ifdef HAVE_OPENCL
+   int gpu_refine_smooth(cl_mem &dev_mpot, int &icount, int &jcount);
+#endif
+///@}
+
+///@{
+/****************************************************************//**
+ * \brief
+ * Rezone mesh
+ *
+ *  **Parameters**
+ *
+ *  Input
+ *     add_ncells -- for each processor. A global sum will be done and the main part of
+ *        the rezone will be skipped if no cells are added.
+ *     mpot -- mesh rezone potential
+ *     have_state flag -- 0 (false) for setup when physics state has not been allocated
+ *     ioffset -- partial prefix scan results for starting address to write new cells
+ *     state_memory -- linked list of arrays for state
+ *  Output
+ *     new mesh and state arrays with refinement/coarsening performed
+ *******************************************************************/
+   void rezone_all(int icount, int jcount, vector<int> mpot, int have_state, MallocPlus &state_memory);
+#ifdef HAVE_OPENCL
+   void gpu_rezone_all(int icount, int jcount, cl_mem &dev_mpot, MallocPlus &gpu_state_memory);
+#endif
+///@}
+
+///@{
+/****************************************************************//**
+ * \brief
+ * Load balance -- only needed for parallel (MPI) runs
+ *
+ *  **Parameters**
+ *
+ *  Input
+ *    numcells -- ncells from rezone all routine. This is a copy in so that a local
+ *       value can be used for load_balance and gpu_load_balance without it getting
+ *       reset for clamr_checkall routine
+ *    weight -- weighting array per cell for balancing. Currently not used. Null value
+ *       indicates even weighting of cells for load balance. 
+ *    state_memory or gpu_state_memory -- linked-list of arrays from physics routine
+ *       to be load balanced. 
+ * Output -- arrays will be returned load balanced with new sizes. Pointers to arrays
+ *       will need to be reset
+ *******************************************************************/
+#ifdef HAVE_MPI
+   void do_load_balance_local(size_t numcells, float *weight, MallocPlus &state_memory);
+#ifdef HAVE_OPENCL
+   int gpu_do_load_balance_local(size_t numcells, float *weight, MallocPlus &gpu_state_memory);
+#endif
+#endif
+///@}
+
+///@{
+/****************************************************************//**
+ * \brief
+ * Calculate spatial coordinates
+ *
+ *  **Parameters**
+ *
+ *  Input -- from within the object
+ *    i, j, level
+ *  Output
+ *    x, y -- coordinates for each cell
+ *    dx, dy -- size of each cell
+ *******************************************************************/
+   void calc_spatial_coordinates(int ibase);
+#ifdef HAVE_OPENCL
+   void gpu_calc_spatial_coordinates(cl_mem dev_x, cl_mem dev_dx, cl_mem dev_y, cl_mem dev_dy);
+#endif
+///@}
+
+///@{
+/****************************************************************//**
+ * \brief
+ * Testing routines
+ *******************************************************************/
+#ifdef HAVE_OPENCL
+   void compare_dev_local_to_local(void); // Not currently called
+   void compare_neighbors_gpu_global_to_cpu_global(void);
+#endif
+   void compare_neighbors_cpu_local_to_cpu_global(uint ncells_ghost, uint ncells_global, Mesh *mesh_global, int *nsizes, int *ndispl);
+#ifdef HAVE_OPENCL
+   void compare_neighbors_all_to_gpu_local(Mesh *mesh_global, int *nsizes, int *ndispl);
+   void compare_mpot_gpu_global_to_cpu_global(int *mpot, cl_mem dev_mpot);
+#endif
+   void compare_mpot_cpu_local_to_cpu_global(uint ncells_global, int *nsizes, int *displ, int *mpot, int *mpot_global, int cycle);
+#ifdef HAVE_OPENCL
+   void compare_mpot_all_to_gpu_local(int *mpot, int *mpot_global, cl_mem dev_mpot, cl_mem dev_mpot_global, uint ncells_global, int *nsizes, int *ndispl, int ncycle);
+   void compare_ioffset_gpu_global_to_cpu_global(uint old_ncells, int *mpot);
+   void compare_ioffset_all_to_gpu_local(uint old_ncells, uint old_ncells_global, int block_size, int block_size_global, int *mpot, int *mpot_global, cl_mem dev_ioffset, cl_mem dev_ioffset_global, int *ioffset, int *ioffset_global, int *celltype_global, int *i_global, int *j_global);
+   void compare_coordinates_gpu_global_to_cpu_global_double(cl_mem dev_x, cl_mem dev_dx, cl_mem dev_y, cl_mem dev_dy, cl_mem dev_H, double *H);
+   void compare_coordinates_gpu_global_to_cpu_global_float(cl_mem dev_x, cl_mem dev_dx, cl_mem dev_y, cl_mem dev_dy, cl_mem dev_H, float *H);
+#endif
+   void compare_coordinates_cpu_local_to_cpu_global_double(uint ncells_global, int *nsizes, int *ndispl, spatial_t *x, spatial_t *dx, spatial_t *y, spatial_t *dy, double *H, spatial_t *x_global, spatial_t *dx_global, spatial_t *y_global, spatial_t *dy_global, double *H_global, int cycle);
+   void compare_coordinates_cpu_local_to_cpu_global_float(uint ncells_global, int *nsizes, int *ndispl, spatial_t *x, spatial_t *dx, spatial_t *y, spatial_t *dy, float *H, spatial_t *x_global, spatial_t *dx_global, spatial_t *y_global, spatial_t *dy_global, float *H_global, int cycle);
+#ifdef HAVE_OPENCL
+   void compare_indices_gpu_global_to_cpu_global(void);
+#endif
+   void compare_indices_cpu_local_to_cpu_global(uint ncells_global, Mesh *mesh_global, int *nsizes, int *ndispl, int cycle);
+#ifdef HAVE_OPENCL
+   void compare_indices_all_to_gpu_local(Mesh *mesh_global, uint ncells_global, int *nsizes, int *ndispl, int ncycle);
+#endif
+///@}
+
+   size_t get_checkpoint_size(void);
+   void store_checkpoint(Crux *crux);
+   void restore_checkpoint(Crux *crux);
+
+   void calc_celltype_threaded(size_t ncells);
+   void calc_celltype(size_t ncells);
+
+private:
+   //   Private constructors.
+   Mesh(const Mesh&);   //   Blocks copy constructor so copies are not made inadvertently.
+
+   //   Member functions.
+   void print_object_info();
+
+   void set_refinement_order(int order[4], int ic, int ifirst, int ilast, int jfirst, int jlast,
+                                int level_first, int level_last, int *i, int *j, int *level);
+
+   void write_grid(int ncycle);
+   void calc_centerminmax(void);
+   void calc_minmax(void);
+
+   void print(void);
+   void print_local(void);
+#ifdef HAVE_OPENCL
+   void print_dev_local();
+#endif
+
+};
+
+#endif /* MESH_H */
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/mesh.cpp
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/mesh.cpp
@@ -0,0 +1,10456 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+#ifdef HAVE_MPI
+#include "mpi.h"
+#endif
+
+#include <algorithm>
+#include <unistd.h>
+#include <limits.h>
+#include <time.h>
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+//#include "hsfc.h"
+#include "KDTree.h"
+#include "mesh.h"
+#ifdef HAVE_OPENCL
+#include "ezcl/ezcl.h"
+#endif
+#include "timer.h"
+#ifdef HAVE_MPI
+#include "l7/l7.h"
+#endif
+#include "reduce.h"
+#include "genmalloc.h"
+#include "hash.h"
+
+#define DEBUG 0
+//#define BOUNDS_CHECK 1
+
+#ifndef DEBUG
+#define DEBUG 0
+#endif
+#define DEBUG_RESTORE_VALS 1
+
+typedef int scanInt;
+void scan ( scanInt *input , scanInt *output , scanInt length);
+
+#ifdef _OPENMP
+#undef REZONE_NO_OPTIMIZATION
+#else
+#define REZONE_NO_OPTIMIZATION 1
+#endif
+
+#define TIMING_LEVEL 2
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b)) // the selected argument is evaluated twice; avoid arguments with side effects
+
+#define IPOW2(a) (2 << (a)) // NOTE(review): 2 << a is 2^(a+1), so IPOW2(0) == 2 -- confirm callers do not expect 2^a
+
+#if defined(MINIMUM_PRECISION)
+#define CONSERVATION_EPS    .1
+#define STATE_EPS      15.0
+
+#elif defined(MIXED_PRECISION) // intermediate values calculated high precision and stored as floats
+#define CONSERVATION_EPS    .02
+#define STATE_EPS        .025
+
+#elif defined(FULL_PRECISION)
+#define CONSERVATION_EPS    .02
+#define STATE_EPS        .025
+
+#endif
+
+typedef unsigned int uint;
+#ifdef __APPLE_CC__
+typedef unsigned long ulong;
+#endif
+
+#define TWO 2
+#define HALF 0.5
+
+#define __NEW_STENCIL__
+//#define __OLD_STENCIL__
+//#define STENCIL_WARNING 1
+
+#ifdef STENCIL_WARNING
+int do_stencil_warning=1;
+#else
+int do_stencil_warning=0;
+#endif
+
+#ifdef HAVE_OPENCL
+#include "mesh_kernel.inc"
+#endif
+
+extern bool localStencil;
+int calc_neighbor_type;
+bool dynamic_load_balance_on;
+bool neighbor_remap;
+
+#ifdef _OPENMP
+static bool iversion_flag = false;
+#endif
+
+static const char *mesh_timer_descriptor[MESH_TIMER_SIZE] = {
+   "mesh_timer_count_BCs",
+   "mesh_timer_calc_neighbors",
+   "mesh_timer_hash_setup",
+   "mesh_timer_hash_query",
+   "mesh_timer_find_boundary",
+   "mesh_timer_push_setup",
+   "mesh_timer_push_boundary",
+   "mesh_timer_local_list",
+   "mesh_timer_layer1",
+   "mesh_timer_layer2",
+   "mesh_timer_layer_list",
+   "mesh_timer_copy_mesh_data",
+   "mesh_timer_fill_mesh_ghost",
+   "mesh_timer_fill_neigh_ghost",
+   "mesh_timer_set_corner_neigh",
+   "mesh_timer_neigh_adjust",
+   "mesh_timer_setup_comm",
+   "mesh_timer_kdtree_setup",
+   "mesh_timer_kdtree_query",
+   "mesh_timer_refine_smooth",
+   "mesh_timer_rezone_all",
+   "mesh_timer_partition",
+   "mesh_timer_calc_spatial_coordinates",
+   "mesh_timer_load_balance"
+};
+
+#ifdef HAVE_OPENCL
+cl_kernel      kernel_hash_adjust_sizes;
+cl_kernel      kernel_hash_setup;
+cl_kernel      kernel_hash_setup_local;
+cl_kernel      kernel_neighbor_init;
+cl_kernel      kernel_calc_neighbors;
+cl_kernel      kernel_calc_neighbors_local;
+cl_kernel      kernel_calc_border_cells;
+cl_kernel      kernel_calc_border_cells2;
+cl_kernel      kernel_finish_scan;
+cl_kernel      kernel_get_border_data;
+cl_kernel      kernel_calc_layer1;
+cl_kernel      kernel_calc_layer1_sethash;
+cl_kernel      kernel_calc_layer2;
+cl_kernel      kernel_get_border_data2;
+cl_kernel      kernel_calc_layer2_sethash;
+cl_kernel      kernel_copy_mesh_data;
+cl_kernel      kernel_fill_mesh_ghost;
+cl_kernel      kernel_fill_neighbor_ghost;
+cl_kernel      kernel_set_corner_neighbor;
+cl_kernel      kernel_adjust_neighbors_local;
+cl_kernel      kernel_reduction_scan2;
+cl_kernel      kernel_reduction_count;
+cl_kernel      kernel_reduction_count2;
+cl_kernel      kernel_hash_size;
+cl_kernel      kernel_finish_hash_size;
+cl_kernel      kernel_calc_spatial_coordinates;
+cl_kernel      kernel_count_BCs;
+cl_kernel      kernel_do_load_balance_lower;
+cl_kernel      kernel_do_load_balance_middle;
+cl_kernel      kernel_do_load_balance_upper;
+#ifndef MINIMUM_PRECISION
+cl_kernel      kernel_do_load_balance_double;
+#endif
+cl_kernel      kernel_do_load_balance_float;
+cl_kernel      kernel_refine_smooth;
+cl_kernel      kernel_coarsen_smooth;
+cl_kernel      kernel_coarsen_check_block;
+cl_kernel      kernel_rezone_all;
+cl_kernel      kernel_rezone_neighbors;
+#ifndef MINIMUM_PRECISION
+cl_kernel      kernel_rezone_one_double;
+#endif
+cl_kernel      kernel_rezone_one_float;
+cl_kernel      kernel_copy_mpot_ghost_data;
+cl_kernel      kernel_set_boundary_refinement;
+#endif
+
+extern size_t hash_header_size;
+extern int   choose_hash_method;
+
+// Dump the mesh to "gridNN.gph": a viewport line, one "rect" per cell, a "line" path
+// through the cell centers in index order, and a "text" label carrying each cell index.
+void Mesh::write_grid(int ncycle)
+{
+   char filename[20];   // "grid" + at most 10 digits + ".gph" + NUL fits in 20 bytes
+   if (ncycle<0) ncycle=0;   // negative cycle numbers are written as cycle 0
+   snprintf(filename,sizeof(filename),"grid%02d.gph",ncycle);
+   FILE *fp=fopen(filename,"w");
+   if (fp == NULL) {   // a failed open used to fall through to fprintf on a NULL stream
+      printf("Error -- unable to open %s for writing\n",filename);
+      return;
+   }
+
+   fprintf(fp,"viewport %lf %lf %lf %lf\n",xmin,ymin,xmax,ymax);
+   for (uint ic = 0; ic < ncells; ic++) {
+      fprintf(fp,"rect  %lf   %lf   %lf   %lf\n",x[ic],y[ic],x[ic]+dx[ic],y[ic]+dy[ic]);
+   }
+   for (uint ic = 0; ic < ncells; ic++){   // cell 0 opens the path; nothing is read when ncells is 0
+      fprintf(fp,"%s %lf %lf\n",(ic == 0) ? "line_init" : "line",x[ic]+0.5*dx[ic],y[ic]+0.5*dy[ic]);
+   }
+   for (uint ic = 0; ic < ncells; ic++){
+      fprintf(fp,"text %lf %lf %u\n",x[ic]+0.5*dx[ic],y[ic]+0.5*dy[ic],ic);
+   }
+   fclose(fp);
+}
+
+// Build a mesh from the text description in fin: header lines for levmax, cells, numpe,
+// ndim and the axis origin/spacing, one skipped line, then "index i j level" per cell.
+Mesh::Mesh(FILE *fin, int *numpe)
+{
+   char string[80];
+   double axis_lo = 0.0, axis_delta = 0.0;   // "%lf" targets for the y and z axis lines
+   ibase = 1;
+
+   time_t trand;
+   time(&trand);
+   srand48((long)trand);
+
+   if(fgets(string, 80, fin) == NULL) exit(-1);
+   sscanf(string,"levmax %d",&levmx);
+   if(fgets(string, 80, fin) == NULL) exit(-1);
+   sscanf(string,"cells %zu",&ncells);   // ncells is a size_t, so "%ld" was the wrong conversion
+   if(fgets(string, 80, fin) == NULL) exit(-1);
+   sscanf(string,"numpe %d",numpe);
+   if(fgets(string, 80, fin) == NULL) exit(-1);
+   sscanf(string,"ndim %d",&ndim);
+   if(fgets(string, 80, fin) == NULL) exit(-1);
+#ifdef MINIMUM_PRECISION
+   sscanf(string,"xaxis %f %f",&xmin, &deltax);
+#else
+   sscanf(string,"xaxis %lf %lf",&xmin, &deltax);
+#endif
+   // xmin/deltax are scanned as float under MINIMUM_PRECISION; if the y/z members share that type,
+   // the old "%lf" through a (double*) cast wrote 8 bytes into a 4-byte member. Scan into doubles, then assign.
+   if(fgets(string, 80, fin) == NULL) exit(-1);
+   if (sscanf(string,"yaxis %lf %lf",&axis_lo,&axis_delta) == 2) { ymin = axis_lo; deltay = axis_delta; }
+   if (ndim == THREE_DIMENSIONAL){
+     if(fgets(string, 80, fin) == NULL) exit(-1);
+     if (sscanf(string,"zaxis %lf %lf",&axis_lo,&axis_delta) == 2) { zmin = axis_lo; deltaz = axis_delta; }
+   }
+   if(fgets(string, 80, fin) == NULL) exit(-1);   // this line is read and discarded
+
+   index.resize(ncells);
+   allocate(ncells);
+
+   uint ic=0;
+   while(fgets(string, 80, fin)!=NULL){   // every line is counted so a surplus is still reported below
+      if (ic < ncells) sscanf(string, "%d %d %d %d", &(index[ic]), &(i[ic]), &(j[ic]), &(level[ic]));   // never store past ncells entries
+      ic++;
+   }
+
+   ibase = 0;
+   calc_spatial_coordinates(ibase);
+   KDTree_Initialize(&tree);
+   print();
+   if (ic != ncells) {
+      printf("Error -- cells read does not match number specified\n");
+   }
+}
+
+// Print one row per cell to stdout: position, original index, i, j, level, the four neighbors, and the x/y extents.
+void Mesh::print(void)
+{
+   assert(&nlft[0] != NULL);
+   assert(&x[0] != NULL);
+   assert(&index[0] != NULL);
+   printf("index orig index   i     j     lev   nlft  nrht  nbot  ntop   xlow    xhigh     ylow    yhigh\n");
+   for (uint cell=0; cell<ncells; cell++) {
+      printf("%6d %6d   %4d  %4d   %4d  %4d  %4d  %4d  %4d ", cell, index[cell], i[cell], j[cell], level[cell], nlft[cell], nrht[cell], nbot[cell], ntop[cell]);
+      printf("%8.2lf %8.2lf %8.2lf %8.2lf\n", x[cell], x[cell]+dx[cell], y[cell], y[cell]+dy[cell]);
+   }
+}
+
+// Print the local cells to fp, each row prefixed with mype; neighbor columns appear only when mesh_memory.get_memory_size(nlft) is at least ncells_ghost.
+void Mesh::print_local()
+{
+   if (mesh_memory.get_memory_size(nlft) < ncells_ghost){
+      fprintf(fp,"%d:    index   i     j     lev\n",mype);
+      for (uint cell=0; cell<ncells_ghost; cell++) {
+         fprintf(fp,"%d: %6d  %4d  %4d   %4d  \n", mype,cell, i[cell], j[cell], level[cell]);
+      }
+      return;
+   }
+   fprintf(fp,"%d:   index global  i     j     lev   nlft  nrht  nbot  ntop \n",mype);
+   for (uint cell=0; cell<ncells; cell++) {   // rows [0, ncells)
+      fprintf(fp,"%d: %6d  %6d %4d  %4d   %4d  %4d  %4d  %4d  %4d \n", mype,cell, cell+noffset,i[cell], j[cell], level[cell], nlft[cell], nrht[cell], nbot[cell], ntop[cell]);
+   }
+   for (uint cell=ncells; cell<ncells_ghost; cell++) {   // rows [ncells, ncells_ghost)
+      fprintf(fp,"%d: %6d  %6d %4d  %4d   %4d  %4d  %4d  %4d  %4d \n", mype,cell, cell+noffset,i[cell], j[cell], level[cell], nlft[cell], nrht[cell], nbot[cell], ntop[cell]);
+   }
+}
+
+#ifdef HAVE_OPENCL
+// Read i, j, level and the four neighbor arrays for ncells_ghost cells back from the
+// device buffers and print one row per cell to fp, prefixed with mype.
+void Mesh::print_dev_local(void)
+{
+   cl_command_queue command_queue = ezcl_get_command_queue();
+
+   vector<int>i_tmp(ncells_ghost);
+   vector<int>j_tmp(ncells_ghost);
+   vector<int>level_tmp(ncells_ghost);
+   vector<int>nlft_tmp(ncells_ghost);
+   vector<int>nrht_tmp(ncells_ghost);
+   vector<int>nbot_tmp(ncells_ghost);
+   vector<int>ntop_tmp(ncells_ghost);
+   ezcl_enqueue_read_buffer(command_queue, dev_i,     CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &i_tmp[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_j,     CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &j_tmp[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_level, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &level_tmp[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_nlft,  CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nlft_tmp[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_nrht,  CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nrht_tmp[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_nbot,  CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nbot_tmp[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_ntop,  CL_TRUE,  0, ncells_ghost*sizeof(cl_int), &ntop_tmp[0], NULL);
+
+   fprintf(fp,"%d:   index global  i     j     lev   nlft  nrht  nbot  ntop \n",mype);
+   // The bound used to be MAX(ncells_ghost,ncells), which indexed past the ncells_ghost-sized host copies whenever ncells was larger.
+   for (uint ic=0; ic<ncells_ghost; ic++) {
+      fprintf(fp,"%d: %6d  %6d %4d  %4d   %4d  %4d  %4d  %4d  %4d \n", mype,ic, ic+noffset,i_tmp[ic], j_tmp[ic], level_tmp[ic], nlft_tmp[ic], nrht_tmp[ic], nbot_tmp[ic], ntop_tmp[ic]);
+   }
+}
+
+// Debug check: read i, j, level and the neighbor arrays back from the device and report to fp every cell whose device i, j or level differs from the host value.
+void Mesh::compare_dev_local_to_local(void)
+{
+   cl_command_queue command_queue = ezcl_get_command_queue();
+   vector<int>i_tmp(ncells_ghost);
+   vector<int>j_tmp(ncells_ghost);
+   vector<int>level_tmp(ncells_ghost);
+   vector<int>nlft_tmp(ncells_ghost);
+   vector<int>nrht_tmp(ncells_ghost);
+   vector<int>nbot_tmp(ncells_ghost);
+   vector<int>ntop_tmp(ncells_ghost);
+   ezcl_enqueue_read_buffer(command_queue, dev_i,     CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &i_tmp[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_j,     CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &j_tmp[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_level, CL_TRUE,  0, ncells_ghost*sizeof(cl_int), &level_tmp[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_nlft,  CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nlft_tmp[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_nrht,  CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nrht_tmp[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_nbot,  CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nbot_tmp[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_ntop,  CL_TRUE,  0, ncells_ghost*sizeof(cl_int), &ntop_tmp[0], NULL);
+
+   fprintf(fp,"\n%d:                      Comparing mesh for dev_local to local\n\n",mype);
+   //fprintf(fp,"%d:   index global  i     j     lev   nlft  nrht  nbot  ntop \n",mype);
+   for (uint ic=0; ic<ncells_ghost; ic++) {
+      if (i_tmp[ic]     != i[ic]    ) fprintf(fp,"%d: Error: cell %d dev_i     %d i     %d\n",mype,ic,i_tmp[ic],    i[ic]);
+      if (j_tmp[ic]     != j[ic]    ) fprintf(fp,"%d: Error: cell %d dev_j     %d j     %d\n",mype,ic,j_tmp[ic],    j[ic]);
+      if (level_tmp[ic] != level[ic]) fprintf(fp,"%d: Error: cell %d dev_level %d level %d\n",mype,ic,level_tmp[ic],level[ic]);
+      // NOTE(review): the nlft/nrht/nbot/ntop copies are read back above but never compared in this loop.
+      //fprintf(fp,"%d: %6d  %6d %4d  %4d   %4d  %4d  %4d  %4d  %4d \n", mype,ic, ic+noffset,i_tmp[ic], j_tmp[ic], level_tmp[ic], nlft_tmp[ic], nrht_tmp[ic], nbot_tmp[ic], ntop_tmp[ic]);
+   }
+   fprintf(fp,"\n%d:                 Finished comparing mesh for dev_local to local\n\n",mype);
+}
+
+// Debug check: read the four neighbor arrays back from the device and print every cell whose host value differs.
+void Mesh::compare_neighbors_gpu_global_to_cpu_global()
+{
+   cl_command_queue queue = ezcl_get_command_queue();
+   const size_t nbytes = ncells*sizeof(cl_int);
+
+   vector<int> gpu_nlft(ncells);
+   vector<int> gpu_nrht(ncells);
+   vector<int> gpu_nbot(ncells);
+   vector<int> gpu_ntop(ncells);
+   ezcl_enqueue_read_buffer(queue, dev_nlft,  CL_FALSE, 0, nbytes, &gpu_nlft[0], NULL);
+   ezcl_enqueue_read_buffer(queue, dev_nrht,  CL_FALSE, 0, nbytes, &gpu_nrht[0], NULL);
+   ezcl_enqueue_read_buffer(queue, dev_nbot,  CL_FALSE, 0, nbytes, &gpu_nbot[0], NULL);
+   ezcl_enqueue_read_buffer(queue, dev_ntop,  CL_TRUE,  0, nbytes, &gpu_ntop[0], NULL);
+
+   for (uint cell=0; cell<ncells; cell++) {
+      if (nlft[cell] != gpu_nlft[cell]) printf("DEBUG -- nlft: ic %d nlft %d nlft_check %d\n",cell, nlft[cell], gpu_nlft[cell]);
+      if (nrht[cell] != gpu_nrht[cell]) printf("DEBUG -- nrht: ic %d nrht %d nrht_check %d\n",cell, nrht[cell], gpu_nrht[cell]);
+      if (nbot[cell] != gpu_nbot[cell]) printf("DEBUG -- nbot: ic %d nbot %d nbot_check %d\n",cell, nbot[cell], gpu_nbot[cell]);
+      if (ntop[cell] != gpu_ntop[cell]) printf("DEBUG -- ntop: ic %d ntop %d ntop_check %d\n",cell, ntop[cell], gpu_ntop[cell]);
+   }
+}
+#endif
+
+// Debug check (does nothing unless HAVE_MPI is defined): tags each local cell, gathers the tags, and prints where a neighbor lookup through the local arrays disagrees with the same lookup through mesh_global.
+void Mesh::compare_neighbors_cpu_local_to_cpu_global(uint ncells_ghost, uint ncells_global, Mesh *mesh_global, int *nsizes, int *ndispl)
+{
+#ifdef HAVE_MPI
+   int *nlft_global = mesh_global->nlft;
+   int *nrht_global = mesh_global->nrht;
+   int *nbot_global = mesh_global->nbot;
+   int *ntop_global = mesh_global->ntop;
+
+   vector<int> Test(ncells_ghost);
+   for(uint ic=0; ic<ncells; ic++){
+      Test[ic] = mype*1000 +ic; // NOTE(review): tags repeat across ranks once a rank holds 1000 or more cells
+   }
+   if (numpe > 1) L7_Update(&Test[0], L7_INT, cell_handle); // presumably fills entries [ncells, ncells_ghost) from other ranks -- confirm against L7
+
+   vector<int> Test_global(ncells_global);
+   MPI_Allgatherv(&Test[0], nsizes[mype], MPI_INT, &Test_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+
+   vector<int> Test_check(ncells);
+   vector<int> Test_check_global(ncells_global);
+   // Single-hop mismatches below are printed by rank 0 only; two-hop mismatches are printed by every rank.
+   // ==================== check left value ====================
+   for (uint ic=0; ic<ncells; ic++){
+      Test_check[ic] = Test[nlft[ic]];
+   }
+
+   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+
+   for (uint ic=0; ic<ncells_global; ic++){
+      if (Test_global[nlft_global[ic]] != Test_check_global[ic]) {
+         if (mype == 0) printf("%d: Error with nlft for cell %d -- nlft %d global %d check %d\n",mype,ic,nlft_global[ic],Test_global[nlft_global[ic]],Test_check_global[ic]);
+      }
+   }
+   
+   // ==================== check left left value ====================
+   for (uint ic=0; ic<ncells; ic++){
+      Test_check[ic] = Test[nlft[nlft[ic]]];
+   }
+
+   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+
+   for (uint ic=0; ic<ncells_global; ic++){
+      if (Test_global[nlft_global[nlft_global[ic]]] != Test_check_global[ic]) {
+         printf("%d: Error with nlft nlft for cell %5d -- nlftg %5d nlftg nlftg %5d global %5d\n",
+            mype,ic,nlft_global[ic],nlft_global[nlft_global[ic]],Test_global[nlft_global[nlft_global[ic]]]);
+         printf("%d:                         check %5d -- nlftl %5d nlftl nlftl %5d check  %5d\n",
+            mype,ic,nlft[ic],nlft[nlft[ic]],Test_check_global[ic]);
+      }
+   }
+   
+   // ==================== check right value ====================
+   for (uint ic=0; ic<ncells; ic++){
+      Test_check[ic] = Test[nrht[ic]];
+   }
+
+   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+
+   for (uint ic=0; ic<ncells_global; ic++){
+      if (Test_global[nrht_global[ic]] != Test_check_global[ic]) {
+         if (mype == 0) printf("%d: Error with nrht for cell %d -- %d %d\n",mype,ic,Test_global[nrht_global[ic]],Test_check_global[ic]);
+      }
+   }
+   
+   // ==================== check right right value ====================
+   for (uint ic=0; ic<ncells; ic++){
+      Test_check[ic] = Test[nrht[nrht[ic]]];
+   }
+
+   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+
+   for (uint ic=0; ic<ncells_global; ic++){
+      if (Test_global[nrht_global[nrht_global[ic]]] != Test_check_global[ic]) {
+         printf("%d: Error with nrht nrht for cell %5d -- nrhtg %5d nrhtg nrhtg %5d global %5d\n",
+            mype,ic,nrht_global[ic],nrht_global[nrht_global[ic]],Test_global[nrht_global[nrht_global[ic]]]);
+         printf("%d:                         check %5d -- nrhtl %5d nrhtl nrhtl %5d check  %5d\n",
+            mype,ic,nrht[ic],nrht[nrht[ic]],Test_check_global[ic]);
+      }
+   }
+
+   // ==================== check bottom value ====================
+   for (uint ic=0; ic<ncells; ic++){
+      Test_check[ic] = Test[nbot[ic]];
+   }
+
+   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+
+   for (uint ic=0; ic<ncells_global; ic++){
+      if (Test_global[nbot_global[ic]] != Test_check_global[ic]) {
+         if (mype == 0) printf("%d: Error with nbot for cell %d -- %d %d\n",mype,ic,Test_global[nbot_global[ic]],Test_check_global[ic]);
+      }
+   }
+   
+   // ==================== check bottom bottom value ====================
+   for (uint ic=0; ic<ncells; ic++){
+      Test_check[ic] = Test[nbot[nbot[ic]]];
+   }
+
+   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+
+   for (uint ic=0; ic<ncells_global; ic++){
+      if (Test_global[nbot_global[nbot_global[ic]]] != Test_check_global[ic]) {
+         printf("%d: Error with nbot nbot for cell %5d -- nbotg %5d nbotg nbotg %5d global %5d\n",
+            mype,ic,nbot_global[ic],nbot_global[nbot_global[ic]],Test_global[nbot_global[nbot_global[ic]]]);
+         printf("%d:                         check %5d -- nbotl %5d nbotl nbotl %5d check  %5d\n",
+            mype,ic,nbot[ic],nbot[nbot[ic]],Test_check_global[ic]);
+      }
+   }
+   
+   // ==================== check top value ====================
+   for (uint ic=0; ic<ncells; ic++){
+      Test_check[ic] = Test[ntop[ic]];
+   }
+
+   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+
+   for (uint ic=0; ic<ncells_global; ic++){
+      if (Test_global[ntop_global[ic]] != Test_check_global[ic]) {
+         if (mype == 0) printf("%d: Error with ntop for cell %d -- %d %d\n",mype,ic,Test_global[ntop_global[ic]],Test_check_global[ic]);
+      }
+   }
+
+   // ==================== check top top value ====================
+   for (uint ic=0; ic<ncells; ic++){
+      Test_check[ic] = Test[ntop[ntop[ic]]];
+   }
+
+   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+
+   for (uint ic=0; ic<ncells_global; ic++){
+      if (Test_global[ntop_global[ntop_global[ic]]] != Test_check_global[ic]) {
+         printf("%d: Error with ntop ntop for cell %5d -- ntopg %5d ntopg ntopg %5d global %5d\n",
+            mype,ic,ntop_global[ic],ntop_global[ntop_global[ic]],Test_global[ntop_global[ntop_global[ic]]]);
+         printf("%d:                         check %5d -- ntopl %5d ntopl ntopl %5d check  %5d\n",
+            mype,ic,ntop[ic],ntop[ntop[ic]],Test_check_global[ic]);
+      }
+   }
+#else
+   // Without MPI there is nothing to compare; the never-true branch below only references the parameters to silence unused-argument warnings.
+   if (1 == 2) printf("DEBUG -- ncells_global %d ncells_ghost %d mesh_global %p nsizes[0] %d ndispl[0] %d\n",
+               ncells_global,ncells_ghost,mesh_global,nsizes[0],ndispl[0]);
+#endif
+}
+
+#ifdef HAVE_OPENCL
+// Debug cross-check of this rank's neighbor arrays (nlft, nrht, nbot, ntop)
+// against mesh_global's neighbor arrays and against the copies held in the
+// OpenCL buffers dev_nlft, dev_nrht, dev_nbot and dev_ntop.
+//
+// Every owned cell is tagged with mype*1000+ic. For each direction the tag
+// reached through the local neighbor array (one hop, then two hops) is gathered
+// from all ranks with MPI_Allgatherv and compared with the tag reached through
+// the corresponding global neighbor array. The device buffers are then read
+// back and compared entry by entry with the host arrays over ncells_ghost cells.
+//
+// One-hop mismatches (right, bottom, top) are printed by rank 0 only; the one-hop
+// left result is gathered but never compared. Two-hop and device mismatches are
+// printed by every rank that sees them.
+//
+// The two-hop top check and the device read-back run exactly once. They used to
+// be repeated verbatim at the end of the function, which doubled the collective,
+// the four device reads and every mismatch message.
+//
+//   mesh_global  mesh supplying the reference ncells, nlft, nrht, nbot, ntop
+//   nsizes       per-rank receive counts; nsizes[mype] is this rank's send count
+//   ndispl       per-rank receive displacements for MPI_Allgatherv
+//
+// Without HAVE_MPI no check is performed.
+void Mesh::compare_neighbors_all_to_gpu_local(Mesh *mesh_global, int *nsizes, int *ndispl)
+{
+#ifdef HAVE_MPI
+   cl_command_queue command_queue = ezcl_get_command_queue();
+
+   size_t &ncells_global = mesh_global->ncells;
+   int *nlft_global = mesh_global->nlft;
+   int *nrht_global = mesh_global->nrht;
+   int *nbot_global = mesh_global->nbot;
+   int *ntop_global = mesh_global->ntop;
+
+   // Tag every owned cell with a value that encodes rank and local index.
+   vector<int> Test(ncells_ghost);
+   for(uint ic=0; ic<ncells; ic++){
+      Test[ic] = mype*1000 +ic;
+   }
+   // NOTE(review): presumably fills the entries past ncells from other ranks -- confirm.
+   if (numpe > 1) L7_Update(&Test[0], L7_INT, cell_handle);
+
+   // Reference tags for the whole mesh, indexed by global cell number.
+   vector<int> Test_global(ncells_global);
+   MPI_Allgatherv(&Test[0], nsizes[mype], MPI_INT, &Test_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+
+   // Scratch buffers: this rank's result and its gathered global image.
+   vector<int> Test_check(ncells);
+   vector<int> Test_check_global(ncells_global);
+
+   // ==================== check left value ====================
+   for (uint ic=0; ic<ncells; ic++){
+      Test_check[ic] = Test[nlft[ic]];
+   }
+
+   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+   // The one-hop left result is gathered but not compared; only the two-hop
+   // left check below reports mismatches.
+
+   // ==================== check left left value ====================
+   for (uint ic=0; ic<ncells; ic++){
+      Test_check[ic] = Test[nlft[nlft[ic]]];
+   }
+
+   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+
+   for (uint ic=0; ic<ncells_global; ic++){
+      if (Test_global[nlft_global[nlft_global[ic]]] != Test_check_global[ic]) {
+         printf("%d: Error with nlft nlft for cell %5d -- nlftg %5d nlftg nlftg %5d global %5d\n",
+            mype,ic,nlft_global[ic],nlft_global[nlft_global[ic]],Test_global[nlft_global[nlft_global[ic]]]);
+         printf("%d:                           check %5d -- nlftl %5d nlftl nlftl %5d check  %5d\n",
+            mype,ic,nlft[ic],nlft[nlft[ic]],Test_check_global[ic]);
+      }
+   }
+
+   // ==================== check right value ====================
+   for (uint ic=0; ic<ncells; ic++){
+      Test_check[ic] = Test[nrht[ic]];
+   }
+
+   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+
+   for (uint ic=0; ic<ncells_global; ic++){
+      if (Test_global[nrht_global[ic]] != Test_check_global[ic]) {
+         if (mype == 0) printf("%d: Error with nrht for cell %d -- %d %d\n",mype,ic,Test_global[nrht_global[ic]],Test_check_global[ic]);
+      }
+   }
+
+   // ==================== check right right value ====================
+   for (uint ic=0; ic<ncells; ic++){
+      Test_check[ic] = Test[nrht[nrht[ic]]];
+   }
+
+   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+
+   for (uint ic=0; ic<ncells_global; ic++){
+      if (Test_global[nrht_global[nrht_global[ic]]] != Test_check_global[ic]) {
+         printf("%d: Error with nrht nrht for cell %5d -- nrhtg %5d nrhtg nrhtg %5d global %5d\n",
+            mype,ic,nrht_global[ic],nrht_global[nrht_global[ic]],Test_global[nrht_global[nrht_global[ic]]]);
+         printf("%d:                         check %5d -- nrhtl %5d nrhtl nrhtl %5d check  %5d\n",
+            mype,ic,nrht[ic],nrht[nrht[ic]],Test_check_global[ic]);
+      }
+   }
+
+   // ==================== check bottom value ====================
+   for (uint ic=0; ic<ncells; ic++){
+      Test_check[ic] = Test[nbot[ic]];
+   }
+
+   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+
+   for (uint ic=0; ic<ncells_global; ic++){
+      if (Test_global[nbot_global[ic]] != Test_check_global[ic]) {
+         if (mype == 0) printf("%d: Error with nbot for cell %d -- %d %d\n",mype,ic,Test_global[nbot_global[ic]],Test_check_global[ic]);
+      }
+   }
+
+   // ==================== check bottom bottom value ====================
+   for (uint ic=0; ic<ncells; ic++){
+      Test_check[ic] = Test[nbot[nbot[ic]]];
+   }
+
+   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+
+   for (uint ic=0; ic<ncells_global; ic++){
+      if (Test_global[nbot_global[nbot_global[ic]]] != Test_check_global[ic]) {
+         printf("%d: Error with nbot nbot for cell %5d -- nbotg %5d nbotg nbotg %5d global %5d\n",
+            mype,ic,nbot_global[ic],nbot_global[nbot_global[ic]],Test_global[nbot_global[nbot_global[ic]]]);
+         printf("%d:                         check %5d -- nbotl %5d nbotl nbotl %5d check  %5d\n",
+            mype,ic,nbot[ic],nbot[nbot[ic]],Test_check_global[ic]);
+      }
+   }
+
+   // ==================== check top value ====================
+   for (uint ic=0; ic<ncells; ic++){
+      Test_check[ic] = Test[ntop[ic]];
+   }
+
+   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+
+   for (uint ic=0; ic<ncells_global; ic++){
+      if (Test_global[ntop_global[ic]] != Test_check_global[ic]) {
+         if (mype == 0) printf("%d: Error with ntop for cell %d -- %d %d\n",mype,ic,Test_global[ntop_global[ic]],Test_check_global[ic]);
+      }
+   }
+
+   // ==================== check top top value ====================
+   for (uint ic=0; ic<ncells; ic++){
+      Test_check[ic] = Test[ntop[ntop[ic]]];
+   }
+
+   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+
+   for (uint ic=0; ic<ncells_global; ic++){
+      if (Test_global[ntop_global[ntop_global[ic]]] != Test_check_global[ic]) {
+         printf("%d: Error with ntop ntop for cell %5d -- ntopg %5d ntopg ntopg %5d global %5d\n",
+            mype,ic,ntop_global[ic],ntop_global[ntop_global[ic]],Test_global[ntop_global[ntop_global[ic]]]);
+         printf("%d:                         check %5d -- ntopl %5d ntopl ntopl %5d check  %5d\n",
+            mype,ic,ntop[ic],ntop[ntop[ic]],Test_check_global[ic]);
+      }
+   }
+
+   // checking gpu results
+   // Host-side landing buffers for the four device neighbor arrays.
+   vector<int> nlft_check(ncells_ghost);
+   vector<int> nrht_check(ncells_ghost);
+   vector<int> nbot_check(ncells_ghost);
+   vector<int> ntop_check(ncells_ghost);
+   // Three non-blocking reads (CL_FALSE) followed by a blocking one (CL_TRUE).
+   // NOTE(review): assumes the blocking read implies the earlier three are done -- confirm.
+   ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, ncells_ghost*sizeof(cl_int),  &nlft_check[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells_ghost*sizeof(cl_int),  &nrht_check[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, ncells_ghost*sizeof(cl_int),  &nbot_check[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_TRUE,  0, ncells_ghost*sizeof(cl_int),  &ntop_check[0], NULL);
+
+   // Compare host and device arrays over all ncells_ghost entries.
+   for (uint ic=0; ic<ncells_ghost; ic++){
+      if (nlft[ic] != nlft_check[ic]) printf("%d: Error with gpu calculated nlft for cell %d nlft %d check %d\n",mype,ic,nlft[ic],nlft_check[ic]);
+      if (nrht[ic] != nrht_check[ic]) printf("%d: Error with gpu calculated nrht for cell %d nrht %d check %d\n",mype,ic,nrht[ic],nrht_check[ic]);
+      if (nbot[ic] != nbot_check[ic]) printf("%d: Error with gpu calculated nbot for cell %d nbot %d check %d\n",mype,ic,nbot[ic],nbot_check[ic]);
+      if (ntop[ic] != ntop_check[ic]) printf("%d: Error with gpu calculated ntop for cell %d ntop %d check %d\n",mype,ic,ntop[ic],ntop_check[ic]);
+   }
+#else
+   // Just to get rid of compiler warnings
+   if (1 == 2) printf("DEBUG -- mesh_global %p nsizes[0] %d ndispl[0] %d\n",
+               mesh_global,nsizes[0],ndispl[0]);
+#endif
+}
+
+// Reads dev_i, dev_j, dev_level and dev_celltype back from the device and prints
+// a DEBUG line for each of the first ncells entries that differs from the host
+// arrays i, j, level and celltype.
+void Mesh::compare_indices_gpu_global_to_cpu_global(void)
+{
+   cl_command_queue queue = ezcl_get_command_queue();
+   vector<int> i_dev(ncells), j_dev(ncells), level_dev(ncells), celltype_dev(ncells);
+   const size_t nbytes = ncells*sizeof(cl_int);
+   // Three non-blocking reads (CL_FALSE), then a blocking one (CL_TRUE).
+   ezcl_enqueue_read_buffer(queue, dev_i,        CL_FALSE, 0, nbytes, &i_dev[0],        NULL);
+   ezcl_enqueue_read_buffer(queue, dev_j,        CL_FALSE, 0, nbytes, &j_dev[0],        NULL);
+   ezcl_enqueue_read_buffer(queue, dev_level,    CL_FALSE, 0, nbytes, &level_dev[0],    NULL);
+   ezcl_enqueue_read_buffer(queue, dev_celltype, CL_TRUE,  0, nbytes, &celltype_dev[0], NULL);
+   for (uint cell = 0; cell < ncells; cell++){
+      if (i[cell]        != i_dev[cell] )        printf("DEBUG -- i: ic %d i %d i_check %d\n",cell, i[cell], i_dev[cell]);
+      if (j[cell]        != j_dev[cell] )        printf("DEBUG -- j: ic %d j %d j_check %d\n",cell, j[cell], j_dev[cell]);
+      if (level[cell]    != level_dev[cell] )    printf("DEBUG -- level: ic %d level %d level_check %d\n",cell, level[cell], level_dev[cell]);
+      if (celltype[cell] != celltype_dev[cell] ) printf("DEBUG -- celltype: ic %d celltype %d celltype_check %d\n",cell, celltype[cell], celltype_dev[cell]);
+   }
+}
+#endif
+
+// Gathers this rank's celltype, i, j and level arrays from every rank (when built
+// with HAVE_MPI) and prints a "DEBUG rezone 3" line for each global cell whose
+// gathered value differs from the corresponding array of mesh_global.
+//
+//   ncells_global  number of cells compared and size of the gather buffers
+//   mesh_global    mesh providing the reference celltype, i, j and level arrays
+//   nsizes         per-rank receive counts; nsizes[mype] is this rank's send count
+//   ndispl         per-rank receive displacements
+//   cycle          cycle number echoed in each message
+//
+// Without HAVE_MPI nothing is gathered, so the reference arrays are compared with
+// the value-initialized gather buffers.
+void Mesh::compare_indices_cpu_local_to_cpu_global(uint ncells_global, Mesh *mesh_global, int *nsizes, int *ndispl, int cycle)
+{
+   // Reference arrays taken from the global mesh.
+   int *ref_celltype = mesh_global->celltype;
+   int *ref_i        = mesh_global->i;
+   int *ref_j        = mesh_global->j;
+   int *ref_level    = mesh_global->level;
+
+   // Gather buffers, one slot per global cell.
+   vector<int> i_gathered(ncells_global);
+   vector<int> j_gathered(ncells_global);
+   vector<int> level_gathered(ncells_global);
+   vector<int> celltype_gathered(ncells_global);
+
+#ifdef HAVE_MPI
+   const int nsend = nsizes[mype];
+   MPI_Allgatherv(&celltype[0], nsend, MPI_INT, &celltype_gathered[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+   MPI_Allgatherv(&i[0],        nsend, MPI_INT, &i_gathered[0],        &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+   MPI_Allgatherv(&j[0],        nsend, MPI_INT, &j_gathered[0],        &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+   MPI_Allgatherv(&level[0],    nsend, MPI_INT, &level_gathered[0],    &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+#else
+   // Just to get rid of compiler warnings
+   if (1 == 2) printf("DEBUG -- nsizes[0] %d ndispl[0] %d\n",
+               nsizes[0],ndispl[0]);
+#endif
+
+   for (uint cell = 0; cell < ncells_global; cell++){
+      if (ref_celltype[cell] != celltype_gathered[cell]) {
+         printf("DEBUG rezone 3 at cycle %d celltype_global & celltype_check_global %d %d  %d  \n",cycle,cell,ref_celltype[cell],celltype_gathered[cell]);
+      }
+      if (ref_i[cell] != i_gathered[cell]) {
+         printf("DEBUG rezone 3 at cycle %d i_global & i_check_global %d %d  %d  \n",cycle,cell,ref_i[cell],i_gathered[cell]);
+      }
+      if (ref_j[cell] != j_gathered[cell]) {
+         printf("DEBUG rezone 3 at cycle %d j_global & j_check_global %d %d  %d  \n",cycle,cell,ref_j[cell],j_gathered[cell]);
+      }
+      if (ref_level[cell] != level_gathered[cell]) {
+         printf("DEBUG rezone 3 at cycle %d level_global & level_check_global %d %d  %d  \n",cycle,cell,ref_level[cell],level_gathered[cell]);
+      }
+   }
+}
+
+#ifdef HAVE_OPENCL
+// Four-way consistency check of the level, celltype, i and j index arrays:
+//   1. this rank's device buffers against its host arrays ("rezone 1"),
+//   2. the device values gathered from all ranks against mesh_global ("rezone 2"),
+//   3. the host arrays gathered from all ranks against mesh_global ("rezone 3"),
+//   4. mesh_global's device buffers against mesh_global's host arrays ("rezone 4").
+// Each differing entry produces one printf line. nsizes/ndispl are the counts and
+// displacements handed to MPI_Allgatherv, ncycle is echoed in the rezone 3 and 4
+// messages. Without HAVE_MPI no check is performed.
+void Mesh::compare_indices_all_to_gpu_local(Mesh *mesh_global, uint ncells_global, int *nsizes, int *ndispl, int ncycle)
+{
+#ifdef HAVE_MPI
+   cl_command_queue queue = ezcl_get_command_queue();
+   int *ref_level    = mesh_global->level;
+   int *ref_celltype = mesh_global->celltype;
+   int *ref_i        = mesh_global->i;
+   int *ref_j        = mesh_global->j;
+
+   cl_mem &ref_dev_celltype = mesh_global->dev_celltype;
+   cl_mem &ref_dev_i        = mesh_global->dev_i;
+   cl_mem &ref_dev_j        = mesh_global->dev_j;
+   cl_mem &ref_dev_level    = mesh_global->dev_level;
+
+   // Stage 1: local device buffers versus local host arrays.
+   vector<int> level_dev(ncells), celltype_dev(ncells), i_dev(ncells), j_dev(ncells);
+   ezcl_enqueue_read_buffer(queue, dev_level,    CL_FALSE, 0, ncells*sizeof(cl_int),  &level_dev[0],     NULL);
+   ezcl_enqueue_read_buffer(queue, dev_celltype, CL_FALSE, 0, ncells*sizeof(cl_int),  &celltype_dev[0],  NULL);
+   ezcl_enqueue_read_buffer(queue, dev_i,        CL_FALSE, 0, ncells*sizeof(cl_int),  &i_dev[0],         NULL);
+   ezcl_enqueue_read_buffer(queue, dev_j,        CL_TRUE,  0, ncells*sizeof(cl_int),  &j_dev[0],         NULL);
+   for (uint cell = 0; cell < ncells; cell++){
+      if (level[cell] != level_dev[cell] )       printf("%d: DEBUG rezone 1 cell %d level %d level_check %d\n",mype, cell, level[cell], level_dev[cell]);
+      if (celltype[cell] != celltype_dev[cell] ) printf("%d: DEBUG rezone 1 cell %d celltype %d celltype_check %d\n",mype, cell, celltype[cell], celltype_dev[cell]);
+      if (i[cell] != i_dev[cell] )               printf("%d: DEBUG rezone 1 cell %d i %d i_check %d\n",mype, cell, i[cell], i_dev[cell]);
+      if (j[cell] != j_dev[cell] )               printf("%d: DEBUG rezone 1 cell %d j %d j_check %d\n",mype, cell, j[cell], j_dev[cell]);
+   }
+
+   // Stage 2: device values gathered from every rank versus the global host arrays.
+   vector<int> celltype_all(ncells_global), i_all(ncells_global), j_all(ncells_global), level_all(ncells_global);
+   MPI_Allgatherv(&celltype_dev[0], nsizes[mype], MPI_INT,    &celltype_all[0], &nsizes[0], &ndispl[0], MPI_INT,    MPI_COMM_WORLD);
+   MPI_Allgatherv(&i_dev[0],        nsizes[mype], MPI_INT,    &i_all[0],        &nsizes[0], &ndispl[0], MPI_INT,    MPI_COMM_WORLD);
+   MPI_Allgatherv(&j_dev[0],        nsizes[mype], MPI_INT,    &j_all[0],        &nsizes[0], &ndispl[0], MPI_INT,    MPI_COMM_WORLD);
+   MPI_Allgatherv(&level_dev[0],    nsizes[mype], MPI_INT,    &level_all[0],    &nsizes[0], &ndispl[0], MPI_INT,    MPI_COMM_WORLD);
+   for (uint cell = 0; cell < ncells_global; cell++){
+      if (ref_level[cell] != level_all[cell] )       printf("%d: DEBUG rezone 2 cell %d level_global %d level_check_global %d\n",mype, cell, ref_level[cell], level_all[cell]);
+      if (ref_celltype[cell] != celltype_all[cell] ) printf("%d: DEBUG rezone 2 cell %d celltype_global %d celltype_check_global %d\n",mype, cell, ref_celltype[cell], celltype_all[cell]);
+      if (ref_i[cell] != i_all[cell] )               printf("%d: DEBUG rezone 2 cell %d i_global %d i_check_global %d\n",mype, cell, ref_i[cell], i_all[cell]);
+      if (ref_j[cell] != j_all[cell] )               printf("%d: DEBUG rezone 2 cell %d j_global %d j_check_global %d\n",mype, cell, ref_j[cell], j_all[cell]);
+   }
+
+   // Stage 3: host arrays gathered from every rank versus the global host arrays.
+   MPI_Allgatherv(&celltype[0], nsizes[mype], MPI_INT,    &celltype_all[0], &nsizes[0], &ndispl[0], MPI_INT,    MPI_COMM_WORLD);
+   MPI_Allgatherv(&i[0],        nsizes[mype], MPI_INT,    &i_all[0],        &nsizes[0], &ndispl[0], MPI_INT,    MPI_COMM_WORLD);
+   MPI_Allgatherv(&j[0],        nsizes[mype], MPI_INT,    &j_all[0],        &nsizes[0], &ndispl[0], MPI_INT,    MPI_COMM_WORLD);
+   MPI_Allgatherv(&level[0],    nsizes[mype], MPI_INT,    &level_all[0],    &nsizes[0], &ndispl[0], MPI_INT,    MPI_COMM_WORLD);
+   for (uint cell = 0; cell < ncells_global; cell++){
+      if (ref_celltype[cell] != celltype_all[cell])  printf("DEBUG rezone 3 at cycle %d celltype_global & celltype_check_global %d %d  %d  \n",ncycle,cell,ref_celltype[cell],celltype_all[cell]);
+      if (ref_i[cell] != i_all[cell])                printf("DEBUG rezone 3 at cycle %d i_global & i_check_global %d %d  %d  \n",ncycle,cell,ref_i[cell],i_all[cell]);
+      if (ref_j[cell] != j_all[cell])                printf("DEBUG rezone 3 at cycle %d j_global & j_check_global %d %d  %d  \n",ncycle,cell,ref_j[cell],j_all[cell]);
+      if (ref_level[cell] != level_all[cell])        printf("DEBUG rezone 3 at cycle %d level_global & level_check_global %d %d  %d  \n",ncycle,cell,ref_level[cell],level_all[cell]);
+   }
+
+   // Stage 4: the global mesh's device buffers versus its host arrays.
+   ezcl_enqueue_read_buffer(queue, ref_dev_celltype, CL_FALSE, 0, ncells_global*sizeof(cl_int),  &celltype_all[0], NULL);
+   ezcl_enqueue_read_buffer(queue, ref_dev_i,        CL_FALSE, 0, ncells_global*sizeof(cl_int),  &i_all[0],        NULL);
+   ezcl_enqueue_read_buffer(queue, ref_dev_j,        CL_FALSE, 0, ncells_global*sizeof(cl_int),  &j_all[0],        NULL);
+   ezcl_enqueue_read_buffer(queue, ref_dev_level,    CL_TRUE,  0, ncells_global*sizeof(cl_int),  &level_all[0],    NULL);
+   for (uint cell = 0; cell < ncells_global; cell++){
+      if (ref_celltype[cell] != celltype_all[cell])  printf("DEBUG rezone 4 at cycle %d celltype_global & celltype_check_global %d %d  %d  \n",ncycle,cell,ref_celltype[cell],celltype_all[cell]);
+      if (ref_i[cell] != i_all[cell])                printf("DEBUG rezone 4 at cycle %d i_global & i_check_global %d %d  %d  \n",ncycle,cell,ref_i[cell],i_all[cell]);
+      if (ref_j[cell] != j_all[cell])                printf("DEBUG rezone 4 at cycle %d j_global & j_check_global %d %d  %d  \n",ncycle,cell,ref_j[cell],j_all[cell]);
+      if (ref_level[cell] != level_all[cell])        printf("DEBUG rezone 4 at cycle %d level_global & level_check_global %d %d  %d  \n",ncycle,cell,ref_level[cell],level_all[cell]);
+   }
+#else
+   // Just to get rid of compiler warnings
+   if (1 == 2) printf("DEBUG -- mesh_global %p ncells_global %d nsizes[0] %d ndispl[0] %d ncycle %d\n",
+               mesh_global,ncells_global,nsizes[0],ndispl[0],ncycle);
+#endif
+}
+
+// Reads dev_x, dev_dx, dev_y, dev_dy and dev_H (double) back from the device and checks the
+// first ncells entries against the host x, dx, y, dy members and the H argument: coordinates
+// must be equal, H may differ by at most CONSERVATION_EPS. The first mismatch is printed and
+// the process exits with EXIT_FAILURE (previously status 0, which signalled success).
+void Mesh::compare_coordinates_gpu_global_to_cpu_global_double(cl_mem dev_x, cl_mem dev_dx, cl_mem dev_y, cl_mem dev_dy, cl_mem dev_H, double *H)
+{
+   cl_command_queue command_queue = ezcl_get_command_queue();
+   vector<spatial_t> x_check(ncells), dx_check(ncells), y_check(ncells), dy_check(ncells);
+   vector<double> H_check(ncells);
+   ezcl_enqueue_read_buffer(command_queue, dev_x,   CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &x_check[0],  NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_dx,  CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &dx_check[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_y,   CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &y_check[0],  NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_dy,  CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &dy_check[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_H,   CL_TRUE,  0, ncells*sizeof(cl_double), &H_check[0],  NULL);
+   for (uint ic = 0; ic < ncells; ic++){
+      if (x[ic] != x_check[ic] || dx[ic] != dx_check[ic] || y[ic] != y_check[ic] || dy[ic] != dy_check[ic] ) {
+         printf("Error -- mismatch in spatial coordinates for cell %d is gpu %lf %lf %lf %lf cpu %lf %lf %lf %lf\n",ic,x_check[ic],dx_check[ic],y_check[ic],dy_check[ic],x[ic],dx[ic],y[ic],dy[ic]);
+         exit(EXIT_FAILURE);
+      }
+   }
+   for (uint ic = 0; ic < ncells; ic++){
+      if (fabs(H[ic] - H_check[ic]) > CONSERVATION_EPS) {
+         printf("Error -- mismatch in H for cell %d is gpu %lf cpu %lf\n",ic,H_check[ic],H[ic]);
+         exit(EXIT_FAILURE);
+      }
+   }
+}
+
+// Reads dev_x, dev_dx, dev_y, dev_dy and dev_H (float) back from the device and checks the
+// first ncells entries against the host x, dx, y, dy members and the H argument: coordinates
+// must be equal, H may differ by at most CONSERVATION_EPS. The first mismatch is printed and
+// the process exits with EXIT_FAILURE (previously status 0, which signalled success).
+void Mesh::compare_coordinates_gpu_global_to_cpu_global_float(cl_mem dev_x, cl_mem dev_dx, cl_mem dev_y, cl_mem dev_dy, cl_mem dev_H, float *H)
+{
+   cl_command_queue command_queue = ezcl_get_command_queue();
+   vector<spatial_t> x_check(ncells), dx_check(ncells), y_check(ncells), dy_check(ncells);
+   vector<float> H_check(ncells);
+   ezcl_enqueue_read_buffer(command_queue, dev_x,   CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &x_check[0],  NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_dx,  CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &dx_check[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_y,   CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &y_check[0],  NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_dy,  CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &dy_check[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_H,   CL_TRUE,  0, ncells*sizeof(cl_float), &H_check[0],  NULL);
+   for (uint ic = 0; ic < ncells; ic++){
+      if (x[ic] != x_check[ic] || dx[ic] != dx_check[ic] || y[ic] != y_check[ic] || dy[ic] != dy_check[ic] ) {
+         printf("Error -- mismatch in spatial coordinates for cell %d is gpu %lf %lf %lf %lf cpu %lf %lf %lf %lf\n",ic,x_check[ic],dx_check[ic],y_check[ic],dy_check[ic],x[ic],dx[ic],y[ic],dy[ic]);
+         exit(EXIT_FAILURE);
+      }
+   }
+   for (uint ic = 0; ic < ncells; ic++){
+      if (fabs(H[ic] - H_check[ic]) > CONSERVATION_EPS) {
+         printf("Error -- mismatch in H for cell %d is gpu %lf cpu %lf\n",ic,H_check[ic],H[ic]);
+         exit(EXIT_FAILURE);
+      }
+   }
+}
+#endif
+
+// Gathers the x, dx, y, dy and H arguments from all ranks (HAVE_MPI builds) and
+// prints a "DEBUG graphics" line for every global cell whose gathered value
+// differs from x_global, dx_global, y_global, dy_global or H_global by more than
+// STATE_EPS. nsizes/ndispl are the Allgatherv counts and displacements; cycle is
+// echoed in each message. Without HAVE_MPI nothing is gathered into the buffers.
+void Mesh::compare_coordinates_cpu_local_to_cpu_global_double(uint ncells_global, int *nsizes, int *ndispl, spatial_t *x, spatial_t *dx, spatial_t *y, spatial_t *dy, double *H, spatial_t *x_global, spatial_t *dx_global, spatial_t *y_global, spatial_t *dy_global, double *H_global, int cycle)
+{
+   vector<spatial_t> x_all(ncells_global), dx_all(ncells_global), y_all(ncells_global), dy_all(ncells_global);
+   vector<double> H_all(ncells_global);
+#ifdef HAVE_MPI
+   MPI_Allgatherv(&x[0],  nsizes[mype], MPI_SPATIAL_T, &x_all[0],  &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD);
+   MPI_Allgatherv(&dx[0], nsizes[mype], MPI_SPATIAL_T, &dx_all[0], &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD);
+   MPI_Allgatherv(&y[0],  nsizes[mype], MPI_SPATIAL_T, &y_all[0],  &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD);
+   MPI_Allgatherv(&dy[0], nsizes[mype], MPI_SPATIAL_T, &dy_all[0], &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD);
+   MPI_Allgatherv(&H[0],  nsizes[mype], MPI_DOUBLE, &H_all[0],  &nsizes[0], &ndispl[0], MPI_DOUBLE, MPI_COMM_WORLD);
+#else
+   // Just to get rid of compiler warnings
+   if (1 == 2) printf("DEBUG -- nsizes[0] %d ndispl[0] %d x %p dx %p y %p dy %p H %p\n",
+               nsizes[0],ndispl[0],x,dx,y,dy,H);
+#endif
+
+   for (uint cell = 0; cell < ncells_global; cell++){
+      if (fabs(x_global[cell] -x_all[cell] ) > STATE_EPS) printf("DEBUG graphics at cycle %d x_global & x_check_global  %d %lf %lf \n",cycle,cell,x_global[cell], x_all[cell]);
+      if (fabs(dx_global[cell]-dx_all[cell]) > STATE_EPS) printf("DEBUG graphics at cycle %d dx_global & dx_check_global %d %lf %lf \n",cycle,cell,dx_global[cell],dx_all[cell]);
+      if (fabs(y_global[cell] -y_all[cell] ) > STATE_EPS) printf("DEBUG graphics at cycle %d y_global & y_check_global  %d %lf %lf \n",cycle,cell,y_global[cell], y_all[cell]);
+      if (fabs(dy_global[cell]-dy_all[cell]) > STATE_EPS) printf("DEBUG graphics at cycle %d dy_global & dy_check_global %d %lf %lf \n",cycle,cell,dy_global[cell],dy_all[cell]);
+      if (fabs(H_global[cell] -H_all[cell] ) > STATE_EPS) printf("DEBUG graphics at cycle %d H_global & H_check_global  %d %lf %lf \n",cycle,cell,H_global[cell], H_all[cell]);
+   }
+}
+
+// Single-precision-H variant: gathers the x, dx, y, dy and H arguments from all
+// ranks (HAVE_MPI builds) and prints a "DEBUG graphics" line for every global cell
+// whose gathered value differs from x_global, dx_global, y_global, dy_global or
+// H_global by more than STATE_EPS. nsizes/ndispl are the Allgatherv counts and
+// displacements; cycle is echoed. Without HAVE_MPI nothing is gathered.
+void Mesh::compare_coordinates_cpu_local_to_cpu_global_float(uint ncells_global, int *nsizes, int *ndispl, spatial_t *x, spatial_t *dx, spatial_t *y, spatial_t *dy, float *H, spatial_t *x_global, spatial_t *dx_global, spatial_t *y_global, spatial_t *dy_global, float *H_global, int cycle)
+{
+   vector<spatial_t> x_all(ncells_global), dx_all(ncells_global), y_all(ncells_global), dy_all(ncells_global);
+   vector<float> H_all(ncells_global);
+#ifdef HAVE_MPI
+   MPI_Allgatherv(&x[0],  nsizes[mype], MPI_SPATIAL_T, &x_all[0],  &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD);
+   MPI_Allgatherv(&dx[0], nsizes[mype], MPI_SPATIAL_T, &dx_all[0], &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD);
+   MPI_Allgatherv(&y[0],  nsizes[mype], MPI_SPATIAL_T, &y_all[0],  &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD);
+   MPI_Allgatherv(&dy[0], nsizes[mype], MPI_SPATIAL_T, &dy_all[0], &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD);
+   MPI_Allgatherv(&H[0],  nsizes[mype], MPI_FLOAT,     &H_all[0],  &nsizes[0], &ndispl[0], MPI_FLOAT,     MPI_COMM_WORLD);
+#else
+   // Just to get rid of compiler warnings
+   if (1 == 2) printf("DEBUG -- nsizes[0] %d ndispl[0] %d x %p dx %p y %p dy %p H %p\n",
+               nsizes[0],ndispl[0],x,dx,y,dy,H);
+#endif
+
+   for (uint cell = 0; cell < ncells_global; cell++){
+      if (fabs(x_global[cell] -x_all[cell] ) > STATE_EPS) printf("DEBUG graphics at cycle %d x_global & x_check_global  %d %lf %lf \n",cycle,cell,x_global[cell], x_all[cell]);
+      if (fabs(dx_global[cell]-dx_all[cell]) > STATE_EPS) printf("DEBUG graphics at cycle %d dx_global & dx_check_global %d %lf %lf \n",cycle,cell,dx_global[cell],dx_all[cell]);
+      if (fabs(y_global[cell] -y_all[cell] ) > STATE_EPS) printf("DEBUG graphics at cycle %d y_global & y_check_global  %d %lf %lf \n",cycle,cell,y_global[cell], y_all[cell]);
+      if (fabs(dy_global[cell]-dy_all[cell]) > STATE_EPS) printf("DEBUG graphics at cycle %d dy_global & dy_check_global %d %lf %lf \n",cycle,cell,dy_global[cell],dy_all[cell]);
+      if (fabs(H_global[cell] -H_all[cell] ) > STATE_EPS) printf("DEBUG graphics at cycle %d H_global & H_check_global  %d %lf %lf \n",cycle,cell,H_global[cell], H_all[cell]);
+   }
+}
+
+#ifdef HAVE_OPENCL
+// Reads dev_mpot back from the device (blocking read) and prints a DEBUG line for
+// each of the first ncells entries that differs from the host mpot array.
+void Mesh::compare_mpot_gpu_global_to_cpu_global(int *mpot, cl_mem dev_mpot)
+{
+   cl_command_queue queue = ezcl_get_command_queue();
+   vector<int> mpot_dev(ncells);
+   ezcl_enqueue_read_buffer(queue, dev_mpot,  CL_TRUE,  0, ncells*sizeof(cl_int), &mpot_dev[0], NULL);
+   for (uint cell=0; cell<ncells; cell++) {
+      if (mpot[cell] != mpot_dev[cell]) printf("DEBUG -- mpot: ic %d mpot %d mpot_check %d\n",cell, mpot[cell], mpot_dev[cell]);
+   }
+}
+#endif
+
+// Gathers mpot from all ranks into a global buffer (HAVE_MPI builds) and, on rank 0,
+// prints every global cell whose gathered value differs from mpot_global. The send
+// count is nsizes[mype], the same entry the receive side uses for this rank.
+void Mesh::compare_mpot_cpu_local_to_cpu_global(uint ncells_global, int *nsizes, int *ndispl, int *mpot, int *mpot_global, int cycle)
+{
+   vector<int>mpot_save_global(ncells_global);
+#ifdef HAVE_MPI
+   MPI_Allgatherv(&mpot[0], nsizes[mype], MPI_INT, &mpot_save_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+#else
+   // Just to get rid of compiler warnings
+   if (1 == 2) printf("DEBUG -- nsizes[0] %d ndispl[0] %d mpot %p\n",
+               nsizes[0],ndispl[0],mpot);
+#endif
+   for (uint ic = 0; ic < ncells_global; ic++){
+      if (mpot_global[ic] != mpot_save_global[ic] && mype == 0) printf("%d: DEBUG refine_potential 3 at cycle %d cell %d mpot_global & mpot_save_global %d %d \n",mype,cycle,ic,mpot_global[ic],mpot_save_global[ic]);
+   }
+}
+
+#ifdef HAVE_OPENCL
+// Four-way consistency check of the mpot arrays ("refine_potential 1" to "4"):
+//   1. dev_mpot against mpot on this rank (every rank prints),
+//   2. dev_mpot gathered from all ranks against mpot_global,
+//   3. mpot gathered from all ranks against mpot_global,
+//   4. dev_mpot_global against mpot_global.
+// Stages 2-4 print on rank 0 only. nsizes/ndispl are the Allgatherv counts and
+// displacements; ncycle is echoed in each message. No checks without HAVE_MPI.
+void Mesh::compare_mpot_all_to_gpu_local(int *mpot, int *mpot_global, cl_mem dev_mpot, cl_mem dev_mpot_global, uint ncells_global, int *nsizes, int *ndispl, int ncycle)
+{
+#ifdef HAVE_MPI
+   cl_command_queue queue = ezcl_get_command_queue();
+   const int nsend = nsizes[mype];
+
+   // Stage 1: local device copy versus local host array.
+   vector<int> mpot_dev(ncells);
+   ezcl_enqueue_read_buffer(queue, dev_mpot, CL_TRUE,  0, ncells*sizeof(cl_int), &mpot_dev[0], NULL);
+   for (uint cell = 0; cell < ncells; cell++){
+      if (mpot[cell] != mpot_dev[cell]) printf("%d: DEBUG refine_potential 1 at cycle %d cell %d mpot & mpot_save %d %d \n",mype,ncycle,cell,mpot[cell],mpot_dev[cell]);
+   }
+
+   // Stage 2: device values gathered from every rank versus mpot_global.
+   vector<int> mpot_all(ncells_global);
+   MPI_Allgatherv(&mpot_dev[0], nsend, MPI_INT, &mpot_all[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+   for (uint cell = 0; cell < ncells_global; cell++){
+      if (mpot_global[cell] != mpot_all[cell] && mype == 0) printf("%d: DEBUG refine_potential 2 at cycle %d cell %d mpot_global & mpot_save_global %d %d \n",mype,ncycle,cell,mpot_global[cell],mpot_all[cell]);
+   }
+
+   // Stage 3: host values gathered from every rank versus mpot_global.
+   MPI_Allgatherv(&mpot[0], nsend, MPI_INT, &mpot_all[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+   for (uint cell = 0; cell < ncells_global; cell++){
+      if (mpot_global[cell] != mpot_all[cell] && mype == 0) printf("%d: DEBUG refine_potential 3 at cycle %d cell %d mpot_global & mpot_save_global %d %d \n",mype,ncycle,cell,mpot_global[cell],mpot_all[cell]);
+   }
+
+   // Stage 4: the global device buffer versus mpot_global.
+   ezcl_enqueue_read_buffer(queue, dev_mpot_global, CL_TRUE,  0, ncells_global*sizeof(cl_int), &mpot_all[0], NULL);
+   for (uint cell = 0; cell < ncells_global; cell++){
+      if (mpot_global[cell] != mpot_all[cell] && mype == 0) printf("%d: DEBUG refine_potential 4 at cycle %d cell %u mpot_global & mpot_save_global %d %d \n",mype,ncycle,cell,mpot_global[cell],mpot_all[cell]);
+   }
+#else
+   // Just to get rid of compiler warnings
+   if (1 == 2) printf("DEBUG -- mpot %p mpot_global %p dev_mpot %p dev_mpot_global %p ncells_global %d nsizes[0] %d ndispl[0] %d ncycle %d\n",
+               mpot,mpot_global,dev_mpot,dev_mpot_global,ncells_global,nsizes[0],ndispl[0],ncycle);
+#endif
+}
+
+void Mesh::compare_ioffset_gpu_global_to_cpu_global(uint old_ncells, int *mpot)
+{  // Debug aid: rebuilds the per-tile running cell count on the host from mpot and prints every tile whose value differs from dev_ioffset.
+   cl_command_queue command_queue = ezcl_get_command_queue();
+   if (ncells == 0) return; // nothing to check; also keeps local_work_size from becoming a zero divisor below
+   size_t local_work_size  = MIN(ncells, TILE_SIZE);
+   size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size;
+
+   // One offset entry is read back per work-group.
+   size_t block_size     = global_work_size/local_work_size;
+
+   vector<int> ioffset_check(block_size);
+   ezcl_enqueue_read_buffer(command_queue, dev_ioffset, CL_TRUE, 0, block_size*sizeof(cl_int), &ioffset_check[0], NULL);
+
+   int mcount, mtotal;
+   mtotal = 0;
+   for (uint ig=0; ig<(old_ncells+TILE_SIZE-1)/TILE_SIZE; ig++){
+      mcount = 0;
+      for (uint ic=ig*TILE_SIZE; ic<(ig+1)*TILE_SIZE; ic++){
+         if (ic >= old_ncells) break;
+         // mcount = number of cells this tile contributes after the rezone described by mpot
+         if (mpot[ic] < 0) {
+            if (celltype[ic] == REAL_CELL) {
+               // remove all but cell that will remain to get count right when split
+               // across processors
+               if (is_lower_left(i[ic],j[ic]) ) mcount++;
+            } else {
+               // either upper right or lower left will remain for boundary cells
+               if (is_upper_right(i[ic],j[ic]) || is_lower_left(i[ic],j[ic]) ) mcount++;
+            }
+         }
+         if (mpot[ic] >= 0) {
+            if (celltype[ic] == REAL_CELL){
+               mcount += mpot[ic] ? 4 : 1;
+            } else {
+               mcount += mpot[ic] ? 2 : 1;
+            }
+         }
+      }
+      if (mtotal != ioffset_check[ig]) printf("DEBUG ig %u ioffset %d mtotal %d\n",ig,ioffset_check[ig],mtotal); // compared before adding this tile: the offset is the total of the preceding tiles
+      mtotal += mcount;
+   }
+}
+
+void Mesh::compare_ioffset_all_to_gpu_local(uint old_ncells, uint old_ncells_global, int block_size, int block_size_global, int *mpot, int *mpot_global, cl_mem dev_ioffset, cl_mem dev_ioffset_global, int *ioffset, int *ioffset_global, int *celltype_global, int *i_global, int *j_global)
+{  // Debug aid: checks the device per-tile offsets (local, then global) against running totals rebuilt on the host; ioffset and ioffset_global are overwritten with the device data.
+   cl_command_queue command_queue = ezcl_get_command_queue();
+
+   // Local data: compare ioffset for each tile against the host running total
+   ezcl_enqueue_read_buffer(command_queue, dev_ioffset, CL_TRUE, 0, block_size*sizeof(cl_int), &ioffset[0], NULL);
+   int mtotal = 0;
+   for (uint ig=0; ig<(old_ncells+TILE_SIZE-1)/TILE_SIZE; ig++){
+      int mcount = 0;
+      for (uint ic=ig*TILE_SIZE; ic<(ig+1)*TILE_SIZE; ic++){
+         if (ic >= old_ncells) break;
+
+         if (mpot[ic] < 0) {
+            if (celltype[ic] == REAL_CELL) {
+               // remove all but cell that will remain to get count right when split
+               // across processors
+               if (is_lower_left(i[ic],j[ic]) ) mcount++;
+            } else {
+               // either upper right or lower left will remain for boundary cells
+               if (is_upper_right(i[ic],j[ic]) || is_lower_left(i[ic],j[ic]) ) mcount++;
+            }
+         }
+         if (mpot[ic] >= 0) {
+            if (celltype[ic] == REAL_CELL){
+               mcount += mpot[ic] ? 4 : 1;
+            } else {
+               mcount += mpot[ic] ? 2 : 1;
+            }
+         }
+      }
+      if (mtotal != ioffset[ig]) printf("%d: DEBUG ig %u ioffset %d mtotal %d\n",mype,ig,ioffset[ig],mtotal); // ig is unsigned, hence %u
+      mtotal += mcount;
+   }
+
+   // Global data: same comparison using the *_global arrays
+   ezcl_enqueue_read_buffer(command_queue, dev_ioffset_global, CL_TRUE, 0, block_size_global*sizeof(cl_int), &ioffset_global[0], NULL);
+   mtotal = 0;
+   int count = 0; // number of mismatching tiles reported so far
+   for (uint ig=0; ig<(old_ncells_global+TILE_SIZE-1)/TILE_SIZE; ig++){
+      int mcount = 0;
+      for (uint ic=ig*TILE_SIZE; ic<(ig+1)*TILE_SIZE; ic++){
+         if (ic >= old_ncells_global) break;
+
+         if (mpot_global[ic] < 0) {
+            if (celltype_global[ic] == REAL_CELL) {
+               // remove all but cell that will remain to get count right when split
+               // across processors
+               if (is_lower_left(i_global[ic],j_global[ic]) ) mcount++;
+            } else {
+               // either upper right or lower left will remain for boundary cells
+               if (is_upper_right(i_global[ic],j_global[ic]) || is_lower_left(i_global[ic],j_global[ic]) ) mcount++;
+            }
+         }
+
+         if (mpot_global[ic] >= 0) {
+            if (celltype_global[ic] == REAL_CELL) {
+               mcount += mpot_global[ic] ? 4 : 1;
+            } else {
+               mcount += mpot_global[ic] ? 2 : 1;
+            }
+         }
+      }
+      if (mtotal != ioffset_global[ig]) {
+         printf("DEBUG global ig %u ioffset %d mtotal %d\n",ig,ioffset_global[ig],mtotal);
+         count++;
+      }
+      if (count > 10) exit(EXIT_FAILURE); // give up after more than 10 mismatches; non-zero status so the failure is visible to the caller's shell
+      mtotal += mcount;
+   }
+}
+#endif
+
+Mesh::Mesh(int nx, int ny, int levmx_in, int ndim_in, double deltax_in, double deltay_in, int boundary, int parallel_in, int do_gpu_calc)
+{  // Sets up an empty mesh (ncells = 0): zeroes timers/counters, records domain extents and per-level index/spacing tables for an nx x ny base grid, and uploads those tables to the GPU when do_gpu_calc is set and OpenCL is compiled in.
+   lowerBound_Global = NULL;
+   upperBound_Global = NULL;
+   for (int i = 0; i < MESH_TIMER_SIZE; i++){
+      cpu_timers[i] = 0.0;
+      gpu_timers[i] = 0L;
+   }
+
+   for (int i = 0; i < MESH_COUNTER_SIZE; i++){
+      cpu_counters[i] = 0;
+      gpu_counters[i] = 0;
+   }
+
+   ndim   = ndim_in;
+   levmx  = levmx_in;
+#ifdef HAVE_OPENCL
+   if (ndim == TWO_DIMENSIONAL) defines = "-DTWO_DIMENSIONAL -DCARTESIAN";
+#endif
+
+   offtile_ratio_local = 0;
+   offtile_local_count = 1;
+
+   mype  = 0;   // single-process defaults; replaced below when MPI is initialised and parallel is set
+   numpe = 1;
+   ncells = 0;
+   ncells_ghost = 0;
+   parallel = parallel_in;
+   noffset = 0;
+   mem_factor = 1.0;
+   //mem_factor = 1.5;
+   
+#ifdef HAVE_MPI
+   int mpi_init;
+   MPI_Initialized(&mpi_init);
+   if (mpi_init && parallel){
+      MPI_Comm_rank(MPI_COMM_WORLD,&mype);
+      MPI_Comm_size(MPI_COMM_WORLD,&numpe);
+   }
+   // TODO add fini
+   if (parallel) mesh_memory.pinit(MPI_COMM_WORLD, 2LL * 1024 * 1024 * 1024); // 2 GiB; LL keeps the product from overflowing where long is 32 bits
+#endif
+   cell_handle = 0;
+
+   if (numpe == 1) mem_factor = 1.0;
+
+   deltax = deltax_in;
+   deltay = deltay_in;
+
+   have_boundary = boundary;
+
+   //int istart = 1;
+   //int jstart = 1;
+   //int iend   = nx;
+   //int jend   = ny;
+   int nxx    = nx;   // cell counts used for the physical extent; grown by 2 when boundary cells are present
+   int nyy    = ny;
+   imin = 0;
+   jmin = 0;
+   imax = nx+1;
+   jmax = ny+1;
+   if (have_boundary) {
+      //istart = 0;
+      //jstart = 0;
+      //iend   = nx + 1;
+      //jend   = ny + 1;
+      nxx    = nx + 2;
+      nyy    = ny + 2;
+      imin   = 0;      // same index limits as the no-boundary case above
+      jmin   = 0;
+      imax   = nx + 1;
+      jmax   = ny + 1;
+   }
+   // Domain is centred on the origin.
+   xmin = -deltax * 0.5 * (real_t)nxx;
+   ymin = -deltay * 0.5 * (real_t)nyy;
+   xmax =  deltax * 0.5 * (real_t)nxx;
+   ymax =  deltay * 0.5 * (real_t)nyy;
+   
+   size_t lvlMxSize = levmx + 1;   // one table entry per level 0..levmx
+
+   levtable.resize(lvlMxSize);
+   lev_ibegin.resize(lvlMxSize);
+   lev_jbegin.resize(lvlMxSize);
+   lev_iend.resize(  lvlMxSize);
+   lev_jend.resize(  lvlMxSize);
+   lev_deltax.resize(lvlMxSize);
+   lev_deltay.resize(lvlMxSize);
+   
+   lev_ibegin[0] = imin + 1;
+   lev_iend[0]   = imax - 1;
+   lev_jbegin[0] = jmin + 1;
+   lev_jend[0]   = jmax - 1;
+   lev_deltax[0] = deltax;
+   lev_deltay[0] = deltay;
+   // Each finer level doubles the index range and halves the cell size.
+   for (int lev = 1; lev <= levmx; lev++) {
+      lev_ibegin[lev] = lev_ibegin[lev-1]*2;
+      lev_iend[lev]   = lev_iend  [lev-1]*2 + 1;
+      lev_jbegin[lev] = lev_jbegin[lev-1]*2;
+      lev_jend[lev]   = lev_jend  [lev-1]*2 + 1;
+      lev_deltax[lev] = lev_deltax[lev-1]*0.5;
+      lev_deltay[lev] = lev_deltay[lev-1]*0.5;
+   }
+   for (uint lev=0; lev<lvlMxSize; lev++){
+      levtable[lev] = IPOW2(lev);
+   }
+
+   if (do_gpu_calc) {
+#ifdef HAVE_OPENCL
+   // The copy host ptr flag will have the data copied to the GPU as part of the allocation
+      dev_levtable = ezcl_malloc(&levtable[0],   const_cast<char *>("dev_levtable"), &lvlMxSize, sizeof(cl_int),    CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 0);
+      dev_levdx    = ezcl_malloc(&lev_deltax[0], const_cast<char *>("dev_levdx"),    &lvlMxSize, sizeof(cl_real_t), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 0);
+      dev_levdy    = ezcl_malloc(&lev_deltay[0], const_cast<char *>("dev_levdy"),    &lvlMxSize, sizeof(cl_real_t), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 0);
+      dev_levibeg  = ezcl_malloc(&lev_ibegin[0], const_cast<char *>("dev_levibeg"),  &lvlMxSize, sizeof(cl_int),    CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 0);
+      dev_leviend  = ezcl_malloc(&lev_iend[0],   const_cast<char *>("dev_leviend"),  &lvlMxSize, sizeof(cl_int),    CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 0);
+      dev_levjbeg  = ezcl_malloc(&lev_jbegin[0], const_cast<char *>("dev_levjbeg"),  &lvlMxSize, sizeof(cl_int),    CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 0);
+      dev_levjend  = ezcl_malloc(&lev_jend[0],   const_cast<char *>("dev_levjend"),  &lvlMxSize, sizeof(cl_int),    CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 0);
+#endif
+   }
+
+   ibase = 0;
+   // Record, for each of the four base-grid corner positions, every (i,j) pair it spans at level levmx.
+   int ncells_corners = 4;
+   int i_corner[] = {   0,   0,imax,imax};
+   int j_corner[] = {   0,jmax,   0,jmax};
+
+   for(int ic=0; ic<ncells_corners; ic++){
+      for (int    jj = j_corner[ic]*IPOW2(levmx); jj < (j_corner[ic]+1)*IPOW2(levmx); jj++) {
+         for (int ii = i_corner[ic]*IPOW2(levmx); ii < (i_corner[ic]+1)*IPOW2(levmx); ii++) {
+            corners_i.push_back(ii);
+            corners_j.push_back(jj);
+         }
+      }
+   }
+
+   do_rezone = true;
+   gpu_do_rezone = true;
+   // Cell arrays are not allocated here.
+   celltype = NULL;
+   nlft     = NULL;
+   nrht     = NULL;
+   nbot     = NULL;
+   ntop     = NULL;
+}
+
+void Mesh::init(int nx, int ny, real_t circ_radius, partition_method initial_order, int do_gpu_calc)
+{  // Builds the GPU kernels (when requested), then either rebuilds the partition bookkeeping for an already-populated mesh (ncells > 0) or creates the level-0 cells and refines them up to levmx times; finally appends the corner index lists.
+   if (do_gpu_calc) {
+#ifdef HAVE_OPENCL
+      cl_context context = ezcl_get_context();
+
+      hash_lib_init();
+      if (mype == 0) printf("Starting compile of kernels in mesh\n");
+      char *bothsources = (char *)malloc(strlen(mesh_kern_source)+strlen(get_hash_kernel_source_string())+1); // room for both source strings plus the terminating NUL
+      strcpy(bothsources, get_hash_kernel_source_string());
+      strcat(bothsources, mesh_kern_source);
+      strcat(bothsources, "\0"); // "\0" is an empty C string, so this appends nothing
+      const char *defines = NULL; // NOTE(review): local with the same name as the `defines` assigned in the constructor; NULL is what gets passed below -- confirm intended
+      cl_program program = ezcl_create_program_wsource(context, defines, bothsources);
+      free(bothsources);
+
+      kernel_reduction_scan2          = ezcl_create_kernel_wprogram(program, "finish_reduction_scan2_cl");
+      kernel_reduction_count          = ezcl_create_kernel_wprogram(program, "finish_reduction_count_cl");
+      kernel_reduction_count2         = ezcl_create_kernel_wprogram(program, "finish_reduction_count2_cl");
+      kernel_hash_adjust_sizes        = ezcl_create_kernel_wprogram(program, "hash_adjust_sizes_cl");
+      kernel_hash_setup               = ezcl_create_kernel_wprogram(program, "hash_setup_cl");
+      kernel_hash_setup_local         = ezcl_create_kernel_wprogram(program, "hash_setup_local_cl");
+      kernel_neighbor_init            = ezcl_create_kernel_wprogram(program, "neighbor_init_cl");
+      kernel_calc_neighbors           = ezcl_create_kernel_wprogram(program, "calc_neighbors_cl");
+      kernel_calc_neighbors_local     = ezcl_create_kernel_wprogram(program, "calc_neighbors_local_cl");
+      kernel_calc_border_cells        = ezcl_create_kernel_wprogram(program, "calc_border_cells_cl");
+      kernel_calc_border_cells2       = ezcl_create_kernel_wprogram(program, "calc_border_cells2_cl");
+      kernel_finish_scan              = ezcl_create_kernel_wprogram(program, "finish_scan_cl");
+      kernel_get_border_data          = ezcl_create_kernel_wprogram(program, "get_border_data_cl");
+      kernel_calc_layer1              = ezcl_create_kernel_wprogram(program, "calc_layer1_cl");
+      kernel_calc_layer1_sethash      = ezcl_create_kernel_wprogram(program, "calc_layer1_sethash_cl");
+      kernel_calc_layer2              = ezcl_create_kernel_wprogram(program, "calc_layer2_cl");
+      kernel_get_border_data2         = ezcl_create_kernel_wprogram(program, "get_border_data2_cl");
+      kernel_calc_layer2_sethash      = ezcl_create_kernel_wprogram(program, "calc_layer2_sethash_cl");
+      kernel_copy_mesh_data           = ezcl_create_kernel_wprogram(program, "copy_mesh_data_cl");
+      kernel_fill_mesh_ghost          = ezcl_create_kernel_wprogram(program, "fill_mesh_ghost_cl");
+      kernel_fill_neighbor_ghost      = ezcl_create_kernel_wprogram(program, "fill_neighbor_ghost_cl");
+      kernel_set_corner_neighbor      = ezcl_create_kernel_wprogram(program, "set_corner_neighbor_cl");
+      kernel_adjust_neighbors_local   = ezcl_create_kernel_wprogram(program, "adjust_neighbors_local_cl");
+      kernel_hash_size                = ezcl_create_kernel_wprogram(program, "calc_hash_size_cl");
+      kernel_finish_hash_size         = ezcl_create_kernel_wprogram(program, "finish_reduction_minmax4_cl");
+      kernel_calc_spatial_coordinates = ezcl_create_kernel_wprogram(program, "calc_spatial_coordinates_cl");
+      kernel_do_load_balance_lower    = ezcl_create_kernel_wprogram(program, "do_load_balance_lower_cl");
+      kernel_do_load_balance_middle   = ezcl_create_kernel_wprogram(program, "do_load_balance_middle_cl");
+      kernel_do_load_balance_upper    = ezcl_create_kernel_wprogram(program, "do_load_balance_upper_cl");
+#ifndef MINIMUM_PRECISION
+      kernel_do_load_balance_double   = ezcl_create_kernel_wprogram(program, "do_load_balance_double_cl");
+#endif
+      kernel_do_load_balance_float    = ezcl_create_kernel_wprogram(program, "do_load_balance_float_cl");
+      kernel_refine_smooth            = ezcl_create_kernel_wprogram(program, "refine_smooth_cl");
+      kernel_coarsen_smooth           = ezcl_create_kernel_wprogram(program, "coarsen_smooth_cl");
+      kernel_coarsen_check_block      = ezcl_create_kernel_wprogram(program, "coarsen_check_block_cl");
+      kernel_rezone_all               = ezcl_create_kernel_wprogram(program, "rezone_all_cl");
+      kernel_rezone_neighbors         = ezcl_create_kernel_wprogram(program, "rezone_neighbors_cl");
+#ifndef MINIMUM_PRECISION
+      kernel_rezone_one_double        = ezcl_create_kernel_wprogram(program, "rezone_one_double_cl");
+#endif
+      kernel_rezone_one_float         = ezcl_create_kernel_wprogram(program, "rezone_one_float_cl");
+      kernel_copy_mpot_ghost_data     = ezcl_create_kernel_wprogram(program, "copy_mpot_ghost_data_cl");
+      kernel_set_boundary_refinement  = ezcl_create_kernel_wprogram(program, "set_boundary_refinement");
+      init_kernel_2stage_sum();
+      init_kernel_2stage_sum_int();
+      if (! have_boundary){
+        kernel_count_BCs              = ezcl_create_kernel_wprogram(program, "count_BCs_cl");
+      }
+
+      ezcl_program_release(program);
+      if (mype == 0) printf("Finishing compile of kernels in mesh\n");
+#endif
+   }
+
+   //KDTree_Initialize(&tree);
+   if (ncells > 0) { // this is a restart.
+        nsizes.resize (numpe);
+        ndispl.resize (numpe);
+       if (parallel && numpe > 1) {
+#ifdef HAVE_MPI
+          int ncells_int = ncells;
+          MPI_Allgather(&ncells_int, 1, MPI_INT, &nsizes[0], 1, MPI_INT, MPI_COMM_WORLD);
+          ndispl[0]=0; // ndispl[ip] = sum of nsizes[0..ip-1], i.e. each rank's starting global index
+          for (int ip=1; ip<numpe; ip++){
+             ndispl[ip] = ndispl[ip-1] + nsizes[ip-1];
+          }
+          noffset=ndispl[mype];
+          ncells_global = ndispl[numpe-1] + nsizes[numpe-1];
+#endif
+       } else {
+          noffset = 0;
+          ncells_global = ncells;
+          proc.resize (ncells);
+          calc_distribution(numpe);
+       }
+       calc_celltype(ncells);
+
+   } else {
+       int istart = 1,
+           jstart = 1,
+           iend   = nx,
+           jend   = ny,
+           nxx    = nx,
+           nyy    = ny;
+       if (have_boundary) {
+          istart = 0;
+          jstart = 0;
+          iend   = nx + 1;
+          jend   = ny + 1;
+          nxx    = nx + 2;
+          nyy    = ny + 2;
+       }
+       // In 2-D the four corner positions of the boundary ring are not cells (they are skipped in the fill loop below).
+       if (ndim == TWO_DIMENSIONAL) ncells = nxx * nyy - have_boundary * 4;
+       else                         ncells = nxx * nyy;
+
+       noffset = 0;
+       if (parallel) {
+          ncells_global = ncells;
+          nsizes.resize(numpe);
+          ndispl.resize(numpe);
+          // Even split; the first (ncells_global % numpe) ranks take one extra cell.
+          for (int ip=0; ip<numpe; ip++){
+             nsizes[ip] = ncells_global/numpe;
+             if (ip < (int)(ncells_global%numpe)) nsizes[ip]++;
+          }
+
+          ndispl[0]=0;
+          for (int ip=1; ip<numpe; ip++){
+             ndispl[ip] = ndispl[ip-1] + nsizes[ip-1];
+          }
+          ncells= nsizes[mype];
+          noffset=ndispl[mype];
+       }
+
+       allocate(ncells);
+       index.resize(ncells);
+
+       int ic = 0; // global cell counter over the whole grid; only [noffset, noffset+ncells) is stored locally
+
+       for (int jj = jstart; jj <= jend; jj++) {
+          for (int ii = istart; ii <= iend; ii++) {
+             if (have_boundary && ii == 0    && jj == 0   ) continue;
+             if (have_boundary && ii == 0    && jj == jend) continue;
+             if (have_boundary && ii == iend && jj == 0   ) continue;
+             if (have_boundary && ii == iend && jj == jend) continue;
+
+             if (ic >= (int)noffset && ic < (int)(ncells+noffset)){
+                int iclocal = ic-noffset;
+                index[iclocal] = ic;
+                i[iclocal]     = ii;
+                j[iclocal]     = jj;
+                level[iclocal] = 0;
+             }
+             ic++;
+          }
+       }
+
+       //if (numpe > 1 && (initial_order != HILBERT_SORT && initial_order != HILBERT_PARTITION) ) mem_factor = 2.0;
+       partition_cells(numpe, index, initial_order);
+
+       calc_celltype(ncells);
+       calc_spatial_coordinates(0);
+
+       //  Start lev loop here
+       for (int ilevel=1; ilevel<=levmx; ilevel++) {
+
+          //int old_ncells = ncells;
+
+          ncells_ghost = ncells;
+          calc_neighbors_local();
+
+          kdtree_setup();
+
+          int nez; // filled by the query below; used as the number of valid entries in ind
+          vector<int> ind(ncells);
+
+    #ifdef FULL_PRECISION
+          KDTree_QueryCircleIntersect_Double(&tree, &nez, &(ind[0]), circ_radius, ncells, &x[0], &dx[0], &y[0], &dy[0]);
+    #else
+          KDTree_QueryCircleIntersect_Float(&tree, &nez, &(ind[0]), circ_radius, ncells, &x[0], &dx[0], &y[0], &dy[0]);
+    #endif
+
+          vector<int> mpot(ncells_ghost,0);
+          // Flag every returned cell that is not yet at the maximum level (presumably the cells intersecting the circle of radius circ_radius -- confirm against the KDTree API).
+          for (int ic=0; ic<nez; ++ic){
+             if (level[ind[ic]] < levmx) mpot[ind[ic]] = 1;
+          }
+
+          KDTree_Destroy(&tree);
+          //  Refine the cells.
+          int icount = 0;
+          int jcount = 0;
+          int new_ncells = refine_smooth(mpot, icount, jcount);
+
+          MallocPlus dummy;
+          rezone_all(icount, jcount, mpot, 0, dummy);
+
+          ncells = new_ncells;
+
+          calc_spatial_coordinates(0);
+
+    #ifdef HAVE_MPI
+          if (parallel && numpe > 1) {
+             int ncells_int = ncells;
+             MPI_Allgather(&ncells_int, 1, MPI_INT, &nsizes[0], 1, MPI_INT, MPI_COMM_WORLD);
+             ndispl[0]=0;
+             for (int ip=1; ip<numpe; ip++){
+                ndispl[ip] = ndispl[ip-1] + nsizes[ip-1];
+             }
+             noffset=ndispl[mype];
+             ncells_global = ndispl[numpe-1] + nsizes[numpe-1];
+          }
+    #endif
+       }  // End lev loop here
+       index.clear();
+       ncells_ghost = ncells;
+   }
+   int ncells_corners = 4; // NOTE(review): the constructor runs this same loop, so corners_i/corners_j receive these entries a second time here -- confirm intended
+   int i_corner[] = {   0,   0,imax,imax};
+   int j_corner[] = {   0,jmax,   0,jmax};
+
+   for(int ic=0; ic<ncells_corners; ic++){
+      for (int    jj = j_corner[ic]*IPOW2(levmx); jj < (j_corner[ic]+1)*IPOW2(levmx); jj++) {
+         for (int ii = i_corner[ic]*IPOW2(levmx); ii < (i_corner[ic]+1)*IPOW2(levmx); ii++) {
+            corners_i.push_back(ii);
+            corners_j.push_back(jj);
+         }
+      }
+   }
+}
+
+size_t Mesh::refine_smooth(vector<int> &mpot, int &icount, int &jcount)
+{  // Adjusts the per-cell flags in mpot in four passes (described below), recounts with rezone_count, sets do_rezone, and returns ncells + the value returned by rezone_count.
+   vector<int> mpot_old;
+
+   int newcount;
+   int newcount_global;
+
+   struct timeval tstart_lev2;
+   // Initial count from the caller's flags; icount seeds newcount below (presumably rezone_count fills icount/jcount -- confirm).
+   rezone_count(mpot, icount, jcount);
+
+#ifdef _OPENMP
+#pragma omp parallel
+{ //START Parallel Region
+#endif
+
+#ifdef _OPENMP
+#pragma omp master
+{//MASTER START
+#endif
+   newcount = icount;
+   newcount_global = newcount;
+
+   if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2);
+
+#ifdef HAVE_MPI
+   if (parallel) {
+      MPI_Allreduce(&newcount, &newcount_global, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+   }
+#endif
+
+#ifdef _OPENMP
+}//END MASTER
+#pragma omp barrier
+#endif
+   // Pass 1 runs only if some rank reported a non-zero count and there is more than one refinement level.
+   if(newcount_global > 0 && levmx > 1) {
+
+      size_t my_ncells=ncells;
+      if (parallel) my_ncells=ncells_ghost;
+
+#ifdef _OPENMP
+#pragma omp master
+{//MASTER START
+#endif
+      cpu_counters[MESH_COUNTER_REFINE_SMOOTH]++;
+
+      mpot_old.resize(my_ncells);
+#ifdef _OPENMP
+}//END MASTER
+#pragma omp barrier
+#endif
+
+      int levcount = 1;
+      // Iterate until no rank flags a new cell, or levmx-1 sweeps have been done.
+      while (newcount_global > 0 && levcount < levmx){
+
+         levcount++; 
+#ifdef _OPENMP
+#pragma omp master
+{//MASTER START
+#endif
+         // mpot_old now holds the previous sweep's flags; mpot is rewritten from it in the loop below.
+         mpot.swap(mpot_old);
+         newcount=0;
+#ifdef HAVE_MPI
+         if (numpe > 1) {
+            L7_Update(&mpot_old[0], L7_INT, cell_handle);
+         }
+#endif
+
+#ifdef _OPENMP
+}//END MASTER
+#pragma omp barrier
+#endif
+
+         int upperBound, lowerBound;
+         get_bounds(upperBound, lowerBound); // bounds are fetched, but the active loop header below iterates over all ncells
+         int mynewcount = newcount; // tally of newly flagged cells; added into the shared newcount after the loop
+
+#ifdef _OPENMP
+#pragma omp for reduction(+:newcount)
+#endif
+         for(uint ic = 0; ic < ncells; ic++) {
+        // for(uint ic = lowerBound; ic < upperBound; ic++){
+            int lev = level[ic];
+            mpot[ic] = mpot_old[ic];
+            if(mpot_old[ic] > 0) continue;
+            // Left neighbour: flag ic if the neighbour's level (plus one when it is itself flagged) exceeds lev by more than 1.
+            int nl = nlft[ic];
+            if (nl >= 0 && nl < (int)ncells_ghost) {
+               int ll = level[nl];
+               if(mpot_old[nl] > 0) ll++;
+   
+               if(ll - lev > 1) {
+                  mpot[ic]=1;
+                  mynewcount++;
+                  continue;
+               }
+               // Neighbour currently finer than ic: apply the same test to the neighbour's top neighbour.
+               ll = level[nl];
+               if (ll > lev) {
+                  int nlt = ntop[nl];
+                  if (nlt >= 0 && nlt < (int)ncells_ghost) {
+                     int llt = level[nlt];
+                     if(mpot_old[nlt] > 0) llt++;
+
+                     if(llt - lev > 1) {
+                        mpot[ic]=1;
+                        mynewcount++;
+                        continue;
+                     }
+                  }
+               }
+            }
+            // Right neighbour: same test, then the right neighbour's top neighbour.
+            int nr = nrht[ic];
+            if (nr >= 0 && nr < (int)ncells_ghost) {
+               int lr = level[nr];
+               if(mpot_old[nr] > 0) lr++;
+   
+               if(lr - lev > 1) {
+                  mpot[ic]=1;
+                  mynewcount++;
+                  continue;
+               }
+
+               lr = level[nr];
+               if (lr > lev) {
+                  int nrt = ntop[nr];
+                  if (nrt >= 0 && nrt < (int)ncells_ghost) {
+                     int lrt = level[nrt];
+                     if(mpot_old[nrt] > 0) lrt++;
+
+                     if(lrt - lev > 1) {
+                        mpot[ic]=1;
+                        mynewcount++;
+                        continue;
+                     }
+                  }
+               }
+            }
+            // Top neighbour: same test, then the top neighbour's right neighbour.
+            int nt = ntop[ic];
+            if (nt >= 0 && nt < (int)ncells_ghost) {
+               int lt = level[nt];
+               if(mpot_old[nt] > 0) lt++;
+   
+               if(lt - lev > 1) {
+                  mpot[ic]=1;
+                  mynewcount++;
+                  continue;
+               }
+
+               lt = level[nt];
+               if (lt > lev) {
+                  int ntr = nrht[nt];
+                  if (ntr >= 0 && ntr < (int)ncells_ghost) {
+                     int ltr = level[ntr];
+                     if(mpot_old[ntr] > 0) ltr++;
+
+                     if(ltr - lev > 1) {
+                        mpot[ic]=1;
+                        mynewcount++;
+                        continue;
+                     }
+                  }
+               }
+            }
+            // Bottom neighbour: same test, then the bottom neighbour's right neighbour.
+            int nb = nbot[ic];
+            if (nb >= 0 && nb < (int)ncells_ghost) {
+               int lb = level[nb];
+               if(mpot_old[nb] > 0) lb++;
+   
+               if(lb - lev > 1) {
+                  mpot[ic]=1;
+                  mynewcount++;
+                  continue;
+               }
+
+               lb = level[nb];
+               if (lb > lev) {
+                  int nbr = nrht[nb];
+                  if (nbr >= 0 && nbr < (int)ncells_ghost) {
+                     int lbr = level[nbr];
+                     if(mpot_old[nbr] > 0) lbr++;
+
+                     if(lbr - lev > 1) {
+                        mpot[ic]=1;
+                        mynewcount++;
+                        continue;
+                     }
+                  }
+               }
+            }
+         }
+#ifdef _OPENMP
+#pragma omp atomic 
+#endif
+         newcount += mynewcount;
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+{
+#endif
+         icount += newcount;
+         newcount_global = newcount;
+
+#ifdef HAVE_MPI
+         if (parallel) {
+            MPI_Allreduce(&newcount, &newcount_global, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+         }
+#endif
+
+#ifdef _OPENMP
+}//END MASTER
+#pragma omp barrier
+#endif
+
+      } // while (newcount_global > 0 && levcount < levmx);
+
+   }
+
+   // Prepare pass 2: after the swap, mpot_old holds the pass-1 flags and mpot is a zero-filled vector of ncells_ghost entries.
+#ifdef _OPENMP
+#pragma omp master
+{
+#endif
+
+#ifdef HAVE_MPI
+   if (numpe > 1) {
+      L7_Update(&mpot[0], L7_INT, cell_handle);
+  }
+#endif
+
+   mpot_old.clear();
+   mpot_old.resize(ncells_ghost);
+
+   mpot_old.swap(mpot);
+#ifdef _OPENMP
+}//END MASTER
+#pragma omp barrier
+#endif
+   // Pass 2: a negative flag (other than <= -1000000) is reset to 0 when either of the two neighbours selected by the cell's position (e.g. right and top for an upper-right cell) would be at a higher level than the cell.
+#ifdef _OPENMP
+#pragma omp for
+#endif
+   for(uint ic=0; ic<ncells; ic++) {
+      mpot[ic] = mpot_old[ic];
+      if (mpot_old[ic] >= 0) continue;
+      if (mpot_old[ic] <= -1000000) continue;
+      if (        is_upper_right(i[ic],j[ic]) ) {
+         int nr = nrht[ic];
+         int lr = level[nr];
+         if (mpot_old[nr] > 0) lr++;
+         int nt = ntop[ic];
+         int lt = level[nt];
+         if (mpot_old[nt] > 0) lt++;
+         if (lr > level[ic] || lt > level[ic]) mpot[ic] = 0;
+      } else if ( is_upper_left(i[ic],j[ic] ) ) {
+         int nl = nlft[ic];
+         int ll = level[nl];
+         if (mpot_old[nl] > 0) ll++;
+         int nt = ntop[ic];
+         int lt = level[nt];
+         if (mpot_old[nt] > 0) lt++;
+         if (ll > level[ic] || lt > level[ic]) mpot[ic] = 0;
+      } else if ( is_lower_right(i[ic],j[ic] ) ) {
+         int nr = nrht[ic];
+         int lr = level[nr];
+         if (mpot_old[nr] > 0) lr++;
+         int nb = nbot[ic];
+         int lb = level[nb];
+         if (mpot_old[nb] > 0) lb++;
+         if (lr > level[ic] || lb > level[ic]) mpot[ic] = 0;
+      } else if ( is_lower_left(i[ic],j[ic] ) ) {
+         int nl = nlft[ic];
+         int ll = level[nl];
+         if (mpot_old[nl] > 0) ll++;
+         int nb = nbot[ic];
+         int lb = level[nb];
+         if (mpot_old[nb] > 0) lb++;
+         if (ll > level[ic] || lb > level[ic]) mpot[ic] = 0;
+      }
+   }
+
+#ifdef _OPENMP
+#pragma omp master
+{
+#endif
+
+#ifdef HAVE_MPI
+   if (numpe > 1) {
+      L7_Update(&mpot[0], L7_INT, cell_handle);
+  }
+#endif
+
+   mpot_old.swap(mpot);
+#ifdef _OPENMP
+}//END MASTER
+#pragma omp barrier
+#endif
+   // Pass 3: a negative flag survives only when the three cells n1, n2, n3 (reached through the sides opposite to those used in pass 2) all carry flag -1 and would be at the cell's own level; otherwise it is reset to 0.
+#ifdef _OPENMP
+#pragma omp for
+#endif
+   for(uint ic=0; ic<ncells; ic++) {
+      int n1=0, n2=0, n3=0;
+      mpot[ic] = mpot_old[ic];
+      if (mpot_old[ic] >= 0) continue;
+      if (mpot_old[ic] <= -1000000) continue;
+      if ( is_upper_right(i[ic],j[ic]) ) {
+         n1 = nbot[ic];
+         n2 = nlft[ic];
+         n3 = nlft[n1];
+      } else if ( is_upper_left(i[ic],j[ic] ) ) {
+         n1 = nbot[ic];
+         n2 = nrht[ic];
+         n3 = nrht[n1];
+      } else if ( is_lower_right(i[ic],j[ic] ) ) {
+         n1 = ntop[ic];
+         n2 = nlft[ic];
+         n3 = nlft[n1];
+      } else if ( is_lower_left(i[ic],j[ic] ) ) {
+         n1 = ntop[ic];
+         n2 = nrht[ic];
+         n3 = nrht[n1];
+      }
+      if (n3 < 0) {
+         mpot[ic] = 0;
+      } else {
+         int lev1 = level[n1];
+         int lev2 = level[n2];
+         int lev3 = level[n3];
+         if (mpot_old[n1] > 0) lev1++;
+         if (mpot_old[n2] > 0) lev2++;
+         if (mpot_old[n3] > 0) lev3++;
+
+         if (mpot_old[n1] != -1 || lev1 != level[ic] ||
+             mpot_old[n2] != -1 || lev2 != level[ic] ||
+             mpot_old[n3] != -1 || lev3 != level[ic]) {
+            mpot[ic] = 0;
+         }
+      }
+   }
+
+#ifdef _OPENMP
+#pragma omp master
+{
+#endif
+
+#ifdef HAVE_MPI
+   if (numpe > 1) {
+      L7_Update(&mpot[0], L7_INT, cell_handle);
+  }
+#endif
+
+#ifdef _OPENMP
+}//END MASTER
+#pragma omp barrier
+#endif
+   // Pass 4: cells with a negative celltype that matches one of the four boundary cases copy the flag of the neighbour on the opposite side.
+#ifdef _OPENMP
+#pragma omp for
+#endif
+   for (uint ic=0; ic<ncells; ic++) {
+      if (celltype[ic] < 0) {
+         switch (celltype[ic]) {
+            case LEFT_BOUNDARY:
+               mpot[ic] = mpot[nrht[ic]];
+               break;
+            case RIGHT_BOUNDARY:
+               mpot[ic] = mpot[nlft[ic]];
+               break;
+            case BOTTOM_BOUNDARY:
+               mpot[ic] = mpot[ntop[ic]];
+               break;
+            case TOP_BOUNDARY:
+               mpot[ic] = mpot[nbot[ic]];
+               break;
+         }
+      }
+   }
+
+#ifdef _OPENMP
+#pragma omp barrier
+}//END Parallel Region
+#endif
+   // Recount from the final flags; icount and jcount are passed by reference again.
+   newcount = ncells + rezone_count(mpot, icount, jcount);
+
+#ifdef HAVE_MPI
+   int icount_global = icount;
+   int jcount_global = jcount;
+   if (parallel) {
+      int count[2], global_count[2];
+      count[0] = icount;
+      count[1] = jcount;
+      MPI_Allreduce(&count, &global_count, 2, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+      icount_global = global_count[0];
+      jcount_global = global_count[1];
+   }
+   do_rezone = (icount_global != 0 || jcount_global != 0) ? true : false; // when parallel, true if any rank has a non-zero count
+#else
+   do_rezone = (icount != 0 || jcount != 0) ? true : false;
+#endif
+
+
+   if (TIMING_LEVEL >= 2) cpu_timers[MESH_TIMER_REFINE_SMOOTH] += cpu_timer_stop(tstart_lev2);
+
+   return(newcount);
+}
+
+#ifdef HAVE_OPENCL
+int Mesh::gpu_refine_smooth(cl_mem &dev_mpot, int &icount, int &jcount)
+{  // GPU smoothing of the refinement flags in dev_mpot; icount/jcount are in-out counts; returns ncells+icount-jcount.
+   struct timeval tstart_lev2;
+   if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2);
+   // Also sets gpu_do_rezone from the final global counts; if both are zero by the last stage, dev_mpot is deleted and set to NULL.
+   cl_command_queue command_queue = ezcl_get_command_queue();
+
+   size_t local_work_size = 128;
+   size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size; // ncells rounded up to a multiple of local_work_size
+   size_t block_size = global_work_size/local_work_size; // number of work-groups
+   // Global counts default to the local ones; under MPI in parallel mode they become sums over MPI_COMM_WORLD.
+   int icount_global = icount;
+   int jcount_global = jcount;
+
+#ifdef HAVE_MPI
+   if (parallel) {
+      int count[2], count_global[2];
+      count[0] = icount;
+      count[1] = jcount;
+      MPI_Allreduce(&count, &count_global, 2, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+      icount_global = count_global[0];
+      jcount_global = count_global[1];
+   }
+#endif
+
+   int levcount = 1;
+   //int which_smooth=0;
+   // Stage 1: refinement smoothing, repeated while newcount_global > 0 and levcount < levmx.
+   if(icount_global > 0 && levcount < levmx) {
+      size_t result_size = 1;
+      cl_mem dev_result  = ezcl_malloc(NULL, const_cast<char *>("dev_result"),  &result_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+      cl_mem dev_redscratch = ezcl_malloc(NULL, const_cast<char *>("dev_redscratch"), &block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+      cl_mem dev_mpot_old = ezcl_malloc(NULL, const_cast<char *>("dev_mpot_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+
+      int newcount = icount;
+      int newcount_global = icount_global;
+      while (newcount_global > 0 && levcount < levmx) {
+         levcount++;
+
+         gpu_counters[MESH_COUNTER_REFINE_SMOOTH]++;
+         // presumably refreshes the off-rank (ghost) entries of dev_mpot before the kernel reads them -- confirm against L7
+#ifdef HAVE_MPI
+         if (numpe > 1) {
+            L7_Dev_Update(dev_mpot, L7_INT, cell_handle);
+         }
+#endif
+
+         if (icount_global) {
+            ezcl_device_memory_swap(&dev_mpot_old, &dev_mpot); // the flags so far are now in dev_mpot_old (kernel arg 9); dev_mpot is arg 10
+            // NOTE(review): args 0 and 1 copy sizeof(cl_int) bytes from ncells/ncells_ghost; if those are wider than cl_int this relies on little-endian layout -- confirm their type
+            ezcl_set_kernel_arg(kernel_refine_smooth, 0, sizeof(cl_int),  (void *)&ncells);
+            ezcl_set_kernel_arg(kernel_refine_smooth, 1, sizeof(cl_int),  (void *)&ncells_ghost);
+            ezcl_set_kernel_arg(kernel_refine_smooth, 2, sizeof(cl_int),  (void *)&levmx);
+            ezcl_set_kernel_arg(kernel_refine_smooth, 3, sizeof(cl_mem),  (void *)&dev_nlft);
+            ezcl_set_kernel_arg(kernel_refine_smooth, 4, sizeof(cl_mem),  (void *)&dev_nrht);
+            ezcl_set_kernel_arg(kernel_refine_smooth, 5, sizeof(cl_mem),  (void *)&dev_nbot);
+            ezcl_set_kernel_arg(kernel_refine_smooth, 6, sizeof(cl_mem),  (void *)&dev_ntop);
+            ezcl_set_kernel_arg(kernel_refine_smooth, 7, sizeof(cl_mem),  (void *)&dev_level);
+            ezcl_set_kernel_arg(kernel_refine_smooth, 8, sizeof(cl_mem),  (void *)&dev_celltype);
+            ezcl_set_kernel_arg(kernel_refine_smooth, 9, sizeof(cl_mem),  (void *)&dev_mpot_old);
+            ezcl_set_kernel_arg(kernel_refine_smooth,10, sizeof(cl_mem),  (void *)&dev_mpot);
+            ezcl_set_kernel_arg(kernel_refine_smooth,11, sizeof(cl_mem),  (void *)&dev_redscratch);
+            ezcl_set_kernel_arg(kernel_refine_smooth,12, sizeof(cl_mem),  (void *)&dev_result);
+            ezcl_set_kernel_arg(kernel_refine_smooth,13, local_work_size*sizeof(cl_int),    NULL); // local scratch, one cl_int per work-item
+
+            ezcl_enqueue_ndrange_kernel(command_queue, kernel_refine_smooth, 1, NULL, &global_work_size, &local_work_size, NULL);
+            // gpu_rezone_count launches kernel_reduction_count over dev_redscratch with dev_result as its result buffer.
+            gpu_rezone_count(block_size, local_work_size, dev_redscratch, dev_result);
+
+            int result;
+            ezcl_enqueue_read_buffer(command_queue, dev_result, CL_TRUE, 0, sizeof(cl_int), &result, NULL); // blocking read (CL_TRUE)
+
+            //printf("result = %d after %d refine smooths\n",result,which_smooth);
+            //which_smooth++;
+
+            icount = result;
+         }
+         // NOTE(review): newcount starts as icount, but after the first pass it holds the previous difference rather than the previous icount -- confirm this is intended
+         newcount = icount-newcount;
+         newcount_global = newcount;
+#ifdef HAVE_MPI
+         if (parallel) {
+            MPI_Allreduce(&newcount, &newcount_global, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+         }
+#endif
+         icount_global += newcount_global;
+         //printf("DEBUG -- icount %d icount_global %d newcount %d newcount_global %d\n",icount,icount_global,newcount,newcount_global);
+      }
+
+      ezcl_device_memory_delete(dev_mpot_old);
+      ezcl_device_memory_delete(dev_redscratch);
+      ezcl_device_memory_delete(dev_result);
+   }
+   // Stage 2: coarsening. Entered on the global count; the kernels themselves run only where the local jcount is nonzero.
+   if (jcount_global) {
+#ifdef HAVE_MPI
+      if (numpe > 1) {
+         L7_Dev_Update(dev_mpot, L7_INT, cell_handle);
+      }
+#endif
+
+      cl_mem dev_mpot_old = ezcl_malloc(NULL, const_cast<char *>("dev_mpot_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+
+      if (jcount) {
+         ezcl_device_memory_swap(&dev_mpot_old, &dev_mpot);
+
+         ezcl_set_kernel_arg(kernel_coarsen_smooth, 0, sizeof(cl_int),  (void *)&ncells);
+         ezcl_set_kernel_arg(kernel_coarsen_smooth, 1, sizeof(cl_mem),  (void *)&dev_nlft);
+         ezcl_set_kernel_arg(kernel_coarsen_smooth, 2, sizeof(cl_mem),  (void *)&dev_nrht);
+         ezcl_set_kernel_arg(kernel_coarsen_smooth, 3, sizeof(cl_mem),  (void *)&dev_nbot);
+         ezcl_set_kernel_arg(kernel_coarsen_smooth, 4, sizeof(cl_mem),  (void *)&dev_ntop);
+         ezcl_set_kernel_arg(kernel_coarsen_smooth, 5, sizeof(cl_mem),  (void *)&dev_i);
+         ezcl_set_kernel_arg(kernel_coarsen_smooth, 6, sizeof(cl_mem),  (void *)&dev_j);
+         ezcl_set_kernel_arg(kernel_coarsen_smooth, 7, sizeof(cl_mem),  (void *)&dev_level);
+         ezcl_set_kernel_arg(kernel_coarsen_smooth, 8, sizeof(cl_mem),  (void *)&dev_mpot_old);
+         ezcl_set_kernel_arg(kernel_coarsen_smooth, 9, sizeof(cl_mem),  (void *)&dev_mpot);
+
+         ezcl_enqueue_ndrange_kernel(command_queue, kernel_coarsen_smooth, 1, NULL, &global_work_size, &local_work_size, NULL);
+      }
+
+#ifdef HAVE_MPI
+      if (numpe > 1) {
+         L7_Dev_Update(dev_mpot, L7_INT, cell_handle);
+      }
+#endif
+      // Second pass: kernel_coarsen_check_block, then reduce and read back the new local jcount.
+      if (jcount) {
+         size_t result_size = 1;
+         cl_mem dev_result  = ezcl_malloc(NULL, const_cast<char *>("dev_result"),  &result_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+         cl_mem dev_redscratch = ezcl_malloc(NULL, const_cast<char *>("dev_redscratch"), &block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+
+         ezcl_device_memory_swap(&dev_mpot_old, &dev_mpot);
+
+         ezcl_set_kernel_arg(kernel_coarsen_check_block, 0, sizeof(cl_int),  (void *)&ncells);
+         ezcl_set_kernel_arg(kernel_coarsen_check_block, 1, sizeof(cl_mem),  (void *)&dev_nlft);
+         ezcl_set_kernel_arg(kernel_coarsen_check_block, 2, sizeof(cl_mem),  (void *)&dev_nrht);
+         ezcl_set_kernel_arg(kernel_coarsen_check_block, 3, sizeof(cl_mem),  (void *)&dev_nbot);
+         ezcl_set_kernel_arg(kernel_coarsen_check_block, 4, sizeof(cl_mem),  (void *)&dev_ntop);
+         ezcl_set_kernel_arg(kernel_coarsen_check_block, 5, sizeof(cl_mem),  (void *)&dev_i);
+         ezcl_set_kernel_arg(kernel_coarsen_check_block, 6, sizeof(cl_mem),  (void *)&dev_j);
+         ezcl_set_kernel_arg(kernel_coarsen_check_block, 7, sizeof(cl_mem),  (void *)&dev_level);
+         ezcl_set_kernel_arg(kernel_coarsen_check_block, 8, sizeof(cl_mem),  (void *)&dev_celltype);
+         ezcl_set_kernel_arg(kernel_coarsen_check_block, 9, sizeof(cl_mem),  (void *)&dev_mpot_old);
+         ezcl_set_kernel_arg(kernel_coarsen_check_block,10, sizeof(cl_mem),  (void *)&dev_mpot);
+         ezcl_set_kernel_arg(kernel_coarsen_check_block,11, sizeof(cl_mem),  (void *)&dev_redscratch);
+         ezcl_set_kernel_arg(kernel_coarsen_check_block,12, sizeof(cl_mem),  (void *)&dev_result);
+         ezcl_set_kernel_arg(kernel_coarsen_check_block,13, local_work_size*sizeof(cl_int),    NULL);
+
+         ezcl_enqueue_ndrange_kernel(command_queue, kernel_coarsen_check_block, 1, NULL, &global_work_size, &local_work_size, NULL);
+
+         gpu_rezone_count(block_size, local_work_size, dev_redscratch, dev_result);
+
+         int result;
+         ezcl_enqueue_read_buffer(command_queue, dev_result, CL_TRUE, 0, sizeof(cl_int), &result, NULL); // blocking read (CL_TRUE)
+
+         //printf("result = %d after coarsen smooth\n",result);
+
+         jcount = result;
+
+         ezcl_device_memory_delete(dev_redscratch);
+         ezcl_device_memory_delete(dev_result);
+      }
+
+      jcount_global = jcount;
+
+#ifdef HAVE_MPI
+      if (parallel) {
+         MPI_Allreduce(&jcount, &jcount_global, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+      }
+#endif
+
+      ezcl_device_memory_delete(dev_mpot_old);
+   }
+   // Stage 3: with anything left to do, run kernel_set_boundary_refinement and recount both totals; otherwise drop dev_mpot.
+   if (icount_global || jcount_global) {
+#ifdef HAVE_MPI
+      if (numpe > 1) {
+         L7_Dev_Update(dev_mpot, L7_INT, cell_handle);
+      }
+#endif
+      // Results are cl_int2 in this stage; dev_ioffset is not a local of this function and is not deleted here.
+      size_t result_size = 1;
+      cl_mem dev_result  = ezcl_malloc(NULL, const_cast<char *>("dev_result"),  &result_size, sizeof(cl_int2), CL_MEM_READ_WRITE, 0);
+      cl_mem dev_redscratch = ezcl_malloc(NULL, const_cast<char *>("dev_redscratch"), &block_size, sizeof(cl_int2), CL_MEM_READ_WRITE, 0);
+      dev_ioffset  = ezcl_malloc(NULL, const_cast<char *>("dev_ioffset"), &block_size,   sizeof(cl_uint), CL_MEM_READ_WRITE, 0);
+
+      ezcl_set_kernel_arg(kernel_set_boundary_refinement, 0,  sizeof(cl_int), (void *)&ncells);
+      ezcl_set_kernel_arg(kernel_set_boundary_refinement, 1,  sizeof(cl_mem), (void *)&dev_nlft);
+      ezcl_set_kernel_arg(kernel_set_boundary_refinement, 2,  sizeof(cl_mem), (void *)&dev_nrht);
+      ezcl_set_kernel_arg(kernel_set_boundary_refinement, 3,  sizeof(cl_mem), (void *)&dev_nbot);
+      ezcl_set_kernel_arg(kernel_set_boundary_refinement, 4,  sizeof(cl_mem), (void *)&dev_ntop);
+      ezcl_set_kernel_arg(kernel_set_boundary_refinement, 5,  sizeof(cl_mem), (void *)&dev_i);
+      ezcl_set_kernel_arg(kernel_set_boundary_refinement, 6,  sizeof(cl_mem), (void *)&dev_j);
+      ezcl_set_kernel_arg(kernel_set_boundary_refinement, 7,  sizeof(cl_mem), (void *)&dev_celltype);
+      ezcl_set_kernel_arg(kernel_set_boundary_refinement, 8,  sizeof(cl_mem), (void *)&dev_mpot);
+      ezcl_set_kernel_arg(kernel_set_boundary_refinement, 9,  sizeof(cl_mem), (void *)&dev_redscratch);
+      ezcl_set_kernel_arg(kernel_set_boundary_refinement, 10, sizeof(cl_mem), (void *)&dev_ioffset);
+      ezcl_set_kernel_arg(kernel_set_boundary_refinement, 11, sizeof(cl_mem), (void *)&dev_result);
+      ezcl_set_kernel_arg(kernel_set_boundary_refinement, 12, local_work_size*sizeof(cl_int2),    NULL);
+
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_set_boundary_refinement, 1, NULL, &global_work_size, &local_work_size, NULL);
+
+      gpu_rezone_count2(block_size, local_work_size, dev_redscratch, dev_result);
+
+      int my_result[2];
+      ezcl_enqueue_read_buffer(command_queue, dev_result, CL_TRUE, 0, 1*sizeof(cl_int2), &my_result, NULL); // blocking read of both counts
+      //printf("Result is %lu icount %d jcount %d\n", ncells+my_result[0]-my_result[1],my_result[0],my_result[1]);
+      icount = my_result[0];
+      jcount = my_result[1];
+
+      icount_global = icount;
+      jcount_global = jcount;
+#ifdef HAVE_MPI
+      if (parallel) {
+         int count[2], count_global[2];
+         count[0] = icount;
+         count[1] = jcount;
+         MPI_Allreduce(&count, &count_global, 2, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+         icount_global = count_global[0];
+         jcount_global = count_global[1];
+      }
+#endif
+      // gpu_rezone_scan launches kernel_reduction_scan2 over dev_ioffset, again with dev_result as the result buffer.
+      gpu_rezone_scan(block_size, local_work_size, dev_ioffset, dev_result);
+
+      //ezcl_enqueue_read_buffer(command_queue, dev_result, CL_TRUE, 0, sizeof(cl_int), &my_result, NULL);
+      //printf("After scan, Result is %d\n", my_result[0]);
+
+      ezcl_device_memory_delete(dev_result);
+      ezcl_device_memory_delete(dev_redscratch);
+
+   } else {
+      ezcl_device_memory_delete(dev_mpot);
+      dev_mpot = NULL; // the caller's handle is a reference, so it sees the NULL
+   }
+
+   gpu_do_rezone = (icount_global != 0 || jcount_global != 0) ? true : false;
+
+   if (TIMING_LEVEL >= 2) gpu_timers[MESH_TIMER_REFINE_SMOOTH] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
+
+   return ncells+icount-jcount;
+}
+#endif
+
+// Release the mesh's host arrays and, in OpenCL builds, its device buffers and kernels.
+// In builds with both HAVE_J7 and HAVE_MPI, mesh_memory.pfini() runs last when parallel is set.
+void Mesh::terminate(void)
+{
+   mesh_memory.memory_delete(i);
+   mesh_memory.memory_delete(j);
+   mesh_memory.memory_delete(level);
+   mesh_memory.memory_delete(celltype);
+   if (neighbor_remap) {
+      mesh_memory.memory_delete(nlft);
+      mesh_memory.memory_delete(nrht);
+      mesh_memory.memory_delete(nbot);
+      mesh_memory.memory_delete(ntop);
+   }
+
+#ifdef HAVE_OPENCL
+   hash_lib_terminate();
+   // Device copies of the per-level tables.
+   ezcl_device_memory_delete(dev_levtable);
+   ezcl_device_memory_delete(dev_levdx);
+   ezcl_device_memory_delete(dev_levdy);
+   ezcl_device_memory_delete(dev_levibeg);
+   ezcl_device_memory_delete(dev_leviend);
+   ezcl_device_memory_delete(dev_levjbeg);
+   ezcl_device_memory_delete(dev_levjend);
+   // Device copies of the per-cell arrays; the neighbor arrays only if they were set up.
+   ezcl_device_memory_delete(dev_level);
+   ezcl_device_memory_delete(dev_i);
+   ezcl_device_memory_delete(dev_j);
+   ezcl_device_memory_delete(dev_celltype);
+   if (neighbor_remap && dev_nlft != NULL) {
+      ezcl_device_memory_delete(dev_nlft);
+      ezcl_device_memory_delete(dev_nrht);
+      ezcl_device_memory_delete(dev_nbot);
+      ezcl_device_memory_delete(dev_ntop);
+   }
+   // Kernels.
+   ezcl_kernel_release(kernel_reduction_scan2);
+   ezcl_kernel_release(kernel_reduction_count);
+   ezcl_kernel_release(kernel_reduction_count2);
+   ezcl_kernel_release(kernel_hash_adjust_sizes);
+   ezcl_kernel_release(kernel_hash_setup);
+   ezcl_kernel_release(kernel_hash_setup_local);
+   ezcl_kernel_release(kernel_neighbor_init);
+   ezcl_kernel_release(kernel_calc_neighbors);
+   ezcl_kernel_release(kernel_calc_neighbors_local);
+   ezcl_kernel_release(kernel_calc_border_cells);
+   ezcl_kernel_release(kernel_calc_border_cells2);
+   ezcl_kernel_release(kernel_finish_scan);
+   ezcl_kernel_release(kernel_get_border_data);
+   ezcl_kernel_release(kernel_calc_layer1);
+   ezcl_kernel_release(kernel_calc_layer1_sethash);
+   ezcl_kernel_release(kernel_calc_layer2);
+   ezcl_kernel_release(kernel_get_border_data2);
+   ezcl_kernel_release(kernel_calc_layer2_sethash);
+   //ezcl_kernel_release(kernel_calc_neighbors_local2);
+   ezcl_kernel_release(kernel_copy_mesh_data);
+   ezcl_kernel_release(kernel_fill_mesh_ghost);
+   ezcl_kernel_release(kernel_fill_neighbor_ghost);
+   ezcl_kernel_release(kernel_set_corner_neighbor);
+   ezcl_kernel_release(kernel_adjust_neighbors_local);
+   //ezcl_kernel_release(kernel_copy_ghost_data);
+   //ezcl_kernel_release(kernel_adjust_neighbors);
+   ezcl_kernel_release(kernel_hash_size);
+   ezcl_kernel_release(kernel_finish_hash_size);
+   ezcl_kernel_release(kernel_calc_spatial_coordinates);
+   ezcl_kernel_release(kernel_do_load_balance_lower);
+   ezcl_kernel_release(kernel_do_load_balance_middle);
+   ezcl_kernel_release(kernel_do_load_balance_upper);
+#ifndef MINIMUM_PRECISION
+   ezcl_kernel_release(kernel_do_load_balance_double);
+#endif
+   ezcl_kernel_release(kernel_do_load_balance_float);
+   ezcl_kernel_release(kernel_refine_smooth);
+   ezcl_kernel_release(kernel_coarsen_smooth);
+   ezcl_kernel_release(kernel_coarsen_check_block);
+   ezcl_kernel_release(kernel_rezone_all);
+   ezcl_kernel_release(kernel_rezone_neighbors);
+#ifndef MINIMUM_PRECISION
+   ezcl_kernel_release(kernel_rezone_one_double);
+#endif
+   ezcl_kernel_release(kernel_rezone_one_float);
+   ezcl_kernel_release(kernel_copy_mpot_ghost_data);
+   ezcl_kernel_release(kernel_set_boundary_refinement);
+   terminate_kernel_2stage_sum();
+   terminate_kernel_2stage_sum_int();
+   if (! have_boundary) ezcl_kernel_release(kernel_count_BCs);
+#endif
+#if defined(HAVE_J7) && defined(HAVE_MPI)
+   if (parallel) mesh_memory.pfini();
+#endif
+}
+
+// Count how many cells the flags in mpot will add and remove among the first ncells cells.
+//   mpot   - per-cell flag: > 0 is counted as refinement, < 0 as coarsening, 0 is ignored
+//   icount - out: cells added by refinement (3 per REAL_CELL, 1 per other cell)
+//   jcount - out: cells removed by coarsening, accumulated as a value <= 0
+// Returns icount + jcount.
+int Mesh::rezone_count(vector<int> mpot, int &icount, int &jcount)
+{
+   int cells_added   = 0;
+   int cells_removed = 0;   // only ever decremented
+
+#ifdef _OPENMP
+#pragma omp parallel for reduction (+:cells_removed,cells_added)
+#endif
+   for (uint cell = 0; cell < ncells; ++cell) {
+      const int flag = mpot[cell];
+
+      if (flag < 0) {
+         // One cell of each coarsening group remains and is not counted as removed:
+         // the lower-left one for real cells, lower-left or upper-right for boundary cells.
+         bool survives;
+         if (celltype[cell] == REAL_CELL) {
+            survives = is_lower_left(i[cell],j[cell]);
+         } else {
+            survives = is_upper_right(i[cell],j[cell]) || is_lower_left(i[cell],j[cell]);
+         }
+         if (! survives) cells_removed--;
+      } else if (flag > 0) {
+         cells_added += (celltype[cell] == REAL_CELL) ? 3 : 1;
+      }
+   }
+
+   icount = cells_added;
+   jcount = cells_removed;
+   return(icount+jcount);
+}
+
+#ifdef HAVE_OPENCL
+void Mesh::gpu_rezone_count2(size_t block_size, size_t local_work_size, cl_mem dev_redscratch, cl_mem &dev_result)
+{
+   // Launch kernel_reduction_count2 as a single work-group of local_work_size items, with
+   // dev_redscratch and dev_result as its buffer arguments.
+   //   block_size      - passed to the kernel as its first argument ("isize")
+   //   local_work_size - work-group size; also sizes the cl_int2 local tile
+   // No event is requested for the launch.
+   cl_command_queue command_queue = ezcl_get_command_queue();
+
+   // Argument 0 is declared as sizeof(cl_int), so hand over a real cl_int: pointing at the wider
+   // size_t would copy whichever half of it comes first in memory (the wrong one on big-endian).
+   cl_int isize = (cl_int)block_size;
+   ezcl_set_kernel_arg(kernel_reduction_count2, 0, sizeof(cl_int),  (void *)&isize);           // isize
+   ezcl_set_kernel_arg(kernel_reduction_count2, 1, sizeof(cl_mem),  (void *)&dev_redscratch);  // redscratch
+   ezcl_set_kernel_arg(kernel_reduction_count2, 2, sizeof(cl_mem),  (void *)&dev_result);      // result
+   ezcl_set_kernel_arg(kernel_reduction_count2, 3, local_work_size*sizeof(cl_int2),    NULL);  // local tile
+   ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduction_count2, 1, NULL, &local_work_size, &local_work_size, NULL);
+}
+
+void Mesh::gpu_rezone_count(size_t block_size, size_t local_work_size, cl_mem dev_redscratch, cl_mem &dev_result)
+{
+   // Launch kernel_reduction_count as a single work-group of local_work_size items, with
+   // dev_redscratch and dev_result as its buffer arguments.
+   //   block_size      - passed to the kernel as its first argument ("isize")
+   //   local_work_size - work-group size; also sizes the cl_int local tile
+   // No event is requested for the launch.
+   cl_command_queue command_queue = ezcl_get_command_queue();
+
+   // Argument 0 is declared as sizeof(cl_int), so hand over a real cl_int: pointing at the wider
+   // size_t would copy whichever half of it comes first in memory (the wrong one on big-endian).
+   cl_int isize = (cl_int)block_size;
+   ezcl_set_kernel_arg(kernel_reduction_count, 0, sizeof(cl_int),  (void *)&isize);           // isize
+   ezcl_set_kernel_arg(kernel_reduction_count, 1, sizeof(cl_mem),  (void *)&dev_redscratch);  // redscratch
+   ezcl_set_kernel_arg(kernel_reduction_count, 2, sizeof(cl_mem),  (void *)&dev_result);      // result
+   ezcl_set_kernel_arg(kernel_reduction_count, 3, local_work_size*sizeof(cl_int),    NULL);   // local tile
+   ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduction_count, 1, NULL, &local_work_size, &local_work_size, NULL);
+}
+
+void Mesh::gpu_rezone_scan(size_t block_size, size_t local_work_size, cl_mem dev_ioffset, cl_mem &dev_result)
+{
+   // Launch kernel_reduction_scan2 as a single work-group of local_work_size items, with
+   // dev_ioffset and dev_result as its buffer arguments.
+   //   block_size      - passed to the kernel as its first argument ("isize")
+   //   local_work_size - work-group size; also sizes the cl_uint2 local tile
+   // No event is requested for the launch.
+   cl_command_queue command_queue = ezcl_get_command_queue();
+
+   // Argument 0 is declared as sizeof(cl_int), so hand over a real cl_int: pointing at the wider
+   // size_t would copy whichever half of it comes first in memory (the wrong one on big-endian).
+   cl_int isize = (cl_int)block_size;
+   ezcl_set_kernel_arg(kernel_reduction_scan2, 0, sizeof(cl_int),  (void *)&isize);        // isize
+   ezcl_set_kernel_arg(kernel_reduction_scan2, 1, sizeof(cl_mem),  (void *)&dev_ioffset);  // ioffset
+   ezcl_set_kernel_arg(kernel_reduction_scan2, 2, sizeof(cl_mem),  (void *)&dev_result);   // result
+   ezcl_set_kernel_arg(kernel_reduction_scan2, 3, local_work_size*sizeof(cl_uint2),    NULL); // local tile
+   ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduction_scan2, 1, NULL, &local_work_size, &local_work_size, NULL);
+}
+#endif
+
+// Initialise the k-D tree and add one axis-aligned box per cell, spanning x..x+dx and y..y+dy.
+void Mesh::kdtree_setup()
+{
+   KDTree_Initialize(&tree);
+   TBounds cell_box;   // reused for every cell; only the x and y extents are assigned
+   for (uint cell = 0; cell < ncells; cell++) {
+      cell_box.min.x = x[cell];
+      cell_box.min.y = y[cell];
+      cell_box.max.x = x[cell]+dx[cell];
+      cell_box.max.y = y[cell]+dy[cell];
+      KDTree_AddElement(&tree, &cell_box);
+   }
+}
+
+void Mesh::calc_spatial_coordinates(int ibase)
+{
+   // Fill x, y (cell origin) and dx, dy (cell size) for every cell from its i/j/level indices.
+   //   ibase - index base subtracted from i and j; only used when have_boundary is set,
+   //           otherwise the per-level lev_ibegin/lev_jbegin offsets are subtracted instead.
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+
+   x.resize(ncells);
+   dx.resize(ncells);
+   y.resize(ncells);
+   dy.resize(ncells);
+
+#ifdef _OPENMP
+#pragma omp parallel
+   {
+#endif
+   int lowerBounds, upperBounds;   // presumably this thread's slice of the cells -- see set_bounds/get_bounds
+   set_bounds(ncells);
+   get_bounds(lowerBounds, upperBounds);
+
+   if (have_boundary) {
+      for (int ic = lowerBounds; ic < upperBounds; ic++) {
+         int lev = level[ic];
+         x[ic]  = xmin + (lev_deltax[lev] * (i[ic] - ibase));
+         dx[ic] =        lev_deltax[lev];
+         y[ic]  = ymin + (lev_deltay[lev] * (j[ic] - ibase));
+         dy[ic] =        lev_deltay[lev];
+      }
+   } else {
+      for (int ic = lowerBounds; ic < upperBounds; ic++) {
+         int lev = level[ic];
+         x[ic]  = xmin + (lev_deltax[lev] * (i[ic] - lev_ibegin[lev]));
+         dx[ic] =        lev_deltax[lev];
+         y[ic]  = ymin + (lev_deltay[lev] * (j[ic] - lev_jbegin[lev]));
+         dy[ic] =        lev_deltay[lev];
+      }
+   }
+#ifdef _OPENMP
+#pragma omp barrier
+   } // end parallel region
+#endif
+   cpu_timers[MESH_TIMER_CALC_SPATIAL_COORDINATES] += cpu_timer_stop(tstart_cpu); // after the region: one update, not one racing update per thread
+}
+
+#ifdef HAVE_OPENCL
+void Mesh::gpu_calc_spatial_coordinates(cl_mem dev_x, cl_mem dev_dx, cl_mem dev_y, cl_mem dev_dy)
+{  // Runs kernel_calc_spatial_coordinates with dev_x/dev_dx/dev_y/dev_dy as kernel arguments 5-8 and waits for it to finish.
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+
+   cl_event calc_spatial_coordinates_event;
+
+   cl_command_queue command_queue = ezcl_get_command_queue();
+   // NOTE(review): assuming MIN is the usual minimum macro, ncells == 0 gives local_work_size == 0 and the next line divides by it -- confirm this is never called on an empty mesh
+   size_t local_work_size = MIN(ncells, TILE_SIZE);
+   size_t global_work_size = ((ncells + local_work_size - 1) /local_work_size) * local_work_size; // ncells rounded up to a multiple of local_work_size
+
+// Only coded for base 0 and have boundary
+//  Need:
+//     xmin
+//     ymin
+//
+//     lev_deltax -- dev_levdx
+//     lev_deltay -- dev_levdy
+//     x
+//     dx
+//     y
+//     dy
+//     level
+//     i
+//     j
+
+   ezcl_set_kernel_arg(kernel_calc_spatial_coordinates,  0, sizeof(cl_int),    (void *)&ncells);
+   ezcl_set_kernel_arg(kernel_calc_spatial_coordinates,  1, sizeof(cl_real_t), (void *)&xmin);
+   ezcl_set_kernel_arg(kernel_calc_spatial_coordinates,  2, sizeof(cl_real_t), (void *)&ymin);
+   ezcl_set_kernel_arg(kernel_calc_spatial_coordinates,  3, sizeof(cl_mem),    (void *)&dev_levdx);
+   ezcl_set_kernel_arg(kernel_calc_spatial_coordinates,  4, sizeof(cl_mem),    (void *)&dev_levdy);
+   ezcl_set_kernel_arg(kernel_calc_spatial_coordinates,  5, sizeof(cl_mem),    (void *)&dev_x);
+   ezcl_set_kernel_arg(kernel_calc_spatial_coordinates,  6, sizeof(cl_mem),    (void *)&dev_dx);
+   ezcl_set_kernel_arg(kernel_calc_spatial_coordinates,  7, sizeof(cl_mem),    (void *)&dev_y);
+   ezcl_set_kernel_arg(kernel_calc_spatial_coordinates,  8, sizeof(cl_mem),    (void *)&dev_dy);
+   ezcl_set_kernel_arg(kernel_calc_spatial_coordinates,  9, sizeof(cl_mem),    (void *)&dev_level);
+   ezcl_set_kernel_arg(kernel_calc_spatial_coordinates, 10, sizeof(cl_mem),    (void *)&dev_i);
+   ezcl_set_kernel_arg(kernel_calc_spatial_coordinates, 11, sizeof(cl_mem),    (void *)&dev_j);
+   ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_spatial_coordinates, 1, NULL, &global_work_size, &local_work_size, &calc_spatial_coordinates_event);
+   // Wait on the launch's event before stopping the timer, then release the event.
+   ezcl_wait_for_events(1, &calc_spatial_coordinates_event);
+   ezcl_event_release(calc_spatial_coordinates_event);
+
+   gpu_timers[MESH_TIMER_CALC_SPATIAL_COORDINATES] += (long)(cpu_timer_stop(tstart_cpu) * 1.0e9); // elapsed time scaled by 1e9 (presumably seconds to nanoseconds)
+}
+#endif
+
+// Compute the bounding box of the mesh: the smallest cell origin and the largest cell
+// far edge (origin + size) along each axis, stored in xmin/xmax, ymin/ymax, zmin/zmax.
+// z is scanned only when ndim > TWO_DIMENSIONAL; otherwise zmin/zmax keep their sentinels.
+// With MPI in parallel mode the x and y extents are then reduced across MPI_COMM_WORLD.
+void Mesh::calc_minmax(void)
+{
+   // Sentinel starting values for the scans below.
+   xmin = +1.0e30;
+   ymin = +1.0e30;
+   zmin = +1.0e30;
+   xmax = -1.0e30;
+   ymax = -1.0e30;
+   zmax = -1.0e30;
+
+   for (uint cell = 0; cell < ncells; cell++) {
+      if (x[cell] < xmin) xmin = x[cell];
+      if (y[cell] < ymin) ymin = y[cell];
+
+      real_t x_far = x[cell]+dx[cell];
+      real_t y_far = y[cell]+dy[cell];
+      if (x_far > xmax) xmax = x_far;
+      if (y_far > ymax) ymax = y_far;
+   }
+
+   if (ndim > TWO_DIMENSIONAL) {
+      for (uint cell = 0; cell < ncells; cell++) {
+         if (z[cell] < zmin) zmin = z[cell];
+
+         real_t z_far = z[cell]+dz[cell];
+         if (z_far > zmax) zmax = z_far;
+      }
+   }
+
+#ifdef HAVE_MPI
+   if (parallel) {
+      real_t lo_x, hi_x, lo_y, hi_y;   // receive buffers for the reduced extents
+      MPI_Allreduce(&xmin, &lo_x, 1, MPI_REAL_T, MPI_MIN, MPI_COMM_WORLD);
+      MPI_Allreduce(&xmax, &hi_x, 1, MPI_REAL_T, MPI_MAX, MPI_COMM_WORLD);
+      MPI_Allreduce(&ymin, &lo_y, 1, MPI_REAL_T, MPI_MIN, MPI_COMM_WORLD);
+      MPI_Allreduce(&ymax, &hi_y, 1, MPI_REAL_T, MPI_MAX, MPI_COMM_WORLD);
+      xmin = lo_x;
+      xmax = hi_x;
+      ymin = lo_y;
+      ymax = hi_y;
+   }
+#endif
+
+
+}
+// Compute the extent of the cell centres (origin + half the cell size) along each axis,
+// stored in xcentermin/xcentermax, ycentermin/ycentermax, zcentermin/zcentermax.
+// With MPI in parallel mode the x and y extents are then reduced across MPI_COMM_WORLD.
+void Mesh::calc_centerminmax(void)
+{
+   xcentermin = +1.0e30;  xcentermax = -1.0e30;
+   ycentermin = +1.0e30;  ycentermax = -1.0e30;
+   zcentermin = +1.0e30;  zcentermax = -1.0e30;
+
+   for (uint cell = 0; cell < ncells; cell++) {
+      real_t x_mid = x[cell]+0.5*dx[cell];
+      real_t y_mid = y[cell]+0.5*dy[cell];
+      if (x_mid < xcentermin) xcentermin = x_mid;
+      if (x_mid > xcentermax) xcentermax = x_mid;
+      if (y_mid < ycentermin) ycentermin = y_mid;
+      if (y_mid > ycentermax) ycentermax = y_mid;
+   }
+   if (ndim > TWO_DIMENSIONAL) {
+      for (uint cell = 0; cell < ncells; cell++) {
+         real_t z_mid = z[cell]+0.5*dz[cell];
+         if (z_mid < zcentermin) zcentermin = z_mid;
+         if (z_mid > zcentermax) zcentermax = z_mid;
+      }
+   }
+
+#ifdef HAVE_MPI
+   if (parallel) {
+      real_t lo_x, hi_x, lo_y, hi_y;   // receive buffers for the reduced extents
+      MPI_Allreduce(&xcentermin, &lo_x, 1, MPI_REAL_T, MPI_MIN, MPI_COMM_WORLD);
+      MPI_Allreduce(&xcentermax, &hi_x, 1, MPI_REAL_T, MPI_MAX, MPI_COMM_WORLD);
+      MPI_Allreduce(&ycentermin, &lo_y, 1, MPI_REAL_T, MPI_MIN, MPI_COMM_WORLD);
+      MPI_Allreduce(&ycentermax, &hi_y, 1, MPI_REAL_T, MPI_MAX, MPI_COMM_WORLD);
+      xcentermin = lo_x;
+      xcentermax = hi_x;
+      ycentermin = lo_y;
+      ycentermax = hi_y;
+   }
+#endif
+}
+
+void Mesh::rezone_all(int icount, int jcount, vector<int> mpot, int have_state, MallocPlus &state_memory)
+{
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+
+   if (! do_rezone) {
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+   {
+#endif
+      index.clear();
+      index.resize(ncells);
+#ifdef _OPENMP
+   }
+#pragma omp barrier
+#endif
+
+#ifdef _OPENMP
+#pragma omp for
+#endif
+      for (uint ic=0; ic<ncells; ic++){
+         index[ic]=ic;
+      }
+
+#ifdef _OPENMP
+#pragma omp master
+#endif
+      cpu_timers[MESH_TIMER_REZONE_ALL] += cpu_timer_stop(tstart_cpu);
+
+   } else {
+
+// sign for jcount is different in GPU and CPU code -- abs is a quick fix
+   int add_ncells = icount - abs(jcount);
+
+#ifdef _OPENMP
+#pragma omp master
+#endif
+   cpu_counters[MESH_COUNTER_REZONE]++;
+
+   static vector<int> celltype_save;
+
+   static int new_ncells;
+
+   static int *i_old, *j_old, *level_old;
+
+   static int ifirst;
+   static int ilast;
+   static int jfirst;
+   static int jlast;
+   static int level_first;
+   static int level_last;
+
+   static vector<int> new_ic;
+
+#ifdef _OPENMP
+#pragma omp master
+   {
+#endif
+      celltype_save.resize(ncells);
+#ifdef _OPENMP
+   }
+#pragma omp barrier
+#endif
+
+   if (have_state) {
+#ifdef _OPENMP
+#pragma omp for
+#endif
+      for (int ic = 0; ic < (int)ncells; ic++){
+         celltype_save[ic] = celltype[ic];
+      }
+   }
+
+#ifdef _OPENMP
+#pragma omp master
+   {
+#endif
+   new_ncells = ncells + add_ncells;
+#ifdef _OPENMP
+   }
+#pragma omp barrier
+#endif
+
+// int ref_entry_count = 0;
+   if (have_state){
+#ifdef _OPENMP
+#pragma omp for
+#endif
+      for (uint ic=0; ic<ncells; ic++) {
+//       if (mpot[ic] > 0) ref_entry_count++;
+         if (mpot[ic] < 0) {
+            // Normal cell coarsening
+            if (is_lower_left(i[ic],j[ic]) ) mpot[ic] = -2;
+            // Boundary cell case
+            if (celltype[ic] != REAL_CELL && is_upper_right(i[ic],j[ic]) ) mpot[ic] = -3;
+         }
+      }
+   }
+
+   //  Initialize new variables
+// int *i_old, *j_old, *level_old;
+
+   int flags = RESTART_DATA;
+#ifdef HAVE_J7
+   if (parallel) flags = LOAD_BALANCE_MEMORY;
+#endif
+
+#ifdef _OPENMP
+#pragma omp master
+   {
+#endif
+   i_old     = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "i_old",     flags);
+   j_old     = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "j_old",     flags);
+   level_old = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "level_old", flags);
+
+   mesh_memory.memory_swap(&i,     &i_old);
+   mesh_memory.memory_swap(&j,     &j_old);
+   mesh_memory.memory_swap(&level, &level_old);
+
+   index.clear();
+   index.resize(new_ncells);
+#ifdef _OPENMP
+   }
+#pragma omp barrier
+#endif
+
+   static vector<int> order; //  Vector of refined mesh traversal order; set to -1 to indicate errors.
+   //
+   //vector<int>  invorder(4, -1); //  Vector mapping location from base index.
+
+   //int ref_entry = 0;
+
+#ifdef _OPENMP
+#pragma omp master
+   {
+#endif
+   //  Insert new cells into the mesh at the point of refinement.
+   order.resize(4,    -1); //  Vector of refined mesh traversal order; set to -1 to indicate errors.
+
+   ifirst      = 0;
+   ilast       = 0;
+   jfirst      = 0;
+   jlast       = 0;
+   level_first = 0;
+   level_last  = 0;
+
+   if (parallel) {
+#ifdef HAVE_MPI
+      MPI_Request req[12];
+      MPI_Status status[12];
+
+      static int prev     = MPI_PROC_NULL;
+      static int next     = MPI_PROC_NULL;
+
+      if (mype != 0)         prev = mype-1;
+      if (mype < numpe - 1)  next = mype+1;
+
+      MPI_Isend(&i_old[ncells-1],     1,MPI_INT,next,1,MPI_COMM_WORLD,req+0);
+      MPI_Irecv(&ifirst,              1,MPI_INT,prev,1,MPI_COMM_WORLD,req+1);
+
+      MPI_Isend(&i_old[0],            1,MPI_INT,prev,1,MPI_COMM_WORLD,req+2);
+      MPI_Irecv(&ilast,               1,MPI_INT,next,1,MPI_COMM_WORLD,req+3);
+
+      MPI_Isend(&j_old[ncells-1],     1,MPI_INT,next,1,MPI_COMM_WORLD,req+4);
+      MPI_Irecv(&jfirst,              1,MPI_INT,prev,1,MPI_COMM_WORLD,req+5);
+
+      MPI_Isend(&j_old[0],            1,MPI_INT,prev,1,MPI_COMM_WORLD,req+6);
+      MPI_Irecv(&jlast,               1,MPI_INT,next,1,MPI_COMM_WORLD,req+7);
+
+      MPI_Isend(&level_old[ncells-1], 1,MPI_INT,next,1,MPI_COMM_WORLD,req+8);
+      MPI_Irecv(&level_first,         1,MPI_INT,prev,1,MPI_COMM_WORLD,req+9);
+
+      MPI_Isend(&level_old[0],        1,MPI_INT,prev,1,MPI_COMM_WORLD,req+10);
+      MPI_Irecv(&level_last,          1,MPI_INT,next,1,MPI_COMM_WORLD,req+11);
+
+      MPI_Waitall(12, req, status);
+#endif
+   }
+
+#ifdef _OPENMP
+   }
+#pragma omp barrier
+#endif
+
+#ifdef REZONE_NO_OPTIMIZATION
+   vector<int>  invorder(4, -1); //  Vector mapping location from base index.
+   for (int ic = 0, nc = 0; ic < (int)ncells; ic++)
+   {
+      if (mpot[ic] == 0 || mpot[ic] == -1000000)
+      {  //  No change is needed; copy the old cell straight to the new mesh at this location.
+         index[ic] = nc;
+         i[nc]     = i_old[ic];
+         j[nc]     = j_old[ic];
+         level[nc] = level_old[ic];
+         nc++;
+      } //  Complete no change needed.
+      
+      else if (mpot[ic] < 0)
+      {  //  Coarsening is needed; remove this cell and the other three and replace them with one.
+         index[ic] = nc;
+         if (mpot[ic] <= -2) {
+            //printf("                     %d: DEBUG -- coarsening cell %d nc %d\n",mype,ic,nc);
+            i[nc] = i_old[ic]/2;
+            j[nc] = j_old[ic]/2;
+            level[nc] = level_old[ic] - 1;
+            nc++;
+         }
+      } //  Coarsening complete.
+      
+      else if (mpot[ic] > 0)
+      {  //  Refinement is needed; insert four cells where once was one.
+         index[ic] = nc;
+         if (celltype[ic] == REAL_CELL)
+         {  
+            set_refinement_order(&order[0], ic, ifirst, ilast, jfirst, jlast,
+                                 level_first, level_last, i_old, j_old, level_old);
+
+            //  Create the cells in the correct order and orientation.
+            for (int ii = 0; ii < 4; ii++)
+            {  level[nc] = level_old[ic] + 1;
+               switch (order[ii])
+               {  case SW:
+                     // lower left
+                     invorder[SW] = ii;
+                     i[nc]     = i_old[ic]*2;
+                     j[nc]     = j_old[ic]*2;
+                     nc++;
+                     break;
+                     
+                  case SE:
+                     // lower right
+                     invorder[SE] = ii;
+                     i[nc]     = i_old[ic]*2 + 1;
+                     j[nc]     = j_old[ic]*2;
+                     nc++;
+                     break;
+                     
+                  case NW:
+                     // upper left
+                     invorder[NW] = ii;
+                     i[nc]     = i_old[ic]*2;
+                     j[nc]     = j_old[ic]*2 + 1;
+                     nc++;
+                     break;
+                     
+                  case NE:
+                     // upper right
+                     invorder[NE] = ii;
+                     i[nc]     = i_old[ic]*2 + 1;
+                     j[nc]     = j_old[ic]*2 + 1;
+                     nc++;
+                     break; } } //  Complete cell refinement.
+         }  //  Complete real cell refinement.
+         
+         else if (celltype[ic] == LEFT_BOUNDARY) {
+            // lower
+            i[nc]  = i_old[ic]*2 + 1;
+            j[nc]  = j_old[ic]*2;
+            level[nc] = level_old[ic] + 1;
+            nc++;
+            
+            // upper
+            i[nc] = i_old[ic]*2 + 1;
+            j[nc] = j_old[ic]*2 + 1;
+            level[nc] = level_old[ic] + 1;
+            nc++;
+         }
+         else if (celltype[ic] == RIGHT_BOUNDARY) {
+            // lower
+            i[nc]  = i_old[ic]*2;
+            j[nc]  = j_old[ic]*2;
+            level[nc] = level_old[ic] + 1;
+            nc++;
+            
+            // upper
+            i[nc] = i_old[ic]*2;
+            j[nc] = j_old[ic]*2 + 1;
+            level[nc] = level_old[ic] + 1;
+            nc++;
+         }
+         else if (celltype[ic] == BOTTOM_BOUNDARY) {
+            // left
+            i[nc]  = i_old[ic]*2;
+            j[nc]  = j_old[ic]*2 + 1;
+            level[nc] = level_old[ic] + 1;
+            nc++;
+            
+            // right
+            i[nc] = i_old[ic]*2 + 1;
+            j[nc] = j_old[ic]*2 + 1;
+            level[nc] = level_old[ic] + 1;
+            nc++;
+         }
+         else if (celltype[ic] == TOP_BOUNDARY) {
+            // right
+            i[nc] = i_old[ic]*2 + 1;
+            j[nc] = j_old[ic]*2;
+            level[nc] = level_old[ic] + 1;
+            nc++;
+
+            // left
+            i[nc]  = i_old[ic]*2;
+            j[nc]  = j_old[ic]*2;
+            level[nc] = level_old[ic] + 1;
+            nc++;
+         }
+      } //  Complete refinement needed.
+   } //  Complete addition of new cells to the mesh.
+
+   mesh_memory.memory_delete(i_old);
+   mesh_memory.memory_delete(j_old);
+   mesh_memory.memory_delete(level_old);
+
+   calc_celltype(new_ncells);
+
+   if (have_state){
+      flags = RESTART_DATA;
+      MallocPlus state_memory_old = state_memory;
+      malloc_plus_memory_entry *memory_item;
+
+      for (memory_item = state_memory_old.memory_entry_by_name_begin();
+           memory_item != state_memory_old.memory_entry_by_name_end();
+           memory_item = state_memory_old.memory_entry_by_name_next() ) {
+         //printf("DEBUG -- it.mem_name %s elsize %lu\n",memory_item->mem_name,memory_item->mem_elsize);
+         if (memory_item->mem_elsize == 8) {
+            double *state_temp_double = (double *)state_memory.memory_malloc(new_ncells, sizeof(double),
+                                                                             "state_temp_double", flags);
+
+            double *mem_ptr_double = (double *)memory_item->mem_ptr;
+
+            //ref_entry = 0;
+            for (int ic=0, nc=0; ic<(int)ncells; ic++) {
+
+               if (mpot[ic] == 0) {
+                  state_temp_double[nc] = mem_ptr_double[ic];
+                  nc++;
+               } else if (mpot[ic] < 0){
+                  if (mpot[ic] == -2) {
+                     int nr = nrht[ic];
+                     int nt = ntop[ic];
+                     int nrt = nrht[nt];
+                     state_temp_double[nc] = (mem_ptr_double[ic] + mem_ptr_double[nr] +
+                                              mem_ptr_double[nt] + mem_ptr_double[nrt])*0.25;
+                     nc++;
+                  }
+                  if (mpot[ic] == -3) {
+                     int nl = nlft[ic];
+                     int nb = nbot[ic];
+                     int nlb = nlft[nb];
+                     state_temp_double[nc] = (mem_ptr_double[ic] + mem_ptr_double[nl] +
+                                              mem_ptr_double[nb] + mem_ptr_double[nlb])*0.25;
+                     nc++;
+                  }
+               } else if (mpot[ic] > 0){
+                  // lower left
+                  state_temp_double[nc] = mem_ptr_double[ic];
+                  nc++;
+
+                  // lower right
+                  state_temp_double[nc] = mem_ptr_double[ic];
+                  nc++;
+
+                  if (celltype_save[ic] == REAL_CELL){
+                     // upper left
+                     state_temp_double[nc] = mem_ptr_double[ic];
+                     nc++;
+
+                     // upper right
+                     state_temp_double[nc] = mem_ptr_double[ic];
+                     nc++;
+                  }
+               }
+            }
+
+            state_memory.memory_replace(mem_ptr_double, state_temp_double);
+         } else if (memory_item->mem_elsize == 4) {
+            float *state_temp_float = (float *)state_memory.memory_malloc(new_ncells, sizeof(float),
+                                                                          "state_temp_float", flags);
+
+            float *mem_ptr_float = (float *)memory_item->mem_ptr;
+
+            for (int ic=0, nc=0; ic<(int)ncells; ic++) {
+
+               if (mpot[ic] == 0) {
+                  state_temp_float[nc] = mem_ptr_float[ic];
+                  nc++;
+               } else if (mpot[ic] < 0){
+                  if (mpot[ic] == -2) {
+                     int nr = nrht[ic];
+                     int nt = ntop[ic];
+                     int nrt = nrht[nt];
+                     state_temp_float[nc] = (mem_ptr_float[ic] + mem_ptr_float[nr] +
+                                             mem_ptr_float[nt] + mem_ptr_float[nrt])*0.25;
+                     nc++;
+                  }
+                  if (mpot[ic] == -3) {
+                     int nl = nlft[ic];
+                     int nb = nbot[ic];
+                     int nlb = nlft[nb];
+                     state_temp_float[nc] = (mem_ptr_float[ic] + mem_ptr_float[nl] +
+                                             mem_ptr_float[nb] + mem_ptr_float[nlb])*0.25;
+                     nc++;
+                  }
+               } else if (mpot[ic] > 0){
+                  // lower left
+                  state_temp_float[nc] = mem_ptr_float[ic];
+                  nc++;
+
+                  // lower right
+                  state_temp_float[nc] = mem_ptr_float[ic];
+                  nc++;
+
+                  if (celltype_save[ic] == REAL_CELL){
+                     // upper left
+                     state_temp_float[nc] = mem_ptr_float[ic];
+                     nc++;
+
+                     // upper right
+                     state_temp_float[nc] = mem_ptr_float[ic];
+                     nc++;
+                  }
+               }
+            }
+
+            state_memory.memory_replace(mem_ptr_float, state_temp_float);
+         }
+      }
+   }
+#else
+   // Data parallel optimizations for thread parallel -- slows down serial
+   // code by about 25%
+   static vector<int> add_count;
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+   {
+#endif
+      add_count.resize(ncells);
+      new_ic.resize(ncells+1);
+#ifdef _OPENMP
+   } // end master region
+#pragma omp barrier
+#endif
+
+#ifdef _OPENMP
+#pragma omp for
+#endif
+      for (int ic = 0; ic < (int)ncells; ic++){
+         if (mpot[ic] == 0) {
+            add_count[ic] = 1;
+         } else if (mpot[ic] < 0) {
+            if (mpot[ic] == -2){
+               add_count[ic] = 1;
+            } else {
+               add_count[ic] = 0;
+            }
+         } else if (mpot[ic] > 0) {
+            if (celltype[ic] != REAL_CELL) {
+               add_count[ic] = 2;
+            } else {
+               add_count[ic] = 4;
+            }
+         }
+      }
+
+#ifdef _OPENMP
+#pragma omp barrier
+#endif
+      scan (&add_count[0], &new_ic[0], ncells);
+#ifdef _OPENMP
+#pragma omp barrier
+#endif
+
+#ifdef _OPENMP
+#pragma omp for
+#endif
+   for (int ic = 0; ic < (int)ncells; ic++) {
+   vector<int>  invorder(4, -1); //  Vector mapping location from base index.
+      int nc = new_ic[ic];
+      if (mpot[ic] == 0)
+      {  //  No change is needed; copy the old cell straight to the new mesh at this location.
+         index[ic] = nc;
+         i[nc]     = i_old[ic];
+         j[nc]     = j_old[ic];
+         level[nc] = level_old[ic];
+      } //  Complete no change needed.
+
+      else if (mpot[ic] < 0)
+      {  //  Coarsening is needed; remove this cell and the other three and replace them with one.
+         index[ic] = nc;
+         if (mpot[ic] <= -2) {
+            //printf("                     %d: DEBUG -- coarsening cell %d nc %d\n",mype,ic,nc);
+            i[nc] = i_old[ic]/2;
+            j[nc] = j_old[ic]/2;
+            level[nc] = level_old[ic] - 1;
+         }
+      } //  Coarsening complete.
+
+      else if (mpot[ic] > 0)
+      {  //  Refinement is needed; insert four cells where once was one.
+         index[ic] = nc;
+         if (celltype[ic] == REAL_CELL)
+         {  
+            int order[4];
+            set_refinement_order(&order[0], ic, ifirst, ilast, jfirst, jlast,
+                                 level_first, level_last, i_old, j_old, level_old);
+
+            //  Create the cells in the correct order and orientation.
+            for (int ii = 0; ii < 4; ii++) {
+               level[nc] = level_old[ic] + 1;
+               switch (order[ii]) {
+                  case SW:
+                     // lower left
+                     invorder[SW] = ii;
+                     i[nc]     = i_old[ic]*2;
+                     j[nc]     = j_old[ic]*2;
+                     nc++;
+                     break;
+                     
+                  case SE:
+                     // lower right
+                     invorder[SE] = ii;
+                     i[nc]     = i_old[ic]*2 + 1;
+                     j[nc]     = j_old[ic]*2;
+                     nc++;
+                     break;
+                     
+                  case NW:
+                     // upper left
+                     invorder[NW] = ii;
+                     i[nc]     = i_old[ic]*2;
+                     j[nc]     = j_old[ic]*2 + 1;
+                     nc++;
+                     break;
+                     
+                  case NE:
+                     // upper right
+                     invorder[NE] = ii;
+                     i[nc]     = i_old[ic]*2 + 1;
+                     j[nc]     = j_old[ic]*2 + 1;
+                     nc++;
+                     break;
+                  }
+               } //  Complete cell refinement.
+         }  //  Complete real cell refinement.
+         
+         else if (celltype[ic] == LEFT_BOUNDARY) {
+            // lower
+            i[nc]  = i_old[ic]*2 + 1;
+            j[nc]  = j_old[ic]*2;
+            level[nc] = level_old[ic] + 1;
+            nc++;
+            
+            // upper
+            i[nc] = i_old[ic]*2 + 1;
+            j[nc] = j_old[ic]*2 + 1;
+            level[nc] = level_old[ic] + 1;
+            nc++;
+         }
+         else if (celltype[ic] == RIGHT_BOUNDARY) {
+            // lower
+            i[nc]  = i_old[ic]*2;
+            j[nc]  = j_old[ic]*2;
+            level[nc] = level_old[ic] + 1;
+            nc++;
+            
+            // upper
+            i[nc] = i_old[ic]*2;
+            j[nc] = j_old[ic]*2 + 1;
+            level[nc] = level_old[ic] + 1;
+            nc++;
+         }
+         else if (celltype[ic] == BOTTOM_BOUNDARY) {
+            // left
+            i[nc]  = i_old[ic]*2;
+            j[nc]  = j_old[ic]*2 + 1;
+            level[nc] = level_old[ic] + 1;
+            nc++;
+            
+            // right
+            i[nc] = i_old[ic]*2 + 1;
+            j[nc] = j_old[ic]*2 + 1;
+            level[nc] = level_old[ic] + 1;
+            nc++;
+         }
+         else if (celltype[ic] == TOP_BOUNDARY) {
+            // right
+            i[nc] = i_old[ic]*2 + 1;
+            j[nc] = j_old[ic]*2;
+            level[nc] = level_old[ic] + 1;
+            nc++;
+
+            // left
+            i[nc]  = i_old[ic]*2;
+            j[nc]  = j_old[ic]*2;
+            level[nc] = level_old[ic] + 1;
+            nc++;
+         }
+      } //  Complete refinement needed.
+   } //  Complete addition of new cells to the mesh.
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+   {
+#endif
+   mesh_memory.memory_delete(i_old);
+   mesh_memory.memory_delete(j_old);
+   mesh_memory.memory_delete(level_old);
+#ifdef _OPENMP
+   } // end master region
+#endif
+
+   calc_celltype_threaded(new_ncells);
+
+   if (have_state){
+
+      static MallocPlus state_memory_old;
+      static malloc_plus_memory_entry *memory_begin;
+      static malloc_plus_memory_entry *memory_end;
+      static malloc_plus_memory_entry *memory_next;
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+      {
+#endif
+      state_memory_old = state_memory;
+
+      memory_begin = state_memory_old.memory_entry_by_name_begin();
+      memory_end   = state_memory_old.memory_entry_by_name_end();
+#ifdef _OPENMP
+      } // end master region
+#pragma omp barrier
+#endif
+
+      for (malloc_plus_memory_entry *memory_item = memory_begin;
+           memory_item != memory_end;
+           memory_item = memory_next ) {
+         //ref_entry = 0;
+         //printf("DEBUG -- memory_item->mem_name %s elsize %lu\n",memory_item->mem_name,memory_item->mem_elsize);
+         if (memory_item->mem_elsize == 8) {
+
+            static double *state_temp_double, *mem_ptr_double;
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+            {
+#endif
+               state_temp_double = (double *)state_memory.memory_malloc(new_ncells, sizeof(double),
+                                                                                "state_temp_double", flags);
+               mem_ptr_double = (double *)memory_item->mem_ptr;
+#ifdef _OPENMP
+            } // end master region
+#pragma omp barrier
+#endif
+
+            //ref_entry = 0;
+#ifdef _OPENMP
+#pragma omp for
+#endif
+            for (int ic=0; ic<(int)ncells; ic++) {
+
+               int nc = new_ic[ic];
+               if (mpot[ic] == 0) {
+                  state_temp_double[nc] = mem_ptr_double[ic];
+               } else if (mpot[ic] < 0){
+                  if (mpot[ic] == -2) {
+                     int nr = nrht[ic];
+                     int nt = ntop[ic];
+                     int nrt = nrht[nt];
+                     state_temp_double[nc] = (mem_ptr_double[ic] + mem_ptr_double[nr] +
+                                              mem_ptr_double[nt] + mem_ptr_double[nrt])*0.25;
+                  }
+                  if (mpot[ic] == -3) {
+                     int nl = nlft[ic];
+                     int nb = nbot[ic];
+                     int nlb = nlft[nb];
+                     state_temp_double[nc] = (mem_ptr_double[ic] + mem_ptr_double[nl] +
+                                              mem_ptr_double[nb] + mem_ptr_double[nlb])*0.25;
+                  }
+               } else if (mpot[ic] > 0){
+                  // lower left
+                  state_temp_double[nc] = mem_ptr_double[ic];
+                  nc++;
+
+                  // lower right
+                  state_temp_double[nc] = mem_ptr_double[ic];
+                  nc++;
+
+                  if (celltype_save[ic] == REAL_CELL){
+                     // upper left
+                     state_temp_double[nc] = mem_ptr_double[ic];
+                     nc++;
+
+                     // upper right
+                     state_temp_double[nc] = mem_ptr_double[ic];
+                     nc++;
+                  }
+               }
+            } // end cell loop
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+            {
+#endif
+            state_memory.memory_replace(mem_ptr_double, state_temp_double);
+#ifdef _OPENMP
+            } // end master region
+#pragma omp barrier
+#endif
+
+         } else if (memory_item->mem_elsize == 4) {
+
+            static float *state_temp_float, *mem_ptr_float;
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+            {
+#endif
+               state_temp_float = (float *)state_memory.memory_malloc(new_ncells, sizeof(float),
+                                                                             "state_temp_float", flags);
+               mem_ptr_float = (float *)memory_item->mem_ptr;
+#ifdef _OPENMP
+            } // end master region
+#pragma omp barrier
+#endif
+
+#ifdef _OPENMP
+#pragma omp for
+#endif
+            for (int ic=0; ic<(int)ncells; ic++) {
+
+               int nc = new_ic[ic];
+               if (mpot[ic] == 0) {
+                  state_temp_float[nc] = mem_ptr_float[ic];
+               } else if (mpot[ic] < 0){
+                  if (mpot[ic] == -2) {
+                     int nr = nrht[ic];
+                     int nt = ntop[ic];
+                     int nrt = nrht[nt];
+                     state_temp_float[nc] = (mem_ptr_float[ic] + mem_ptr_float[nr] +
+                                             mem_ptr_float[nt] + mem_ptr_float[nrt])*0.25;
+                  }
+                  if (mpot[ic] == -3) {
+                     int nl = nlft[ic];
+                     int nb = nbot[ic];
+                     int nlb = nlft[nb];
+                     state_temp_float[nc] = (mem_ptr_float[ic] + mem_ptr_float[nl] +
+                                             mem_ptr_float[nb] + mem_ptr_float[nlb])*0.25;
+                  }
+               } else if (mpot[ic] > 0){
+                  // lower left
+                  state_temp_float[nc] = mem_ptr_float[ic];
+                  nc++;
+
+                  // lower right
+                  state_temp_float[nc] = mem_ptr_float[ic];
+                  nc++;
+
+                  if (celltype_save[ic] == REAL_CELL){
+                     // upper left
+                     state_temp_float[nc] = mem_ptr_float[ic];
+                     nc++;
+
+                     // upper right
+                     state_temp_float[nc] = mem_ptr_float[ic];
+                     nc++;
+                  }
+               }
+            } // end cell loop
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+            {
+#endif
+               state_memory.memory_replace(mem_ptr_float, state_temp_float);
+#ifdef _OPENMP
+            } // end master region
+#pragma omp barrier
+#endif
+         } // mem elem size 4 bytes
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+         memory_next = state_memory_old.memory_entry_by_name_next();
+#ifdef _OPENMP
+         } // end master region
+#pragma omp barrier
+#endif
+
+      } // memory item iteration
+
+   } // if have state
+   // End of data parallel optimizations
+#endif
+
+   if (neighbor_remap) {
+      int flags = 0;
+      static int *nlft_old, *nrht_old, *nbot_old, *ntop_old;
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+      {
+#endif
+      nlft_old     = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "nlft_old",  flags);
+      nrht_old     = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "nrht_old",  flags);
+      nbot_old     = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "nbot_old",  flags);
+      ntop_old     = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "ntop_old",  flags);
+#ifdef _OPENMP
+      } // end master region
+#pragma omp barrier
+#endif
+      flags = RESTART_DATA;
+
+#ifdef _OPENMP
+#pragma omp for
+#endif
+      for (int ic = 0; ic < new_ncells; ic++){
+         nlft_old[ic] = -1;
+         nrht_old[ic] = -1;
+         nbot_old[ic] = -1;
+         ntop_old[ic] = -1;
+      }
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+      {
+#endif
+      mesh_memory.memory_swap(&nlft,  &nlft_old);
+      mesh_memory.memory_swap(&nrht,  &nrht_old);
+      mesh_memory.memory_swap(&nbot,  &nbot_old);
+      mesh_memory.memory_swap(&ntop,  &ntop_old);
+#ifdef _OPENMP
+      } // end master region
+#pragma omp barrier
+#endif
+
+#ifdef _OPENMP
+#pragma omp for
+#endif
+      for (int ic = 0; ic < (int)ncells; ic++){
+         int nc = index[ic];
+
+         if (mpot[ic] == 0){
+            if (nlft_old[ic] < (int)ncells && nlft_old[ic] >= 0){
+               nlft[nc] = (mpot[nlft_old[ic]] == 0) ? index[nlft_old[ic]] : -1;
+            }
+            if (nrht_old[ic] < (int)ncells && nrht_old[ic] >= 0){
+               nrht[nc] = (mpot[nrht_old[ic]] == 0) ? index[nrht_old[ic]] : -1;
+            }
+            if (nbot_old[ic] < (int)ncells && nbot_old[ic] >= 0){
+               nbot[nc] = (mpot[nbot_old[ic]] == 0) ? index[nbot_old[ic]] : -1;
+            }
+            if (ntop_old[ic] < (int)ncells && ntop_old[ic] >= 0){
+               ntop[nc] = (mpot[ntop_old[ic]] == 0) ? index[ntop_old[ic]] : -1;
+            }
+         } else if (mpot[ic] <= -2) {
+            nlft[nc]  = -1;
+            nrht[nc]  = -1;
+            nbot[nc]  = -1;
+            ntop[nc]  = -1;
+         } else if (mpot[ic] > 0){
+            nlft[nc]    = -1;
+            nlft[nc+1]  = -1;
+            nrht[nc]    = -1;
+            nrht[nc+1]  = -1;
+            nbot[nc]    = -1;
+            nbot[nc+1]  = -1;
+            ntop[nc]    = -1;
+            ntop[nc+1]  = -1;
+            if (celltype[nc] == REAL_CELL){
+               nlft[nc+2]  = -1;
+               nlft[nc+3]  = -1;
+               nrht[nc+2]  = -1;
+               nrht[nc+3]  = -1;
+               nbot[nc+2]  = -1;
+               nbot[nc+3]  = -1;
+               ntop[nc+2]  = -1;
+               ntop[nc+3]  = -1;
+            }
+         }
+         if (mpot[ic] > 0){
+            nc++;
+            switch(celltype[nc]){
+            case LEFT_BOUNDARY:
+               nlft[nc] = nc;
+               break;
+            case RIGHT_BOUNDARY:
+               nrht[nc] = nc;
+               break;
+            case BOTTOM_BOUNDARY:
+               nbot[nc] = nc;
+               break;
+            case TOP_BOUNDARY:
+               ntop[nc] = nc;
+               break;
+            }
+         }
+      }
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+      {
+#endif
+      nlft_old = (int *)mesh_memory.memory_delete(nlft_old);
+      nrht_old = (int *)mesh_memory.memory_delete(nrht_old);
+      nbot_old = (int *)mesh_memory.memory_delete(nbot_old);
+      ntop_old = (int *)mesh_memory.memory_delete(ntop_old);
+#ifdef _OPENMP
+      } // end master region
+#pragma omp barrier
+#endif
+   } else {
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+      {
+#endif
+      nlft = (int *)mesh_memory.memory_delete(nlft);
+      nrht = (int *)mesh_memory.memory_delete(nrht);
+      nbot = (int *)mesh_memory.memory_delete(nbot);
+      ntop = (int *)mesh_memory.memory_delete(ntop);
+#ifdef _OPENMP
+      } // end master region
+#pragma omp barrier
+#endif
+   }
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+   {
+#endif
+   //ncells = nc;
+
+#ifdef HAVE_MPI
+   if (parallel) {
+      MPI_Allgather(&new_ncells, 1, MPI_INT, &nsizes[0], 1, MPI_INT, MPI_COMM_WORLD);
+
+      ndispl[0]=0;
+      for (int ip=1; ip<numpe; ip++){
+         ndispl[ip] = ndispl[ip-1] + nsizes[ip-1];
+      }  
+      noffset=ndispl[mype];
+      ncells_global = ndispl[numpe-1]+nsizes[numpe-1];
+   }  
+#endif
+
+   cpu_timers[MESH_TIMER_REZONE_ALL] += cpu_timer_stop(tstart_cpu);
+#ifdef _OPENMP
+   } // end master region
+#pragma omp barrier
+#endif
+
+   } // if do_rezone
+
+}
+
+#ifdef HAVE_OPENCL
+void Mesh::gpu_rezone_all(int icount, int jcount, cl_mem &dev_mpot, MallocPlus &gpu_state_memory)
+{
+   if (! gpu_do_rezone) return;
+
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+
+   gpu_counters[MESH_COUNTER_REZONE]++;
+
+   cl_command_queue command_queue = ezcl_get_command_queue();
+
+   assert(dev_mpot);
+   assert(dev_level);
+   assert(dev_i);
+   assert(dev_j);
+   assert(dev_celltype);
+   assert(dev_ioffset);
+   assert(dev_levdx);
+   assert(dev_levdy);
+
+   int add_ncells = icount - jcount;
+
+// int global_icount = icount;
+// int global_jcount = jcount;
+
+   size_t old_ncells = ncells;
+   size_t new_ncells = ncells + add_ncells;
+
+#ifdef HAVE_MPI
+   //int global_add_ncells = add_ncells;
+
+// if (parallel) {
+//    int count[2], global_count[2];
+//    count[0] = icount;
+//    count[1] = jcount;
+//    MPI_Allreduce(&count, &global_count, 2, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+//    global_icount = global_count[0];
+//    global_jcount = global_count[1];
+//    //global_add_ncells = global_icount + global_jcount;
+// }
+#endif
+
+   int ifirst      = 0;
+   int ilast       = 0;
+   int jfirst      = 0;
+   int jlast       = 0;
+   int level_first = 0;
+   int level_last  = 0;
+
+#ifdef HAVE_MPI
+   if (numpe > 1) {
+      int i_tmp_first, i_tmp_last;
+      int j_tmp_first, j_tmp_last;
+      int level_tmp_first, level_tmp_last;
+
+      ezcl_enqueue_read_buffer(command_queue,  dev_i,     CL_FALSE, 0,                             1*sizeof(cl_int), &i_tmp_first,     NULL);
+      ezcl_enqueue_read_buffer(command_queue,  dev_j,     CL_FALSE, 0,                             1*sizeof(cl_int), &j_tmp_first,     NULL);
+      ezcl_enqueue_read_buffer(command_queue,  dev_level, CL_FALSE, 0,                             1*sizeof(cl_int), &level_tmp_first, NULL);
+      ezcl_enqueue_read_buffer(command_queue,  dev_i,     CL_FALSE, (old_ncells-1)*sizeof(cl_int), 1*sizeof(cl_int), &i_tmp_last,      NULL);
+      ezcl_enqueue_read_buffer(command_queue,  dev_j,     CL_FALSE, (old_ncells-1)*sizeof(cl_int), 1*sizeof(cl_int), &j_tmp_last,      NULL);
+      ezcl_enqueue_read_buffer(command_queue,  dev_level, CL_TRUE,  (old_ncells-1)*sizeof(cl_int), 1*sizeof(cl_int), &level_tmp_last,  NULL);
+
+      MPI_Request req[12];
+      MPI_Status status[12];
+
+      static int prev     = MPI_PROC_NULL;
+      static int next     = MPI_PROC_NULL;
+
+      if (mype != 0)         prev = mype-1;
+      if (mype < numpe - 1)  next = mype+1;
+
+      MPI_Isend(&i_tmp_last,      1,MPI_INT,next,1,MPI_COMM_WORLD,req+0);
+      MPI_Irecv(&ifirst,          1,MPI_INT,prev,1,MPI_COMM_WORLD,req+1);
+
+      MPI_Isend(&i_tmp_first,     1,MPI_INT,prev,1,MPI_COMM_WORLD,req+2);
+      MPI_Irecv(&ilast,           1,MPI_INT,next,1,MPI_COMM_WORLD,req+3);
+
+      MPI_Isend(&j_tmp_last,      1,MPI_INT,next,1,MPI_COMM_WORLD,req+4);
+      MPI_Irecv(&jfirst,          1,MPI_INT,prev,1,MPI_COMM_WORLD,req+5);
+
+      MPI_Isend(&j_tmp_first,     1,MPI_INT,prev,1,MPI_COMM_WORLD,req+6);
+      MPI_Irecv(&jlast,           1,MPI_INT,next,1,MPI_COMM_WORLD,req+7);
+
+      MPI_Isend(&level_tmp_last,  1,MPI_INT,next,1,MPI_COMM_WORLD,req+8);
+      MPI_Irecv(&level_first,     1,MPI_INT,prev,1,MPI_COMM_WORLD,req+9);
+
+      MPI_Isend(&level_tmp_first, 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+10);
+      MPI_Irecv(&level_last,      1,MPI_INT,next,1,MPI_COMM_WORLD,req+11);
+
+      MPI_Waitall(12, req, status);
+   }
+#endif
+
+/*
+   if (new_ncells != old_ncells){
+      ncells = new_ncells;
+   }
+*/
+
+   size_t mem_request = (int)((float)new_ncells*mem_factor);
+   cl_mem dev_celltype_new = ezcl_malloc(NULL, const_cast<char *>("dev_celltype_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+   cl_mem dev_level_new    = ezcl_malloc(NULL, const_cast<char *>("dev_level_new"),    &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+   cl_mem dev_i_new        = ezcl_malloc(NULL, const_cast<char *>("dev_i_new"),        &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+   cl_mem dev_j_new        = ezcl_malloc(NULL, const_cast<char *>("dev_j_new"),        &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+
+   cl_mem dev_ijadd;
+
+   vector<int>ijadd(6);
+   if (numpe > 1) {
+      ijadd[0] = ifirst;
+      ijadd[1] = ilast;
+      ijadd[2] = jfirst;
+      ijadd[3] = jlast;
+      ijadd[4] = level_first;
+      ijadd[5] = level_last;
+   }
+
+   size_t six = 6;
+   dev_ijadd = ezcl_malloc(NULL, const_cast<char *>("dev_ijadd"), &six, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+   ezcl_enqueue_write_buffer(command_queue, dev_ijadd, CL_TRUE, 0, 6*sizeof(cl_int), (void*)&ijadd[0], NULL);
+
+   cl_mem dev_indexoffset = ezcl_malloc(NULL, const_cast<char *>("dev_indexoffset"), &old_ncells, sizeof(cl_uint), CL_MEM_READ_WRITE, 0);
+
+   int stencil = 0;
+   if (localStencil) stencil = 1;
+
+   size_t local_work_size = 128;
+   size_t global_work_size = ((old_ncells+local_work_size - 1) /local_work_size) * local_work_size;
+
+   ezcl_set_kernel_arg(kernel_rezone_all, 0,  sizeof(cl_int),  (void *)&old_ncells);
+   ezcl_set_kernel_arg(kernel_rezone_all, 1,  sizeof(cl_int),  (void *)&stencil);
+   ezcl_set_kernel_arg(kernel_rezone_all, 2,  sizeof(cl_int),  (void *)&levmx);
+   ezcl_set_kernel_arg(kernel_rezone_all, 3,  sizeof(cl_mem),  (void *)&dev_mpot);
+   ezcl_set_kernel_arg(kernel_rezone_all, 4,  sizeof(cl_mem),  (void *)&dev_level);
+   ezcl_set_kernel_arg(kernel_rezone_all, 5,  sizeof(cl_mem),  (void *)&dev_i);
+   ezcl_set_kernel_arg(kernel_rezone_all, 6,  sizeof(cl_mem),  (void *)&dev_j);
+   ezcl_set_kernel_arg(kernel_rezone_all, 7,  sizeof(cl_mem),  (void *)&dev_celltype);
+   ezcl_set_kernel_arg(kernel_rezone_all, 8,  sizeof(cl_mem),  (void *)&dev_level_new);
+   ezcl_set_kernel_arg(kernel_rezone_all, 9,  sizeof(cl_mem),  (void *)&dev_i_new);
+   ezcl_set_kernel_arg(kernel_rezone_all, 10, sizeof(cl_mem),  (void *)&dev_j_new);
+   ezcl_set_kernel_arg(kernel_rezone_all, 11, sizeof(cl_mem),  (void *)&dev_celltype_new);
+   ezcl_set_kernel_arg(kernel_rezone_all, 12, sizeof(cl_mem),  (void *)&dev_ioffset);
+   ezcl_set_kernel_arg(kernel_rezone_all, 13, sizeof(cl_mem),  (void *)&dev_indexoffset);
+   ezcl_set_kernel_arg(kernel_rezone_all, 14, sizeof(cl_mem),  (void *)&dev_levdx);
+   ezcl_set_kernel_arg(kernel_rezone_all, 15, sizeof(cl_mem),  (void *)&dev_levdy);
+   ezcl_set_kernel_arg(kernel_rezone_all, 16, sizeof(cl_mem),  (void *)&dev_levtable);
+   ezcl_set_kernel_arg(kernel_rezone_all, 17, sizeof(cl_mem),  (void *)&dev_ijadd);
+   ezcl_set_kernel_arg(kernel_rezone_all, 18, local_work_size * sizeof(cl_uint), NULL);
+   //ezcl_set_kernel_arg(kernel_rezone_all, 19, local_work_size * sizeof(cl_real4_t),    NULL);
+
+   ezcl_enqueue_ndrange_kernel(command_queue, kernel_rezone_all,   1, NULL, &global_work_size, &local_work_size, NULL);
+
+   MallocPlus gpu_state_memory_old = gpu_state_memory;
+   malloc_plus_memory_entry *memory_item;
+
+   for (memory_item = gpu_state_memory_old.memory_entry_by_name_begin();
+        memory_item != gpu_state_memory_old.memory_entry_by_name_end();
+        memory_item = gpu_state_memory_old.memory_entry_by_name_next() ) {
+      //printf("DEBUG -- it.mem_name %s elsize %lu\n",memory_item->mem_name,memory_item->mem_elsize);
+      cl_mem dev_state_mem_ptr = (cl_mem)memory_item->mem_ptr;
+
+      if (memory_item->mem_elsize == 8){
+#ifndef MINIMUM_PRECISION
+         cl_mem dev_state_var_new = (cl_mem)gpu_state_memory.memory_malloc(max(old_ncells,new_ncells), sizeof(cl_double), const_cast<char *>("dev_state_var_new"), DEVICE_REGULAR_MEMORY);
+
+         ezcl_set_kernel_arg(kernel_rezone_one_double, 0, sizeof(cl_int),  (void *)&old_ncells);
+         ezcl_set_kernel_arg(kernel_rezone_one_double, 1, sizeof(cl_mem),  (void *)&dev_i);
+         ezcl_set_kernel_arg(kernel_rezone_one_double, 2, sizeof(cl_mem),  (void *)&dev_j);
+         ezcl_set_kernel_arg(kernel_rezone_one_double, 3, sizeof(cl_mem),  (void *)&dev_nlft);
+         ezcl_set_kernel_arg(kernel_rezone_one_double, 4, sizeof(cl_mem),  (void *)&dev_nrht);
+         ezcl_set_kernel_arg(kernel_rezone_one_double, 5, sizeof(cl_mem),  (void *)&dev_nbot);
+         ezcl_set_kernel_arg(kernel_rezone_one_double, 6, sizeof(cl_mem),  (void *)&dev_ntop);
+         ezcl_set_kernel_arg(kernel_rezone_one_double, 7, sizeof(cl_mem),  (void *)&dev_celltype);
+         ezcl_set_kernel_arg(kernel_rezone_one_double, 8, sizeof(cl_mem),  (void *)&dev_mpot);
+         ezcl_set_kernel_arg(kernel_rezone_one_double, 9, sizeof(cl_mem),  (void *)&dev_indexoffset);
+         ezcl_set_kernel_arg(kernel_rezone_one_double,10, sizeof(cl_mem),  (void *)&dev_state_mem_ptr);
+         ezcl_set_kernel_arg(kernel_rezone_one_double,11, sizeof(cl_mem),  (void *)&dev_state_var_new);
+
+         ezcl_enqueue_ndrange_kernel(command_queue, kernel_rezone_one_double,   1, NULL, &global_work_size, &local_work_size, NULL);
+
+         gpu_state_memory.memory_replace(dev_state_mem_ptr, dev_state_var_new);
+#else
+         printf("ERROR -- can't have double type for state variable\n");
+         exit(1);
+#endif
+      } else if (memory_item->mem_elsize == 4){
+         cl_mem dev_state_var_new = (cl_mem)gpu_state_memory.memory_malloc(max(old_ncells,new_ncells), sizeof(cl_float), const_cast<char *>("dev_state_var_new"), DEVICE_REGULAR_MEMORY);
+
+         ezcl_set_kernel_arg(kernel_rezone_one_float, 0, sizeof(cl_int),  (void *)&old_ncells);
+         ezcl_set_kernel_arg(kernel_rezone_one_float, 1, sizeof(cl_mem),  (void *)&dev_i);
+         ezcl_set_kernel_arg(kernel_rezone_one_float, 2, sizeof(cl_mem),  (void *)&dev_j);
+         ezcl_set_kernel_arg(kernel_rezone_one_float, 3, sizeof(cl_mem),  (void *)&dev_nlft);
+         ezcl_set_kernel_arg(kernel_rezone_one_float, 4, sizeof(cl_mem),  (void *)&dev_nrht);
+         ezcl_set_kernel_arg(kernel_rezone_one_float, 5, sizeof(cl_mem),  (void *)&dev_nbot);
+         ezcl_set_kernel_arg(kernel_rezone_one_float, 6, sizeof(cl_mem),  (void *)&dev_ntop);
+         ezcl_set_kernel_arg(kernel_rezone_one_float, 7, sizeof(cl_mem),  (void *)&dev_celltype);
+         ezcl_set_kernel_arg(kernel_rezone_one_float, 8, sizeof(cl_mem),  (void *)&dev_mpot);
+         ezcl_set_kernel_arg(kernel_rezone_one_float, 9, sizeof(cl_mem),  (void *)&dev_indexoffset);
+         ezcl_set_kernel_arg(kernel_rezone_one_float,10, sizeof(cl_mem),  (void *)&dev_state_mem_ptr);
+         ezcl_set_kernel_arg(kernel_rezone_one_float,11, sizeof(cl_mem),  (void *)&dev_state_var_new);
+
+         ezcl_enqueue_ndrange_kernel(command_queue, kernel_rezone_one_float,   1, NULL, &global_work_size, &local_work_size, NULL);
+
+         gpu_state_memory.memory_replace(dev_state_mem_ptr, dev_state_var_new);
+      }
+   }
+
+   if (neighbor_remap & ! parallel) {
+      size_t mem_request = (int)((float)new_ncells*mem_factor);
+      cl_mem dev_nlft_new = ezcl_malloc(NULL, const_cast<char *>("dev_nlft_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+      cl_mem dev_nrht_new = ezcl_malloc(NULL, const_cast<char *>("dev_nrht_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+      cl_mem dev_nbot_new = ezcl_malloc(NULL, const_cast<char *>("dev_nbot_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+      cl_mem dev_ntop_new = ezcl_malloc(NULL, const_cast<char *>("dev_ntop_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+
+      ezcl_set_kernel_arg(kernel_neighbor_init,  0, sizeof(cl_int),   (void *)&new_ncells);
+      ezcl_set_kernel_arg(kernel_neighbor_init,  1, sizeof(cl_mem),   (void *)&dev_nlft_new);
+      ezcl_set_kernel_arg(kernel_neighbor_init,  2, sizeof(cl_mem),   (void *)&dev_nrht_new);
+      ezcl_set_kernel_arg(kernel_neighbor_init,  3, sizeof(cl_mem),   (void *)&dev_nbot_new);
+      ezcl_set_kernel_arg(kernel_neighbor_init,  4, sizeof(cl_mem),   (void *)&dev_ntop_new);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_neighbor_init,   1, NULL, &global_work_size, &local_work_size, NULL);
+
+      ezcl_set_kernel_arg(kernel_rezone_neighbors,  0, sizeof(cl_int),  (void *)&old_ncells);
+      ezcl_set_kernel_arg(kernel_rezone_neighbors,  1, sizeof(cl_mem),  (void *)&dev_mpot);
+      ezcl_set_kernel_arg(kernel_rezone_neighbors,  2, sizeof(cl_mem),  (void *)&dev_indexoffset);
+      ezcl_set_kernel_arg(kernel_rezone_neighbors,  3, sizeof(cl_mem),  (void *)&dev_nlft);
+      ezcl_set_kernel_arg(kernel_rezone_neighbors,  4, sizeof(cl_mem),  (void *)&dev_nrht);
+      ezcl_set_kernel_arg(kernel_rezone_neighbors,  5, sizeof(cl_mem),  (void *)&dev_nbot);
+      ezcl_set_kernel_arg(kernel_rezone_neighbors,  6, sizeof(cl_mem),  (void *)&dev_ntop);
+      ezcl_set_kernel_arg(kernel_rezone_neighbors,  7, sizeof(cl_mem),  (void *)&dev_celltype_new);
+      ezcl_set_kernel_arg(kernel_rezone_neighbors,  8, sizeof(cl_mem),  (void *)&dev_nlft_new);
+      ezcl_set_kernel_arg(kernel_rezone_neighbors,  9, sizeof(cl_mem),  (void *)&dev_nrht_new);
+      ezcl_set_kernel_arg(kernel_rezone_neighbors, 10, sizeof(cl_mem),  (void *)&dev_nbot_new);
+      ezcl_set_kernel_arg(kernel_rezone_neighbors, 11, sizeof(cl_mem),  (void *)&dev_ntop_new);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_rezone_neighbors,   1, NULL, &global_work_size, &local_work_size, NULL);
+   
+      ezcl_device_memory_swap(&dev_nlft, &dev_nlft_new);
+      ezcl_device_memory_swap(&dev_nrht, &dev_nrht_new);
+      ezcl_device_memory_swap(&dev_nbot, &dev_nbot_new);
+      ezcl_device_memory_swap(&dev_ntop, &dev_ntop_new);
+
+      ezcl_device_memory_delete(dev_nlft_new);
+      ezcl_device_memory_delete(dev_nrht_new);
+      ezcl_device_memory_delete(dev_nbot_new);
+      ezcl_device_memory_delete(dev_ntop_new);
+   } else {
+      ezcl_device_memory_delete(dev_nlft);
+      ezcl_device_memory_delete(dev_nrht);
+      ezcl_device_memory_delete(dev_nbot);
+      ezcl_device_memory_delete(dev_ntop);
+      dev_nlft = NULL;
+      dev_nrht = NULL;
+      dev_nbot = NULL;
+      dev_ntop = NULL;
+   }
+
+   ezcl_device_memory_delete(dev_indexoffset);
+
+   if (new_ncells != old_ncells){
+      resize_old_device_memory(new_ncells);
+   }
+
+   ezcl_device_memory_swap(&dev_celltype, &dev_celltype_new);
+   ezcl_device_memory_swap(&dev_level, &dev_level_new);
+   ezcl_device_memory_swap(&dev_i, &dev_i_new);
+   ezcl_device_memory_swap(&dev_j, &dev_j_new);
+
+   ezcl_device_memory_delete(dev_mpot);
+   ezcl_device_memory_delete(dev_ijadd);
+   ezcl_device_memory_delete(dev_ioffset);
+
+   ezcl_device_memory_delete(dev_i_new);
+   ezcl_device_memory_delete(dev_j_new);
+   ezcl_device_memory_delete(dev_celltype_new);
+   ezcl_device_memory_delete(dev_level_new);
+
+#ifdef HAVE_MPI
+   if (parallel) {
+      int new_ncells = ncells + add_ncells;
+      MPI_Allgather(&new_ncells, 1, MPI_INT, &nsizes[0], 1, MPI_INT, MPI_COMM_WORLD);
+
+      ndispl[0]=0;
+      for (int ip=1; ip<numpe; ip++){
+         ndispl[ip] = ndispl[ip-1] + nsizes[ip-1];
+      }
+      noffset=ndispl[mype];
+      ncells_global = ndispl[numpe-1]+nsizes[numpe-1];
+   }
+#endif
+
+   gpu_timers[MESH_TIMER_REZONE_ALL] += (long)(cpu_timer_stop(tstart_cpu) * 1.0e9);
+}
+#endif
+
+void Mesh::calc_neighbors(int ncells) // Fill the nlft/nrht/nbot/ntop neighbor index arrays for cells [0,ncells).
+{  // Uses a hash table or a k-D tree, selected by calc_neighbor_type; updates cpu_timers/cpu_counters as it goes.
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+   // Everything except the final timer update is skipped unless do_rezone is set.
+   if (do_rezone) {
+      // NOTE(review): the orphaned omp master/barrier/for constructs below presumably run inside a caller's parallel region -- confirm.
+      int flags = INDEX_ARRAY_MEMORY;
+
+#if defined (HAVE_J7)
+      if (parallel) flags |= LOAD_BALANCE_MEMORY;
+#endif
+
+      static int nlft_size = 0; // static: one copy persists across calls; it is written only in the master block below
+
+#ifdef _OPENMP
+#pragma omp master
+      {
+#endif
+      cpu_counters[MESH_COUNTER_CALC_NEIGH]++;
+
+      // Refresh the cached size on every call. A NULL nlft must read as size 0; otherwise a size left over
+      // from an earlier call could skip the allocation below and the loops would index through NULL.
+      nlft_size = (nlft != NULL) ? mesh_memory.get_memory_size(nlft) : 0;
+
+      if (nlft_size < ncells){
+            if (nlft != NULL){
+               nlft = (int *)mesh_memory.memory_delete(nlft);
+               nrht = (int *)mesh_memory.memory_delete(nrht);
+               nbot = (int *)mesh_memory.memory_delete(nbot);
+               ntop = (int *)mesh_memory.memory_delete(ntop);
+            }
+
+            nlft = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "nlft", flags);
+            nrht = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "nrht", flags);
+            nbot = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "nbot", flags);
+            ntop = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "ntop", flags);
+      }
+#ifdef _OPENMP
+      }
+#pragma omp barrier
+#endif
+      // nlft_size still holds the size from before any reallocation, so this is true exactly when new arrays were allocated.
+      if (nlft_size < ncells){
+         int lowerBounds, upperBounds;
+         get_bounds(lowerBounds, upperBounds); // NOTE(review): no set_bounds(ncells) here; assumes bounds were already set for ncells -- confirm
+         // -1 marks a neighbor as not yet known; the hash-table path below tests for it.
+         for(int ic=lowerBounds; ic<upperBounds; ic++){
+            nlft[ic] = -1;
+            nrht[ic] = -1;
+            nbot[ic] = -1;
+            ntop[ic] = -1;
+         }
+      }
+
+      if (calc_neighbor_type == HASH_TABLE) {
+
+         struct timeval tstart_lev2;
+         if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2);
+
+         int jmaxsize = (jmax+1)*IPOW2(levmx);
+         int imaxsize = (imax+1)*IPOW2(levmx);
+
+         int *hash;
+
+#ifdef _OPENMP
+         hash = compact_hash_init_openmp(ncells, imaxsize, jmaxsize, 0);
+#else
+         hash = compact_hash_init(ncells, imaxsize, jmaxsize, 0);
+#endif
+         // Pass 1: write a cell into the hash only if one of its neighbors is unknown, or a finer neighbor has an unknown link.
+#ifdef _OPENMP
+#pragma omp for
+#endif
+            for(int ic=0; ic<ncells; ic++){
+               int lev = level[ic];
+
+               bool need_hash = (nlft[ic] == -1 || nrht[ic] == -1 || nbot[ic] == -1 || ntop[ic] == -1) ? true : false;
+
+               if (! need_hash){
+                   if ( (level[nlft[ic]] > lev && ntop[nlft[ic]] == -1) || 
+                        (level[nrht[ic]] > lev && ntop[nrht[ic]] == -1) ||
+                        (level[nbot[ic]] > lev && nrht[nbot[ic]] == -1) || 
+                        (level[ntop[ic]] > lev && nrht[ntop[ic]] == -1) ) need_hash = true;
+               }
+            
+               if (need_hash) {
+                  int levmult = IPOW2(levmx-lev);
+                  int ii = i[ic]*levmult;
+                  int jj = j[ic]*levmult;
+
+                  write_hash(ic,jj*imaxsize+ii,hash);
+               }
+            }
+
+            if (TIMING_LEVEL >= 2) {
+#ifdef _OPENMP
+#pragma omp master
+#endif
+               cpu_timers[MESH_TIMER_HASH_SETUP] += cpu_timer_stop(tstart_lev2);
+               cpu_timer_start(&tstart_lev2);
+            }
+            // Pass 2: resolve each neighbor that is still < 0 -- boundary self-reference, then finer, same-size, boundary special case, coarser.
+            //fprintf(fp,"DEBUG ncells is %lu\n",ncells);
+#ifdef _OPENMP
+#pragma omp for
+#endif
+            for (int ic=0; ic<(int)ncells; ic++){
+               int ii = i[ic];
+               int jj = j[ic];
+               int lev = level[ic];
+               int levmult = IPOW2(levmx-lev);
+               int iicur = ii*levmult;
+               int iilft = max( (ii-1)*levmult, 0         );
+               int iirht = min( (ii+1)*levmult, imaxsize-1);
+               int jjcur = jj*levmult;
+               int jjbot = max( (jj-1)*levmult, 0         );
+               int jjtop = min( (jj+1)*levmult, jmaxsize-1);
+
+               int nlftval = nlft[ic];
+               int nrhtval = nrht[ic];
+               int nbotval = nbot[ic];
+               int ntopval = ntop[ic];
+
+               // Taking care of boundary cells
+               // Force each boundary cell to point to itself on its boundary direction
+               if (nlftval < 0 && iicur <    1*IPOW2(levmx)  ) nlftval = ic;
+               if (nbotval < 0 && jjcur <    1*IPOW2(levmx)  ) nbotval = ic;
+               if (nrhtval < 0 && iicur > imax*IPOW2(levmx)-1) nrhtval = ic;
+               if (ntopval < 0 && jjcur > jmax*IPOW2(levmx)-1) ntopval = ic;
+               // Boundary cells next to corner boundary need special checks
+               if (nlftval < 0 && iicur ==    1*IPOW2(levmx) &&  (jjcur < 1*IPOW2(levmx) || jjcur >= jmax*IPOW2(levmx) ) ) nlftval = ic;
+               if (nbotval < 0 && jjcur ==    1*IPOW2(levmx) &&  (iicur < 1*IPOW2(levmx) || iicur >= imax*IPOW2(levmx) ) ) nbotval = ic;
+               if (nrhtval < 0 && iirht == imax*IPOW2(levmx) &&  (jjcur < 1*IPOW2(levmx) || jjcur >= jmax*IPOW2(levmx) ) ) nrhtval = ic;
+               if (ntopval < 0 && jjtop == jmax*IPOW2(levmx) &&  (iicur < 1*IPOW2(levmx) || iicur >= imax*IPOW2(levmx) ) ) ntopval = ic;
+
+               // need to check for finer neighbor first
+               // Right and top neighbor don't change for finer, so drop through to same size
+               // Left and bottom need to be half of same size index for finer test
+               if (lev != levmx) {
+                  int iilftfiner = iicur-(iicur-iilft)/2;
+                  //int iirhtfiner = (iicur+iirht)/2;
+                  int jjbotfiner = jjcur-(jjcur-jjbot)/2;
+                  //int jjtopfiner = (jjcur+jjtop)/2;
+                  if (nlftval < 0) nlftval = read_hash(jjcur*imaxsize+iilftfiner, hash);
+                  if (nbotval < 0) nbotval = read_hash(jjbotfiner*imaxsize+iicur, hash);
+               }
+
+               // same size neighbor
+               if (nlftval < 0) nlftval = read_hash(jjcur*imaxsize+iilft, hash);
+               if (nrhtval < 0) nrhtval = read_hash(jjcur*imaxsize+iirht, hash);
+               if (nbotval < 0) nbotval = read_hash(jjbot*imaxsize+iicur, hash);
+               if (ntopval < 0) ntopval = read_hash(jjtop*imaxsize+iicur, hash);
+
+               // Now we need to take care of special case where bottom and left boundary need adjustment since
+               // expected cell doesn't exist on these boundaries if it is finer than current cell
+               if (lev != levmx) {
+                  if (jjcur < 1*IPOW2(levmx)) {
+                     if (nrhtval < 0) {
+                        int jjtopfiner = (jjcur+jjtop)/2;
+                        nrhtval = read_hash(jjtopfiner*imaxsize+iirht, hash);
+                     }
+                     if (nlftval < 0) {
+                        int iilftfiner = iicur-(iicur-iilft)/2;
+                        int jjtopfiner = (jjcur+jjtop)/2;
+                        nlftval = read_hash(jjtopfiner*imaxsize+iilftfiner, hash);
+                     }
+                  }
+            
+                  if (iicur < 1*IPOW2(levmx)) {
+                     if (ntopval < 0) {
+                        int iirhtfiner = (iicur+iirht)/2;
+                        ntopval = read_hash(jjtop*imaxsize+iirhtfiner, hash);
+                     }
+                     if (nbotval < 0) {
+                        int iirhtfiner = (iicur+iirht)/2;
+                        int jjbotfiner = jjcur-(jjcur-jjbot)/2;
+                        nbotval = read_hash(jjbotfiner*imaxsize+iirhtfiner, hash);
+                     }
+                  }
+               }
+            
+               // coarser neighbor
+               if (lev != 0){
+                  if (nlftval < 0) {
+                     iilft -= iicur-iilft;
+                     int jjlft = (jj/2)*2*levmult;
+                     nlftval = read_hash(jjlft*imaxsize+iilft, hash);
+                  }
+                  if (nrhtval < 0) {
+                     int jjrht = (jj/2)*2*levmult;
+                     nrhtval = read_hash(jjrht*imaxsize+iirht, hash);
+                  }
+                  if (nbotval < 0) {
+                     jjbot -= jjcur-jjbot;
+                     int iibot = (ii/2)*2*levmult;
+                     nbotval = read_hash(jjbot*imaxsize+iibot, hash);
+                  }
+                  if (ntopval < 0) {
+                     int iitop = (ii/2)*2*levmult;
+                     ntopval = read_hash(jjtop*imaxsize+iitop, hash);
+                  }
+               }
+
+               nlft[ic] = nlftval;
+               nrht[ic] = nrhtval;
+               nbot[ic] = nbotval;
+               ntop[ic] = ntopval;
+
+               //printf("neighbors[%d] = %d %d %d %d\n",ic,nlft[ic],nrht[ic],nbot[ic],ntop[ic]);
+            }
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+         write_hash_collision_report();
+         read_hash_collision_report();
+
+         compact_hash_delete(hash);
+
+         if (TIMING_LEVEL >= 2) cpu_timers[MESH_TIMER_HASH_QUERY] += cpu_timer_stop(tstart_lev2);
+#ifdef _OPENMP
+         } // master block
+#endif
+
+      } else if (calc_neighbor_type == KDTREE) {
+
+         struct timeval tstart_lev2;
+         if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2);
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+         TBounds box;
+         vector<int> index_list(IPOW2(levmx*levmx) ); // NOTE(review): confirm this capacity bounds the hit count KDTree_QueryBoxIntersect can write
+
+         int num;
+
+         ibase = 0;
+         calc_spatial_coordinates(ibase);
+
+         kdtree_setup();
+
+         if (TIMING_LEVEL >= 2) {
+            cpu_timers[MESH_TIMER_KDTREE_SETUP] += cpu_timer_stop(tstart_lev2);
+            cpu_timer_start(&tstart_lev2);
+         }
+
+         for (int ic=0; ic<ncells; ic++) {
+            // Each query box is a single point (min == max); a cell stays its own neighbor unless exactly one hit comes back.
+            //left
+            nlft[ic]  = ic;
+            box.min.x = x[ic]-0.25*dx[ic];
+            box.max.x = x[ic]-0.25*dx[ic];
+            box.min.y = y[ic]+0.25*dy[ic];
+            box.max.y = y[ic]+0.25*dy[ic];
+            KDTree_QueryBoxIntersect(&tree, &num, &(index_list[0]), &box);
+            if (num == 1) nlft[ic]=index_list[0];
+
+            //right
+            nrht[ic]  = ic;
+            box.min.x = x[ic]+1.25*dx[ic];
+            box.max.x = x[ic]+1.25*dx[ic];
+            box.min.y = y[ic]+0.25*dy[ic];
+            box.max.y = y[ic]+0.25*dy[ic];
+            KDTree_QueryBoxIntersect(&tree, &num, &(index_list[0]), &box);
+            if (num == 1) nrht[ic]=index_list[0];
+
+            //bot
+            nbot[ic]  = ic;
+            box.min.x = x[ic]+0.25*dx[ic];
+            box.max.x = x[ic]+0.25*dx[ic];
+            box.min.y = y[ic]-0.25*dy[ic];
+            box.max.y = y[ic]-0.25*dy[ic];
+            KDTree_QueryBoxIntersect(&tree, &num, &(index_list[0]), &box);
+            if (num == 1) nbot[ic]=index_list[0];
+
+            //top
+            ntop[ic]  = ic;
+            box.min.x = x[ic]+0.25*dx[ic];
+            box.max.x = x[ic]+0.25*dx[ic];
+            box.min.y = y[ic]+1.25*dy[ic];
+            box.max.y = y[ic]+1.25*dy[ic];
+            KDTree_QueryBoxIntersect(&tree, &num, &(index_list[0]), &box);
+            if (num == 1) ntop[ic]=index_list[0];
+         }  //  End main loop over cells.
+
+         KDTree_Destroy(&tree);
+
+         if (TIMING_LEVEL >= 2) cpu_timers[MESH_TIMER_KDTREE_QUERY] += cpu_timer_stop(tstart_lev2);
+
+#ifdef _OPENMP
+         }
+#pragma omp barrier
+#endif
+      } // calc_neighbor_type
+
+#ifdef _OPENMP
+#pragma omp master
+#endif
+      ncells_ghost = ncells;
+
+   }
+
+#ifdef _OPENMP
+#pragma omp master
+#endif
+   cpu_timers[MESH_TIMER_CALC_NEIGHBORS] += cpu_timer_stop(tstart_cpu);
+}
+
+void Mesh::calc_neighbors_local(void)
+{
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+
+   if (do_rezone) {
+
+      int flags = INDEX_ARRAY_MEMORY;
+
+#if defined (HAVE_J7)
+      if (parallel) flags |= LOAD_BALANCE_MEMORY;
+#endif
+
+#ifdef _OPENMP
+#pragma omp master
+      {
+#endif
+      cpu_counters[MESH_COUNTER_CALC_NEIGH]++;
+
+      if (mesh_memory.get_memory_size(nlft) < ncells){
+         if (nlft != NULL) nlft = (int *)mesh_memory.memory_delete(nlft);
+         if (nrht != NULL) nrht = (int *)mesh_memory.memory_delete(nrht);
+         if (nbot != NULL) nbot = (int *)mesh_memory.memory_delete(nbot);
+         if (ntop != NULL) ntop = (int *)mesh_memory.memory_delete(ntop);
+         nlft = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "nlft", flags);
+         nrht = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "nrht", flags);
+         nbot = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "nbot", flags);
+         ntop = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "ntop", flags);
+      }
+#ifdef _OPENMP
+      }
+#pragma omp barrier
+#endif
+
+      int lowerBound, upperBound;
+      set_bounds(ncells);
+      get_bounds(lowerBound, upperBound);
+      for (int ic = lowerBound; ic < upperBound; ic++){
+         nlft[ic] = -98;
+         nrht[ic] = -98;
+         nbot[ic] = -98;
+         ntop[ic] = -98;
+      }
+
+      if (calc_neighbor_type == HASH_TABLE) {
+
+         struct timeval tstart_lev2;
+         if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2);
+
+         ncells_ghost = ncells;
+
+         // Find maximum i column and j row for this processor
+         static int jmintile, imintile, jmaxtile, imaxtile;
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+         jmintile = (jmax+1)*IPOW2(levmx);
+         imintile = (imax+1)*IPOW2(levmx);
+         jmaxtile = 0;
+         imaxtile = 0;
+#ifdef _OPENMP
+         }
+#pragma omp barrier
+#endif
+
+         int my_jmintile = jmintile;
+         int my_imintile = imintile;
+         int my_jmaxtile = 0;
+         int my_imaxtile = 0;
+
+#ifdef _OPENMP
+#pragma omp for
+#endif
+         for(uint ic=0; ic<ncells; ic++){
+            int lev = level[ic];
+//          if (lev < 0 || lev > levmx) printf("DEBUG -- cell %d lev %d\n",ic,level[ic]);
+            if ( j[ic]   *IPOW2(levmx-lev)   < my_jmintile) my_jmintile =  j[ic]   *IPOW2(levmx-lev)  ;
+            if ((j[ic]+1)*IPOW2(levmx-lev)-1 > my_jmaxtile) my_jmaxtile = (j[ic]+1)*IPOW2(levmx-lev)-1;
+            if ( i[ic]   *IPOW2(levmx-lev)   < my_imintile) my_imintile =  i[ic]   *IPOW2(levmx-lev)  ;
+            if ((i[ic]+1)*IPOW2(levmx-lev)-1 > my_imaxtile) my_imaxtile = (i[ic]+1)*IPOW2(levmx-lev)-1;
+         }
+#ifdef _OPENMP
+#pragma omp critical
+         {
+#endif
+            if (my_jmintile < jmintile) jmintile = my_jmintile;
+            if (my_imintile < imintile) imintile = my_imintile;
+            if (my_jmaxtile > jmaxtile) jmaxtile = my_jmaxtile;
+            if (my_imaxtile > imaxtile) imaxtile = my_imaxtile;
+#ifdef _OPENMP
+         } // end critical region
+#pragma omp barrier
+#endif
+
+      //if (DEBUG) fprintf(fp,"%d: Tile Sizes are imin %d imax %d jmin %d jmax %d\n",mype,imintile,imaxtile,jmintile,jmaxtile);
+
+      // Expand size by 2*coarse_cells for ghost cells
+      int jminsize = max(jmintile-2*IPOW2(levmx),0);
+      int jmaxsize = min(jmaxtile+2*IPOW2(levmx),(jmax+1)*IPOW2(levmx));
+      int iminsize = max(imintile-2*IPOW2(levmx),0);
+      int imaxsize = min(imaxtile+2*IPOW2(levmx),(imax+1)*IPOW2(levmx));
+      //if (DEBUG) fprintf(fp,"%d: Sizes are imin %d imax %d jmin %d jmax %d\n",mype,iminsize,imaxsize,jminsize,jmaxsize);
+
+      //fprintf(fp,"DEBUG -- ncells %lu\n",ncells);
+
+      static int *hash;
+
+#ifdef _OPENMP
+      hash = compact_hash_init_openmp(ncells, imaxsize-iminsize, jmaxsize-jminsize, 0);
+#else
+      hash = compact_hash_init(ncells, imaxsize-iminsize, jmaxsize-jminsize, 0);
+#endif
+
+      //printf("%d: DEBUG -- noffset %d cells %d\n",mype,noffset,ncells);
+
+      if (DEBUG) {
+#ifdef _OPENMP
+#pragma omp master
+#endif
+         fprintf(fp,"%d: Sizes are imin %d imax %d jmin %d jmax %d\n",mype,iminsize,imaxsize,jminsize,jmaxsize);
+      }
+
+      static int imaxcalc, jmaxcalc;
+
+#ifdef _OPENMP
+#pragma omp for
+#endif
+      for(uint ic=0; ic<ncells; ic++){
+         int cellnumber = ic+noffset;
+         int lev = level[ic];
+         int levmult = IPOW2(levmx-lev);
+         int ii = i[ic]*levmult-iminsize;
+         int jj = j[ic]*levmult-jminsize;
+
+         write_hash(cellnumber, jj*(imaxsize-iminsize)+ii, hash);
+      } // end for loop
+
+      if (TIMING_LEVEL >= 2) {
+#ifdef _OPENMP
+#pragma omp master
+#endif
+         cpu_timers[MESH_TIMER_HASH_SETUP] += cpu_timer_stop(tstart_lev2);
+         cpu_timer_start(&tstart_lev2);
+      }
+
+#ifdef _OPENMP
+#pragma omp master
+      {
+#endif
+      // Set neighbors to global cell numbers from hash
+      jmaxcalc = (jmax+1)*IPOW2(levmx);
+      imaxcalc = (imax+1)*IPOW2(levmx);
+#ifdef _OPENMP
+      }
+#pragma omp barrier
+#endif
+
+#ifdef _OPENMP
+#pragma omp for
+#endif
+      for (uint ic=0; ic<ncells; ic++){
+         int ii = i[ic];
+         int jj = j[ic];
+         int lev = level[ic];
+         int levmult = IPOW2(levmx-lev);
+
+         int iicur = ii*levmult-iminsize;
+         int iilft = max( (ii-1)*levmult, 0         )-iminsize;
+         int iirht = min( (ii+1)*levmult, imaxcalc-1)-iminsize;   
+         int jjcur = jj*levmult-jminsize;
+         int jjbot = max( (jj-1)*levmult, 0         )-jminsize;
+         int jjtop = min( (jj+1)*levmult, jmaxcalc-1)-jminsize;   
+
+         int nlftval = -1;
+         int nrhtval = -1;
+         int nbotval = -1;
+         int ntopval = -1;
+
+         // Taking care of boundary cells
+         // Force each boundary cell to point to itself on its boundary direction
+         if (iicur <    1*IPOW2(levmx)  -iminsize) nlftval = ic+noffset;
+         if (jjcur <    1*IPOW2(levmx)  -jminsize) nbotval = ic+noffset;
+         if (iicur > imax*IPOW2(levmx)-1-iminsize) nrhtval = ic+noffset;
+         if (jjcur > jmax*IPOW2(levmx)-1-jminsize) ntopval = ic+noffset;
+         // Boundary cells next to corner boundary need special checks
+         if (iicur ==    1*IPOW2(levmx)-iminsize &&  (jjcur < 1*IPOW2(levmx)-jminsize || jjcur >= jmax*IPOW2(levmx)-jminsize ) ) nlftval = ic+noffset;
+         if (jjcur ==    1*IPOW2(levmx)-jminsize &&  (iicur < 1*IPOW2(levmx)-iminsize || iicur >= imax*IPOW2(levmx)-iminsize ) ) nbotval = ic+noffset;
+         if (iirht == imax*IPOW2(levmx)-iminsize &&  (jjcur < 1*IPOW2(levmx)-jminsize || jjcur >= jmax*IPOW2(levmx)-jminsize ) ) nrhtval = ic+noffset;
+         if (jjtop == jmax*IPOW2(levmx)-jminsize &&  (iicur < 1*IPOW2(levmx)-iminsize || iicur >= imax*IPOW2(levmx)-iminsize ) ) ntopval = ic+noffset;
+
+         // need to check for finer neighbor first
+         // Right and top neighbor don't change for finer, so drop through to same size
+         // Left and bottom need to be half of same size index for finer test
+         if (lev != levmx) {
+            int iilftfiner = iicur-(iicur-iilft)/2;
+            int jjbotfiner = jjcur-(jjcur-jjbot)/2;
+            if (nlftval < 0) nlftval = read_hash(jjcur     *(imaxsize-iminsize)+iilftfiner, hash);
+            if (nbotval < 0) nbotval = read_hash(jjbotfiner*(imaxsize-iminsize)+iicur,      hash);
+         }
+
+         // same size neighbor
+         if (nlftval < 0) {
+            int nlfttry = read_hash(jjcur*(imaxsize-iminsize)+iilft, hash);
+            if (nlfttry >= 0 && nlfttry < (int)ncells && level[nlfttry] == lev) nlftval = nlfttry;
+         }
+         if (nrhtval < 0) nrhtval = read_hash(jjcur*(imaxsize-iminsize)+iirht, hash);
+         if (nbotval < 0) {
+            int nbottry = read_hash(jjbot*(imaxsize-iminsize)+iicur, hash);
+            if (nbottry >= 0 && nbottry < (int)ncells && level[nbottry] == lev) nbotval = nbottry;
+         }
+         if (ntopval < 0) ntopval = read_hash(jjtop*(imaxsize-iminsize)+iicur, hash);
+              
+         // Now we need to take care of special case where bottom and left boundary need adjustment since
+         // expected cell doesn't exist on these boundaries if it is finer than current cell
+         if (lev != levmx) {
+            if (jjcur < 1*IPOW2(levmx)) {
+               if (nrhtval < 0) {
+                  int jjtopfiner = (jjcur+jjtop)/2;
+                  nrhtval = read_hash(jjtopfiner*(imaxsize-iminsize)+iirht, hash);
+               }
+               if (nlftval < 0) {
+                  int iilftfiner = iicur-(iicur-iilft)/2;
+                  int jjtopfiner = (jjcur+jjtop)/2;
+                  nlftval = read_hash(jjtopfiner*(imaxsize-iminsize)+iilftfiner, hash);
+               }
+            }
+
+            if (iicur < 1*IPOW2(levmx)) {
+               if (ntopval < 0) {
+                  int iirhtfiner = (iicur+iirht)/2;
+                  ntopval = read_hash(jjtop*(imaxsize-iminsize)+iirhtfiner, hash);
+               }
+               if (nbotval < 0) {
+                  int iirhtfiner = (iicur+iirht)/2;
+                  int jjbotfiner = jjcur-(jjcur-jjbot)/2;
+                  nbotval = read_hash(jjbotfiner*(imaxsize-iminsize)+iirhtfiner, hash);
+               }
+            }
+         }
+
+         // coarser neighbor
+         if (lev != 0){
+            if (nlftval < 0) {
+               iilft -= iicur-iilft;
+               int jjlft = (jj/2)*2*levmult-jminsize;
+               int nlfttry = read_hash(jjlft*(imaxsize-iminsize)+iilft, hash);
+               if (nlfttry >= 0 && nlfttry < (int)ncells && level[nlfttry] == lev-1) nlftval = nlfttry;
+            }       
+            if (nrhtval < 0) {
+               int jjrht = (jj/2)*2*levmult-jminsize;
+               int nrhttry = read_hash(jjrht*(imaxsize-iminsize)+iirht, hash);
+               if (nrhttry >= 0 && nrhttry < (int)ncells && level[nrhttry] == lev-1) nrhtval = nrhttry;
+            }       
+            if (nbotval < 0) {
+               jjbot -= jjcur-jjbot;
+               int iibot = (ii/2)*2*levmult-iminsize;
+               int nbottry = read_hash(jjbot*(imaxsize-iminsize)+iibot, hash);
+               if (nbottry >= 0 && nbottry < (int)ncells && level[nbottry] == lev-1) nbotval = nbottry;
+            }       
+            if (ntopval < 0) {
+               int iitop = (ii/2)*2*levmult-iminsize;
+               int ntoptry = read_hash(jjtop*(imaxsize-iminsize)+iitop, hash);
+               if (ntoptry >= 0 && ntoptry < (int)ncells && level[ntoptry] == lev-1) ntopval = ntoptry;
+            }       
+         }       
+
+         nlft[ic] = nlftval;
+         nrht[ic] = nrhtval;
+         nbot[ic] = nbotval;
+         ntop[ic] = ntopval;
+
+         //fprintf(fp,"%d: neighbors[%d] = %d %d %d %d\n",mype,ic,nlft[ic],nrht[ic],nbot[ic],ntop[ic]);
+      }
+
+      if (DEBUG) {
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+         print_local();
+
+         int jmaxglobal = (jmax+1)*IPOW2(levmx);
+         int imaxglobal = (imax+1)*IPOW2(levmx);
+         fprintf(fp,"\n                                    HASH 0 numbering\n");
+         for (int jj = jmaxglobal-1; jj>=0; jj--){
+            fprintf(fp,"%2d: %4d:",mype,jj);
+            if (jj >= jminsize && jj < jmaxsize) {
+               for (int ii = 0; ii<imaxglobal; ii++){
+                  if (ii >= iminsize && ii < imaxsize) {
+                     fprintf(fp,"%5d",read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash));
+                  } else {
+                     fprintf(fp,"     ");
+                  }
+               }
+            }
+            fprintf(fp,"\n");
+         }
+         fprintf(fp,"%2d:      ",mype);
+         for (int ii = 0; ii<imaxglobal; ii++){
+            fprintf(fp,"%4d:",ii);
+         }
+         fprintf(fp,"\n");
+
+         fprintf(fp,"\n                                    nlft numbering\n");
+         for (int jj = jmaxglobal-1; jj>=0; jj--){
+            fprintf(fp,"%2d: %4d:",mype,jj);
+            if (jj >= jminsize && jj < jmaxsize) {
+               for (int ii = 0; ii<imaxglobal; ii++){
+                  if (ii >= iminsize && ii < imaxsize) {
+                     int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash)-noffset;
+                     if (hashval >= 0 && hashval < (int)ncells) {
+                        fprintf(fp,"%5d",nlft[hashval]);
+                     } else {
+                        fprintf(fp,"     ");
+                     }
+                  } else {
+                     fprintf(fp,"     ");
+                  }
+               }
+            }
+            fprintf(fp,"\n");
+         }
+         fprintf(fp,"%2d:      ",mype);
+         for (int ii = 0; ii<imaxglobal; ii++){
+            fprintf(fp,"%4d:",ii);
+         }
+         fprintf(fp,"\n");
+      
+         fprintf(fp,"\n                                    nrht numbering\n");
+         for (int jj = jmaxglobal-1; jj>=0; jj--){
+            fprintf(fp,"%2d: %4d:",mype,jj);
+            if (jj >= jminsize && jj < jmaxsize) {
+               for (int ii = 0; ii<imaxglobal; ii++){
+                  if (ii >= iminsize && ii < imaxsize) {
+                     int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash)-noffset;
+                     if (hashval >= 0 && hashval < (int)ncells) {
+                        fprintf(fp,"%5d",nrht[hashval]);
+                     } else {
+                        fprintf(fp,"     ");
+                     }
+                  } else {
+                     fprintf(fp,"     ");
+                  }
+               }
+            }
+            fprintf(fp,"\n");
+         }
+         fprintf(fp,"%2d:      ",mype);
+         for (int ii = 0; ii<imaxglobal; ii++){
+            fprintf(fp,"%4d:",ii);
+         }
+         fprintf(fp,"\n");
+
+         fprintf(fp,"\n                                    nbot numbering\n");
+         for (int jj = jmaxglobal-1; jj>=0; jj--){
+            fprintf(fp,"%2d: %4d:",mype,jj);
+            if (jj >= jminsize && jj < jmaxsize) {
+               for (int ii = 0; ii<imaxglobal; ii++){
+                  if (ii >= iminsize && ii < imaxsize) {
+                     int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash)-noffset;
+                     if (hashval >= 0 && hashval < (int)ncells) {
+                        fprintf(fp,"%5d",nbot[hashval]);
+                     } else {
+                        fprintf(fp,"     ");
+                     }
+                  } else {
+                     fprintf(fp,"     ");
+                  }
+               }
+            }
+            fprintf(fp,"\n");
+         }
+         fprintf(fp,"%2d:      ",mype);
+         for (int ii = 0; ii<imaxglobal; ii++){
+            fprintf(fp,"%4d:",ii);
+         }
+         fprintf(fp,"\n");
+
+         fprintf(fp,"\n                                    ntop numbering\n");
+         for (int jj = jmaxglobal-1; jj>=0; jj--){
+            fprintf(fp,"%2d: %4d:",mype,jj);
+            if (jj >= jminsize && jj < jmaxsize) {
+               for (int ii = 0; ii<imaxglobal; ii++){
+                  if (ii >= iminsize && ii < imaxsize) {
+                     int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash)-noffset;
+                     if (hashval >= 0 && hashval < (int)ncells) {
+                        fprintf(fp,"%5d",ntop[hashval]);
+                     } else {
+                        fprintf(fp,"     ");
+                     }
+                  } else {
+                     fprintf(fp,"     ");
+                  }
+               }
+            }
+            fprintf(fp,"\n");
+         }
+         fprintf(fp,"%2d:      ",mype);
+         for (int ii = 0; ii<imaxglobal; ii++){
+            fprintf(fp,"%4d:",ii);
+         }
+         fprintf(fp,"\n");
+#ifdef _OPENMP
+         }
+#pragma omp barrier
+#endif
+      }
+
+      if (TIMING_LEVEL >= 2) {
+#ifdef _OPENMP
+#pragma omp master
+#endif
+         cpu_timers[MESH_TIMER_HASH_QUERY] += cpu_timer_stop(tstart_lev2);
+         cpu_timer_start(&tstart_lev2);
+      }
+
+#ifdef HAVE_MPI
+      if (numpe > 1) {
+         static int num_comm_partners;
+
+         static vector<int> iminsize_global;
+         static vector<int> imaxsize_global;
+         static vector<int> jminsize_global;
+         static vector<int> jmaxsize_global;
+         static vector<int> comm_partner;
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+         iminsize_global.resize(numpe);
+         imaxsize_global.resize(numpe);
+         jminsize_global.resize(numpe);
+         jmaxsize_global.resize(numpe);
+         comm_partner.resize(numpe,-1);
+
+         MPI_Allgather(&iminsize, 1, MPI_INT, &iminsize_global[0], 1, MPI_INT, MPI_COMM_WORLD);
+         MPI_Allgather(&imaxsize, 1, MPI_INT, &imaxsize_global[0], 1, MPI_INT, MPI_COMM_WORLD);
+         MPI_Allgather(&jminsize, 1, MPI_INT, &jminsize_global[0], 1, MPI_INT, MPI_COMM_WORLD);
+         MPI_Allgather(&jmaxsize, 1, MPI_INT, &jmaxsize_global[0], 1, MPI_INT, MPI_COMM_WORLD);
+
+         num_comm_partners = 0;
+         for (int ip = 0; ip < numpe; ip++){
+            if (ip == mype) continue;
+            if (iminsize_global[ip] > imaxtile) continue;
+            if (imaxsize_global[ip] < imintile) continue;
+            if (jminsize_global[ip] > jmaxtile) continue;
+            if (jmaxsize_global[ip] < jmintile) continue;
+            comm_partner[num_comm_partners] = ip;
+            num_comm_partners++;
+            //if (DEBUG) fprintf(fp,"%d: overlap with processor %d bounding box is %d %d %d %d\n",mype,ip,iminsize_global[ip],imaxsize_global[ip],jminsize_global[ip],jmaxsize_global[ip]);
+         }
+#ifdef _OPENMP
+         }
+#pragma omp barrier
+#endif
+
+         static vector<int> border_cell;
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+         border_cell.resize(ncells);
+
+#ifdef BOUNDS_CHECK
+         for (uint ic=0; ic<ncells; ic++){
+            int nl = nlft[ic];
+            if (nl != -1){
+               nl -= noffset;
+               if (nl<0 || nl>= (int)ncells) printf("%d: Warning at line %d cell %d nlft %d\n",mype,__LINE__,ic,nl);
+            }
+            int nr = nrht[ic];
+            if (nr != -1){
+               nr -= noffset;
+               if (nr<0 || nr>= (int)ncells) printf("%d: Warning at line %d cell %d nrht %d\n",mype,__LINE__,ic,nr);
+            }
+            int nb = nbot[ic];
+            if (nb != -1){
+               nb -= noffset;
+               if (nb<0 || nb>= (int)ncells) printf("%d: Warning at line %d cell %d nbot %d\n",mype,__LINE__,ic,nb);
+            }
+            int nt = ntop[ic];
+            if (nt != -1){
+               nt -= noffset;
+               if (nt<0 || nt>= (int)ncells) printf("%d: Warning at line %d cell %d ntop %d\n",mype,__LINE__,ic,nt);
+            }
+         }
+#endif
+
+#ifdef _OPENMP
+         }
+#pragma omp barrier
+#endif
+
+         static vector<int> border_cell_out;
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+         border_cell_out.resize(ncells);
+#ifdef _OPENMP
+         }
+#pragma omp barrier
+#endif
+
+#ifdef _OPENMP
+#pragma omp for
+#endif
+            for (uint ic=0; ic<ncells; ic++){
+               int iborder_cell = 0;
+
+               // left neighbor is undefined -- or -- if left is at finer level check left top for undefined
+               if (nlft[ic] == -1 || (level[nlft[ic]-noffset] > level[ic] && ntop[nlft[ic]-noffset] == -1) ){
+                  iborder_cell |= 0x0001;
+               }
+               if (nrht[ic] == -1 || (level[nrht[ic]-noffset] > level[ic] && ntop[nrht[ic]-noffset] == -1) ){
+                  iborder_cell |= 0x0002;
+               }
+               if (nbot[ic] == -1 || (level[nbot[ic]-noffset] > level[ic] && nrht[nbot[ic]-noffset] == -1) ) {
+                  iborder_cell |= 0x0004;
+               }
+               if (ntop[ic] == -1 || (level[ntop[ic]-noffset] > level[ic] && nrht[ntop[ic]-noffset] == -1) ) {
+                  iborder_cell |= 0x0008;
+               }
+   
+               border_cell[ic] = iborder_cell;
+            }
+
+#ifdef _OPENMP
+#pragma omp for
+#endif
+            for (uint ic=0; ic<ncells; ic++){ // second pass: fills border_cell_out[ic] from border_cell[] of ic and its neighbors
+               int iborder_cell = border_cell[ic];
+
+               if (iborder_cell == 0) { // only cells with no bit set in border_cell inspect their neighbors
+               // NOTE(review): the masks OR-ed in below (0x0016, 0x0032, 0x0064, 0x0128) are multi-bit hex values that overlap 0x0002/0x0004/0x0008 and each other; they look like decimal 16/32/64/128 typed as hex -- confirm before testing individual bits of border_cell_out
+                  int nl = nlft[ic]-noffset;
+                  if (nl >= 0 && nl < (int)ncells) { // left neighbor index falls inside the local range [0,ncells)
+                     if ((border_cell[nl] & 0x0001) == 0x0001) { // left neighbor carries the 0x0001 bit
+                        iborder_cell |= 0x0016;
+                     } else if (level[nl] > level[ic]){ // left neighbor has a higher level: also look at the cell reached through its ntop link
+                        int ntl = ntop[nl]-noffset;
+                        if (ntl >= 0 && ntl < (int)ncells && (border_cell[ntl] & 0x0001) == 0x0001) {
+                           iborder_cell |= 0x0016;
+                        }
+                     }
+                  }
+                  int nr = nrht[ic]-noffset;
+                  if (nr >= 0 && nr < (int)ncells) { // same pattern for the right neighbor and bit 0x0002
+                     if ((border_cell[nrht[ic]-noffset] & 0x0002) == 0x0002) { // nrht[ic]-noffset is the same index as nr
+                        iborder_cell |= 0x0032;
+                     } else if (level[nr] > level[ic]){
+                        int ntr = ntop[nr]-noffset;
+                        if (ntr >= 0 && ntr < (int)ncells && (border_cell[ntr] & 0x0002) == 0x0002) {
+                           iborder_cell |= 0x0032;
+                        }
+                     }
+                  }
+                  int nb = nbot[ic]-noffset;
+                  if (nb >= 0 && nb < (int)ncells) { // bottom neighbor and bit 0x0004; the second cell is reached through nrht
+                     if ((border_cell[nb] & 0x0004) == 0x0004) {
+                        iborder_cell |= 0x0064;
+                     } else if (level[nb] > level[ic]){
+                        int nrb = nrht[nb]-noffset;
+                        if (nrb >= 0 && nrb < (int)ncells && (border_cell[nrb] & 0x0004) == 0x0004) {
+                           iborder_cell |= 0x0064;
+                        }
+                     }
+                  }
+                  int nt = ntop[ic]-noffset;
+                  if (nt >= 0 && nt < (int)ncells) { // top neighbor and bit 0x0008; the second cell is reached through nrht
+                     if ((border_cell[nt] & 0x0008) == 0x0008) {
+                        iborder_cell |= 0x0128;
+                     } else if (level[nt] > level[ic]){
+                        int nrt = nrht[nt]-noffset;
+                        if (nrt >= 0 && nrt < (int)ncells && (border_cell[nrt] & 0x0008) == 0x0008) {
+                           iborder_cell |= 0x0128;
+                        }
+                     }
+                  }
+               }
+
+               border_cell_out[ic] = iborder_cell; // cells already flagged in the first pass keep their border_cell value unchanged
+            }
+// indent offset
+
+         vector<int> border_cell_num;
+
+         static int nbsize_local;
+
+         static vector<int> border_cell_i;
+         static vector<int> border_cell_j;
+         static vector<int> border_cell_level;
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+         for (int ic=0; ic<(int)ncells; ic++){
+            if (border_cell_out[ic] > 0) border_cell_num.push_back(ic+noffset);
+         }
+         //printf("%d: border cell size is %d\n",mype,border_cell_num.size());
+
+         nbsize_local = border_cell_num.size();
+
+         border_cell_i.resize(nbsize_local);
+         border_cell_j.resize(nbsize_local);
+         border_cell_level.resize(nbsize_local);
+
+         for (int ic = 0; ic <nbsize_local; ic++){
+            int cell_num = border_cell_num[ic]-noffset;
+            border_cell_i[ic] = i[cell_num]; 
+            border_cell_j[ic] = j[cell_num]; 
+            border_cell_level[ic] = level[cell_num]; 
+         }
+#ifdef _OPENMP
+         }
+#pragma omp barrier
+#endif
+
+         if (DEBUG) {
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+            fprintf(fp,"%d: Border cell size is %d\n",mype,nbsize_local);
+            for (int ib = 0; ib <nbsize_local; ib++){
+               fprintf(fp,"%d: Border cell %d is %d i %d j %d level %d\n",mype,ib,border_cell_num[ib],
+                  border_cell_i[ib],border_cell_j[ib],border_cell_level[ib]);
+            }
+#ifdef _OPENMP
+         }
+#pragma omp barrier
+#endif
+         }
+
+         if (TIMING_LEVEL >= 2) {
+#ifdef _OPENMP
+#pragma omp master
+#endif
+            cpu_timers[MESH_TIMER_FIND_BOUNDARY] += cpu_timer_stop(tstart_lev2);
+            cpu_timer_start(&tstart_lev2);
+         }
+
+         // Allocate push database
+
+         static int **send_database;
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+         send_database = (int**)malloc(num_comm_partners*sizeof(int *));
+         for (int ip = 0; ip < num_comm_partners; ip++){
+            send_database[ip] = (int *)malloc(nbsize_local*sizeof(int));
+         }
+#ifdef _OPENMP
+         }
+#pragma omp barrier
+#endif
+
+         // Compute the overlap between processor bounding boxes and set up push database
+
+         static vector<int> send_buffer_count;
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+         send_buffer_count.resize(num_comm_partners);
+         for (int ip = 0; ip < num_comm_partners; ip++){
+            int icount = 0;
+            for (int ib = 0; ib <nbsize_local; ib++){
+               int lev = border_cell_level[ib];
+               int levmult = IPOW2(levmx-lev);
+               if (border_cell_i[ib]*levmult >= iminsize_global[comm_partner[ip]] && 
+                   border_cell_i[ib]*levmult <= imaxsize_global[comm_partner[ip]] && 
+                   border_cell_j[ib]*levmult >= jminsize_global[comm_partner[ip]] && 
+                   border_cell_j[ib]*levmult <= jmaxsize_global[comm_partner[ip]] ) {
+                  //   border_cell_i[ib],border_cell_j[ib],border_cell_level[ib]);
+                  send_database[ip][icount] = ib;
+                  icount++;
+               }
+            }
+            send_buffer_count[ip]=icount;
+         }
+#ifdef _OPENMP
+         }
+#pragma omp barrier
+#endif
+
+         // Initialize L7_Push_Setup with num_comm_partners, comm_partner, send_database and 
+         // send_buffer_count. L7_Push_Setup will copy data and determine recv_buffer_counts.
+         // It will return receive_count_total for use in allocations
+
+         static int receive_count_total;
+         int i_push_handle = 0;
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+         i_push_handle = 0;
+         L7_Push_Setup(num_comm_partners, &comm_partner[0], &send_buffer_count[0],
+                       send_database, &receive_count_total, &i_push_handle);
+#ifdef _OPENMP
+         }
+#pragma omp barrier
+#endif
+
+         if (DEBUG) {
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+            fprintf(fp,"DEBUG num_comm_partners %d\n",num_comm_partners);
+            for (int ip = 0; ip < num_comm_partners; ip++){
+               fprintf(fp,"DEBUG comm partner is %d data count is %d\n",comm_partner[ip],send_buffer_count[ip]);
+               for (int ic = 0; ic < send_buffer_count[ip]; ic++){
+                  int ib = send_database[ip][ic];
+                  fprintf(fp,"DEBUG \t index %d cell number %d i %d j %d level %d\n",ib,border_cell_num[ib],
+                     border_cell_i[ib],border_cell_j[ib],border_cell_level[ib]);
+               }
+            }
+#ifdef _OPENMP
+         }
+#endif
+         }
+
+         // Can now free the send database. Other arrays are vectors and will automatically 
+         // deallocate
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+         for (int ip = 0; ip < num_comm_partners; ip++){
+            free(send_database[ip]);
+         }
+         free(send_database);
+#ifdef _OPENMP
+         }
+#endif
+
+         if (TIMING_LEVEL >= 2) {
+#ifdef _OPENMP
+#pragma omp master
+#endif
+            cpu_timers[MESH_TIMER_PUSH_SETUP] += cpu_timer_stop(tstart_lev2);
+            cpu_timer_start(&tstart_lev2);
+         }
+
+         // Push the data needed to the adjacent processors
+         static int *border_cell_num_local;
+         static int *border_cell_i_local;
+         static int *border_cell_j_local;
+         static int *border_cell_level_local;
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+         border_cell_num_local = (int *)malloc(receive_count_total*sizeof(int));
+         border_cell_i_local = (int *)malloc(receive_count_total*sizeof(int));
+         border_cell_j_local = (int *)malloc(receive_count_total*sizeof(int));
+         border_cell_level_local = (int *)malloc(receive_count_total*sizeof(int));
+
+         L7_Push_Update(&border_cell_num[0],   border_cell_num_local,   i_push_handle);
+         L7_Push_Update(&border_cell_i[0],     border_cell_i_local,     i_push_handle);
+         L7_Push_Update(&border_cell_j[0],     border_cell_j_local,     i_push_handle);
+         L7_Push_Update(&border_cell_level[0], border_cell_level_local, i_push_handle);
+
+         L7_Push_Free(&i_push_handle);
+#ifdef _OPENMP
+         }
+#pragma omp barrier
+#endif
+
+         nbsize_local = receive_count_total; 
+
+         if (DEBUG) {
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+            for (int ic = 0; ic < nbsize_local; ic++) {
+               fprintf(fp,"%d: Local Border cell %d is %d i %d j %d level %d\n",mype,ic,border_cell_num_local[ic],
+                  border_cell_i_local[ic],border_cell_j_local[ic],border_cell_level_local[ic]);
+            }
+#ifdef _OPENMP
+         }
+#endif
+         }
+
+         if (TIMING_LEVEL >= 2) {
+#ifdef _OPENMP
+#pragma omp master
+#endif
+            cpu_timers[MESH_TIMER_PUSH_BOUNDARY] += cpu_timer_stop(tstart_lev2);
+            cpu_timer_start(&tstart_lev2);
+         }
+
+         if (TIMING_LEVEL >= 2) {
+#ifdef _OPENMP
+#pragma omp master
+#endif
+            cpu_timers[MESH_TIMER_LOCAL_LIST] += cpu_timer_stop(tstart_lev2);
+            cpu_timer_start(&tstart_lev2);
+         }
+
+         if (DEBUG) {
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+            int jmaxglobal = (jmax+1)*IPOW2(levmx);
+            int imaxglobal = (imax+1)*IPOW2(levmx);
+            fprintf(fp,"\n                                    HASH numbering before layer 1\n");
+            for (int jj = jmaxglobal-1; jj>=0; jj--){
+               fprintf(fp,"%2d: %4d:",mype,jj);
+               if (jj >= jminsize && jj < jmaxsize) {
+                  for (int ii = 0; ii<imaxglobal; ii++){
+                     if (ii >= iminsize && ii < imaxsize) {
+                        fprintf(fp,"%5d",read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash));
+                     } else {
+                        fprintf(fp,"     ");
+                     }
+                  }
+               }
+               fprintf(fp,"\n");
+            }
+            fprintf(fp,"%2d:      ",mype);
+            for (int ii = 0; ii<imaxglobal; ii++){
+               fprintf(fp,"%4d:",ii);
+            }
+            fprintf(fp,"\n");
+#ifdef _OPENMP
+         }
+#endif
+         }
+
+         vector<int> border_cell_needed_local;
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+         border_cell_needed_local.resize(nbsize_local, 0);
+#ifdef _OPENMP
+         }
+#pragma omp barrier
+#endif
+
+         // Layer 1
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+         for (int ic =0; ic<nbsize_local; ic++){
+            int jj = border_cell_j_local[ic];
+            int ii = border_cell_i_local[ic];
+            int lev = border_cell_level_local[ic];
+            int levmult = IPOW2(levmx-lev);
+
+            int iicur = ii*levmult-iminsize;
+            int iilft = max( (ii-1)*levmult, 0         )-iminsize;
+            int iirht = min( (ii+1)*levmult, imaxcalc-1)-iminsize;
+            int jjcur = jj*levmult-jminsize;
+            int jjbot = max( (jj-1)*levmult, 0         )-jminsize;
+            int jjtop = min( (jj+1)*levmult, jmaxcalc-1)-jminsize;
+
+            //fprintf(fp,"DEBUG layer ic %d num %d i %d j %d lev %d\n",ic,border_cell_num_local[ic],ii,jj,lev);
+
+            int iborder = 0;
+
+            // Test for cell to left
+            if (iicur-(iicur-iilft)/2 >= 0 && iicur-(iicur-iilft)/2 < imaxsize-iminsize && jjcur >= 0 && (jjcur+jjtop)/2 < jmaxsize-jminsize){
+               int nlftval = -1;
+               // Check for finer cell left and bottom side
+               if (lev != levmx){                                // finer neighbor
+                  int iilftfiner = iicur-(iicur-iilft)/2;
+                  nlftval = read_hash(jjcur*(imaxsize-iminsize)+iilftfiner, hash);
+                  // Also check for finer cell left and top side
+                  if (nlftval < 0) {
+                     int jjtopfiner = (jjcur+jjtop)/2; 
+                     nlftval = read_hash(jjtopfiner*(imaxsize-iminsize)+iilftfiner, hash);
+                  }
+               }
+
+               if (nlftval < 0 && iilft >= 0) {  // same size
+                  int nlfttry = read_hash(jjcur*(imaxsize-iminsize)+iilft, hash);
+                  // we have to test for same level or it could be a finer cell one cell away that it is matching
+                  if (nlfttry-noffset >= 0 && nlfttry-noffset < (int)ncells && level[nlfttry-noffset] == lev) {
+                     nlftval = nlfttry;
+                  }
+               }
+    
+               if (lev != 0 && nlftval < 0 && iilft-(iicur-iilft) >= 0){      // coarser neighbor
+                  iilft -= iicur-iilft;
+                  int jjlft = (jj/2)*2*levmult-jminsize;
+                  int nlfttry = read_hash(jjlft*(imaxsize-iminsize)+iilft, hash);
+                  // we have to test for coarser level or it could be a same size cell one or two cells away that it is matching
+                  if (nlfttry-noffset >= 0 && nlfttry-noffset < (int)ncells && level[nlfttry-noffset] == lev-1) {
+                    nlftval = nlfttry;
+                  }
+               }
+               if (nlftval >= 0) iborder |= 0x0001;
+            }
+
+            // Test for cell to right: set bit 0x0002 in iborder when the hash holds an entry at the
+            // right-hand column, probing row jjcur, then the midpoint row, then the coarser-aligned row.
+            if (iirht < imaxsize-iminsize && iirht >= 0 && jjcur >= 0 && jjtop < jmaxsize-jminsize) {
+               // right neighbor -- finer, same size and coarser
+               int nrhtval = read_hash(jjcur*(imaxsize-iminsize)+iirht, hash);
+               // right neighbor -- finer right top test
+               if (nrhtval < 0 && lev != levmx){
+                  int jjtopfiner = (jjcur+jjtop)/2;
+                  nrhtval = read_hash(jjtopfiner*(imaxsize-iminsize)+iirht, hash);
+               }
+               if (nrhtval < 0 && lev != 0) { // test for coarser, but not directly right
+                  int jjrhtcoarser = (jj/2)*2*levmult-jminsize;
+                  if (jjrhtcoarser != jjcur) {
+                     int nrhttry = read_hash(jjrhtcoarser*(imaxsize-iminsize)+iirht, hash);
+                     if (nrhttry-noffset >= 0 && nrhttry-noffset < (int)ncells && level[nrhttry-noffset] == lev-1) {
+                        nrhtval = nrhttry;
+                     }
+                  }
+               }
+               if (nrhtval >= 0)  iborder |= 0x0002; // >= 0 like the left/bottom tests: only negative reads mean "not found", so a stored 0 (cell 0 when noffset is 0) counts
+            }
+
+            // Test for cell to bottom
+            if (iicur >= 0 && (iicur+iirht)/2 < imaxsize-iminsize && jjcur-(jjcur-jjbot)/2 >= 0 && jjcur-(jjcur-jjbot)/2 < jmaxsize-jminsize){
+               int nbotval = -1;
+               // Check for finer cell below and left side
+               if (lev != levmx){                                // finer neighbor
+                  int jjbotfiner = jjcur-(jjcur-jjbot)/2;
+                  nbotval = read_hash(jjbotfiner*(imaxsize-iminsize)+iicur, hash);
+                  // Also check for finer cell below and right side
+                  if (nbotval < 0) {
+                     int iirhtfiner = (iicur+iirht)/2; 
+                     nbotval = read_hash(jjbotfiner*(imaxsize-iminsize)+iirhtfiner, hash);
+                  }
+               }
+
+               if (nbotval < 0 && jjbot >= 0) {  // same size
+                  int nbottry = read_hash(jjbot*(imaxsize-iminsize)+iicur, hash);
+                  // we have to test for same level or it could be a finer cell one cell away that it is matching
+                  if (nbottry-noffset >= 0 && nbottry-noffset < (int)ncells && level[nbottry-noffset] == lev) {
+                     nbotval = nbottry;
+                  }
+               }
+    
+               if (lev != 0 && nbotval < 0 && jjbot-(jjcur-jjbot) >= 0){      // coarser neighbor
+                  jjbot -= jjcur-jjbot;
+                  int iibot = (ii/2)*2*levmult-iminsize;
+                  int nbottry = read_hash(jjbot*(imaxsize-iminsize)+iibot, hash);
+                  // we have to test for coarser level or it could be a same size cell one or two cells away that it is matching
+                  if (nbottry-noffset >= 0 && nbottry-noffset < (int)ncells && level[nbottry-noffset] == lev-1) {
+                    nbotval = nbottry;
+                  }
+               }
+               if (nbotval >= 0) iborder |= 0x0004;
+            }
+
+            // Test for cell to top: set bit 0x0008 in iborder when the hash holds an entry in row
+            // jjtop, probing column iicur, then the midpoint column, then the coarser-aligned column.
+            if (iirht < imaxsize-iminsize && iicur >= 0 && jjtop >= 0 && jjtop < jmaxsize-jminsize) {
+               // top neighbor -- finer, same size and coarser
+               int ntopval = read_hash(jjtop*(imaxsize-iminsize)+iicur, hash);
+               // top neighbor -- finer top right test
+               if (ntopval < 0 && lev != levmx){
+                  int iirhtfiner = (iicur+iirht)/2;
+                  ntopval = read_hash(jjtop*(imaxsize-iminsize)+iirhtfiner, hash);
+               }
+               if (ntopval < 0 && lev != 0) { // test for coarser, but not directly above
+                  int iitopcoarser = (ii/2)*2*levmult-iminsize;
+                  if (iitopcoarser != iicur) {
+                     int ntoptry = read_hash(jjtop*(imaxsize-iminsize)+iitopcoarser, hash);
+                     if (ntoptry-noffset >= 0 && ntoptry-noffset < (int)ncells && level[ntoptry-noffset] == lev-1) {
+                        ntopval = ntoptry;
+                     }
+                  }
+               }
+               if (ntopval >= 0)  iborder |= 0x0008; // >= 0 like the left/bottom tests: only negative reads mean "not found", so a stored 0 (cell 0 when noffset is 0) counts
+            }
+
+            if (iborder) border_cell_needed_local[ic] = iborder;
+         }
+#ifdef _OPENMP
+         } // end master region
+#pragma omp barrier
+#endif
+
+         if (DEBUG) {
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+            for(int ic=0; ic<nbsize_local; ic++){
+               if (border_cell_needed_local[ic] == 0) continue;
+               fprintf(fp,"%d: First set of needed cells ic %3d cell %3d type %3d\n",mype,ic,border_cell_num_local[ic],border_cell_needed_local[ic]);
+            }
+#ifdef _OPENMP
+         } // end master region
+#pragma omp barrier
+#endif
+         }
+
+         // Walk through cell array and set hash to border local index plus ncells+noffset for next pass
+         //fprintf(fp,"%d: DEBUG new hash jminsize %d jmaxsize %d iminsize %d imaxsize %d\n",mype,jminsize,jmaxsize,iminsize,imaxsize);
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+         for(int ic=0; ic<nbsize_local; ic++){
+            if (border_cell_needed_local[ic] == 0) continue;
+            //fprintf(fp,"%d: index %d cell %d i %d j %d\n",mype,ic,border_cell_num_local[ic],border_cell_i_local[ic],border_cell_j_local[ic]);
+            int lev = border_cell_level_local[ic];
+            int levmult = IPOW2(levmx-lev);
+            int ii = border_cell_i_local[ic]*levmult-iminsize;
+            int jj = border_cell_j_local[ic]*levmult-jminsize;
+
+            write_hash(ncells+noffset+ic, jj*(imaxsize-iminsize)+ii, hash);
+         }
+#ifdef _OPENMP
+         } // end master region
+#pragma omp barrier
+#endif
+
+         if (TIMING_LEVEL >= 2) {
+#ifdef _OPENMP
+#pragma omp master
+#endif
+            cpu_timers[MESH_TIMER_LAYER1] += cpu_timer_stop(tstart_lev2);
+            cpu_timer_start(&tstart_lev2);
+         }
+
+         if (DEBUG) {
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+            print_local();
+
+            int jmaxglobal = (jmax+1)*IPOW2(levmx);
+            int imaxglobal = (imax+1)*IPOW2(levmx);
+            fprintf(fp,"\n                                    HASH numbering for 1 layer\n");
+            for (int jj = jmaxglobal-1; jj>=0; jj--){
+               fprintf(fp,"%2d: %4d:",mype,jj);
+               if (jj >= jminsize && jj < jmaxsize) {
+                  for (int ii = 0; ii<imaxglobal; ii++){
+                     if (ii >= iminsize && ii < imaxsize) {
+                        fprintf(fp,"%5d",read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) );
+                     } else {
+                        fprintf(fp,"     ");
+                     }
+                  }
+               }
+               fprintf(fp,"\n");
+            }
+            fprintf(fp,"%2d:      ",mype);
+            for (int ii = 0; ii<imaxglobal; ii++){
+               fprintf(fp,"%4d:",ii);
+            }
+            fprintf(fp,"\n");
+#ifdef _OPENMP
+         } // end master region
+#pragma omp barrier
+#endif
+         }
+
+         // Layer 2
+#ifdef _OPENMP
+#pragma omp master
+         {
+#endif
+         for (int ic =0; ic<nbsize_local; ic++){
+            if (border_cell_needed_local[ic] > 0) continue;
+            int jj = border_cell_j_local[ic];
+            int ii = border_cell_i_local[ic];
+            int lev = border_cell_level_local[ic];
+            int levmult = IPOW2(levmx-lev);
+
+            int iicur = ii*levmult-iminsize;
+            int iilft = max( (ii-1)*levmult, 0         )-iminsize;
+            int iirht = min( (ii+1)*levmult, imaxcalc-1)-iminsize;
+            int jjcur = jj*levmult-jminsize;
+            int jjbot = max( (jj-1)*levmult, 0         )-jminsize;
+            int jjtop = min( (jj+1)*levmult, jmaxcalc-1)-jminsize;
+
+            //fprintf(fp,"            DEBUG layer2 ic %d num %d i %d j %d lev %d\n",ic,border_cell_num_local[ic],ii,jj,lev);
+   
+            int iborder = 0;
+
+            // Test for cell to left
+            if (iicur-(iicur-iilft)/2 >= 0 && iicur-(iicur-iilft)/2 < imaxsize-iminsize && jjcur >= 0 &&      (jjcur+jjtop)/2 < jmaxsize-jminsize){
+               // Check for finer cell left and bottom side
+               if (lev != levmx){                                // finer neighbor
+                  int iilftfiner = iicur-(iicur-iilft)/2;
+                  int nl = read_hash(jjcur*(imaxsize-iminsize)+iilftfiner, hash);
+                  if (nl >= (int)(ncells+noffset) && (border_cell_needed_local[nl-ncells-noffset] & 0x0001) == 0x0001) {
+                     iborder = 0x0001;
+                  } else {
+                     // Also check for finer cell left and top side
+                     int jjtopfiner = (jjcur+jjtop)/2;
+                     int nlt = read_hash(jjtopfiner*(imaxsize-iminsize)+iilftfiner, hash);
+                     if ( nlt >= (int)(ncells+noffset) && (border_cell_needed_local[nlt-ncells-noffset] & 0x0001) == 0x0001) {
+                        iborder = 0x0001;
+                     }
+                  }
+               }
+               if ( (iborder & 0x0001) == 0 && iilft >= 0) { //same size
+                  int nl = read_hash(jjcur*(imaxsize-iminsize)+iilft, hash);
+                  int levcheck = -1;
+                  if (nl-noffset >= 0 && nl-noffset < (int)ncells) {
+                     levcheck = level[nl-noffset];
+                  } else if (nl >= 0 && (int)(nl-ncells-noffset) >= 0 && (int)(nl-ncells-noffset) < nbsize_local) {
+                     levcheck = border_cell_level_local[nl-ncells-noffset];
+                  }
+                  if (nl >= (int)(ncells+noffset) && levcheck == lev && (border_cell_needed_local[nl-ncells-noffset] & 0x0001) == 0x0001) {
+                     iborder = 0x0001;
+                  } else if (lev != 0 && iilft-(iicur-iilft) >= 0){      // coarser neighbor
+                     iilft -= iicur-iilft;
+                     int jjlft = (jj/2)*2*levmult-jminsize;
+                     nl = read_hash(jjlft*(imaxsize-iminsize)+iilft, hash);
+                     levcheck = -1;
+                     if (nl-noffset >= 0 && nl-noffset < (int)ncells) {
+                        levcheck = level[nl-noffset];
+                     } else if (nl >= 0 && (int)(nl-ncells-noffset) >= 0 && (int)(nl-ncells-noffset) < nbsize_local) {
+                        levcheck = border_cell_level_local[nl-ncells-noffset];
+                     }
+                     // we have to test for coarser level or it could be a same size cell one or two cells away that it is matching
+                     if (nl  >= (int)(ncells+noffset) && levcheck == lev-1 && (border_cell_needed_local[nl-ncells-noffset] & 0x0001) == 0x0001) {
+                        iborder = 0x0001;
+                     }
+                  }
+               }
+            }
+
+            // Test for cell to right
+            if (iirht < imaxsize-iminsize && iirht >= 0 && jjcur >= 0 && jjtop < jmaxsize-jminsize) {
+               // right neighbor -- finer, same size and coarser
+               int nr = read_hash(jjcur*(imaxsize-iminsize)+iirht, hash);
+               if (nr >= (int)(ncells+noffset) && (border_cell_needed_local[nr-ncells-noffset] & 0x0002) == 0x0002) {
+                  iborder = 0x0002;
+               } else if (lev != levmx){
+                  // right neighbor -- finer right top test
+                  int jjtopfiner = (jjcur+jjtop)/2;
+                  int nrt = read_hash(jjtopfiner*(imaxsize-iminsize)+iirht, hash);
+                  if (nrt >= (int)(ncells+noffset) && (border_cell_needed_local[nrt-ncells-noffset] & 0x0002) == 0x0002) {
+                     iborder = 0x0002;
+                  }
+               }
+               if ( (iborder & 0x0002) == 0  && lev != 0) { // test for coarser, but not directly right
+                  int jjrhtcoarser = (jj/2)*2*levmult-jminsize;
+                  if (jjrhtcoarser != jjcur) {
+                     int nr = read_hash(jjrhtcoarser*(imaxsize-iminsize)+iirht, hash);
+                     int levcheck = -1;
+                     if (nr-noffset >= 0 && nr-noffset < (int)ncells) {
+                        levcheck = level[nr-noffset];
+                     } else if (nr >= 0 && (int)(nr-ncells-noffset) >= 0 && (int)(nr-ncells-noffset) < nbsize_local) {
+                        levcheck = border_cell_level_local[nr-ncells-noffset];
+                     }
+                     if (nr >= (int)(ncells+noffset) && levcheck == lev-1 && (border_cell_needed_local[nr-ncells-noffset] & 0x0002) == 0x0002) {
+                        iborder = 0x0002;
+                     }
+                  }
+               }
+            }
+
+            // Test for cell to bottom
+            if (iicur >= 0 && (iicur+iirht)/2 < imaxsize-iminsize && jjcur-(jjcur-jjbot)/2 >= 0 && jjcur-(jjcur-jjbot)/2 < jmaxsize-jminsize){
+               // Check for finer cell below and left side
+               if (lev != levmx){                                // finer neighbor
+                  int jjbotfiner = jjcur-(jjcur-jjbot)/2;
+                  int nb = read_hash(jjbotfiner*(imaxsize-iminsize)+iicur, hash);
+                  if (nb >= (int)(ncells+noffset) && (border_cell_needed_local[nb-ncells-noffset] & 0x0004) == 0x0004) {
+                     iborder = 0x0004;
+                  } else {
+                     // Also check for finer cell below and right side
+                     int iirhtfiner = (iicur+iirht)/2;
+                     int nbr = read_hash(jjbotfiner*(imaxsize-iminsize)+iirhtfiner, hash);
+                     if (nbr >= (int)(ncells+noffset) && (border_cell_needed_local[nbr-ncells-noffset] & 0x0004) == 0x0004) {
+                        iborder = 0x0004;
+                     }
+                  }
+               }
+               if ( (iborder & 0x0004) == 0 && jjbot >= 0) { //same size
+                  int nb = read_hash(jjbot*(imaxsize-iminsize)+iicur, hash);
+                  int levcheck = -1;
+                  if (nb-noffset >= 0 && nb-noffset < (int)ncells) {
+                     levcheck = level[nb-noffset];
+                  } else if (nb >= 0 && (int)(nb-ncells-noffset) >= 0 && (int)(nb-ncells-noffset) < nbsize_local) {
+                     levcheck = border_cell_level_local[nb-ncells-noffset];
+                  }
+                  if (nb >= (int)(ncells+noffset) && levcheck == lev && (border_cell_needed_local[nb-ncells-noffset] & 0x0004) == 0x0004) {
+                     iborder = 0x0004;
+                  } else if (lev != 0 && jjbot-(jjcur-jjbot) >= 0){      // coarser neighbor
+                     jjbot -= jjcur-jjbot;
+                     int iibot = (ii/2)*2*levmult-iminsize;
+                     nb = read_hash(jjbot*(imaxsize-iminsize)+iibot, hash);
+                     levcheck = -1;
+                     if (nb-noffset >= 0 && nb-noffset < (int)ncells) {
+                        levcheck = level[nb-noffset];
+                     } else if (nb >= 0 && (int)(nb-ncells-noffset) >= 0 && (int)(nb-ncells-noffset) < nbsize_local) {
+                        levcheck = border_cell_level_local[nb-ncells-noffset];
+                     }
+                     // we have to test for coarser level or it could be a same size cell one or two cells away that it is matching
+                     if (nb >= (int)(ncells+noffset) && levcheck == lev-1 && (border_cell_needed_local[nb-ncells-noffset] & 0x0004) == 0x0004) {
+                        iborder = 0x0004;
+                     }
+                  }
+               }
+            }
+
+            // Test for cell to top
+            if (iirht < imaxsize-iminsize && iicur >= 0 && jjtop >= 0 && jjtop < jmaxsize-jminsize) {
+               // top neighbor -- finer, same size and coarser
+               int nt = read_hash(jjtop*(imaxsize-iminsize)+iicur, hash);
+               if (nt  >= (int)(ncells+noffset) && (border_cell_needed_local[nt-ncells-noffset] & 0x0008) == 0x0008) {
+                  iborder = 0x0008;
+               } else if (lev != levmx){
+                  int iirhtfiner = (iicur+iirht)/2;
+                  int ntr = read_hash(jjtop*(imaxsize-iminsize)+iirhtfiner, hash);
+                  if ( ntr >= (int)(ncells+noffset) && (border_cell_needed_local[ntr-ncells-noffset] & 0x0008) == 0x0008) {
+                     iborder = 0x0008;
+                  }
+               }
+               if ( (iborder & 0x0008) == 0  && lev != 0) { // test for coarser, but not directly above
+                  int iitopcoarser = (ii/2)*2*levmult-iminsize; // column of the coarser cell that would contain this one
+                  if (iitopcoarser != iicur) {
+                     int nb = read_hash(jjtop*(imaxsize-iminsize)+iitopcoarser, hash);
+                     int levcheck = -1; // stays -1 when the hash value maps to neither a local nor a border cell
+                     if (nb-noffset >= 0 && nb-noffset < (int)ncells) {
+                        levcheck = level[nb-noffset];
+                     } else if (nb >= 0 && (int)(nb-ncells-noffset) >= 0 && (int)(nb-ncells-noffset) < nbsize_local) {
+                        levcheck = border_cell_level_local[nb-ncells-noffset];
+                     }
+                     if (nb >= (int)(ncells+noffset) && levcheck == lev-1 && (border_cell_needed_local[nb-ncells-noffset] & 0x0008) == 0x0008) { // same border-cell guard as the left/right/bottom tests; keeps the index non-negative
+                        iborder = 0x0008;
+                     }
+                  }
+               }
+            }
+
+            if (iborder) border_cell_needed_local[ic] = iborder |= 0x0016;
+         }
+#ifdef _OPENMP
+         } // end master region
+#pragma omp barrier
+#endif
+
+         vector<int> indices_needed;
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+         if (DEBUG) {
+            for(int ic=0; ic<nbsize_local; ic++){
+               if (border_cell_needed_local[ic] <  0x0016) fprintf(fp,"%d: First  set of needed cells ic %3d cell %3d type %3d\n",mype,ic,border_cell_num_local[ic],border_cell_needed_local[ic]);
+               if (border_cell_needed_local[ic] >= 0x0016) fprintf(fp,"%d: Second set of needed cells ic %3d cell %3d type %3d\n",mype,ic,border_cell_num_local[ic],border_cell_needed_local[ic]);
+            }
+         }
+
+         int inew = 0;
+         for(int ic=0; ic<nbsize_local; ic++){
+            if (border_cell_needed_local[ic] <= 0) continue;
+            indices_needed.push_back(border_cell_num_local[ic]);
+
+            border_cell_num_local[inew]    = border_cell_num_local[ic];
+            border_cell_i_local[inew]      = border_cell_i_local[ic];
+            border_cell_j_local[inew]      = border_cell_j_local[ic];
+            border_cell_level_local[inew]  = border_cell_level_local[ic];
+            // border_cell_num_local is not used after -- could be commented out?
+            // border_cell_needed_local[inew] = 1;
+
+            inew++;
+         }
+         nbsize_local = inew;
+
+         free(border_cell_num_local);
+#ifdef _OPENMP
+         } // end master region
+#pragma omp barrier
+#endif
+
+         // Walk through cell array and set hash to global cell values
+         //fprintf(fp,"%d: DEBUG new hash jminsize %d jmaxsize %d iminsize %d imaxsize %d\n",mype,jminsize,jmaxsize,iminsize,imaxsize);
+#ifdef _OPENMP
+#pragma omp for
+#endif
+         for(int ic=0; ic<nbsize_local; ic++){
+            int lev = border_cell_level_local[ic];
+            int levmult = IPOW2(levmx-lev);
+
+            int ii = border_cell_i_local[ic]*levmult-iminsize;
+            int jj = border_cell_j_local[ic]*levmult-jminsize;
+
+            write_hash(-(ncells+ic), jj*(imaxsize-iminsize)+ii, hash);
+         }
+
+         if (TIMING_LEVEL >= 2) {
+#ifdef _OPENMP
+#pragma omp master
+#endif
+            cpu_timers[MESH_TIMER_LAYER2] += cpu_timer_stop(tstart_lev2);
+            cpu_timer_start(&tstart_lev2);
+         }
+
+         if (DEBUG) {
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+            print_local();
+
+            int jmaxglobal = (jmax+1)*IPOW2(levmx);
+            int imaxglobal = (imax+1)*IPOW2(levmx);
+            fprintf(fp,"\n                                    HASH numbering for 2 layer\n");
+            for (int jj = jmaxglobal-1; jj>=0; jj--){
+               fprintf(fp,"%2d: %4d:",mype,jj);
+               if (jj >= jminsize && jj < jmaxsize) {
+                  for (int ii = 0; ii<imaxglobal; ii++){
+                     if (ii >= iminsize && ii < imaxsize) {
+                        fprintf(fp,"%5d",read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) );
+                     } else {
+                        fprintf(fp,"     ");
+                     }
+                  }
+               }
+               fprintf(fp,"\n");
+            }
+            fprintf(fp,"%2d:      ",mype);
+            for (int ii = 0; ii<imaxglobal; ii++){
+               fprintf(fp,"%4d:",ii);
+            }
+            fprintf(fp,"\n");
+#ifdef _OPENMP
+         } // end master region
+#endif
+         }
+
+         if (TIMING_LEVEL >= 2) {
+#ifdef _OPENMP
+#pragma omp master
+#endif
+            cpu_timers[MESH_TIMER_LAYER_LIST] += cpu_timer_stop(tstart_lev2);
+            cpu_timer_start(&tstart_lev2);
+         }
+
+         int nghost = nbsize_local;
+         ncells_ghost = ncells + nghost;
+
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+         celltype = (int *)mesh_memory.memory_realloc(ncells_ghost, celltype);
+         i        = (int *)mesh_memory.memory_realloc(ncells_ghost, i);
+         j        = (int *)mesh_memory.memory_realloc(ncells_ghost, j);
+         level    = (int *)mesh_memory.memory_realloc(ncells_ghost, level);
+         nlft     = (int *)mesh_memory.memory_realloc(ncells_ghost, nlft);
+         nrht     = (int *)mesh_memory.memory_realloc(ncells_ghost, nrht);
+         nbot     = (int *)mesh_memory.memory_realloc(ncells_ghost, nbot);
+         ntop     = (int *)mesh_memory.memory_realloc(ncells_ghost, ntop);
+         memory_reset_ptrs();
+#ifdef _OPENMP
+         } // end master region
+#pragma omp barrier
+#endif
+
+#ifdef _OPENMP
+#pragma omp for
+#endif
+         for (int ic = ncells; ic < (int)ncells_ghost; ic++){
+            nlft[ic] = -1;
+            nrht[ic] = -1;
+            nbot[ic] = -1;
+            ntop[ic] = -1;
+         }
+
+         if (TIMING_LEVEL >= 2) {
+#ifdef _OPENMP
+#pragma omp master
+#endif
+            cpu_timers[MESH_TIMER_COPY_MESH_DATA] += cpu_timer_stop(tstart_lev2);
+            cpu_timer_start(&tstart_lev2);
+         }
+
+#ifdef _OPENMP
+#pragma omp for
+#endif
+         for(int ic=0; ic<nbsize_local; ic++){
+            int ii = border_cell_i_local[ic];
+            int jj = border_cell_j_local[ic];
+            int lev = border_cell_level_local[ic];
+            if (ii < lev_ibegin[lev]) celltype[ncells+ic] = LEFT_BOUNDARY;
+            if (ii > lev_iend[lev])   celltype[ncells+ic] = RIGHT_BOUNDARY;
+            if (jj < lev_jbegin[lev]) celltype[ncells+ic] = BOTTOM_BOUNDARY;
+            if (jj > lev_jend[lev])   celltype[ncells+ic] = TOP_BOUNDARY;
+            i[ncells+ic]     = ii;
+            j[ncells+ic]     = jj;
+            level[ncells+ic] = lev;
+         }
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+            free(border_cell_i_local);
+            free(border_cell_j_local);
+            free(border_cell_level_local);
+#ifdef _OPENMP
+         } // end master region
+#endif
+
+         if (TIMING_LEVEL >= 2) {
+#ifdef _OPENMP
+#pragma omp master
+#endif
+            cpu_timers[MESH_TIMER_FILL_MESH_GHOST] += cpu_timer_stop(tstart_lev2);
+            cpu_timer_start(&tstart_lev2);
+         }
+
+         if (DEBUG) {
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+            {
+#endif
+               fprintf(fp,"After copying i,j, level to ghost cells\n");
+               print_local();
+#ifdef _OPENMP
+            } // end master region
+#endif
+         }
+
+#ifdef _OPENMP
+#pragma omp for
+#endif
+         for (uint ic=0; ic<ncells_ghost; ic++){
+            int ii = i[ic];
+            int jj = j[ic];
+            int lev = level[ic];
+            int levmult = IPOW2(levmx-lev);
+
+            int iicur = ii*levmult-iminsize;
+            int iilft = max( (ii-1)*levmult, 0         )-iminsize;
+            int iirht = min( (ii+1)*levmult, imaxcalc-1)-iminsize;
+            int jjcur = jj*levmult-jminsize;
+            int jjbot = max( (jj-1)*levmult, 0         )-jminsize;
+            int jjtop = min( (jj+1)*levmult, jmaxcalc-1)-jminsize;
+
+            //fprintf(fp,"DEBUG neigh ic %d nlft %d ii %d levmult %d iminsize %d icheck %d\n",ic,nlft[ic],ii,levmult,iminsize,(max(  ii   *levmult-1, 0))-iminsize);
+
+            int nlftval = nlft[ic];
+            int nrhtval = nrht[ic];
+            int nbotval = nbot[ic];
+            int ntopval = ntop[ic];
+
+            if (nlftval == -1){
+               // Taking care of boundary cells
+               // Force each boundary cell to point to itself on its boundary direction
+               if (iicur <    1*IPOW2(levmx)  -iminsize) nlftval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash);
+
+               // Boundary cells next to corner boundary need special checks
+               if (iicur ==    1*IPOW2(levmx)-iminsize &&  (jjcur < 1*IPOW2(levmx)-jminsize || jjcur >= jmax*IPOW2(levmx)-jminsize ) ) nlftval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash);
+
+               // need to check for finer neighbor first
+               // Right and top neighbor don't change for finer, so drop through to same size
+               // Left and bottom need to be half of same size index for finer test
+               if (lev != levmx) {
+                  int iilftfiner = iicur-(iicur-iilft)/2;
+                  if (nlftval == -1 && iilftfiner >= 0) nlftval = read_hash(jjcur*(imaxsize-iminsize)+iilftfiner, hash);
+               }
+
+               // same size neighbor
+               if (nlftval == -1 && iilft >= 0) nlftval = read_hash(jjcur*(imaxsize-iminsize)+iilft, hash);
+
+               // Now we need to take care of special case where bottom and left boundary need adjustment since
+               // expected cell doesn't exist on these boundaries if it is finer than current cell
+               if (jjcur < 1*IPOW2(levmx) && lev != levmx) {
+                  if (nlftval == -1) {
+                     int iilftfiner = iicur-(iicur-iilft)/2;
+                     int jjtopfiner = (jjcur+jjtop)/2;
+                     if (jjtopfiner < jmaxsize-jminsize && iilftfiner >= 0) nlftval = read_hash(jjtopfiner*(imaxsize-iminsize)+iilftfiner, hash);
+                  }
+               }
+
+               // coarser neighbor
+               if (lev != 0){
+                  if (nlftval == -1) {
+                     int iilftcoarser = iilft - (iicur-iilft);
+                     int jjlft = (jj/2)*2*levmult-jminsize;
+                     if (iilftcoarser >=0) nlftval = read_hash(jjlft*(imaxsize-iminsize)+iilftcoarser, hash);
+                  }
+               }
+
+               if (nlftval != -1) nlft[ic] = nlftval;
+            }
+
+            if (nrhtval == -1) {
+               // Taking care of boundary cells
+               // Force each boundary cell to point to itself on its boundary direction
+               if (iicur > imax*IPOW2(levmx)-1-iminsize) nrhtval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash);
+
+               // Boundary cells next to corner boundary need special checks
+               if (iirht == imax*IPOW2(levmx)-iminsize &&  (jjcur < 1*IPOW2(levmx)-jminsize || jjcur >= jmax*IPOW2(levmx)-jminsize ) ) nrhtval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash);
+
+               // same size neighbor
+               if (nrhtval == -1 && iirht < imaxsize-iminsize) nrhtval = read_hash(jjcur*(imaxsize-iminsize)+iirht, hash);
+
+               // Now we need to take care of special case where bottom and left boundary need adjustment since
+               // expected cell doesn't exist on these boundaries if it is finer than current cell
+               if (jjcur < 1*IPOW2(levmx) && lev != levmx) {
+                  if (nrhtval == -1) {
+                     int jjtopfiner = (jjcur+jjtop)/2;
+                     if (jjtopfiner < jmaxsize-jminsize && iirht < imaxsize-iminsize) nrhtval = read_hash(jjtopfiner*(imaxsize-iminsize)+iirht, hash);
+                  }
+               }
+
+               // coarser neighbor
+               if (lev != 0){
+                  if (nrhtval == -1) {
+                     int jjrht = (jj/2)*2*levmult-jminsize;
+                     if (iirht < imaxsize-iminsize) nrhtval = read_hash(jjrht*(imaxsize-iminsize)+iirht, hash);
+                  }
+               }
+               if (nrhtval != -1) nrht[ic] = nrhtval;
+            }
+ 
+            if (nbotval == -1) {
+               // Taking care of boundary cells
+               // Force each boundary cell to point to itself on its boundary direction
+               if (jjcur <    1*IPOW2(levmx)  -jminsize) nbotval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash);
+               // Boundary cells next to corner boundary need special checks
+               if (jjcur ==    1*IPOW2(levmx)-jminsize &&  (iicur < 1*IPOW2(levmx)-iminsize || iicur >= imax*IPOW2(levmx)-iminsize ) ) nbotval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash);
+
+               // need to check for finer neighbor first
+               // Right and top neighbor don't change for finer, so drop through to same size
+               // Left and bottom need to be half of same size index for finer test
+               if (lev != levmx) {
+                  int jjbotfiner = jjcur-(jjcur-jjbot)/2;
+                  if (nbotval == -1 && jjbotfiner >= 0) nbotval = read_hash(jjbotfiner*(imaxsize-iminsize)+iicur, hash);
+               }
+
+               // same size neighbor
+               if (nbotval == -1 && jjbot >=0) nbotval = read_hash(jjbot*(imaxsize-iminsize)+iicur, hash);
+
+               // Now we need to take care of special case where bottom and left boundary need adjustment since
+               // expected cell doesn't exist on these boundaries if it is finer than current cell
+               if (iicur < 1*IPOW2(levmx) && lev != levmx) {
+                  if (nbotval == -1) {
+                     int iirhtfiner = (iicur+iirht)/2;
+                     int jjbotfiner = jjcur-(jjcur-jjbot)/2;
+                     if (jjbotfiner >= 0 && iirhtfiner < imaxsize-iminsize) nbotval = read_hash(jjbotfiner*(imaxsize-iminsize)+iirhtfiner, hash);
+                  }
+               }
+
+               // coarser neighbor
+               if (lev != 0){
+                  if (nbotval == -1) {
+                     int jjbotcoarser = jjbot - (jjcur-jjbot);
+                     int iibot = (ii/2)*2*levmult-iminsize;
+                     if (jjbotcoarser >= 0 && iibot >= 0) nbotval = read_hash(jjbotcoarser*(imaxsize-iminsize)+iibot, hash);
+                  }
+               }
+               if (nbotval != -1) nbot[ic] = nbotval;
+            }
+    
+            if (ntopval == -1) {
+               // Taking care of boundary cells
+               // Force each boundary cell to point to itself on its boundary direction
+               if (jjcur > jmax*IPOW2(levmx)-1-jminsize) ntopval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash);
+               // Boundary cells next to corner boundary need special checks
+               if (jjtop == jmax*IPOW2(levmx)-jminsize &&  (iicur < 1*IPOW2(levmx)-iminsize || iicur >= imax*IPOW2(levmx)-iminsize ) ) ntopval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash);
+
+               // same size neighbor
+               if (ntopval == -1 && jjtop < jmaxsize-jminsize) ntopval = read_hash(jjtop*(imaxsize-iminsize)+iicur, hash);
+   
+               if (iicur < 1*IPOW2(levmx)) {
+                  if (ntopval == -1) {
+                     int iirhtfiner = (iicur+iirht)/2;
+                     if (jjtop < jmaxsize-jminsize && iirhtfiner < imaxsize-iminsize) ntopval = read_hash(jjtop*(imaxsize-iminsize)+iirhtfiner, hash);
+                  }
+               }
+   
+               // coarser neighbor
+               if (lev != 0){
+                  if (ntopval == -1) {
+                     int iitop = (ii/2)*2*levmult-iminsize;
+                     if (jjtop < jmaxsize-jminsize && iitop < imaxsize-iminsize) ntopval = read_hash(jjtop*(imaxsize-iminsize)+iitop, hash);
+                  }
+               }
+               if (ntopval != -1) ntop[ic] = ntopval;
+            }
+ 
+            //fprintf(fp,"%d: neighbors[%d] = %d %d %d %d\n",mype,ic,nlft[ic],nrht[ic],nbot[ic],ntop[ic]);
+         }
+
+         if (TIMING_LEVEL >= 2) {
+#ifdef _OPENMP
+#pragma omp master
+#endif
+            cpu_timers[MESH_TIMER_FILL_NEIGH_GHOST] += cpu_timer_stop(tstart_lev2);
+            cpu_timer_start(&tstart_lev2);
+         }
+
+         if (DEBUG) {
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+            {
+#endif
+            fprintf(fp,"After setting neighbors through ghost cells\n");
+            print_local();
+#ifdef _OPENMP
+            } // end master region
+#endif
+         }
+
+/*
+         // Set neighbors to global cell numbers from hash
+         for (uint ic=0; ic<ncells; ic++){
+            ii = i[ic];
+            jj = j[ic];
+            lev = level[ic];
+            levmult = IPOW2(levmx-lev);
+            //fprintf(fp,"%d:Neighbors input for ic %d ii %d jj %d levmult %d lev %d\n",mype,ic, ii, jj, levmult,lev);
+            //fprintf(fp,"%d:Neighbors befor ic %d nlft %d nrht %d nbot %d ntop %d\n",mype,ic,nlft[ic],nrht[ic],nbot[ic],ntop[ic]);
+            if (nlft[ic] == -1) nlft[ic] = hash[(      jj   *levmult               )-jminsize][(max(  ii   *levmult-1, 0         ))-iminsize];
+            if (celltype[ic] == BOTTOM_BOUNDARY && nlft[ic] == -1){
+               if (nlft[ic] == -1) nlft[ic] = hash[(jj+1)*levmult-jminsize][(min( (ii+1)*levmult,   imaxcalc-1))-iminsize];
+            }
+            if (nrht[ic] == -1) nrht[ic] = hash[(      jj   *levmult               )-jminsize][(min( (ii+1)*levmult,   imaxcalc-1))-iminsize];
+            if (celltype[ic] == BOTTOM_BOUNDARY && nrht[ic] == -1){
+               if (nrht[ic] == -1) nrht[ic] = hash[(jj+1)*levmult-jminsize][(min( (ii+1)*levmult,   imaxcalc-1))-iminsize];
+               //if (ic == 3 && mype == 0) printf("DEBUG line %d -- ic %d celltype %d nrht %d\n",__line__,ic,celltype[ic],nrht[ic]);
+               //printf("DEBUG line %d -- ic %d celltype %d nrht %d jj %d ii %d\n",__line__,ic,celltype[ic],nrht[ic],(jj+1)*levmult-jminsize,(min( (ii+1)*levmult,   imaxcalc-1))-iminsize);
+            }
+            if (nbot[ic] == -1) nbot[ic] = hash[(max(  jj   *levmult-1, 0)         )-jminsize][(      ii   *levmult               )-iminsize];
+            if (celltype[ic] == LEFT_BOUNDARY && nbot[ic] == -1){
+               if (nbot[ic] == -1) nbot[ic] = hash[(max(  jj   *levmult-1, 0)         )-jminsize][(      ii   *levmult+1             )-iminsize];
+            }
+            if (ntop[ic] == -1) ntop[ic] = hash[(min( (jj+1)*levmult,   jmaxcalc-1))-jminsize][(      ii   *levmult               )-iminsize];
+            if (celltype[ic] == LEFT_BOUNDARY && ntop[ic] == -1){
+               if (ntop[ic] == -1) ntop[ic] = hash[(min( (jj+1)*levmult,   jmaxcalc-1))-jminsize][(      ii   *levmult+1             )-iminsize];
+            }
+            //fprintf(fp,"%d:Neighbors after ic %d nlft %d nrht %d nbot %d ntop %d\n",mype,ic,nlft[ic],nrht[ic],nbot[ic],ntop[ic]);
+         }
+*/
+
+         if (TIMING_LEVEL >= 2) {
+#ifdef _OPENMP
+#pragma omp master
+#endif
+            cpu_timers[MESH_TIMER_SET_CORNER_NEIGH] += cpu_timer_stop(tstart_lev2);
+            cpu_timer_start(&tstart_lev2);
+         }
+
+         if (DEBUG) {
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+            {
+#endif
+               fprintf(fp,"After setting corner neighbors\n");
+               print_local();
+#ifdef _OPENMP
+            } // end master region
+#endif
+         }
+
+         // Adjusting neighbors to local indices
+#ifdef _OPENMP
+#pragma omp for
+#endif
+         for (uint ic=0; ic<ncells_ghost; ic++){
+            //fprintf(fp,"%d: ic %d nlft %d noffset %d ncells %ld\n",mype,ic,nlft[ic],noffset,ncells);
+            if (nlft[ic] <= -(int)ncells && nlft[ic] > -(int)ncells_ghost){
+               nlft[ic] = abs(nlft[ic]);
+            } else if (nlft[ic] >= noffset && nlft[ic] < (int)(noffset+ncells)) {
+               nlft[ic] -= noffset;
+            }
+            if (nrht[ic] <= -(int)ncells && nrht[ic] > -(int)ncells_ghost){
+               nrht[ic] = abs(nrht[ic]);
+            } else if (nrht[ic] >= noffset && nrht[ic] < (int)(noffset+ncells)) {
+               nrht[ic] -= noffset;
+            }
+            if (nbot[ic] <= -(int)ncells && nbot[ic] > -(int)ncells_ghost){
+               nbot[ic] = abs(nbot[ic]);
+            } else if (nbot[ic] >= noffset && nbot[ic] < (int)(noffset+ncells)) {
+               nbot[ic] -= noffset;
+            }
+            if (ntop[ic] <= -(int)ncells && ntop[ic] > -(int)ncells_ghost){
+               ntop[ic] = abs(ntop[ic]);
+            } else if (ntop[ic] >= noffset && ntop[ic] < (int)(noffset+ncells)) {
+               ntop[ic] -= noffset;
+            }
+         }
+
+         if (DEBUG) {
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+            {
+#endif
+               fprintf(fp,"After adjusting neighbors to local indices\n");
+               print_local();
+#ifdef _OPENMP
+            } // end master region
+#endif
+         }
+         
+         if (TIMING_LEVEL >= 2) {
+#ifdef _OPENMP
+#pragma omp master
+#endif
+            cpu_timers[MESH_TIMER_NEIGH_ADJUST] += cpu_timer_stop(tstart_lev2);
+            cpu_timer_start(&tstart_lev2);
+         }
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+            offtile_ratio_local = (offtile_ratio_local*(double)offtile_local_count) + ((double)nghost / (double)ncells);
+            offtile_local_count++;
+            offtile_ratio_local /= offtile_local_count;
+            //printf("%d ncells size is %ld ncells_ghost size is %ld nghost %d\n",mype,ncells,ncells_ghost,nghost);
+            //fprintf(fp,"%d ncells_ghost size is %ld nghost %d\n",mype,ncells_ghost,nghost);
+
+            if (cell_handle) L7_Free(&cell_handle);
+            cell_handle=0;
+
+            if (DEBUG) {
+               fprintf(fp,"%d: SETUP ncells %ld noffset %d nghost %d\n",mype,ncells,noffset,nghost);
+               for (int ig = 0; ig<nghost; ig++){
+                  fprintf(fp,"%d: indices needed ic %d index %d\n",mype,ig,indices_needed[ig]);
+               }
+            }
+            L7_Setup(0, noffset, ncells, &indices_needed[0], nghost, &cell_handle);
+
+            if (TIMING_LEVEL >= 2) cpu_timers[MESH_TIMER_SETUP_COMM] += cpu_timer_stop(tstart_lev2);
+
+#ifdef _OPENMP
+         } // end master region
+#endif
+
+         if (DEBUG) {
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+            {
+#endif
+            print_local();
+
+            int jmaxglobal = (jmax+1)*IPOW2(levmx);
+            int imaxglobal = (imax+1)*IPOW2(levmx);
+            fprintf(fp,"\n                                    HASH numbering\n");
+            for (int jj = jmaxglobal-1; jj>=0; jj--){
+               fprintf(fp,"%2d: %4d:",mype,jj);
+               if (jj >= jminsize && jj < jmaxsize) {
+                  for (int ii = 0; ii<imaxglobal; ii++){
+                     if (ii >= iminsize && ii < imaxsize) {
+                        fprintf(fp,"%5d",read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) );
+                     } else {
+                        fprintf(fp,"     ");
+                     }
+                  }
+               }
+               fprintf(fp,"\n");
+            }
+            fprintf(fp,"%2d:      ",mype);
+            for (int ii = 0; ii<imaxglobal; ii++){
+               fprintf(fp,"%4d:",ii);
+            }
+            fprintf(fp,"\n");
+
+            fprintf(fp,"\n                                    nlft numbering\n");
+            for (int jj = jmaxglobal-1; jj>=0; jj--){
+               fprintf(fp,"%2d: %4d:",mype,jj);
+               if (jj >= jminsize && jj < jmaxsize) {
+                  for (int ii = 0; ii<imaxglobal; ii++){
+                     if (ii >= iminsize && ii < imaxsize) {
+                        int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) -noffset;
+                        if ( (hashval >= 0 && hashval < (int)ncells) ) {
+                              fprintf(fp,"%5d",nlft[hashval]);
+                        } else {
+                              fprintf(fp,"     ");
+                        }
+                     }
+                  }
+               }
+               fprintf(fp,"\n");
+            }
+            fprintf(fp,"%2d:      ",mype);
+            for (int ii = 0; ii<imaxglobal; ii++){
+               fprintf(fp,"%4d:",ii);
+            }
+            fprintf(fp,"\n");
+      
+            fprintf(fp,"\n                                    nrht numbering\n");
+            for (int jj = jmaxglobal-1; jj>=0; jj--){
+               fprintf(fp,"%2d: %4d:",mype,jj);
+               if (jj >= jminsize && jj < jmaxsize) {
+                  for (int ii = 0; ii<imaxglobal; ii++){
+                     if ( ii >= iminsize && ii < imaxsize ) {
+                        int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) -noffset;
+                        if ( hashval >= 0 && hashval < (int)ncells ) {
+                           fprintf(fp,"%5d",nrht[hashval]);
+                        } else {
+                           fprintf(fp,"     ");
+                        }
+                     }
+                  }
+               }
+               fprintf(fp,"\n");
+            }
+            fprintf(fp,"%2d:      ",mype);
+            for (int ii = 0; ii<imaxglobal; ii++){
+               fprintf(fp,"%4d:",ii);
+            }
+            fprintf(fp,"\n");
+
+            fprintf(fp,"\n                                    nbot numbering\n");
+            for (int jj = jmaxglobal-1; jj>=0; jj--){
+               fprintf(fp,"%2d: %4d:",mype,jj);
+               if (jj >= jminsize && jj < jmaxsize) {
+                  for (int ii = 0; ii<imaxglobal; ii++){
+                     if ( ii >= iminsize && ii < imaxsize ) {
+                        int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) -noffset;
+                        if ( hashval >= 0 && hashval < (int)ncells ) {
+                           fprintf(fp,"%5d",nbot[hashval]);
+                        } else {
+                           fprintf(fp,"     ");
+                        }
+                     }
+                  }
+               }
+               fprintf(fp,"\n");
+            }
+            fprintf(fp,"%2d:      ",mype);
+            for (int ii = 0; ii<imaxglobal; ii++){
+               fprintf(fp,"%4d:",ii);
+            }
+            fprintf(fp,"\n");
+
+            fprintf(fp,"\n                                    ntop numbering\n");
+            for (int jj = jmaxglobal-1; jj>=0; jj--){
+               fprintf(fp,"%2d: %4d:",mype,jj);
+               if (jj >= jminsize && jj < jmaxsize) {
+                  for (int ii = 0; ii<imaxglobal; ii++){
+                     if ( ii >= iminsize && ii < imaxsize ) {
+                        int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) -noffset;
+                        if ( hashval >= 0 && hashval < (int)ncells ) {
+                           fprintf(fp,"%5d",ntop[hashval]);
+                        } else {
+                           fprintf(fp,"     ");
+                        }
+                     }
+                  }
+               }
+               fprintf(fp,"\n");
+            }
+            fprintf(fp,"%2d:      ",mype);
+            for (int ii = 0; ii<imaxglobal; ii++){
+               fprintf(fp,"%4d:",ii);
+            }
+            fprintf(fp,"\n");
+      
+#ifdef _OPENMP
+            } // end master region
+#endif
+         } // end DEBUG
+
+         if (DEBUG) {
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+            {
+#endif
+            print_local();
+
+            for (uint ic=0; ic<ncells; ic++){
+               fprintf(fp,"%d: before update ic %d        i %d j %d lev %d nlft %d nrht %d nbot %d ntop %d\n",
+                   mype,ic,i[ic],j[ic],level[ic],nlft[ic],nrht[ic],nbot[ic],ntop[ic]);
+            }
+            int ig=0;
+            for (uint ic=ncells; ic<ncells_ghost; ic++, ig++){
+               fprintf(fp,"%d: after  update ic %d off %d i %d j %d lev %d nlft %d nrht %d nbot %d ntop %d\n",
+                   mype,ic,indices_needed[ig],i[ic],j[ic],level[ic],nlft[ic],nrht[ic],nbot[ic],ntop[ic]);
+            }
+#ifdef _OPENMP
+            } // end master region
+#endif
+         } // end DEBUG
+
+      } // if numpe > 1
+#endif
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+            {
+#endif
+      write_hash_collision_report();
+      read_hash_collision_report();
+      compact_hash_delete(hash);
+
+#ifdef BOUNDS_CHECK
+      {
+         for (uint ic=0; ic<ncells; ic++){
+            int nl = nlft[ic];
+            if (nl<0 || nl>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nlft %d\n",mype,__LINE__,ic,nl);
+            if (level[nl] > level[ic]){
+               int ntl = ntop[nl];
+               if (ntl<0 || ntl>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d global %d nlft %d ntop of nlft %d\n",mype,__LINE__,ic,ic+noffset,nl,ntl);
+            }
+            int nr = nrht[ic];
+            if (nr<0 || nr>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nrht %d\n",mype,__LINE__,ic,nr);
+            if (level[nr] > level[ic]){
+               int ntr = ntop[nr];
+               if (ntr<0 || ntr>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d ntop of nrht %d\n",mype,__LINE__,ic,ntr);
+            }
+            int nb = nbot[ic];
+            if (nb<0 || nb>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nbot %d\n",mype,__LINE__,ic,nb);
+            if (level[nb] > level[ic]){
+               int nrb = nrht[nb];
+               if (nrb<0 || nrb>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nrht of nbot %d\n",mype,__LINE__,ic,nrb);
+            }
+            int nt = ntop[ic];
+            if (nt<0 || nt>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d ntop %d\n",mype,__LINE__,ic,nt);
+            if (level[nt] > level[ic]){
+               int nrt = nrht[nt];
+               if (nrt<0 || nrt>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nrht of ntop %d\n",mype,__LINE__,ic,nrt);
+            }
+         }
+      }
+#endif
+
+#ifdef _OPENMP
+            } // end master region
+#pragma omp barrier
+#endif
+
+      } else if (calc_neighbor_type == KDTREE) {
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+         {
+#endif
+         struct timeval tstart_lev2;
+         if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2);
+
+         TBounds box;
+         vector<int> index_list(IPOW2(levmx*levmx) );
+
+         int num;
+
+         ibase = 0;
+         calc_spatial_coordinates(ibase);
+
+         kdtree_setup();
+
+         if (TIMING_LEVEL >= 2) {
+            cpu_timers[MESH_TIMER_KDTREE_SETUP] += cpu_timer_stop(tstart_lev2);
+            cpu_timer_start(&tstart_lev2);
+         }
+
+         for (uint ic=0; ic<ncells; ic++) {
+
+            //left
+            nlft[ic]  = ic;
+            box.min.x = x[ic]-0.25*dx[ic];
+            box.max.x = x[ic]-0.25*dx[ic];
+            box.min.y = y[ic]+0.25*dy[ic];
+            box.max.y = y[ic]+0.25*dy[ic];
+            KDTree_QueryBoxIntersect(&tree, &num, &(index_list[0]), &box);
+            if (num == 1) nlft[ic]=index_list[0];
+
+            //right
+            nrht[ic]  = ic;
+            box.min.x = x[ic]+1.25*dx[ic];
+            box.max.x = x[ic]+1.25*dx[ic];
+            box.min.y = y[ic]+0.25*dy[ic];
+            box.max.y = y[ic]+0.25*dy[ic];
+            KDTree_QueryBoxIntersect(&tree, &num, &(index_list[0]), &box);
+            if (num == 1) nrht[ic]=index_list[0];
+
+            //bot
+            nbot[ic]  = ic;
+            box.min.x = x[ic]+0.25*dx[ic];
+            box.max.x = x[ic]+0.25*dx[ic];
+            box.min.y = y[ic]-0.25*dy[ic];
+            box.max.y = y[ic]-0.25*dy[ic];
+            KDTree_QueryBoxIntersect(&tree, &num, &(index_list[0]), &box);
+            if (num == 1) nbot[ic]=index_list[0];
+
+            //top
+            ntop[ic]  = ic;
+            box.min.x = x[ic]+0.25*dx[ic];
+            box.max.x = x[ic]+0.25*dx[ic];
+            box.min.y = y[ic]+1.25*dy[ic];
+            box.max.y = y[ic]+1.25*dy[ic];
+            KDTree_QueryBoxIntersect(&tree, &num, &(index_list[0]), &box);
+            if (num == 1) ntop[ic]=index_list[0];
+         }  //  End main loop over cells.
+
+         KDTree_Destroy(&tree);
+
+         if (TIMING_LEVEL >= 2) cpu_timers[MESH_TIMER_KDTREE_QUERY] += cpu_timer_stop(tstart_lev2);
+
+#ifdef _OPENMP
+         }
+#pragma omp barrier
+#endif
+      } // calc_neighbor_type
+
+   }
+
+#ifdef _OPENMP
+#pragma omp master
+#endif
+   cpu_timers[MESH_TIMER_CALC_NEIGHBORS] += cpu_timer_stop(tstart_cpu);
+}
+
+#ifdef HAVE_OPENCL
+void Mesh::gpu_calc_neighbors(void)
+{
+   if (! gpu_do_rezone) return; // nothing is enqueued or timed when gpu_do_rezone is false
+   // Purpose: enqueue kernel_hash_setup and then kernel_calc_neighbors on the device, waiting on each and accumulating timers.
+   ulong gpu_hash_table_size =  0; // out-parameter of gpu_compact_hash_init; not read again in this function
+
+   struct timeval tstart_cpu; // whole-function timer, accumulated into MESH_TIMER_CALC_NEIGHBORS at the end
+   cpu_timer_start(&tstart_cpu);
+
+   struct timeval tstart_lev2; // per-phase timer; started unconditionally but only read when TIMING_LEVEL >= 2
+   cpu_timer_start(&tstart_lev2);
+
+   cl_command_queue command_queue = ezcl_get_command_queue();
+
+   gpu_counters[MESH_COUNTER_CALC_NEIGH]++;
+   // The device-side mesh arrays bound to the kernels below must already be non-NULL.
+   assert(dev_levtable);
+   assert(dev_level);
+   assert(dev_i);
+   assert(dev_j);
+
+   size_t mem_request = (int)((float)ncells*mem_factor); // element count for the neighbor buffers: ncells scaled by mem_factor, truncated through int
+
+   size_t local_work_size = MIN(ncells, TILE_SIZE);
+   size_t global_work_size = ((ncells + local_work_size - 1) /local_work_size) * local_work_size; // ncells rounded up to a multiple of local_work_size
+
+   //printf("DEBUG file %s line %d dev_nlft %p size %d\n",__FILE__,__LINE__,dev_nlft,ezcl_get_device_mem_nelements(dev_nlft));
+   // Allocate the four neighbor buffers when dev_nlft is missing or reports fewer than ncells elements, then run kernel_neighbor_init on them.
+   if (dev_nlft == NULL || ezcl_get_device_mem_nelements(dev_nlft) < (int)ncells) {
+      dev_nlft     = ezcl_malloc(NULL, const_cast<char *>("dev_nlft"), &mem_request, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+      dev_nrht     = ezcl_malloc(NULL, const_cast<char *>("dev_nrht"), &mem_request, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+      dev_nbot     = ezcl_malloc(NULL, const_cast<char *>("dev_nbot"), &mem_request, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+      dev_ntop     = ezcl_malloc(NULL, const_cast<char *>("dev_ntop"), &mem_request, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+      // NOTE(review): when dev_nlft was non-NULL but too small, the previous handles are overwritten with no release visible here -- confirm they are freed elsewhere.
+      ezcl_set_kernel_arg(kernel_neighbor_init,  0, sizeof(cl_int),   (void *)&ncells);
+      ezcl_set_kernel_arg(kernel_neighbor_init,  1, sizeof(cl_mem),   (void *)&dev_nlft);
+      ezcl_set_kernel_arg(kernel_neighbor_init,  2, sizeof(cl_mem),   (void *)&dev_nrht);
+      ezcl_set_kernel_arg(kernel_neighbor_init,  3, sizeof(cl_mem),   (void *)&dev_nbot);
+      ezcl_set_kernel_arg(kernel_neighbor_init,  4, sizeof(cl_mem),   (void *)&dev_ntop);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_neighbor_init,   1, NULL, &global_work_size, &local_work_size, NULL);
+   }
+   // Hash dimensions handed to gpu_compact_hash_init: (imax+1) and (jmax+1), each scaled by IPOW2(levmx).
+   int imaxsize = (imax+1)*IPOW2(levmx);
+   int jmaxsize = (jmax+1)*IPOW2(levmx);
+
+   int gpu_hash_method       = METHOD_UNSET;
+// allow input.c to control hash types and methods
+   if (choose_hash_method != METHOD_UNSET) gpu_hash_method = choose_hash_method;
+//=========
+
+   size_t hashsize; // out-parameter of gpu_compact_hash_init; not read again in this function
+
+   uint hash_report_level = 1;
+   cl_mem dev_hash_header = NULL;
+   cl_mem dev_hash = gpu_compact_hash_init(ncells, imaxsize, jmaxsize, gpu_hash_method, hash_report_level,
+      &gpu_hash_table_size, &hashsize, &dev_hash_header);
+
+      /* kernel_hash_setup parameters. NOTE(review): listing revised to match the 13 arguments bound below; kernel-side types of nlft..ntop are assumed -- confirm against the .cl source.
+                    const int   isize,        // 0
+                    const int   levmx,        // 1
+                    const int   imaxsize,     // 2
+           __global const int   *levtable,    // 3
+           __global const int   *level,       // 4
+           __global const int   *i, *j,       // 5, 6
+           __global       int   *nlft, *nrht, // 7, 8
+           __global       int   *nbot, *ntop, // 9, 10
+           __global const ulong *hash_header, // 11
+           __global       int   *hash)        // 12   */
+
+   cl_event hash_setup_event;
+
+   ezcl_set_kernel_arg(kernel_hash_setup,  0, sizeof(cl_int),   (void *)&ncells);
+   ezcl_set_kernel_arg(kernel_hash_setup,  1, sizeof(cl_int),   (void *)&levmx);
+   ezcl_set_kernel_arg(kernel_hash_setup,  2, sizeof(cl_int),   (void *)&imaxsize);
+   ezcl_set_kernel_arg(kernel_hash_setup,  3, sizeof(cl_mem),   (void *)&dev_levtable);
+   ezcl_set_kernel_arg(kernel_hash_setup,  4, sizeof(cl_mem),   (void *)&dev_level);
+   ezcl_set_kernel_arg(kernel_hash_setup,  5, sizeof(cl_mem),   (void *)&dev_i);
+   ezcl_set_kernel_arg(kernel_hash_setup,  6, sizeof(cl_mem),   (void *)&dev_j);
+   ezcl_set_kernel_arg(kernel_hash_setup,  7, sizeof(cl_mem),   (void *)&dev_nlft);
+   ezcl_set_kernel_arg(kernel_hash_setup,  8, sizeof(cl_mem),   (void *)&dev_nrht);
+   ezcl_set_kernel_arg(kernel_hash_setup,  9, sizeof(cl_mem),   (void *)&dev_nbot);
+   ezcl_set_kernel_arg(kernel_hash_setup, 10, sizeof(cl_mem),   (void *)&dev_ntop);
+   ezcl_set_kernel_arg(kernel_hash_setup, 11, sizeof(cl_mem),   (void *)&dev_hash_header);
+   ezcl_set_kernel_arg(kernel_hash_setup, 12, sizeof(cl_mem),   (void *)&dev_hash);
+   ezcl_enqueue_ndrange_kernel(command_queue, kernel_hash_setup,   1, NULL, &global_work_size, &local_work_size, &hash_setup_event);
+   // Wait on the hash setup event before timing it, then release the event handle.
+   ezcl_wait_for_events(1, &hash_setup_event);
+   ezcl_event_release(hash_setup_event);
+
+   if (TIMING_LEVEL >= 2) {
+      gpu_timers[MESH_TIMER_HASH_SETUP] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); // timer value scaled by 1.0e9 (presumably seconds -> nanoseconds; confirm cpu_timer_stop units)
+      cpu_timer_start(&tstart_lev2); // restart the phase timer for the query phase
+   }
+
+      /*
+                    const int   isize,        // 0
+                    const int   levmx,        // 1
+                    const int   imax,         // 2
+                    const int   jmax,         // 3
+                    const int   imaxsize,     // 4
+                    const int   jmaxsize,     // 5
+           __global const int   *levtable,    // 6
+           __global const int   *level,       // 7
+           __global const int   *i,           // 8
+           __global const int   *j,           // 9
+           __global       int   *nlft,        // 10
+           __global       int   *nrht,        // 11
+           __global       int   *nbot,        // 12
+           __global       int   *ntop,        // 13
+           __global const ulong *hash_header, // 14
+           __global       int   *hash)        // 15
+      */
+
+   cl_event calc_neighbors_event;
+
+   ezcl_set_kernel_arg(kernel_calc_neighbors, 0,  sizeof(cl_int),   (void *)&ncells);
+   ezcl_set_kernel_arg(kernel_calc_neighbors, 1,  sizeof(cl_int),   (void *)&levmx);
+   ezcl_set_kernel_arg(kernel_calc_neighbors, 2,  sizeof(cl_int),   (void *)&imax);
+   ezcl_set_kernel_arg(kernel_calc_neighbors, 3,  sizeof(cl_int),   (void *)&jmax);
+   ezcl_set_kernel_arg(kernel_calc_neighbors, 4,  sizeof(cl_int),   (void *)&imaxsize);
+   ezcl_set_kernel_arg(kernel_calc_neighbors, 5,  sizeof(cl_int),   (void *)&jmaxsize);
+   ezcl_set_kernel_arg(kernel_calc_neighbors, 6,  sizeof(cl_mem),   (void *)&dev_levtable);
+   ezcl_set_kernel_arg(kernel_calc_neighbors, 7,  sizeof(cl_mem),   (void *)&dev_level);
+   ezcl_set_kernel_arg(kernel_calc_neighbors, 8,  sizeof(cl_mem),   (void *)&dev_i);
+   ezcl_set_kernel_arg(kernel_calc_neighbors, 9,  sizeof(cl_mem),   (void *)&dev_j);
+   ezcl_set_kernel_arg(kernel_calc_neighbors, 10, sizeof(cl_mem),   (void *)&dev_nlft);
+   ezcl_set_kernel_arg(kernel_calc_neighbors, 11, sizeof(cl_mem),   (void *)&dev_nrht);
+   ezcl_set_kernel_arg(kernel_calc_neighbors, 12, sizeof(cl_mem),   (void *)&dev_nbot);
+   ezcl_set_kernel_arg(kernel_calc_neighbors, 13, sizeof(cl_mem),   (void *)&dev_ntop);
+   ezcl_set_kernel_arg(kernel_calc_neighbors, 14, sizeof(cl_mem),   (void *)&dev_hash_header);
+   ezcl_set_kernel_arg(kernel_calc_neighbors, 15, sizeof(cl_mem),   (void *)&dev_hash);
+   ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_neighbors,   1, NULL, &global_work_size, &local_work_size, &calc_neighbors_event);
+   // Wait on the neighbor kernel's event before the hash buffers are deleted below.
+   ezcl_wait_for_events(1, &calc_neighbors_event);
+   ezcl_event_release(calc_neighbors_event);
+
+   gpu_compact_hash_delete(dev_hash, dev_hash_header); // hands back the pair obtained from gpu_compact_hash_init above
+
+   if (TIMING_LEVEL >= 2) gpu_timers[MESH_TIMER_HASH_QUERY] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
+
+   gpu_timers[MESH_TIMER_CALC_NEIGHBORS] += (long)(cpu_timer_stop(tstart_cpu) * 1.0e9);
+}
+
+
+void Mesh::gpu_calc_neighbors_local(void)
+{
+   if (! gpu_do_rezone) return;
+
+   ulong gpu_hash_table_size =  0;
+
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+
+   struct timeval tstart_lev2;
+   if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2);
+
+   cl_command_queue command_queue = ezcl_get_command_queue();
+
+   gpu_counters[MESH_COUNTER_CALC_NEIGH]++;
+
+   ncells_ghost = ncells;
+
+   assert(dev_levtable);
+   assert(dev_level);
+   assert(dev_i);
+   assert(dev_j);
+
+   size_t one = 1;
+   cl_mem dev_check = ezcl_malloc(NULL, const_cast<char *>("dev_check"), &one, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+
+   size_t mem_request = (int)((float)ncells*mem_factor);
+   dev_nlft = ezcl_malloc(NULL, const_cast<char *>("dev_nlft"), &mem_request, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+   dev_nrht = ezcl_malloc(NULL, const_cast<char *>("dev_nrht"), &mem_request, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+   dev_nbot = ezcl_malloc(NULL, const_cast<char *>("dev_nbot"), &mem_request, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+   dev_ntop = ezcl_malloc(NULL, const_cast<char *>("dev_ntop"), &mem_request, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+
+   size_t local_work_size =  64;
+   size_t global_work_size = ((ncells + local_work_size - 1) /local_work_size) * local_work_size;
+   size_t block_size     = global_work_size/local_work_size;
+
+   //printf("DEBUG file %s line %d lws = %d gws %d bs %d ncells %d\n",__FILE__,__LINE__,
+   //   local_work_size, global_work_size, block_size, ncells);
+   cl_mem dev_redscratch = ezcl_malloc(NULL, const_cast<char *>("dev_redscratch"), &block_size, sizeof(cl_int4), CL_MEM_READ_WRITE, 0);
+   cl_mem dev_sizes = ezcl_malloc(NULL, const_cast<char *>("dev_sizes"), &one, sizeof(cl_int4),  CL_MEM_READ_WRITE, 0);
+
+#ifdef BOUNDS_CHECK
+   if (ezcl_get_device_mem_nelements(dev_i) < (int)ncells || 
+       ezcl_get_device_mem_nelements(dev_j) < (int)ncells ||
+       ezcl_get_device_mem_nelements(dev_level) < (int)ncells ){
+      printf("%d: Warning ncells %ld size dev_i %d dev_j %d dev_level %d\n",mype,ncells,ezcl_get_device_mem_nelements(dev_i),ezcl_get_device_mem_nelements(dev_j),ezcl_get_device_mem_nelements(dev_level));
+   }
+#endif
+
+      /*
+       __kernel void calc_hash_size_cl(
+                          const int   ncells,      // 0
+                          const int   levmx,       // 1
+                 __global       int   *levtable,   // 2
+                 __global       int   *level,      // 3
+                 __global       int   *i,          // 4
+                 __global       int   *j,          // 5
+                 __global       int4  *redscratch, // 6
+                 __global       int4  *sizes,      // 7
+                 __local        int4  *tile)       // 8
+      */
+
+   ezcl_set_kernel_arg(kernel_hash_size, 0, sizeof(cl_int), (void *)&ncells);
+   ezcl_set_kernel_arg(kernel_hash_size, 1, sizeof(cl_int), (void *)&levmx);
+   ezcl_set_kernel_arg(kernel_hash_size, 2, sizeof(cl_mem), (void *)&dev_levtable);
+   ezcl_set_kernel_arg(kernel_hash_size, 3, sizeof(cl_mem), (void *)&dev_level);
+   ezcl_set_kernel_arg(kernel_hash_size, 4, sizeof(cl_mem), (void *)&dev_i);
+   ezcl_set_kernel_arg(kernel_hash_size, 5, sizeof(cl_mem), (void *)&dev_j);
+   ezcl_set_kernel_arg(kernel_hash_size, 6, sizeof(cl_mem), (void *)&dev_redscratch);
+   ezcl_set_kernel_arg(kernel_hash_size, 7, sizeof(cl_mem), (void *)&dev_sizes);
+   ezcl_set_kernel_arg(kernel_hash_size, 8, local_work_size*sizeof(cl_int4), NULL);
+
+   ezcl_enqueue_ndrange_kernel(command_queue, kernel_hash_size,   1, NULL, &global_work_size, &local_work_size, NULL);
+
+   if (block_size > 1) {
+         /*
+         __kernel void finish_reduction_minmax4_cl(
+           const    int    isize,            // 0
+           __global int4  *redscratch,       // 1
+           __global int4  *sizes,            // 2
+           __local  int4  *tile)             // 3
+         */
+      ezcl_set_kernel_arg(kernel_finish_hash_size, 0, sizeof(cl_int), (void *)&block_size);
+      ezcl_set_kernel_arg(kernel_finish_hash_size, 1, sizeof(cl_mem), (void *)&dev_redscratch);
+      ezcl_set_kernel_arg(kernel_finish_hash_size, 2, sizeof(cl_mem), (void *)&dev_sizes);
+      ezcl_set_kernel_arg(kernel_finish_hash_size, 3, local_work_size*sizeof(cl_int4), NULL);
+
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_finish_hash_size,   1, NULL, &local_work_size, &local_work_size, NULL);
+   }
+
+   ezcl_device_memory_delete(dev_redscratch);
+
+   cl_int sizes[4];
+   ezcl_enqueue_read_buffer(command_queue, dev_sizes, CL_TRUE,  0, 1*sizeof(cl_int4), &sizes, NULL);
+
+   int imintile = sizes[0];
+   int imaxtile = sizes[1];
+   int jmintile = sizes[2];
+   int jmaxtile = sizes[3];
+
+   // Expand size by 2*coarse_cells for ghost cells
+   // TODO: May want to get fancier here and calc based on cell level
+   int jminsize = max(jmintile-2*IPOW2(levmx),0);
+   int jmaxsize = min(jmaxtile+2*IPOW2(levmx),(jmax+1)*IPOW2(levmx));
+   int iminsize = max(imintile-2*IPOW2(levmx),0);
+   int imaxsize = min(imaxtile+2*IPOW2(levmx),(imax+1)*IPOW2(levmx));
+   //fprintf(fp,"%d: Sizes are imin %d imax %d jmin %d jmax %d\n",mype,iminsize,imaxsize,jminsize,jmaxsize);
+
+   //ezcl_enqueue_write_buffer(command_queue, dev_sizes, CL_TRUE,  0, 1*sizeof(cl_int4), &sizes, NULL);
+
+   int gpu_hash_method       = METHOD_UNSET;
+// allow input.c to control hash types and methods
+   if (choose_hash_method != METHOD_UNSET) gpu_hash_method = choose_hash_method;
+//=========
+
+   size_t hashsize;
+
+   uint hash_report_level = 1;
+   cl_mem dev_hash_header = NULL;
+   cl_mem dev_hash = gpu_compact_hash_init(ncells, imaxsize-iminsize, jmaxsize-jminsize, gpu_hash_method, hash_report_level, &gpu_hash_table_size, &hashsize, &dev_hash_header);
+
+   int csize = corners_i.size();
+#ifdef BOUNDS_CHECK
+   for (int ic=0; ic<csize; ic++){
+      if (corners_i[ic] >= iminsize) continue;
+      if (corners_j[ic] >= jminsize) continue;
+      if (corners_i[ic] <  imaxsize) continue;
+      if (corners_j[ic] <  jmaxsize) continue;
+      if ( (corners_j[ic]-jminsize)*(imaxsize-iminsize)+(corners_i[ic]-iminsize) < 0 ||
+           (corners_j[ic]-jminsize)*(imaxsize-iminsize)+(corners_i[ic]-iminsize) > (int)hashsize){
+         printf("%d: Warning corners i %d j %d hash %d\n",mype,corners_i[ic],corners_j[ic],
+            (corners_j[ic]-jminsize)*(imaxsize-iminsize)+(corners_i[ic]-iminsize));
+      }
+   }
+#endif
+
+   size_t corners_local_work_size  = MIN(csize, TILE_SIZE);
+   size_t corners_global_work_size = ((csize+corners_local_work_size - 1) /corners_local_work_size) * corners_local_work_size;
+
+   ezcl_set_kernel_arg(kernel_hash_adjust_sizes, 0, sizeof(cl_int), (void *)&csize);
+   ezcl_set_kernel_arg(kernel_hash_adjust_sizes, 1, sizeof(cl_int), (void *)&levmx);
+   ezcl_set_kernel_arg(kernel_hash_adjust_sizes, 2, sizeof(cl_int), (void *)&imax);
+   ezcl_set_kernel_arg(kernel_hash_adjust_sizes, 3, sizeof(cl_int), (void *)&jmax);
+   ezcl_set_kernel_arg(kernel_hash_adjust_sizes, 4, sizeof(cl_mem), (void *)&dev_levtable);
+   ezcl_set_kernel_arg(kernel_hash_adjust_sizes, 5, sizeof(cl_mem), (void *)&dev_sizes);
+   ezcl_enqueue_ndrange_kernel(command_queue, kernel_hash_adjust_sizes,   1, NULL, &corners_global_work_size, &corners_local_work_size, NULL);
+
+   if (DEBUG){
+      vector<int> sizes_tmp(4);
+      ezcl_enqueue_read_buffer(command_queue, dev_sizes, CL_TRUE,  0, 1*sizeof(cl_int4), &sizes_tmp[0], NULL);
+      int iminsize_tmp = sizes_tmp[0];
+      int imaxsize_tmp = sizes_tmp[1];
+      int jminsize_tmp = sizes_tmp[2];
+      int jmaxsize_tmp = sizes_tmp[3];
+      fprintf(fp,"%d: Sizes are imin %d imax %d jmin %d jmax %d\n",mype,iminsize_tmp,imaxsize_tmp,jminsize_tmp,jmaxsize_tmp);
+   }
+
+   local_work_size = 128;
+   global_work_size = ((ncells + local_work_size - 1) /local_work_size) * local_work_size;
+
+#ifdef BOUNDS_CHECK
+   {
+      vector<int> i_tmp(ncells);
+      vector<int> j_tmp(ncells);
+      vector<int> level_tmp(ncells);
+      ezcl_enqueue_read_buffer(command_queue, dev_i,     CL_FALSE, 0, ncells*sizeof(cl_int), &i_tmp[0], NULL);
+      ezcl_enqueue_read_buffer(command_queue, dev_j,     CL_FALSE, 0, ncells*sizeof(cl_int), &j_tmp[0], NULL);
+      ezcl_enqueue_read_buffer(command_queue, dev_level, CL_TRUE,  0, ncells*sizeof(cl_int), &level_tmp[0], NULL);
+      for (int ic=0; ic<(int)ncells; ic++){
+         int lev = level_tmp[ic];
+         for (   int jj = j_tmp[ic]*IPOW2(levmx-lev)-jminsize; jj < (j_tmp[ic]+1)*IPOW2(levmx-lev)-jminsize; jj++) {
+            for (int ii = i_tmp[ic]*IPOW2(levmx-lev)-iminsize; ii < (i_tmp[ic]+1)*IPOW2(levmx-lev)-iminsize; ii++) {
+               if (jj < 0 || jj >= (jmaxsize-jminsize) || ii < 0 || ii >= (imaxsize-iminsize) ) {
+                  printf("%d: Warning ncell %d writes to hash out-of-bounds at line %d ii %d jj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,ic,__LINE__,ii,jj,iminsize,imaxsize,jminsize,jmaxsize);
+               }
+            }
+         }
+      }
+   }
+#endif
+
+   //printf("%d: lws %d gws %d \n",mype,local_work_size,global_work_size);
+   cl_event hash_setup_local_event;
+
+      /*
+                    const int   isize,           // 0
+                    const int   levmx,           // 1
+                    const int   imax,            // 2
+                    const int   jmax,            // 3
+                    const int   noffset,         // 4
+           __global       int   *sizes,          // 5
+           __global       int   *levtable,       // 6
+           __global       int   *level,          // 7
+           __global       int   *i,              // 8
+           __global       int   *j,              // 9
+           __global const ulong *hash_header,    // 10
+           __global       int   *hash)           // 11
+      */
+
+   ezcl_set_kernel_arg(kernel_hash_setup_local,  0, sizeof(cl_int),   (void *)&ncells);
+   ezcl_set_kernel_arg(kernel_hash_setup_local,  1, sizeof(cl_int),   (void *)&levmx);
+   ezcl_set_kernel_arg(kernel_hash_setup_local,  2, sizeof(cl_int),   (void *)&imax);
+   ezcl_set_kernel_arg(kernel_hash_setup_local,  3, sizeof(cl_int),   (void *)&jmax);
+   ezcl_set_kernel_arg(kernel_hash_setup_local,  4, sizeof(cl_int),   (void *)&noffset);
+   ezcl_set_kernel_arg(kernel_hash_setup_local,  5, sizeof(cl_mem),   (void *)&dev_sizes);
+   ezcl_set_kernel_arg(kernel_hash_setup_local,  6, sizeof(cl_mem),   (void *)&dev_levtable);
+   ezcl_set_kernel_arg(kernel_hash_setup_local,  7, sizeof(cl_mem),   (void *)&dev_level);
+   ezcl_set_kernel_arg(kernel_hash_setup_local,  8, sizeof(cl_mem),   (void *)&dev_i);
+   ezcl_set_kernel_arg(kernel_hash_setup_local,  9, sizeof(cl_mem),   (void *)&dev_j);
+   ezcl_set_kernel_arg(kernel_hash_setup_local, 10, sizeof(cl_mem),   (void *)&dev_hash_header);
+   ezcl_set_kernel_arg(kernel_hash_setup_local, 11, sizeof(cl_mem),   (void *)&dev_hash);
+   ezcl_enqueue_ndrange_kernel(command_queue, kernel_hash_setup_local,   1, NULL, &global_work_size, &local_work_size, &hash_setup_local_event);
+
+   ezcl_wait_for_events(1, &hash_setup_local_event);
+   ezcl_event_release(hash_setup_local_event);
+
+   if (DEBUG){
+      vector<int> sizes_tmp(4);
+      ezcl_enqueue_read_buffer(command_queue, dev_sizes, CL_TRUE,  0, 1*sizeof(cl_int4), &sizes_tmp[0], NULL);
+      int iminsize_tmp = sizes_tmp[0];
+      int imaxsize_tmp = sizes_tmp[1];
+      int jminsize_tmp = sizes_tmp[2];
+      int jmaxsize_tmp = sizes_tmp[3];
+      fprintf(fp,"%d: Sizes are imin %d imax %d jmin %d jmax %d\n",mype,iminsize_tmp,imaxsize_tmp,jminsize_tmp,jmaxsize_tmp);
+   }
+
+   if (TIMING_LEVEL >= 2) {
+      gpu_timers[MESH_TIMER_HASH_SETUP] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
+      cpu_timer_start(&tstart_lev2);
+   }
+
+#ifdef BOUNDS_CHECK
+   {
+      if (ezcl_get_device_mem_nelements(dev_nlft)  < (int)ncells ||
+          ezcl_get_device_mem_nelements(dev_nrht)  < (int)ncells ||
+          ezcl_get_device_mem_nelements(dev_nbot)  < (int)ncells ||
+          ezcl_get_device_mem_nelements(dev_ntop)  < (int)ncells ||
+          ezcl_get_device_mem_nelements(dev_i)     < (int)ncells ||
+          ezcl_get_device_mem_nelements(dev_j)     < (int)ncells ||
+          ezcl_get_device_mem_nelements(dev_level) < (int)ncells ) {
+         printf("%d: Warning -- sizes for dev_neigh too small ncells %ld neigh %d %d %d %d %d %d %d\n",mype,ncells,ezcl_get_device_mem_nelements(dev_nlft),ezcl_get_device_mem_nelements(dev_nrht),ezcl_get_device_mem_nelements(dev_nbot),ezcl_get_device_mem_nelements(dev_ntop),ezcl_get_device_mem_nelements(dev_i),ezcl_get_device_mem_nelements(dev_j),ezcl_get_device_mem_nelements(dev_level));
+      }
+      vector<int> level_tmp(ncells);
+      ezcl_enqueue_read_buffer(command_queue, dev_level, CL_TRUE, 0, ncells*sizeof(cl_int), &level_tmp[0], NULL);
+      int iflag = 0;
+      for (int ic=0; ic<ncells; ic++){
+         if (levmx-level_tmp[ic] < 0 || levmx-level_tmp[ic] > levmx) {
+            printf("%d: Warning level value bad ic %d level %d ncells %d\n",mype,ic,level_tmp[ic],ncells);
+            iflag++;
+         }
+      }
+      if (ezcl_get_device_mem_nelements(dev_levtable) < levmx+1) printf("%d Warning levtable too small levmx is %d devtable size is %d\n",mype,levmx,ezcl_get_device_mem_nelements(dev_levtable));
+#ifdef HAVE_MPI
+      if (iflag > 20) {fflush(stdout); L7_Terminate(); exit(0);}
+#endif
+   }
+#endif
+
+#ifdef BOUNDS_CHECK
+   {
+      int jmaxcalc = (jmax+1)*IPOW2(levmx);
+      int imaxcalc = (imax+1)*IPOW2(levmx);
+      vector<int> i_tmp(ncells);
+      vector<int> j_tmp(ncells);
+      vector<int> level_tmp(ncells);
+      vector<int> hash_tmp(hashsize);
+      ezcl_enqueue_read_buffer(command_queue, dev_i,     CL_FALSE, 0, ncells*sizeof(cl_int), &i_tmp[0], NULL);
+      ezcl_enqueue_read_buffer(command_queue, dev_j,     CL_FALSE, 0, ncells*sizeof(cl_int), &j_tmp[0], NULL);
+      ezcl_enqueue_read_buffer(command_queue, dev_level, CL_TRUE,  0, ncells*sizeof(cl_int), &level_tmp[0], NULL);
+      ezcl_enqueue_read_buffer(command_queue, dev_hash,  CL_TRUE,  0, hashsize*sizeof(cl_int), &hash_tmp[0], NULL);
+      for (int ic=0; ic<(int)ncells; ic++){
+         int ii  = i_tmp[ic];
+         int jj  = j_tmp[ic];
+         int lev = level_tmp[ic];
+         int levmult = IPOW2(levmx-lev);
+         int jjj=jj   *levmult-jminsize;
+         int iii=max(  ii   *levmult-1, 0         )-iminsize;
+         if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize);
+         jjj=jj   *levmult-jminsize;
+         iii=min( (ii+1)*levmult,   imaxcalc-1)-iminsize;
+         if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize);
+         jjj=max(  jj   *levmult-1, 0) -jminsize;
+         iii=ii   *levmult   -iminsize;
+         if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize);
+         jjj=min( (jj+1)*levmult,   jmaxcalc-1)-jminsize;
+         iii=ii   *levmult   -iminsize;
+         if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize);
+         int nlftval = hash_tmp[((      jj   *levmult               )-jminsize)*(imaxsize-iminsize)+((max(  ii   *levmult-1, 0         ))-iminsize)];
+         int nrhtval = hash_tmp[((      jj   *levmult               )-jminsize)*(imaxsize-iminsize)+((min( (ii+1)*levmult,   imaxcalc-1))-iminsize)];
+         int nbotval = hash_tmp[((max(  jj   *levmult-1, 0)         )-jminsize)*(imaxsize-iminsize)+((      ii   *levmult               )-iminsize)];
+         int ntopval = hash_tmp[((min( (jj+1)*levmult,   jmaxcalc-1))-jminsize)*(imaxsize-iminsize)+((      ii   *levmult               )-iminsize)];
+
+         if (nlftval == INT_MIN){
+            jjj = jj*levmult-jminsize;
+            iii = ii*levmult-iminsize;
+            if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize);
+         }
+         if (nrhtval == INT_MIN){
+            jjj = jj*levmult-jminsize;
+            iii = ii*levmult-iminsize;
+            if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize);
+         }
+         if (nbotval == INT_MIN) {
+            iii = ii*levmult-iminsize;
+            jjj = jj*levmult-jminsize;
+            if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize);
+         }
+         if (ntopval == INT_MIN) {
+            iii = ii*levmult-iminsize;
+            jjj = jj*levmult-jminsize;
+            if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize);
+         }
+      }
+   }
+#endif
+
+   cl_event calc_neighbors_local_event;
+
+      /*
+                    const int   isize,       // 0
+                    const int   levmx,       // 1
+                    const int   imaxsize,    // 2
+                    const int   jmaxsize,    // 3
+                    const int   noffset,     // 4
+           __global       int   *sizes,      // 5
+           __global       int   *levtable,   // 6
+           __global       int   *level,      // 7
+           __global       int   *i,          // 8
+           __global       int   *j,          // 9
+           __global       int   *nlft,       // 10
+           __global       int   *nrht,       // 11
+           __global       int   *nbot,       // 12
+           __global       int   *ntop,       // 13
+           __global const ulong *hash_header, // 14
+           __global       int   *hash)       // 15
+      */
+
+   ezcl_set_kernel_arg(kernel_calc_neighbors_local, 0,  sizeof(cl_int),   (void *)&ncells);
+   ezcl_set_kernel_arg(kernel_calc_neighbors_local, 1,  sizeof(cl_int),   (void *)&levmx);
+   ezcl_set_kernel_arg(kernel_calc_neighbors_local, 2,  sizeof(cl_int),   (void *)&imax);
+   ezcl_set_kernel_arg(kernel_calc_neighbors_local, 3,  sizeof(cl_int),   (void *)&jmax);
+   ezcl_set_kernel_arg(kernel_calc_neighbors_local, 4,  sizeof(cl_int),   (void *)&noffset);
+   ezcl_set_kernel_arg(kernel_calc_neighbors_local, 5,  sizeof(cl_mem),   (void *)&dev_sizes);
+   ezcl_set_kernel_arg(kernel_calc_neighbors_local, 6,  sizeof(cl_mem),   (void *)&dev_levtable);
+   ezcl_set_kernel_arg(kernel_calc_neighbors_local, 7,  sizeof(cl_mem),   (void *)&dev_level);
+   ezcl_set_kernel_arg(kernel_calc_neighbors_local, 8,  sizeof(cl_mem),   (void *)&dev_i);
+   ezcl_set_kernel_arg(kernel_calc_neighbors_local, 9,  sizeof(cl_mem),   (void *)&dev_j);
+   ezcl_set_kernel_arg(kernel_calc_neighbors_local, 10, sizeof(cl_mem),   (void *)&dev_nlft);
+   ezcl_set_kernel_arg(kernel_calc_neighbors_local, 11, sizeof(cl_mem),   (void *)&dev_nrht);
+   ezcl_set_kernel_arg(kernel_calc_neighbors_local, 12, sizeof(cl_mem),   (void *)&dev_nbot);
+   ezcl_set_kernel_arg(kernel_calc_neighbors_local, 13, sizeof(cl_mem),   (void *)&dev_ntop);
+   ezcl_set_kernel_arg(kernel_calc_neighbors_local, 14, sizeof(cl_mem),   (void *)&dev_hash_header);
+   ezcl_set_kernel_arg(kernel_calc_neighbors_local, 15, sizeof(cl_mem),   (void *)&dev_hash);
+   ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_neighbors_local,   1, NULL, &global_work_size, &local_work_size, &calc_neighbors_local_event);
+
+   ezcl_wait_for_events(1, &calc_neighbors_local_event);
+   ezcl_event_release(calc_neighbors_local_event);
+
+   if (TIMING_LEVEL >= 2) {
+      gpu_timers[MESH_TIMER_HASH_QUERY] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
+      cpu_timer_start(&tstart_lev2);
+   }
+
+   if (DEBUG) {
+      print_dev_local();
+
+      vector<int> hash_tmp(hashsize);
+      ezcl_enqueue_read_buffer(command_queue, dev_hash, CL_FALSE, 0, hashsize*sizeof(cl_int), &hash_tmp[0], NULL);
+
+      cl_mem dev_hash_header_check = gpu_get_hash_header();
+      vector<ulong> hash_header_check(hash_header_size);
+      ezcl_enqueue_read_buffer(command_queue, dev_hash_header_check, CL_TRUE, 0, hash_header_size*sizeof(cl_ulong), &hash_header_check[0], NULL);
+
+      int   gpu_hash_method     = (int)hash_header_check[0];
+      ulong gpu_hash_table_size =      hash_header_check[1];
+      ulong gpu_AA              =      hash_header_check[2];
+      ulong gpu_BB              =      hash_header_check[3];
+
+      vector<int> nlft_tmp(ncells_ghost);
+      vector<int> nrht_tmp(ncells_ghost);
+      vector<int> nbot_tmp(ncells_ghost);
+      vector<int> ntop_tmp(ncells_ghost);
+      ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nlft_tmp[0], NULL);
+      ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nrht_tmp[0], NULL);
+      ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nbot_tmp[0], NULL);
+      ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_TRUE,  0, ncells_ghost*sizeof(cl_int), &ntop_tmp[0], NULL);
+
+      int jmaxglobal = (jmax+1)*IPOW2(levmx);
+      int imaxglobal = (imax+1)*IPOW2(levmx);
+      fprintf(fp,"\n                                    HASH 0 numbering\n");
+      for (int jj = jmaxglobal-1; jj>=0; jj--){
+         fprintf(fp,"%2d: %4d:",mype,jj);
+         if (jj >= jminsize && jj < jmaxsize) {
+            for (int ii = 0; ii<imaxglobal; ii++){
+               if (ii >= iminsize && ii < imaxsize) {
+                  fprintf(fp,"%5d",read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) );
+               } else {
+                  fprintf(fp,"     ");
+               }
+            }
+         }
+         fprintf(fp,"\n");
+      }
+      fprintf(fp,"%2d:      ",mype);
+      for (int ii = 0; ii<imaxglobal; ii++){
+         fprintf(fp,"%4d:",ii);
+      }
+      fprintf(fp,"\n");
+
+      fprintf(fp,"\n                                    nlft numbering\n");
+      for (int jj = jmaxglobal-1; jj>=0; jj--){
+         fprintf(fp,"%2d: %4d:",mype,jj);
+         if (jj >= jminsize && jj < jmaxsize) {
+            for (int ii = 0; ii<imaxglobal; ii++){
+               if (ii >= iminsize && ii < imaxsize) {
+                  int hashval = read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) -noffset;
+                  if (hashval >= 0 && hashval < (int)ncells) {
+                     fprintf(fp,"%5d",nlft_tmp[hashval]);
+                  } else {
+                     fprintf(fp,"     ");
+                  }
+               } else {
+                  fprintf(fp,"     ");
+               }
+            }
+         }
+         fprintf(fp,"\n");
+      }
+      fprintf(fp,"%2d:      ",mype);
+      for (int ii = 0; ii<imaxglobal; ii++){
+         fprintf(fp,"%4d:",ii);
+      }
+      fprintf(fp,"\n");
+   
+      fprintf(fp,"\n                                    nrht numbering\n");
+      for (int jj = jmaxglobal-1; jj>=0; jj--){
+         fprintf(fp,"%2d: %4d:",mype,jj);
+         if (jj >= jminsize && jj < jmaxsize) {
+            for (int ii = 0; ii<imaxglobal; ii++){
+               if (ii >= iminsize && ii < imaxsize) {
+                  int hashval = read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0])-noffset;
+                  if (hashval >= 0 && hashval < (int)ncells) {
+                     fprintf(fp,"%5d",nrht_tmp[hashval]);
+                  } else {
+                     fprintf(fp,"     ");
+                  }
+               } else {
+                  fprintf(fp,"     ");
+               }
+            }
+         }
+         fprintf(fp,"\n");
+      }
+      fprintf(fp,"%2d:      ",mype);
+      for (int ii = 0; ii<imaxglobal; ii++){
+         fprintf(fp,"%4d:",ii);
+      }
+      fprintf(fp,"\n");
+
+      fprintf(fp,"\n                                    nbot numbering\n");
+      for (int jj = jmaxglobal-1; jj>=0; jj--){
+         fprintf(fp,"%2d: %4d:",mype,jj);
+         if (jj >= jminsize && jj < jmaxsize) {
+            for (int ii = 0; ii<imaxglobal; ii++){
+               if (ii >= iminsize && ii < imaxsize) {
+                  int hashval = read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0])-noffset;
+                  if (hashval >= 0 && hashval < (int)ncells) {
+                     fprintf(fp,"%5d",nbot_tmp[hashval]);
+                  } else {
+                     fprintf(fp,"     ");
+                  }
+               } else {
+                  fprintf(fp,"     ");
+               }
+            }
+         }
+         fprintf(fp,"\n");
+      }
+      fprintf(fp,"%2d:      ",mype);
+      for (int ii = 0; ii<imaxglobal; ii++){
+         fprintf(fp,"%4d:",ii);
+      }
+      fprintf(fp,"\n");
+
+      fprintf(fp,"\n                                    ntop numbering\n");
+      for (int jj = jmaxglobal-1; jj>=0; jj--){
+         fprintf(fp,"%2d: %4d:",mype,jj);
+         if (jj >= jminsize && jj < jmaxsize) {
+            for (int ii = 0; ii<imaxglobal; ii++){
+               if (ii >= iminsize && ii < imaxsize) {
+                  int hashval = read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0])-noffset;
+                  if (hashval >= 0 && hashval < (int)ncells) {
+                     fprintf(fp,"%5d",ntop_tmp[hashval]);
+                  } else {
+                     fprintf(fp,"     ");
+                  }
+               } else {
+                  fprintf(fp,"     ");
+               }
+            }
+         }
+         fprintf(fp,"\n");
+      }
+      fprintf(fp,"%2d:      ",mype);
+      for (int ii = 0; ii<imaxglobal; ii++){
+         fprintf(fp,"%4d:",ii);
+      }
+      fprintf(fp,"\n");
+   }
+
+#ifdef HAVE_MPI
+   if (numpe > 1) {
+         vector<int> iminsize_global(numpe);
+         vector<int> imaxsize_global(numpe);
+         vector<int> jminsize_global(numpe);
+         vector<int> jmaxsize_global(numpe);
+         vector<int> comm_partner(numpe,-1);
+
+         MPI_Allgather(&iminsize, 1, MPI_INT, &iminsize_global[0], 1, MPI_INT, MPI_COMM_WORLD);
+         MPI_Allgather(&imaxsize, 1, MPI_INT, &imaxsize_global[0], 1, MPI_INT, MPI_COMM_WORLD);
+         MPI_Allgather(&jminsize, 1, MPI_INT, &jminsize_global[0], 1, MPI_INT, MPI_COMM_WORLD);
+         MPI_Allgather(&jmaxsize, 1, MPI_INT, &jmaxsize_global[0], 1, MPI_INT, MPI_COMM_WORLD);
+
+         int num_comm_partners = 0; 
+         for (int ip = 0; ip < numpe; ip++){
+            if (ip == mype) continue;
+            if (iminsize_global[ip] > imaxtile) continue;
+            if (imaxsize_global[ip] < imintile) continue;
+            if (jminsize_global[ip] > jmaxtile) continue;
+            if (jmaxsize_global[ip] < jmintile) continue;
+            comm_partner[num_comm_partners] = ip;
+            num_comm_partners++;
+            //if (DEBUG) fprintf(fp,"%d: overlap with processor %d bounding box is %d %d %d %d\n",mype,ip,iminsize_global[ip],imaxsize_global[ip],jminsize_global[ip],jmaxsize_global[ip]);
+         }    
+
+#ifdef BOUNDS_CHECK
+      {
+         vector<int> nlft_tmp(ncells_ghost);
+         vector<int> nrht_tmp(ncells_ghost);
+         vector<int> nbot_tmp(ncells_ghost);
+         vector<int> ntop_tmp(ncells_ghost);
+         ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, ncells*sizeof(cl_int), &nlft_tmp[0], NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells*sizeof(cl_int), &nrht_tmp[0], NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, ncells*sizeof(cl_int), &nbot_tmp[0], NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_TRUE,  0, ncells*sizeof(cl_int), &ntop_tmp[0], NULL);
+         for (uint ic=0; ic<ncells; ic++){
+            int nl = nlft_tmp[ic];
+            if (nl != -1){
+               nl -= noffset;
+               if (nl<0 || nl>= ncells) printf("%d: Warning at line %d cell %d nlft %d\n",mype,__LINE__,ic,nl);
+            }
+            int nr = nrht_tmp[ic];
+            if (nr != -1){
+               nr -= noffset;
+               if (nr<0 || nr>= ncells) printf("%d: Warning at line %d cell %d nrht %d\n",mype,__LINE__,ic,nr);
+            }
+            int nb = nbot_tmp[ic];
+            if (nb != -1){
+               nb -= noffset;
+               if (nb<0 || nb>= ncells) printf("%d: Warning at line %d cell %d nbot %d\n",mype,__LINE__,ic,nb);
+            }
+            int nt = ntop_tmp[ic];
+            if (nt != -1){
+               nt -= noffset;
+               if (nt<0 || nt>= ncells) printf("%d: Warning at line %d cell %d ntop %d\n",mype,__LINE__,ic,nt);
+            }
+         }
+      }
+#endif
+
+      cl_mem dev_border_cell = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell1"), &ncells, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+
+      ezcl_set_kernel_arg(kernel_calc_border_cells, 0,  sizeof(cl_int), (void *)&ncells);
+      ezcl_set_kernel_arg(kernel_calc_border_cells, 1,  sizeof(cl_int), (void *)&noffset);
+      ezcl_set_kernel_arg(kernel_calc_border_cells, 2,  sizeof(cl_mem), (void *)&dev_nlft);
+      ezcl_set_kernel_arg(kernel_calc_border_cells, 3,  sizeof(cl_mem), (void *)&dev_nrht);
+      ezcl_set_kernel_arg(kernel_calc_border_cells, 4,  sizeof(cl_mem), (void *)&dev_nbot);
+      ezcl_set_kernel_arg(kernel_calc_border_cells, 5,  sizeof(cl_mem), (void *)&dev_ntop);
+      ezcl_set_kernel_arg(kernel_calc_border_cells, 6,  sizeof(cl_mem), (void *)&dev_level);
+      ezcl_set_kernel_arg(kernel_calc_border_cells, 7,  sizeof(cl_mem), (void *)&dev_border_cell);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_border_cells, 1, NULL, &global_work_size, &local_work_size, NULL); 
+
+      cl_mem dev_border_cell_new = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell2"), &ncells, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+
+      size_t one = 1;
+      cl_mem dev_nbsize = ezcl_malloc(NULL, const_cast<char *>("dev_nbsize"), &one, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+      cl_mem dev_ioffset = ezcl_malloc(NULL, const_cast<char *>("dev_ioffset"), &block_size, sizeof(cl_uint), CL_MEM_READ_WRITE, 0);
+
+      ezcl_set_kernel_arg(kernel_calc_border_cells2,  0,  sizeof(cl_int), (void *)&ncells);
+      ezcl_set_kernel_arg(kernel_calc_border_cells2,  1,  sizeof(cl_int), (void *)&noffset);
+      ezcl_set_kernel_arg(kernel_calc_border_cells2,  2,  sizeof(cl_mem), (void *)&dev_nlft);
+      ezcl_set_kernel_arg(kernel_calc_border_cells2,  3,  sizeof(cl_mem), (void *)&dev_nrht);
+      ezcl_set_kernel_arg(kernel_calc_border_cells2,  4,  sizeof(cl_mem), (void *)&dev_nbot);
+      ezcl_set_kernel_arg(kernel_calc_border_cells2,  5,  sizeof(cl_mem), (void *)&dev_ntop);
+      ezcl_set_kernel_arg(kernel_calc_border_cells2,  6,  sizeof(cl_mem), (void *)&dev_level);
+      ezcl_set_kernel_arg(kernel_calc_border_cells2,  7,  sizeof(cl_mem), (void *)&dev_border_cell);
+      ezcl_set_kernel_arg(kernel_calc_border_cells2,  8,  sizeof(cl_mem), (void *)&dev_border_cell_new);
+      ezcl_set_kernel_arg(kernel_calc_border_cells2,  9,  sizeof(cl_mem), (void *)&dev_ioffset);
+      ezcl_set_kernel_arg(kernel_calc_border_cells2, 10,  sizeof(cl_mem), (void *)&dev_nbsize);
+      ezcl_set_kernel_arg(kernel_calc_border_cells2, 11,  local_work_size*sizeof(cl_int), NULL);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_border_cells2, 1, NULL, &global_work_size, &local_work_size, NULL); 
+
+      ezcl_device_memory_swap(&dev_border_cell, &dev_border_cell_new);
+      ezcl_device_memory_delete(dev_border_cell_new);
+
+      int group_size = (int)(global_work_size/local_work_size);
+
+      ezcl_set_kernel_arg(kernel_finish_scan, 0,  sizeof(cl_int), (void *)&group_size);
+      ezcl_set_kernel_arg(kernel_finish_scan, 1,  sizeof(cl_mem), (void *)&dev_ioffset);
+      ezcl_set_kernel_arg(kernel_finish_scan, 2,  sizeof(cl_mem), (void *)&dev_nbsize);
+      ezcl_set_kernel_arg(kernel_finish_scan, 3,  local_work_size*sizeof(cl_int), NULL);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_finish_scan, 1, NULL, &local_work_size, &local_work_size, NULL); 
+
+      int nbsize_local;
+      ezcl_enqueue_read_buffer(command_queue, dev_nbsize, CL_TRUE,  0, 1*sizeof(cl_int), &nbsize_local, NULL);
+      ezcl_device_memory_delete(dev_nbsize);
+
+      //printf("%d: border cell size is %d global is %ld\n",mype,nbsize_local,nbsize_global);
+
+      vector<int> border_cell_num(nbsize_local);
+      vector<int> border_cell_i(nbsize_local);
+      vector<int> border_cell_j(nbsize_local);
+      vector<int> border_cell_level(nbsize_local);
+    
+      // allocate new border memory
+      size_t nbsize_long = nbsize_local;
+      cl_mem dev_border_cell_i     = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_i"),     &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+      cl_mem dev_border_cell_j     = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_j"),     &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+      cl_mem dev_border_cell_level = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_level"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+      cl_mem dev_border_cell_num   = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_num"),   &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+
+      ezcl_set_kernel_arg(kernel_get_border_data,  0,  sizeof(cl_int), (void *)&ncells);
+      ezcl_set_kernel_arg(kernel_get_border_data,  1,  sizeof(cl_int), (void *)&noffset);
+      ezcl_set_kernel_arg(kernel_get_border_data,  2,  sizeof(cl_mem), (void *)&dev_ioffset);
+      ezcl_set_kernel_arg(kernel_get_border_data,  3,  sizeof(cl_mem), (void *)&dev_border_cell);
+      ezcl_set_kernel_arg(kernel_get_border_data,  4,  sizeof(cl_mem), (void *)&dev_i);
+      ezcl_set_kernel_arg(kernel_get_border_data,  5,  sizeof(cl_mem), (void *)&dev_j);
+      ezcl_set_kernel_arg(kernel_get_border_data,  6,  sizeof(cl_mem), (void *)&dev_level);
+      ezcl_set_kernel_arg(kernel_get_border_data,  7,  sizeof(cl_mem), (void *)&dev_border_cell_i);
+      ezcl_set_kernel_arg(kernel_get_border_data,  8,  sizeof(cl_mem), (void *)&dev_border_cell_j);
+      ezcl_set_kernel_arg(kernel_get_border_data,  9,  sizeof(cl_mem), (void *)&dev_border_cell_level);
+      ezcl_set_kernel_arg(kernel_get_border_data, 10,  sizeof(cl_mem), (void *)&dev_border_cell_num);
+      ezcl_set_kernel_arg(kernel_get_border_data, 11,  local_work_size*sizeof(cl_uint), NULL);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_get_border_data, 1, NULL, &global_work_size, &local_work_size, NULL); 
+
+      ezcl_device_memory_delete(dev_ioffset);
+      ezcl_device_memory_delete(dev_border_cell);
+
+      // read gpu border cell data
+      ezcl_enqueue_read_buffer(command_queue, dev_border_cell_i,     CL_FALSE, 0, nbsize_local*sizeof(cl_int), &border_cell_i[0],     NULL);
+      ezcl_enqueue_read_buffer(command_queue, dev_border_cell_j,     CL_FALSE, 0, nbsize_local*sizeof(cl_int), &border_cell_j[0],     NULL);
+      ezcl_enqueue_read_buffer(command_queue, dev_border_cell_level, CL_FALSE, 0, nbsize_local*sizeof(cl_int), &border_cell_level[0], NULL);
+      ezcl_enqueue_read_buffer(command_queue, dev_border_cell_num,   CL_TRUE,  0, nbsize_local*sizeof(cl_int), &border_cell_num[0],   NULL);
+
+      if (TIMING_LEVEL >= 2) {
+         gpu_timers[MESH_TIMER_FIND_BOUNDARY] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
+         cpu_timer_start(&tstart_lev2);
+      }
+
+      // Allocate push database
+
+      int **send_database = (int**)malloc(num_comm_partners*sizeof(int *));
+      for (int ip = 0; ip < num_comm_partners; ip++){
+         send_database[ip] = (int *)malloc(nbsize_local*sizeof(int));
+      }
+
+      // Compute the overlap between processor bounding boxes and set up push database
+
+      vector<int> send_buffer_count(num_comm_partners);
+      for (int ip = 0; ip < num_comm_partners; ip++){
+         int icount = 0;
+         for (int ib = 0; ib <nbsize_local; ib++){
+            int lev = border_cell_level[ib];
+            int levmult = IPOW2(levmx-lev);
+            if (border_cell_i[ib]*levmult >= iminsize_global[comm_partner[ip]] && 
+                border_cell_i[ib]*levmult <= imaxsize_global[comm_partner[ip]] && 
+                border_cell_j[ib]*levmult >= jminsize_global[comm_partner[ip]] && 
+                border_cell_j[ib]*levmult <= jmaxsize_global[comm_partner[ip]] ) {
+               send_database[ip][icount] = ib;
+               icount++;
+            }
+         }
+         send_buffer_count[ip]=icount;
+      }
+
+      // Initialize L7_Push_Setup with num_comm_partners, comm_partner, send_database and 
+      // send_buffer_count. L7_Push_Setup will copy data and determine recv_buffer_counts.
+      // It will return receive_count_total for use in allocations
+
+      int receive_count_total;
+      int i_push_handle = 0;
+      L7_Push_Setup(num_comm_partners, &comm_partner[0], &send_buffer_count[0],
+                    send_database, &receive_count_total, &i_push_handle);
+
+      if (DEBUG) {
+         fprintf(fp,"DEBUG num_comm_partners %d\n",num_comm_partners);
+         for (int ip = 0; ip < num_comm_partners; ip++){
+            fprintf(fp,"DEBUG comm partner is %d data count is %d\n",comm_partner[ip],send_buffer_count[ip]);
+            for (int ic = 0; ic < send_buffer_count[ip]; ic++){
+               int ib = send_database[ip][ic];
+               fprintf(fp,"DEBUG \t index %d cell number %d i %d j %d level %d\n",ib,border_cell_num[ib],
+                  border_cell_i[ib],border_cell_j[ib],border_cell_level[ib]);
+            }
+         }
+      }
+
+      // Can now free the send database. Other arrays are vectors and will automatically 
+      // deallocate
+
+      for (int ip = 0; ip < num_comm_partners; ip++){
+         free(send_database[ip]);
+      }
+      free(send_database);
+
+      if (TIMING_LEVEL >= 2) {
+         gpu_timers[MESH_TIMER_PUSH_SETUP] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
+         cpu_timer_start(&tstart_lev2);
+      }
+      // Push the data needed to the adjacent processors
+
+      int *border_cell_num_local = (int *)malloc(receive_count_total*sizeof(int));
+      int *border_cell_i_local = (int *)malloc(receive_count_total*sizeof(int));
+      int *border_cell_j_local = (int *)malloc(receive_count_total*sizeof(int));
+      int *border_cell_level_local = (int *)malloc(receive_count_total*sizeof(int));
+      L7_Push_Update(&border_cell_num[0],   border_cell_num_local,   i_push_handle);
+      L7_Push_Update(&border_cell_i[0],     border_cell_i_local,     i_push_handle);
+      L7_Push_Update(&border_cell_j[0],     border_cell_j_local,     i_push_handle);
+      L7_Push_Update(&border_cell_level[0], border_cell_level_local, i_push_handle);
+
+      L7_Push_Free(&i_push_handle);
+
+      ezcl_device_memory_delete(dev_border_cell_i);
+      ezcl_device_memory_delete(dev_border_cell_j);
+      ezcl_device_memory_delete(dev_border_cell_level);
+      ezcl_device_memory_delete(dev_border_cell_num);
+
+      nbsize_local = receive_count_total;
+
+      if (DEBUG) {
+         for (int ic = 0; ic < nbsize_local; ic++) {
+            fprintf(fp,"%d: Local Border cell %d is %d i %d j %d level %d\n",mype,ic,border_cell_num_local[ic],
+               border_cell_i_local[ic],border_cell_j_local[ic],border_cell_level_local[ic]);
+         }
+      }
+
+      if (TIMING_LEVEL >= 2) {
+         gpu_timers[MESH_TIMER_PUSH_BOUNDARY] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
+         cpu_timer_start(&tstart_lev2);
+      }
+
+      nbsize_long = nbsize_local;
+
+      dev_border_cell_num        = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_num"),        &nbsize_long, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+      dev_border_cell_i          = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_i"),          &nbsize_long, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+      dev_border_cell_j          = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_j"),          &nbsize_long, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+      dev_border_cell_level      = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_level"),      &nbsize_long, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+      cl_mem dev_border_cell_needed     = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_needed"),     &nbsize_long, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+      cl_mem dev_border_cell_needed_out = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_needed_out"), &nbsize_long, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+
+      ezcl_enqueue_write_buffer(command_queue, dev_border_cell_num,    CL_FALSE, 0, nbsize_local*sizeof(cl_int), &border_cell_num_local[0], NULL);
+      ezcl_enqueue_write_buffer(command_queue, dev_border_cell_i,      CL_FALSE, 0, nbsize_local*sizeof(cl_int), &border_cell_i_local[0],   NULL);
+      ezcl_enqueue_write_buffer(command_queue, dev_border_cell_j,      CL_FALSE, 0, nbsize_local*sizeof(cl_int), &border_cell_j_local[0],   NULL);
+      ezcl_enqueue_write_buffer(command_queue, dev_border_cell_level,  CL_TRUE,  0, nbsize_local*sizeof(cl_int), &border_cell_level_local[0],   NULL);
+
+      //ezcl_enqueue_write_buffer(command_queue, dev_border_cell_needed, CL_TRUE,  0, nbsize_local*sizeof(cl_int), &border_cell_needed_local[0],   NULL);
+
+      free(border_cell_i_local);
+      free(border_cell_j_local);
+      free(border_cell_level_local);
+
+      if (TIMING_LEVEL >= 2) {
+         gpu_timers[MESH_TIMER_LOCAL_LIST] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
+         cpu_timer_start(&tstart_lev2);
+      }
+
+      if (DEBUG) {
+         vector<int> hash_tmp(hashsize);
+         ezcl_enqueue_read_buffer(command_queue, dev_hash, CL_TRUE,  0, hashsize*sizeof(cl_int), &hash_tmp[0], NULL);
+
+         cl_mem dev_hash_header_check = gpu_get_hash_header();
+         vector<ulong> hash_header_check(hash_header_size);
+         ezcl_enqueue_read_buffer(command_queue, dev_hash_header_check, CL_TRUE, 0, hash_header_size*sizeof(cl_ulong), &hash_header_check[0], NULL);
+
+         int   gpu_hash_method     = (int)hash_header_check[0];
+         ulong gpu_hash_table_size =      hash_header_check[1];
+         ulong gpu_AA              =      hash_header_check[2];
+         ulong gpu_BB              =      hash_header_check[3];
+
+         int jmaxglobal = (jmax+1)*IPOW2(levmx);
+         int imaxglobal = (imax+1)*IPOW2(levmx);
+         fprintf(fp,"\n                                    HASH numbering before layer 1\n");
+         for (int jj = jmaxglobal-1; jj>=0; jj--){
+            fprintf(fp,"%2d: %4d:",mype,jj);
+            if (jj >= jminsize && jj < jmaxsize) {
+               for (int ii = 0; ii<imaxglobal; ii++){
+                  if (ii >= iminsize && ii < imaxsize) {
+                     fprintf(fp,"%5d",read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) );
+                  } else {
+                     fprintf(fp,"     ");
+                  }
+               }
+            }
+            fprintf(fp,"\n");
+         }
+         fprintf(fp,"%2d:      ",mype);
+         for (int ii = 0; ii<imaxglobal; ii++){
+            fprintf(fp,"%4d:",ii);
+         }
+         fprintf(fp,"\n");
+      }
+
+      size_t nb_local_work_size = 128;
+      size_t nb_global_work_size = ((nbsize_local + nb_local_work_size - 1) /nb_local_work_size) * nb_local_work_size;
+
+      ezcl_set_kernel_arg(kernel_calc_layer1,  0,  sizeof(cl_int),   (void *)&nbsize_local);
+      ezcl_set_kernel_arg(kernel_calc_layer1,  1,  sizeof(cl_int),   (void *)&ncells);
+      ezcl_set_kernel_arg(kernel_calc_layer1,  2,  sizeof(cl_int),   (void *)&levmx);
+      ezcl_set_kernel_arg(kernel_calc_layer1,  3,  sizeof(cl_int),   (void *)&imax);
+      ezcl_set_kernel_arg(kernel_calc_layer1,  4,  sizeof(cl_int),   (void *)&jmax);
+      ezcl_set_kernel_arg(kernel_calc_layer1,  5,  sizeof(cl_int),   (void *)&noffset);
+      ezcl_set_kernel_arg(kernel_calc_layer1,  6,  sizeof(cl_mem),   (void *)&dev_sizes);
+      ezcl_set_kernel_arg(kernel_calc_layer1,  7,  sizeof(cl_mem),   (void *)&dev_levtable);
+      ezcl_set_kernel_arg(kernel_calc_layer1,  8,  sizeof(cl_mem),   (void *)&dev_level);
+      ezcl_set_kernel_arg(kernel_calc_layer1,  9,  sizeof(cl_mem),   (void *)&dev_border_cell_i);
+      ezcl_set_kernel_arg(kernel_calc_layer1, 10,  sizeof(cl_mem),   (void *)&dev_border_cell_j);
+      ezcl_set_kernel_arg(kernel_calc_layer1, 11,  sizeof(cl_mem),   (void *)&dev_border_cell_level);
+      ezcl_set_kernel_arg(kernel_calc_layer1, 12,  sizeof(cl_mem),   (void *)&dev_border_cell_needed);
+      ezcl_set_kernel_arg(kernel_calc_layer1, 13,  sizeof(cl_mem),   (void *)&dev_hash_header);
+      ezcl_set_kernel_arg(kernel_calc_layer1, 14,  sizeof(cl_mem),   (void *)&dev_hash);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_layer1, 1, NULL, &nb_global_work_size, &nb_local_work_size, NULL); 
+
+      if (DEBUG){
+         vector<int> border_cell_needed_local(nbsize_local);
+
+         ezcl_enqueue_read_buffer(command_queue, dev_border_cell_needed, CL_TRUE,  0, nbsize_local*sizeof(cl_int), &border_cell_needed_local[0],   NULL);
+
+         for(int ic=0; ic<nbsize_local; ic++){
+            if (border_cell_needed_local[ic] == 0) continue;
+            fprintf(fp,"%d: First set of needed cells ic %3d cell %3d type %3d\n",mype,ic,border_cell_num_local[ic],border_cell_needed_local[ic]);
+         }
+      }
+
+      cl_event calc_layer1_sethash_event;
+
+      ezcl_set_kernel_arg(kernel_calc_layer1_sethash,  0,  sizeof(cl_int),   (void *)&nbsize_local);
+      ezcl_set_kernel_arg(kernel_calc_layer1_sethash,  1,  sizeof(cl_int),   (void *)&ncells);
+      ezcl_set_kernel_arg(kernel_calc_layer1_sethash,  2,  sizeof(cl_int),   (void *)&noffset);
+      ezcl_set_kernel_arg(kernel_calc_layer1_sethash,  3,  sizeof(cl_int),   (void *)&levmx);
+      ezcl_set_kernel_arg(kernel_calc_layer1_sethash,  4,  sizeof(cl_mem),   (void *)&dev_sizes);
+      ezcl_set_kernel_arg(kernel_calc_layer1_sethash,  5,  sizeof(cl_mem),   (void *)&dev_levtable);
+      ezcl_set_kernel_arg(kernel_calc_layer1_sethash,  6,  sizeof(cl_mem),   (void *)&dev_border_cell_i);
+      ezcl_set_kernel_arg(kernel_calc_layer1_sethash,  7,  sizeof(cl_mem),   (void *)&dev_border_cell_j);
+      ezcl_set_kernel_arg(kernel_calc_layer1_sethash,  8,  sizeof(cl_mem),   (void *)&dev_border_cell_level);
+      ezcl_set_kernel_arg(kernel_calc_layer1_sethash,  9,  sizeof(cl_mem),   (void *)&dev_border_cell_needed);
+      ezcl_set_kernel_arg(kernel_calc_layer1_sethash, 10,  sizeof(cl_mem),   (void *)&dev_hash_header);
+      ezcl_set_kernel_arg(kernel_calc_layer1_sethash, 11,  sizeof(cl_mem),   (void *)&dev_hash);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_layer1_sethash, 1, NULL, &nb_global_work_size, &nb_local_work_size, &calc_layer1_sethash_event); 
+
+      ezcl_wait_for_events(1, &calc_layer1_sethash_event);
+      ezcl_event_release(calc_layer1_sethash_event);
+
+      if (TIMING_LEVEL >= 2) {
+         gpu_timers[MESH_TIMER_LAYER1] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
+         cpu_timer_start(&tstart_lev2);
+      }
+
+      if (DEBUG) {
+         print_dev_local();
+
+         vector<int> hash_tmp(hashsize);
+         ezcl_enqueue_read_buffer(command_queue, dev_hash, CL_TRUE,  0, hashsize*sizeof(cl_int), &hash_tmp[0], NULL);
+
+         cl_mem dev_hash_header_check = gpu_get_hash_header();
+         vector<ulong> hash_header_check(hash_header_size);
+         ezcl_enqueue_read_buffer(command_queue, dev_hash_header_check, CL_TRUE, 0, hash_header_size*sizeof(cl_ulong), &hash_header_check[0], NULL);
+
+         int   gpu_hash_method     = (int)hash_header_check[0];
+         ulong gpu_hash_table_size =      hash_header_check[1];
+         ulong gpu_AA              =      hash_header_check[2];
+         ulong gpu_BB              =      hash_header_check[3];
+
+         int jmaxglobal = (jmax+1)*IPOW2(levmx);
+         int imaxglobal = (imax+1)*IPOW2(levmx);
+         fprintf(fp,"\n                                    HASH numbering for 1 layer\n");
+         for (int jj = jmaxglobal-1; jj>=0; jj--){
+            fprintf(fp,"%2d: %4d:",mype,jj);
+            if (jj >= jminsize && jj < jmaxsize) {
+               for (int ii = 0; ii<imaxglobal; ii++){
+                  if (ii >= iminsize && ii < imaxsize) {
+                     fprintf(fp,"%5d",read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) );
+                  } else {
+                     fprintf(fp,"     ");
+                  }
+               }
+            }
+            fprintf(fp,"\n");
+         }
+         fprintf(fp,"%2d:      ",mype);
+         for (int ii = 0; ii<imaxglobal; ii++){
+            fprintf(fp,"%4d:",ii);
+         }
+         fprintf(fp,"\n");
+      }
+
+      group_size = (int)(nb_global_work_size/nb_local_work_size);
+
+      cl_mem dev_nbpacked = ezcl_malloc(NULL, const_cast<char *>("dev_nbpacked"), &one, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+      size_t group_size_long = group_size;
+      dev_ioffset = ezcl_malloc(NULL, const_cast<char *>("dev_ioffset"), &group_size_long, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+
+      ezcl_set_kernel_arg(kernel_calc_layer2,  0,  sizeof(cl_int),   (void *)&nbsize_local);
+      ezcl_set_kernel_arg(kernel_calc_layer2,  1,  sizeof(cl_int),   (void *)&ncells);
+      ezcl_set_kernel_arg(kernel_calc_layer2,  2,  sizeof(cl_int),   (void *)&noffset);
+      ezcl_set_kernel_arg(kernel_calc_layer2,  3,  sizeof(cl_int),   (void *)&levmx);
+      ezcl_set_kernel_arg(kernel_calc_layer2,  4,  sizeof(cl_int),   (void *)&imax);
+      ezcl_set_kernel_arg(kernel_calc_layer2,  5,  sizeof(cl_int),   (void *)&jmax);
+      ezcl_set_kernel_arg(kernel_calc_layer2,  6,  sizeof(cl_mem),   (void *)&dev_sizes);
+      ezcl_set_kernel_arg(kernel_calc_layer2,  7,  sizeof(cl_mem),   (void *)&dev_levtable);
+      ezcl_set_kernel_arg(kernel_calc_layer2,  8,  sizeof(cl_mem),   (void *)&dev_level);
+      ezcl_set_kernel_arg(kernel_calc_layer2,  9,  sizeof(cl_mem),   (void *)&dev_border_cell_i);
+      ezcl_set_kernel_arg(kernel_calc_layer2, 10,  sizeof(cl_mem),   (void *)&dev_border_cell_j);
+      ezcl_set_kernel_arg(kernel_calc_layer2, 11,  sizeof(cl_mem),   (void *)&dev_border_cell_level);
+      ezcl_set_kernel_arg(kernel_calc_layer2, 12,  sizeof(cl_mem),   (void *)&dev_border_cell_needed);
+      ezcl_set_kernel_arg(kernel_calc_layer2, 13,  sizeof(cl_mem),   (void *)&dev_border_cell_needed_out);
+      ezcl_set_kernel_arg(kernel_calc_layer2, 14,  sizeof(cl_mem),   (void *)&dev_hash_header);
+      ezcl_set_kernel_arg(kernel_calc_layer2, 15,  sizeof(cl_mem),   (void *)&dev_hash);
+      ezcl_set_kernel_arg(kernel_calc_layer2, 16,  sizeof(cl_mem),   (void *)&dev_ioffset);
+      ezcl_set_kernel_arg(kernel_calc_layer2, 17,  sizeof(cl_mem),   (void *)&dev_nbpacked);
+      ezcl_set_kernel_arg(kernel_calc_layer2, 18,  nb_local_work_size*sizeof(cl_mem), NULL);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_layer2, 1, NULL, &nb_global_work_size, &nb_local_work_size, NULL); 
+
+      if (DEBUG){
+         vector<int> border_cell_needed_local(nbsize_local);
+
+         ezcl_enqueue_read_buffer(command_queue, dev_border_cell_needed_out, CL_TRUE,  0, nbsize_local*sizeof(cl_int), &border_cell_needed_local[0],   NULL);
+         for(int ic=0; ic<nbsize_local; ic++){
+            if (border_cell_needed_local[ic] <= 0) continue;
+            if (border_cell_needed_local[ic] <  0x0016) fprintf(fp,"%d: First  set of needed cells ic %3d cell %3d type %3d\n",mype,ic,border_cell_num_local[ic],border_cell_needed_local[ic]);
+            if (border_cell_needed_local[ic] >= 0x0016) fprintf(fp,"%d: Second set of needed cells ic %3d cell %3d type %3d\n",mype,ic,border_cell_num_local[ic],border_cell_needed_local[ic]);
+         }
+      }
+
+      free(border_cell_num_local);
+
+      ezcl_device_memory_delete(dev_border_cell_needed);
+
+      ezcl_set_kernel_arg(kernel_finish_scan, 0,  sizeof(cl_int), (void *)&group_size);
+      ezcl_set_kernel_arg(kernel_finish_scan, 1,  sizeof(cl_mem), (void *)&dev_ioffset);
+      ezcl_set_kernel_arg(kernel_finish_scan, 2,  sizeof(cl_mem), (void *)&dev_nbpacked);
+      ezcl_set_kernel_arg(kernel_finish_scan, 3,  nb_local_work_size*sizeof(cl_int), NULL);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_finish_scan, 1, NULL, &nb_local_work_size, &nb_local_work_size, NULL); 
+
+      int nbpacked;
+      ezcl_enqueue_read_buffer(command_queue, dev_nbpacked, CL_TRUE,  0, 1*sizeof(cl_int), &nbpacked, NULL);
+      ezcl_device_memory_delete(dev_nbpacked);
+
+      if (TIMING_LEVEL >= 2) {
+         gpu_timers[MESH_TIMER_LAYER2] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
+         cpu_timer_start(&tstart_lev2);
+      }
+
+      nbsize_long = nbsize_local;
+      cl_mem dev_border_cell_i_new     = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_i_new"),     &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+      cl_mem dev_border_cell_j_new     = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_j_new"),     &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+      cl_mem dev_border_cell_level_new = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_level_new"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+      cl_mem dev_indices_needed    = ezcl_malloc(NULL, const_cast<char *>("dev_indices_needed"),    &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+
+      cl_event get_border_data2_event;
+
+      ezcl_set_kernel_arg(kernel_get_border_data2,  0,  sizeof(cl_int), (void *)&nbsize_local);
+      ezcl_set_kernel_arg(kernel_get_border_data2,  1,  sizeof(cl_mem), (void *)&dev_ioffset);
+      ezcl_set_kernel_arg(kernel_get_border_data2,  2,  sizeof(cl_mem), (void *)&dev_border_cell_needed_out);
+      ezcl_set_kernel_arg(kernel_get_border_data2,  3,  sizeof(cl_mem), (void *)&dev_border_cell_i);
+      ezcl_set_kernel_arg(kernel_get_border_data2,  4,  sizeof(cl_mem), (void *)&dev_border_cell_j);
+      ezcl_set_kernel_arg(kernel_get_border_data2,  5,  sizeof(cl_mem), (void *)&dev_border_cell_level);
+      ezcl_set_kernel_arg(kernel_get_border_data2,  6,  sizeof(cl_mem), (void *)&dev_border_cell_num);
+      ezcl_set_kernel_arg(kernel_get_border_data2,  7,  sizeof(cl_mem), (void *)&dev_border_cell_i_new);
+      ezcl_set_kernel_arg(kernel_get_border_data2,  8,  sizeof(cl_mem), (void *)&dev_border_cell_j_new);
+      ezcl_set_kernel_arg(kernel_get_border_data2,  9,  sizeof(cl_mem), (void *)&dev_border_cell_level_new);
+      ezcl_set_kernel_arg(kernel_get_border_data2, 10,  sizeof(cl_mem), (void *)&dev_indices_needed);
+      ezcl_set_kernel_arg(kernel_get_border_data2, 11,  local_work_size*sizeof(cl_uint), NULL);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_get_border_data2, 1, NULL, &nb_global_work_size, &nb_local_work_size, &get_border_data2_event);
+
+      ezcl_device_memory_delete(dev_border_cell_num);
+
+      ezcl_device_memory_swap(&dev_border_cell_i,     &dev_border_cell_i_new);
+      ezcl_device_memory_swap(&dev_border_cell_j,     &dev_border_cell_j_new);
+      ezcl_device_memory_swap(&dev_border_cell_level, &dev_border_cell_level_new);
+
+      size_t nbp_local_work_size = 128;
+      size_t nbp_global_work_size = ((nbpacked + nbp_local_work_size - 1) /nbp_local_work_size) * nbp_local_work_size;
+
+      cl_event calc_layer2_sethash_event;
+
+      ezcl_set_kernel_arg(kernel_calc_layer2_sethash,  0,  sizeof(cl_int),   (void *)&nbpacked);
+      ezcl_set_kernel_arg(kernel_calc_layer2_sethash,  1,  sizeof(cl_int),   (void *)&ncells);
+      ezcl_set_kernel_arg(kernel_calc_layer2_sethash,  2,  sizeof(cl_int),   (void *)&noffset);
+      ezcl_set_kernel_arg(kernel_calc_layer2_sethash,  3,  sizeof(cl_int),   (void *)&levmx);
+      ezcl_set_kernel_arg(kernel_calc_layer2_sethash,  4,  sizeof(cl_int),   (void *)&imax);
+      ezcl_set_kernel_arg(kernel_calc_layer2_sethash,  5,  sizeof(cl_int),   (void *)&jmax);
+      ezcl_set_kernel_arg(kernel_calc_layer2_sethash,  6,  sizeof(cl_mem),   (void *)&dev_sizes);
+      ezcl_set_kernel_arg(kernel_calc_layer2_sethash,  7,  sizeof(cl_mem),   (void *)&dev_levtable);
+      ezcl_set_kernel_arg(kernel_calc_layer2_sethash,  8,  sizeof(cl_mem),   (void *)&dev_levibeg);
+      ezcl_set_kernel_arg(kernel_calc_layer2_sethash,  9,  sizeof(cl_mem),   (void *)&dev_leviend);
+      ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 10,  sizeof(cl_mem),   (void *)&dev_levjbeg);
+      ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 11,  sizeof(cl_mem),   (void *)&dev_levjend);
+      ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 12,  sizeof(cl_mem),   (void *)&dev_border_cell_i);
+      ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 13,  sizeof(cl_mem),   (void *)&dev_border_cell_j);
+      ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 14,  sizeof(cl_mem),   (void *)&dev_border_cell_level);
+      ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 15,  sizeof(cl_mem),   (void *)&dev_indices_needed);
+      ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 16,  sizeof(cl_mem),   (void *)&dev_border_cell_needed_out);
+      ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 17,  sizeof(cl_mem),   (void *)&dev_hash_header);
+      ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 18,  sizeof(cl_mem),   (void *)&dev_hash);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_layer2_sethash, 1, NULL, &nbp_global_work_size, &nbp_local_work_size, &calc_layer2_sethash_event); 
+
+      ezcl_wait_for_events(1, &calc_layer2_sethash_event);
+      ezcl_event_release(calc_layer2_sethash_event);
+
+      ezcl_device_memory_delete(dev_ioffset);
+
+      ezcl_wait_for_events(1, &get_border_data2_event);
+      ezcl_event_release(get_border_data2_event);
+
+      if (TIMING_LEVEL >= 2) {
+         gpu_timers[MESH_TIMER_LAYER_LIST] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
+         cpu_timer_start(&tstart_lev2);
+      }
+
+      vector<int> indices_needed(nbpacked);
+
+      // read gpu border cell data 
+      ezcl_enqueue_read_buffer(command_queue, dev_indices_needed,    CL_TRUE,  0, nbpacked*sizeof(cl_int), &indices_needed[0],    NULL);
+
+      ezcl_device_memory_delete(dev_border_cell_i_new);
+      ezcl_device_memory_delete(dev_border_cell_j_new);
+      ezcl_device_memory_delete(dev_border_cell_level_new);
+
+      if (DEBUG) {
+         print_dev_local();
+
+         vector<int> hash_tmp(hashsize);
+         ezcl_enqueue_read_buffer(command_queue, dev_hash, CL_TRUE,  0, hashsize*sizeof(cl_int), &hash_tmp[0], NULL);
+
+         cl_mem dev_hash_header_check = gpu_get_hash_header();
+         vector<ulong> hash_header_check(hash_header_size);
+         ezcl_enqueue_read_buffer(command_queue, dev_hash_header_check, CL_TRUE, 0, hash_header_size*sizeof(cl_ulong), &hash_header_check[0], NULL);
+
+         int   gpu_hash_method     = (int)hash_header_check[0];
+         ulong gpu_hash_table_size =      hash_header_check[1];
+         ulong gpu_AA              =      hash_header_check[2];
+         ulong gpu_BB              =      hash_header_check[3];
+
+         int jmaxglobal = (jmax+1)*IPOW2(levmx);
+         int imaxglobal = (imax+1)*IPOW2(levmx);
+         fprintf(fp,"\n                                    HASH numbering for 2 layer\n");
+         for (int jj = jmaxglobal-1; jj>=0; jj--){
+            fprintf(fp,"%2d: %4d:",mype,jj);
+            if (jj >= jminsize && jj < jmaxsize) {
+               for (int ii = 0; ii<imaxglobal; ii++){
+                  if (ii >= iminsize && ii < imaxsize) {
+                     fprintf(fp,"%5d",read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) );
+                  } else {
+                     fprintf(fp,"     ");
+                  }
+               }
+            }
+            fprintf(fp,"\n");
+         }
+         fprintf(fp,"%2d:      ",mype);
+         for (int ii = 0; ii<imaxglobal; ii++){
+            fprintf(fp,"%4d:",ii);
+         }
+         fprintf(fp,"\n");
+         fflush(fp);
+      }
+
+      ezcl_device_memory_delete(dev_border_cell_needed_out);
+
+      int nghost = nbpacked;
+      ncells_ghost = ncells + nghost;
+
+      //if (mype == 1) printf("%d: DEBUG before expanding memory ncells %ld ncells_ghost %ld capacity %ld\n",mype,ncells,ncells_ghost,ezcl_get_device_mem_capacity(dev_i));
+      if (ezcl_get_device_mem_capacity(dev_celltype) < ncells_ghost ||
+          ezcl_get_device_mem_capacity(dev_i)        < ncells_ghost ||
+          ezcl_get_device_mem_capacity(dev_j)        < ncells_ghost ||
+          ezcl_get_device_mem_capacity(dev_level)    < ncells_ghost ||
+          ezcl_get_device_mem_capacity(dev_nlft)     < ncells_ghost ||
+          ezcl_get_device_mem_capacity(dev_nrht)     < ncells_ghost ||
+          ezcl_get_device_mem_capacity(dev_nbot)     < ncells_ghost ||
+          ezcl_get_device_mem_capacity(dev_ntop)     < ncells_ghost ) {
+
+         //if (mype == 0) printf("%d: DEBUG expanding memory ncells %ld ncells_ghost %ld capacity %ld\n",mype,ncells,ncells_ghost,ezcl_get_device_mem_capacity(dev_i));
+         //printf("%d: DEBUG expanding memory ncells %ld ncells_ghost %ld capacity %ld\n",mype,ncells,ncells_ghost,ezcl_get_device_mem_capacity(dev_i));
+         mem_factor = (float)ncells_ghost/(float)ncells; // divide as floats: integer division truncated the fractional ghost-cell overhead
+         cl_mem dev_celltype_old = ezcl_malloc(NULL, const_cast<char *>("dev_celltype_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+         cl_mem dev_i_old        = ezcl_malloc(NULL, const_cast<char *>("dev_i_old"),        &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+         cl_mem dev_j_old        = ezcl_malloc(NULL, const_cast<char *>("dev_j_old"),        &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+         cl_mem dev_level_old    = ezcl_malloc(NULL, const_cast<char *>("dev_level_old"),    &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+         cl_mem dev_nlft_old     = ezcl_malloc(NULL, const_cast<char *>("dev_nlft_old"),     &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+         cl_mem dev_nrht_old     = ezcl_malloc(NULL, const_cast<char *>("dev_nrht_old"),     &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+         cl_mem dev_nbot_old     = ezcl_malloc(NULL, const_cast<char *>("dev_nbot_old"),     &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+         cl_mem dev_ntop_old     = ezcl_malloc(NULL, const_cast<char *>("dev_ntop_old"),     &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+
+         ezcl_device_memory_swap(&dev_celltype_old, &dev_celltype);
+         ezcl_device_memory_swap(&dev_i_old,        &dev_i       );
+         ezcl_device_memory_swap(&dev_j_old,        &dev_j       );
+         ezcl_device_memory_swap(&dev_level_old,    &dev_level   );
+         ezcl_device_memory_swap(&dev_nlft_old,     &dev_nlft    );
+         ezcl_device_memory_swap(&dev_nrht_old,     &dev_nrht    );
+         ezcl_device_memory_swap(&dev_nbot_old,     &dev_nbot    );
+         ezcl_device_memory_swap(&dev_ntop_old,     &dev_ntop    );
+
+         cl_event copy_mesh_data_event;
+
+         ezcl_set_kernel_arg(kernel_copy_mesh_data, 0,  sizeof(cl_int), (void *)&ncells);
+         ezcl_set_kernel_arg(kernel_copy_mesh_data, 1,  sizeof(cl_mem), (void *)&dev_celltype_old);
+         ezcl_set_kernel_arg(kernel_copy_mesh_data, 2,  sizeof(cl_mem), (void *)&dev_celltype);
+         ezcl_set_kernel_arg(kernel_copy_mesh_data, 3,  sizeof(cl_mem), (void *)&dev_i_old);
+         ezcl_set_kernel_arg(kernel_copy_mesh_data, 4,  sizeof(cl_mem), (void *)&dev_i);
+         ezcl_set_kernel_arg(kernel_copy_mesh_data, 5,  sizeof(cl_mem), (void *)&dev_j_old);
+         ezcl_set_kernel_arg(kernel_copy_mesh_data, 6,  sizeof(cl_mem), (void *)&dev_j);
+         ezcl_set_kernel_arg(kernel_copy_mesh_data, 7,  sizeof(cl_mem), (void *)&dev_level_old);
+         ezcl_set_kernel_arg(kernel_copy_mesh_data, 8,  sizeof(cl_mem), (void *)&dev_level);
+         ezcl_set_kernel_arg(kernel_copy_mesh_data, 9,  sizeof(cl_mem), (void *)&dev_nlft_old);
+         ezcl_set_kernel_arg(kernel_copy_mesh_data, 10, sizeof(cl_mem), (void *)&dev_nlft);
+         ezcl_set_kernel_arg(kernel_copy_mesh_data, 11, sizeof(cl_mem), (void *)&dev_nrht_old);
+         ezcl_set_kernel_arg(kernel_copy_mesh_data, 12, sizeof(cl_mem), (void *)&dev_nrht);
+         ezcl_set_kernel_arg(kernel_copy_mesh_data, 13, sizeof(cl_mem), (void *)&dev_nbot_old);
+         ezcl_set_kernel_arg(kernel_copy_mesh_data, 14, sizeof(cl_mem), (void *)&dev_nbot);
+         ezcl_set_kernel_arg(kernel_copy_mesh_data, 15, sizeof(cl_mem), (void *)&dev_ntop_old);
+         ezcl_set_kernel_arg(kernel_copy_mesh_data, 16, sizeof(cl_mem), (void *)&dev_ntop);
+
+         ezcl_enqueue_ndrange_kernel(command_queue, kernel_copy_mesh_data,   1, NULL, &global_work_size, &local_work_size, &copy_mesh_data_event);
+
+         ezcl_device_memory_delete(dev_celltype_old);
+         ezcl_device_memory_delete(dev_i_old);
+         ezcl_device_memory_delete(dev_j_old);
+         ezcl_device_memory_delete(dev_level_old);
+         ezcl_device_memory_delete(dev_nlft_old);
+         ezcl_device_memory_delete(dev_nrht_old);
+         ezcl_device_memory_delete(dev_nbot_old);
+         ezcl_device_memory_delete(dev_ntop_old);
+
+         ezcl_wait_for_events(1, &copy_mesh_data_event);
+         ezcl_event_release(copy_mesh_data_event);
+      }
+
+      if (TIMING_LEVEL >= 2) {
+         gpu_timers[MESH_TIMER_COPY_MESH_DATA] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
+         cpu_timer_start(&tstart_lev2);
+      }
+
+      nb_global_work_size = ((nbpacked + nb_local_work_size - 1) /nb_local_work_size) * nb_local_work_size;
+
+#ifdef BOUNDS_CHECK
+      if (ezcl_get_device_mem_nelements(dev_i) < (int)ncells_ghost || 
+          ezcl_get_device_mem_nelements(dev_j) < (int)ncells_ghost || 
+          ezcl_get_device_mem_nelements(dev_level) < (int)ncells_ghost || 
+          ezcl_get_device_mem_nelements(dev_celltype) < (int)ncells_ghost || 
+          ezcl_get_device_mem_nelements(dev_nlft) < (int)ncells_ghost || 
+          ezcl_get_device_mem_nelements(dev_nrht) < (int)ncells_ghost || 
+          ezcl_get_device_mem_nelements(dev_nbot) < (int)ncells_ghost || 
+          ezcl_get_device_mem_nelements(dev_ntop) < (int)ncells_ghost ){
+             printf("DEBUG size issue at %d\n",__LINE__);
+      }
+      if (ezcl_get_device_mem_nelements(dev_border_cell_i) < nbpacked || 
+          ezcl_get_device_mem_nelements(dev_border_cell_j) < nbpacked || 
+          ezcl_get_device_mem_nelements(dev_border_cell_level) < nbpacked ){
+             printf("DEBUG size issue at %d\n",__LINE__);
+      }
+#endif
+ 
+      cl_event fill_mesh_ghost_event;
+
+      ezcl_set_kernel_arg(kernel_fill_mesh_ghost,  0,  sizeof(cl_int), (void *)&nbpacked);
+      ezcl_set_kernel_arg(kernel_fill_mesh_ghost,  1,  sizeof(cl_int), (void *)&ncells);
+      ezcl_set_kernel_arg(kernel_fill_mesh_ghost,  2,  sizeof(cl_mem), (void *)&dev_levibeg);
+      ezcl_set_kernel_arg(kernel_fill_mesh_ghost,  3,  sizeof(cl_mem), (void *)&dev_leviend);
+      ezcl_set_kernel_arg(kernel_fill_mesh_ghost,  4,  sizeof(cl_mem), (void *)&dev_levjbeg);
+      ezcl_set_kernel_arg(kernel_fill_mesh_ghost,  5,  sizeof(cl_mem), (void *)&dev_levjend);
+      ezcl_set_kernel_arg(kernel_fill_mesh_ghost,  6,  sizeof(cl_mem), (void *)&dev_border_cell_i);
+      ezcl_set_kernel_arg(kernel_fill_mesh_ghost,  7,  sizeof(cl_mem), (void *)&dev_border_cell_j);
+      ezcl_set_kernel_arg(kernel_fill_mesh_ghost,  8,  sizeof(cl_mem), (void *)&dev_border_cell_level);
+      ezcl_set_kernel_arg(kernel_fill_mesh_ghost,  9,  sizeof(cl_mem), (void *)&dev_i);
+      ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 10,  sizeof(cl_mem), (void *)&dev_j);
+      ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 11,  sizeof(cl_mem), (void *)&dev_level);
+      ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 12,  sizeof(cl_mem), (void *)&dev_celltype);
+      ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 13,  sizeof(cl_mem), (void *)&dev_nlft);
+      ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 14,  sizeof(cl_mem), (void *)&dev_nrht);
+      ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 15,  sizeof(cl_mem), (void *)&dev_nbot);
+      ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 16,  sizeof(cl_mem), (void *)&dev_ntop);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_fill_mesh_ghost, 1, NULL, &nb_global_work_size, &nb_local_work_size, &fill_mesh_ghost_event); 
+
+      ezcl_wait_for_events(1, &fill_mesh_ghost_event);
+      ezcl_event_release(fill_mesh_ghost_event);
+
+      if (TIMING_LEVEL >= 2) {
+         gpu_timers[MESH_TIMER_FILL_MESH_GHOST] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
+         cpu_timer_start(&tstart_lev2);
+      }
+
+      if (DEBUG){
+         fprintf(fp,"After copying i,j, level to ghost cells\n");
+         print_dev_local();
+      }
+
+      ezcl_device_memory_delete(dev_border_cell_i);
+      ezcl_device_memory_delete(dev_border_cell_j);
+      ezcl_device_memory_delete(dev_border_cell_level);
+
+      size_t ghost_local_work_size = 128;
+      size_t ghost_global_work_size = ((ncells_ghost + ghost_local_work_size - 1) /ghost_local_work_size) * ghost_local_work_size;
+
+      cl_event fill_neighbor_ghost_event;
+
+      ezcl_set_kernel_arg(kernel_fill_neighbor_ghost,  0,  sizeof(cl_int),   (void *)&ncells_ghost);
+      ezcl_set_kernel_arg(kernel_fill_neighbor_ghost,  1,  sizeof(cl_int),   (void *)&levmx);
+      ezcl_set_kernel_arg(kernel_fill_neighbor_ghost,  2,  sizeof(cl_int),   (void *)&imax);
+      ezcl_set_kernel_arg(kernel_fill_neighbor_ghost,  3,  sizeof(cl_int),   (void *)&jmax);
+      ezcl_set_kernel_arg(kernel_fill_neighbor_ghost,  4,  sizeof(cl_mem),   (void *)&dev_sizes);
+      ezcl_set_kernel_arg(kernel_fill_neighbor_ghost,  5,  sizeof(cl_mem),   (void *)&dev_levtable);
+      ezcl_set_kernel_arg(kernel_fill_neighbor_ghost,  6,  sizeof(cl_mem),   (void *)&dev_i);
+      ezcl_set_kernel_arg(kernel_fill_neighbor_ghost,  7,  sizeof(cl_mem),   (void *)&dev_j);
+      ezcl_set_kernel_arg(kernel_fill_neighbor_ghost,  8,  sizeof(cl_mem),   (void *)&dev_level);
+      ezcl_set_kernel_arg(kernel_fill_neighbor_ghost,  9,  sizeof(cl_mem),   (void *)&dev_hash_header);
+      ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 10,  sizeof(cl_mem),   (void *)&dev_hash);
+      ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 11,  sizeof(cl_mem),   (void *)&dev_nlft);
+      ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 12,  sizeof(cl_mem),   (void *)&dev_nrht);
+      ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 13,  sizeof(cl_mem),   (void *)&dev_nbot);
+      ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 14,  sizeof(cl_mem),   (void *)&dev_ntop);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_fill_neighbor_ghost, 1, NULL, &ghost_global_work_size, &ghost_local_work_size, &fill_neighbor_ghost_event); 
+
+      ezcl_wait_for_events(1, &fill_neighbor_ghost_event);
+      ezcl_event_release(fill_neighbor_ghost_event);
+
+      if (TIMING_LEVEL >= 2) {
+         gpu_timers[MESH_TIMER_FILL_NEIGH_GHOST] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
+         cpu_timer_start(&tstart_lev2);
+      }
+
+      if (DEBUG){
+         fprintf(fp,"After setting neighbors through ghost cells\n");
+         print_dev_local();
+      }
+
+#ifdef BOUNDS_CHECK
+      if (ezcl_get_device_mem_nelements(dev_nlft) < (int)ncells_ghost || 
+          ezcl_get_device_mem_nelements(dev_nrht) < (int)ncells_ghost ||
+          ezcl_get_device_mem_nelements(dev_nbot) < (int)ncells_ghost ||
+          ezcl_get_device_mem_nelements(dev_ntop) < (int)ncells_ghost ){ // warn when any neighbor buffer holds fewer than ncells_ghost elements
+         printf("%d: Warning sizes for set_corner_neighbor not right ncells ghost %d nlft size %d\n",mype,(int)ncells_ghost,ezcl_get_device_mem_nelements(dev_nlft)); // cast so the argument matches %d
+      }
+#endif
+
+      if (TIMING_LEVEL >= 2) {
+         gpu_timers[MESH_TIMER_SET_CORNER_NEIGH] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
+         cpu_timer_start(&tstart_lev2);
+      }
+
+      if (DEBUG){
+         fprintf(fp,"After setting corner neighbors\n");
+         print_dev_local();
+      }
+
+#ifdef BOUNDS_CHECK
+      if (ezcl_get_device_mem_nelements(dev_nlft) < (int)ncells_ghost || 
+          ezcl_get_device_mem_nelements(dev_nrht) < (int)ncells_ghost ||
+          ezcl_get_device_mem_nelements(dev_nbot) < (int)ncells_ghost ||
+          ezcl_get_device_mem_nelements(dev_ntop) < (int)ncells_ghost ){ // warn when any neighbor buffer holds fewer than ncells_ghost elements
+         printf("%d: Warning sizes for adjust neighbors not right ncells ghost %d nlft size %d\n",mype,(int)ncells_ghost,ezcl_get_device_mem_nelements(dev_nlft)); // cast so the argument matches %d
+      }
+      if (ezcl_get_device_mem_nelements(dev_indices_needed) < (int)(ncells_ghost-ncells) ){ // indices buffer must hold one entry per ghost cell
+         printf("%d: Warning indices size wrong nghost %d size indices_needed %d\n",mype,(int)(ncells_ghost-ncells),ezcl_get_device_mem_nelements(dev_indices_needed)); // third value now has its own %d
+      }
+#endif
+
+      cl_event adjust_neighbors_local_event;
+
+      ezcl_set_kernel_arg(kernel_adjust_neighbors_local,  0,  sizeof(cl_int), (void *)&ncells_ghost);
+      ezcl_set_kernel_arg(kernel_adjust_neighbors_local,  1,  sizeof(cl_int), (void *)&ncells);
+      ezcl_set_kernel_arg(kernel_adjust_neighbors_local,  2,  sizeof(cl_int), (void *)&noffset);
+      ezcl_set_kernel_arg(kernel_adjust_neighbors_local,  3,  sizeof(cl_mem), (void *)&dev_indices_needed);
+      ezcl_set_kernel_arg(kernel_adjust_neighbors_local,  4,  sizeof(cl_mem), (void *)&dev_nlft);
+      ezcl_set_kernel_arg(kernel_adjust_neighbors_local,  5,  sizeof(cl_mem), (void *)&dev_nrht);
+      ezcl_set_kernel_arg(kernel_adjust_neighbors_local,  6,  sizeof(cl_mem), (void *)&dev_nbot);
+      ezcl_set_kernel_arg(kernel_adjust_neighbors_local,  7,  sizeof(cl_mem), (void *)&dev_ntop);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_adjust_neighbors_local, 1, NULL, &ghost_global_work_size, &ghost_local_work_size, &adjust_neighbors_local_event); 
+
+      ezcl_device_memory_delete(dev_indices_needed);
+
+      if (DEBUG){
+         fprintf(fp,"After adjusting neighbors to local indices\n");
+         print_dev_local();
+      }
+
+      ezcl_wait_for_events(1, &adjust_neighbors_local_event);
+      ezcl_event_release(adjust_neighbors_local_event);
+
+      if (TIMING_LEVEL >= 2) {
+         gpu_timers[MESH_TIMER_NEIGH_ADJUST] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
+         cpu_timer_start(&tstart_lev2);
+      }
+
+      offtile_ratio_local = (offtile_ratio_local*(double)offtile_local_count) + ((double)nghost / (double)ncells);
+      offtile_local_count++;
+      offtile_ratio_local /= offtile_local_count;
+
+      if (cell_handle) L7_Free(&cell_handle);
+      cell_handle=0;
+
+      if (DEBUG){
+         fprintf(fp,"%d: SETUP ncells %ld noffset %d nghost %d\n",mype,ncells,noffset,nghost);
+         for (int ic=0; ic<nghost; ic++){
+            fprintf(fp,"%d: indices needed ic %d index %d\n",mype,ic,indices_needed[ic]);
+         }
+      }
+
+      L7_Dev_Setup(0, noffset, ncells, &indices_needed[0], nghost, &cell_handle);
+
+#ifdef BOUNDS_CHECK
+      {
+         // Read the neighbor and level arrays back to the host and warn about any neighbor index of an owned cell outside [0, ncells_ghost).
+         vector<int> nlft_tmp(ncells_ghost);
+         vector<int> nrht_tmp(ncells_ghost);
+         vector<int> nbot_tmp(ncells_ghost);
+         vector<int> ntop_tmp(ncells_ghost);
+         vector<int> level_tmp(ncells_ghost);
+         ezcl_enqueue_read_buffer(command_queue, dev_nlft,  CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nlft_tmp[0],  NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_nrht,  CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nrht_tmp[0],  NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_nbot,  CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nbot_tmp[0],  NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_ntop,  CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &ntop_tmp[0],  NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_level, CL_TRUE,  0, ncells_ghost*sizeof(cl_int), &level_tmp[0], NULL);
+         for (uint ic=0; ic<ncells; ic++){
+            int nl = nlft_tmp[ic];
+            if (nl<0 || nl>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nlft %d\n",mype,__LINE__,ic,nl);
+            else if (level_tmp[nl] > level_tmp[ic]){ // only index with nl once it is known to be in range
+               int ntl = ntop_tmp[nl];
+               if (ntl<0 || ntl>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d global %d nlft %d ntop of nlft %d\n",mype,__LINE__,ic,ic+noffset,nl,ntl);
+            }
+            int nr = nrht_tmp[ic];
+            if (nr<0 || nr>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nrht %d\n",mype,__LINE__,ic,nr);
+            else if (level_tmp[nr] > level_tmp[ic]){ // skip the finer-neighbor check for an invalid nr
+               int ntr = ntop_tmp[nr];
+               if (ntr<0 || ntr>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d ntop of nrht %d\n",mype,__LINE__,ic,ntr);
+            }
+            int nb = nbot_tmp[ic];
+            if (nb<0 || nb>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nbot %d\n",mype,__LINE__,ic,nb);
+            else if (level_tmp[nb] > level_tmp[ic]){ // skip the finer-neighbor check for an invalid nb
+               int nrb = nrht_tmp[nb];
+               if (nrb<0 || nrb>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nrht of nbot %d\n",mype,__LINE__,ic,nrb);
+            }
+            int nt = ntop_tmp[ic];
+            if (nt<0 || nt>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d global %d ntop %d ncells %ld ncells_ghost %ld\n",mype,__LINE__,ic,ic+noffset,nt,ncells,ncells_ghost);
+            else if (level_tmp[nt] > level_tmp[ic]){ // skip the finer-neighbor check for an invalid nt
+               int nrt = nrht_tmp[nt];
+               if (nrt<0 || nrt>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nrht of ntop %d\n",mype,__LINE__,ic,nrt);
+            }
+         }
+      }
+#endif
+
+      if (TIMING_LEVEL >= 2) {
+         gpu_timers[MESH_TIMER_SETUP_COMM] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
+      }
+
+      if (DEBUG) {
+         print_dev_local();
+
+         vector<int> hash_tmp(hashsize);
+         ezcl_enqueue_read_buffer(command_queue, dev_hash, CL_FALSE, 0, hashsize*sizeof(cl_int), &hash_tmp[0], NULL);
+
+         cl_mem dev_hash_header_check = gpu_get_hash_header();
+         vector<ulong> hash_header_check(hash_header_size);
+         ezcl_enqueue_read_buffer(command_queue, dev_hash_header_check, CL_TRUE, 0, hash_header_size*sizeof(cl_ulong), &hash_header_check[0], NULL);
+
+         int   gpu_hash_method     = (int)hash_header_check[0];
+         ulong gpu_hash_table_size =      hash_header_check[1];
+         ulong gpu_AA              =      hash_header_check[2];
+         ulong gpu_BB              =      hash_header_check[3];
+
+         vector<int> nlft_tmp(ncells_ghost);
+         vector<int> nrht_tmp(ncells_ghost);
+         vector<int> nbot_tmp(ncells_ghost);
+         vector<int> ntop_tmp(ncells_ghost);
+         ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nlft_tmp[0], NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nrht_tmp[0], NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nbot_tmp[0], NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_TRUE,  0, ncells_ghost*sizeof(cl_int), &ntop_tmp[0], NULL);
+
+         int jmaxglobal = (jmax+1)*IPOW2(levmx);
+         int imaxglobal = (imax+1)*IPOW2(levmx);
+         fprintf(fp,"\n                                    HASH numbering\n");
+         for (int jj = jmaxglobal-1; jj>=0; jj--){
+            fprintf(fp,"%2d: %4d:",mype,jj);
+            if (jj >= jminsize && jj < jmaxsize) {
+               for (int ii = 0; ii<imaxglobal; ii++){
+                  if (ii >= iminsize && ii < imaxsize) {
+                     fprintf(fp,"%5d",read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) );
+                  } else {
+                     fprintf(fp,"     ");
+                  }
+               }
+            }
+            fprintf(fp,"\n");
+         }
+         fprintf(fp,"%2d:      ",mype);
+         for (int ii = 0; ii<imaxglobal; ii++){
+            fprintf(fp,"%4d:",ii);
+         }
+         fprintf(fp,"\n");
+
+         fprintf(fp,"\n                                    nlft numbering\n");
+         for (int jj = jmaxglobal-1; jj>=0; jj--){
+            fprintf(fp,"%2d: %4d:",mype,jj);
+            if (jj >= jminsize && jj < jmaxsize) {
+               for (int ii = 0; ii<imaxglobal; ii++){
+                  int hashval = read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) -noffset;
+                  if ( (ii >= iminsize && ii < imaxsize) && (hashval >= 0 && hashval < (int)ncells) ) {
+                        fprintf(fp,"%5d",nlft_tmp[hashval]);
+                  } else {
+                        fprintf(fp,"     ");
+                  }
+               }
+            }
+            fprintf(fp,"\n");
+         }
+         fprintf(fp,"%2d:      ",mype);
+         for (int ii = 0; ii<imaxglobal; ii++){
+            fprintf(fp,"%4d:",ii);
+         }
+         fprintf(fp,"\n");
+      
+         fprintf(fp,"\n                                    nrht numbering\n");
+         for (int jj = jmaxglobal-1; jj>=0; jj--){
+            fprintf(fp,"%2d: %4d:",mype,jj);
+            if (jj >= jminsize && jj < jmaxsize) {
+               for (int ii = 0; ii<imaxglobal; ii++){
+                  int hashval = read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) -noffset;
+                  if ( (ii >= iminsize && ii < imaxsize) && (hashval >= 0 && hashval < (int)ncells) ) {
+                     fprintf(fp,"%5d",nrht_tmp[hashval]);
+                  } else {
+                     fprintf(fp,"     ");
+                  }
+               }
+            }
+            fprintf(fp,"\n");
+         }
+         fprintf(fp,"%2d:      ",mype);
+         for (int ii = 0; ii<imaxglobal; ii++){
+            fprintf(fp,"%4d:",ii);
+         }
+         fprintf(fp,"\n");
+
+         fprintf(fp,"\n                                    nbot numbering\n");
+         for (int jj = jmaxglobal-1; jj>=0; jj--){
+            fprintf(fp,"%2d: %4d:",mype,jj);
+            if (jj >= jminsize && jj < jmaxsize) {
+               for (int ii = 0; ii<imaxglobal; ii++){
+                  int hashval = read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) -noffset;
+                  if ( (ii >= iminsize && ii < imaxsize) && (hashval >= 0 && hashval < (int)ncells) ) {
+                     fprintf(fp,"%5d",nbot_tmp[hashval]);
+                  } else {
+                     fprintf(fp,"     ");
+                  }
+               }
+            }
+            fprintf(fp,"\n");
+         }
+         fprintf(fp,"%2d:      ",mype);
+         for (int ii = 0; ii<imaxglobal; ii++){
+            fprintf(fp,"%4d:",ii);
+         }
+         fprintf(fp,"\n");
+
+         fprintf(fp,"\n                                    ntop numbering\n");
+         for (int jj = jmaxglobal-1; jj>=0; jj--){
+            fprintf(fp,"%2d: %4d:",mype,jj);
+            if (jj >= jminsize && jj < jmaxsize) {
+               for (int ii = 0; ii<imaxglobal; ii++){
+                  int hashval = read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) -noffset;
+                  if ( (ii >= iminsize && ii < imaxsize) && (hashval >= 0 && hashval < (int)ncells) ) {
+                     fprintf(fp,"%5d",ntop_tmp[hashval]);
+                  } else {
+                     fprintf(fp,"     ");
+                  }
+               }
+            }
+            fprintf(fp,"\n");
+         }
+         fprintf(fp,"%2d:      ",mype);
+         for (int ii = 0; ii<imaxglobal; ii++){
+            fprintf(fp,"%4d:",ii);
+         }
+         fprintf(fp,"\n");
+      }
+
+      if (DEBUG) {
+         print_dev_local();
+
+         vector<int> i_tmp(ncells_ghost);
+         vector<int> j_tmp(ncells_ghost);
+         vector<int> level_tmp(ncells_ghost);
+         vector<int> nlft_tmp(ncells_ghost);
+         vector<int> nrht_tmp(ncells_ghost);
+         vector<int> nbot_tmp(ncells_ghost);
+         vector<int> ntop_tmp(ncells_ghost);
+         ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &i_tmp[0], NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_j, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &j_tmp[0], NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_level, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &level_tmp[0], NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nlft_tmp[0], NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nrht_tmp[0], NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nbot_tmp[0], NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_TRUE,  0, ncells_ghost*sizeof(cl_int), &ntop_tmp[0], NULL);
+
+         for (uint ic=0; ic<ncells; ic++){
+            fprintf(fp,"%d: before update ic %d        i %d j %d lev %d nlft %d nrht %d nbot %d ntop %d\n",
+                mype,ic,i_tmp[ic],j_tmp[ic],level_tmp[ic],nlft_tmp[ic],nrht_tmp[ic],nbot_tmp[ic],ntop_tmp[ic]);
+         }
+         int ig=0;
+         for (uint ic=ncells; ic<ncells_ghost; ic++, ig++){
+            fprintf(fp,"%d: after  update ic %d off %d i %d j %d lev %d nlft %d nrht %d nbot %d ntop %d\n",
+                mype,ic,indices_needed[ig],i_tmp[ic],j_tmp[ic],level_tmp[ic],nlft_tmp[ic],nrht_tmp[ic],nbot_tmp[ic],ntop_tmp[ic]);
+         }
+      }
+   }
+#endif
+
+   ezcl_device_memory_delete(dev_sizes);
+   ezcl_device_memory_delete(dev_check);
+
+   gpu_compact_hash_delete(dev_hash, dev_hash_header);
+
+   gpu_timers[MESH_TIMER_CALC_NEIGHBORS] += (long)(cpu_timer_stop(tstart_cpu) * 1.0e9);
+}
+#endif
+
+void Mesh::print_calc_neighbor_type(void) // Reports the selected neighbor-calculation method on stdout.
+{
+   if ( calc_neighbor_type == HASH_TABLE ) {
+      if (mype == 0) printf("Using hash tables to calculate neighbors\n");
+      if (mype == 0 && numpe == 1) final_hash_collision_report(); // only for single-process runs
+   } else { // sizeof makes the product unsigned, so it is cast to match %lu; this line has no mype guard
+      printf("hash table size %lu\n",(unsigned long)(ncells*(int)log(ncells)*sizeof(int)));
+      if (mype == 0) printf("Using k-D tree to calculate neighbors\n");
+   }
+}
+
+int Mesh::get_calc_neighbor_type(void) // Returns the neighbor-calculation method selector (e.g. HASH_TABLE).
+{
+   return calc_neighbor_type;
+}
+
+// Threaded variant of calc_celltype. It uses barrier/master/for directives without opening a parallel
+// region of its own, so under OpenMP it presumably has to be called by every thread of an enclosing region.
+void Mesh::calc_celltype_threaded(size_t ncells)
+{
+   int alloc_flags = 0;
+#ifdef HAVE_J7
+   if (parallel) alloc_flags = LOAD_BALANCE_MEMORY;
+#endif
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+   {
+#endif
+   const bool big_enough = (celltype != NULL) && (mesh_memory.get_memory_size(celltype) >= ncells);
+   if (! big_enough) { // (re)allocate celltype with room for ncells entries
+      if (celltype != NULL) celltype = (int *)mesh_memory.memory_delete(celltype);
+      celltype = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "celltype", alloc_flags);
+   }
+#ifdef _OPENMP
+   }
+#pragma omp barrier
+#pragma omp for
+#endif
+   for (uint cell=0; cell<ncells; ++cell) {
+      celltype[cell] = REAL_CELL; // default; each boundary test below overrides the previous result
+      if (is_left_boundary(cell))   celltype[cell] = LEFT_BOUNDARY;
+      if (is_right_boundary(cell))  celltype[cell] = RIGHT_BOUNDARY;
+      if (is_bottom_boundary(cell)) celltype[cell] = BOTTOM_BOUNDARY;
+      if (is_top_boundary(cell))    celltype[cell] = TOP_BOUNDARY;
+   }
+}
+
+void Mesh::calc_celltype(size_t ncells) // Tags cells 0..ncells-1 as REAL_CELL or one of four boundary types.
+{
+   int alloc_flags = 0;
+#ifdef HAVE_J7
+   if (parallel) alloc_flags = LOAD_BALANCE_MEMORY;
+#endif
+   const bool big_enough = (celltype != NULL) && (mesh_memory.get_memory_size(celltype) >= ncells);
+   if (! big_enough) { // (re)allocate celltype with room for ncells entries
+      if (celltype != NULL) celltype = (int *)mesh_memory.memory_delete(celltype);
+      celltype = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "celltype", alloc_flags);
+   }
+
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+   for (uint cell=0; cell<ncells; ++cell) {
+      celltype[cell] = REAL_CELL; // default; each boundary test below overrides the previous result
+      if (is_left_boundary(cell))   celltype[cell] = LEFT_BOUNDARY;
+      if (is_right_boundary(cell))  celltype[cell] = RIGHT_BOUNDARY;
+      if (is_bottom_boundary(cell)) celltype[cell] = BOTTOM_BOUNDARY;
+      if (is_top_boundary(cell))    celltype[cell] = TOP_BOUNDARY;
+   }
+}
+
+// Fills dsym, xsym and ysym with, for each cell ic, the index of the cell the k-D tree finds at the point
+// (x[ic]+0.5*dx[ic], y[ic]+0.5*dy[ic]) after negating both coordinates, x only, and y only respectively.
+// An entry stays equal to ic unless the query reports exactly one hit. All three vectors are written at
+// indices 0..ncells-1, so the caller has to size them accordingly.
+void Mesh::calc_symmetry(vector<int> &dsym, vector<int> &xsym, vector<int> &ysym)
+{
+   TBounds query;
+   vector<int> hits( IPOW2(levmx*levmx) );   // result buffer handed to the tree query
+   int nhits;
+
+   for (uint ic=0; ic<ncells; ic++) {
+      // Start from the identity mapping.
+      dsym[ic]=ic;
+      xsym[ic]=ic;
+      ysym[ic]=ic;
+
+      const double px = x[ic]+0.5*dx[ic];
+      const double py = y[ic]+0.5*dy[ic];
+
+      // Diagonal symmetry: negate both coordinates (degenerate box, min == max).
+      query.min.x = query.max.x = -1.0*px;
+      query.min.y = query.max.y = -1.0*py;
+      KDTree_QueryBoxIntersect(&tree, &nhits, &(hits[0]), &query);
+      if (nhits == 1) dsym[ic]=hits[0];
+
+      // x symmetry: negate x only.
+      query.min.x = query.max.x = -1.0*px;
+      query.min.y = query.max.y = py;
+      KDTree_QueryBoxIntersect(&tree, &nhits, &(hits[0]), &query);
+      if (nhits == 1) xsym[ic]=hits[0];
+
+      // y symmetry: negate y only.
+      query.min.x = query.max.x = px;
+      query.min.y = query.max.y = -1.0*py;
+      KDTree_QueryBoxIntersect(&tree, &nhits, &(hits[0]), &query);
+      if (nhits == 1) ysym[ic]=hits[0];
+   }
+}
+
+#ifdef HAVE_MPI
+// Evens out cell ownership across the MPI ranks: each rank is assigned ncells_global/numpe cells, with
+// the first ncells_global%numpe ranks taking one extra. If any rank's count changes, the cells newly
+// assigned to this rank are fetched through L7 and every array tracked by state_memory and mesh_memory
+// is rebuilt at the new local size; ncells, noffset, nsizes and ndispl are updated on the way.
+//   numcells     - number of cells this rank owned on entry
+//   weight       - currently unused apart from a DEBUG print (see the notes in the body)
+//   state_memory - registry of state arrays; only entries with 8- or 4-byte elements are moved
+void Mesh::do_load_balance_local(size_t numcells, float *weight, MallocPlus &state_memory)
+{
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+
+   // To get rid of compiler warning
+   if (DEBUG && weight != NULL) printf("DEBUG weight[0] = %f\n",weight[0]);
+
+   int ncells_old = numcells;
+   int noffset_old = ndispl[mype];
+
+// Need to add weight array to load balance if it is not NULL
+// Need to add tolerance to when load balance is done
+
+   int do_load_balance_global = 0;
+   int nsizes_old = 0;
+
+   for (int ip=0; ip<numpe; ip++){
+      nsizes_old = nsizes[ip];
+
+      // Calc new,even partition of data across processors
+      nsizes[ip] = ncells_global/numpe;
+      // Account for leftover cells
+      if (ip < (int)(ncells_global%numpe)) nsizes[ip]++;
+
+      if (nsizes_old != nsizes[ip]) do_load_balance_global = 1;
+   }
+
+   if (do_load_balance_global) {
+      cpu_counters[MESH_COUNTER_LOAD_BALANCE]++;
+
+      mesh_memory.memory_delete(celltype);
+      mesh_memory.memory_delete(nlft);
+      mesh_memory.memory_delete(nrht);
+      mesh_memory.memory_delete(nbot);
+      mesh_memory.memory_delete(ntop);
+
+      ndispl[0]=0;
+      for (int ip=1; ip<numpe; ip++){
+         ndispl[ip] = ndispl[ip-1] + nsizes[ip-1];
+      }
+      ncells = nsizes[mype];
+      noffset=ndispl[mype];
+
+      // Indices of blocks to be added to load balance
+      int lower_block_start = noffset;
+      int lower_block_end   = min(noffset_old-1, (int)(noffset+ncells-1));
+      int upper_block_start = max((int)(noffset_old+ncells_old), noffset);
+      int upper_block_end   = noffset+ncells-1;
+
+      int lower_block_size = max(lower_block_end-lower_block_start+1,0);
+      if(lower_block_end < 0) lower_block_size = 0; // Handles segfault at start of array
+      int upper_block_size = max(upper_block_end-upper_block_start+1,0);
+      int indices_needed_count = lower_block_size + upper_block_size;
+
+      int in = 0;
+
+      vector<int> indices_needed(indices_needed_count);
+      for (int iz = lower_block_start; iz <= lower_block_end; iz++, in++){
+         indices_needed[in]=iz;
+      }
+      for (int iz = upper_block_start; iz <= upper_block_end; iz++, in++){
+         indices_needed[in]=iz;
+      }
+
+      int load_balance_handle = 0;
+      // The list is empty when the new range lies inside the old one; indexing [0] would then be undefined.
+      int *indices_needed_ptr = indices_needed.empty() ? NULL : &indices_needed[0];
+      L7_Setup(0, noffset_old, ncells_old, indices_needed_ptr, indices_needed_count, &load_balance_handle);
+
+      // Grow the state arrays to ncells_old+indices_needed_count entries before the exchange.
+      state_memory.memory_realloc_all(ncells_old+indices_needed_count);
+
+      // Walk a copy of the registry; presumably because memory_malloc/memory_replace below alter state_memory.
+      MallocPlus state_memory_old = state_memory;
+      malloc_plus_memory_entry *memory_item;
+
+      for (memory_item = state_memory_old.memory_entry_by_name_begin();
+           memory_item != state_memory_old.memory_entry_by_name_end();
+           memory_item = state_memory_old.memory_entry_by_name_next() ) {
+
+         if (memory_item->mem_elsize == 8) {
+            double *mem_ptr_double = (double *)memory_item->mem_ptr;
+
+            int flags = state_memory.get_memory_flags(mem_ptr_double);
+            double *state_temp_double = (double *) state_memory.memory_malloc(ncells, sizeof(double),
+                                                                              "state_temp_double", flags);
+
+            // After the update the copy loops read the fetched cells from index ncells_old onward.
+            L7_Update(mem_ptr_double, L7_DOUBLE, load_balance_handle);
+            in = 0;
+            if(lower_block_size > 0) {
+               for(; in < MIN(lower_block_size, (int)ncells); in++) {
+                  state_temp_double[in] = mem_ptr_double[ncells_old + in];
+               }
+            }
+
+            for(int ic = MAX((noffset - noffset_old), 0); (ic < ncells_old) && (in < (int)ncells); ic++, in++) {
+               state_temp_double[in] = mem_ptr_double[ic];
+            }
+
+            if(upper_block_size > 0) {
+               int ic = ncells_old + lower_block_size;
+               for(int k = max(noffset-upper_block_start,0); ((k+ic) < (ncells_old+indices_needed_count)) && (in < (int)ncells); k++, in++) {
+                  state_temp_double[in] = mem_ptr_double[ic+k];
+               }
+            }
+            state_memory.memory_replace(mem_ptr_double, state_temp_double);
+         } else if (memory_item->mem_elsize == 4) {
+            float *mem_ptr_float = (float *)memory_item->mem_ptr;
+
+            int flags = state_memory.get_memory_flags(mem_ptr_float);
+            float *state_temp_float = (float *) state_memory.memory_malloc(ncells, sizeof(float),
+                                                                          "state_temp_float", flags);
+
+            // Same three-part copy as the double case: fetched lower block, retained cells, fetched upper block.
+            L7_Update(mem_ptr_float, L7_FLOAT, load_balance_handle);
+            in = 0;
+            if(lower_block_size > 0) {
+               for(; in < MIN(lower_block_size, (int)ncells); in++) {
+                  state_temp_float[in] = mem_ptr_float[ncells_old + in];
+               }
+            }
+
+            for(int ic = MAX((noffset - noffset_old), 0); (ic < ncells_old) && (in < (int)ncells); ic++, in++) {
+               state_temp_float[in] = mem_ptr_float[ic];
+            }
+
+            if(upper_block_size > 0) {
+               int ic = ncells_old + lower_block_size;
+               for(int k = max(noffset-upper_block_start,0); ((k+ic) < (ncells_old+indices_needed_count)) && (in < (int)ncells); k++, in++) {
+                  state_temp_float[in] = mem_ptr_float[ic+k];
+               }
+            }
+            state_memory.memory_replace(mem_ptr_float, state_temp_float);
+         }
+      }
+
+      mesh_memory.memory_realloc_all(ncells_old+indices_needed_count);
+
+      MallocPlus mesh_memory_old = mesh_memory;
+
+      for (memory_item = mesh_memory_old.memory_entry_by_name_begin();
+           memory_item != mesh_memory_old.memory_entry_by_name_end();
+           memory_item = mesh_memory_old.memory_entry_by_name_next() ) {
+
+         if (memory_item->mem_elsize == 8) {
+            long long *mem_ptr_long = (long long *)memory_item->mem_ptr;
+
+            int flags = mesh_memory.get_memory_flags(mem_ptr_long);
+            long long *mesh_temp_long = (long long *)mesh_memory.memory_malloc(ncells, sizeof(long long), "mesh_temp_long", flags);
+
+            // Mesh entries with 8-byte elements are exchanged as long long.
+            L7_Update(mem_ptr_long, L7_LONG_LONG_INT, load_balance_handle);
+            in = 0;
+            if(lower_block_size > 0) {
+               for(; in < MIN(lower_block_size, (int)ncells); in++) {
+                  mesh_temp_long[in] = mem_ptr_long[ncells_old + in];
+               }
+            }
+
+            for(int ic = MAX((noffset - noffset_old), 0); (ic < ncells_old) && (in < (int)ncells); ic++, in++) {
+               mesh_temp_long[in] = mem_ptr_long[ic];
+            }
+
+            if(upper_block_size > 0) {
+               int ic = ncells_old + lower_block_size;
+               for(int k = max(noffset-upper_block_start,0); ((k+ic) < (ncells_old+indices_needed_count)) && (in < (int)ncells); k++, in++) {
+                  mesh_temp_long[in] = mem_ptr_long[ic+k];
+               }
+            }
+            mesh_memory.memory_replace(mem_ptr_long, mesh_temp_long);
+
+         } else {
+            int *mem_ptr_int = (int *)memory_item->mem_ptr;
+
+            int flags = mesh_memory.get_memory_flags(mem_ptr_int);
+            int *mesh_temp_int = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "mesh_temp_int", flags);
+
+            // Every mesh entry whose element size is not 8 is exchanged as int.
+            L7_Update(mem_ptr_int, L7_INT, load_balance_handle);
+            in = 0;
+            if(lower_block_size > 0) {
+               for(; in < MIN(lower_block_size, (int)ncells); in++) {
+                  mesh_temp_int[in] = mem_ptr_int[ncells_old + in];
+               }
+            }
+
+            for(int ic = MAX((noffset - noffset_old), 0); (ic < ncells_old) && (in < (int)ncells); ic++, in++) {
+               mesh_temp_int[in] = mem_ptr_int[ic];
+            }
+
+            if(upper_block_size > 0) {
+               int ic = ncells_old + lower_block_size;
+               for(int k = max(noffset-upper_block_start,0); ((k+ic) < (ncells_old+indices_needed_count)) && (in < (int)ncells); k++, in++) {
+                  mesh_temp_int[in] = mem_ptr_int[ic+k];
+               }
+            }
+            mesh_memory.memory_replace(mem_ptr_int, mesh_temp_int);
+
+         }
+      }
+
+      L7_Free(&load_balance_handle);
+      load_balance_handle = 0;
+
+      memory_reset_ptrs();
+
+      // celltype was deleted above, so rebuild it for the new local cell range.
+      calc_celltype(ncells);
+   }
+
+   cpu_timers[MESH_TIMER_LOAD_BALANCE] += cpu_timer_stop(tstart_cpu);
+}
+#endif
+
+#ifdef HAVE_OPENCL
+#ifdef HAVE_MPI
+int Mesh::gpu_do_load_balance_local(size_t numcells, float *weight, MallocPlus &gpu_state_memory)
+{
+   int do_load_balance_global = 0;
+
+   if (! gpu_do_rezone) return(do_load_balance_global);
+
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+
+   // To get rid of compiler warning
+   if (DEBUG && weight != NULL) printf("DEBUG weight[0] = %f\n",weight[0]);
+
+   int ncells_old = numcells;
+   int noffset_old = ndispl[mype];
+
+// Need to add weight array to load balance if it is not NULL
+// Need to add tolerance to when load balance is done
+
+   int nsizes_old = 0;
+   for (int ip=0; ip<numpe; ip++){
+      nsizes_old = nsizes[ip];
+      nsizes[ip] = ncells_global/numpe;
+      if (ip < (int)(ncells_global%numpe)) nsizes[ip]++;
+      if (nsizes_old != nsizes[ip]) do_load_balance_global = 1;
+   }
+
+   if(do_load_balance_global) {
+
+      cl_command_queue command_queue = ezcl_get_command_queue();
+
+      gpu_counters[MESH_COUNTER_LOAD_BALANCE]++;
+
+      ndispl[0]=0;
+      for (int ip=1; ip<numpe; ip++){
+         ndispl[ip] = ndispl[ip-1] + nsizes[ip-1];
+      }
+      ncells = nsizes[mype];
+      noffset=ndispl[mype];
+
+      // Indices of blocks to be added to load balance
+      int lower_block_start = noffset;
+      int lower_block_end   = min(noffset_old-1, (int)(noffset+ncells-1));
+      int upper_block_start = max((int)(noffset_old+ncells_old), noffset);
+      int upper_block_end   = noffset+ncells-1;
+      //printf("%d: lbs %d lbe %d ubs %d ube %d\n",mype,lower_block_start-noffset_old,lower_block_end-noffset_old,upper_block_start-noffset_old,upper_block_end-noffset_old);
+
+      size_t lower_block_size = max(lower_block_end-lower_block_start+1,0);
+      if(lower_block_end < 0) lower_block_size = 0; // Handles segfault at start of array
+      size_t upper_block_size = max(upper_block_end-upper_block_start+1,0);
+      int indices_needed_count = lower_block_size + upper_block_size;
+
+      size_t middle_block_size = ncells - lower_block_size - upper_block_size;
+      int middle_block_start = max(noffset - noffset_old, 0);
+
+      int lower_segment_size = noffset-noffset_old;
+      int do_whole_segment = 0;
+      if (lower_segment_size > ncells_old) do_whole_segment = 1;
+
+      int upper_segment_size = ( (noffset_old+ncells_old) - (noffset+ncells) );
+      int upper_segment_start = (noffset_old+ncells_old) - upper_segment_size - noffset_old;
+      if (upper_segment_size > ncells_old) do_whole_segment=1;
+
+      int in = 0;
+      vector<int> indices_needed(indices_needed_count);
+      for (int iz = lower_block_start; iz <= lower_block_end; iz++, in++){
+         indices_needed[in]=iz;
+      }
+      for (int iz = upper_block_start; iz <= upper_block_end; iz++, in++){
+         indices_needed[in]=iz;
+      }
+
+      int load_balance_handle = 0;
+      L7_Setup(0, noffset_old, ncells_old, &indices_needed[0], indices_needed_count, &load_balance_handle);
+       
+      size_t local_work_size = 128;
+      size_t global_work_size = ((ncells + local_work_size - 1) / local_work_size) * local_work_size;
+
+      // printf("MYPE%d: \t ncells = %d \t ncells_old = %d \t ncells_global = %d \n", mype, ncells, ncells_old, ncells_global);
+
+      // Allocate lower block on GPU
+      size_t low_block_size = MAX(1, lower_block_size);
+      cl_mem dev_state_var_lower = ezcl_malloc(NULL, const_cast<char *>("dev_state_var_lower"), &low_block_size, sizeof(cl_real_t), CL_MEM_READ_WRITE, 0);
+
+      // Allocate upper block on GPU
+      size_t up_block_size = MAX(1, upper_block_size);
+      cl_mem dev_state_var_upper = ezcl_malloc(NULL, const_cast<char *>("dev_state_var_upper"), &up_block_size, sizeof(cl_real_t), CL_MEM_READ_WRITE, 0);
+
+      MallocPlus gpu_state_memory_old = gpu_state_memory;
+      malloc_plus_memory_entry *memory_item;
+
+      for (memory_item = gpu_state_memory_old.memory_entry_by_name_begin();
+           memory_item != gpu_state_memory_old.memory_entry_by_name_end();
+           memory_item = gpu_state_memory_old.memory_entry_by_name_next() ) {
+         //printf("DEBUG -- it.mem_name %s elsize %lu\n",memory_item->mem_name,memory_item->mem_elsize);
+         cl_mem dev_state_mem_ptr = (cl_mem)memory_item->mem_ptr;
+
+         if (memory_item->mem_elsize == 8){
+#ifndef MINIMUM_PRECISION
+            vector<double> state_var_tmp(ncells_old+indices_needed_count,0.0);
+
+            // Read current state values from GPU and write to CPU arrays
+            if (do_whole_segment) {
+               ezcl_enqueue_read_buffer(command_queue, dev_state_mem_ptr, CL_TRUE, 0, ncells_old*sizeof(cl_double), &state_var_tmp[0], NULL);
+            } else {
+               // Read lower block from GPU
+               if (lower_segment_size > 0) {
+                  ezcl_enqueue_read_buffer(command_queue, dev_state_mem_ptr, CL_TRUE, 0, lower_segment_size*sizeof(cl_double), &state_var_tmp[0], NULL);
+               }
+               // Read upper block from GPU
+               if (upper_segment_size > 0) {
+                  ezcl_enqueue_read_buffer(command_queue, dev_state_mem_ptr, CL_TRUE, upper_segment_start*sizeof(cl_double), upper_segment_size*sizeof(cl_double), &state_var_tmp[upper_segment_start], NULL);
+               }
+            }
+
+            // Update arrays with L7
+            L7_Update(&state_var_tmp[0], L7_DOUBLE, load_balance_handle);
+
+            // Set lower block on GPU
+            if(lower_block_size > 0) {
+               ezcl_enqueue_write_buffer(command_queue, dev_state_var_lower, CL_FALSE, 0, lower_block_size*sizeof(cl_double), &state_var_tmp[ncells_old], NULL);
+            }
+            // Set upper block on GPU
+            if(upper_block_size > 0) {
+               ezcl_enqueue_write_buffer(command_queue, dev_state_var_upper, CL_FALSE, 0, upper_block_size*sizeof(cl_double), &state_var_tmp[ncells_old+lower_block_size], NULL); 
+            }
+
+            // Allocate space on GPU for temp arrays (used in double buffering)
+            cl_mem dev_state_var_new = ezcl_malloc(NULL, gpu_state_memory.get_memory_name(dev_state_mem_ptr), &ncells, sizeof(cl_double), CL_MEM_READ_WRITE, 0);
+            gpu_state_memory.memory_add(dev_state_var_new, ncells, sizeof(cl_double), "dev_state_var_new", DEVICE_REGULAR_MEMORY);
+
+            //printf("DEBUG memory for proc %d is %p dev_state_new is %p\n",mype,dev_state_mem_ptr,dev_state_var_new);
+
+            ezcl_set_kernel_arg(kernel_do_load_balance_double, 0, sizeof(cl_int), &ncells);
+            ezcl_set_kernel_arg(kernel_do_load_balance_double, 1, sizeof(cl_int), &lower_block_size);
+            ezcl_set_kernel_arg(kernel_do_load_balance_double, 2, sizeof(cl_int), &middle_block_size);
+            ezcl_set_kernel_arg(kernel_do_load_balance_double, 3, sizeof(cl_int), &middle_block_start);
+            ezcl_set_kernel_arg(kernel_do_load_balance_double, 4, sizeof(cl_mem), &dev_state_mem_ptr);
+            ezcl_set_kernel_arg(kernel_do_load_balance_double, 5, sizeof(cl_mem), &dev_state_var_lower);
+            ezcl_set_kernel_arg(kernel_do_load_balance_double, 6, sizeof(cl_mem), &dev_state_var_upper);
+            ezcl_set_kernel_arg(kernel_do_load_balance_double, 7, sizeof(cl_mem), &dev_state_var_new);
+
+            ezcl_enqueue_ndrange_kernel(command_queue, kernel_do_load_balance_double,   1, NULL, &global_work_size, &local_work_size, NULL);
+
+            gpu_state_memory.memory_replace(dev_state_mem_ptr, dev_state_var_new);
+#else
+            printf("ERROR -- can't have double type for state variable\n");
+            exit(1);
+#endif
+         } else if (memory_item->mem_elsize == 4){
+            vector<float> state_var_tmp(ncells_old+indices_needed_count,0.0);
+
+            // Read current state values from GPU and write to CPU arrays
+            if (do_whole_segment) {
+               ezcl_enqueue_read_buffer(command_queue, dev_state_mem_ptr, CL_TRUE, 0, ncells_old*sizeof(cl_float), &state_var_tmp[0], NULL);
+            } else {
+               // Read lower block from GPU
+               if (lower_segment_size > 0) {
+                  ezcl_enqueue_read_buffer(command_queue, dev_state_mem_ptr, CL_TRUE, 0, lower_segment_size*sizeof(cl_float), &state_var_tmp[0], NULL);
+               }
+               // Read upper block from GPU
+               if (upper_segment_size > 0) {
+                  ezcl_enqueue_read_buffer(command_queue, dev_state_mem_ptr, CL_TRUE, upper_segment_start*sizeof(cl_float), upper_segment_size*sizeof(cl_float), &state_var_tmp[upper_segment_start], NULL);
+               }
+            }
+
+            // Update arrays with L7
+            L7_Update(&state_var_tmp[0], L7_FLOAT, load_balance_handle);
+
+            // Set lower block on GPU
+            if(lower_block_size > 0) {
+               ezcl_enqueue_write_buffer(command_queue, dev_state_var_lower, CL_FALSE, 0, lower_block_size*sizeof(cl_float), &state_var_tmp[ncells_old], NULL);
+            }
+            // Set upper block on GPU
+            if(upper_block_size > 0) {
+               ezcl_enqueue_write_buffer(command_queue, dev_state_var_upper, CL_FALSE, 0, upper_block_size*sizeof(cl_float), &state_var_tmp[ncells_old+lower_block_size], NULL); 
+            }
+
+            // Allocate space on GPU for temp arrays (used in double buffering)
+            cl_mem dev_state_var_new = ezcl_malloc(NULL, gpu_state_memory.get_memory_name(dev_state_mem_ptr), &ncells, sizeof(cl_float), CL_MEM_READ_WRITE, 0);
+            gpu_state_memory.memory_add(dev_state_var_new, ncells, sizeof(cl_float), "dev_state_var_new", DEVICE_REGULAR_MEMORY);
+
+            //printf("DEBUG memory for proc %d is %p dev_state_new is %p\n",mype,dev_state_mem_ptr,dev_state_var_new);
+
+            ezcl_set_kernel_arg(kernel_do_load_balance_float, 0, sizeof(cl_int), &ncells);
+            ezcl_set_kernel_arg(kernel_do_load_balance_float, 1, sizeof(cl_int), &lower_block_size);
+            ezcl_set_kernel_arg(kernel_do_load_balance_float, 2, sizeof(cl_int), &middle_block_size);
+            ezcl_set_kernel_arg(kernel_do_load_balance_float, 3, sizeof(cl_int), &middle_block_start);
+            ezcl_set_kernel_arg(kernel_do_load_balance_float, 4, sizeof(cl_mem), &dev_state_mem_ptr);
+            ezcl_set_kernel_arg(kernel_do_load_balance_float, 5, sizeof(cl_mem), &dev_state_var_lower);
+            ezcl_set_kernel_arg(kernel_do_load_balance_float, 6, sizeof(cl_mem), &dev_state_var_upper);
+            ezcl_set_kernel_arg(kernel_do_load_balance_float, 7, sizeof(cl_mem), &dev_state_var_new);
+
+            ezcl_enqueue_ndrange_kernel(command_queue, kernel_do_load_balance_float,   1, NULL, &global_work_size, &local_work_size, NULL);
+
+            gpu_state_memory.memory_replace(dev_state_mem_ptr, dev_state_var_new);
+         }
+      }
+
+      ezcl_device_memory_delete(dev_state_var_lower);
+      ezcl_device_memory_delete(dev_state_var_upper);
+
+      vector<int> i_tmp(ncells_old+indices_needed_count,0);
+      vector<int> j_tmp(ncells_old+indices_needed_count,0);
+      vector<int> level_tmp(ncells_old+indices_needed_count,0);
+      vector<int> celltype_tmp(ncells_old+indices_needed_count,0);
+
+      if (do_whole_segment) {
+         ezcl_enqueue_read_buffer(command_queue, dev_i,        CL_FALSE, 0, ncells_old*sizeof(cl_int), &i_tmp[0],        NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_j,        CL_FALSE, 0, ncells_old*sizeof(cl_int), &j_tmp[0],        NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_level,    CL_FALSE, 0, ncells_old*sizeof(cl_int), &level_tmp[0],    NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_celltype, CL_TRUE,  0, ncells_old*sizeof(cl_int), &celltype_tmp[0], NULL);
+      } else {
+         if (lower_segment_size > 0) {
+            ezcl_enqueue_read_buffer(command_queue, dev_i,        CL_FALSE, 0, lower_segment_size*sizeof(cl_int), &i_tmp[0],        NULL);
+            ezcl_enqueue_read_buffer(command_queue, dev_j,        CL_FALSE, 0, lower_segment_size*sizeof(cl_int), &j_tmp[0],        NULL);
+            ezcl_enqueue_read_buffer(command_queue, dev_level,    CL_FALSE, 0, lower_segment_size*sizeof(cl_int), &level_tmp[0],    NULL);
+            ezcl_enqueue_read_buffer(command_queue, dev_celltype, CL_TRUE,  0, lower_segment_size*sizeof(cl_int), &celltype_tmp[0], NULL);
+         }
+         if (upper_segment_size > 0) {
+            ezcl_enqueue_read_buffer(command_queue, dev_i,        CL_FALSE, upper_segment_start*sizeof(cl_int), upper_segment_size*sizeof(cl_int), &i_tmp[upper_segment_start],        NULL);
+            ezcl_enqueue_read_buffer(command_queue, dev_j,        CL_FALSE, upper_segment_start*sizeof(cl_int), upper_segment_size*sizeof(cl_int), &j_tmp[upper_segment_start],        NULL);
+            ezcl_enqueue_read_buffer(command_queue, dev_level,    CL_FALSE, upper_segment_start*sizeof(cl_int), upper_segment_size*sizeof(cl_int), &level_tmp[upper_segment_start],    NULL);
+            ezcl_enqueue_read_buffer(command_queue, dev_celltype, CL_TRUE,  upper_segment_start*sizeof(cl_int), upper_segment_size*sizeof(cl_int), &celltype_tmp[upper_segment_start], NULL);
+         }
+      }
+
+      L7_Update(&i_tmp[0],        L7_INT, load_balance_handle);
+      L7_Update(&j_tmp[0],        L7_INT, load_balance_handle);
+      L7_Update(&level_tmp[0],    L7_INT, load_balance_handle);
+      L7_Update(&celltype_tmp[0], L7_INT, load_balance_handle);
+
+      L7_Free(&load_balance_handle);
+      load_balance_handle = 0;
+
+      // Allocate and set lower block on GPU
+      cl_mem dev_i_lower, dev_j_lower, dev_level_lower, dev_celltype_lower;
+
+      if(lower_block_size > 0) {
+         dev_i_lower        = ezcl_malloc(NULL, const_cast<char *>("dev_i_lower"),        &lower_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+         dev_j_lower        = ezcl_malloc(NULL, const_cast<char *>("dev_j_lower"),        &lower_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+         dev_level_lower    = ezcl_malloc(NULL, const_cast<char *>("dev_level_lower"),    &lower_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+         dev_celltype_lower = ezcl_malloc(NULL, const_cast<char *>("dev_celltype_lower"), &lower_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+
+         ezcl_enqueue_write_buffer(command_queue, dev_i_lower,        CL_FALSE, 0, lower_block_size*sizeof(cl_int), &i_tmp[ncells_old],        NULL);
+         ezcl_enqueue_write_buffer(command_queue, dev_j_lower,        CL_FALSE, 0, lower_block_size*sizeof(cl_int), &j_tmp[ncells_old],        NULL);
+         ezcl_enqueue_write_buffer(command_queue, dev_level_lower,    CL_FALSE, 0, lower_block_size*sizeof(cl_int), &level_tmp[ncells_old],    NULL);
+         ezcl_enqueue_write_buffer(command_queue, dev_celltype_lower, CL_TRUE,  0, lower_block_size*sizeof(cl_int), &celltype_tmp[ncells_old], NULL);
+      }
+
+      // Allocate and set upper block on GPU
+      cl_mem dev_i_upper, dev_j_upper, dev_level_upper, dev_celltype_upper;
+      if(upper_block_size > 0) {
+         dev_i_upper        = ezcl_malloc(NULL, const_cast<char *>("dev_i_upper"),        &upper_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+         dev_j_upper        = ezcl_malloc(NULL, const_cast<char *>("dev_j_upper"),        &upper_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+         dev_level_upper    = ezcl_malloc(NULL, const_cast<char *>("dev_level_upper"),    &upper_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+         dev_celltype_upper = ezcl_malloc(NULL, const_cast<char *>("dev_celltype_upper"), &upper_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+
+         ezcl_enqueue_write_buffer(command_queue, dev_i_upper,        CL_FALSE, 0, upper_block_size*sizeof(cl_int), &i_tmp[ncells_old+lower_block_size],        NULL);
+         ezcl_enqueue_write_buffer(command_queue, dev_j_upper,        CL_FALSE, 0, upper_block_size*sizeof(cl_int), &j_tmp[ncells_old+lower_block_size],        NULL);
+         ezcl_enqueue_write_buffer(command_queue, dev_level_upper,    CL_FALSE, 0, upper_block_size*sizeof(cl_int), &level_tmp[ncells_old+lower_block_size],    NULL);
+         ezcl_enqueue_write_buffer(command_queue, dev_celltype_upper, CL_TRUE,  0, upper_block_size*sizeof(cl_int), &celltype_tmp[ncells_old+lower_block_size], NULL);
+      }
+
+      local_work_size = 128;
+
+      // printf("MYPE%d: \t ncells = %d \t ncells_old = %d \t ncells_global = %d \n", mype, ncells, ncells_old, ncells_global);
+      // Allocate space on GPU for temp arrays (used in double buffering)
+
+      size_t mem_request = (int)((float)ncells*mem_factor);
+      cl_mem dev_i_new        = ezcl_malloc(NULL, const_cast<char *>("dev_i_new"),        &mem_request, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+      cl_mem dev_j_new        = ezcl_malloc(NULL, const_cast<char *>("dev_j_new"),        &mem_request, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+      cl_mem dev_level_new    = ezcl_malloc(NULL, const_cast<char *>("dev_level_new"),    &mem_request, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+      cl_mem dev_celltype_new = ezcl_malloc(NULL, const_cast<char *>("dev_celltype_new"), &mem_request, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+
+      // Set kernel arguments and call lower block kernel
+      if(lower_block_size > 0) {
+
+         size_t global_work_size = ((lower_block_size + local_work_size - 1) / local_work_size) * local_work_size;
+
+         ezcl_set_kernel_arg(kernel_do_load_balance_lower, 0, sizeof(cl_mem), &dev_i_new);
+         ezcl_set_kernel_arg(kernel_do_load_balance_lower, 1, sizeof(cl_mem), &dev_j_new);
+         ezcl_set_kernel_arg(kernel_do_load_balance_lower, 2, sizeof(cl_mem), &dev_level_new);
+         ezcl_set_kernel_arg(kernel_do_load_balance_lower, 3, sizeof(cl_mem), &dev_celltype_new);
+         ezcl_set_kernel_arg(kernel_do_load_balance_lower, 4, sizeof(cl_mem), &dev_i_lower);
+         ezcl_set_kernel_arg(kernel_do_load_balance_lower, 5, sizeof(cl_mem), &dev_j_lower);
+         ezcl_set_kernel_arg(kernel_do_load_balance_lower, 6, sizeof(cl_mem), &dev_level_lower);
+         ezcl_set_kernel_arg(kernel_do_load_balance_lower, 7, sizeof(cl_mem), &dev_celltype_lower);
+         ezcl_set_kernel_arg(kernel_do_load_balance_lower, 8, sizeof(cl_int), &lower_block_size);
+
+         ezcl_enqueue_ndrange_kernel(command_queue, kernel_do_load_balance_lower,   1, NULL, &global_work_size, &local_work_size, NULL);
+
+         ezcl_device_memory_delete(dev_i_lower);
+         ezcl_device_memory_delete(dev_j_lower);
+         ezcl_device_memory_delete(dev_level_lower);
+         ezcl_device_memory_delete(dev_celltype_lower);
+      }
+
+      // Set kernel arguments and call middle block kernel
+      if(middle_block_size > 0) {
+
+         size_t global_work_size = ((middle_block_size + local_work_size - 1) / local_work_size) * local_work_size;
+
+         ezcl_set_kernel_arg(kernel_do_load_balance_middle,  0, sizeof(cl_mem), &dev_i_new);
+         ezcl_set_kernel_arg(kernel_do_load_balance_middle,  1, sizeof(cl_mem), &dev_j_new);
+         ezcl_set_kernel_arg(kernel_do_load_balance_middle,  2, sizeof(cl_mem), &dev_level_new);
+         ezcl_set_kernel_arg(kernel_do_load_balance_middle,  3, sizeof(cl_mem), &dev_celltype_new);
+         ezcl_set_kernel_arg(kernel_do_load_balance_middle,  4, sizeof(cl_mem), &dev_i);
+         ezcl_set_kernel_arg(kernel_do_load_balance_middle,  5, sizeof(cl_mem), &dev_j);
+         ezcl_set_kernel_arg(kernel_do_load_balance_middle,  6, sizeof(cl_mem), &dev_level);
+         ezcl_set_kernel_arg(kernel_do_load_balance_middle,  7, sizeof(cl_mem), &dev_celltype);
+         ezcl_set_kernel_arg(kernel_do_load_balance_middle,  8, sizeof(cl_int), &lower_block_size);
+         ezcl_set_kernel_arg(kernel_do_load_balance_middle,  9, sizeof(cl_int), &middle_block_size);
+         ezcl_set_kernel_arg(kernel_do_load_balance_middle, 10, sizeof(cl_int), &middle_block_start);
+
+         ezcl_enqueue_ndrange_kernel(command_queue, kernel_do_load_balance_middle,   1, NULL, &global_work_size, &local_work_size, NULL);
+      }
+
+      // Set kernel arguments and call upper block kernel
+      if(upper_block_size > 0) {
+
+         size_t global_work_size = ((upper_block_size + local_work_size - 1) / local_work_size) * local_work_size;
+
+         ezcl_set_kernel_arg(kernel_do_load_balance_upper,  0, sizeof(cl_mem), &dev_i_new);
+         ezcl_set_kernel_arg(kernel_do_load_balance_upper,  1, sizeof(cl_mem), &dev_j_new);
+         ezcl_set_kernel_arg(kernel_do_load_balance_upper,  2, sizeof(cl_mem), &dev_level_new);
+         ezcl_set_kernel_arg(kernel_do_load_balance_upper,  3, sizeof(cl_mem), &dev_celltype_new);
+         ezcl_set_kernel_arg(kernel_do_load_balance_upper,  4, sizeof(cl_mem), &dev_i_upper);
+         ezcl_set_kernel_arg(kernel_do_load_balance_upper,  5, sizeof(cl_mem), &dev_j_upper);
+         ezcl_set_kernel_arg(kernel_do_load_balance_upper,  6, sizeof(cl_mem), &dev_level_upper);
+         ezcl_set_kernel_arg(kernel_do_load_balance_upper,  7, sizeof(cl_mem), &dev_celltype_upper);
+         ezcl_set_kernel_arg(kernel_do_load_balance_upper,  8, sizeof(cl_int), &lower_block_size);
+         ezcl_set_kernel_arg(kernel_do_load_balance_upper,  9, sizeof(cl_int), &middle_block_size);
+         ezcl_set_kernel_arg(kernel_do_load_balance_upper, 10, sizeof(cl_int), &upper_block_size);
+
+         ezcl_enqueue_ndrange_kernel(command_queue, kernel_do_load_balance_upper,   1, NULL, &global_work_size, &local_work_size, NULL);
+
+         ezcl_device_memory_delete(dev_i_upper);
+         ezcl_device_memory_delete(dev_j_upper);
+         ezcl_device_memory_delete(dev_level_upper);
+         ezcl_device_memory_delete(dev_celltype_upper);
+      }
+
+      ezcl_device_memory_swap(&dev_i_new,        &dev_i);
+      ezcl_device_memory_swap(&dev_j_new,        &dev_j);
+      ezcl_device_memory_swap(&dev_level_new,    &dev_level);
+      ezcl_device_memory_swap(&dev_celltype_new, &dev_celltype);
+
+      ezcl_device_memory_delete(dev_i_new);
+      ezcl_device_memory_delete(dev_j_new);
+      ezcl_device_memory_delete(dev_level_new);
+      ezcl_device_memory_delete(dev_celltype_new);
+
+      gpu_timers[MESH_TIMER_LOAD_BALANCE] += (long int)(cpu_timer_stop(tstart_cpu)*1.0e9);
+   }
+
+   return(do_load_balance_global);
+}
+#endif
+#endif
+
+#ifdef HAVE_OPENCL
+int Mesh::gpu_count_BCs(void)
+{  // Launches kernel_count_BCs (plus a second reduction pass when more than one work-group ran) and returns the first cl_int read back from the scratch buffer; returns 0 with no device work when have_boundary is set.
+   cl_event count_BCs_stage1_event, count_BCs_stage2_event;
+   // NOTE(review): these sizes are computed even when have_boundary is set, and ncells == 0 would make local_work_size 0 and divide by zero below -- confirm callers never hit that.
+   size_t local_work_size  = MIN(ncells, TILE_SIZE);
+   size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size;  // ncells rounded up to a multiple of local_work_size
+
+   //size_t block_size = (ncells + TILE_SIZE - 1) / TILE_SIZE; //  For on-device global reduction kernel.
+   size_t block_size     = global_work_size/local_work_size;  // number of work-groups; also the element count of dev_ioffset
+
+   int bcount = 0;
+
+   if (! have_boundary) {
+      cl_command_queue command_queue = ezcl_get_command_queue();
+      cl_mem dev_ioffset  = ezcl_malloc(NULL, const_cast<char *>("dev_ioffset"), &block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
+
+       /*
+       __kernel void count_BCs(
+                        const int    isize,      // 0   
+               __global const int   *i,         // 1
+               __global const int   *j,         // 2
+               __global const int   *level,     // 3
+               __global const int   *lev_ibeg,  // 4
+               __global const int   *lev_iend,  // 5
+               __global const int   *lev_jbeg,  // 6
+               __global const int   *lev_jend,  // 7
+               __global       int   *scratch,   // 8
+               __local        int   *tile)      // 9
+       */
+      size_t shared_spd_sum_int = local_work_size * sizeof(cl_int);  // bytes for the __local tile argument: one cl_int per work-item
+      ezcl_set_kernel_arg(kernel_count_BCs, 0, sizeof(cl_int), (void *)&ncells);  // NOTE(review): sized as cl_int -- confirm ncells' declared type is compatible
+      ezcl_set_kernel_arg(kernel_count_BCs, 1, sizeof(cl_mem), (void *)&dev_i);
+      ezcl_set_kernel_arg(kernel_count_BCs, 2, sizeof(cl_mem), (void *)&dev_j);
+      ezcl_set_kernel_arg(kernel_count_BCs, 3, sizeof(cl_mem), (void *)&dev_level);
+      ezcl_set_kernel_arg(kernel_count_BCs, 4, sizeof(cl_mem), (void *)&dev_levibeg);
+      ezcl_set_kernel_arg(kernel_count_BCs, 5, sizeof(cl_mem), (void *)&dev_leviend);
+      ezcl_set_kernel_arg(kernel_count_BCs, 6, sizeof(cl_mem), (void *)&dev_levjbeg);
+      ezcl_set_kernel_arg(kernel_count_BCs, 7, sizeof(cl_mem), (void *)&dev_levjend);
+      ezcl_set_kernel_arg(kernel_count_BCs, 8, sizeof(cl_mem), (void *)&dev_ioffset);
+      ezcl_set_kernel_arg(kernel_count_BCs, 9, shared_spd_sum_int, 0);  // size only, null pointer, for the __local argument
+
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_count_BCs, 1, NULL, &global_work_size, &local_work_size, &count_BCs_stage1_event);
+      // Second pass only when several work-groups ran; it is launched with global size equal to local size, i.e. one work-group.
+      if (block_size > 1) {
+         ezcl_set_kernel_arg(kernel_reduce_sum_int_stage2of2, 0, sizeof(cl_int), (void *)&block_size);
+         ezcl_set_kernel_arg(kernel_reduce_sum_int_stage2of2, 1, sizeof(cl_mem), (void *)&dev_ioffset);
+         ezcl_set_kernel_arg(kernel_reduce_sum_int_stage2of2, 2, shared_spd_sum_int, 0);
+
+         ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduce_sum_int_stage2of2, 1, NULL, &local_work_size, &local_work_size, &count_BCs_stage2_event);
+      }
+
+      ezcl_enqueue_read_buffer(command_queue, dev_ioffset, CL_TRUE, 0, 1*sizeof(cl_int), &bcount, NULL);  // blocking read (CL_TRUE) of the first element only
+ 
+      //printf("DEBUG -- bcount is %d\n",bcount);
+      //state->gpu_time_read += ezcl_timer_calc(&start_read_event, &start_read_event);
+
+      ezcl_device_memory_delete(dev_ioffset);
+
+      gpu_timers[MESH_TIMER_COUNT_BCS] += ezcl_timer_calc(&count_BCs_stage1_event, &count_BCs_stage1_event);  // the same event is passed as both arguments
+      if (block_size > 1) {
+         gpu_timers[MESH_TIMER_COUNT_BCS] += ezcl_timer_calc(&count_BCs_stage2_event, &count_BCs_stage2_event);
+      }
+
+   }
+
+   return(bcount);
+}
+#endif
+
+void Mesh::allocate(size_t ncells)
+{
+#ifdef HAVE_J7
+   const int alloc_flags = parallel ? LOAD_BALANCE_MEMORY : RESTART_DATA;  // parallel runs in HAVE_J7 builds use the load-balance flag
+#else
+   const int alloc_flags = RESTART_DATA;
+#endif
+   // Request ncells ints from mesh_memory for each of the arrays registered as "i", "j" and "level".
+   i = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "i", alloc_flags);
+   j = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "j", alloc_flags);
+   level = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "level", alloc_flags);
+}
+
+
+void Mesh::resize(size_t new_ncells)
+{
+   // Reallocate everything tracked by mesh_memory, but only when the request exceeds the size recorded for i.
+   if (mesh_memory.get_memory_size(i) < new_ncells) mesh_memory.memory_realloc_all(new_ncells);
+}
+
+void Mesh::memory_reset_ptrs(void){  // Re-fetch the eight cached array pointers from mesh_memory by name; presumably needed after mesh_memory moves its allocations (e.g. memory_realloc_all) -- confirm.
+   i        = (int *)mesh_memory.get_memory_ptr("i");
+   j        = (int *)mesh_memory.get_memory_ptr("j");
+   level    = (int *)mesh_memory.get_memory_ptr("level");
+   celltype = (int *)mesh_memory.get_memory_ptr("celltype");
+   nlft     = (int *)mesh_memory.get_memory_ptr("nlft");
+   nrht     = (int *)mesh_memory.get_memory_ptr("nrht");
+   nbot     = (int *)mesh_memory.get_memory_ptr("nbot");
+   ntop     = (int *)mesh_memory.get_memory_ptr("ntop");
+}
+
+// Delete dev_level, dev_i, dev_j and dev_celltype and allocate fresh buffers sized from ncells*mem_factor; nothing is copied across. Does nothing without HAVE_OPENCL.
+void Mesh::resize_old_device_memory(size_t ncells)
+{
+#ifdef HAVE_OPENCL
+   ezcl_device_memory_delete(dev_level);
+   ezcl_device_memory_delete(dev_i);
+   ezcl_device_memory_delete(dev_j);
+   ezcl_device_memory_delete(dev_celltype);
+   size_t mem_request = (int)((float)ncells*mem_factor);  // element count: float multiply, then truncated to int
+   dev_level    = ezcl_malloc(NULL, const_cast<char *>("dev_level"),    &mem_request, sizeof(cl_int),  CL_MEM_READ_ONLY, 0);
+   dev_i        = ezcl_malloc(NULL, const_cast<char *>("dev_i"),        &mem_request, sizeof(cl_int),  CL_MEM_READ_ONLY, 0);
+   dev_j        = ezcl_malloc(NULL, const_cast<char *>("dev_j"),        &mem_request, sizeof(cl_int),  CL_MEM_READ_ONLY, 0);
+   dev_celltype = ezcl_malloc(NULL, const_cast<char *>("dev_celltype"), &mem_request, sizeof(cl_int),  CL_MEM_READ_ONLY, 0);
+#else
+   (void)ncells;  // unused without OpenCL; replaces a never-executed printf that used %lu for a size_t
+#endif
+}
+// Print to stdout the mesh dimensionality, parallel layout, cell counts, and the pointer, element count and element size of each device buffer (OpenCL builds) and host array.
+void Mesh::print_object_info(void)
+{
+   printf(" ---- Mesh object info -----\n");
+   printf("Dimensionality : %d\n",ndim);
+   printf("Parallel info  : mype %d numpe %d noffset %d parallel %d\n",mype,numpe,noffset,parallel);
+   printf("Sizes          : ncells %ld ncells_ghost %ld\n\n",(long)ncells,(long)ncells_ghost);  // casts make %ld correct whatever integer type the counts are declared as
+#ifdef HAVE_OPENCL
+   int num_elements, elsize;  // element count and element size reported by ezcl for each buffer; %p needs a void * argument, hence the casts
+   num_elements = ezcl_get_device_mem_nelements(dev_celltype);
+   elsize = ezcl_get_device_mem_elsize(dev_celltype);
+   printf("dev_celltype     ptr : %p nelements %d elsize %d\n",(void *)dev_celltype,num_elements,elsize);
+   num_elements = ezcl_get_device_mem_nelements(dev_level);
+   elsize = ezcl_get_device_mem_elsize(dev_level);
+   printf("dev_level        ptr : %p nelements %d elsize %d\n",(void *)dev_level,num_elements,elsize);
+   num_elements = ezcl_get_device_mem_nelements(dev_i);
+   elsize = ezcl_get_device_mem_elsize(dev_i);
+   printf("dev_i            ptr : %p nelements %d elsize %d\n",(void *)dev_i,num_elements,elsize);
+   num_elements = ezcl_get_device_mem_nelements(dev_j);
+   elsize = ezcl_get_device_mem_elsize(dev_j);
+   printf("dev_j            ptr : %p nelements %d elsize %d\n",(void *)dev_j,num_elements,elsize);
+
+   num_elements = ezcl_get_device_mem_nelements(dev_nlft);
+   elsize = ezcl_get_device_mem_elsize(dev_nlft);
+   printf("dev_nlft         ptr : %p nelements %d elsize %d\n",(void *)dev_nlft,num_elements,elsize);
+   num_elements = ezcl_get_device_mem_nelements(dev_nrht);
+   elsize = ezcl_get_device_mem_elsize(dev_nrht);
+   printf("dev_nrht         ptr : %p nelements %d elsize %d\n",(void *)dev_nrht,num_elements,elsize);
+   num_elements = ezcl_get_device_mem_nelements(dev_nbot);
+   elsize = ezcl_get_device_mem_elsize(dev_nbot);
+   printf("dev_nbot         ptr : %p nelements %d elsize %d\n",(void *)dev_nbot,num_elements,elsize);
+   num_elements = ezcl_get_device_mem_nelements(dev_ntop);
+   elsize = ezcl_get_device_mem_elsize(dev_ntop);
+   printf("dev_ntop         ptr : %p nelements %d elsize %d\n",(void *)dev_ntop,num_elements,elsize);
+#endif
+   printf("vector celltype  ptr : %p nelements %zu elsize %zu\n",(void *)celltype,mesh_memory.get_memory_size(celltype),sizeof(celltype[0]));  // %zu: both values are size_t
+   printf("vector level     ptr : %p nelements %zu elsize %zu\n",(void *)level,   mesh_memory.get_memory_size(level),   sizeof(level[0]));
+   printf("vector i         ptr : %p nelements %zu elsize %zu\n",(void *)i,       mesh_memory.get_memory_size(i),       sizeof(i[0]));
+   printf("vector j         ptr : %p nelements %zu elsize %zu\n",(void *)j,       mesh_memory.get_memory_size(j),       sizeof(j[0]));
+
+   printf("vector nlft      ptr : %p nelements %zu elsize %zu\n",(void *)nlft,    mesh_memory.get_memory_size(nlft),    sizeof(nlft[0]));
+   printf("vector nrht      ptr : %p nelements %zu elsize %zu\n",(void *)nrht,    mesh_memory.get_memory_size(nrht),    sizeof(nrht[0]));
+   printf("vector nbot      ptr : %p nelements %zu elsize %zu\n",(void *)nbot,    mesh_memory.get_memory_size(nbot),    sizeof(nbot[0]));
+   printf("vector ntop      ptr : %p nelements %zu elsize %zu\n",(void *)ntop,    mesh_memory.get_memory_size(ntop),    sizeof(ntop[0]));
+}
+
+
+void Mesh::set_refinement_order(int order[4], int ic, int ifirst, int ilast, int jfirst, int jlast,
+                                int level_first, int level_last, int *i_old, int *j_old, int *level_old)
+{
+            if (localStencil) {
+               //  Store the coordinates of the cells before and after this one on
+               //  the space-filling curve index.
+
+#ifdef __OLD_STENCIL__
+               spatial_t  nx[3],  //  x-coordinates of cells.
+                          ny[3];  //  y-coordinates of cells.
+               if (ic != 0) {
+                  nx[0] = lev_deltax[level_old[ic-1]] * (spatial_t)i[ic-1];
+                  ny[0] = lev_deltay[level_old[ic-1]] * (spatial_t)j[ic-1];
+               } else {
+                  nx[0] = lev_deltax[level_first] * (spatial_t)ifirst;
+                  ny[0] = lev_deltay[level_first] * (spatial_t)jfirst;
+               }
+               nx[1] = lev_deltax[level_old[ic  ]] * (spatial_t)i[ic  ];
+               ny[1] = lev_deltay[level_old[ic  ]] * (spatial_t)j[ic  ];
+               if (ic != ncells-1) {
+                  nx[2] = lev_deltax[level_old[ic+1]] * (spatial_t)i[ic+1];
+                  ny[2] = lev_deltay[level_old[ic+1]] * (spatial_t)j[ic+1];
+               } else {
+                  nx[2] = lev_deltax[level_last] * (spatial_t)ilast;
+                  ny[2] = lev_deltay[level_last] * (spatial_t)jlast;
+               }
+
+               //  Figure out relative orientation of the neighboring cells.  We
+               //  are aided in this because the Hilbert curve only has six possible
+               //  ways across the cell:  four Ls and two straight lines.  Then
+               //  refine the cell according to the relative orientation and order
+               //  according to the four-point Hilbert stencil.
+               if      (nx[0] < nx[1] and ny[2] < ny[1])   //  southwest L, forward order
+               {  order[0] = SW; order[1] = NW; order[2] = NE; order[3] = SE; }
+               else if (nx[2] < nx[1] and ny[0] < ny[1])   //  southwest L, reverse order
+               {  order[0] = SE; order[1] = NE; order[2] = NW; order[3] = SW; }
+               else if (nx[0] > nx[1] and ny[2] < ny[1])   //  southeast L, forward order
+               {  order[0] = SE; order[1] = NE; order[2] = NW; order[3] = SW; }
+               else if (nx[2] > nx[1] and ny[0] < ny[1])   //  southeast L, reverse order
+               {  order[0] = SW; order[1] = NW; order[2] = NE; order[3] = SE; }
+               else if (nx[0] > nx[1] and ny[2] > ny[1])   //  northeast L, forward order
+               {  order[0] = SE; order[1] = SW; order[2] = NW; order[3] = NE; }
+               else if (nx[2] > nx[1] and ny[0] > ny[1])   //  northeast L, reverse order
+               {  order[0] = NE; order[1] = NW; order[2] = SW; order[3] = SE; }
+               else if (nx[0] < nx[1] and ny[2] > ny[1])   //  northwest L, forward order
+               {  order[0] = SW; order[1] = SE; order[2] = NE; order[3] = NW; }
+               else if (nx[2] < nx[1] and ny[0] > ny[1])   //  northwest L, reverse order
+               {  order[0] = NW; order[1] = NE; order[2] = SE; order[3] = SW; }
+               else if (nx[0] > nx[1] and nx[1] > nx[2])   //  straight horizontal, forward order
+               {  order[0] = NE; order[1] = SE; order[2] = SW; order[3] = NW; }
+               else if (nx[0] < nx[1] and nx[1] < nx[2])   //  straight horizontal, reverse order
+               {  order[0] = SW; order[1] = NW; order[2] = NE; order[3] = SE; }
+               else if (ny[0] > ny[1] and ny[1] > ny[2])   //  straight vertical, forward order
+               {  order[0] = NE; order[1] = NW; order[2] = SW; order[3] = SE; }
+               else if (ny[0] < ny[1] and ny[1] < ny[2])   //  straight vertical, reverse order
+               {  order[0] = SW; order[1] = SE; order[2] = NE; order[3] = NW; }
+               else                                        //  other, default to z-order
+               {  order[0] = SW; order[1] = SE; order[2] = NW; order[3] = NE; }
+#endif
+
+#ifdef __NEW_STENCIL__
+               int ir[3],   // First i index at finest level of the mesh
+                   jr[3];   // First j index at finest level of the mesh
+               // Cell's Radius at the Finest level of the mesh
+
+               int crf = IPOW2(levmx-level_old[ic]);
+
+               if (ic != 0) {
+                  ir[0] = i_old[ic - 1] * IPOW2(levmx-level_old[ic - 1]);
+                  jr[0] = j_old[ic - 1] * IPOW2(levmx-level_old[ic - 1]);
+               } else {
+                  //printf("%d cell %d is a first\n",mype,ic);
+                  ir[0] = ifirst * IPOW2(levmx-level_first);
+                  jr[0] = jfirst * IPOW2(levmx-level_first);
+               }
+               ir[1] = i_old[ic    ] * IPOW2(levmx-level_old[ic    ]);
+               jr[1] = j_old[ic    ] * IPOW2(levmx-level_old[ic    ]);
+               if (ic != (int)ncells-1) {
+                  ir[2] = i_old[ic + 1] * IPOW2(levmx-level_old[ic + 1]);
+                  jr[2] = j_old[ic + 1] * IPOW2(levmx-level_old[ic + 1]);
+               } else {
+                  //printf("%d cell %d is a last\n",mype,ic);
+                  ir[2] = ilast * IPOW2(levmx-level_last);
+                  jr[2] = jlast * IPOW2(levmx-level_last);
+               }
+               //if (parallel) fprintf(fp,"%d: DEBUG rezone top boundary -- ic %d global %d noffset %d nc %d i %d j %d level %d\n",mype,ic,ic+noffset,noffset,nc,i[nc],j[nc],level[nc]);
+
+               int dir_in  = ir[1] - ir[0];
+               int dir_out = ir[1] - ir[2];
+               int djr_in  = jr[1] - jr[0];
+               int djr_out = jr[1] - jr[2];
+
+               char  in_direction = 'X';
+               char out_direction = 'X';
+
+               // Left In
+               if( (djr_in == 0 && (dir_in == crf*HALF || dir_in == crf || dir_in == crf*TWO)) || (djr_in == -crf*HALF && dir_in == crf*HALF) || (djr_in == crf && dir_in == crf*TWO) ) {
+                  in_direction = 'L';
+               }
+               // Bottom In
+               else if( (dir_in == 0 && (djr_in == crf*HALF || djr_in == crf || djr_in == crf*TWO)) || (dir_in == -crf*HALF && djr_in == crf*HALF) || (dir_in == crf && djr_in == crf*TWO) ) {
+                  in_direction = 'B';
+               }
+               // Right In
+               else if( (dir_in == -crf && (djr_in == -crf*HALF || djr_in == 0 || (djr_in == crf && level_old[ic-1] < level_old[ic]))) ) {
+                  in_direction = 'R';
+               }
+               // Top In
+               else if( (djr_in == -crf && (dir_in == -crf*HALF || dir_in == 0 || (dir_in == crf && level_old[ic-1] < level_old[ic]))) ) {
+                  in_direction = 'T';
+               }
+               // Further from the left
+               else if( dir_in > 0 && djr_in == 0 ) {
+                  in_direction = 'L';
+               }
+               // Further from the right
+               else if( dir_in < 0 && djr_in == 0 ) {
+                  in_direction = 'R';
+               }
+               // Further from the bottom
+               else if( djr_in > 0 && dir_in == 0 ) {
+                  in_direction = 'B';
+               }
+               // Further from the top
+               else if( djr_in < 0 && dir_in == 0 ) {
+                  in_direction = 'T';
+               }
+               // SW in; 'M'
+               else if( dir_in > 0 && djr_in > 0) {
+                  in_direction = 'M';
+               }
+               // NW in; 'W'
+               else if( dir_in > 0 && djr_in < 0) {
+                  in_direction = 'W';
+               }
+               // SE in; 'F'
+               else if( dir_in < 0 && djr_in > 0) {
+                  in_direction = 'F';
+               }
+               // NE in; 'E'
+               else if( dir_in < 0 && djr_in < 0) {
+                  in_direction = 'E';
+               }
+
+   
+               // Left Out
+               if( (djr_out == 0 && (dir_out == crf*HALF || dir_out == crf || dir_out == crf*TWO)) || (djr_out == -crf*HALF && dir_out == crf*HALF) || (djr_out == crf && dir_out == crf*TWO) ) {
+                  out_direction = 'L';
+               }
+               // Bottom Out
+               else if( (dir_out == 0 && (djr_out == crf*HALF || djr_out == crf || djr_out == crf*TWO)) || (dir_out == -crf*HALF && djr_out == crf*HALF) || (dir_out == crf && djr_out == crf*TWO) ) {
+                  out_direction = 'B';
+               }
+               // Right Out
+               else if( (dir_out == -crf && (djr_out == -crf*HALF || djr_out == 0 || (djr_out == crf && level_old[ic+1] < level_old[ic]))) ) {
+                  out_direction = 'R';
+               }
+               // Top Out
+               else if( (djr_out == -crf && (dir_out == -crf*HALF || dir_out == 0 || (dir_out == crf && level_old[ic+1] < level_old[ic]))) ) {
+                  out_direction = 'T';
+               }
+               // Further from the left
+               else if( dir_out > 0 && djr_out == 0 ) {
+                  out_direction = 'L';
+               }
+               // Further from the right
+               else if( dir_out < 0 && djr_out == 0 ) {
+                  out_direction = 'R';
+               }
+               // Further from the bottom
+               else if( djr_out > 0 && dir_out == 0 ) {
+                  out_direction = 'B';
+               }
+               // Further from the top
+               else if( djr_out < 0 && dir_out == 0 ) {
+                  out_direction = 'T';
+               }
+               // SW out; 'M'
+               else if( dir_out > 0 && djr_out > 0) {
+                  out_direction = 'M';
+               }
+               // NW out; 'W'
+               else if( dir_out > 0 && djr_out < 0) {
+                  out_direction = 'W';
+               }
+               // SE out; 'F'
+               else if( dir_out < 0 && djr_out > 0) {
+                  out_direction = 'F';
+               }
+               // NE out; 'E'
+               else if( dir_out < 0 && djr_out < 0) {
+                  out_direction = 'E';
+               }
+
+               // Set the Stencil
+               if(in_direction == 'L' && (out_direction == 'B' || out_direction == 'R' || out_direction == 'F')) {
+                  order[0] = SW; order[1] = NW; order[2] = NE; order[3] = SE;
+               }
+               else if(in_direction == 'L' && (out_direction == 'T' || out_direction == 'W' )) {
+                  order[0] = SW; order[1] = SE; order[2] = NE; order[3] = NW;
+               }
+               else if(in_direction == 'L' && out_direction == 'M') {
+                  order[0] = NW; order[1] = NE; order[2] = SE; order[3] = SW;
+               }
+               else if(in_direction == 'L' && out_direction == 'E') {
+                  order[0] = SW; order[1] = SE; order[2] = NW; order[3] = NE;
+               }
+
+               else if(in_direction == 'B' && (out_direction == 'R' || out_direction == 'F' )) {
+                  order[0] = SW; order[1] = NW; order[2] = NE; order[3] = SE;
+               }
+               else if(in_direction == 'B' && (out_direction == 'L' || out_direction == 'T' || out_direction == 'W' )) {
+                  order[0] = SW; order[1] = SE; order[2] = NE; order[3] = NW;
+               }
+               else if(in_direction == 'B' && out_direction == 'M') {
+                  order[0] = SE; order[1] = NE; order[2] = NW; order[3] = SW;
+               }
+               else if(in_direction == 'B' && out_direction == 'E') {
+                  order[0] = SW; order[1] = NW; order[2] = SE; order[3] = NE;
+               }
+               
+               else if(in_direction == 'R' && (out_direction == 'T' || out_direction == 'L' || out_direction == 'W' )) {
+                  order[0] = NE; order[1] = SE; order[2] = SW; order[3] = NW;
+               }
+               else if(in_direction == 'R' && (out_direction == 'B' || out_direction == 'F' )) {
+                  order[0] = NE; order[1] = NW; order[2] = SW; order[3] = SE;
+               }
+               else if(in_direction == 'R' && out_direction == 'M') {
+                  order[0] = NE; order[1] = NW; order[2] = SE; order[3] = SW;
+               }
+               else if(in_direction == 'R' && out_direction == 'E') {
+                  order[0] = SE; order[1] = SW; order[2] = NW; order[3] = NE;
+               }
+
+               else if(in_direction == 'T' && (out_direction == 'L' || out_direction == 'W' )) {
+                  order[0] = NE; order[1] = SE; order[2] = SW; order[3] = NW;
+               }
+               else if(in_direction == 'T' && (out_direction == 'R' || out_direction == 'B' || out_direction == 'F' )) {
+                  order[0] = NE; order[1] = NW; order[2] = SW; order[3] = SE;
+               }
+               else if(in_direction == 'T' && out_direction == 'M') {
+                  order[0] = NE; order[1] = SE; order[2] = NW; order[3] = SW;
+               }
+               else if(in_direction == 'T' && out_direction == 'E') {
+                  order[0] = NW; order[1] = SW; order[2] = SE; order[3] = NE;
+               }
+
+               else if(in_direction == 'M' && (out_direction == 'L' || out_direction == 'W' || out_direction == 'T') ) {
+                  order[0] = SW; order[1] = SE; order[2] = NE; order[3] = NW;
+               }
+               else if(in_direction == 'M' && (out_direction == 'R' || out_direction == 'F' || out_direction == 'B') ) {
+                  order[0] = SW; order[1] = NW; order[2] = NE; order[3] = SE;
+               }
+               else if(in_direction == 'M' && out_direction == 'E') {
+                  order[0] = SW; order[1] = SE; order[2] = NW; order[3] = NE;
+               }
+ 
+               else if(in_direction == 'W' && (out_direction == 'L' || out_direction == 'M' || out_direction == 'B') ) {
+                  order[0] = NW; order[1] = NE; order[2] = SE; order[3] = SW;
+               }
+               else if(in_direction == 'W' && (out_direction == 'R' || out_direction == 'E' || out_direction == 'T') ) {
+                  order[0] = NW; order[1] = SW; order[2] = SE; order[3] = NE;
+               }
+               else if(in_direction == 'W' && out_direction == 'F') {
+                  order[0] = NW; order[1] = NE; order[2] = SW; order[3] = SE;
+               }
+
+               else if(in_direction == 'F' && (out_direction == 'L' || out_direction == 'M' || out_direction == 'B') ) {
+                  order[0] = SE; order[1] = NE; order[2] = NW; order[3] = SW;
+               }
+               else if(in_direction == 'F' && (out_direction == 'R' || out_direction == 'E' || out_direction == 'T') ) {
+                  order[0] = SE; order[1] = SW; order[2] = NW; order[3] = NE;
+               }
+               else if(in_direction == 'F' && out_direction == 'W') {
+                  order[0] = SE; order[1] = NE; order[2] = SW; order[3] = NW;
+               }
+
+               else if(in_direction == 'E' && (out_direction == 'L' || out_direction == 'W' || out_direction == 'T') ) {
+                  order[0] = NE; order[1] = SE; order[2] = SW; order[3] = NW;
+               }
+               else if(in_direction == 'E' && (out_direction == 'R' || out_direction == 'F' || out_direction == 'B') ) {
+                  order[0] = NE; order[1] = NW; order[2] = SW; order[3] = SE;
+               }
+               else if(in_direction == 'E' && out_direction == 'M') {
+                  order[0] = NE; order[1] = SE; order[2] = NW; order[3] = SW;
+               }
+
+               else { // Default to a knot 
+                  order[0] = NW; order[1] = SE; order[2] = SW; order[3] = NE;
+                  if (do_stencil_warning) {
+                     printf("Nonlocal case for the stencil.\n");
+                  }
+               }
+               //  Determine the relative orientation of the neighboring cells.
+               //  There are 12 possible ways across the cell: 4 Ls and 2 straight
+               //  lines, each with 2 directions of traversal.
+               //  Then the cell is refined and ordered according to the relative
+               //  orientation and four-point Hilbert stencil.
+
+               // XXX NOTE that the four-point stencil varies depending upon
+               // the starting and ending point of the global Hilbert curve.
+               // The stencil applied here assumes the start at (0,0) and the end
+               // at (0,y_max). XXX WRONG
+#endif                 
+
+            }  //  End local stencil version
+            else //  Use Z-ordering for the curve.
+            {  order[0] = SW; order[1] = SE; order[2] = NW; order[3] = NE; }
+            
+}
+
+void Mesh::calc_face_list(void)
+{  // Rebuild the x-face and y-face lists (i, j, level per face), then rebase the face indices per level.
+   xface_i.clear();
+   xface_j.clear();
+   xface_level.clear();
+   // Per-level index bounds start at sentinel values so the first face found at a level replaces them.
+   ixmin_level.clear();
+   ixmax_level.clear();
+   jxmin_level.clear();
+   jxmax_level.clear();
+   ixmin_level.resize(levmx+1,  9999999);
+   ixmax_level.resize(levmx+1, -9999999);
+   jxmin_level.resize(levmx+1,  9999999);
+   jxmax_level.resize(levmx+1, -9999999);
+   // Per-level offsets; assigned near the end from the per-level minimum face indices.
+   ixadjust.clear();
+   ixadjust.resize(levmx+1);
+   jxadjust.clear();
+   jxadjust.resize(levmx+1);
+   // x-faces: walk the cells and emit the face(s) between each cell and its right neighbor.
+   int iface=0;
+   for (int nz=0; nz<(int)ncells; nz++){
+      int nr = nrht[nz];
+      if (nr == nz) continue; // the cell is its own right neighbor: no right face
+
+      int ifactor = 1;
+      if (level[nr] < level[nz]) ifactor = 2; // neighbor at a lower level: its i/j are doubled below
+
+      // Have right face; it is stored at the higher of the two cells' levels
+      //printf("DEBUG xface -- iface %d lower nz %d upper nr %d\n",iface,nz,nr);
+      xface_level.push_back(MAX(level[nz],level[nr]));
+      xface_i.push_back(i[nr]*ifactor);
+      if (level[nr] < level[nz] && is_upper(j[nz]) ) {
+         xface_j.push_back(j[nr]*ifactor+1);
+      } else {
+         xface_j.push_back(j[nr]*ifactor);
+      }
+
+      iface++;
+      // Neighbor at a higher level with is_lower(j): the cell above it (ntop) gives a second face.
+      if (level[nr] > level[nz] && is_lower(j[nr]) ){
+         int ntr = ntop[nr];
+         if (ntr != nr) {
+            //printf("DEBUG xface -- iface %d lower nz %d upper ntr %d\n",iface,nz,ntr);
+            xface_level.push_back(MAX(level[nz],level[ntr]));
+            xface_i.push_back(i[ntr]*ifactor);
+            xface_j.push_back(j[ntr]*ifactor);
+
+            iface++;
+         }
+      }
+   }
+   nxface=iface;
+   // y-faces: the same construction using the top neighbor, with the roles of i and j exchanged.
+   yface_i.clear();
+   yface_j.clear();
+   yface_level.clear();
+
+   iymin_level.clear();
+   iymax_level.clear();
+   jymin_level.clear();
+   jymax_level.clear();
+   iymin_level.resize(levmx+1,  9999999);
+   iymax_level.resize(levmx+1, -9999999);
+   jymin_level.resize(levmx+1,  9999999);
+   jymax_level.resize(levmx+1, -9999999);
+
+   iyadjust.clear();
+   iyadjust.resize(levmx+1);
+   jyadjust.clear();
+   jyadjust.resize(levmx+1);
+
+   iface=0;
+   for (int nz=0; nz<(int)ncells; nz++){
+      int nt = ntop[nz];
+      if (nt == nz) continue; // the cell is its own top neighbor: no top face
+
+      int ifactor = 1;
+      if (level[nt] < level[nz]) ifactor = 2; // neighbor at a lower level: its i/j are doubled below
+
+      // Have top face; it is stored at the higher of the two cells' levels
+      //printf("DEBUG yface -- iface %d lower nz %d upper nt %d\n",iface,nz,nt);
+      yface_level.push_back(MAX(level[nz],level[nt]));
+      yface_j.push_back(j[nt]*ifactor);
+      if (level[nt] < level[nz] && is_upper(i[nz]) ) {
+         yface_i.push_back(i[nt]*ifactor+1);
+      } else{
+         yface_i.push_back(i[nt]*ifactor);
+      }
+
+      iface++;
+      if (level[nt] > level[nz] && is_lower(i[nt]) ){ // second face via the right neighbor of nt
+         int nrt = nrht[nt];
+         if (nrt != nt) {
+            //printf("DEBUG yface -- iface %d lower nz %d upper nrt %d\n",iface,nz,nrt);
+            yface_level.push_back(MAX(level[nz],level[nrt]));
+            yface_j.push_back(j[nrt]*ifactor);
+            yface_i.push_back(i[nrt]*ifactor);
+
+            iface++;
+         }
+
+      }
+   }
+   nyface=iface;
+   // Per-level minimum and maximum of the x-face indices.
+   for (int iface=0; iface < nxface; iface++){
+      int fl = xface_level[iface];
+
+      int fi = xface_i[iface];
+      if (fi < ixmin_level[fl]) ixmin_level[fl] = fi;
+      if (fi > ixmax_level[fl]) ixmax_level[fl] = fi;
+
+      int fj = xface_j[iface];
+      if (fj < jxmin_level[fl]) jxmin_level[fl] = fj;
+      if (fj > jxmax_level[fl]) jxmax_level[fl] = fj;
+   }
+   // Shift every x-face index so that the minimum at its level becomes 0.
+   for (int iface=0; iface < nxface; iface++){
+      int fl = xface_level[iface];
+      if (ixmax_level[fl] < ixmin_level[fl]) continue; // bounds still at the sentinels
+
+      xface_i[iface] -= ixmin_level[fl];
+      xface_j[iface] -= jxmin_level[fl];
+   }
+   // Keep the applied shift in ixadjust/jxadjust and make the stored bounds relative to it.
+   for (int fl = 0; fl <= levmx; fl++){ // NOTE(review): a level without faces still holds the sentinels here -- confirm callers skip it
+      ixadjust[fl] = ixmin_level[fl];
+      jxadjust[fl] = jxmin_level[fl];
+      ixmax_level[fl] -= ixmin_level[fl];;
+      jxmax_level[fl] -= jxmin_level[fl];
+      ixmin_level[fl] = 0;
+      jxmin_level[fl] = 0;
+   }
+   // Per-level minimum and maximum of the y-face indices.
+   for (int iface=0; iface < nyface; iface++){
+      int fl = yface_level[iface];
+
+      int fi = yface_i[iface];
+      if (fi < iymin_level[fl]) iymin_level[fl] = fi;
+      if (fi > iymax_level[fl]) iymax_level[fl] = fi;
+
+      int fj = yface_j[iface];
+      if (fj < jymin_level[fl]) jymin_level[fl] = fj;
+      if (fj > jymax_level[fl]) jymax_level[fl] = fj;
+   }
+   // Shift every y-face index so that the minimum at its level becomes 0.
+   for (int iface=0; iface < nyface; iface++){
+      int fl = yface_level[iface];
+      if (iymax_level[fl] < iymin_level[fl]) continue; // bounds still at the sentinels
+
+      yface_i[iface] -= iymin_level[fl];
+      yface_j[iface] -= jymin_level[fl];
+   }
+   // Keep the applied shift in iyadjust/jyadjust and make the stored bounds relative to it.
+   for (int fl = 0; fl <= levmx; fl++){
+      iyadjust[fl] = iymin_level[fl];
+      jyadjust[fl] = jymin_level[fl];
+      iymax_level[fl] -= iymin_level[fl];;
+      jymax_level[fl] -= jymin_level[fl];
+      iymin_level[fl] = 0;
+      jymin_level[fl] = 0;
+   }
+
+}
+
+void Mesh::calc_face_list_wmap(void)
+{  // Rebuild the x-/y-face lists and, for every face, record the two cells it separates.
+   map_xface2cell_lower.clear();
+   map_xface2cell_upper.clear();
+
+   xface_i.clear();
+   xface_j.clear();
+   xface_level.clear();
+   // Per-level index bounds start at sentinel values so the first face found at a level replaces them.
+   ixmin_level.clear();
+   ixmax_level.clear();
+   jxmin_level.clear();
+   jxmax_level.clear();
+   ixmin_level.resize(levmx+1,  9999999);
+   ixmax_level.resize(levmx+1, -9999999);
+   jxmin_level.resize(levmx+1,  9999999);
+   jxmax_level.resize(levmx+1, -9999999);
+   // Per-level offsets; assigned near the end from the per-level minimum face indices.
+   ixadjust.clear();
+   ixadjust.resize(levmx+1);
+   jxadjust.clear();
+   jxadjust.resize(levmx+1);
+   // x-faces: walk the cells and emit the face(s) between each cell and its right neighbor.
+   int iface=0;
+   for (int nz=0; nz<(int)ncells; nz++){
+      int nr = nrht[nz];
+      if (nr == nz) continue; // the cell is its own right neighbor: no right face
+
+      int ifactor = 1;
+      if (level[nr] < level[nz]) ifactor = 2; // neighbor at a lower level: its i/j are doubled below
+
+      // Have right face: nz is recorded as its lower cell and nr as its upper cell
+      map_xface2cell_lower.push_back(nz);
+      map_xface2cell_upper.push_back(nr);
+      xface_level.push_back(MAX(level[nz],level[nr]));
+      xface_i.push_back(i[nr]*ifactor);
+      if (level[nr] < level[nz] && is_upper(j[nz]) ) {
+         xface_j.push_back(j[nr]*ifactor+1);
+      } else {
+         xface_j.push_back(j[nr]*ifactor);
+      }
+
+      iface++;
+      // Neighbor at a higher level with is_lower(j): the cell above it (ntop) gives a second face.
+      if (level[nr] > level[nz] && is_lower(j[nr]) ){
+         int ntr = ntop[nr];
+         if (ntr != nr) {
+            map_xface2cell_lower.push_back(nz);
+            map_xface2cell_upper.push_back(ntr);
+            xface_level.push_back(MAX(level[nz],level[ntr]));
+            xface_i.push_back(i[ntr]*ifactor);
+            xface_j.push_back(j[ntr]*ifactor);
+
+            iface++;
+         }
+      }
+   }
+   nxface=iface;
+   // y-faces: the same construction using the top neighbor, with the roles of i and j exchanged.
+   map_yface2cell_lower.clear();
+   map_yface2cell_upper.clear();
+
+   yface_i.clear();
+   yface_j.clear();
+   yface_level.clear();
+
+   iymin_level.clear();
+   iymax_level.clear();
+   jymin_level.clear();
+   jymax_level.clear();
+   iymin_level.resize(levmx+1,  9999999);
+   iymax_level.resize(levmx+1, -9999999);
+   jymin_level.resize(levmx+1,  9999999);
+   jymax_level.resize(levmx+1, -9999999);
+
+   iyadjust.clear();
+   iyadjust.resize(levmx+1);
+   jyadjust.clear();
+   jyadjust.resize(levmx+1);
+
+   iface=0;
+   for (int nz=0; nz<(int)ncells; nz++){
+      int nt = ntop[nz];
+      if (nt == nz) continue; // the cell is its own top neighbor: no top face
+
+      int ifactor = 1;
+      if (level[nt] < level[nz]) ifactor = 2; // neighbor at a lower level: its i/j are doubled below
+
+      // Have top face: nz is recorded as its lower cell and nt as its upper cell
+      // printf("DEBUG -- iface %d lower nz %d upper nr %d\n",iface,nz,nt);
+      map_yface2cell_lower.push_back(nz);
+      map_yface2cell_upper.push_back(nt);
+      yface_level.push_back(MAX(level[nz],level[nt]));
+      yface_j.push_back(j[nt]*ifactor);
+      if (level[nt] < level[nz] && is_upper(i[nz]) ) {
+         yface_i.push_back(i[nt]*ifactor+1);
+      } else{
+         yface_i.push_back(i[nt]*ifactor);
+      }
+
+      iface++;
+      if (level[nt] > level[nz] && is_lower(i[nt]) ){ // second face via the right neighbor of nt
+         int nrt = nrht[nt];
+         if (nrt != nt) {
+            map_yface2cell_lower.push_back(nz);
+            map_yface2cell_upper.push_back(nrt);
+            yface_level.push_back(MAX(level[nz],level[nrt]));
+            yface_j.push_back(j[nrt]*ifactor);
+            yface_i.push_back(i[nrt]*ifactor);
+
+            iface++;
+         }
+      }
+   }
+   nyface=iface;
+   // Per-level minimum and maximum of the x-face indices.
+   for (int iface=0; iface < nxface; iface++){
+      int fl = xface_level[iface];
+
+      int fi = xface_i[iface];
+      if (fi < ixmin_level[fl]) ixmin_level[fl] = fi;
+      if (fi > ixmax_level[fl]) ixmax_level[fl] = fi;
+
+      int fj = xface_j[iface];
+      if (fj < jxmin_level[fl]) jxmin_level[fl] = fj;
+      if (fj > jxmax_level[fl]) jxmax_level[fl] = fj;
+   }
+   // Shift every x-face index so that the minimum at its level becomes 0.
+   for (int iface=0; iface < nxface; iface++){
+      int fl = xface_level[iface];
+      if (ixmax_level[fl] < ixmin_level[fl]) continue; // bounds still at the sentinels
+
+      xface_i[iface] -= ixmin_level[fl];
+      xface_j[iface] -= jxmin_level[fl];
+   }
+   // Keep the applied shift in ixadjust/jxadjust and make the stored bounds relative to it.
+   for (int fl = 0; fl <= levmx; fl++){ // NOTE(review): a level without faces still holds the sentinels here -- confirm callers skip it
+      ixadjust[fl] = ixmin_level[fl];
+      jxadjust[fl] = jxmin_level[fl];
+      ixmax_level[fl] -= ixmin_level[fl];;
+      jxmax_level[fl] -= jxmin_level[fl];
+      ixmin_level[fl] = 0;
+      jxmin_level[fl] = 0;
+   }
+   // Per-level minimum and maximum of the y-face indices.
+   for (int iface=0; iface < nyface; iface++){
+      int fl = yface_level[iface];
+
+      int fi = yface_i[iface];
+      if (fi < iymin_level[fl]) iymin_level[fl] = fi;
+      if (fi > iymax_level[fl]) iymax_level[fl] = fi;
+
+      int fj = yface_j[iface];
+      if (fj < jymin_level[fl]) jymin_level[fl] = fj;
+      if (fj > jymax_level[fl]) jymax_level[fl] = fj;
+   }
+   // Shift every y-face index so that the minimum at its level becomes 0.
+   for (int iface=0; iface < nyface; iface++){
+      int fl = yface_level[iface];
+      if (iymax_level[fl] < iymin_level[fl]) continue; // bounds still at the sentinels
+
+      yface_i[iface] -= iymin_level[fl];
+      yface_j[iface] -= jymin_level[fl];
+   }
+   // Keep the applied shift in iyadjust/jyadjust and make the stored bounds relative to it.
+   for (int fl = 0; fl <= levmx; fl++){
+      iyadjust[fl] = iymin_level[fl];
+      jyadjust[fl] = jymin_level[fl];
+      iymax_level[fl] -= iymin_level[fl];;
+      jymax_level[fl] -= jymin_level[fl];
+      iymin_level[fl] = 0;
+      jymin_level[fl] = 0;
+   }
+
+}
+
+void Mesh::calc_face_list_wbidirmap(void)
+{  // Rebuild the x-/y-face lists together with face->cell and cell->face maps in both directions.
+   map_xface2cell_lower.clear();
+   map_xface2cell_upper.clear();
+   // Cell->face maps hold one entry per cell; -1 means no face has been recorded for that slot.
+   map_xcell2face_left1.clear();
+   map_xcell2face_left2.clear();
+   map_xcell2face_right1.clear();
+   map_xcell2face_right2.clear();
+   map_xcell2face_left1.resize(ncells, -1);
+   map_xcell2face_left2.resize(ncells, -1);
+   map_xcell2face_right1.resize(ncells, -1);
+   map_xcell2face_right2.resize(ncells, -1);
+
+   xface_i.clear();
+   xface_j.clear();
+   xface_level.clear();
+   // Per-level index bounds start at sentinel values so the first face found at a level replaces them.
+   ixmin_level.clear();
+   ixmax_level.clear();
+   jxmin_level.clear();
+   jxmax_level.clear();
+   ixmin_level.resize(levmx+1,  9999999);
+   ixmax_level.resize(levmx+1, -9999999);
+   jxmin_level.resize(levmx+1,  9999999);
+   jxmax_level.resize(levmx+1, -9999999);
+   // Per-level offsets; assigned near the end from the per-level minimum face indices.
+   ixadjust.clear();
+   ixadjust.resize(levmx+1);
+   jxadjust.clear();
+   jxadjust.resize(levmx+1);
+   // x-faces: walk the cells and emit the face(s) between each cell and its right neighbor.
+   int iface=0;
+   for (int nz=0; nz<(int)ncells; nz++){
+      int nr = nrht[nz];
+      if (nr == nz) continue; // the cell is its own right neighbor: no right face
+
+      int ifactor = 1;
+      if (level[nr] < level[nz]) ifactor = 2; // neighbor at a lower level: its i/j are doubled below
+
+      // Have right face: nz is recorded as its lower cell and nr as its upper cell
+      map_xface2cell_lower.push_back(nz);
+      map_xface2cell_upper.push_back(nr);
+      xface_level.push_back(MAX(level[nz],level[nr]));
+      xface_i.push_back(i[nr]*ifactor);
+      if (level[nr] < level[nz] && is_upper(j[nz]) ) {
+         xface_j.push_back(j[nr]*ifactor+1);
+      } else {
+         xface_j.push_back(j[nr]*ifactor);
+      }
+      map_xcell2face_right1[nz] = iface;
+
+      iface++;
+      // Neighbor at a higher level with is_lower(j): the cell above it (ntop) gives a second right face.
+      if (level[nr] > level[nz] && is_lower(j[nr]) ){
+         int ntr = ntop[nr];
+         if (ntr != nr) {
+            map_xface2cell_lower.push_back(nz);
+            map_xface2cell_upper.push_back(ntr);
+            xface_level.push_back(MAX(level[nz],level[ntr]));
+            xface_i.push_back(i[ntr]*ifactor);
+            xface_j.push_back(j[ntr]*ifactor);
+            map_xcell2face_right2[nz] = iface;
+
+            iface++;
+         }
+      }
+   }
+   nxface=iface;
+   // Left-face entries of each cell are copied from the right-face entries of its left neighbor.
+   for (int nz=0; nz<(int)ncells; nz++){
+      int nl = nlft[nz];
+      if (nl == nz) continue; // the cell is its own left neighbor: no left face
+
+      if (level[nl] < level[nz] && is_upper(j[nz])){
+         map_xcell2face_left1[nz] = map_xcell2face_right2[nl];
+      } else {
+         map_xcell2face_left1[nz] = map_xcell2face_right1[nl];
+         if (level[nl] > level[nz]){
+            map_xcell2face_left2[nz] = map_xcell2face_right1[ntop[nl]];
+         }
+      }
+
+   }
+   // y-faces: the same construction using the top neighbor, with the roles of i and j exchanged.
+   map_yface2cell_lower.clear();
+   map_yface2cell_upper.clear();
+
+   map_ycell2face_bot1.clear();
+   map_ycell2face_bot2.clear();
+   map_ycell2face_top1.clear();
+   map_ycell2face_top2.clear();
+   map_ycell2face_bot1.resize(ncells, -1);
+   map_ycell2face_bot2.resize(ncells, -1);
+   map_ycell2face_top1.resize(ncells, -1);
+   map_ycell2face_top2.resize(ncells, -1);
+
+   yface_i.clear();
+   yface_j.clear();
+   yface_level.clear();
+
+   iymin_level.clear();
+   iymax_level.clear();
+   jymin_level.clear();
+   jymax_level.clear();
+   iymin_level.resize(levmx+1,  9999999);
+   iymax_level.resize(levmx+1, -9999999);
+   jymin_level.resize(levmx+1,  9999999);
+   jymax_level.resize(levmx+1, -9999999);
+
+   iyadjust.clear();
+   iyadjust.resize(levmx+1);
+   jyadjust.clear();
+   jyadjust.resize(levmx+1);
+
+   iface=0;
+   for (int nz=0; nz<(int)ncells; nz++){
+      int nt = ntop[nz];
+      if (nt == nz) continue; // the cell is its own top neighbor: no top face
+
+      int ifactor = 1;
+      if (level[nt] < level[nz]) ifactor = 2; // neighbor at a lower level: its i/j are doubled below
+
+      // Have top face: nz is recorded as its lower cell and nt as its upper cell
+      // printf("DEBUG -- iface %d lower nz %d upper nr %d\n",iface,nz,nt);
+      map_yface2cell_lower.push_back(nz);
+      map_yface2cell_upper.push_back(nt);
+      yface_level.push_back(MAX(level[nz],level[nt]));
+      yface_j.push_back(j[nt]*ifactor);
+      if (level[nt] < level[nz] && is_upper(i[nz]) ) {
+         yface_i.push_back(i[nt]*ifactor+1);
+      } else{
+         yface_i.push_back(i[nt]*ifactor);
+      }
+      map_ycell2face_top1[nz] = iface;
+
+      iface++;
+      // Neighbor at a higher level with is_lower(i): the cell right of it (nrht) gives a second top face.
+      if (level[nt] > level[nz]  &&is_lower(i[nt]) ){
+         int nrt = nrht[nt];
+         if (nrt != nt) {
+            map_yface2cell_lower.push_back(nz);
+            map_yface2cell_upper.push_back(nrt);
+            yface_level.push_back(MAX(level[nz],level[nrt]));
+            yface_j.push_back(j[nrt]*ifactor);
+            yface_i.push_back(i[nrt]*ifactor);
+            map_ycell2face_top2[nz] = iface;
+
+            iface++;
+         }
+      }
+   }
+   nyface=iface;
+   // Bottom-face entries of each cell are copied from the top-face entries of its bottom neighbor.
+   for (int nz=0; nz<(int)ncells; nz++){
+      int nb = nbot[nz];
+      if (nb == nz) continue; // the cell is its own bottom neighbor: no bottom face
+
+      if (level[nb] < level[nz] && is_upper(i[nz])){
+         map_ycell2face_bot1[nz] = map_ycell2face_top2[nb];
+      } else {
+         map_ycell2face_bot1[nz] = map_ycell2face_top1[nb];
+         if (level[nb] > level[nz]){
+            map_ycell2face_bot2[nz] = map_ycell2face_top1[nrht[nb]];
+         }
+      }
+
+   }
+   // Per-level minimum and maximum of the x-face indices.
+   for (int iface=0; iface < nxface; iface++){
+      int fl = xface_level[iface];
+
+      int fi = xface_i[iface];
+      if (fi < ixmin_level[fl]) ixmin_level[fl] = fi;
+      if (fi > ixmax_level[fl]) ixmax_level[fl] = fi;
+
+      int fj = xface_j[iface];
+      if (fj < jxmin_level[fl]) jxmin_level[fl] = fj;
+      if (fj > jxmax_level[fl]) jxmax_level[fl] = fj;
+   }
+   // Shift every x-face index so that the minimum at its level becomes 0.
+   for (int iface=0; iface < nxface; iface++){
+      int fl = xface_level[iface];
+      if (ixmax_level[fl] < ixmin_level[fl]) continue; // bounds still at the sentinels
+
+      xface_i[iface] -= ixmin_level[fl];
+      xface_j[iface] -= jxmin_level[fl];
+   }
+   // Keep the applied shift in ixadjust/jxadjust and make the stored bounds relative to it.
+   for (int fl = 0; fl <= levmx; fl++){ // NOTE(review): a level without faces still holds the sentinels here -- confirm callers skip it
+      ixadjust[fl] = ixmin_level[fl];
+      jxadjust[fl] = jxmin_level[fl];
+      ixmax_level[fl] -= ixmin_level[fl];;
+      jxmax_level[fl] -= jxmin_level[fl];
+      ixmin_level[fl] = 0;
+      jxmin_level[fl] = 0;
+   }
+   // Per-level minimum and maximum of the y-face indices.
+   for (int iface=0; iface < nyface; iface++){
+      int fl = yface_level[iface];
+
+      int fi = yface_i[iface];
+      if (fi < iymin_level[fl]) iymin_level[fl] = fi;
+      if (fi > iymax_level[fl]) iymax_level[fl] = fi;
+
+      int fj = yface_j[iface];
+      if (fj < jymin_level[fl]) jymin_level[fl] = fj;
+      if (fj > jymax_level[fl]) jymax_level[fl] = fj;
+   }
+   // Shift every y-face index so that the minimum at its level becomes 0.
+   for (int iface=0; iface < nyface; iface++){
+      int fl = yface_level[iface];
+      if (iymax_level[fl] < iymin_level[fl]) continue; // bounds still at the sentinels
+
+      yface_i[iface] -= iymin_level[fl];
+      yface_j[iface] -= jymin_level[fl];
+   }
+   // Keep the applied shift in iyadjust/jyadjust and make the stored bounds relative to it.
+   for (int fl = 0; fl <= levmx; fl++){
+      iyadjust[fl] = iymin_level[fl];
+      jyadjust[fl] = jymin_level[fl];
+      iymax_level[fl] -= iymin_level[fl];;
+      jymax_level[fl] -= jymin_level[fl];
+      iymin_level[fl] = 0;
+      jymin_level[fl] = 0;
+   }
+
+}
+
+// Build a (jxmax_level[lev]+1) x (ixmax_level[lev]+1) matrix from genmatrix() holding
+// 1 at the [j][i] of every x-face whose level is lev and -1 everywhere else. The
+// matrix is also printed to stdout when DEBUG or print_output is set.
+int **Mesh::get_xface_flag(int lev, bool print_output)
+{
+   const int nrows = jxmax_level[lev]+1;
+   const int ncols = ixmax_level[lev]+1;
+   int **xface_flag = (int **)genmatrix(nrows, ncols, sizeof(int));
+
+   // Start with every entry unmarked.
+   for (int row=0; row<nrows; row++){
+      for (int col=0; col<ncols; col++) xface_flag[row][col] = -1;
+   }
+
+   // Mark the entries occupied by faces at the requested level.
+   for (int iface=0; iface < nxface; iface++){
+      if (xface_level[iface] != lev) continue;
+      xface_flag[xface_j[iface]][xface_i[iface]] = 1;
+   }
+
+   if (DEBUG || print_output) {
+      printf("DEBUG -- x face_flag for level %d\n",lev);
+      printf("DEBUG -- sizes isize+1 %d jsize+1 %d\n",ncols,nrows);
+
+      // Column header, then one line per row from the highest j down to 0.
+      printf("                           ");
+      for (int col=0; col<ncols; col++){
+         printf(" %4d ",col);
+      }
+      printf("\n");
+
+      for (int row=nrows-1; row>=0; row--){
+         printf("DEBUG -- j  %4d:          ",row);
+         for (int col=0; col<ncols; col++){
+            if (xface_flag[row][col] < 0){
+               printf("      ");
+            } else {
+               printf(" %4d ", xface_flag[row][col]);
+            }
+         }
+         printf("\n");
+      }
+   }
+
+   return(xface_flag);
+}
+
+// Build a (jymax_level[lev]+1) x (iymax_level[lev]+1) matrix from genmatrix() holding
+// 1 at the [j][i] of every y-face whose level is lev and -1 everywhere else. The
+// matrix is also printed to stdout when DEBUG or print_output is set.
+int **Mesh::get_yface_flag(int lev, bool print_output)
+{
+   const int nrows = jymax_level[lev]+1;
+   const int ncols = iymax_level[lev]+1;
+   int **yface_flag = (int **)genmatrix(nrows, ncols, sizeof(int));
+
+   // Start with every entry unmarked.
+   for (int row=0; row<nrows; row++){
+      for (int col=0; col<ncols; col++) yface_flag[row][col] = -1;
+   }
+
+   // Mark the entries occupied by faces at the requested level.
+   for (int iface=0; iface < nyface; iface++){
+      if (yface_level[iface] != lev) continue;
+      yface_flag[yface_j[iface]][yface_i[iface]] = 1;
+   }
+
+   if (DEBUG || print_output) {
+      printf("DEBUG -- y face_flag for level %d\n",lev);
+      printf("DEBUG -- sizes isize+1 %d jsize+1 %d\n",ncols,nrows);
+
+      // Column header, then one line per row from the highest j down to 0.
+      printf("                           ");
+      for (int col=0; col<ncols; col++){
+         printf(" %4d ",col);
+      }
+      printf("\n");
+
+      for (int row=nrows-1; row>=0; row--){
+         printf("DEBUG -- j  %4d:          ",row);
+         for (int col=0; col<ncols; col++){
+            if (yface_flag[row][col] < 0){
+               printf("      ");
+            } else {
+               printf(" %4d ", yface_flag[row][col]);
+            }
+         }
+         printf("\n");
+      }
+   }
+
+   return(yface_flag);
+}
+
+void Mesh::get_flat_grid(int lev, int ***zone_flag_base, int ***zone_cell_base)
+{  // Allocate and fill two jsize x isize matrices for level lev, returned through the two *_base arguments.
+   int isize = ixmax_level[lev]+4; // width taken from the x-face bounds
+   int jsize = jymax_level[lev]+4; // height taken from the y-face bounds
+   int iadjust = ixadjust[lev]-2;  // subtracted from cell i below
+   int jadjust = jyadjust[lev]-2;  // subtracted from cell j below
+
+   //printf("DEBUG -- sizes isize %d jsize %d\n",isize,jsize);
+   //printf("DEBUG -- adjust ixadjust %d jxadjust %d\n",ixadjust[lev],jxadjust[lev]);
+   //printf("DEBUG -- adjust iyadjust %d jyadjust %d\n",iyadjust[lev],jyadjust[lev]);
+   // zone_flag: set to 1 at every position touched by a face of this level, -1 elsewhere.
+   (*zone_flag_base) = (int **)genmatrix(jsize, isize, sizeof(int));
+
+   int **zone_flag = *zone_flag_base;
+   for (int jj=0; jj<jsize; jj++){
+      for (int ii=0; ii<isize; ii++){
+          zone_flag[jj][ii] = -1;
+      }
+   }
+   // zone_cell: cell number recorded at a position, -1 where nothing was recorded.
+   (*zone_cell_base) = (int **)genmatrix(jsize, isize, sizeof(int));
+
+   int **zone_cell = *zone_cell_base;
+   for (int jj=0; jj<jsize; jj++){
+      for (int ii=0; ii<isize; ii++){
+         zone_cell[jj][ii] = -1;
+      }
+   }
+   // x-faces of this level: record the cell on each side (reads map_xface2cell_lower/upper).
+   for (int iface=0; iface < nxface; iface++){
+      if (xface_level[iface] == lev){
+         int nz1 = map_xface2cell_lower[iface];
+         int nz2 = map_xface2cell_upper[iface];
+
+         if (lev == level[nz1]) {
+            int iii = i[nz1]-iadjust;
+            int jjj = j[nz1]-jadjust;
+            zone_flag[jjj][iii] = 1;
+            zone_cell[jjj][iii] = nz1;
+            if (nlft[nz1] != REAL_CELL) { // NOTE(review): compares a neighbor index with REAL_CELL -- confirm intent
+               zone_cell[jjj][iii-1] = nlft[nz1];
+            }
+         } else { // nz1 is not at this level: its i/j are doubled before the offset is applied
+            int iii = i[nz1]*2-iadjust+1;
+            int jjj = j[nz1]*2-jadjust;
+            if (is_upper(j[nz2])) jjj += 1;
+            zone_flag[jjj][iii] = 1;
+            zone_cell[jjj][iii] = nz1;
+            zone_cell[jjj][iii-1] = nz1;
+         }
+         if (lev == level[nz2]) {
+            int iii = i[nz2]-iadjust;
+            int jjj = j[nz2]-jadjust;
+            zone_flag[jjj][iii] = 1;
+            zone_cell[jjj][iii] = nz2;
+            if (nrht[nz2] != REAL_CELL) {
+               zone_cell[jjj][iii+1] = nrht[nz2];
+            }
+         } else { // nz2 is not at this level: its i/j are doubled before the offset is applied
+            int iii = i[nz2]*2-iadjust;
+            int jjj = j[nz2]*2-jadjust;
+            if (is_upper(j[nz1])) jjj += 1;
+            zone_flag[jjj][iii] = 1;
+            zone_cell[jjj][iii] = nz2;
+            zone_cell[jjj][iii+1] = nz2;
+         }
+      }
+   }
+   // y-faces of this level: same recording with rows and columns exchanged (reads map_yface2cell_lower/upper).
+   for (int iface=0; iface < nyface; iface++){
+      if (yface_level[iface] == lev){
+         int nz1 = map_yface2cell_lower[iface];
+         int nz2 = map_yface2cell_upper[iface];
+
+         if (lev == level[nz1]) {
+            int iii = i[nz1]-iadjust;
+            int jjj = j[nz1]-jadjust;
+            zone_flag[jjj][iii] = 1;
+            zone_cell[jjj][iii] = nz1;
+            if (nbot[nz1] != REAL_CELL) {
+               zone_cell[jjj-1][iii] = nbot[nz1];
+            }
+         } else {
+            int iii = i[nz1]*2-iadjust;
+            int jjj = j[nz1]*2-jadjust+1;
+            if (is_upper(i[nz2])) iii += 1;
+            zone_flag[jjj][iii] = 1;
+            zone_cell[jjj][iii] = nz1;
+            zone_cell[jjj-1][iii] = nz1;
+         }
+         if (lev == level[nz2]) {
+            int iii = i[nz2]-iadjust;
+            int jjj = j[nz2]-jadjust;
+            zone_flag[jjj][iii] = 1;
+            zone_cell[jjj][iii] = nz2;
+            if (ntop[nz2] != REAL_CELL) {
+               zone_cell[jjj+1][iii] = ntop[nz2];
+            }
+         } else {
+            int iii = i[nz2]*2-iadjust;
+            int jjj = j[nz2]*2-jadjust;
+            if (is_upper(i[nz1])) iii += 1;
+            zone_flag[jjj][iii] = 1;
+            zone_cell[jjj][iii] = nz2;
+            zone_cell[jjj+1][iii] = nz2;
+         }
+      }
+   }
+   // Optional dump of both matrices; the loop variables i and j below shadow the members i and j.
+   if (DEBUG) {
+      printf("DEBUG -- zone_flag for level %d\n",lev);
+      printf("DEBUG -- sizes isize %d jsize %d\n",isize,jsize);
+      for (int j=jsize-1; j>=0; j--){
+         for (int i=0; i<isize; i++){
+            if (zone_flag[j][i] >= 0){
+               printf("      zone_flag_check[%d][%d] = 1;\n",j,i);
+            }
+         }
+      }
+      for (int j=jsize-1; j>=0; j--){
+         for (int i=0; i<isize; i++){
+            if (zone_cell[j][i] >= 0){
+               printf("      zone_cell_check[%d][%d] = %d;\n",j,i,zone_cell[j][i]);
+            }
+         }
+      }
+      // zone_flag as a table: column header, then rows from the highest j down to 0.
+      printf("                  ");
+      for (int i=0; i<isize; i++){
+         printf(" %4d ",i);
+      }
+      printf("\n");
+
+      for (int j=jsize-1; j>=0; j--){
+
+         printf("DEBUG -- j  %4d: ",j);
+         for (int i=0; i<isize; i++){
+            if (zone_flag[j][i] >= 0){
+               printf(" %4d ", zone_flag[j][i]);
+            } else {
+               printf("      ");
+            }
+         }
+         printf("\n");
+      }
+      // zone_cell as a table in the same layout.
+      printf("DEBUG -- zone_cell for level %d\n",lev);
+
+      printf("                  ");
+      for (int i=0; i<isize; i++){
+         printf(" %4d ",i);
+      }
+      printf("\n");
+
+      for (int j=jsize-1; j>=0; j--){
+
+         printf("DEBUG -- j  %4d: ",j);
+         for (int i=0; i<isize; i++){
+            if (zone_cell[j][i] >= 0){
+               printf(" %4d ", zone_cell[j][i]);
+            } else {
+               printf("      ");
+            }
+         }
+         printf("\n");
+      }
+   }
+}
+
+// Empty every face<->cell map filled by calc_face_list_wmap()/calc_face_list_wbidirmap().
+void Mesh::calc_face_list_clearmaps()
+{
+   // x direction: face -> cell, then cell -> face
+   map_xface2cell_upper.clear();  map_xface2cell_lower.clear();
+   map_xcell2face_right2.clear();
+   map_xcell2face_right1.clear();
+   map_xcell2face_left2.clear();
+   map_xcell2face_left1.clear();
+
+   // y direction: face -> cell, then cell -> face
+   map_yface2cell_upper.clear();  map_yface2cell_lower.clear();
+   map_ycell2face_top2.clear();
+   map_ycell2face_top1.clear();
+   map_ycell2face_bot2.clear();
+   map_ycell2face_bot1.clear();
+
+}
+
+// Print one timer line: rank 0 builds an indented "CPU:"/"GPU:" label for the
+// category, then parallel_output() reports the time across ranks in seconds.
+void Mesh::timer_output(mesh_timer_category category, mesh_device_types device_type, int timer_level)
+{
+   double local_time = 0.0;
+   if (device_type == MESH_DEVICE_CPU){
+      local_time = get_cpu_timer(category);
+   } else {
+      local_time = get_gpu_timer(category);
+   }
+
+   // Start empty: the original initializer "/0" was a typo for "\0" and left a
+   // literal "/0" in the label on ranks that never format it.
+   char string[80] = "";
+   if (mype == 0) {
+      const char *blank="          ";
+      const char *device = (device_type == MESH_DEVICE_CPU) ? "CPU" : "GPU";
+      // %.*s emits at most 2*timer_level blanks of indent; snprintf bounds the write.
+      snprintf(string, sizeof(string), "%s: %.*s%-30.30s\t", device, 2*timer_level, blank, mesh_timer_descriptor[category]);
+   }
+
+   parallel_output(string, local_time, timer_level, "s");
+}
+
+// Print `string`, then either one double per rank (numpe <= 4) or the min/median/max
+// over all ranks, followed by `units`. With MPI and numpe > 1 the values are gathered
+// on rank 0; only rank 0 prints. Indent is 2*output_level blanks (at most 10).
+void Mesh::parallel_output(const char *string, double local_value, int output_level, const char *units)
+{
+   vector<double> global_values(numpe);
+   global_values[0] = local_value;
+#ifdef HAVE_MPI
+   if (numpe > 1) {
+      MPI_Gather(&local_value, 1, MPI_DOUBLE, &global_values[0], 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+   }
+#endif
+   if (mype == 0) {
+      const char *blank="          ";
+      printf("%s\t",string);
+      if (numpe <= 4) {
+         for(int ip = 0; ip < numpe; ip++){
+            printf("%.*s%8.4f\t", 2*output_level, blank, global_values[ip]);
+         }
+         printf("%s\n",units);
+      } else {
+         sort(global_values.begin(),global_values.end());
+         int half_value = numpe/2;
+         // Odd count: the median is the middle sorted element, index numpe/2 (not numpe/2+1).
+         double median_value = global_values[half_value];
+         if (numpe%2 == 0) { // Even count: average the two middle elements.
+            median_value = (global_values[half_value-1]+global_values[half_value])/2.0;
+         }
+         printf("%.*s%8.4f\t%.*s%8.4f\t%.*s%8.4f   %s min/median/max\n",
+            2*output_level, blank, global_values[0],
+            2*output_level, blank, median_value,
+            2*output_level, blank, global_values[numpe-1], units);
+      }
+   }
+}
+
+// Print `string`, then either one long long per rank (numpe <= 4) or the min/median/max
+// over all ranks, followed by `units`. With MPI and numpe > 1 the values are gathered
+// on rank 0; only rank 0 prints. Indent is 2*output_level blanks (at most 10).
+void Mesh::parallel_output(const char *string, long long local_value, int output_level, const char *units)
+{
+   vector<long long> global_values(numpe);
+   global_values[0] = local_value;
+#ifdef HAVE_MPI
+   if (numpe > 1) {
+      MPI_Gather(&local_value, 1, MPI_LONG_LONG, &global_values[0], 1, MPI_LONG_LONG, 0, MPI_COMM_WORLD);
+   }
+#endif
+   if (mype == 0) {
+      const char *blank="          ";
+      printf("%s\t",string);
+      if (numpe <= 4) {
+         for(int ip = 0; ip < numpe; ip++){
+            printf("%.*s%10lld\t", 2*output_level, blank, global_values[ip]);
+         }
+         printf("%s\n",units);
+      } else {
+         sort(global_values.begin(),global_values.end());
+         int half_value = numpe/2;
+         // Odd count: the median is the middle sorted element, index numpe/2 (not numpe/2+1).
+         long long median_value = global_values[half_value];
+         if (numpe%2 == 0) { // Even count: integer average of the two middle elements.
+            median_value = (global_values[half_value-1]+global_values[half_value])/2;
+         }
+         printf("%.*s%10lld\t%.*s%10lld\t%.*s%10lld   %s min/median/max\n",
+            2*output_level, blank, global_values[0],
+            2*output_level, blank, median_value,
+            2*output_level, blank, global_values[numpe-1], units);
+      }
+   }
+}
+
+// Print `string`, then either one int per rank (numpe <= 4) or the min/median/max
+// over all ranks, followed by `units`. With MPI and numpe > 1 the values are gathered
+// on rank 0; only rank 0 prints. Indent is 2*output_level blanks (at most 10).
+void Mesh::parallel_output(const char *string, int local_value, int output_level, const char *units)
+{
+   vector<int> global_values(numpe);
+   global_values[0] = local_value;
+#ifdef HAVE_MPI
+   if (numpe > 1) {
+      MPI_Gather(&local_value, 1, MPI_INT, &global_values[0], 1, MPI_INT, 0, MPI_COMM_WORLD);
+   }
+#endif
+   if (mype == 0) {
+      const char *blank="          ";
+      printf("%s\t",string);
+      if (numpe <= 4) {
+         for(int ip = 0; ip < numpe; ip++){
+            printf("%.*s%10d\t", 2*output_level, blank, global_values[ip]);
+         }
+         printf("%s\n",units);
+      } else {
+         sort(global_values.begin(),global_values.end());
+         int half_value = numpe/2;
+         // Odd count: the median is the middle sorted element, index numpe/2 (not numpe/2+1).
+         int median_value = global_values[half_value];
+         if (numpe%2 == 0) { // Even count: integer average of the two middle elements.
+            median_value = (global_values[half_value-1]+global_values[half_value])/2;
+         }
+         printf("%.*s%10d\t%.*s%10d\t%.*s%10d   %s min/median/max\n",
+            2*output_level, blank, global_values[0],
+            2*output_level, blank, median_value,
+            2*output_level, blank, global_values[numpe-1], units);
+      }
+   }
+}
+
+const int CRUX_MESH_VERSION = 103;  // stored as int_vals[0] by store_checkpoint()
+const int num_int_dist_vals = 3;    // length of int_dist_vals: ncells, ncells_ghost, offtile_local_count
+const int num_int_vals      = 3;    // length of int_vals: CRUX_MESH_VERSION, ndim, levmx
+const int num_double_vals   = 1;    // length of double_vals: offtile_ratio_local
+
+// Byte count for a mesh checkpoint: the three scalar arrays, 2*MESH_COUNTER_SIZE ints,
+// MESH_TIMER_SIZE doubles plus MESH_TIMER_SIZE longs, and three ints per cell.
+size_t Mesh::get_checkpoint_size(void)
+{
+   const size_t scalar_bytes  = num_int_dist_vals*sizeof(int) + num_int_vals*sizeof(int)
+                              + num_double_vals*sizeof(double);
+   const size_t counter_bytes = 2*MESH_COUNTER_SIZE*sizeof(int);
+   const size_t timer_bytes   = MESH_TIMER_SIZE*sizeof(double) + MESH_TIMER_SIZE*sizeof(long);
+   const size_t cell_bytes    = ncells*3*sizeof(int);
+
+   return(scalar_bytes + counter_bytes + timer_bytes + cell_bytes);
+}
+
+void Mesh::store_checkpoint(Crux *crux)
+{
+   // The allocated capacity goes first; restore_checkpoint reads it back
+   // before it allocates the mesh arrays.
+   int storage = mesh_memory.get_memory_capacity(level);
+   crux->store_named_ints("storage", 7, &storage, 1);
+
+   // Scalars are packed into temporary arrays so they can be registered
+   // with the memory database like any other array.
+   // -- values that will be different on every processor
+   int int_dist_vals[num_int_dist_vals];
+   int_dist_vals[0] = (int)ncells;
+   int_dist_vals[1] = (int)ncells_ghost;
+   int_dist_vals[2] = offtile_local_count;
+
+   // -- values registered as replicated data
+   int int_vals[num_int_vals];
+   int_vals[0] = CRUX_MESH_VERSION;
+   int_vals[1] = ndim;
+   int_vals[2] = levmx;
+
+   double double_vals[num_double_vals];
+   double_vals[0] = offtile_ratio_local;
+
+   int local_flags      = RESTART_DATA;
+   int replicated_flags = RESTART_DATA | REPLICATED_DATA;
+
+   // Register the temporaries, counters and timers for this checkpoint
+   mesh_memory.memory_add(int_dist_vals, (size_t)num_int_dist_vals, 4, "mesh_int_dist_vals", local_flags);
+   mesh_memory.memory_add(int_vals, (size_t)num_int_vals, 4, "mesh_int_vals", replicated_flags);
+   mesh_memory.memory_add(double_vals, (size_t)num_double_vals, 8, "mesh_double_vals", local_flags);
+   mesh_memory.memory_add(cpu_counters, (size_t)MESH_COUNTER_SIZE, 4, "mesh_cpu_counters", local_flags);
+   mesh_memory.memory_add(gpu_counters, (size_t)MESH_COUNTER_SIZE, 4, "mesh_gpu_counters", local_flags);
+   mesh_memory.memory_add(cpu_timers, (size_t)MESH_TIMER_SIZE, 8, "mesh_cpu_timers", local_flags);
+   mesh_memory.memory_add(gpu_timers, (size_t)MESH_TIMER_SIZE, 8, "mesh_gpu_timers", local_flags);
+
+   // Hand the whole MallocPlus database to the checkpoint writer
+   crux->store_MallocPlus(mesh_memory);
+
+   // Take the entries out of the database again now that they are stored
+   mesh_memory.memory_remove(int_dist_vals);
+   mesh_memory.memory_remove(int_vals);
+   mesh_memory.memory_remove(double_vals);
+   mesh_memory.memory_remove(cpu_counters);
+   mesh_memory.memory_remove(gpu_counters);
+   mesh_memory.memory_remove(cpu_timers);
+   mesh_memory.memory_remove(gpu_timers);
+}
+
+void Mesh::restore_checkpoint(Crux *crux)
+{
+   // Rebuild the mesh from a checkpoint: reallocate the cell arrays, then read the
+   // scalar arrays, counters and timers back through the mesh_memory database.
+   int storage;
+   crux->restore_named_ints("storage", 7, &storage, 1);
+   int int_dist_vals[num_int_dist_vals];   // temporaries the checkpoint is read into
+   int int_vals[num_int_vals];
+   double double_vals[num_double_vals];
+
+   mesh_memory.memory_delete(nlft);
+   mesh_memory.memory_delete(nrht);
+   mesh_memory.memory_delete(nbot);
+   mesh_memory.memory_delete(ntop);
+   mesh_memory.memory_delete(celltype);
+
+   nlft = NULL;
+   nrht = NULL;
+   ntop = NULL;
+   nbot = NULL;
+   celltype = NULL;
+
+   // The neighbor and celltype entries were deleted from mesh_memory above;
+   // allocate() is now called with the capacity recorded in the checkpoint.
+   // (resize(storage) followed by memory_reset_ptrs() is the disabled alternative.)
+   allocate (storage);
+
+   int flags = RESTART_DATA;
+   // Now add memory entries to database for restoring checkpoint
+   mesh_memory.memory_add(int_dist_vals, (size_t)num_int_dist_vals, 4, "mesh_int_dist_vals", flags);
+   flags = RESTART_DATA | REPLICATED_DATA;
+   mesh_memory.memory_add(int_vals, (size_t)num_int_vals, 4, "mesh_int_vals", flags);
+   // double_vals holds a per-processor value, so use the same flags store_checkpoint wrote it with
+   flags = RESTART_DATA;
+   mesh_memory.memory_add(double_vals, (size_t)num_double_vals, 8, "mesh_double_vals", flags);
+   mesh_memory.memory_add(cpu_counters, (size_t)MESH_COUNTER_SIZE, 4, "mesh_cpu_counters", flags);
+   mesh_memory.memory_add(gpu_counters, (size_t)MESH_COUNTER_SIZE, 4, "mesh_gpu_counters", flags);
+
+   mesh_memory.memory_add(cpu_timers, (size_t)MESH_TIMER_SIZE, 8, "mesh_cpu_timers", flags);
+   mesh_memory.memory_add(gpu_timers, (size_t)MESH_TIMER_SIZE, 8, "mesh_gpu_timers", flags);
+
+   // Restore MallocPlus memory database
+   crux->restore_MallocPlus(mesh_memory);
+
+   // Remove memory entries from database now that data is restored
+   mesh_memory.memory_remove(int_dist_vals);
+   mesh_memory.memory_remove(int_vals);
+   mesh_memory.memory_remove(double_vals);
+   mesh_memory.memory_remove(cpu_counters);
+   mesh_memory.memory_remove(gpu_counters);
+   mesh_memory.memory_remove(cpu_timers);
+   mesh_memory.memory_remove(gpu_timers);
+
+   // Check version number
+   if (int_vals[ 0] != CRUX_MESH_VERSION) {
+      printf("CRUX version mismatch for mesh data, version on file is %d, version in code is %d\n",
+         int_vals[0], CRUX_MESH_VERSION);
+      exit(1);   // an unusable checkpoint is a failure, so do not report success
+   }
+
+   // Copy out scalar values from array
+   ncells                    = int_dist_vals[ 0];
+   ncells_ghost              = int_dist_vals[ 1];
+   offtile_local_count       = int_dist_vals[ 2];
+
+   // Copy out scalar values from array
+   ndim                      = int_vals[ 1];
+   levmx                     = int_vals[ 2];
+
+#ifdef DEBUG_RESTORE_VALS
+   if (DEBUG_RESTORE_VALS && mype == 0) {
+      const char *int_dist_vals_descriptor[num_int_dist_vals] = {
+         "ncells",
+         "ncells_ghost",
+         "offtile_local_count"
+      };
+      const char *int_vals_descriptor[num_int_vals] = {
+         "CRUX_MESH_VERSION",
+         "ndim",
+         "levmx",
+      };
+      printf("\n");
+      printf("       === Restored mesh int_dist_vals ===\n");
+      for (int i = 0; i < num_int_dist_vals; i++){
+         printf("       %-30s %d\n",int_dist_vals_descriptor[i], int_dist_vals[i]);
+      }
+      printf("       === Restored mesh int_vals ===\n");
+      for (int i = 0; i < num_int_vals; i++){
+         printf("       %-30s %d\n",int_vals_descriptor[i], int_vals[i]);
+      }
+      printf("       === Restored mesh int_vals ===\n");
+      printf("\n");
+   }
+#endif
+
+   offtile_ratio_local = double_vals[0];
+
+#ifdef DEBUG_RESTORE_VALS
+   if (DEBUG_RESTORE_VALS && mype == 0) {
+      const char *double_vals_descriptor[num_double_vals] = {
+         "offtile_ratio_local"
+      };
+      printf("\n");
+      printf("       === Restored mesh double_vals ===\n");
+      for (int i = 0; i < num_double_vals; i++){
+         printf("       %-30s %lf\n",double_vals_descriptor[i], double_vals[i]);
+      }
+      printf("       === Restored mesh double_vals ===\n");
+      printf("\n");
+   }
+#endif
+
+#ifdef DEBUG_RESTORE_VALS
+   if (DEBUG_RESTORE_VALS && mype == 0) {
+      printf("       === Restored mesh cpu counters ===\n");
+      for (int i = 0; i < MESH_COUNTER_SIZE; i++){
+         printf("       %-30s %d\n",mesh_counter_descriptor[i], cpu_counters[i]);
+      }
+      printf("       === Restored mesh cpu counters ===\n");
+      printf("       === Restored mesh gpu counters ===\n");
+      for (int i = 0; i < MESH_COUNTER_SIZE; i++){
+         printf("       %-30s %d\n",mesh_counter_descriptor[i], gpu_counters[i]);
+      }
+      printf("       === Restored mesh gpu counters ===\n");
+      printf("\n");
+   }
+#endif
+
+#ifdef DEBUG_RESTORE_VALS
+   if (DEBUG_RESTORE_VALS && mype == 0) {
+      printf("       === Restored mesh cpu timers ===\n");
+      for (int i = 0; i < MESH_TIMER_SIZE; i++){
+         printf("       %-30s %lf\n",mesh_timer_descriptor[i], cpu_timers[i]);
+      }
+      printf("       === Restored mesh cpu timers ===\n");
+      printf("\n");
+   }
+#endif
+
+#ifdef DEBUG_RESTORE_VALS
+   if (DEBUG_RESTORE_VALS && mype == 0) {
+      printf("\n");
+      printf("       === Restored mesh gpu timers ===\n");
+      for (int i = 0; i < MESH_TIMER_SIZE; i++){
+         printf("       %-30s %lld\n",mesh_timer_descriptor[i], (long long)gpu_timers[i]);
+      }
+      printf("       === Restored mesh gpu timers ===\n");
+      printf("\n");
+   }
+#endif
+   // NOTE: celltype is not recomputed here; the calc_celltype(ncells) call is disabled.
+}
+
+
+// Exclusive prefix sum: output[i] = input[0] + ... + input[i-1]. This code due to Matt Calef
+void scan ( scanInt *input , scanInt *output , scanInt length) 
+{
+#ifdef _OPENMP
+   // This already assumes it is in a parallel region
+   // (the barrier and single constructs below are orphaned).
+   // Get the total number of threads
+
+   scanInt numThreads = omp_get_num_threads ( );
+
+   // Compute the range for which this thread is responsible.
+
+   scanInt threadID   = omp_get_thread_num ( );
+   scanInt start = length * ( threadID     ) / numThreads;
+   scanInt end   = length * ( threadID + 1 ) / numThreads;
+
+   // In the case that there are fewer entries than threads, some
+   // threads will have no entries.  Only perform this operation if
+   // there is a positive number of entries.
+
+   if ( start < end ) {
+
+       // Do a scan over the region for this thread, with an initial
+       // value of zero.
+
+       output[start] = 0;
+       for ( scanInt i = start + 1 ; i < end ; i++ ) 
+          output[i] = output[i-1] + input[i-1];
+   }
+    
+   // Wait until all threads get here. 
+
+#pragma omp barrier
+    
+   // At this point each thread has done an independent scan of its
+   // region.  All scans, except the first, are off by an
+   // offset. Here we have a single thread compute that offset with a
+   // serial scan that strides over the regions assigned to each
+   // thread.
+
+#pragma omp single
+   for ( scanInt i = 1 ; i < numThreads ; i ++ ) {
+      scanInt s0 = length * ( i - 1 ) / numThreads;
+      scanInt s1 = length * ( i     ) / numThreads;
+      // [s0, s1) is the region of thread i-1; output[s1] becomes the offset for thread i.
+      if ( s0 < s1 ) 
+         output[s1] = output[s0] + input[s1-1];
+      // NOTE(review): if scanInt is an unsigned type, s1 - 1 wraps when s1 == 0 -- confirm the typedef.
+      if ( s0 < s1 - 1 )
+         output[s1] += output[s1-1];
+   }
+
+   // Barrier is implicit from omp single Wait until all threads get here. 
+
+   // Apply the offset to the range for this thread.
+    
+   for ( scanInt i = start + 1 ; i < end ; i++ ) 
+      output[i] += output[start];
+
+#else
+   output[0] = 0;
+   for (int ic = 0; ic < length; ic++){
+      output[ic+1] = output[ic] + input[ic]; // NOTE(review): last pass writes output[length]; the OpenMP path never does -- confirm callers size output as length+1
+   }
+#endif
+}
+/**
+ * Fetch the lower and upper cell bounds assigned to the calling thread.
+ */
+void Mesh::get_bounds(int& lowerBound, int& upperBound){
+#ifdef _OPENMP
+   // Bounds are read from the per-thread tables filled in by set_bounds().
+   const int tid = omp_get_thread_num();
+   lowerBound = lowerBound_Global[tid];
+   upperBound = upperBound_Global[tid];
+#else
+   lowerBound = 0;          // serial build: one range covering all cells
+   upperBound = ncells;
+#endif
+}
+
+/**
+ * Divide n items among the threads of the current team and record each
+ * thread's lower/upper bound in lowerBound_Global / upperBound_Global.
+ */
+void Mesh::set_bounds(int n){
+
+#ifdef _OPENMP
+   // Presumably called from inside a parallel region, so every thread runs this.
+   const int team_size = omp_get_num_threads();
+   const int tid       = omp_get_thread_num();
+
+   // The master thread creates the shared tables on first use ...
+   #pragma omp master
+   {
+      if (lowerBound_Global == NULL) lowerBound_Global = (int *)malloc(team_size*sizeof(int));
+      if (upperBound_Global == NULL) upperBound_Global = (int *)malloc(team_size*sizeof(int));
+   }
+   // ... and all threads wait here before writing their own entries.
+   #pragma omp barrier
+
+   // Each thread gets n/team_size items; the first n%team_size threads get one more.
+   const int base_share = n / team_size;
+   const int leftover   = n % team_size;
+   const int first      = (base_share*tid) + min(leftover, tid);
+   const int count      = (tid < leftover) ? base_share + 1 : base_share;
+   lowerBound_Global[tid] = first;
+   upperBound_Global[tid] = first + count;
+#else
+   // Serial build: a single entry spanning all ncells cells (n is not used here).
+   if (lowerBound_Global == NULL) lowerBound_Global = (int *)malloc(1*sizeof(int));
+   if (upperBound_Global == NULL) upperBound_Global = (int *)malloc(1*sizeof(int));
+   lowerBound_Global[0] = 0;
+   upperBound_Global[0] = ncells;
+#endif
+
+}
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/partition.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/partition.h
@@ -0,0 +1,113 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+#ifndef PARTITION_H
+#define PARTITION_H
+
+#include <vector>
+
+#include "input.h"
+
+using namespace std;
+
+enum partition_method {
+   ORIGINAL_ORDER,      // print_partition_type reports this as naive initial order / no cycle reorder
+   HILBERT_SORT,        // reported as "Hilbert sort"
+   HILBERT_PARTITION,   // reported as Hilbert partition
+   ZORDER               // reported as "Z order"
+};
+
+enum partition_measure {
+   NO_PARTITION_MEASURE,   // Mesh::partition_measure takes no sample
+   WITH_DUPLICATES,        // every off-tile neighbor reference counts, divided by TILE_SIZE
+   WITHOUT_DUPLICATES,     // distinct off-tile cells per tile, divided by TILE_SIZE
+   CVALUE,                 // distinct off-tile cells per tile, divided by 4*sqrt(TILE_SIZE)
+   CSTARVALUE              // additionally counts distinct groups of 4 consecutive cell indices ("cache lines")
+};
+
+
+void calc_distribution(int numpe, vector<int> &proc); // NOTE(review): definition not visible here; presumably fills proc for numpe ranks -- confirm
+// partition_cells is provided as the member function Mesh::partition_cells (see partition.cpp).
+
+typedef void (*maptonorm)( double * , double * , void * ); // function-pointer type: two double* arguments plus an opaque void*
+
+extern "C" void hsfc2sort(             /* HSFC ordering of N points; declared with C linkage */
+                const int      N ,     /* IN: Number of points */
+                const double * X ,     /* IN: array of X-Coordinates */
+                const double * Y ,     /* IN: array of Y-Coordinates */
+                const int      ibase ,    /* IN: Stride for Y array -- NOTE(review): hsfc2part documents ibase as "Base - 0 for C, 1 for Fortran"; confirm which applies */
+                int          * Info ,  /* OUT: (1 <= LDInfo) [ HSFC ordering ]
+                                  (2 <= LDInfo) [ HSFC index, #1 ]
+                                  (3 <= LDInfo) [ HSFC index, #2 ] */
+                int            LDInfo /* IN:  Leading dimension of Info */
+                );
+
+extern "C" void hsfc2part(            /* HSFC-based partitioning of N points into NPart parts; declared with C linkage */
+               const int      Level , /* IN: Background grid level of partitioning */
+               const int      Limit , /* IN: Number of levels to consider for 'gaps' */
+               const int      NPart , /* IN: Target number of partitions */
+               const int      N ,     /* IN: Number of points */
+               const double * X ,     /* IN: array of X-Coordinates */
+               const double * Y ,     /* IN: array of Y-Coordinates */
+               const int      ibase ,    /* IN: Base - 0 for C, 1 for Fortran */
+                     int    * Info ,  /* IN:  Array of computational weights,
+                                 OUT: (1 <= LDInfo) [ Partitioning ]
+                                 (2 <= LDInfo) [ Adjusted HSFC ordering ]
+                                 (3 <= LDInfo) [ Original HSFC index, #1 ]
+                                 (4 <= LDInfo) [ Original HSFC index, #2 ] */
+                     int      LDInfo );/* IN:  Leading dimension of Info (in/out use of Info is listed above) */
+
+
+#endif /* PARTITION_H */
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/partition.cpp
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/partition.cpp
@@ -0,0 +1,764 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+#ifdef HAVE_MPI
+#include "mpi.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <list>
+#include <algorithm>
+#include "partition.h"
+#include "KDTree.h"
+#include "mesh.h"
+#ifdef HAVE_MPI
+#include "s7.h"
+#endif
+#include "zorder.h"
+#include "timer.h"
+#include "hsfc.h"
+
+#ifndef DEBUG
+#define DEBUG 0
+#endif
+
+typedef unsigned int uint;
+
+int measure_type;                               // compared against the partition_measure enum values in Mesh::partition_measure
+int      meas_count                  = 0;       // number of samples taken by Mesh::partition_measure
+double   meas_sum_average            = 0.0;     // sum of the per-sample averages; divided by meas_count when printed
+
+extern bool localStencil;                       // defined elsewhere; reported by Mesh::print_partition_type
+extern enum partition_method initial_order;     // defined elsewhere; reported by Mesh::print_partition_type
+extern enum partition_method cycle_reorder;     // defined elsewhere; reported by Mesh::print_partition_type
+
+void Mesh::partition_measure(void) 
+{
+  // Add one partition-quality sample to meas_sum_average / meas_count.
+  // Cells are treated as consecutive tiles of TILE_SIZE entries and, per tile,
+  // neighbor references landing outside the tile are tallied as selected by
+  // measure_type. Under OpenMP the whole team must make this call together.
+  if (measure_type != NO_PARTITION_MEASURE){
+     int ntX     = TILE_SIZE; 
+     // Static so that the orphaned "omp for" reductions share one variable.
+     static double offtile_ratio = 0.0;
+     uint num_groups = (ncells + TILE_SIZE - 1)/TILE_SIZE;
+
+     // Zero the accumulator for this sample; being static, it otherwise kept
+     // the totals of all earlier calls and skewed every later sample.
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp single
+#endif
+     offtile_ratio = 0.0;
+
+     if (measure_type == WITH_DUPLICATES) {
+        // The cell index is derived per tile (not from a shared running counter) so threads do not race.
+#ifdef _OPENMP
+#pragma omp for reduction(+:offtile_ratio)
+#endif
+        for (uint group_id=0; group_id < num_groups; group_id ++){ 
+ 
+           int start_idx = group_id * ntX;
+           int end_idx   = (group_id + 1) * ntX; 
+
+           int offtile =0;
+           for (uint ic = 0; ic < TILE_SIZE; ic++){ 
+              int i = start_idx + ic;
+              if (i >= ncells) continue;
+              //taken from wave_kern_calc.cl 'setup tile' kernel
+              if (nlft[i] < start_idx || nlft[i] >= end_idx) offtile++; 
+              if (level[nlft[i]] > level[i] &&
+                 (ntop[nlft[i]] < start_idx || ntop[nlft[i]] >= end_idx) ) offtile++;
+              if (nrht[i] < start_idx || nrht[i] >= end_idx) offtile++; 
+              if (level[nrht[i]] > level[i] &&
+                 (ntop[nrht[i]] < start_idx || ntop[nrht[i]] >= end_idx) ) offtile++;
+              if (nbot[i] < start_idx || nbot[i] >= end_idx) offtile++; 
+              if (level[nbot[i]] > level[i] &&
+                 (nrht[nbot[i]] < start_idx || nrht[nbot[i]] >= end_idx) ) offtile++;
+              if (ntop[i] < start_idx || ntop[i] >= end_idx) offtile++; 
+              if (level[ntop[i]] > level[i] &&
+                 (nrht[ntop[i]] < start_idx || nrht[ntop[i]] >= end_idx) ) offtile++;
+           }
+           offtile_ratio += (double)offtile/(double)(TILE_SIZE);
+        }
+     } else if (measure_type == WITHOUT_DUPLICATES) {
+        // Same tile walk, but each off-tile cell is counted once per tile.
+#ifdef _OPENMP
+#pragma omp for reduction(+:offtile_ratio)
+#endif
+        for (uint group_id=0; group_id < num_groups; group_id ++){ 
+           list<int> offtile_list;
+ 
+           int start_idx = group_id * ntX;
+           int end_idx   = (group_id + 1) * ntX; 
+
+           for (uint ic = 0; ic < TILE_SIZE; ic++){ 
+              int i = start_idx + ic;
+              if (i >= ncells) continue;
+
+              if (nlft[i] < start_idx || nlft[i] >= end_idx) offtile_list.push_back(nlft[i]);
+              if (level[nlft[i]] > level[i] &&
+                 (ntop[nlft[i]] < start_idx || ntop[nlft[i]] >= end_idx) ) offtile_list.push_back(ntop[nlft[i]]);
+              if (nrht[i] < start_idx || nrht[i] >= end_idx) offtile_list.push_back(nrht[i]);
+              if (level[nrht[i]] > level[i] &&
+                 (ntop[nrht[i]] < start_idx || ntop[nrht[i]] >= end_idx) ) offtile_list.push_back(ntop[nrht[i]]);
+              if (nbot[i] < start_idx || nbot[i] >= end_idx) offtile_list.push_back(nbot[i]);
+              if (level[nbot[i]] > level[i] &&
+                 (nrht[nbot[i]] < start_idx || nrht[nbot[i]] >= end_idx) ) offtile_list.push_back(nrht[nbot[i]]);
+              if (ntop[i] < start_idx || ntop[i] >= end_idx) offtile_list.push_back(ntop[i]);
+              if (level[ntop[i]] > level[i] &&
+                 (nrht[ntop[i]] < start_idx || nrht[ntop[i]] >= end_idx) ) offtile_list.push_back(nrht[ntop[i]]);
+           }
+           offtile_list.sort();
+           offtile_list.unique();
+        
+           offtile_ratio += (double)offtile_list.size()/(double)(TILE_SIZE);
+        }
+     } else if (measure_type == CVALUE) {
+        // Distinct off-tile cells per tile, normalized by 4*sqrt(TILE_SIZE).
+#ifdef _OPENMP
+#pragma omp for reduction(+:offtile_ratio)
+#endif
+        for (uint group_id=0; group_id < num_groups; group_id ++){ 
+           list<int> offtile_list;
+ 
+           int start_idx = group_id * ntX;
+           int end_idx   = (group_id + 1) * ntX; 
+
+           for (uint ic = 0; ic < TILE_SIZE; ic++){ 
+              int i = start_idx + ic;
+              if (i >= ncells) continue;
+
+              if (nlft[i] < start_idx || nlft[i] >= end_idx) offtile_list.push_back(nlft[i]);
+              if (level[nlft[i]] > level[i] &&
+                 (ntop[nlft[i]] < start_idx || ntop[nlft[i]] >= end_idx) ) offtile_list.push_back(ntop[nlft[i]]);
+              if (nrht[i] < start_idx || nrht[i] >= end_idx) offtile_list.push_back(nrht[i]);
+              if (level[nrht[i]] > level[i] &&
+                 (ntop[nrht[i]] < start_idx || ntop[nrht[i]] >= end_idx) ) offtile_list.push_back(ntop[nrht[i]]);
+              if (nbot[i] < start_idx || nbot[i] >= end_idx) offtile_list.push_back(nbot[i]);
+              if (level[nbot[i]] > level[i] &&
+                 (nrht[nbot[i]] < start_idx || nrht[nbot[i]] >= end_idx) ) offtile_list.push_back(nrht[nbot[i]]);
+              if (ntop[i] < start_idx || ntop[i] >= end_idx) offtile_list.push_back(ntop[i]);
+              if (level[ntop[i]] > level[i] &&
+                 (nrht[ntop[i]] < start_idx || nrht[ntop[i]] >= end_idx) ) offtile_list.push_back(nrht[ntop[i]]);
+           }
+           offtile_list.sort();
+           offtile_list.unique();
+        
+           offtile_ratio += (double)offtile_list.size()/(4*sqrt((double)(TILE_SIZE)));
+        }
+     } else if (measure_type == CSTARVALUE) {
+        // Tracks distinct off-tile cells and the distinct "cache lines" they fall in.
+#ifdef _OPENMP
+#pragma omp for reduction(+:offtile_ratio)
+#endif
+        for (uint group_id=0; group_id < num_groups; group_id ++){ 
+           list<int> offtile_list;
+           list<int> offtile_cache_lines; // Assumes memory is aligned
+           int cache_line_size = 4; // Some could be 8, or more?
+ 
+           int start_idx = group_id * ntX;
+           int end_idx   = (group_id + 1) * ntX; 
+
+           for (uint ic = 0; ic < TILE_SIZE; ic++){ 
+              int i = start_idx + ic;
+              if (i >= ncells) continue;
+
+              if (nlft[i] < start_idx || nlft[i] >= end_idx) {
+                  offtile_list.push_back(nlft[i]);
+                  offtile_cache_lines.push_back(nlft[i]/cache_line_size);
+              }
+               
+              if (level[nlft[i]] > level[i] && (ntop[nlft[i]] < start_idx || ntop[nlft[i]] >= end_idx) ) {
+                  offtile_list.push_back(ntop[nlft[i]]);
+                  offtile_cache_lines.push_back(ntop[nlft[i]]/cache_line_size);
+              }
+              if (nrht[i] < start_idx || nrht[i] >= end_idx) {
+                  offtile_list.push_back(nrht[i]);
+                  offtile_cache_lines.push_back(nrht[i]/cache_line_size);
+              }
+              if (level[nrht[i]] > level[i] && (ntop[nrht[i]] < start_idx || ntop[nrht[i]] >= end_idx) ) {
+                  offtile_list.push_back(ntop[nrht[i]]);
+                  offtile_cache_lines.push_back(ntop[nrht[i]]/cache_line_size);
+              }
+              if (nbot[i] < start_idx || nbot[i] >= end_idx) {
+                  offtile_list.push_back(nbot[i]);
+                  offtile_cache_lines.push_back(nbot[i]/cache_line_size);
+              }
+              if (level[nbot[i]] > level[i] && (nrht[nbot[i]] < start_idx || nrht[nbot[i]] >= end_idx) ) {
+                  offtile_list.push_back(nrht[nbot[i]]);
+                  offtile_cache_lines.push_back(nrht[nbot[i]]/cache_line_size);
+              }
+              if (ntop[i] < start_idx || ntop[i] >= end_idx) {
+                  offtile_list.push_back(ntop[i]);
+                  offtile_cache_lines.push_back(ntop[i]/cache_line_size);
+              }
+              if (level[ntop[i]] > level[i] && (nrht[ntop[i]] < start_idx || nrht[ntop[i]] >= end_idx) ) {
+                  offtile_list.push_back(nrht[ntop[i]]);
+                  offtile_cache_lines.push_back(nrht[ntop[i]]/cache_line_size);
+              }
+           }
+           offtile_list.sort();
+           offtile_list.unique();
+           offtile_cache_lines.sort();
+           offtile_cache_lines.unique();
+
+           double s_ngeom = (double)(offtile_list.size());
+           double q_ngeom = (double)(offtile_cache_lines.size());
+           double ngeom = (double)(TILE_SIZE);
+           double cover = (double)(cache_line_size);
+           // From here on ngeom is sqrt(TILE_SIZE).
+           ngeom = sqrt(ngeom);
+           offtile_ratio += (s_ngeom*q_ngeom*cover) / ( 4 * ngeom * (8*ngeom+cover-1) );
+        }
+     } 
+
+     // Only the master thread updates the file-level running totals.
+#ifdef _OPENMP
+#pragma omp master
+#endif
+     {
+         meas_count ++;
+         meas_sum_average  += offtile_ratio/(double)num_groups;
+     }
+  } // if PARTITION TYPE
+}
+
+void Mesh::print_partition_measure()
+{
+   // Report the running average only once at least one sample was taken.
+   if (meas_count != 0) {
+      const double average = meas_sum_average/(double)meas_count;
+      switch (measure_type) {
+      case NO_PARTITION_MEASURE: if (mype == 0) printf("No Partition Measure\n"); break;
+      case WITH_DUPLICATES:      parallel_output("Average surface area to volume ratio  ", average, 0, "with duplicates"); break;
+      case WITHOUT_DUPLICATES:   parallel_output("Average surface area to volume ratio  ", average, 0, "without duplicates"); break;
+      case CVALUE:               parallel_output("Partition Quality Avg C value     ", average, 0, ""); break;
+      case CSTARVALUE:           parallel_output("Partition Quality Avg C* value     ", average, 0, ""); break;
+      default:                   break;
+      }
+   }
+
+   // With more than one rank, also report offtile_ratio_local across the ranks.
+   if (numpe > 1){
+      parallel_output("The MPI surface area to volume ratio ", offtile_ratio_local, 0, "without duplicates");
+   }
+}
+
+void Mesh::print_partition_type()
+{
+   // One-line summary of the ordering options, printed by rank 0 only.
+   if (mype == 0) {
+      if (initial_order == ORIGINAL_ORDER) {
+         printf("Initial order is naive.");  
+      } else if (initial_order == HILBERT_SORT) {
+         printf("Initial order is Hilbert sort.");  
+      } else if (initial_order == HILBERT_PARTITION) {
+         printf("Initial order is Hilbert partition.");  
+      } else if (initial_order == ZORDER) {
+         printf("Initial order is Z order.");  
+      }
+
+      if (cycle_reorder == ORIGINAL_ORDER) {
+         printf("   No cycle reorder.");  
+      } else if (cycle_reorder == HILBERT_SORT) {
+         printf("   Cycle reorder is Hilbert sort.");  
+      } else if (cycle_reorder == HILBERT_PARTITION) {
+         printf("   Cycle reorder is Hilbert partition.");  
+      } else if (cycle_reorder == ZORDER) {
+         printf("   Cycle reorder is Z order.");  
+      }
+
+      if (localStencil) {
+         printf("   Local Stencil is on.\n");  
+      } else {
+         printf("\n");
+      }
+   }
+}
+void Mesh::partition_cells(
+                    int          numpe,             //  Number of ranks / work items to partition over.
+                    vector<int> &z_order,           //  Resulting index ordering.
+                    enum partition_method method)   //  Assigned partitioning method.
+{  //  Builds z_order for the requested method and reorders the mesh arrays to match it.
+   int           *info;      //  Scratch buffer filled by hsfc2sort; its first entries are the new order.
+   double         iscale,    //  Scale factor applied in the x-direction.
+                  jscale;    //  Scale factor applied in the y-direction.
+   int            imax,      //  Maximum x-index.
+                  jmax;      //  Maximum y-index.
+   vector<int>    z_index;   //  Curve index filled by calc_zorder (serial Z-order path).
+   vector<int>    i_scaled;  //  x-indices rescaled to [0, 16] for calc_zorder.
+   vector<int>    j_scaled;  //  y-indices rescaled to [0, 16] for calc_zorder.
+   vector<double> iunit;     //  Cell-center x-coordinates normalized to [0, 1] for hsfc.
+   vector<double> junit;     //  Cell-center y-coordinates normalized to [0, 1] for hsfc.
+
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+
+   //  Initialize ordered curve index.
+   z_index.resize(ncells, 0);
+   //  z_order is not resized here: the caller must already provide ncells entries.
+
+   if (parallel) {
+#ifdef HAVE_MPI
+      nsizes.resize(numpe);
+      ndispl.resize(numpe);
+      MPI_Allgather(&ncells, 1, MPI_INT, &nsizes[0], 1, MPI_INT, MPI_COMM_WORLD);
+      ndispl[0]=0;
+      for (int ip=1; ip<numpe; ip++){
+         ndispl[ip] = ndispl[ip-1] + nsizes[ip-1];
+      }
+      noffset=0;
+      for (int ip=0; ip<mype; ip++){
+        noffset += nsizes[ip];
+      }
+#endif
+   } else {
+      //   Adjust the number of required work items to the number of cells.
+      proc.resize(ncells);
+      //   Decompose the domain equitably.
+      calc_distribution(numpe);
+      noffset = 0;
+   }
+
+
+   //  Partition cells according to one of several possible orderings.
+   int have_spatial_variables=0;
+   switch (method)
+   {   case ORIGINAL_ORDER:
+         //  Set z_order to the current cell order.
+         for (uint ic = 0; ic < ncells; ++ic)
+         {   z_order[ic] = ic; }
+
+         cpu_timers[MESH_TIMER_PARTITION] += cpu_timer_stop(tstart_cpu);
+
+         return;
+         break;
+
+       case HILBERT_SORT:
+         //  Resort the curve by Hilbert order.
+         have_spatial_variables = 1;
+         if (x.size() < ncells) {
+            calc_spatial_coordinates(0);
+            have_spatial_variables = 0;
+         }
+         calc_centerminmax();
+         iunit.resize(ncells);
+         junit.resize(ncells);
+
+         //   Scale each direction's center range to unit length; a zero-width range scales to 0, not inf.
+         iscale = (xcentermax > xcentermin) ? 1.0 / (xcentermax - xcentermin) : 0.0;
+         jscale = (ycentermax > ycentermin) ? 1.0 / (ycentermax - ycentermin) : 0.0;
+
+         //   Scale the indices to a normalized [0, 1] range for hsfc.
+         for (uint ic = 0; ic < ncells; ++ic){
+             iunit[ic] = (x[ic] + 0.5 * dx[ic] - xcentermin) * iscale;
+             junit[ic] = (y[ic] + 0.5 * dy[ic] - ycentermin) * jscale;
+         }
+
+         if (have_spatial_variables == 0){
+            x.clear();
+            dx.clear();
+            y.clear();
+            dy.clear();
+         }
+
+         if (parallel){
+#ifdef HAVE_MPI
+            info = (int *)malloc(sizeof(int) * 3 * ncells_global);
+            vector<double>iunit_global(ncells_global);
+            vector<double>junit_global(ncells_global);
+            vector<int>z_order_global(ncells_global);
+
+            MPI_Allgatherv(&iunit[0], ncells, MPI_DOUBLE, &iunit_global[0], &nsizes[0], &ndispl[0], MPI_DOUBLE, MPI_COMM_WORLD);
+            MPI_Allgatherv(&junit[0], ncells, MPI_DOUBLE, &junit_global[0], &nsizes[0], &ndispl[0], MPI_DOUBLE, MPI_COMM_WORLD);
+            //   Sort the mesh into an ordered space-filling curve from hsfc.
+            hsfc2sort(ncells_global, &iunit_global[0], &junit_global[0], 0, info, 1);
+
+            //   Copy the cell order information from info into z_order.
+            for (uint ic = 0; ic < ncells_global; ++ic)
+            {   z_order_global[ic] = info[ic]; }
+            free(info);
+
+            //   Order the mesh according to the calculated order (note that z_order is for both curves).
+            vector<int> int_global(ncells_global);
+            vector<int> int_global_new(ncells_global);
+
+            // gather, reorder and scatter i
+            MPI_Allgatherv(&i[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+            for (int ic = 0; ic<(int)ncells_global; ic++){
+               int_global_new[ic] = int_global[z_order_global[ic]];
+            }
+            MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &i[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
+
+            // gather, reorder and scatter j
+            MPI_Allgatherv(&j[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+            for (int ic = 0; ic<(int)ncells_global; ic++){
+               int_global_new[ic] = int_global[z_order_global[ic]];
+            }
+            MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &j[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
+
+            // gather, reorder and scatter level
+            MPI_Allgatherv(&level[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+            for (int ic = 0; ic<(int)ncells_global; ic++){
+               int_global_new[ic] = int_global[z_order_global[ic]];
+            }
+            MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &level[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
+
+            // It is faster just to recalculate these variables instead of communicating them
+            if (mesh_memory.get_memory_size(celltype) >= ncells) {
+               calc_celltype(mesh_memory.get_memory_size(celltype));
+            }
+
+            if (have_spatial_variables) {
+               calc_spatial_coordinates(0);
+            }
+
+            if (mesh_memory.get_memory_size(nlft) >= ncells) {
+               vector<int> inv_z_order(ncells_global);
+               for (int ic = 0; ic<(int)ncells_global; ic++){
+                  inv_z_order[z_order_global[ic]] = ic;
+               }
+
+               MPI_Allgatherv(&nlft[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+               for (int ic = 0; ic<(int)ncells_global; ic++){
+                  int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]];
+               }
+               MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &nlft[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
+
+               MPI_Allgatherv(&nrht[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+               for (int ic = 0; ic<(int)ncells_global; ic++){
+                  int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]];
+               }
+               MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &nrht[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
+
+               MPI_Allgatherv(&nbot[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+               for (int ic = 0; ic<(int)ncells_global; ic++){
+                  int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]];
+               }
+               MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &nbot[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
+
+               MPI_Allgatherv(&ntop[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+               for (int ic = 0; ic<(int)ncells_global; ic++){
+                  int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]];
+               }
+               MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &ntop[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
+            }
+
+            MPI_Scatterv(&z_order_global[0], &nsizes[0], &ndispl[0], MPI_INT, &z_order[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
+#endif
+         } else {
+            info = (int *)malloc(sizeof(int) * 3 * ncells);
+
+            //   Sort the mesh into an ordered space-filling curve from hsfc.
+            hsfc2sort(ncells, &iunit[0], &junit[0], 0, info, 1);
+
+            //   Copy the cell order information from info into z_order.
+            for (uint ic = 0; ic < ncells; ++ic)
+            {   z_order[ic] = info[ic]; }
+            free(info);
+
+            //   Order the mesh according to the calculated order (note that z_order is for both curves).
+            //   NOTE(review): attribute 0x100 presumably marks the neighbor arrays as index
+            //   arrays whose stored values must be remapped too -- confirm in the memory manager.
+            mesh_memory.set_memory_attribute(nlft, 0x100);
+            mesh_memory.set_memory_attribute(nrht, 0x100);
+            mesh_memory.set_memory_attribute(nbot, 0x100);
+            mesh_memory.set_memory_attribute(ntop, 0x100);
+
+            mesh_memory.memory_reorder_all(&z_order[0]);
+            memory_reset_ptrs();
+
+            if (x.size() >= ncells) {
+               vector<spatial_t> real_local(ncells);
+
+               for (int ic = 0; ic<(int)ncells; ic++){
+                  real_local[ic] = x[ic];
+               }
+               for (int ic = 0; ic<(int)ncells; ic++){
+                  x[ic] = real_local[z_order[ic]];
+               }
+
+               for (int ic = 0; ic<(int)ncells; ic++){
+                  real_local[ic] = dx[ic];
+               }
+               for (int ic = 0; ic<(int)ncells; ic++){
+                  dx[ic] = real_local[z_order[ic]];
+               }
+
+               for (int ic = 0; ic<(int)ncells; ic++){
+                  real_local[ic] = y[ic];
+               }
+               for (int ic = 0; ic<(int)ncells; ic++){
+                  y[ic] = real_local[z_order[ic]];
+               }
+
+               for (int ic = 0; ic<(int)ncells; ic++){
+                  real_local[ic] = dy[ic];
+               }
+               for (int ic = 0; ic<(int)ncells; ic++){
+                  dy[ic] = real_local[z_order[ic]];
+               }
+            }
+
+         }
+
+         break;
+
+      case ZORDER:
+         //  Resort the curve by z-order.
+         if (parallel) {
+#ifdef HAVE_MPI
+            vector<int>i_global(ncells_global);
+            vector<int>j_global(ncells_global);
+            vector<int>level_global(ncells_global);
+            vector<int>z_index_global(ncells_global);
+            vector<int>z_order_global(ncells_global);
+            MPI_Allgatherv(&i[0], ncells, MPI_INT, &i_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+            MPI_Allgatherv(&j[0], ncells, MPI_INT, &j_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+            MPI_Allgatherv(&level[0], ncells, MPI_INT, &level_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+
+            i_scaled.resize(ncells_global);
+            j_scaled.resize(ncells_global);
+
+            //   Find the largest i and j indices over all gathered cells.
+            imax = 0;
+            jmax = 0;
+            for (uint ic = 0; ic < ncells_global; ++ic)
+            {   if (i_global[ic] > imax) imax = i_global[ic];
+               if (j_global[ic] > jmax) jmax = j_global[ic]; }
+
+            //   Scale factors mapping the indices onto [0, 16]; an all-zero index range scales to 0.
+            iscale = (imax > 0) ? 16.0 / (double)imax : 0.0;
+            jscale = (jmax > 0) ? 16.0 / (double)jmax : 0.0;
+
+            //   Rescale the gathered indices.
+            for (uint ic = 0; ic < ncells_global; ++ic)
+            {   i_scaled[ic]=(int) ( (double)i_global[ic]*iscale);
+               j_scaled[ic]=(int) ( (double)j_global[ic]*jscale); }
+
+            //   Compute the z-order curve index and ordering for the global mesh.
+            calc_zorder(ncells_global, &i_scaled[0], &j_scaled[0], &level_global[0], levmx, ibase, &z_index_global[0], &z_order_global[0]);
+
+            //   Order the mesh according to the calculated order (note that z_order is for both curves).
+            vector<int> int_global(ncells_global);
+            vector<int> int_global_new(ncells_global);
+
+            // gather, reorder and scatter i
+            MPI_Allgatherv(&i[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+            for (int ic = 0; ic<(int)ncells_global; ic++){
+               int_global_new[ic] = int_global[z_order_global[ic]];
+            }
+            MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &i[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
+
+            // gather, reorder and scatter j
+            MPI_Allgatherv(&j[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+            for (int ic = 0; ic<(int)ncells_global; ic++){
+               int_global_new[ic] = int_global[z_order_global[ic]];
+            }
+            MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &j[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
+
+            // gather, reorder and scatter level
+            MPI_Allgatherv(&level[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+            for (int ic = 0; ic<(int)ncells_global; ic++){
+               int_global_new[ic] = int_global[z_order_global[ic]];
+            }
+            MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &level[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
+
+            // It is faster just to recalculate these variables instead of communicating them
+            if (mesh_memory.get_memory_size(celltype) >= ncells) {
+               calc_celltype(mesh_memory.get_memory_size(celltype));
+            }
+
+            if (x.size() >= ncells) {
+               calc_spatial_coordinates(0);
+            }
+
+            if (mesh_memory.get_memory_size(nlft) >= ncells) {
+               vector<int> inv_z_order(ncells_global);
+               for (int ic = 0; ic<(int)ncells_global; ic++){
+                  inv_z_order[z_order_global[ic]] = ic;
+               }
+
+               MPI_Allgatherv(&nlft[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+               for (int ic = 0; ic<(int)ncells_global; ic++){
+                  int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]];
+               }
+               MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &nlft[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
+
+               MPI_Allgatherv(&nrht[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+               for (int ic = 0; ic<(int)ncells_global; ic++){
+                  int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]];
+               }
+               MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &nrht[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
+
+               MPI_Allgatherv(&nbot[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+               for (int ic = 0; ic<(int)ncells_global; ic++){
+                  int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]];
+               }
+               MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &nbot[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
+
+               MPI_Allgatherv(&ntop[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
+               for (int ic = 0; ic<(int)ncells_global; ic++){
+                  int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]];
+               }
+               MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &ntop[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
+            }
+            MPI_Scatterv(&z_order_global[0], &nsizes[0], &ndispl[0], MPI_INT, &z_order[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
+#endif
+         } else {
+            i_scaled.resize(ncells);
+            j_scaled.resize(ncells);
+
+            //   Find the largest i and j indices over the local cells.
+            imax = 0;
+            jmax = 0;
+            for (uint ic = 0; ic < ncells; ++ic)
+            {   if (i[ic] > imax) imax = i[ic];
+               if (j[ic] > jmax) jmax = j[ic]; }
+
+            //   Scale factors mapping the indices onto [0, 16]; an all-zero index range scales to 0.
+            iscale = (imax > 0) ? 16.0 / (double)imax : 0.0;
+            jscale = (jmax > 0) ? 16.0 / (double)jmax : 0.0;
+
+            //   Rescale the indices.
+            for (uint ic = 0; ic < ncells; ++ic)
+            {   i_scaled[ic]=(int) ( (double)i[ic]*iscale);
+               j_scaled[ic]=(int) ( (double)j[ic]*jscale); }
+
+            //   Compute the z-order curve index and ordering for the local mesh.
+            calc_zorder(ncells, &i_scaled[0], &j_scaled[0], &level[0], levmx, ibase, &z_index[0], &z_order[0]);
+
+            //   Order the mesh according to the calculated order (note that z_order is for both curves).
+            //   NOTE(review): attribute 0x100 presumably marks the neighbor arrays as index
+            //   arrays whose stored values must be remapped too -- confirm in the memory manager.
+            mesh_memory.set_memory_attribute(nlft, 0x100);
+            mesh_memory.set_memory_attribute(nrht, 0x100);
+            mesh_memory.set_memory_attribute(nbot, 0x100);
+            mesh_memory.set_memory_attribute(ntop, 0x100);
+
+            mesh_memory.memory_reorder_all(&z_order[0]);
+            memory_reset_ptrs();
+
+
+            if (x.size() >= ncells) {
+               vector<spatial_t> real_local(ncells);
+
+               for (int ic = 0; ic<(int)ncells; ic++){
+                  real_local[ic] = x[ic];
+               }
+               for (int ic = 0; ic<(int)ncells; ic++){
+                  x[ic] = real_local[z_order[ic]];
+               }
+
+               for (int ic = 0; ic<(int)ncells; ic++){
+                  real_local[ic] = dx[ic];
+               }
+               for (int ic = 0; ic<(int)ncells; ic++){
+                  dx[ic] = real_local[z_order[ic]];
+               }
+
+               for (int ic = 0; ic<(int)ncells; ic++){
+                  real_local[ic] = y[ic];
+               }
+               for (int ic = 0; ic<(int)ncells; ic++){
+                  y[ic] = real_local[z_order[ic]];
+               }
+
+               for (int ic = 0; ic<(int)ncells; ic++){
+                  real_local[ic] = dy[ic];
+               }
+               for (int ic = 0; ic<(int)ncells; ic++){
+                  dy[ic] = real_local[z_order[ic]];
+               }
+            }
+
+         }
+
+         break;
+
+      default:
+         //  Note that HILBERT_PARTITION is not currently supported due to redundancy with HILBERT_SORT.
+         break;
+   }
+
+
+   //   Output ordered mesh information. NOTE(review): the row prints j before i although the header lists i then j.
+   if (DEBUG)
+   {   printf("orig index   i     j     lev    nlft nrht nbot ntop   xlow    xhigh     ylow    yhigh   z index  z order\n");
+      for (uint ic=0; ic<ncells; ic++){
+         printf(" %6d   %4d  %4d   %4d  %4d %4d %4d %4d ", index[ic], j[ic], i[ic], level[ic], nlft[ic], nrht[ic], nbot[ic], ntop[ic]);
+         printf(" %8.2lf %8.2lf %8.2lf %8.2lf", x[ic], x[ic]+dx[ic], y[ic], y[ic]+dy[ic]);
+         printf(" %6d    %5d\n", z_index[ic], z_order[ic]); } }
+
+   cpu_timers[MESH_TIMER_PARTITION] += cpu_timer_stop(tstart_cpu);
+}
+
+//   Deal the cells out to numpe work items in contiguous chunks; the first proc.size()%numpe items get one extra cell.
+void Mesh::calc_distribution(int numpe)
+{
+   if (numpe <= 0) return;                    //  No work items: nothing to assign, and avoids dividing by zero.
+   const uint base  = proc.size()/numpe;      //  Cells every work item receives.
+   const uint extra = proc.size()%numpe;      //  Leftover cells, one each to the lowest-numbered work items.
+   uint next = 0;                             //  First unassigned cell; chunks start here so earlier ones survive.
+   for (int ip = 0; ip < numpe; ++ip) {
+      const uint chunk_end = next + base + (((uint)ip < extra) ? 1 : 0);
+      for (; next < chunk_end; ++next) proc[next] = ip;
+   }
+}
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/reduce.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/reduce.h
@@ -0,0 +1,115 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+#ifndef _REDUCE_H_
+#define _REDUCE_H_
+/* Set-up and tear-down entry points for the OpenCL reduction kernels (sum, product, max, min). */
+#ifdef HAVE_OPENCL
+#ifdef __APPLE_CC__
+#include <OpenCL/OpenCL.h>
+#else
+#include "CL/cl.h"
+#endif
+#endif
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+/* Kernel handles. NOTE(review): these are definitions, not extern declarations, so every includer defines them -- confirm that links cleanly. */
+#ifdef HAVE_OPENCL
+cl_kernel   kernel_reduce_sum,
+            kernel_reduce_sum_stage1of2,
+            kernel_reduce_sum_stage2of2,
+            kernel_reduce_sum_int_stage1of2,
+            kernel_reduce_sum_int_stage2of2,
+            kernel_reduce_product,
+            kernel_reduce_max,
+            kernel_reduce_max_stage1of2,
+            kernel_reduce_max_stage2of2,
+            kernel_reduce_min,
+            kernel_reduce_min_stage1of2,
+            kernel_reduce_min_stage2of2;
+#endif
+/* Create the kernel handles: all of them at once, or one reduction flavour at a time. */
+void init_kernels_reduce(void);
+void init_kernel_sum(void);
+void init_kernel_2stage_sum(void);
+void init_kernel_2stage_sum_int(void);
+void init_kernel_product(void);
+void init_kernel_max(void);
+void init_kernel_2stage_max(void);
+void init_kernel_min(void);
+void init_kernel_2stage_min(void);
+/* Release the two-stage sum kernels. */
+void terminate_kernel_2stage_sum(void);
+void terminate_kernel_2stage_sum_int(void);
+/* Release the handles created by the matching init_* call; (void) makes these real prototypes in C. */
+void release_kernels_reduce(void);
+void release_kernel_sum(void);
+void release_kernel_2stage_sum(void);
+void release_kernel_2stage_sum_int(void);
+void release_kernel_product(void);
+void release_kernel_max(void);
+void release_kernel_2stage_max(void);
+void release_kernel_min(void);
+void release_kernel_2stage_min(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _REDUCE_H_ */
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/reduce.c
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/reduce.c
@@ -0,0 +1,245 @@
+/**
+ *  Copyright (c) 2011, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+#include "reduce.h"
+#ifdef HAVE_OPENCL
+#include "ezcl/ezcl.h"
+#endif
+
+#ifdef HAVE_OPENCL
+#include "reduce_kernel.inc"
+#endif
+
+void init_kernels_reduce(void)
+{   /* Build every reduction kernel from reduce_source; compiles to nothing without HAVE_OPENCL. */
+#ifdef HAVE_OPENCL
+    cl_context ctx = ezcl_get_context();
+    kernel_reduce_sum               = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_sum_cl");
+    kernel_reduce_sum_stage1of2     = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_sum_stage1of2_cl");
+    kernel_reduce_sum_stage2of2     = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_sum_stage2of2_cl");
+    kernel_reduce_sum_int_stage1of2 = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_sum_int_stage1of2_cl");
+    kernel_reduce_sum_int_stage2of2 = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_sum_int_stage2of2_cl");
+    kernel_reduce_product           = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_product_cl");
+    kernel_reduce_max               = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_max_cl");
+    kernel_reduce_max_stage1of2     = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_max_stage1of2_cl");
+    kernel_reduce_max_stage2of2     = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_max_stage2of2_cl");
+    kernel_reduce_min               = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_min_cl");
+    kernel_reduce_min_stage1of2     = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_min_stage1of2_cl");
+    kernel_reduce_min_stage2of2     = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_min_stage2of2_cl");
+#endif
+}
+
+void init_kernel_sum(void)
+{   /* Build the single-pass sum kernel; compiles to nothing without HAVE_OPENCL. */
+#ifdef HAVE_OPENCL
+    cl_context ctx = ezcl_get_context();
+    kernel_reduce_sum = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_sum_cl");
+#endif
+}
+
+void init_kernel_2stage_sum(void)
+{   /* Build both stages of the two-stage sum kernel; compiles to nothing without HAVE_OPENCL. */
+#ifdef HAVE_OPENCL
+    cl_context ctx = ezcl_get_context();
+    kernel_reduce_sum_stage1of2 = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_sum_stage1of2_cl");
+    kernel_reduce_sum_stage2of2 = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_sum_stage2of2_cl");
+#endif
+}
+
+void terminate_kernel_2stage_sum(void)
+{   /* Release both stages of the two-stage sum kernel; compiles to nothing without HAVE_OPENCL. */
+#ifdef HAVE_OPENCL
+    ezcl_kernel_release( kernel_reduce_sum_stage1of2 );
+    ezcl_kernel_release( kernel_reduce_sum_stage2of2 );
+#endif
+}
+
+void init_kernel_2stage_sum_int(void)
+{   /* Build both stages of the two-stage integer sum kernel; compiles to nothing without HAVE_OPENCL. */
+#ifdef HAVE_OPENCL
+    cl_context ctx = ezcl_get_context();
+    kernel_reduce_sum_int_stage1of2 = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_sum_int_stage1of2_cl");
+    kernel_reduce_sum_int_stage2of2 = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_sum_int_stage2of2_cl");
+#endif
+}
+
+void terminate_kernel_2stage_sum_int(void)
+{   /* Release both stages of the two-stage integer sum kernel; compiles to nothing without HAVE_OPENCL. */
+#ifdef HAVE_OPENCL
+    ezcl_kernel_release( kernel_reduce_sum_int_stage1of2 );
+    ezcl_kernel_release( kernel_reduce_sum_int_stage2of2 );
+#endif
+}
+
+void init_kernel_product(void)
+{   /* Build the product reduction kernel; compiles to nothing without HAVE_OPENCL. */
+#ifdef HAVE_OPENCL
+    cl_context ctx = ezcl_get_context();
+    kernel_reduce_product = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_product_cl");
+#endif
+}
+
+void init_kernel_max(void)
+{   /* Build the single-pass max kernel; compiles to nothing without HAVE_OPENCL. */
+#ifdef HAVE_OPENCL
+    cl_context ctx = ezcl_get_context();
+    kernel_reduce_max = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_max_cl");
+#endif
+}
+
+void init_kernel_2stage_max(void)
+{   /* Build both stages of the two-stage max kernel; compiles to nothing without HAVE_OPENCL. */
+#ifdef HAVE_OPENCL
+    cl_context ctx = ezcl_get_context();
+    kernel_reduce_max_stage1of2 = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_max_stage1of2_cl");
+    kernel_reduce_max_stage2of2 = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_max_stage2of2_cl");
+#endif
+}
+
+void init_kernel_min(void)
+{   /* Build the single-pass min kernel; compiles to nothing without HAVE_OPENCL. */
+#ifdef HAVE_OPENCL
+    cl_context ctx = ezcl_get_context();
+    kernel_reduce_min = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_min_cl");
+#endif
+}
+
+void init_kernel_2stage_min(void)
+{   /* Build both stages of the two-stage min kernel; compiles to nothing without HAVE_OPENCL. */
+#ifdef HAVE_OPENCL
+    cl_context ctx = ezcl_get_context();
+    kernel_reduce_min_stage1of2 = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_min_stage1of2_cl");
+    kernel_reduce_min_stage2of2 = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_min_stage2of2_cl");
+#endif
+}
+
+void release_kernels_reduce(void)
+{   /* Release every reduction kernel handle; compiles to nothing without HAVE_OPENCL. */
+#ifdef HAVE_OPENCL
+    ezcl_kernel_release(kernel_reduce_sum);
+    ezcl_kernel_release(kernel_reduce_sum_stage1of2);
+    ezcl_kernel_release(kernel_reduce_sum_stage2of2);
+    ezcl_kernel_release(kernel_reduce_sum_int_stage1of2);
+    ezcl_kernel_release(kernel_reduce_sum_int_stage2of2);
+    ezcl_kernel_release(kernel_reduce_product);
+    ezcl_kernel_release(kernel_reduce_max);
+    ezcl_kernel_release(kernel_reduce_max_stage1of2);
+    ezcl_kernel_release(kernel_reduce_max_stage2of2);
+    ezcl_kernel_release(kernel_reduce_min);
+    ezcl_kernel_release(kernel_reduce_min_stage1of2);
+    ezcl_kernel_release(kernel_reduce_min_stage2of2);
+#endif
+}
+
+void release_kernel_sum(void)
+{   /* Release the single-pass sum kernel; compiles to nothing without HAVE_OPENCL. */
+#ifdef HAVE_OPENCL
+    ezcl_kernel_release(kernel_reduce_sum);
+#endif
+}
+
+void release_kernel_2stage_sum(void)
+{   /* Release both stages of the two-stage sum kernel; compiles to nothing without HAVE_OPENCL. */
+#ifdef HAVE_OPENCL
+    ezcl_kernel_release(kernel_reduce_sum_stage1of2);
+    ezcl_kernel_release(kernel_reduce_sum_stage2of2);
+#endif
+}
+
+void release_kernel_2stage_sum_int(void)
+{   /* Release both stages of the two-stage integer sum kernel; compiles to nothing without HAVE_OPENCL. */
+#ifdef HAVE_OPENCL
+    ezcl_kernel_release(kernel_reduce_sum_int_stage1of2);
+    ezcl_kernel_release(kernel_reduce_sum_int_stage2of2);
+#endif
+}
+
+void release_kernel_product(void)
+{   /* Release the product reduction kernel; compiles to nothing without HAVE_OPENCL. */
+#ifdef HAVE_OPENCL
+    ezcl_kernel_release(kernel_reduce_product);
+#endif
+}
+
+void release_kernel_max(void)
+{   /* Release the single-pass max kernel; compiles to nothing without HAVE_OPENCL. */
+#ifdef HAVE_OPENCL
+    ezcl_kernel_release(kernel_reduce_max);
+#endif
+}
+
+void release_kernel_2stage_max(void)
+{   /* Release both stages of the two-stage max kernel; compiles to nothing without HAVE_OPENCL. */
+#ifdef HAVE_OPENCL
+    ezcl_kernel_release(kernel_reduce_max_stage1of2);
+    ezcl_kernel_release(kernel_reduce_max_stage2of2);
+#endif
+}
+
+void release_kernel_min(void)
+{   /* Release the single-pass min kernel; compiles to nothing without HAVE_OPENCL. */
+#ifdef HAVE_OPENCL
+    ezcl_kernel_release(kernel_reduce_min);
+#endif
+}
+
+void release_kernel_2stage_min(void)
+{   /* Release both stages of the two-stage min kernel; compiles to nothing without HAVE_OPENCL. */
+#ifdef HAVE_OPENCL
+    ezcl_kernel_release(kernel_reduce_min_stage1of2);
+    ezcl_kernel_release(kernel_reduce_min_stage2of2);
+#endif
+}
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/s7.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/s7.h
@@ -0,0 +1,151 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+#ifndef S7_H_
+#define S7_H_
+
+//#define _S7_DEBUG
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+   /*
+    * S7 parameters: the success code and the element-type tags.
+    */
+
+#define S7_OK   0 /* Successful return. */
+
+   enum  S7_Datatype
+   {
+      S7_GENERIC8  = 0,
+      S7_BYTE,
+      S7_PACKED,
+
+      S7_CHAR,
+      S7_INT,
+      S7_LONG,
+      S7_LONG_LONG_INT,
+      S7_FLOAT,
+      S7_DOUBLE,
+
+      S7_CHARACTER,
+      S7_LOGICAL,
+      S7_INTEGER4,   /* sorted as int, exactly like S7_INT */
+      S7_INTEGER8,   /* sorted as long long, exactly like S7_LONG_LONG_INT */
+      S7_REAL4,      /* sorted as float, exactly like S7_FLOAT */
+      S7_REAL8,      /* sorted as double, exactly like S7_DOUBLE */
+
+      S7_DATATYPE_MIN = S7_GENERIC8,   /* smallest enumerator value */
+      S7_DATATYPE_MAX = S7_REAL8       /* largest enumerator value */
+   };
+
+   /* Generic sorts: S7_datatype tells the routine how to interpret the void* data. */
+   void S7_Sort(
+                void                   *array_in,    /* sorted in place, ascending */
+                const int              nsize,        /* number of elements in array_in */
+                const enum S7_Datatype S7_datatype   /* element type of array_in */
+                );
+
+   void S7_Sort_2Arrays(
+                        void *                 array_in1,   /* sort key; sorted in place, ascending */
+                        void *                 array_in2,   /* reordered with the same moves as array_in1 */
+                        const int              nsize,       /* number of elements in each array */
+                        const enum S7_Datatype S7_datatype  /* element type of both arrays */
+                        );
+
+   void S7_Index_Sort(
+                      void *                 array_in,     /* keys; read only by the sort */
+                      const int              nsize,        /* number of elements in array_in and index */
+                      const enum S7_Datatype S7_datatype,  /* element type of array_in */
+                      int *                  index         /* out: permutation putting array_in in ascending order */
+                      );
+
+   void S7_Indexi8_Sort(
+                        void *                 array_in,     /* keys, addressed through index */
+                        const int              nsize,        /* number of elements in array_in and index */
+                        const enum S7_Datatype S7_datatype,  /* element type of array_in */
+                        long *                 index         /* out: as for S7_Index_Sort, but stored as long */
+                        );
+
+   /* Type-specific entry points. NOTE(review): presumably typed versions of the generic sorts above -- confirm in s7.c. */
+   void S7_Index_sort_real8(const int n,double array_in[],int index[]);
+   void S7_Index_sort_int8(const int n,long long iarray_in[], int index[]);
+   void S7_Index_sort_int4(const int n, int iarray_in[], int index[]);
+   void S7_Index_sort_real8_int8(const int n,double array_in[],long long index[]);
+
+   void S7_Index_sort_int8_int8(const int n,long long iarray_in[], long long index[]);
+   void S7_Index_sort_int4_int8(const int n, int iarray_in[], long long index[]);
+   void S7_Sort_real8(const int n,double array_in[]);
+   void S7_Sort_int8(const int n,long long array_in[]);
+   void S7_Sort_int4(const int n,int array_in[]);
+   void S7_Sort_real8_real8(const int n,double array_in[],double array_in2[]);
+   void S7_Sort_int8_int8(const int n,long long array_in[],long long array_in2[]);
+   void S7_Sort_int4_int4(const int n,int array_in[],int array_in2[]);
+
+   /*
+    * End prototypes.
+    */
+
+   /*
+    * Close the extern "C" block opened at the top when compiling as C++.
+    */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* S7_H_ */
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/s7.c
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/s7.c
@@ -0,0 +1,977 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include "s7.h"
+
+/*
+ * S7_Sort -- heapsort the nsize elements of array_in in place, ascending.
+ * S7_datatype selects how array_in is interpreted: S7_INT/S7_INTEGER4 (int),
+ * S7_LONG (long), S7_LONG_LONG_INT/S7_INTEGER8 (long long), S7_FLOAT/S7_REAL4
+ * (float), S7_DOUBLE/S7_REAL8 (double).  Any other value prints an error and
+ * calls exit(1).  For nsize <= 1 the array is left untouched.
+ */
+void S7_Sort(
+             void *                 array_in,
+             const int              nsize,
+             const enum S7_Datatype S7_datatype
+             )
+{
+   int n, child, parent, i;
+   // Element being sifted down; one variable per supported element type
+   int qint;
+   long qlong;
+   long long qlonglong;
+   float qfloat;
+   double qdouble;
+
+   int       *int_data_ptr;
+   long      *long_data_ptr;
+   long long *longlong_data_ptr;
+   float     *float_data_ptr;
+   double    *double_data_ptr;
+
+   i = nsize/2;   // heap construction sifts nodes i-1 .. 0
+   n = nsize;     // live heap size; shrinks once construction is done
+
+   switch (S7_datatype){
+      case S7_INTEGER4:
+      case S7_INT:
+         int_data_ptr = (int *)array_in;
+
+         for (;;) {
+            if (i > 0) { // construction phase: sift the next internal node
+               qint=int_data_ptr[--i];
+            } // if i > 0
+            else { // extraction phase: move the current maximum to slot n
+               n--;
+               if (n <= 0) { // <= also ends the sort when nsize <= 0
+                  break; // End the sort here!
+               } // if n
+               qint=int_data_ptr[n];
+               int_data_ptr[n]=int_data_ptr[0];
+            } // else
+
+            parent=i;
+            child = i*2 + 1;
+            while (child < n) { // sift qint down from parent
+               if (child +1 < n && int_data_ptr[child+1] > int_data_ptr[child]) child++;
+               if (int_data_ptr[child] > qint) {
+                  int_data_ptr[parent] = int_data_ptr[child];
+                  parent=child;
+                  child = parent*2 + 1;
+               } // if q
+               else {
+                  break; // Break out of sift while loop
+               } // else
+            } // while
+            int_data_ptr[parent]=qint;
+         } // for
+
+         break;
+
+      case S7_LONG:
+         long_data_ptr = (long *)array_in;
+
+         for (;;) {
+            if (i > 0) {
+               qlong=long_data_ptr[--i];
+            } // if i > 0
+            else {
+               n--;
+               if (n <= 0) { // <= also ends the sort when nsize <= 0
+                  break; // End the sort here!
+               } // if n
+               qlong=long_data_ptr[n];
+               long_data_ptr[n]=long_data_ptr[0];
+            } // else
+
+            parent=i;
+            child = i*2 + 1;
+            while (child < n) {
+               if (child +1 < n && long_data_ptr[child+1] > long_data_ptr[child]) child++;
+               if (long_data_ptr[child] > qlong) {
+                  long_data_ptr[parent] = long_data_ptr[child];
+                  parent=child;
+                  child = parent*2 + 1;
+               } // if q
+               else {
+                  break; // Break out of sift while loop
+               } // else
+            } // while
+            long_data_ptr[parent]=qlong;
+         } // for
+
+         break;
+
+      case S7_LONG_LONG_INT:
+      case S7_INTEGER8:
+         longlong_data_ptr = (long long *)array_in;
+
+         for (;;) {
+            if (i > 0) {
+               qlonglong=longlong_data_ptr[--i];
+            } // if i > 0
+            else {
+               n--;
+               if (n <= 0) { // <= also ends the sort when nsize <= 0
+                  break; // End the sort here!
+               } // if n
+               qlonglong=longlong_data_ptr[n];
+               longlong_data_ptr[n]=longlong_data_ptr[0];
+            } // else
+
+            parent=i;
+            child = i*2 + 1;
+            while (child < n) {
+               if (child +1 < n && longlong_data_ptr[child+1] > longlong_data_ptr[child]) child++;
+               if (longlong_data_ptr[child] > qlonglong) {
+                  longlong_data_ptr[parent] = longlong_data_ptr[child];
+                  parent=child;
+                  child = parent*2 + 1;
+               } // if q
+               else {
+                  break; // Break out of sift while loop
+               } // else
+            } // while
+            longlong_data_ptr[parent]=qlonglong;
+         } // for
+
+         break;
+
+      case S7_FLOAT:
+      case S7_REAL4:
+         float_data_ptr = (float *)array_in;
+
+         for (;;) {
+            if (i > 0) {
+               qfloat=float_data_ptr[--i];
+            } // if i > 0
+            else {
+               n--;
+               if (n <= 0) { // <= also ends the sort when nsize <= 0
+                  break; // End the sort here!
+               } // if n
+               qfloat=float_data_ptr[n];
+               float_data_ptr[n]=float_data_ptr[0];
+            } // else
+
+            parent=i;
+            child = i*2 + 1;
+            while (child < n) {
+               if (child +1 < n && float_data_ptr[child+1] > float_data_ptr[child]) child++;
+               if (float_data_ptr[child] > qfloat) {
+                  float_data_ptr[parent] = float_data_ptr[child];
+                  parent=child;
+                  child = parent*2 + 1;
+               } // if q
+               else {
+                  break; // Break out of sift while loop
+               } // else
+            } // while
+            float_data_ptr[parent]=qfloat;
+         } // for
+
+         break;
+
+      case S7_DOUBLE:
+      case S7_REAL8:
+         double_data_ptr = (double *)array_in;
+
+         for (;;) {
+            if (i > 0) {
+               qdouble=double_data_ptr[--i];
+            } // if i > 0
+            else {
+               n--;
+               if (n <= 0) { // <= also ends the sort when nsize <= 0
+                  break; // End the sort here!
+               } // if n
+               qdouble=double_data_ptr[n];
+               double_data_ptr[n]=double_data_ptr[0];
+            } // else
+
+            parent=i;
+            child = i*2 + 1;
+            while (child < n) {
+               if (child +1 < n && double_data_ptr[child+1] > double_data_ptr[child]) child++;
+               if (double_data_ptr[child] > qdouble) {
+                  double_data_ptr[parent] = double_data_ptr[child];
+                  parent=child;
+                  child = parent*2 + 1;
+               } // if q
+               else {
+                  break; // Break out of sift while loop
+               } // else
+            } // while
+            double_data_ptr[parent]=qdouble;
+         } // for
+
+         break;
+
+      default:
+         printf("Error -- S7_Datatype not supported in S7_Sort\n");
+         exit(1);
+         break;
+
+   }
+}
+
+
+/*
+ * S7_Sort_2Arrays -- heapsort array_in1 in place, ascending, and apply every
+ * element move to array_in2 too, so the pairs (array_in1[k], array_in2[k])
+ * stay together.  Only array_in1 is compared.  Both arrays hold nsize elements
+ * of the type selected by S7_datatype (int, long, long long, float or double
+ * tags); any other tag prints an error and calls exit(1).  nsize <= 1: no-op.
+ */
+void S7_Sort_2Arrays(
+                     void *                 array_in1,
+                     void *                 array_in2,
+                     const int              nsize,
+                     const enum S7_Datatype S7_datatype
+                     )
+{
+   int n, child, parent, i;
+   // Pair being sifted down (key from array 1, companion from array 2)
+   int qint1, qint2;
+   long qlong1, qlong2;
+   long long qlonglong1, qlonglong2;
+   float qfloat1, qfloat2;
+   double qdouble1, qdouble2;
+
+   int       *int_data_ptr1, *int_data_ptr2;
+   long      *long_data_ptr1, *long_data_ptr2;
+   long long *longlong_data_ptr1, *longlong_data_ptr2;
+   float     *float_data_ptr1, *float_data_ptr2;
+   double    *double_data_ptr1, *double_data_ptr2;
+
+   i = nsize/2;   // heap construction sifts nodes i-1 .. 0
+   n = nsize;     // live heap size; shrinks once construction is done
+
+   switch (S7_datatype){
+      case S7_INTEGER4:
+      case S7_INT:
+         int_data_ptr1 = (int *)array_in1;
+         int_data_ptr2 = (int *)array_in2;
+
+         for (;;) {
+            if (i > 0) { // construction phase: sift the next internal node
+               qint1=int_data_ptr1[--i];
+               qint2=int_data_ptr2[i];
+            } // if i > 0
+            else { // extraction phase: move the current maximum pair to slot n
+               n--;
+               if (n <= 0) { // <= also ends the sort when nsize <= 0
+
+                  return; // End of sort
+               } // if n
+               qint1=int_data_ptr1[n];
+               qint2=int_data_ptr2[n];
+               int_data_ptr1[n]=int_data_ptr1[0];
+               int_data_ptr2[n]=int_data_ptr2[0];
+            } // else
+
+            parent=i;
+            child = i*2 + 1;
+            while (child < n) { // sift the pair down, comparing keys only
+               if (child +1 < n && int_data_ptr1[child+1] > int_data_ptr1[child]) child++;
+               if (int_data_ptr1[child] > qint1) {
+                  int_data_ptr1[parent] = int_data_ptr1[child];
+                  int_data_ptr2[parent] = int_data_ptr2[child];
+                  parent=child;
+                  child = parent*2 + 1;
+               } // if q
+               else {
+                  break;
+               } // else
+            } // while
+            int_data_ptr1[parent]=qint1;
+            int_data_ptr2[parent]=qint2;
+         } // for
+         break;
+
+      case S7_LONG:
+         long_data_ptr1 = (long *)array_in1;
+         long_data_ptr2 = (long *)array_in2;
+
+         for (;;) {
+            if (i > 0) {
+               qlong1=long_data_ptr1[--i];
+               qlong2=long_data_ptr2[i];
+            } // if i > 0
+            else {
+               n--;
+               if (n <= 0) { // <= also ends the sort when nsize <= 0
+
+                  return; // End of sort
+               } // if n
+               qlong1=long_data_ptr1[n];
+               qlong2=long_data_ptr2[n];
+               long_data_ptr1[n]=long_data_ptr1[0];
+               long_data_ptr2[n]=long_data_ptr2[0];
+            } // else
+
+            parent=i;
+            child = i*2 + 1;
+            while (child < n) {
+               if (child +1 < n && long_data_ptr1[child+1] > long_data_ptr1[child]) child++;
+               if (long_data_ptr1[child] > qlong1) {
+                  long_data_ptr1[parent] = long_data_ptr1[child];
+                  long_data_ptr2[parent] = long_data_ptr2[child];
+                  parent=child;
+                  child = parent*2 + 1;
+               } // if q
+               else {
+                  break;
+               } // else
+            } // while
+            long_data_ptr1[parent]=qlong1;
+            long_data_ptr2[parent]=qlong2;
+         } // for
+         break;
+
+      case S7_LONG_LONG_INT:
+      case S7_INTEGER8:
+         longlong_data_ptr1 = (long long *)array_in1;
+         longlong_data_ptr2 = (long long *)array_in2;
+
+         for (;;) {
+            if (i > 0) {
+               qlonglong1=longlong_data_ptr1[--i];
+               qlonglong2=longlong_data_ptr2[i];
+            } // if i > 0
+            else {
+               n--;
+               if (n <= 0) { // <= also ends the sort when nsize <= 0
+
+                  return; // End of sort
+               } // if n
+               qlonglong1=longlong_data_ptr1[n];
+               qlonglong2=longlong_data_ptr2[n];
+               longlong_data_ptr1[n]=longlong_data_ptr1[0];
+               longlong_data_ptr2[n]=longlong_data_ptr2[0];
+            } // else
+
+            parent=i;
+            child = i*2 + 1;
+            while (child < n) {
+               if (child +1 < n && longlong_data_ptr1[child+1] > longlong_data_ptr1[child]) child++;
+               if (longlong_data_ptr1[child] > qlonglong1) {
+                  longlong_data_ptr1[parent] = longlong_data_ptr1[child];
+                  longlong_data_ptr2[parent] = longlong_data_ptr2[child];
+                  parent=child;
+                  child = parent*2 + 1;
+               } // if q
+               else {
+                  break;
+               } // else
+            } // while
+            longlong_data_ptr1[parent]=qlonglong1;
+            longlong_data_ptr2[parent]=qlonglong2;
+         } // for
+         break;
+
+      case S7_FLOAT:
+      case S7_REAL4:
+         float_data_ptr1 = (float *)array_in1;
+         float_data_ptr2 = (float *)array_in2;
+
+         for (;;) {
+            if (i > 0) {
+               qfloat1=float_data_ptr1[--i];
+               qfloat2=float_data_ptr2[i];
+            } // if i > 0
+            else {
+               n--;
+               if (n <= 0) { // <= also ends the sort when nsize <= 0
+
+                  return; // End of sort
+               } // if n
+               qfloat1=float_data_ptr1[n];
+               qfloat2=float_data_ptr2[n];
+               float_data_ptr1[n]=float_data_ptr1[0];
+               float_data_ptr2[n]=float_data_ptr2[0];
+            } // else
+
+            parent=i;
+            child = i*2 + 1;
+            while (child < n) {
+               if (child +1 < n && float_data_ptr1[child+1] > float_data_ptr1[child]) child++;
+               if (float_data_ptr1[child] > qfloat1) {
+                  float_data_ptr1[parent] = float_data_ptr1[child];
+                  float_data_ptr2[parent] = float_data_ptr2[child];
+                  parent=child;
+                  child = parent*2 + 1;
+               } // if q
+               else {
+                  break;
+               } // else
+            } // while
+            float_data_ptr1[parent]=qfloat1;
+            float_data_ptr2[parent]=qfloat2;
+         } // for
+         break;
+
+      case S7_DOUBLE:
+      case S7_REAL8:
+         double_data_ptr1 = (double *)array_in1;
+         double_data_ptr2 = (double *)array_in2;
+
+         for (;;) {
+            if (i > 0) {
+               qdouble1=double_data_ptr1[--i];
+               qdouble2=double_data_ptr2[i];
+            } // if i > 0
+            else {
+               n--;
+               if (n <= 0) { // <= also ends the sort when nsize <= 0
+
+                  return; // End of sort
+               } // if n
+               qdouble1=double_data_ptr1[n];
+               qdouble2=double_data_ptr2[n];
+               double_data_ptr1[n]=double_data_ptr1[0];
+               double_data_ptr2[n]=double_data_ptr2[0];
+            } // else
+
+            parent=i;
+            child = i*2 + 1;
+            while (child < n) {
+               if (child +1 < n && double_data_ptr1[child+1] > double_data_ptr1[child]) child++;
+               if (double_data_ptr1[child] > qdouble1) {
+                  double_data_ptr1[parent] = double_data_ptr1[child];
+                  double_data_ptr2[parent] = double_data_ptr2[child];
+                  parent=child;
+                  child = parent*2 + 1;
+               } // if q
+               else {
+                  break;
+               } // else
+            } // while
+            double_data_ptr1[parent]=qdouble1;
+            double_data_ptr2[parent]=qdouble2;
+         } // for
+         break;
+
+      default:
+         printf("Error -- S7_Datatype not supported in S7_Sort_2Arrays\n");
+         exit(1);
+         break;
+   }
+}
+
+
+
+
+
+/*
+ * S7_Index_Sort -- indirect heapsort.  index[0..nsize-1] is first set to
+ * 0..nsize-1 and then permuted so that array_in[index[0]], array_in[index[1]],
+ * ... is ascending; array_in itself is only read.  S7_datatype selects the
+ * element type of array_in (int, long, long long, float or double tags); any
+ * other tag prints an error and calls exit(1).  nsize <= 0 leaves index as is.
+ */
+void S7_Index_Sort(
+                   void *                 array_in,
+                   const int              nsize,
+                   const enum S7_Datatype S7_datatype,
+                   int *                  index
+                   )
+{
+   int n, j, child, parent, i;
+   int indext;   // index entry being sifted down
+   // Key value array_in[indext], one variable per supported element type
+   int qint;
+   long qlong;
+   long long qlonglong;
+   float qfloat;
+   double qdouble;
+
+   int       *int_data_ptr;
+   long      *long_data_ptr;
+   long long *longlong_data_ptr;
+   float     *float_data_ptr;
+   double    *double_data_ptr;
+
+   // Start from the identity permutation
+   for (j=0; j<nsize; j++) index[j]=j;
+
+   i = nsize/2;   // heap construction sifts nodes i-1 .. 0
+   n = nsize;     // live heap size; shrinks once construction is done
+
+   switch (S7_datatype){
+      case S7_INTEGER4:
+      case S7_INT:
+         int_data_ptr = (int *)array_in;
+
+         for (;;) {
+            if (i > 0) { // construction phase: sift the next internal node
+               indext=index[--i];
+               qint=int_data_ptr[indext];
+            } // if i > 0
+            else { // extraction phase: move the index of the maximum to slot n
+               n--;
+               if (n <= 0) { // <= also ends the sort when nsize <= 0
+
+                  return;
+               } // if n
+               indext=index[n];
+               qint=int_data_ptr[indext];
+               index[n]=index[0];
+            } // else
+
+            parent=i;
+            child = i*2 + 1;
+            while (child < n) { // sift indext down, comparing the keys it refers to
+               if (child +1 < n && int_data_ptr[index[child+1]] > int_data_ptr[index[child]]) child++;
+               if (int_data_ptr[index[child]] > qint) {
+                  index[parent] = index[child];
+                  parent=child;
+                  child = parent*2 + 1;
+               } // if q
+               else {
+                  break;
+               } // else
+            } // while
+            index[parent]=indext;
+         } // for
+         break;
+
+      case S7_LONG:
+         long_data_ptr = (long *)array_in;
+
+         for (;;) {
+            if (i > 0) {
+               indext=index[--i];
+               qlong=long_data_ptr[indext];
+            } // if i > 0
+            else {
+               n--;
+               if (n <= 0) { // <= also ends the sort when nsize <= 0
+
+                  return;
+               } // if n
+               indext=index[n];
+               qlong=long_data_ptr[indext];
+               index[n]=index[0];
+            } // else
+
+            parent=i;
+            child = i*2 + 1;
+            while (child < n) {
+               if (child +1 < n && long_data_ptr[index[child+1]] > long_data_ptr[index[child]]) child++;
+               if (long_data_ptr[index[child]] > qlong) {
+                  index[parent] = index[child];
+                  parent=child;
+                  child = parent*2 + 1;
+               } // if q
+               else {
+                  break;
+               } // else
+            } // while
+            index[parent]=indext;
+         } // for
+         break;
+
+      case S7_LONG_LONG_INT:
+      case S7_INTEGER8:
+         longlong_data_ptr = (long long *)array_in;
+
+         for (;;) {
+            if (i > 0) {
+               indext=index[--i];
+               qlonglong=longlong_data_ptr[indext];
+            } // if i > 0
+            else {
+               n--;
+               if (n <= 0) { // <= also ends the sort when nsize <= 0
+
+                  return;
+               } // if n
+               indext=index[n];
+               qlonglong=longlong_data_ptr[indext];
+               index[n]=index[0];
+            } // else
+
+            parent=i;
+            child = i*2 + 1;
+            while (child < n) {
+               if (child +1 < n && longlong_data_ptr[index[child+1]] > longlong_data_ptr[index[child]]) child++;
+               if (longlong_data_ptr[index[child]] > qlonglong) {
+                  index[parent] = index[child];
+                  parent=child;
+                  child = parent*2 + 1;
+               } // if q
+               else {
+                  break;
+               } // else
+            } // while
+            index[parent]=indext;
+         } // for
+         break;
+
+      case S7_FLOAT:
+      case S7_REAL4:
+         float_data_ptr = (float *)array_in;
+
+         for (;;) {
+            if (i > 0) {
+               indext=index[--i];
+               qfloat=float_data_ptr[indext];
+            } // if i > 0
+            else {
+               n--;
+               if (n <= 0) { // <= also ends the sort when nsize <= 0
+
+                  return;
+               } // if n
+               indext=index[n];
+               qfloat=float_data_ptr[indext];
+               index[n]=index[0];
+            } // else
+
+            parent=i;
+            child = i*2 + 1;
+            while (child < n) {
+               if (child +1 < n && float_data_ptr[index[child+1]] > float_data_ptr[index[child]]) child++;
+               if (float_data_ptr[index[child]] > qfloat) {
+                  index[parent] = index[child];
+                  parent=child;
+                  child = parent*2 + 1;
+               } // if q
+               else {
+                  break;
+               } // else
+            } // while
+            index[parent]=indext;
+         } // for
+         break;
+
+
+      case S7_DOUBLE:
+      case S7_REAL8:
+         double_data_ptr = (double *)array_in;
+
+         for (;;) {
+            if (i > 0) {
+               indext=index[--i];
+               qdouble=double_data_ptr[indext];
+            } // if i > 0
+            else {
+               n--;
+               if (n <= 0) { // <= also ends the sort when nsize <= 0
+
+                  return;
+               } // if n
+               indext=index[n];
+               qdouble=double_data_ptr[indext];
+               index[n]=index[0];
+            } // else
+
+            parent=i;
+            child = i*2 + 1;
+            while (child < n) {
+               if (child +1 < n && double_data_ptr[index[child+1]] > double_data_ptr[index[child]]) child++;
+               if (double_data_ptr[index[child]] > qdouble) {
+                  index[parent] = index[child];
+                  parent=child;
+                  child = parent*2 + 1;
+               } // if q
+               else {
+                  break;
+               } // else
+            } // while
+            index[parent]=indext;
+         } // for
+         break;
+
+      default:
+         printf("Error -- S7_Datatype not supported in S7_Index_Sort\n");
+         exit(1);
+         break;
+
+   }
+}
+
+void S7_Indexi8_Sort(
+                     void *                 array_in,
+                     const int              nsize,
+                     const enum S7_Datatype S7_datatype,
+                     long *                 index
+                     )
+{
+   int n, j, child, parent, i;
+   long indext;
+
+   int qint;
+   long qlong;
+   long long qlonglong;
+   float qfloat;
+   double qdouble;
+
+   int
+   *int_data_ptr;
+   long
+   *long_data_ptr;
+   long long
+   *longlong_data_ptr;
+   float
+   *float_data_ptr;
+   double
+   *double_data_ptr;
+
+   // Heapsort
+
+   // Initialize array with consecutive integers
+   for (j=0; j<nsize; j++) index[j]=j;
+
+   i=nsize/2;
+   n = nsize;
+
+   switch (S7_datatype){
+      case S7_INTEGER4:
+      case S7_INT:
+         int_data_ptr = (int *)array_in;
+
+         for (;;) {
+            if (i > 0) {
+               indext=index[--i];
+               qint=int_data_ptr[indext];
+            } // if i > 0
+            else {
+               n--;
+               if (n == 0) {
+
+                  return;
+               } // if n
+               indext=index[n];
+               qint=int_data_ptr[indext];
+               index[n]=index[0];
+            } // else
+
+            parent=i;
+            child = i*2 + 1;
+            while (child < n) {
+               if (child +1 < n && int_data_ptr[index[child+1]] > int_data_ptr[index[child]]) child++;
+               if (int_data_ptr[index[child]] > qint) {
+                  index[parent] = index[child];
+                  parent=child;
+                  child = parent*2 + 1;
+               } // if q
+               else {
+                  break;
+               } // else
+            } // while
+            index[parent]=indext;
+         } // for
+         break;
+
+      case S7_LONG:
+         long_data_ptr = (long *)array_in;
+
+         for (;;) {
+            if (i > 0) {
+               indext=index[--i];
+               qlong=long_data_ptr[indext];
+            } // if i > 0
+            else {
+               n--;
+               if (n == 0) {
+
+                  return;
+               } // if n
+               indext=index[n];
+               qlong=long_data_ptr[indext];
+               index[n]=index[0];
+            } // else
+
+            parent=i;
+            child = i*2 + 1;
+            while (child < n) {
+               if (child +1 < n && long_data_ptr[index[child+1]] > long_data_ptr[index[child]]) child++;
+               if (long_data_ptr[index[child]] > qlong) {
+                  index[parent] = index[child];
+                  parent=child;
+                  child = parent*2 + 1;
+               } // if q
+               else {
+                  break;
+               } // else
+            } // while
+            index[parent]=indext;
+         } // for
+         break;
+
+      case S7_LONG_LONG_INT:
+      case S7_INTEGER8:
+         longlong_data_ptr = (long long *)array_in;
+
+         for (;;) {
+            if (i > 0) {
+               indext=index[--i];
+               qlonglong=longlong_data_ptr[indext];
+            } // if i > 0
+            else {
+               n--;
+               if (n == 0) {
+
+                  return;
+               } // if n
+               indext=index[n];
+               qlonglong=longlong_data_ptr[indext];
+               index[n]=index[0];
+            } // else
+
+            parent=i;
+            child = i*2 + 1;
+            while (child < n) {
+               if (child +1 < n && longlong_data_ptr[index[child+1]] > longlong_data_ptr[index[child]]) child++;
+               if (longlong_data_ptr[index[child]] > qlonglong) {
+                  index[parent] = index[child];
+                  parent=child;
+                  child = parent*2 + 1;
+               } // if q
+               else {
+                  break;
+               } // else
+            } // while
+            index[parent]=indext;
+         } // for
+         break;
+
+      case S7_FLOAT:
+      case S7_REAL4:
+         float_data_ptr = (float *)array_in;
+
+         for (;;) {
+            if (i > 0) {
+               indext=index[--i];
+               qfloat=float_data_ptr[indext];
+            } // if i > 0
+            else {
+               n--;
+               if (n == 0) {
+
+                  return;
+               } // if n
+               indext=index[n];
+               qfloat=float_data_ptr[indext];
+               index[n]=index[0];
+            } // else
+
+            parent=i;
+            child = i*2 + 1;
+            while (child < n) {
+               if (child +1 < n && float_data_ptr[index[child+1]] > float_data_ptr[index[child]]) child++;
+               if (float_data_ptr[index[child]] > qfloat) {
+                  index[parent] = index[child];
+                  parent=child;
+                  child = parent*2 + 1;
+               } // if q
+               else {
+                  break;
+               } // else
+            } // while
+            index[parent]=indext;
+         } // for
+         break;
+
+
+      case S7_DOUBLE:
+      case S7_REAL8:
+         double_data_ptr = (double *)array_in;
+
+         for (;;) {
+            if (i > 0) {
+               indext=index[--i];
+               qdouble=double_data_ptr[indext];
+            } // if i > 0
+            else {
+               n--;
+               if (n == 0) {
+
+                  return;
+               } // if n
+               indext=index[n];
+               qdouble=double_data_ptr[indext];
+               index[n]=index[0];
+            } // else
+
+            parent=i;
+            child = i*2 + 1;
+            while (child < n) {
+               if (child +1 < n && double_data_ptr[index[child+1]] > double_data_ptr[index[child]]) child++;
+               if (double_data_ptr[index[child]] > qdouble) {
+                  index[parent] = index[child];
+                  parent=child;
+                  child = parent*2 + 1;
+               } // if q
+               else {
+                  break;
+               } // else
+            } // while
+            index[parent]=indext;
+         } // for
+         break;
+
+      default:
+         printf("Error -- S7_Datatype not supported in S7_Indexi8_Sort\n");
+         exit(1);
+         break;
+
+   }
+}
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/state.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/state.h
@@ -0,0 +1,364 @@
+/*
+ *  Copyright (c) 2011-2013, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+#ifndef STATE_H_
+#define STATE_H_
+
+#include <list>
+#include "MallocPlus.h"
+#include "mesh.h"
+#include "crux.h"
+#ifdef HAVE_OPENCL
+#include "ezcl/ezcl.h"
+#endif
+//#include "l7/l7.h"
+
+#define STATUS_OK        0
+#define STATUS_NAN       1
+#define STATUS_MASS_LOSS 2
+
+#if !defined(FULL_PRECISION) && !defined(MIXED_PRECISION) && !defined(MINIMUM_PRECISION) // no mode chosen by the build
+#define FULL_PRECISION // default precision mode
+#endif
+#ifdef NO_CL_DOUBLE // overrides any other selection with MINIMUM_PRECISION
+#undef  FULL_PRECISION
+#undef  MIXED_PRECISION
+#define MINIMUM_PRECISION
+#endif
+
+#if defined(MINIMUM_PRECISION)
+   typedef float state_t; // this is for physics state variables ncell in size
+   typedef float real_t; // this is used for intermediate calculations
+   typedef struct
+   {
+      float s0;
+      float s1;
+   }  real2_t;
+#define CONSERVATION_EPS    15.0
+#ifdef HAVE_OPENCL
+   typedef cl_float  cl_state_t; // for gpu physics state variables
+   typedef cl_float4 cl_state4_t; // for gpu physics state variables
+   typedef cl_float  cl_real_t; // for intermediate gpu physics state variables
+   typedef cl_float2 cl_real2_t; // for intermediate gpu physics state variables
+   typedef cl_float4 cl_real4_t; // for intermediate gpu physics state variables
+#endif
+#ifdef HAVE_MPI
+   #define MPI_STATE_T MPI_FLOAT // for MPI communication for physics state variables
+   #define MPI_REAL_T MPI_FLOAT // for MPI communication of intermediate (real_t) values
+   #define L7_STATE_T L7_FLOAT
+   #define L7_REAL_T L7_FLOAT
+#endif
+
+#elif defined(MIXED_PRECISION) // intermediate values calculated high precision and stored as floats
+   typedef float state_t; // stored state stays single precision
+   typedef double real_t; // intermediates are double precision
+   typedef struct
+   {
+      double s0;
+      double s1;
+   }  real2_t;
+#define CONSERVATION_EPS    .02
+#ifdef HAVE_OPENCL
+   typedef cl_float   cl_state_t;
+   typedef cl_float4  cl_state4_t;
+   typedef cl_double  cl_real_t; // for intermediate gpu physics state variables
+   typedef cl_double2 cl_real2_t; // for intermediate gpu physics state variables
+   typedef cl_double4 cl_real4_t; // for intermediate gpu physics state variables
+#endif
+#ifdef HAVE_MPI
+   #define MPI_STATE_T MPI_FLOAT
+   #define MPI_REAL_T MPI_DOUBLE
+   #define L7_STATE_T L7_FLOAT
+   #define L7_REAL_T L7_DOUBLE
+#endif
+
+#elif defined(FULL_PRECISION)
+   typedef double state_t; // stored state is double precision
+   typedef double real_t; // intermediates are double precision
+   typedef struct
+   {
+      double s0;
+      double s1;
+   }  real2_t;
+#define CONSERVATION_EPS    .02
+#ifdef HAVE_OPENCL
+   typedef cl_double  cl_state_t;
+   typedef cl_double4 cl_state4_t;
+   typedef cl_double  cl_real_t; // for intermediate gpu physics state variables
+   typedef cl_double2 cl_real2_t; // for intermediate gpu physics state variables
+   typedef cl_double4 cl_real4_t; // for intermediate gpu physics state variables
+#endif
+#ifdef HAVE_MPI
+   #define MPI_STATE_T MPI_DOUBLE
+   #define MPI_REAL_T MPI_DOUBLE
+   #define L7_STATE_T L7_DOUBLE
+   #define L7_REAL_T L7_DOUBLE
+#endif
+#endif
+
+extern "C" void do_calc(void);
+
+enum SUM_TYPE {   // summation variants; NOTE(review): presumably selects the mass-sum algorithm -- confirm
+   SUM_REGULAR,   // enumerator value 0
+   SUM_KAHAN      // enumerator value 1
+};
+
+
+enum SIGN_RULE {  // type of the sign_rule argument taken by State::symmetry_check
+   DIAG_RULE,
+   X_RULE,
+   Y_RULE,
+};
+
+enum state_timers  // index into State::cpu_timers / State::gpu_timers
+{
+   STATE_TIMER_APPLY_BCS,
+   STATE_TIMER_SET_TIMESTEP,
+   STATE_TIMER_FINITE_DIFFERENCE,
+   STATE_TIMER_REFINE_POTENTIAL,
+   STATE_TIMER_CALC_MPOT,
+   STATE_TIMER_REZONE_ALL,
+   STATE_TIMER_MASS_SUM,
+   STATE_TIMER_READ,
+   STATE_TIMER_WRITE,
+   STATE_TIMER_SIZE  // number of categories above; used as the timer array length
+};
+
+typedef enum state_timers   state_timer_category;
+
+using namespace std;
+
+class State { // state arrays H, U, V (ncells entries each) plus timers, bound to a Mesh
+   
+public:
+   MallocPlus state_memory;
+   MallocPlus gpu_state_memory;
+   Mesh *mesh;
+   state_t *H;
+   state_t *U;
+   state_t *V;
+
+#ifdef HAVE_OPENCL
+   cl_mem dev_H;
+   cl_mem dev_U;
+   cl_mem dev_V;
+
+   cl_mem dev_mass_sum;
+   cl_mem dev_deltaT;
+
+   cl_event apply_BCs_event;
+
+   cl_mem dev_mpot;
+   //cl_mem dev_ioffset;
+   cl_mem dev_result;
+#endif
+
+   double    cpu_timers[STATE_TIMER_SIZE];
+   long long gpu_timers[STATE_TIMER_SIZE];
+
+   // constructor -- zeroes the timers and stores the mesh pointer; arrays are allocated by init()
+   State(Mesh *mesh_in);
+
+   void init(int do_gpu_calc);
+   void terminate(void);
+
+   /* Memory routines for linked list of state arrays */
+   void allocate(size_t ncells);
+   void allocate_from_backup_file(FILE *fp);
+   void allocate_for_rollback(State *state_to_copy);
+   void resize(size_t ncells);
+   void memory_reset_ptrs(void);
+#ifdef HAVE_OPENCL
+   void allocate_device_memory(size_t ncells);
+#endif
+   void resize_old_device_memory(size_t ncells);
+
+   /* Accessor routines */
+   double get_cpu_timer(state_timer_category category)  {return(cpu_timers[category]); };
+   /* Scales gpu_timers by 1.0e-9, i.e. converts nanoseconds to seconds */
+   double get_gpu_timer(state_timer_category category)  {return((double)(gpu_timers[category])*1.0e-9); };
+
+   /* Boundary routines -- not currently used */
+   void add_boundary_cells(void);
+   void apply_boundary_conditions(void);
+   void apply_boundary_conditions_local(void);
+   void apply_boundary_conditions_ghost(void);
+   void remove_boundary_cells(void);
+
+   /*******************************************************************
+   * set_timestep
+   *  Input
+   *    H, U, V -- from state object
+   *    celltype, level, lev_delta
+   *  Output
+   *    mindeltaT returned
+   *******************************************************************/
+   double set_timestep(double g, double sigma);
+#ifdef HAVE_OPENCL
+   double gpu_set_timestep(double sigma);
+#endif
+
+   /*******************************************************************
+   * calc finite difference
+   *      will add ghost region to H, U, V and fill at start of routine
+   *   Input
+   *      H, U, V -- from state object
+   *      nlft, nrht, nbot, ntop, level, celltype -- from mesh object
+   *   Output
+   *      H, U, V
+   *******************************************************************/
+   void calc_finite_difference(double deltaT);
+   void calc_finite_difference_via_faces(double deltaT);
+#ifdef HAVE_OPENCL
+   void gpu_calc_finite_difference(double deltaT);
+#endif
+
+   /*******************************************************************
+   * calc refine potential -- state has responsibility to calc initial
+   *      refinement potential array that is then passed to mesh for
+   *      smoothing and enforcing refinement rules
+   *  Input
+   *    H, U, V -- from state object
+   *  Output
+   *    mpot
+   *    ioffset
+   *    count
+   *******************************************************************/
+   size_t calc_refine_potential(vector<int> &mpot, int &icount, int &jcount);
+#ifdef HAVE_OPENCL
+   size_t gpu_calc_refine_potential(int &icount, int &jcount);
+#endif
+
+   /*******************************************************************
+   * rezone all -- most of call is done in mesh
+   *  Input
+   *    Mesh and state variables
+   *  Output
+   *    New mesh and state variables on refined mesh
+   *******************************************************************/
+   void rezone_all(int icount, int jcount, vector<int> mpot);
+#ifdef HAVE_OPENCL
+   void gpu_rezone_all(int icount, int jcount, bool localStencil);
+#endif
+
+   /*******************************************************************
+   * load balance -- most of call is done in mesh, but pointers are
+   *    reset to newly allocated state arrays
+   *  Input
+   *    Mesh and state variables
+   *  Output
+   *    New mesh and state variables on refined mesh
+   *******************************************************************/
+#ifdef HAVE_MPI
+   void do_load_balance_local(size_t &numcells);
+#ifdef HAVE_OPENCL
+   void gpu_do_load_balance_local(size_t &numcells);
+#endif
+#endif
+
+   /*******************************************************************
+   * mass sum -- Conservation of mass check
+   *  Input
+   *    H from state object
+   *    Precision type for sum
+   *  Output
+   *    total mass is returned
+   *******************************************************************/
+   double mass_sum(int enhanced_precision_sum);
+#ifdef HAVE_OPENCL
+   double gpu_mass_sum(int enhanced_precision_sum);
+#endif
+   
+   void fill_circle(double circ_radius, double fill_value, double background);
+   void state_reorder(vector<int> iorder);
+
+   void symmetry_check(const char *string, vector<int> sym_index, double eps, 
+                       SIGN_RULE sign_rule, int &flag);
+
+   void output_timing_info(int do_cpu_calc, int do_gpu_calc, double total_elapsed_time);
+
+   /* state comparison routines */
+#ifdef HAVE_OPENCL
+   void compare_state_gpu_global_to_cpu_global(const char* string, int cycle, uint ncells);
+#endif
+   void compare_state_cpu_local_to_cpu_global(State *state_global, const char* string, int cycle, uint ncells, uint ncells_global, int *nsizes, int *ndispl);
+#ifdef HAVE_OPENCL
+   void compare_state_all_to_gpu_local(State *state_global, uint ncells, uint ncells_global, int mype, int ncycle, int *nsizes, int *ndispl);
+#endif
+
+   void output_timer_block(mesh_device_types device_type, double elapsed_time,
+      double mesh_time, double compute_time, double total_elapsed_time, double speedup_ratio);
+
+   void timer_output(state_timer_category category, mesh_device_types device_type, int timer_level);
+
+   void print(void);
+
+   size_t get_checkpoint_size(void);
+   void store_checkpoint(Crux *crux);
+   void restore_checkpoint(Crux *crux);
+   //Second print overload, added for output on every iteration: Brian Atkinson (5-29-14)
+   void print(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage);  
+   void print_local(int ncycle);
+   void print_failure_log(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage, bool got_nan);
+   void print_rollback_log(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage, int backup_attempt, int num_of_attempts, int error_status);
+
+private:
+   State(const State&); // To block copy constructor so copies are not made inadvertently
+
+   void print_object_info(void);
+};
+
+#endif // ifndef STATE_H_
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/state.cpp
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/state.cpp
@@ -0,0 +1,3966 @@
+/*
+ *  Copyright (c) 2011-2013, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+#include "mesh.h"
+#include <unistd.h>
+#include <stdio.h>
+#include <assert.h>
+#include <algorithm>
+#include <queue>
+#include "state.h"
+#include "timer.h"
+#ifdef HAVE_MPI
+#include <mpi.h>
+#endif
+
+#undef DEBUG
+//#define DEBUG 0
+#define DEBUG_RESTORE_VALS 1
+#define TIMING_LEVEL 2
+
+#if defined(MINIMUM_PRECISION) // float literals so arithmetic on float real_t is not promoted
+#define ZERO 0.0f
+#define ONE 1.0f
+#define HALF 0.5f
+#define EPSILON 1.0e-30f // tiny positive floor; the spelling "1.0f-30" parses as (1.0f - 30)
+#define STATE_EPS        15.0
+// refinement constants are single precision in this mode
+#define REFINE_GRADIENT  0.10f
+#define COARSEN_GRADIENT 0.05f
+#define REFINE_HALF 0.5f
+#define REFINE_NEG_THOUSAND -1000.0f
+
+#elif defined(MIXED_PRECISION) // intermediate values calculated high precision and stored as floats
+#define ZERO 0.0
+#define ONE 1.0
+#define HALF 0.5
+#define EPSILON 1.0e-30
+#define STATE_EPS        .02
+// refinement constants are single precision in this mode
+#define REFINE_GRADIENT  0.10f
+#define COARSEN_GRADIENT 0.05f
+#define REFINE_HALF 0.5f
+#define REFINE_NEG_THOUSAND -1000.0f
+
+#elif defined(FULL_PRECISION) // double literals throughout
+#define ZERO 0.0
+#define ONE 1.0
+#define HALF 0.5
+#define EPSILON 1.0e-30
+#define STATE_EPS        .02
+// refinement constants are double precision in this mode
+#define REFINE_GRADIENT  0.10
+#define COARSEN_GRADIENT 0.05
+#define REFINE_HALF 0.5
+#define REFINE_NEG_THOUSAND -1000.0
+
+#endif
+
+#ifdef _OPENMP
+static bool iversion_flag = false;
+#endif
+
+typedef unsigned int uint;
+
+static const char *state_timer_descriptor[STATE_TIMER_SIZE] = { // one label per state_timers category, in enum order
+   "state_timer_apply_BCs",
+   "state_timer_set_timestep",
+   "state_timer_finite_difference",
+   "state_timer_refine_potential",
+   "state_timer_calc_mpot",
+   "state_timer_rezone_all",
+   "state_timer_mass_sum",
+   "state_timer_read",
+   "state_timer_write"
+};
+
+#ifdef HAVE_OPENCL
+#include "state_kernel.inc"
+#endif
+
+struct esum_type{ // operand type of knuth_sum: a running sum plus its error term
+   double sum;
+   double correction;
+};
+#ifdef HAVE_MPI
+MPI_Datatype MPI_TWO_DOUBLES; // committed in the State constructor as two contiguous MPI_DOUBLEs
+MPI_Op KNUTH_SUM; // created in the State constructor from knuth_sum
+int commutative = 1; // passed as the commute argument of MPI_Op_create
+void knuth_sum(struct esum_type *in, struct esum_type *inout, int *len, MPI_Datatype *MPI_TWO_DOUBLES);
+#endif
+
+int save_ncells;
+
+#define CONSERVED_EQNS
+
+#define SQR(x) ( (x)*(x) ) // argument parenthesized so SQR(a+b) squares the sum; x is evaluated twice
+#define MIN3(x,y,z) ( min( min((x),(y)), (z)) ) // smallest of three values via nested min()
+
+#ifdef HAVE_OPENCL // kernel handles: created in State::init, released in State::terminate
+cl_kernel kernel_set_timestep;
+cl_kernel kernel_reduction_min;
+cl_kernel kernel_copy_state_data;
+cl_kernel kernel_copy_state_ghost_data;
+cl_kernel kernel_apply_boundary_conditions;
+cl_kernel kernel_apply_boundary_conditions_local;
+cl_kernel kernel_apply_boundary_conditions_ghost;
+cl_kernel kernel_calc_finite_difference;
+cl_kernel kernel_refine_potential;
+cl_kernel kernel_reduce_sum_mass_stage1of2;
+cl_kernel kernel_reduce_sum_mass_stage2of2;
+cl_kernel kernel_reduce_epsum_mass_stage1of2;
+cl_kernel kernel_reduce_epsum_mass_stage2of2;
+#endif
+
+inline real_t U_halfstep(// XXX Fix the subindices to be more intuitive XXX
+        real_t    deltaT,     // Timestep
+        real_t    U_i,        // State variable of the initial (downwind) cell
+        real_t    U_n,        // State variable of the next (upwind) cell
+        real_t    F_i,        // Flux of the state variable in the initial cell
+        real_t    F_n,        // Flux of the state variable in the next cell
+        real_t    r_i,        // Center-to-face distance of the initial cell
+        real_t    r_n,        // Center-to-face distance of the next cell
+        real_t    A_i,        // Face surface area of the cell
+        real_t    A_n,        // Face surface area of the neighbor
+        real_t    V_i,        // Volume of the cell
+        real_t    V_n) {      // Volume of the neighbor
+   // Distance-weighted value minus half a timestep of (flux difference / volume)
+   const real_t weighted  = ( r_i*U_n + r_n*U_i ) / ( r_i + r_n );
+   const real_t flux_diff = F_n*A_n*min(ONE, A_i/A_n) - F_i*A_i*min(ONE, A_n/A_i);
+   const real_t volume    = V_n*min(HALF, V_i/V_n) + V_i*min(HALF, V_n/V_i);
+   return weighted - HALF*deltaT*( flux_diff / volume );
+}
+
+inline real_t U_fullstep(
+        real_t    deltaT,     // Timestep
+        real_t    dr,         // Divisor applied to deltaT
+        real_t    U,          // Value being advanced
+        real_t    F_plus,     // F term that is added
+        real_t    F_minus,    // F term that is subtracted
+        real_t    G_plus,     // G term that is added
+        real_t    G_minus) {  // G term that is subtracted
+   // Net of the four flux terms, accumulated left to right
+   const real_t net_flux = F_plus - F_minus + G_plus - G_minus;
+   return (U - (deltaT / dr)*net_flux);
+}
+
+
+inline real_t w_corrector(
+        real_t    deltaT,       // Timestep
+        real_t    dr,           // Center-to-face distance of the cell
+        real_t    U_eigen,      // Eigenvalue (speed) of the state variable
+        real_t    grad_half,    // Centered gradient
+        real_t    grad_minus,   // Downwind gradient
+        real_t    grad_plus) {  // Upwind gradient
+   // nu0 = HALF*U_eigen*deltaT/dr, then folded into nu0*(ONE - nu0)
+   const real_t nu0     = HALF * U_eigen * deltaT / dr;
+   const real_t nu_fold = nu0 * (ONE - nu0);
+   // Reciprocal of the squared centered gradient, floored by EPSILON
+   const real_t inv_sq     = ONE / max(SQR(grad_half), EPSILON);
+   const real_t ratio_up   = (grad_plus  * grad_half) * inv_sq;
+   const real_t ratio_down = (grad_minus * grad_half) * inv_sq;
+   const real_t limiter    = max(MIN3(ONE, ratio_up, ratio_down), ZERO);
+   return HALF*nu_fold*(ONE- limiter);
+}
+
+State::State(Mesh *mesh_in)
+{
+   mesh = mesh_in;
+
+   // Zero both timer tables in a single pass
+   for (int slot = 0; slot < STATE_TIMER_SIZE; ++slot){
+      cpu_timers[slot] = 0.0;
+      gpu_timers[slot] = 0L;
+   }
+
+#ifdef HAVE_MPI
+   // The MPI-side setup below applies only when MPI is already initialized
+   int mpi_is_up;
+   MPI_Initialized(&mpi_is_up);
+   if (!mpi_is_up) return;
+
+   MPI_Type_contiguous(2, MPI_DOUBLE, &MPI_TWO_DOUBLES);
+   MPI_Type_commit(&MPI_TWO_DOUBLES);
+   MPI_Op_create((MPI_User_function *)knuth_sum, commutative, &KNUTH_SUM);
+   // FIXME add fini and set size
+   if (mesh->parallel) state_memory.pinit(MPI_COMM_WORLD, 2L * 1024 * 1024 * 1024);
+#endif
+}
+
+void State::init(int do_gpu_calc)
+{
+   if (do_gpu_calc) {
+#ifdef HAVE_OPENCL
+      cl_context cl_ctx = ezcl_get_context();
+      const bool is_root = (mesh->mype == 0);   // only rank 0 prints progress
+      if (is_root) printf("Starting compile of kernels in state\n");
+      const char *build_defines = NULL;
+      cl_program state_prog = ezcl_create_program_wsource(cl_ctx, build_defines, state_kern_source);
+      // Fetch every kernel by its entry-point name from the program built above
+      kernel_set_timestep                    = ezcl_create_kernel_wprogram(state_prog, "set_timestep_cl");
+      kernel_reduction_min                   = ezcl_create_kernel_wprogram(state_prog, "finish_reduction_min_cl");
+      kernel_copy_state_data                 = ezcl_create_kernel_wprogram(state_prog, "copy_state_data_cl");
+      kernel_copy_state_ghost_data           = ezcl_create_kernel_wprogram(state_prog, "copy_state_ghost_data_cl");
+      kernel_apply_boundary_conditions       = ezcl_create_kernel_wprogram(state_prog, "apply_boundary_conditions_cl");
+      kernel_apply_boundary_conditions_local = ezcl_create_kernel_wprogram(state_prog, "apply_boundary_conditions_local_cl");
+      kernel_apply_boundary_conditions_ghost = ezcl_create_kernel_wprogram(state_prog, "apply_boundary_conditions_ghost_cl");
+      kernel_calc_finite_difference          = ezcl_create_kernel_wprogram(state_prog, "calc_finite_difference_cl");
+      kernel_refine_potential                = ezcl_create_kernel_wprogram(state_prog, "refine_potential_cl");
+      kernel_reduce_sum_mass_stage1of2       = ezcl_create_kernel_wprogram(state_prog, "reduce_sum_mass_stage1of2_cl");
+      kernel_reduce_sum_mass_stage2of2       = ezcl_create_kernel_wprogram(state_prog, "reduce_sum_mass_stage2of2_cl");
+      kernel_reduce_epsum_mass_stage1of2     = ezcl_create_kernel_wprogram(state_prog, "reduce_epsum_mass_stage1of2_cl");
+      kernel_reduce_epsum_mass_stage2of2     = ezcl_create_kernel_wprogram(state_prog, "reduce_epsum_mass_stage2of2_cl");
+      // The program handle is released once all kernels have been created
+      ezcl_program_release(state_prog);
+      if (is_root) printf("Finishing compile of kernels in state\n");
+#endif
+   }
+
+   // Host-side H, U, V are allocated whether or not the GPU path is active
+   allocate(mesh->ncells);
+   // (state_memory.memory_report() can be called here when debugging)
+
+
+}
+
+void State::allocate(size_t ncells)
+{
+   // Flag value handed to memory_malloc for all three arrays
+   int mem_attr = RESTART_DATA;
+#ifdef HAVE_J7
+   if (mesh->parallel) mem_attr = LOAD_BALANCE_MEMORY;  // replaces RESTART_DATA, not OR-ed
+#endif
+   const size_t elem_bytes = sizeof(state_t);
+   H = (state_t *)state_memory.memory_malloc(ncells, elem_bytes, "H", mem_attr);
+   U = (state_t *)state_memory.memory_malloc(ncells, elem_bytes, "U", mem_attr);
+   V = (state_t *)state_memory.memory_malloc(ncells, elem_bytes, "V", mem_attr);
+}
+
+void State::resize(size_t new_ncells){
+   // Grow-only: nothing happens unless the request exceeds the size
+   // that state_memory reports for H
+   const size_t size_now = state_memory.get_memory_size(H);
+   if (new_ncells <= size_now) return;
+
+   state_memory.memory_realloc_all(new_ncells);
+}
+
+void State::memory_reset_ptrs(void){ // Refresh the cached H/U/V pointers from state_memory
+   H = (state_t *)state_memory.get_memory_ptr("H"); // looked up by the names registered in allocate()
+   U = (state_t *)state_memory.get_memory_ptr("U");
+   V = (state_t *)state_memory.get_memory_ptr("V");
+
+   //printf("\nDEBUG -- Calling state memory reset_ptrs at line %d\n",__LINE__);
+   //state_memory.memory_report();
+   //printf("DEBUG -- Finished state memory reset_ptrs at line %d\n\n",__LINE__);
+}
+
+void State::terminate(void)
+{  // Releases the host arrays, then (when compiled in) the OpenCL and MPI-side resources
+   state_memory.memory_delete(H);
+   state_memory.memory_delete(U);
+   state_memory.memory_delete(V);
+
+#ifdef HAVE_OPENCL
+   ezcl_device_memory_delete(dev_deltaT); // NOTE(review): dev_mass_sum/dev_mpot/dev_result are not released here -- confirm they are freed elsewhere
+
+   gpu_state_memory.memory_delete(dev_H);
+   gpu_state_memory.memory_delete(dev_U);
+   gpu_state_memory.memory_delete(dev_V);
+
+   ezcl_kernel_release(kernel_set_timestep); // one release per kernel created in State::init
+   ezcl_kernel_release(kernel_reduction_min);
+   ezcl_kernel_release(kernel_copy_state_data);
+   ezcl_kernel_release(kernel_copy_state_ghost_data);
+   ezcl_kernel_release(kernel_apply_boundary_conditions);
+   ezcl_kernel_release(kernel_apply_boundary_conditions_local);
+   ezcl_kernel_release(kernel_apply_boundary_conditions_ghost);
+   ezcl_kernel_release(kernel_calc_finite_difference);
+   ezcl_kernel_release(kernel_refine_potential);
+   ezcl_kernel_release(kernel_reduce_sum_mass_stage1of2);
+   ezcl_kernel_release(kernel_reduce_sum_mass_stage2of2);
+   ezcl_kernel_release(kernel_reduce_epsum_mass_stage1of2);
+   ezcl_kernel_release(kernel_reduce_epsum_mass_stage2of2);
+#endif
+#ifdef HAVE_MPI
+   if (mesh->parallel) state_memory.pfini(); // counterpart of the pinit() call in the constructor
+#endif
+}
+
+#ifdef HAVE_MPI
+void knuth_sum(struct esum_type *in, struct esum_type *inout, int *len, MPI_Datatype *MPI_TWO_DOUBLES)
+{  // MPI user reduction: folds in[k] into inout[k] as a (sum, correction) pair, error-compensated
+   (void)MPI_TWO_DOUBLES; // datatype handle is not needed here; cast silences the unused warning
+   // MPI may pass several elements per call, so every one of the *len pairs is reduced
+   for (int k = 0; k < *len; k++) {
+      double u   = inout[k].sum;
+      double v   = in[k].sum + (in[k].correction + inout[k].correction);
+      double upt = u + v;     // new running sum
+      double up  = upt - v;   // part of u that survived the addition
+      double vpp = upt - up;  // part of v that survived the addition
+      inout[k].sum        = upt;
+      inout[k].correction = (u - up) + (v - vpp); // rounding error lost from u and v
+   }
+}
+#endif
+
+// Grow a mesh that has no boundary cells by appending boundary cells to it.
+// One new cell is appended for each side of an existing cell that lies on its level's
+// ibegin/iend/jbegin/jend index; the state and mesh arrays are resized to hold them.
+void State::add_boundary_cells(void)
+{
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+
+   // This is for a mesh with no boundary cells -- they are added and
+   // the mesh sizes increased
+   size_t &ncells        = mesh->ncells;
+   vector<int>  &index    = mesh->index;
+   vector<spatial_t> &x        = mesh->x;
+   vector<spatial_t> &dx       = mesh->dx;
+   vector<spatial_t> &y        = mesh->y;
+   vector<spatial_t> &dy       = mesh->dy;
+
+   int *i        = mesh->i;
+   int *j        = mesh->j;
+   int *level    = mesh->level;
+   int *celltype = mesh->celltype;
+   int *nlft     = mesh->nlft;
+   int *nrht     = mesh->nrht;
+   int *nbot     = mesh->nbot;
+   int *ntop     = mesh->ntop;
+
+   vector<int> &lev_ibegin = mesh->lev_ibegin;
+   vector<int> &lev_iend   = mesh->lev_iend;
+   vector<int> &lev_jbegin = mesh->lev_jbegin;
+   vector<int> &lev_jend   = mesh->lev_jend;
+
+   // Pre-count number of cells to add
+   int icount = 0;
+   for (uint ic=0; ic<ncells; ic++) { // a corner cell matches two tests and is counted twice
+      if (i[ic] == lev_ibegin[level[ic]]) icount++; // Left boundary
+      if (i[ic] == lev_iend[level[ic]])   icount++; // Right boundary
+      if (j[ic] == lev_jbegin[level[ic]]) icount++; // Bottom boundary
+      if (j[ic] == lev_jend[level[ic]])   icount++; // Top boundary
+   }
+
+   int new_ncells = ncells + icount;
+   // Increase the arrays for the new boundary cells
+   H=(state_t *)state_memory.memory_realloc(new_ncells, H);
+   U=(state_t *)state_memory.memory_realloc(new_ncells, U);
+   V=(state_t *)state_memory.memory_realloc(new_ncells, V);
+
+   mesh->i        =(int *)mesh->mesh_memory.memory_realloc(new_ncells, i);
+   mesh->j        =(int *)mesh->mesh_memory.memory_realloc(new_ncells, j);
+   mesh->level    =(int *)mesh->mesh_memory.memory_realloc(new_ncells, level);
+   mesh->celltype =(int *)mesh->mesh_memory.memory_realloc(new_ncells, celltype);
+   mesh->nlft     =(int *)mesh->mesh_memory.memory_realloc(new_ncells, nlft);
+   mesh->nrht     =(int *)mesh->mesh_memory.memory_realloc(new_ncells, nrht);
+   mesh->nbot     =(int *)mesh->mesh_memory.memory_realloc(new_ncells, nbot);
+   mesh->ntop     =(int *)mesh->mesh_memory.memory_realloc(new_ncells, ntop);
+   // Re-read the local aliases from the mesh now that the arrays have been reallocated
+   i        = mesh->i;
+   j        = mesh->j;
+   level    = mesh->level;
+   celltype = mesh->celltype;
+   nlft     = mesh->nlft;
+   nrht     = mesh->nrht;
+   nbot     = mesh->nbot;
+   ntop     = mesh->ntop;
+
+   index.resize(new_ncells);
+   x.resize(new_ncells);
+   dx.resize(new_ncells);
+   y.resize(new_ncells);
+   dy.resize(new_ncells);
+
+   for (int nc=ncells; nc<new_ncells; nc++) { // mark every neighbor of the new cells as unset
+      nlft[nc] = -1;
+      nrht[nc] = -1;
+      nbot[nc] = -1;
+      ntop[nc] = -1;
+   }
+
+   // In the first pass, set two of the neighbor indices and all the other data to be
+   // brought across. Negate the velocity component normal to the boundary to enforce the
+   // reflective condition. NOTE(review): celltype and index of the new cells are not set here.
+   uint nc=ncells; // next free slot; advanced once per appended boundary cell
+   for (uint ic=0; ic<ncells; ic++) {
+      if (i[ic] == lev_ibegin[level[ic]]) { // append a cell to the left of ic
+         nlft[ic] = nc;
+         nlft[nc] = nc;
+         nrht[nc] = ic;
+         i[nc] = lev_ibegin[level[ic]]-1;
+         j[nc] = j[ic];
+         level[nc] = level[ic];
+         dx[nc] = dx[ic];
+         dy[nc] = dy[ic];
+         x[nc] = x[ic]-dx[ic];
+         y[nc] = y[ic];
+         H[nc] =  H[ic];
+         U[nc] = -U[ic];
+         V[nc] =  V[ic];
+         nc++;
+      }
+      if (i[ic] == lev_iend[level[ic]]) { // append a cell to the right of ic
+         nrht[ic] = nc;
+         nrht[nc] = nc;
+         nlft[nc] = ic;
+         i[nc] = lev_iend[level[ic]]+1;
+         j[nc] = j[ic];
+         level[nc] = level[ic];
+         dx[nc] = dx[ic];
+         dy[nc] = dy[ic];
+         x[nc] = x[ic]+dx[ic];
+         y[nc] = y[ic];
+         H[nc] =  H[ic];
+         U[nc] = -U[ic];
+         V[nc] =  V[ic];
+         nc++;
+      }
+      if (j[ic] == lev_jbegin[level[ic]]) { // append a cell below ic
+         nbot[ic] = nc;
+         nbot[nc] = nc;
+         ntop[nc] = ic;
+         i[nc] = i[ic];
+         j[nc] = lev_jbegin[level[ic]]-1;
+         level[nc] = level[ic];
+         dx[nc] = dx[ic];
+         dy[nc] = dy[ic];
+         x[nc] = x[ic];
+         y[nc] = y[ic]-dy[ic];
+         H[nc] =  H[ic];
+         U[nc] =  U[ic];
+         V[nc] = -V[ic];
+         nc++;
+      }
+      if (j[ic] == lev_jend[level[ic]]) { // append a cell above ic
+         ntop[ic] = nc;
+         ntop[nc] = nc;
+         nbot[nc] = ic;
+         i[nc] = i[ic];
+         j[nc] = lev_jend[level[ic]]+1;
+         level[nc] = level[ic];
+         dx[nc] = dx[ic];
+         dy[nc] = dy[ic];
+         x[nc] = x[ic];
+         y[nc] = y[ic]+dy[ic];
+         H[nc] =  H[ic];
+         U[nc] =  U[ic];
+         V[nc] = -V[ic];
+         nc++;
+      }
+   }
+
+   // Now set the other two neighbor indices
+   for (int nc=ncells; nc<new_ncells; nc++) { // this int nc shadows the uint counter above
+      if (i[nc] == lev_ibegin[level[nc]]-1) { // cell appended on the left side
+         // Need to check if also a bottom boundary cell
+         if (j[nc] == lev_jbegin[level[nc]]){
+           nbot[nc] = nc;
+         } else {
+           nbot[nc] = nlft[nbot[nrht[nc]]]; // go right, down, then back left
+         }
+         if (j[nc] == lev_jend[level[nc]]){
+           ntop[nc] = nc;
+         } else {
+           ntop[nc] = nlft[ntop[nrht[nc]]];
+         }
+      }
+      if (i[nc] == lev_iend[level[nc]]+1)   { // cell appended on the right side
+         if (level[nc] <= level[nbot[nlft[nc]]]){
+            if (j[nc] == lev_jbegin[level[nc]]){
+               nbot[nc] = nc;
+            } else {
+               nbot[nc] = nrht[nbot[nlft[nc]]];
+            }
+            if (j[nc] == lev_jend[level[nc]]){
+               ntop[nc] = nc;
+            } else {
+               ntop[nc] = nrht[ntop[nlft[nc]]];
+            }
+         // calculation is a little different if going through a
+         // finer zoned region
+         } else {
+            nbot[nc] = nrht[nrht[nbot[nlft[nc]]]];
+            ntop[nc] = nrht[nrht[ntop[nlft[nc]]]];
+         }
+      }
+      if (j[nc] == lev_jbegin[level[nc]]-1) { // cell appended on the bottom side
+         if (i[nc] == lev_ibegin[level[nc]]){
+            nlft[nc] = nc;
+         } else {
+            nlft[nc] = nbot[nlft[ntop[nc]]];
+         }
+         if (i[nc] == lev_iend[level[nc]]){
+            nrht[nc] = nc;
+         } else {
+            nrht[nc] = nbot[nrht[ntop[nc]]];
+         }
+      }
+      if (j[nc] == lev_jend[level[nc]]+1)   { // cell appended on the top side
+         if (level[nc] <= level[nlft[nbot[nc]]]){
+            if (i[nc] == lev_ibegin[level[nc]]){
+               nlft[nc] = nc;
+            } else {
+               nlft[nc] = ntop[nlft[nbot[nc]]];
+            }
+            if (i[nc] == lev_iend[level[nc]]){
+               nrht[nc] = nc;
+            } else {
+               nrht[nc] = ntop[nrht[nbot[nc]]];
+            }
+         } else {
+            nlft[nc] = ntop[ntop[nlft[nbot[nc]]]];
+            nrht[nc] = ntop[ntop[nrht[nbot[nc]]]];
+         }
+      }
+   }
+   save_ncells = ncells; // remember the pre-growth count; remove_boundary_cells shrinks back to it
+   ncells = new_ncells;
+
+   cpu_timers[STATE_TIMER_APPLY_BCS] += cpu_timer_stop(tstart_cpu);
+}
+
+// Reflective boundary update limited to neighbors whose index is below ncells, for a mesh
+// with boundary cells. For each boundary cell in [lowerBound, upperBound): left/right cells
+// copy H and V from the opposite-side neighbor and negate U; bottom/top cells negate V.
+void State::apply_boundary_conditions_local(void)
+{
+   // Plain locals, not function-statics: statics were stored to by every calling OpenMP
+   // thread at once, which is a data race even though all threads store the same values.
+   int *nlft = mesh->nlft, *nrht = mesh->nrht;
+   int *nbot = mesh->nbot, *ntop = mesh->ntop;
+   size_t &ncells = mesh->ncells;
+
+   int lowerBound, upperBound;
+   mesh->get_bounds(lowerBound, upperBound);
+   for (int ic=lowerBound; ic<upperBound; ic++) { // signed index matches the int bounds
+      if (mesh->is_left_boundary(ic)) {
+         int nr = nrht[ic];
+         if (nr < (int)ncells) {
+            H[ic] =  H[nr];
+            U[ic] = -U[nr];
+            V[ic] =  V[nr];
+         }
+      }
+      if (mesh->is_right_boundary(ic))  {
+         int nl = nlft[ic];
+         if (nl < (int)ncells) {
+            H[ic] =  H[nl];
+            U[ic] = -U[nl];
+            V[ic] =  V[nl];
+         }
+      }
+      if (mesh->is_bottom_boundary(ic)) {
+         int nt = ntop[ic];
+         if (nt < (int)ncells) {
+            H[ic] =  H[nt];
+            U[ic] =  U[nt];
+            V[ic] = -V[nt];
+         }
+      }
+      if (mesh->is_top_boundary(ic)) {
+         int nb = nbot[ic];
+         if (nb < (int)ncells) {
+            H[ic] =  H[nb];
+            U[ic] =  U[nb];
+            V[ic] = -V[nb];
+         }
+      }
+   }
+}
+
+// Reflective boundary update limited to neighbors whose index is ncells or above, for a mesh
+// with boundary cells. For each boundary cell in [lowerBound, upperBound): left/right cells
+// copy H and V from the opposite-side neighbor and negate U; bottom/top cells negate V.
+void State::apply_boundary_conditions_ghost(void)
+{
+   // Plain locals, not function-statics: statics were stored to by every calling OpenMP
+   // thread at once, which is a data race even though all threads store the same values.
+   int *nlft = mesh->nlft, *nrht = mesh->nrht;
+   int *nbot = mesh->nbot, *ntop = mesh->ntop;
+   size_t &ncells = mesh->ncells;
+
+   int lowerBound, upperBound;
+   mesh->get_bounds(lowerBound, upperBound);
+   for (int ic=lowerBound; ic<upperBound; ic++) { // signed index matches the int bounds
+      if (mesh->is_left_boundary(ic)) {
+         int nr = nrht[ic];
+         if (nr >= (int)ncells) {
+            H[ic] =  H[nr];
+            U[ic] = -U[nr];
+            V[ic] =  V[nr];
+         }
+      }
+      if (mesh->is_right_boundary(ic))  {
+         int nl = nlft[ic];
+         if (nl >= (int)ncells) {
+            H[ic] =  H[nl];
+            U[ic] = -U[nl];
+            V[ic] =  V[nl];
+         }
+      }
+      if (mesh->is_bottom_boundary(ic)) {
+         int nt = ntop[ic];
+         if (nt >= (int)ncells) {
+            H[ic] =  H[nt];
+            U[ic] =  U[nt];
+            V[ic] = -V[nt];
+         }
+      }
+      if (mesh->is_top_boundary(ic)) {
+         int nb = nbot[ic];
+         if (nb >= (int)ncells) {
+            H[ic] =  H[nb];
+            U[ic] =  U[nb];
+            V[ic] = -V[nb];
+         }
+      }
+   }
+}
+
+// Reflective boundary update for every boundary cell in [lowerBound, upperBound), with no
+// check on the neighbor index: left/right cells take H and V from the opposite-side
+// neighbor and negate U; bottom/top cells take H and U and negate V.
+void State::apply_boundary_conditions(void)
+{
+   int *nlft = mesh->nlft, *nrht = mesh->nrht;
+   int *nbot = mesh->nbot, *ntop = mesh->ntop;
+
+   // This is for a mesh with boundary cells
+   int lowerBound, upperBound;
+   mesh->get_bounds(lowerBound, upperBound);
+   // The index is signed so it compares cleanly with the int bounds; the reference to
+   // mesh->ncells that was declared here but never read has been removed.
+   for (int ic=lowerBound; ic<upperBound; ic++) {
+      if (mesh->is_left_boundary(ic)) {
+         int nr = nrht[ic];
+         H[ic] =  H[nr];
+         U[ic] = -U[nr];
+         V[ic] =  V[nr];
+      }
+      if (mesh->is_right_boundary(ic))  {
+         int nl = nlft[ic];
+         H[ic] =  H[nl];
+         U[ic] = -U[nl];
+         V[ic] =  V[nl];
+      }
+      if (mesh->is_bottom_boundary(ic)) {
+         int nt = ntop[ic];
+         H[ic] =  H[nt];
+         U[ic] =  U[nt];
+         V[ic] = -V[nt];
+      }
+      if (mesh->is_top_boundary(ic)) {
+         int nb = nbot[ic];
+         H[ic] =  H[nb];
+         U[ic] =  U[nb];
+         V[ic] = -V[nb];
+      }
+   }
+}
+
+// Undo the growth done for boundary cells: shrink the state and mesh arrays back to
+// save_ncells, then make each cell on a level's edge its own neighbor on that side.
+// Nothing happens when mesh->have_boundary is set.
+void State::remove_boundary_cells(void)
+{
+   if (mesh->have_boundary) return;
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+   {
+#endif
+      // The cell count is cut first, then every per-cell array is resized to match
+      mesh->ncells = save_ncells;
+      H = (state_t *)state_memory.memory_realloc(save_ncells, H);
+      U = (state_t *)state_memory.memory_realloc(save_ncells, U);
+      V = (state_t *)state_memory.memory_realloc(save_ncells, V);
+
+      // Same for the per-cell arrays held in the mesh's own memory database
+      mesh->i        = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->i);
+      mesh->j        = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->j);
+      mesh->level    = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->level);
+      mesh->celltype = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->celltype);
+      mesh->nlft     = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->nlft);
+      mesh->nrht     = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->nrht);
+      mesh->nbot     = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->nbot);
+      mesh->ntop     = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->ntop);
+
+      // ...and for the vectors the mesh keeps alongside them
+      mesh->index.resize(save_ncells);
+      mesh->x.resize(save_ncells);
+      mesh->dx.resize(save_ncells);
+      mesh->y.resize(save_ncells);
+      mesh->dy.resize(save_ncells);
+#ifdef _OPENMP
+   }
+#pragma omp barrier
+#endif
+
+   mesh->set_bounds(mesh->ncells);
+
+   // Reset the neighbors due to the dropped boundary cells: a cell sitting on the first or
+   // last i/j index of its level becomes its own neighbor in that direction
+   int first, last;
+   mesh->get_bounds(first, last);
+   for (uint cell=first; cell<last; cell++) {
+      const int lev = mesh->level[cell];
+      if (mesh->i[cell] == mesh->lev_ibegin[lev]) mesh->nlft[cell] = cell;
+      if (mesh->i[cell] == mesh->lev_iend[lev])   mesh->nrht[cell] = cell;
+      if (mesh->j[cell] == mesh->lev_jbegin[lev]) mesh->nbot[cell] = cell;
+      if (mesh->j[cell] == mesh->lev_jend[lev])   mesh->ntop[cell] = cell;
+   }
+}
+
+// Return the smallest timestep: the minimum of sigma/(xspeed+yspeed) over the REAL_CELLs
+// in the caller's bounds (capped at 1000), MPI_MIN-reduced when the mesh is parallel.
+double State::set_timestep(double g, double sigma)
+{
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+
+   // Both are function-static so all OpenMP threads share them. globalmindeltaT used to be
+   // a per-thread local written only by the master, so other threads returned garbage.
+   static double mindeltaT;
+   static double globalmindeltaT;
+
+   int lowerBounds, upperBounds;
+   mesh->set_bounds(mesh->ncells);
+   mesh->get_bounds(lowerBounds, upperBounds);
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+   {
+#endif
+      mindeltaT = 1000;
+#ifdef _OPENMP
+   }
+#pragma omp barrier
+#endif
+
+   double mymindeltaT = 1000.0; // private for each thread
+   for (int ic=lowerBounds; ic<upperBounds; ic++) {
+      if (mesh->celltype[ic] == REAL_CELL) {
+         int lev = mesh->level[ic];
+         double wavespeed = sqrt(g*H[ic]);
+         double xspeed = (fabs(U[ic])+wavespeed)/mesh->lev_deltax[lev];
+         double yspeed = (fabs(V[ic])+wavespeed)/mesh->lev_deltay[lev];
+         double deltaT=sigma/(xspeed+yspeed);
+         if (deltaT < mymindeltaT) mymindeltaT = deltaT;
+      }
+   }
+
+#ifdef _OPENMP
+#pragma omp critical
+   {
+#endif
+      if (mymindeltaT < mindeltaT) mindeltaT = mymindeltaT; // fold this thread's minimum in
+#ifdef _OPENMP
+   } // End critical region
+#pragma omp barrier
+#endif
+
+#ifdef _OPENMP
+#pragma omp master
+   {
+#endif
+      globalmindeltaT = mindeltaT;
+#ifdef HAVE_MPI
+      if (mesh->parallel) MPI_Allreduce(&mindeltaT, &globalmindeltaT, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
+#endif
+
+      cpu_timers[STATE_TIMER_SET_TIMESTEP] += cpu_timer_stop(tstart_cpu);
+#ifdef _OPENMP
+   } // End master region; the barrier below publishes globalmindeltaT to every thread
+#pragma omp barrier
+#endif
+
+   return(globalmindeltaT);
+}
+
+#ifdef HAVE_OPENCL
+// GPU version of set_timestep: enqueues kernel_set_timestep over all cells and, when more
+// than one work group was launched, kernel_reduction_min over the scratch buffer. The value
+// read back from dev_deltaT is MPI_MIN-reduced for a parallel mesh and returned.
+double State::gpu_set_timestep(double sigma)
+{
+   double deltaT, globalmindeltaT;
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+   cl_command_queue command_queue = ezcl_get_command_queue();
+
+   size_t &ncells       = mesh->ncells;
+#ifdef HAVE_MPI
+   int &parallel        = mesh->parallel;
+#endif
+   cl_mem &dev_level    = mesh->dev_level;
+   cl_mem &dev_celltype = mesh->dev_celltype;
+   cl_mem &dev_levdx    = mesh->dev_levdx;
+   cl_mem &dev_levdy    = mesh->dev_levdy;
+
+   assert(dev_H);
+   assert(dev_U);
+   assert(dev_V);
+   assert(dev_level);
+   assert(dev_celltype);
+   assert(dev_levdx);
+   assert(dev_levdy);
+
+   size_t local_work_size = 128;
+   size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size;
+   size_t block_size     = global_work_size/local_work_size;
+   // Both kernels declare their first argument as int. Pass real cl_int objects: handing
+   // over the leading sizeof(cl_int) bytes of a size_t is only right on little-endian hosts.
+   cl_int ncells_arg     = (cl_int)ncells;
+   cl_int block_size_arg = (cl_int)block_size;
+
+   cl_mem dev_redscratch = ezcl_malloc(NULL, const_cast<char *>("dev_redscratch"), &block_size, sizeof(cl_real_t), CL_MEM_READ_WRITE, 0);
+
+      /*
+      __kernel void set_timestep_cl(
+                       const int       ncells,     // 0  Total number of cells.
+                       const real_t    sigma,      // 1
+              __global const state_t  *H,          // 2
+              __global const state_t  *U,          // 3
+              __global const state_t  *V,          // 4
+              __global const int      *level,      // 5  Array of level information.
+              __global const int      *celltype,   // 6
+              __global const real_t   *lev_dx,     // 7
+              __global const real_t   *lev_dy,     // 8
+              __global       real_t   *redscratch, // 9
+              __global       real_t   *deltaT,     // 10
+              __local        real_t   *tile)       // 11
+      */
+   real_t sigma_local = sigma;
+   ezcl_set_kernel_arg(kernel_set_timestep,  0, sizeof(cl_int),  (void *)&ncells_arg);
+   ezcl_set_kernel_arg(kernel_set_timestep,  1, sizeof(cl_real_t), (void *)&sigma_local);
+   ezcl_set_kernel_arg(kernel_set_timestep,  2, sizeof(cl_mem),  (void *)&dev_H);
+   ezcl_set_kernel_arg(kernel_set_timestep,  3, sizeof(cl_mem),  (void *)&dev_U);
+   ezcl_set_kernel_arg(kernel_set_timestep,  4, sizeof(cl_mem),  (void *)&dev_V);
+   ezcl_set_kernel_arg(kernel_set_timestep,  5, sizeof(cl_mem),  (void *)&dev_level);
+   ezcl_set_kernel_arg(kernel_set_timestep,  6, sizeof(cl_mem),  (void *)&dev_celltype);
+   ezcl_set_kernel_arg(kernel_set_timestep,  7, sizeof(cl_mem),  (void *)&dev_levdx);
+   ezcl_set_kernel_arg(kernel_set_timestep,  8, sizeof(cl_mem),  (void *)&dev_levdy);
+   ezcl_set_kernel_arg(kernel_set_timestep,  9, sizeof(cl_mem),  (void *)&dev_redscratch);
+   ezcl_set_kernel_arg(kernel_set_timestep, 10, sizeof(cl_mem),  (void *)&dev_deltaT);
+   ezcl_set_kernel_arg(kernel_set_timestep, 11, local_work_size*sizeof(cl_real_t),  NULL);
+
+   ezcl_enqueue_ndrange_kernel(command_queue, kernel_set_timestep, 1, NULL, &global_work_size, &local_work_size, NULL);
+   if (block_size > 1){
+         /*
+         __kernel void finish_reduction_min_cl(
+           const    int      isize,
+           __global real_t  *redscratch,
+           __global real_t  *deltaT,
+           __local  real_t  *tile)
+         */
+      ezcl_set_kernel_arg(kernel_reduction_min, 0, sizeof(cl_int),  (void *)&block_size_arg);
+      ezcl_set_kernel_arg(kernel_reduction_min, 1, sizeof(cl_mem),  (void *)&dev_redscratch);
+      ezcl_set_kernel_arg(kernel_reduction_min, 2, sizeof(cl_mem),  (void *)&dev_deltaT);
+      ezcl_set_kernel_arg(kernel_reduction_min, 3, local_work_size*sizeof(cl_real_t), NULL);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduction_min, 1, NULL, &local_work_size, &local_work_size, NULL);
+   }
+
+   real_t deltaT_local; // read back at device precision (CL_TRUE is passed for the blocking flag), then widened
+   ezcl_enqueue_read_buffer(command_queue, dev_deltaT, CL_TRUE,  0, sizeof(cl_real_t), &deltaT_local, NULL);
+   deltaT = deltaT_local;
+
+   globalmindeltaT = deltaT;
+#ifdef HAVE_MPI
+   if (parallel) MPI_Allreduce(&deltaT, &globalmindeltaT, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
+#endif
+
+   ezcl_device_memory_delete(dev_redscratch);
+   gpu_timers[STATE_TIMER_SET_TIMESTEP] += (long)(cpu_timer_stop(tstart_cpu)*1.0e9);
+   return(globalmindeltaT);
+}
+#endif
+
+// Set up a circular initial condition: H becomes `background` everywhere with U = V = 0,
+// cells the k-D tree query reports inside the circle get `fill_value`, and cells reported
+// as intersecting it get background + (fill_value - background) * weight.
+void State::fill_circle(double  circ_radius,//  Radius of circle in grid units.
+                        double  fill_value, //  Circle height for shallow water.
+                        double  background) //  Background height for shallow water.
+{
+   size_t &ncells = mesh->ncells;
+   vector<spatial_t> &xpos   = mesh->x;
+   vector<spatial_t> &xwidth = mesh->dx;
+   vector<spatial_t> &ypos   = mesh->y;
+   vector<spatial_t> &ywidth = mesh->dy;
+
+   for (uint cell = 0; cell < ncells; cell++) {
+      H[cell] = background;
+      U[cell] = 0.0;
+      V[cell] = 0.0;
+   }
+
+   // Generate new k-D tree data for the queries below (slow but necessary here).
+   mesh->kdtree_setup();
+
+   int nhits;                     // count handed back by each query
+   vector<int>    hits(ncells);
+   vector<double> frac(ncells);
+
+#ifdef FULL_PRECISION
+   KDTree_QueryCircleInterior_Double(&mesh->tree, &nhits, &(hits[0]), circ_radius, ncells,
+                                     &xpos[0], &xwidth[0], &ypos[0], &ywidth[0]);
+#else
+   KDTree_QueryCircleInterior_Float(&mesh->tree, &nhits, &(hits[0]), circ_radius, ncells,
+                                    &xpos[0], &xwidth[0], &ypos[0], &ywidth[0]);
+#endif
+   for (int k = 0; k < nhits; ++k) H[hits[k]] = fill_value;
+
+#ifdef FULL_PRECISION
+   KDTree_QueryCircleIntersectWeighted_Double(&mesh->tree, &nhits, &(hits[0]), &(frac[0]),
+                              circ_radius, ncells,
+                              &xpos[0], &xwidth[0], &ypos[0], &ywidth[0]);
+#else
+   KDTree_QueryCircleIntersectWeighted_Float(&mesh->tree, &nhits, &(hits[0]), &(frac[0]),
+                              circ_radius, ncells,
+                              &xpos[0], &xwidth[0], &ypos[0], &ywidth[0]);
+#endif
+   // Cells returned by the weighted query are blended between background and fill
+   for (int k = 0; k < nhits; ++k) {
+      H[hits[k]] = background + (fill_value - background) * frac[k];
+   }
+
+   KDTree_Destroy(&mesh->tree);
+}
+
+// Reorder H, U and V through state_memory using the ordering supplied in iorder.
+void State::state_reorder(vector<int> iorder)
+{
+   int *order = &iorder[0];   // the same pointer is handed to all three reorders
+
+   H = state_memory.memory_reorder(H, order);
+   U = state_memory.memory_reorder(U, order);
+   V = state_memory.memory_reorder(V, order);
+}
+
+// Rezone the mesh, passing it this object's state memory, then reset the state pointers.
+void State::rezone_all(int icount, int jcount, vector<int> mpot)
+{
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+
+   mesh->rezone_all(icount, jcount, mpot, 1, state_memory); // NOTE(review): meaning of the literal 1 is not visible here
+#ifdef _OPENMP
+#pragma omp master
+   {
+#endif
+   memory_reset_ptrs(); // presumably re-reads H, U and V from state_memory -- confirm against its definition
+
+   cpu_timers[STATE_TIMER_REZONE_ALL] += cpu_timer_stop(tstart_cpu); // under OpenMP only the master accumulates
+#ifdef _OPENMP
+   } // end master region
+#endif
+}
+
+
+#ifdef HAVE_OPENCL
+// Rezone the mesh on the GPU, then look the device state buffers up again by name.
+void State::gpu_rezone_all(int icount, int jcount, bool localStencil)
+{
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+
+   // localStencil is otherwise unused; this never-true branch only keeps the compiler quiet
+   if (1 == 2) printf("DEBUG -- localStencil is %d\n",localStencil);
+
+   mesh->gpu_rezone_all(icount, jcount, dev_mpot, gpu_state_memory);
+   dev_H = (cl_mem)gpu_state_memory.get_memory_ptr("dev_H"); // handles are fetched again after the rezone
+   dev_U = (cl_mem)gpu_state_memory.get_memory_ptr("dev_U");
+   dev_V = (cl_mem)gpu_state_memory.get_memory_ptr("dev_V");
+   gpu_timers[STATE_TIMER_REZONE_ALL] += (long)(cpu_timer_stop(tstart_cpu)*1.0e9); // timer value scaled by 1.0e9, truncated to long
+}
+#endif
+
+// Square of x; the argument is parenthesized so expressions can be passed safely
+#define SQ(x) ((x)*(x))
+//define macro to find minimum of 3 values
+//#define MIN3(a,b,c) (min(min((a),(b)),(c)))
+// Array-indexed x-direction fluxes for cell ic; H, U, V and ghalf come from the scope of use
+#define HXFLUX(ic)  ( U[ic] )
+#define UXFLUX(ic)  ( SQ(U[ic])/H[ic] + ghalf*SQ(H[ic]) )
+#define UVFLUX(ic)  ( U[ic]*V[ic]/H[ic] )
+// The same x-direction fluxes on scalar locals: the cell (ic) and its l/r/b/t neighbors
+#define HXFLUXIC ( Uic )
+#define HXFLUXNL ( Ul )
+#define HXFLUXNR ( Ur )
+#define HXFLUXNB ( Ub )
+#define HXFLUXNT ( Ut )
+
+#define UXFLUXIC ( SQ(Uic)/Hic + ghalf*SQ(Hic) )
+#define UXFLUXNL ( SQ(Ul)/Hl + ghalf*SQ(Hl) )
+#define UXFLUXNR ( SQ(Ur)/Hr + ghalf*SQ(Hr) )
+#define UXFLUXNB ( SQ(Ub)/Hb + ghalf*SQ(Hb) )
+#define UXFLUXNT ( SQ(Ut)/Ht + ghalf*SQ(Ht) )
+
+#define UVFLUXIC ( Uic*Vic/Hic )
+#define UVFLUXNL ( Ul*Vl/Hl )
+#define UVFLUXNR ( Ur*Vr/Hr )
+#define UVFLUXNB ( Ub*Vb/Hb )
+#define UVFLUXNT ( Ut*Vt/Ht )
+// Array-indexed y-direction fluxes for cell ic
+#define HYFLUX(ic)  ( V[ic] )
+#define VUFLUX(ic)  ( V[ic]*U[ic]/H[ic] )
+#define VYFLUX(ic)  ( SQ(V[ic])/H[ic] + ghalf*SQ(H[ic]) )
+// The same y-direction fluxes on scalar locals
+#define HYFLUXIC ( Vic )
+#define HYFLUXNL ( Vl )
+#define HYFLUXNR ( Vr )
+#define HYFLUXNB ( Vb )
+#define HYFLUXNT ( Vt )
+
+#define VUFLUXIC  ( Vic*Uic/Hic )
+#define VUFLUXNL  ( Vl*Ul/Hl )
+#define VUFLUXNR  ( Vr*Ur/Hr )
+#define VUFLUXNB  ( Vb*Ub/Hb )
+#define VUFLUXNT  ( Vt*Ut/Ht )
+
+#define VYFLUXIC  ( SQ(Vic)/Hic + ghalf*SQ(Hic) )
+#define VYFLUXNL  ( SQ(Vl)/Hl + ghalf*SQ(Hl) )
+#define VYFLUXNR  ( SQ(Vr)/Hr + ghalf*SQ(Hr) )
+#define VYFLUXNB  ( SQ(Vb)/Hb + ghalf*SQ(Hb) )
+#define VYFLUXNT  ( SQ(Vt)/Ht + ghalf*SQ(Ht) )
+
+// Fluxes evaluated on the half-step values (Hxminus, Uxplus, Vyminus, ...)
+#define HNEWXFLUXMINUS  ( Uxminus )
+#define HNEWXFLUXPLUS   ( Uxplus )
+#define UNEWXFLUXMINUS  ( SQ(Uxminus)/Hxminus + ghalf*SQ(Hxminus) )
+#define UNEWXFLUXPLUS   ( SQ(Uxplus) /Hxplus +  ghalf*SQ(Hxplus)  )
+#define UVNEWFLUXMINUS  ( Uxminus*Vxminus/Hxminus )
+#define UVNEWFLUXPLUS   ( Uxplus *Vxplus /Hxplus  )
+
+#define HNEWYFLUXMINUS  ( Vyminus )
+#define HNEWYFLUXPLUS   ( Vyplus  )
+#define VNEWYFLUXMINUS  ( SQ(Vyminus)/Hyminus + ghalf*SQ(Hyminus) )
+#define VNEWYFLUXPLUS   ( SQ(Vyplus) /Hyplus  + ghalf*SQ(Hyplus)  )
+#define VUNEWFLUXMINUS  ( Vyminus*Uyminus/Hyminus )
+#define VUNEWFLUXPLUS   ( Vyplus *Uyplus /Hyplus )
+
+// Variants on the lt/rt/br/tr locals and the "2" half-step values. NOTE(review): these use SQR, which is not defined in this section -- presumably earlier in the file.
+#define HXFLUXNLT ( Ult )
+#define HXFLUXNRT ( Urt )
+#define UXFLUXNLT ( SQR(Ult)/Hlt + ghalf*SQR(Hlt) )
+#define UXFLUXNRT ( SQR(Urt)/Hrt + ghalf*SQR(Hrt) )
+#define UVFLUXNLT ( Ult*Vlt/Hlt )
+#define UVFLUXNRT ( Urt*Vrt/Hrt )
+#define HYFLUXNBR ( Vbr )
+#define HYFLUXNTR ( Vtr )
+#define VUFLUXNBR  ( Vbr*Ubr/Hbr )
+#define VUFLUXNTR  ( Vtr*Utr/Htr )
+#define VYFLUXNBR  ( SQR(Vbr)/Hbr + ghalf*SQR(Hbr) )
+#define VYFLUXNTR  ( SQR(Vtr)/Htr + ghalf*SQR(Htr) )
+#define HNEWXFLUXMINUS2  ( Uxminus2 )
+#define HNEWXFLUXPLUS2   ( Uxplus2 )
+#define UNEWXFLUXMINUS2  ( SQR(Uxminus2)/Hxminus2 + ghalf*SQR(Hxminus2) )
+#define UNEWXFLUXPLUS2   ( SQR(Uxplus2) /Hxplus2 +  ghalf*SQR(Hxplus2)  )
+#define UVNEWFLUXMINUS2  ( Uxminus2*Vxminus2/Hxminus2 )
+#define UVNEWFLUXPLUS2   ( Uxplus2 *Vxplus2 /Hxplus2  )
+#define HNEWYFLUXMINUS2  ( Vyminus2 )
+#define HNEWYFLUXPLUS2   ( Vyplus2  )
+#define VNEWYFLUXMINUS2  ( SQR(Vyminus2)/Hyminus2 + ghalf*SQR(Hyminus2) )
+#define VNEWYFLUXPLUS2   ( SQR(Vyplus2) /Hyplus2  + ghalf*SQR(Hyplus2)  )
+#define VUNEWFLUXMINUS2  ( Vyminus2*Uyminus2/Hyminus2 )
+#define VUNEWFLUXPLUS2   ( Vyplus2 *Uyplus2 /Hyplus2 )
+
+void State::calc_finite_difference(double deltaT){
+   real_t   g     = 9.80;   // gravitational constant
+   real_t   ghalf = 0.5*g;
+
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+
+   size_t ncells     = mesh->ncells;
+   size_t &ncells_ghost = mesh->ncells_ghost;
+#ifdef _OPENMP
+#pragma omp master
+#endif
+   if (ncells_ghost < ncells) ncells_ghost = ncells;
+
+   //printf("\nDEBUG finite diff\n"); 
+
+#ifdef HAVE_MPI
+   // We need to populate the ghost regions since the calc neighbors has just been
+   // established for the mesh shortly before
+   if (mesh->numpe > 1) {
+      apply_boundary_conditions_local();
+
+#ifdef _OPENMP
+#pragma omp master
+      {
+#endif
+      H=(state_t *)state_memory.memory_realloc(ncells_ghost, H);
+      U=(state_t *)state_memory.memory_realloc(ncells_ghost, U);
+      V=(state_t *)state_memory.memory_realloc(ncells_ghost, V);
+
+      L7_Update(&H[0], L7_STATE_T, mesh->cell_handle);
+      L7_Update(&U[0], L7_STATE_T, mesh->cell_handle);
+      L7_Update(&V[0], L7_STATE_T, mesh->cell_handle);
+#ifdef _OPENMP
+      }
+#pragma omp barrier
+#endif
+
+      apply_boundary_conditions_ghost();
+   } else {
+      apply_boundary_conditions();
+   }
+#else
+   apply_boundary_conditions();
+#endif
+
+   static state_t *H_new, *U_new, *V_new;
+   int *nlft, *nrht, *nbot, *ntop, *level;
+
+   nlft  = mesh->nlft;
+   nrht  = mesh->nrht;
+   nbot  = mesh->nbot;
+   ntop  = mesh->ntop;
+   level = mesh->level;
+
+   vector<real_t> &lev_deltax = mesh->lev_deltax;
+   vector<real_t> &lev_deltay = mesh->lev_deltay;
+
+   int flags = 0;
+   flags = RESTART_DATA;
+#if defined (HAVE_J7)
+   if (mesh->parallel) flags = LOAD_BALANCE_MEMORY;
+#endif
+
+#ifdef _OPENMP
+#pragma omp master
+#endif
+   {
+      H_new = (state_t *)state_memory.memory_malloc(ncells_ghost,
+                                                    sizeof(state_t),
+                                                    "H_new", flags);
+      U_new = (state_t *)state_memory.memory_malloc(ncells_ghost,
+                                                    sizeof(state_t),
+                                                    "U_new", flags);
+      V_new = (state_t *)state_memory.memory_malloc(ncells_ghost,
+                                                    sizeof(state_t),
+                                                    "V_new", flags);
+   }
+#ifdef _OPENMP
+#pragma omp barrier
+#endif
+
+   int lowerBound, upperBound;
+   mesh->get_bounds(lowerBound, upperBound);
+
+   for(int gix = lowerBound; gix < upperBound; gix++) {
+#if DEBUG >= 3
+      printf("%d: DEBUG gix is %d at line %d in file %s\n",mesh->mype,gix,__LINE__,__FILE__);
+#endif
+
+      int lvl     = level[gix];
+      int nl      = nlft[gix];
+      int nr      = nrht[gix];
+      int nt      = ntop[gix];
+      int nb      = nbot[gix];
+
+      real_t Hic     = H[gix];
+      real_t Uic     = U[gix];
+      real_t Vic     = V[gix];
+
+#if DEBUG >= 3
+      if (nl < 0 || nl >= ncells_ghost ) printf("%d: Problem at file %s line %d with nl %ld\n",mesh->mype,__FILE__,__LINE__,nl);
+#endif
+      int nll     = nlft[nl];
+      real_t Hl      = H[nl];
+      real_t Ul      = U[nl];
+      real_t Vl      = V[nl];
+
+#if DEBUG >= 3
+      if (nr < 0 || nr >= ncells_ghost ) printf("%d: Problem at file %s line %d with nr %ld\n",mesh->mype,__FILE__,__LINE__,nr);
+#endif
+      int nrr     = nrht[nr];
+      real_t Hr      = H[nr];
+      real_t Ur      = U[nr];
+      real_t Vr      = V[nr];
+
+#if DEBUG >= 3
+      if (nt < 0 || nt >= ncells_ghost ) printf("%d: Problem at file %s line %d with nt %ld\n",mesh->mype,__FILE__,__LINE__,nt);
+#endif
+      int ntt     = ntop[nt];
+      real_t Ht      = H[nt];
+      real_t Ut      = U[nt];
+      real_t Vt      = V[nt];
+
+#if DEBUG >= 3
+      if (nb < 0 || nb >= ncells_ghost ) printf("%d: Problem at file %s line %d with nb %ld\n",mesh->mype,__FILE__,__LINE__,nb);
+#endif
+      int nbb     = nbot[nb];
+      real_t Hb      = H[nb];
+      real_t Ub      = U[nb];
+      real_t Vb      = V[nb];
+
+      int nlt     = ntop[nl];
+      int nrt     = ntop[nr];
+      int ntr     = nrht[nt];
+      int nbr     = nrht[nb];
+
+#if DEBUG >= 3
+      if (nll < 0 || nll >= ncells_ghost ) printf("%d: Problem at file %s line %d with nll %ld\n",mesh->mype,__FILE__,__LINE__,nll);
+#endif
+      real_t Hll     = H[nll];
+      real_t Ull     = U[nll];
+      //real_t Vll     = V[nll];
+
+#if DEBUG >= 3
+      if (nrr < 0 || nrr >= ncells_ghost ) printf("%d: Problem at file %s line %d with nrr %ld\n",mesh->mype,__FILE__,__LINE__,nrr);
+#endif
+      real_t Hrr     = H[nrr];
+      real_t Urr     = U[nrr];
+      //real_t Vrr     = V[nrr];
+
+#if DEBUG >= 3
+      if (ntt < 0 || ntt >= ncells_ghost ) printf("%d: Problem at file %s line %d with ntt %ld\n",mesh->mype,__FILE__,__LINE__,ntt);
+#endif
+      real_t Htt     = H[ntt];
+      //real_t Utt     = U[ntt];
+      real_t Vtt     = V[ntt];
+
+#if DEBUG >= 3
+      if (nbb < 0 || nbb >= ncells_ghost ) {printf("%d: Problem at file %s line %d ic %d %d with nbb %ld\n",mesh->mype,__FILE__,__LINE__,gix,gix+mesh->noffset,nbb); sleep(15); }
+#endif
+      real_t Hbb     = H[nbb];
+      //real_t Ubb     = U[nbb];
+      real_t Vbb     = V[nbb];
+
+#if DEBUG >= 3
+      if (lvl < 0 || lvl >= (int)lev_deltax.size() ) printf("%d: Problem at file %s line %d with lvl %d\n",mesh->mype,__FILE__,__LINE__,lvl);
+#endif
+      real_t dxic    = lev_deltax[lvl];
+      real_t dyic    = lev_deltay[lvl];
+
+      real_t dxl     = lev_deltax[level[nl]];
+      real_t dxr     = lev_deltax[level[nr]];
+
+      real_t dyt     = lev_deltay[level[nt]];
+      real_t dyb     = lev_deltay[level[nb]];
+
+      real_t drl     = dxl;
+      real_t drr     = dxr;
+      real_t drt     = dyt;
+      real_t drb     = dyb;
+
+      real_t dric    = dxic;
+
+      int nltl = 0;   // becomes nlft[nlt]; only set when level[nl] > lvl
+      real_t Hlt = 0.0, Ult = 0.0, Vlt = 0.0;
+      real_t Hll2 = 0.0;
+      real_t Ull2 = 0.0;
+      if(lvl < level[nl]) {   // left neighbor's level exceeds this cell's: also read ntop[nl] and nlft[nlt]
+#if DEBUG >= 3
+         if (nlt < 0 || nlt >= ncells_ghost ) printf("%d: Problem at file %s line %d with nlt %ld\n",mesh->mype,__FILE__,__LINE__,nlt);
+#endif
+         Hlt  = H[ ntop[nl] ];
+         Ult  = U[ ntop[nl] ];
+         Vlt  = V[ ntop[nl] ];
+         nltl = nlft[nlt];
+#if DEBUG >= 3
+         if (nltl < 0 || nltl >= ncells_ghost ) printf("%d: Problem at file %s line %d with nltl %d\n",mesh->mype,__FILE__,__LINE__,nltl);
+#endif
+         Hll2 = H[nltl];
+         Ull2 = U[nltl];
+      }
+
+      int nrtr = 0;   // becomes nrht[nrt]; only set when level[nr] > lvl
+      real_t Hrt = 0.0, Urt = 0.0, Vrt = 0.0;
+      real_t Hrr2 = 0.0;
+      real_t Urr2 = 0.0;
+      if(lvl < level[nr]) {   // right neighbor's level exceeds this cell's: also read ntop[nr] and nrht[nrt]
+#if DEBUG >= 3
+         if (nrt < 0 || nrt >= ncells_ghost ) printf("%d: Problem at file %s line %d with nrt %ld\n",mesh->mype,__FILE__,__LINE__,nrt);
+#endif
+         Hrt  = H[ ntop[nr] ];
+         Urt  = U[ ntop[nr] ];
+         Vrt  = V[ ntop[nr] ];
+         nrtr = nrht[nrt];
+#if DEBUG >= 3
+         if (nrtr < 0 || nrtr >= ncells_ghost ) printf("%d: Problem at file %s line %d with nrtr %d\n",mesh->mype,__FILE__,__LINE__,nrtr);
+#endif
+         Hrr2 = H[nrtr];
+         Urr2 = U[nrtr];
+      }
+
+      int nbrb = 0;   // becomes nbot[nbr]; only set when level[nb] > lvl
+      real_t Hbr = 0.0, Ubr = 0.0, Vbr = 0.0;
+      real_t Hbb2 = 0.0;
+      real_t Vbb2 = 0.0;
+      if(lvl < level[nb]) {   // bottom neighbor's level exceeds this cell's: also read nrht[nb] and nbot[nbr]
+#if DEBUG >= 3
+         if (nbr < 0 || nbr >= ncells_ghost ) printf("%d: Problem at file %s line %d with nbr %ld\n",mesh->mype,__FILE__,__LINE__,nbr);
+#endif
+         Hbr  = H[ nrht[nb] ];
+         Ubr  = U[ nrht[nb] ];
+         Vbr  = V[ nrht[nb] ];
+         nbrb = nbot[nbr];
+#if DEBUG >= 3
+         if (nbrb < 0 || nbrb >= ncells_ghost ) {printf("%d: Problem at file %s line %d ic %d %d with nbrb %d\n",mesh->mype,__FILE__,__LINE__,gix,gix+mesh->noffset,nbrb); sleep(20);}
+#endif
+         Hbb2 = H[nbrb];
+         Vbb2 = V[nbrb];
+      }
+
+      int ntrt = 0;   // becomes ntop[ntr]; only set when level[nt] > lvl
+      real_t Htr = 0.0, Utr = 0.0, Vtr = 0.0;
+      real_t Htt2 = 0.0;
+      real_t Vtt2 = 0.0;
+      if(lvl < level[nt]) {   // top neighbor's level exceeds this cell's: also read nrht[nt] and ntop[ntr]
+#if DEBUG >= 3
+         if (ntr < 0 || ntr >= ncells_ghost ) printf("%d: Problem at file %s line %d with ntr %ld\n",mesh->mype,__FILE__,__LINE__,ntr);
+#endif
+         Htr  = H[ nrht[nt] ];
+         Utr  = U[ nrht[nt] ];
+         Vtr  = V[ nrht[nt] ];
+         ntrt = ntop[ntr];
+#if DEBUG >= 3
+         if (ntrt < 0 || ntrt >= ncells_ghost ) {printf("%d: Problem at file %s line %d ic %d %d with ntrt %d\n",mesh->mype,__FILE__,__LINE__,gix,gix+mesh->noffset,ntrt); sleep(20); }
+#endif
+         Htt2 = H[ntrt];
+         Vtt2 = V[ntrt];
+      }
+
+
+      real_t Hxminus = U_halfstep(deltaT, Hl, Hic, HXFLUXNL, HXFLUXIC,
+                           dxl, dxic, dxl, dxic, SQR(dxl), SQR(dxic));
+      real_t Uxminus = U_halfstep(deltaT, Ul, Uic, UXFLUXNL, UXFLUXIC,
+                           dxl, dxic, dxl, dxic, SQR(dxl), SQR(dxic));
+      real_t Vxminus = U_halfstep(deltaT, Vl, Vic, UVFLUXNL, UVFLUXIC,
+                           dxl, dxic, dxl, dxic, SQR(dxl), SQR(dxic));
+
+      real_t Hxplus  = U_halfstep(deltaT, Hic, Hr, HXFLUXIC, HXFLUXNR,
+                           dxic, dxr, dxic, dxr, SQR(dxic), SQR(dxr));
+      real_t Uxplus  = U_halfstep(deltaT, Uic, Ur, UXFLUXIC, UXFLUXNR,
+                           dxic, dxr, dxic, dxr, SQR(dxic), SQR(dxr));
+      real_t Vxplus  = U_halfstep(deltaT, Vic, Vr, UVFLUXIC, UVFLUXNR,
+                           dxic, dxr, dxic, dxr, SQR(dxic), SQR(dxr));
+
+      real_t Hyminus = U_halfstep(deltaT, Hb, Hic, HYFLUXNB, HYFLUXIC,
+                           dyb, dyic, dyb, dyic, SQR(dyb), SQR(dyic));
+      real_t Uyminus = U_halfstep(deltaT, Ub, Uic, VUFLUXNB, VUFLUXIC,
+                           dyb, dyic, dyb, dyic, SQR(dyb), SQR(dyic));
+      real_t Vyminus = U_halfstep(deltaT, Vb, Vic, VYFLUXNB, VYFLUXIC,
+                           dyb, dyic, dyb, dyic, SQR(dyb), SQR(dyic));
+
+      real_t Hyplus  = U_halfstep(deltaT, Hic, Ht, HYFLUXIC, HYFLUXNT,
+                           dyic, dyt, dyic, dyt, SQR(dyic), SQR(dyt));
+      real_t Uyplus  = U_halfstep(deltaT, Uic, Ut, VUFLUXIC, VUFLUXNT,
+                           dyic, dyt, dyic, dyt, SQR(dyic), SQR(dyt));
+      real_t Vyplus  = U_halfstep(deltaT, Vic, Vt, VYFLUXIC, VYFLUXNT,
+                           dyic, dyt, dyic, dyt, SQR(dyic), SQR(dyt));
+
+      real_t Hxfluxminus = HNEWXFLUXMINUS;
+      real_t Uxfluxminus = UNEWXFLUXMINUS;
+      real_t Vxfluxminus = UVNEWFLUXMINUS;
+
+      real_t Hxfluxplus  = HNEWXFLUXPLUS;
+      real_t Uxfluxplus  = UNEWXFLUXPLUS;
+      real_t Vxfluxplus  = UVNEWFLUXPLUS;
+
+      real_t Hyfluxminus = HNEWYFLUXMINUS;
+      real_t Uyfluxminus = VUNEWFLUXMINUS;
+      real_t Vyfluxminus = VNEWYFLUXMINUS;
+
+      real_t Hyfluxplus  = HNEWYFLUXPLUS;
+      real_t Uyfluxplus  = VUNEWFLUXPLUS;
+      real_t Vyfluxplus  = VNEWYFLUXPLUS;
+
+      real_t Hxminus2 = 0.0;
+      real_t Uxminus2 = 0.0;
+      real_t Vxminus2 = 0.0;
+      if(lvl < level[nl]) {
+
+         Hxminus2 = U_halfstep(deltaT, Hlt, Hic, HXFLUXNLT, HXFLUXIC,
+                               drl, dric, drl, dric, SQR(drl), SQR(dric));
+         Uxminus2 = U_halfstep(deltaT, Ult, Uic, UXFLUXNLT, UXFLUXIC,
+                               drl, dric, drl, dric, SQR(drl), SQR(dric));
+         Vxminus2 = U_halfstep(deltaT, Vlt, Vic, UVFLUXNLT, UVFLUXIC,
+                               drl, dric, drl, dric, SQR(drl), SQR(dric));
+
+         Hxfluxminus = (Hxfluxminus + HNEWXFLUXMINUS2) * HALF;
+         Uxfluxminus = (Uxfluxminus + UNEWXFLUXMINUS2) * HALF;
+         Vxfluxminus = (Vxfluxminus + UVNEWFLUXMINUS2) * HALF;
+
+      }
+
+      real_t Hxplus2 = 0.0;
+      real_t Uxplus2 = 0.0;
+      real_t Vxplus2 = 0.0;
+      if(lvl < level[nr]) {
+
+         Hxplus2  = U_halfstep(deltaT, Hic, Hrt, HXFLUXIC, HXFLUXNRT,
+                               dric, drr, dric, drr, SQR(dric), SQR(drr));
+         Uxplus2  = U_halfstep(deltaT, Uic, Urt, UXFLUXIC, UXFLUXNRT,
+                               dric, drr, dric, drr, SQR(dric), SQR(drr));
+         Vxplus2  = U_halfstep(deltaT, Vic, Vrt, UVFLUXIC, UVFLUXNRT,
+                               dric, drr, dric, drr, SQR(dric), SQR(drr));
+
+         Hxfluxplus  = (Hxfluxplus + HNEWXFLUXPLUS2) * HALF;
+         Uxfluxplus  = (Uxfluxplus + UNEWXFLUXPLUS2) * HALF;
+         Vxfluxplus  = (Vxfluxplus + UVNEWFLUXPLUS2) * HALF;
+
+      }
+
+      real_t Hyminus2 = 0.0;
+      real_t Uyminus2 = 0.0;
+      real_t Vyminus2 = 0.0;
+      if(lvl < level[nb]) {
+
+         Hyminus2 = U_halfstep(deltaT, Hbr, Hic, HYFLUXNBR, HYFLUXIC,
+                               drb, dric, drb, dric, SQR(drb), SQR(dric));
+         Uyminus2 = U_halfstep(deltaT, Ubr, Uic, VUFLUXNBR, VUFLUXIC,
+                               drb, dric, drb, dric, SQR(drb), SQR(dric));
+         Vyminus2 = U_halfstep(deltaT, Vbr, Vic, VYFLUXNBR, VYFLUXIC,
+                               drb, dric, drb, dric, SQR(drb), SQR(dric));
+
+         Hyfluxminus = (Hyfluxminus + HNEWYFLUXMINUS2) * HALF;
+         Uyfluxminus = (Uyfluxminus + VUNEWFLUXMINUS2) * HALF;
+         Vyfluxminus = (Vyfluxminus + VNEWYFLUXMINUS2) * HALF;
+
+      }
+
+      real_t Hyplus2 = 0.0;
+      real_t Uyplus2 = 0.0;
+      real_t Vyplus2 = 0.0;
+      if(lvl < level[nt]) {
+
+         Hyplus2  = U_halfstep(deltaT, Hic, Htr, HYFLUXIC, HYFLUXNTR,
+                               dric, drt, dric, drt, SQR(dric), SQR(drt));
+         Uyplus2  = U_halfstep(deltaT, Uic, Utr, VUFLUXIC, VUFLUXNTR,
+                               dric, drt, dric, drt, SQR(dric), SQR(drt));
+         Vyplus2  = U_halfstep(deltaT, Vic, Vtr, VYFLUXIC, VYFLUXNTR,
+                               dric, drt, dric, drt, SQR(dric), SQR(drt));
+
+         Hyfluxplus  = (Hyfluxplus + HNEWYFLUXPLUS2) * HALF;
+         Uyfluxplus  = (Uyfluxplus + VUNEWFLUXPLUS2) * HALF;
+         Vyfluxplus  = (Vyfluxplus + VNEWYFLUXPLUS2) * HALF;
+
+      }
+
+      //if (DEBUG >= 2) {
+      // printf("1st pass x direction nz %d nzlower %d nzupper %d %lf %lf %lf %lf %lf %lf\n",
+      //    gix, nl, nr,
+      //    Hxplus,Hxplus2,Uxplus,Uxplus2,Vxplus,Vxplus2);
+      //    //H[cell_upper],H[cell_lower],U[cell_upper],U[cell_lower],V[cell_upper],V[cell_lower]);
+      //}
+
+      ////////////////////////////////////////
+      /// Artificial Viscosity corrections ///
+      ////////////////////////////////////////
+
+
+      if(level[nl] < level[nll]) {
+#if DEBUG >= 3
+         size_t nllt = ntop[nll];
+         if (nllt < 0 || nllt >= ncells_ghost ) printf("%d: Problem at file %s line %d with nllt %ld\n",mesh->mype,__FILE__,__LINE__,nllt);
+#endif
+         Hll = (Hll + H[ ntop[nll] ]) * HALF;
+         Ull = (Ull + U[ ntop[nll] ]) * HALF;
+      }
+
+      real_t Hr2 = Hr;
+      real_t Ur2 = Ur;
+      if(lvl < level[nr]) {
+         Hr2 = (Hr2 + Hrt) * HALF;
+         Ur2 = (Ur2 + Urt) * HALF;
+      }
+
+      real_t wminusx_H = w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus/Hxminus) + sqrt(g*Hxminus),
+                              Hic-Hl, Hl-Hll, Hr2-Hic);
+
+      wminusx_H *= Hic - Hl;
+
+      if(lvl < level[nl]) {
+         if(level[nlt] < level[nltl])
+            Hll2 = (Hll2 + H[ ntop[nltl] ]) * HALF;
+         wminusx_H = ((w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus2/Hxminus2) +
+                                  sqrt(g*Hxminus2), Hic-Hlt, Hlt-Hll2, Hr2-Hic) *
+                      (Hic - Hlt)) + wminusx_H)*HALF*HALF;
+      }
+
+
+      if(level[nr] < level[nrr]) {
+#if DEBUG >= 3
+         size_t nrrt = ntop[nrr];
+         if (nrrt < 0 || nrrt >= ncells_ghost ) printf("%d: Problem at file %s line %d with nrrt %ld\n",mesh->mype,__FILE__,__LINE__,nrrt);
+#endif
+         Hrr = (Hrr + H[ ntop[nrr] ]) * HALF;
+         Urr = (Urr + U[ ntop[nrr] ]) * HALF;
+      }
+
+      real_t Hl2 = Hl;
+      real_t Ul2 = Ul;
+      if(lvl < level[nl]) {
+         Hl2 = (Hl2 + Hlt) * HALF;
+         Ul2 = (Ul2 + Ult) * HALF;
+      }
+
+      real_t wplusx_H = w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus/Hxplus) + sqrt(g*Hxplus),
+                           Hr-Hic, Hic-Hl2, Hrr-Hr);
+
+      wplusx_H *= Hr - Hic;
+
+      if(lvl < level[nr]) {
+         if(level[nrt] < level[nrtr])
+            Hrr2 = (Hrr2 + H[ ntop[nrtr] ]) * HALF;
+         wplusx_H = ((w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus2/Hxplus2) +
+                                  sqrt(g*Hxplus2), Hrt-Hic, Hic-Hl2, Hrr2-Hrt) *
+                      (Hrt - Hic))+wplusx_H)*HALF*HALF;
+      }
+
+
+      real_t wminusx_U = w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus/Hxminus) + sqrt(g*Hxminus),
+                              Uic-Ul, Ul-Ull, Ur2-Uic);
+
+      wminusx_U *= Uic - Ul;
+
+      if(lvl < level[nl]) {
+         if(level[nlt] < level[nltl])
+            Ull2 = (Ull2 + U[ ntop[nltl] ]) * HALF;
+         wminusx_U = ((w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus2/Hxminus2) +
+                                  sqrt(g*Hxminus2), Uic-Ult, Ult-Ull2, Ur2-Uic) *
+                      (Uic - Ult))+wminusx_U)*HALF*HALF;
+      }
+
+
+      real_t wplusx_U = w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus/Hxplus) + sqrt(g*Hxplus),
+                              Ur-Uic, Uic-Ul2, Urr-Ur);
+
+      wplusx_U *= Ur - Uic;
+
+      if(lvl < level[nr]) {
+         if(level[nrt] < level[nrtr])
+            Urr2 = (Urr2 + U[ ntop[nrtr] ]) * HALF;
+         wplusx_U = ((w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus2/Hxplus2) +
+                                  sqrt(g*Hxplus2), Urt-Uic, Uic-Ul2, Urr2-Urt) *
+                      (Urt - Uic))+wplusx_U)*HALF*HALF;
+      }
+
+
+      if(level[nb] < level[nbb]) {
+#if DEBUG >= 3
+         size_t nbbr = nrht[nbb];
+         if (nbbr < 0 || nbbr >= ncells_ghost ) printf("%d: Problem at file %s line %d gix %d %d with nbbr %ld\n",mesh->mype,__FILE__,__LINE__,gix,gix+mesh->noffset,nbbr);
+#endif
+         Hbb = (Hbb + H[ nrht[nbb] ]) * HALF;
+         Vbb = (Vbb + V[ nrht[nbb] ]) * HALF;
+      }
+
+      real_t Ht2 = Ht;
+      real_t Vt2 = Vt;
+      if(lvl < level[nt]) {
+         Ht2 = (Ht2 + Htr) * HALF;
+         Vt2 = (Vt2 + Vtr) * HALF;
+      }
+
+      real_t wminusy_H = w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus/Hyminus) + sqrt(g*Hyminus),
+                              Hic-Hb, Hb-Hbb, Ht2-Hic);
+
+      wminusy_H *= Hic - Hb;
+
+      if(lvl < level[nb]) {
+         if(level[nbr] < level[nbrb])
+            Hbb2 = (Hbb2 + H[ nrht[nbrb] ]) * HALF;
+         wminusy_H = ((w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus2/Hyminus2) +
+                                  sqrt(g*Hyminus2), Hic-Hbr, Hbr-Hbb2, Ht2-Hic) *
+                      (Hic - Hbr))+wminusy_H)*HALF*HALF;
+      }
+
+
+      if(level[nt] < level[ntt]) {
+#if DEBUG >= 3
+         size_t nttr = nrht[ntt];
+         if (nttr < 0 || nttr >= ncells_ghost ) printf("%d: Problem at file %s line %d with nttr %ld\n",mesh->mype,__FILE__,__LINE__,nttr);
+#endif
+         Htt = (Htt + H[ nrht[ntt] ]) * HALF;
+         Vtt = (Vtt + V[ nrht[ntt] ]) * HALF;
+      }
+
+      real_t Hb2 = Hb;
+      real_t Vb2 = Vb;
+      if(lvl < level[nb]) {
+         Hb2 = (Hb2 + Hbr) * HALF;
+         Vb2 = (Vb2 + Vbr) * HALF;
+      }
+
+      real_t wplusy_H = w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus/Hyplus) + sqrt(g*Hyplus),
+                             Ht-Hic, Hic-Hb2, Htt-Ht);
+
+      wplusy_H *= Ht - Hic;
+
+      if(lvl < level[nt]) {
+         if(level[ntr] < level[ntrt])
+            Htt2 = (Htt2 + H[ nrht[ntrt] ]) * HALF;
+         wplusy_H = ((w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus2/Hyplus2) +
+                                  sqrt(g*Hyplus2), Htr-Hic, Hic-Hb2, Htt2-Htr) *
+                      (Htr - Hic))+wplusy_H)*HALF*HALF;
+      }
+
+      real_t wminusy_V = w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus/Hyminus) + sqrt(g*Hyminus),
+                              Vic-Vb, Vb-Vbb, Vt2-Vic);
+
+      wminusy_V *= Vic - Vb;
+
+      if(lvl < level[nb]) {
+         if(level[nbr] < level[nbrb])
+            Vbb2 = (Vbb2 + V[ nrht[nbrb] ]) * HALF;
+         wminusy_V = ((w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus2/Hyminus2) +
+                                  sqrt(g*Hyminus2), Vic-Vbr, Vbr-Vbb2, Vt2-Vic) *
+                      (Vic - Vbr))+wminusy_V)*HALF*HALF;
+      }
+
+      real_t wplusy_V = w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus/Hyplus) + sqrt(g*Hyplus),
+                           Vt-Vic, Vic-Vb2, Vtt-Vt);
+
+      wplusy_V *= Vt - Vic;
+
+      if(lvl < level[nt]) {
+         if(level[ntr] < level[ntrt])
+            Vtt2 = (Vtt2 + V[ nrht[ntrt] ]) * HALF;
+         wplusy_V = ((w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus2/Hyplus2) +
+                                  sqrt(g*Hyplus2), Vtr-Vic, Vic-Vb2, Vtt2-Vtr) *
+                      (Vtr - Vic))+wplusy_V)*HALF*HALF;
+      }
+
+      H_new[gix] = U_fullstep(deltaT, dxic, Hic,
+                       Hxfluxplus, Hxfluxminus, Hyfluxplus, Hyfluxminus)
+                  - wminusx_H + wplusx_H - wminusy_H + wplusy_H;
+      U_new[gix] = U_fullstep(deltaT, dxic, Uic,
+                       Uxfluxplus, Uxfluxminus, Uyfluxplus, Uyfluxminus)
+                  - wminusx_U + wplusx_U;
+      V_new[gix] = U_fullstep(deltaT, dxic, Vic,
+                       Vxfluxplus, Vxfluxminus, Vyfluxplus, Vyfluxminus)
+                  - wminusy_V + wplusy_V;
+
+#if DEBUG >= 1
+      if (DEBUG >= 1) {
+         real_t U_tmp = U_new[gix];
+         real_t V_tmp = V_new[gix];
+         if (U_tmp == 0.0) U_tmp = 0.0;
+         if (V_tmp == 0.0) V_tmp = 0.0;
+         printf("DEBUG ic %d H_new %lf U_new %lf V_new %lf\n",gix,H_new[gix],U_tmp,V_tmp);
+      }
+#endif
+
+/*
+      printf("DEBUG ic %d deltaT, %lf dxic, %lf Hic, %lf Hxfluxplus, %lf Hxfluxminus, %lf Hyfluxplus, %lf Hyfluxminus %lf\n",
+         gix, deltaT, dxic, Hic, Hxfluxplus, Hxfluxminus, Hyfluxplus, Hyfluxminus);
+      printf("DEBUG ic %d wminusx_H %lf wplusx_H %lf wminusy_H %lf wplusy_H %lf\n",gix, wminusx_H, wplusx_H, wminusy_H, wplusy_H);
+      printf("DEBUG ic %d deltaT, %lf dxic, %lf Vic, %lf Vxfluxplus, %lf Vxfluxminus, %lf Vyfluxplus, %lf Vyfluxminus %lf\n",
+         gix, deltaT, dxic, Vic, Vxfluxplus, Vxfluxminus, Vyfluxplus, Vyfluxminus);
+      printf("DEBUG ic %d wminusy_V %lf wplusy_V %lf\n",gix, wminusy_V, wplusy_V);
+*/
+   } // cell loop
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+   {
+#endif
+      // Replace H with H_new and deallocate the old H. The replacement keeps the characteristics
+      // of the new memory but takes the name of the old. Both the return value and arg1 are reset to the new memory.
+      H = (state_t *)state_memory.memory_replace(H, H_new);
+      U = (state_t *)state_memory.memory_replace(U, U_new);
+      V = (state_t *)state_memory.memory_replace(V, V_new);
+
+      //state_memory.memory_report();
+      //printf("DEBUG end finite diff\n\n"); 
+#ifdef _OPENMP
+   }
+#pragma omp barrier
+#endif
+
+#ifdef _OPENMP
+#pragma omp master
+#endif
+      cpu_timers[STATE_TIMER_FINITE_DIFFERENCE] += cpu_timer_stop(tstart_cpu);
+}
+
+void State::calc_finite_difference_via_faces(double deltaT){
+   real_t   g     = 9.80;   // gravitational constant
+   real_t   ghalf = HALF*g;
+
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+
+   size_t ncells     = mesh->ncells;
+   size_t &ncells_ghost = mesh->ncells_ghost;
+#ifdef _OPENMP
+#pragma omp master
+#endif
+   if (ncells_ghost < ncells) ncells_ghost = ncells;
+
+   //printf("\nDEBUG finite diff\n");
+
+#ifdef HAVE_MPI
+   // We need to populate the ghost regions here, since the neighbor calculation
+   // for the mesh was done only shortly before this point
+   if (mesh->numpe > 1) {
+      apply_boundary_conditions_local();
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master 
+      {
+#endif
+         H=(state_t *)state_memory.memory_realloc(ncells_ghost, H);
+         U=(state_t *)state_memory.memory_realloc(ncells_ghost, U);
+         V=(state_t *)state_memory.memory_realloc(ncells_ghost, V);
+
+         L7_Update(&H[0], L7_STATE_T, mesh->cell_handle);
+         L7_Update(&U[0], L7_STATE_T, mesh->cell_handle);
+         L7_Update(&V[0], L7_STATE_T, mesh->cell_handle);
+#ifdef _OPENMP
+      }
+#pragma omp barrier
+#endif
+
+      apply_boundary_conditions_ghost();
+   } else {
+      apply_boundary_conditions();
+   }
+#else
+   apply_boundary_conditions();
+#endif
+
+   int *nlft, *nrht, *nbot, *ntop, *level;
+
+   nlft  = mesh->nlft;
+   nrht  = mesh->nrht;
+   nbot  = mesh->nbot;
+   ntop  = mesh->ntop;
+   level = mesh->level;
+
+   vector<real_t> &lev_deltax = mesh->lev_deltax;
+   vector<real_t> &lev_deltay = mesh->lev_deltay;
+
+   int flags = 0;
+   flags = RESTART_DATA;
+#if defined (HAVE_J7)
+   if (mesh->parallel) flags = LOAD_BALANCE_MEMORY;
+#endif
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+   {
+#endif
+   mesh->calc_face_list_wbidirmap();
+#ifdef _OPENMP
+   }
+#endif
+
+   static vector<state_t> Hx, Ux, Vx;
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+   {
+#endif
+   Hx.resize(mesh->nxface);
+   Ux.resize(mesh->nxface);
+   Vx.resize(mesh->nxface);
+#ifdef _OPENMP
+   }
+#pragma omp barrier
+#endif
+
+#ifdef _OPENMP
+#pragma omp for 
+#endif
+   for (int iface = 0; iface < mesh->nxface; iface++){
+      int cell_lower = mesh->map_xface2cell_lower[iface];
+      int cell_upper = mesh->map_xface2cell_upper[iface];
+      int level_lower = level[cell_lower];
+      int level_upper = level[cell_upper];
+      if (level_lower == level_upper) {
+         int lev = level_upper;
+         real_t Cxhalf = 0.5*deltaT/mesh->lev_deltax[lev];
+         Hx[iface]=HALF*(H[cell_upper]+H[cell_lower]) - Cxhalf*( HXFLUX(cell_upper)-HXFLUX(cell_lower) );
+         Ux[iface]=HALF*(U[cell_upper]+U[cell_lower]) - Cxhalf*( UXFLUX(cell_upper)-UXFLUX(cell_lower) );
+         Vx[iface]=HALF*(V[cell_upper]+V[cell_lower]) - Cxhalf*( UVFLUX(cell_upper)-UVFLUX(cell_lower) );
+      } else {
+         real_t dx_lower = mesh->lev_deltax[level[cell_lower]];
+         real_t dx_upper = mesh->lev_deltax[level[cell_upper]];
+
+         real_t FA_lower = dx_lower;
+         real_t FA_upper = dx_upper;
+         real_t FA_lolim = FA_lower*min(ONE, FA_upper/FA_lower);
+         real_t FA_uplim = FA_upper*min(ONE, FA_lower/FA_upper);
+
+         real_t CV_lower = SQ(dx_lower);
+         real_t CV_upper = SQ(dx_upper);
+         real_t CV_lolim = CV_lower*min(HALF, CV_upper/CV_lower);
+         real_t CV_uplim = CV_upper*min(HALF, CV_lower/CV_upper);
+
+         // Weighted half-step calculation
+         //
+         // (dx_lower*H[cell_upper]+dx_upper*H[cell_lower])
+         // -----------------------------------------------   -
+         //             (dx_lower+dx_upper)
+         //
+         //                ( (FA_uplim*HXFLUX(cell_upper))-(FA_lolim*HXFLUX(cell_lower)) )
+         // 0.5*deltaT  *  ----------------------------------------------------------------
+         //                                    (CV_uplim+CV_lolim)
+         //
+
+         Hx[iface]=(dx_lower*H[cell_upper]+dx_upper*H[cell_lower])/(dx_lower+dx_upper) -
+                   HALF*deltaT*( (FA_uplim*HXFLUX(cell_upper))-(FA_lolim*HXFLUX(cell_lower)) )/
+                   (CV_uplim+CV_lolim);
+         Ux[iface]=(dx_lower*U[cell_upper]+dx_upper*U[cell_lower])/(dx_lower+dx_upper) -
+                   HALF*deltaT*( (FA_uplim*UXFLUX(cell_upper))-(FA_lolim*UXFLUX(cell_lower)) )/
+                   (CV_uplim+CV_lolim);
+         Vx[iface]=(dx_lower*V[cell_upper]+dx_upper*V[cell_lower])/(dx_lower+dx_upper) -
+                   HALF*deltaT*( (FA_uplim*UVFLUX(cell_upper))-(FA_lolim*UVFLUX(cell_lower)) )/
+                   (CV_uplim+CV_lolim);
+      }
+#if DEBUG >= 2
+      if (DEBUG >= 2) {
+         printf("1st pass x direction iface %d i %d j %d lev %d nzlower %d nzupper %d %lf %lf %lf %lf %lf %lf %lf %lf %lf\n",
+            iface, mesh->xface_i[iface], mesh->xface_j[iface], mesh->xface_level[iface],
+            mesh->map_xface2cell_lower[iface], mesh->map_xface2cell_upper[iface],
+            Hx[iface],Ux[iface],Vx[iface],
+            H[cell_upper],H[cell_lower],U[cell_upper],U[cell_lower],V[cell_upper],V[cell_lower]);
+      }
+#endif
+   }
+#if DEBUG >= 2
+   if (DEBUG >= 2) {
+      printf("\n");
+   }
+#endif
+
+   static vector<state_t> Hy, Uy, Vy;
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+   {
+#endif
+   Hy.resize(mesh->nyface);
+   Uy.resize(mesh->nyface);
+   Vy.resize(mesh->nyface);
+#ifdef _OPENMP
+   }
+#pragma omp barrier
+#endif
+
+#ifdef _OPENMP
+#pragma omp for 
+#endif
+   for (int iface = 0; iface < mesh->nyface; iface++){
+      int cell_lower = mesh->map_yface2cell_lower[iface];
+      int cell_upper = mesh->map_yface2cell_upper[iface];
+      int level_lower = level[cell_lower];
+      int level_upper = level[cell_upper];
+      if (level_lower == level_upper) {
+         int lev = level_upper;
+         real_t Cyhalf = 0.5*deltaT/mesh->lev_deltay[lev];
+         Hy[iface]=HALF*(H[cell_upper]+H[cell_lower]) - Cyhalf*( HYFLUX(cell_upper)-HYFLUX(cell_lower) );
+         Uy[iface]=HALF*(U[cell_upper]+U[cell_lower]) - Cyhalf*( UVFLUX(cell_upper)-UVFLUX(cell_lower) );
+         Vy[iface]=HALF*(V[cell_upper]+V[cell_lower]) - Cyhalf*( VYFLUX(cell_upper)-VYFLUX(cell_lower) );
+      } else {
+         real_t dy_lower = mesh->lev_deltay[level[cell_lower]];
+         real_t dy_upper = mesh->lev_deltay[level[cell_upper]];
+
+         real_t FA_lower = dy_lower;
+         real_t FA_upper = dy_upper;
+         real_t FA_lolim = FA_lower*min(ONE, FA_upper/FA_lower);
+         real_t FA_uplim = FA_upper*min(ONE, FA_lower/FA_upper);
+
+         real_t CV_lower = SQ(dy_lower);
+         real_t CV_upper = SQ(dy_upper);
+         real_t CV_lolim = CV_lower*min(HALF, CV_upper/CV_lower);
+         real_t CV_uplim = CV_upper*min(HALF, CV_lower/CV_upper);
+
+         // Weighted half-step calculation
+         //
+         // (dy_lower*H[cell_upper]+dy_upper*H[cell_lower])
+         // -----------------------------------------------   -
+         //             (dy_lower+dy_upper)
+         //
+         //                ( (FA_uplim*HYFLUX(cell_upper))-(FA_lolim*HYFLUX(cell_lower)) )
+         // 0.5*deltaT  *  ----------------------------------------------------------------
+         //                                    (CV_uplim+CV_lolim)
+         //
+
+         Hy[iface]=(dy_lower*H[cell_upper]+dy_upper*H[cell_lower])/(dy_lower+dy_upper) -
+                   HALF*deltaT*( (FA_uplim*HYFLUX(cell_upper))-(FA_lolim*HYFLUX(cell_lower)) )/
+                   (CV_uplim+CV_lolim);
+         Uy[iface]=(dy_lower*U[cell_upper]+dy_upper*U[cell_lower])/(dy_lower+dy_upper) -
+                   HALF*deltaT*( (FA_uplim*UVFLUX(cell_upper))-(FA_lolim*UVFLUX(cell_lower)) )/
+                   (CV_uplim+CV_lolim);
+         Vy[iface]=(dy_lower*V[cell_upper]+dy_upper*V[cell_lower])/(dy_lower+dy_upper) -
+                   HALF*deltaT*( (FA_uplim*VYFLUX(cell_upper))-(FA_lolim*VYFLUX(cell_lower)) )/
+                   (CV_uplim+CV_lolim);
+
+      }
+
+#if DEBUG >= 2
+      if (DEBUG >= 2) {
+         printf("1st pass y direction iface %d i %d j %d lev %d nzlower %d nzupper %d %lf %lf %lf %lf %lf %lf %lf %lf %lf\n",
+            iface, mesh->yface_i[iface], mesh->yface_j[iface], mesh->yface_level[iface],
+            mesh->map_yface2cell_lower[iface], mesh->map_yface2cell_upper[iface],
+            Hy[iface],Uy[iface],Vy[iface],
+            H[cell_upper],H[cell_lower],U[cell_upper],U[cell_lower],V[cell_upper],V[cell_lower]);
+      }
+#endif
+   }
+#if DEBUG >= 2
+   if (DEBUG >= 2) {
+      printf("\n");
+   }
+#endif
+
+   static state_t *H_new, *U_new, *V_new;
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+   {
+#endif
+      H_new = (state_t *)state_memory.memory_malloc(mesh->ncells_ghost, sizeof(state_t), "H_new", flags);
+      U_new = (state_t *)state_memory.memory_malloc(mesh->ncells_ghost, sizeof(state_t), "U_new", flags);
+      V_new = (state_t *)state_memory.memory_malloc(mesh->ncells_ghost, sizeof(state_t), "V_new", flags);
+#ifdef _OPENMP
+   }
+#pragma omp barrier
+#endif
+
+   int lowerBound, upperBound;
+
+   mesh->get_bounds(lowerBound, upperBound);
+   for (int ic = lowerBound; ic < upperBound; ic++){
+
+      int lvl     = level[ic];
+      int nl      = nlft[ic];
+      int nr      = nrht[ic];
+      int nt      = ntop[ic];
+      int nb      = nbot[ic];
+
+      real_t Hic     = H[ic];
+      real_t Uic     = U[ic];
+      real_t Vic     = V[ic];
+
+      int nll     = nlft[nl];
+      real_t Hl      = H[nl];
+      real_t Ul      = U[nl];
+      //real_t Vl      = V[nl];
+
+      int nrr     = nrht[nr];
+      real_t Hr      = H[nr];
+      real_t Ur      = U[nr];
+      //real_t Vr      = V[nr];
+
+      int ntt     = ntop[nt];
+      real_t Ht      = H[nt];
+      //real_t Ut      = U[nt];
+      real_t Vt      = V[nt];
+
+      int nbb     = nbot[nb];
+      real_t Hb      = H[nb];
+      //real_t Ub      = U[nb];
+      real_t Vb      = V[nb];
+
+      int nlt     = ntop[nl];
+      int nrt     = ntop[nr];
+      int ntr     = nrht[nt];
+      int nbr     = nrht[nb];
+
+      real_t Hll     = H[nll];
+      real_t Ull     = U[nll];
+      //real_t Vll     = V[nll];
+
+      real_t Hrr     = H[nrr];
+      real_t Urr     = U[nrr];
+      //real_t Vrr     = V[nrr];
+
+      real_t Htt     = H[ntt];
+      //real_t Utt     = U[ntt];
+      real_t Vtt     = V[ntt];
+
+      real_t Hbb     = H[nbb];
+      //real_t Ubb     = U[nbb];
+      real_t Vbb     = V[nbb];
+
+      real_t dxic    = lev_deltax[lvl];
+      //real_t dyic    = lev_deltay[lvl];
+
+      real_t dxl     = lev_deltax[level[nl]];
+      real_t dxr     = lev_deltax[level[nr]];
+
+      real_t dyt     = lev_deltay[level[nt]];
+      real_t dyb     = lev_deltay[level[nb]];
+
+      //real_t drl     = dxl;
+      //real_t drr     = dxr;
+      //real_t drt     = dyt;
+      //real_t drb     = dyb;
+
+      real_t dric    = dxic;
+
+      int nltl = 0;
+      real_t Hlt = 0.0, Ult = 0.0; // Vlt = 0.0;
+      real_t Hll2 = 0.0;
+      real_t Ull2 = 0.0;
+      if(lvl < level[nl]) {
+         Hlt  = H[ ntop[nl] ];
+         Ult  = U[ ntop[nl] ];
+         //Vlt  = V[ ntop[nl] ];
+
+         nltl = nlft[nlt];
+         Hll2 = H[nltl];
+         Ull2 = U[nltl];
+      }
+
+      int nrtr = 0;
+      real_t Hrt = 0.0, Urt = 0.0; // Vrt = 0.0;
+      real_t Hrr2 = 0.0;
+      real_t Urr2 = 0.0;
+      if(lvl < level[nr]) {
+         Hrt  = H[ ntop[nr] ];
+         Urt  = U[ ntop[nr] ];
+         //Vrt  = V[ ntop[nr] ];
+
+         nrtr = nrht[nrt];
+         Hrr2 = H[nrtr];
+         Urr2 = U[nrtr];
+      }
+
+      int nbrb = 0;
+      real_t Hbr = 0.0, Vbr = 0.0; // Ubr = 0.0
+      real_t Hbb2 = 0.0;
+      real_t Vbb2 = 0.0;
+      if(lvl < level[nb]) {
+         Hbr  = H[ nrht[nb] ];
+         //Ubr  = U[ nrht[nb] ];
+         Vbr  = V[ nrht[nb] ];
+
+         nbrb = nbot[nbr];
+         Hbb2 = H[nbrb];
+         Vbb2 = V[nbrb];
+      }
+
+      int ntrt = 0;
+      real_t Htr = 0.0, Vtr = 0.0; // Utr = 0.0
+      real_t Htt2 = 0.0;
+      real_t Vtt2 = 0.0;
+      if(lvl < level[nt]) {
+         Htr  = H[ nrht[nt] ];
+         //Utr  = U[ nrht[nt] ];
+         Vtr  = V[ nrht[nt] ];
+
+         ntrt = ntop[ntr];
+         Htt2 = H[ntrt];
+         Vtt2 = V[ntrt];
+      }
+
+      ////////////////////////////////////////
+      /// Artificial Viscosity corrections ///
+      ////////////////////////////////////////
+
+      real_t Hxminus = H[ic];
+      real_t Uxminus = 0.0;
+      real_t Vxminus = 0.0;
+      if (mesh->map_xcell2face_left1[ic] >= 0){
+         Hxminus  = Hx[mesh->map_xcell2face_left1[ic]];
+         Uxminus  = Ux[mesh->map_xcell2face_left1[ic]];
+         Vxminus  = Vx[mesh->map_xcell2face_left1[ic]];
+      }
+
+      real_t Hxminus2 = 0.0;
+      if(lvl < level[nl]) Hxminus2 = H[ic];
+      real_t Uxminus2 = 0.0;
+      real_t Vxminus2 = 0.0;
+      if (mesh->map_xcell2face_left2[ic] >= 0) {
+         Hxminus2 = Hx[mesh->map_xcell2face_left2[ic]];
+         Uxminus2 = Ux[mesh->map_xcell2face_left2[ic]];
+         Vxminus2 = Vx[mesh->map_xcell2face_left2[ic]];
+      }
+
+      real_t Hxplus = H[ic];
+      real_t Uxplus = 0.0;
+      real_t Vxplus = 0.0;
+      if (mesh->map_xcell2face_right1[ic] >= 0){
+         Hxplus   = Hx[mesh->map_xcell2face_right1[ic]];
+         Uxplus   = Ux[mesh->map_xcell2face_right1[ic]];
+         Vxplus   = Vx[mesh->map_xcell2face_right1[ic]];
+      }
+
+      real_t Hxplus2 = 0.0;
+      if(lvl < level[nr]) Hxplus2 = H[ic];
+      real_t Uxplus2 = 0.0;
+      real_t Vxplus2 = 0.0;
+      if (mesh->map_xcell2face_right2[ic] >= 0){
+         Hxplus2  = Hx[mesh->map_xcell2face_right2[ic]];
+         Uxplus2  = Ux[mesh->map_xcell2face_right2[ic]];
+         Vxplus2  = Vx[mesh->map_xcell2face_right2[ic]];
+      }
+
+      if(level[nl] < level[nll]) {
+         Hll = (Hll + H[ ntop[nll] ]) * HALF;
+         Ull = (Ull + U[ ntop[nll] ]) * HALF;
+      }
+
+      real_t Hr2 = Hr;
+      real_t Ur2 = Ur;
+      if(lvl < level[nr]) {
+         Hr2 = (Hr2 + Hrt) * HALF;
+         Ur2 = (Ur2 + Urt) * HALF;
+      }
+
+      real_t wminusx_H = w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus/Hxminus) + sqrt(g*Hxminus),
+                              Hic-Hl, Hl-Hll, Hr2-Hic);
+
+      wminusx_H *= Hic - Hl;
+
+      if(lvl < level[nl]) {
+         if(level[nlt] < level[nltl])
+            Hll2 = (Hll2 + H[ ntop[nltl] ]) * HALF;
+         wminusx_H = ((w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus2/Hxminus2) +
+                                  sqrt(g*Hxminus2), Hic-Hlt, Hlt-Hll2, Hr2-Hic) *
+                      (Hic - Hlt)) + wminusx_H)*HALF*HALF;
+      }
+
+      if(level[nr] < level[nrr]) {
+         Hrr = (Hrr + H[ ntop[nrr] ]) * HALF;
+         Urr = (Urr + U[ ntop[nrr] ]) * HALF;
+      }
+
+      real_t Hl2 = Hl;
+      real_t Ul2 = Ul;
+      if(lvl < level[nl]) {
+         Hl2 = (Hl2 + Hlt) * HALF;
+         Ul2 = (Ul2 + Ult) * HALF;
+      }
+
+      real_t wplusx_H = w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus/Hxplus) + sqrt(g*Hxplus),
+                           Hr-Hic, Hic-Hl2, Hrr-Hr);
+
+      wplusx_H *= Hr - Hic;
+
+      if(lvl < level[nr]) {
+         if(level[nrt] < level[nrtr])
+            Hrr2 = (Hrr2 + H[ ntop[nrtr] ]) * HALF;
+         wplusx_H = ((w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus2/Hxplus2) +
+                                  sqrt(g*Hxplus2), Hrt-Hic, Hic-Hl2, Hrr2-Hrt) *
+                      (Hrt - Hic))+wplusx_H)*HALF*HALF;
+      }
+
+
+      real_t wminusx_U = w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus/Hxminus) + sqrt(g*Hxminus),
+                              Uic-Ul, Ul-Ull, Ur2-Uic);
+
+      wminusx_U *= Uic - Ul;
+
+      if(lvl < level[nl]) {
+         if(level[nlt] < level[nltl])
+            Ull2 = (Ull2 + U[ ntop[nltl] ]) * HALF;
+         wminusx_U = ((w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus2/Hxminus2) +
+                                  sqrt(g*Hxminus2), Uic-Ult, Ult-Ull2, Ur2-Uic) *
+                      (Uic - Ult))+wminusx_U)*HALF*HALF;
+      }
+
+
+      real_t wplusx_U = w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus/Hxplus) + sqrt(g*Hxplus),
+                              Ur-Uic, Uic-Ul2, Urr-Ur);
+
+      wplusx_U *= Ur - Uic;
+
+      if(lvl < level[nr]) {
+         if(level[nrt] < level[nrtr])
+            Urr2 = (Urr2 + U[ ntop[nrtr] ]) * HALF;
+         wplusx_U = ((w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus2/Hxplus2) +
+                                  sqrt(g*Hxplus2), Urt-Uic, Uic-Ul2, Urr2-Urt) *
+                      (Urt - Uic))+wplusx_U)*HALF*HALF;
+      }
+
+
+      if(level[nb] < level[nbb]) {
+         Hbb = (Hbb + H[ nrht[nbb] ]) * HALF;
+         Vbb = (Vbb + V[ nrht[nbb] ]) * HALF;
+      }
+
+      real_t Ht2 = Ht;
+      real_t Vt2 = Vt;
+      if(lvl < level[nt]) {
+         Ht2 = (Ht2 + Htr) * HALF;
+         Vt2 = (Vt2 + Vtr) * HALF;
+      }
+
+      real_t Hyminus = H[ic];
+      real_t Uyminus = 0.0;
+      real_t Vyminus = 0.0;
+      if (mesh->map_ycell2face_bot1[ic] >= 0){
+         Hyminus  = Hy[mesh->map_ycell2face_bot1[ic]];
+         Uyminus  = Uy[mesh->map_ycell2face_bot1[ic]];
+         Vyminus  = Vy[mesh->map_ycell2face_bot1[ic]];
+      }
+
+      real_t Hyminus2 = 0.0;
+      if(lvl < level[nb]) Hyminus2 = H[ic];
+      real_t Uyminus2 = 0.0;
+      real_t Vyminus2 = 0.0;
+      if (mesh->map_ycell2face_bot2[ic] >= 0){
+         Hyminus2 = Hy[mesh->map_ycell2face_bot2[ic]];
+         Uyminus2 = Uy[mesh->map_ycell2face_bot2[ic]];
+         Vyminus2 = Vy[mesh->map_ycell2face_bot2[ic]];
+      }
+
+      real_t Hyplus = H[ic];
+      real_t Uyplus = 0.0;
+      real_t Vyplus = 0.0;
+      if (mesh->map_ycell2face_top1[ic] >= 0){
+         Hyplus   = Hy[mesh->map_ycell2face_top1[ic]];
+         Uyplus   = Uy[mesh->map_ycell2face_top1[ic]];
+         Vyplus   = Vy[mesh->map_ycell2face_top1[ic]];
+      }
+
+      real_t Hyplus2 = 0.0;
+      if(lvl < level[nt]) Hyplus2 = H[ic];
+      real_t Uyplus2 = 0.0;
+      real_t Vyplus2 = 0.0;
+      if (mesh->map_ycell2face_top2[ic] >= 0){
+         Hyplus2  = Hy[mesh->map_ycell2face_top2[ic]];
+         Uyplus2  = Uy[mesh->map_ycell2face_top2[ic]];
+         Vyplus2  = Vy[mesh->map_ycell2face_top2[ic]];
+      }
+
+      real_t wminusy_H = w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus/Hyminus) + sqrt(g*Hyminus),
+                              Hic-Hb, Hb-Hbb, Ht2-Hic);
+
+      wminusy_H *= Hic - Hb;
+
+      if(lvl < level[nb]) {
+         if(level[nbr] < level[nbrb])
+            Hbb2 = (Hbb2 + H[ nrht[nbrb] ]) * HALF;
+         wminusy_H = ((w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus2/Hyminus2) +
+                                  sqrt(g*Hyminus2), Hic-Hbr, Hbr-Hbb2, Ht2-Hic) *
+                      (Hic - Hbr))+wminusy_H)*HALF*HALF;
+      }
+
+
+      if(level[nt] < level[ntt]) {
+         Htt = (Htt + H[ nrht[ntt] ]) * HALF;
+         Vtt = (Vtt + V[ nrht[ntt] ]) * HALF;
+      }
+
+      real_t Hb2 = Hb;
+      real_t Vb2 = Vb;
+      if(lvl < level[nb]) {
+         Hb2 = (Hb2 + Hbr) * HALF;
+         Vb2 = (Vb2 + Vbr) * HALF;
+      }
+
+      real_t wplusy_H = w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus/Hyplus) + sqrt(g*Hyplus),
+                             Ht-Hic, Hic-Hb2, Htt-Ht);
+
+      wplusy_H *= Ht - Hic;
+
+      if(lvl < level[nt]) {
+         if(level[ntr] < level[ntrt])
+            Htt2 = (Htt2 + H[ nrht[ntrt] ]) * HALF;
+         wplusy_H = ((w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus2/Hyplus2) +
+                                  sqrt(g*Hyplus2), Htr-Hic, Hic-Hb2, Htt2-Htr) *
+                      (Htr - Hic))+wplusy_H)*HALF*HALF;
+      }
+
+      real_t wminusy_V = w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus/Hyminus) + sqrt(g*Hyminus),
+                              Vic-Vb, Vb-Vbb, Vt2-Vic);
+
+      wminusy_V *= Vic - Vb;
+
+      if(lvl < level[nb]) {
+         if(level[nbr] < level[nbrb])
+            Vbb2 = (Vbb2 + V[ nrht[nbrb] ]) * HALF;
+         wminusy_V = ((w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus2/Hyminus2) +
+                                  sqrt(g*Hyminus2), Vic-Vbr, Vbr-Vbb2, Vt2-Vic) *
+                      (Vic - Vbr))+wminusy_V)*HALF*HALF;
+      }
+
+      real_t wplusy_V = w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus/Hyplus) + sqrt(g*Hyplus),
+                           Vt-Vic, Vic-Vb2, Vtt-Vt);
+
+      wplusy_V *= Vt - Vic;
+
+      if(lvl < level[nt]) {
+         if(level[ntr] < level[ntrt])
+            Vtt2 = (Vtt2 + V[ nrht[ntrt] ]) * HALF;
+         wplusy_V = ((w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus2/Hyplus2) +
+                                  sqrt(g*Hyplus2), Vtr-Vic, Vic-Vb2, Vtt2-Vtr) *
+                      (Vtr - Vic))+wplusy_V)*HALF*HALF;
+      }
+
+      real_t Hxfluxminus = HNEWXFLUXMINUS;
+      real_t Uxfluxminus = UNEWXFLUXMINUS;
+      real_t Vxfluxminus = UVNEWFLUXMINUS;
+
+      real_t Hxfluxplus  = HNEWXFLUXPLUS;
+      real_t Uxfluxplus  = UNEWXFLUXPLUS;
+      real_t Vxfluxplus  = UVNEWFLUXPLUS;
+
+      real_t Hyfluxminus = HNEWYFLUXMINUS;
+      real_t Uyfluxminus = VUNEWFLUXMINUS;
+      real_t Vyfluxminus = VNEWYFLUXMINUS;
+
+      real_t Hyfluxplus  = HNEWYFLUXPLUS;
+      real_t Uyfluxplus  = VUNEWFLUXPLUS;
+      real_t Vyfluxplus  = VNEWYFLUXPLUS;
+
+      if(lvl < level[nl]) {
+         Hxfluxminus = (Hxfluxminus + HNEWXFLUXMINUS2) * HALF;
+         Uxfluxminus = (Uxfluxminus + UNEWXFLUXMINUS2) * HALF;
+         Vxfluxminus = (Vxfluxminus + UVNEWFLUXMINUS2) * HALF;
+      }
+
+      if(lvl < level[nr]) {
+         Hxfluxplus  = (Hxfluxplus + HNEWXFLUXPLUS2) * HALF;
+         Uxfluxplus  = (Uxfluxplus + UNEWXFLUXPLUS2) * HALF;
+         Vxfluxplus  = (Vxfluxplus + UVNEWFLUXPLUS2) * HALF;
+      }
+
+      if(lvl < level[nb]) {
+         Hyfluxminus = (Hyfluxminus + HNEWYFLUXMINUS2) * HALF;
+         Uyfluxminus = (Uyfluxminus + VUNEWFLUXMINUS2) * HALF;
+         Vyfluxminus = (Vyfluxminus + VNEWYFLUXMINUS2) * HALF;
+      }
+
+      if(lvl < level[nt]) {
+         Hyfluxplus  = (Hyfluxplus + HNEWYFLUXPLUS2) * HALF;
+         Uyfluxplus  = (Uyfluxplus + VUNEWFLUXPLUS2) * HALF;
+         Vyfluxplus  = (Vyfluxplus + VNEWYFLUXPLUS2) * HALF;
+      }
+
+      H_new[ic] = U_fullstep(deltaT, dxic, Hic,
+                      Hxfluxplus, Hxfluxminus, Hyfluxplus, Hyfluxminus)
+                 - wminusx_H + wplusx_H - wminusy_H + wplusy_H;
+      U_new[ic] = U_fullstep(deltaT, dxic, Uic,
+                      Uxfluxplus, Uxfluxminus, Uyfluxplus, Uyfluxminus)
+                 - wminusx_U + wplusx_U;
+      V_new[ic] = U_fullstep(deltaT, dxic, Vic,
+                      Vxfluxplus, Vxfluxminus, Vyfluxplus, Vyfluxminus)
+                 - wminusy_V + wplusy_V;
+
+#if DEBUG >= 1
+      if (DEBUG >= 1) {
+         real_t U_tmp = U_new[ic];
+         real_t V_tmp = V_new[ic];
+         if (U_tmp == 0.0) U_tmp = 0.0;
+         if (V_tmp == 0.0) V_tmp = 0.0;
+         printf("DEBUG ic %d H_new %lf U_new %lf V_new %lf\n",ic,H_new[ic],U_tmp,V_tmp);
+      }
+#endif
+
+/*
+      printf("DEBUG ic %d deltaT, %lf dxic, %lf Hic, %lf Hxfluxplus, %lf Hxfluxminus, %lf Hyfluxplus, %lf Hyfluxminus %lf\n",
+         ic, deltaT, dxic, Hic, Hxfluxplus, Hxfluxminus, Hyfluxplus, Hyfluxminus);
+      printf("DEBUG ic %d wminusx_H %lf wplusx_H %lf wminusy_H %lf wplusy_H %lf\n",ic, wminusx_H, wplusx_H, wminusy_H, wplusy_H);
+      printf("DEBUG ic %d deltaT, %lf dxic, %lf Vic, %lf Vxfluxplus, %lf Vxfluxminus, %lf Vyfluxplus, %lf Vyfluxminus %lf\n",
+         ic, deltaT, dxic, Vic, Vxfluxplus, Vxfluxminus, Vyfluxplus, Vyfluxminus);
+      printf("DEBUG ic %d wminusy_V %lf wplusy_V %lf\n",ic, wminusy_V, wplusy_V);
+*/
+   }//end forloop
+
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+   {
+#endif
+      // Replace H with H_new and deallocate H. New memory will have the characteristics
+      // of the new memory and the name of the old. Both return and arg1 will be reset to new memory
+      H = (state_t *)state_memory.memory_replace(H, H_new);
+      U = (state_t *)state_memory.memory_replace(U, U_new);
+      V = (state_t *)state_memory.memory_replace(V, V_new);
+
+      //state_memory.memory_report();
+      //printf("DEBUG end finite diff\n\n"); 
+#ifdef _OPENMP
+   }
+#pragma omp barrier
+#endif
+
+#ifdef _OPENMP
+#pragma omp master
+#endif
+      cpu_timers[STATE_TIMER_FINITE_DIFFERENCE] += cpu_timer_stop(tstart_cpu);
+}
+
+#ifdef HAVE_OPENCL
+void State::gpu_calc_finite_difference(double deltaT)
+{
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+   // Advance dev_H/dev_U/dev_V by deltaT: apply boundary conditions, run the finite-difference kernel into scratch buffers, then swap them in.
+   cl_command_queue command_queue = ezcl_get_command_queue();
+
+   //cl_mem dev_ptr = NULL;
+
+   size_t &ncells    = mesh->ncells;
+   size_t &ncells_ghost = mesh->ncells_ghost;
+   if (ncells_ghost < ncells) ncells_ghost = ncells; // ncells_ghost is a reference, so this also raises mesh->ncells_ghost
+   int &levmx           = mesh->levmx;
+   cl_mem &dev_celltype = mesh->dev_celltype;
+   cl_mem &dev_nlft     = mesh->dev_nlft;
+   cl_mem &dev_nrht     = mesh->dev_nrht;
+   cl_mem &dev_nbot     = mesh->dev_nbot;
+   cl_mem &dev_ntop     = mesh->dev_ntop;
+   cl_mem &dev_level    = mesh->dev_level;
+   cl_mem &dev_levdx    = mesh->dev_levdx;
+   cl_mem &dev_levdy    = mesh->dev_levdy;
+
+   assert(dev_H); // every device buffer used below must be non-null
+   assert(dev_U);
+   assert(dev_V);
+   assert(dev_nlft);
+   assert(dev_nrht);
+   assert(dev_nbot);
+   assert(dev_ntop);
+   assert(dev_level);
+   assert(dev_levdx);
+   assert(dev_levdy);
+   // Scratch device buffers for the new state, ncells_ghost entries of cl_state_t each.
+   cl_mem dev_H_new = (cl_mem)gpu_state_memory.memory_malloc(ncells_ghost, sizeof(cl_state_t), const_cast<char *>("dev_H_new"), DEVICE_REGULAR_MEMORY);
+   cl_mem dev_U_new = (cl_mem)gpu_state_memory.memory_malloc(ncells_ghost, sizeof(cl_state_t), const_cast<char *>("dev_U_new"), DEVICE_REGULAR_MEMORY);
+   cl_mem dev_V_new = (cl_mem)gpu_state_memory.memory_malloc(ncells_ghost, sizeof(cl_state_t), const_cast<char *>("dev_V_new"), DEVICE_REGULAR_MEMORY);
+ 
+   size_t local_work_size = 128; // work-group size used for every kernel launched here
+   size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size; // ncells rounded up to a multiple of local_work_size
+
+#ifdef HAVE_MPI
+   if (mesh->numpe > 1) { // numpe > 1: local boundary kernel, state copy + L7_Dev_Update, then ghost boundary kernel
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 0, sizeof(cl_int), &ncells);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 1, sizeof(cl_mem), &dev_celltype);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 2, sizeof(cl_mem), &dev_nlft);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 3, sizeof(cl_mem), &dev_nrht);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 4, sizeof(cl_mem), &dev_ntop);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 5, sizeof(cl_mem), &dev_nbot);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 6, sizeof(cl_mem), &dev_H);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 7, sizeof(cl_mem), &dev_U);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 8, sizeof(cl_mem), &dev_V);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions_local,   1, NULL, &global_work_size, &local_work_size, NULL);
+    
+        /*
+        __kernel void copy_state_data_cl(
+                         const int    isize,         // 0
+                __global      state_t *H,            // 1
+                __global      state_t *U,            // 2
+                __global      state_t *V,            // 3
+                __global      state_t *H_new,        // 4
+                __global      state_t *U_new,        // 5
+                __global      state_t *V_new)        // 6
+        */
+
+      ezcl_set_kernel_arg(kernel_copy_state_data, 0, sizeof(cl_int), (void *)&ncells);
+      ezcl_set_kernel_arg(kernel_copy_state_data, 1, sizeof(cl_mem), (void *)&dev_H);
+      ezcl_set_kernel_arg(kernel_copy_state_data, 2, sizeof(cl_mem), (void *)&dev_U);
+      ezcl_set_kernel_arg(kernel_copy_state_data, 3, sizeof(cl_mem), (void *)&dev_V);
+      ezcl_set_kernel_arg(kernel_copy_state_data, 4, sizeof(cl_mem), (void *)&dev_H_new);
+      ezcl_set_kernel_arg(kernel_copy_state_data, 5, sizeof(cl_mem), (void *)&dev_U_new);
+      ezcl_set_kernel_arg(kernel_copy_state_data, 6, sizeof(cl_mem), (void *)&dev_V_new);
+
+      //ezcl_enqueue_ndrange_kernel(command_queue, kernel_copy_state_data,   1, NULL, &global_work_size, &local_work_size, &copy_state_data_event);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_copy_state_data,   1, NULL, &global_work_size, &local_work_size, NULL);
+      // The copies become the live state buffers: dev_H/U/V take whatever memory_replace returns for the *_new handles.
+      dev_H = (cl_mem)gpu_state_memory.memory_replace(dev_H, dev_H_new);
+      dev_U = (cl_mem)gpu_state_memory.memory_replace(dev_U, dev_U_new);
+      dev_V = (cl_mem)gpu_state_memory.memory_replace(dev_V, dev_V_new);
+      // NOTE(review): L7_Dev_Update presumably refreshes the ghost-cell entries from other ranks -- confirm against the L7 library.
+      L7_Dev_Update(dev_H, L7_STATE_T, mesh->cell_handle);
+      L7_Dev_Update(dev_U, L7_STATE_T, mesh->cell_handle);
+      L7_Dev_Update(dev_V, L7_STATE_T, mesh->cell_handle);
+      // Allocate scratch buffers again for the finite-difference kernel; the earlier ones were handed to memory_replace.
+      dev_H_new = (cl_mem)gpu_state_memory.memory_malloc(ncells_ghost, sizeof(cl_state_t), const_cast<char *>("dev_H_new"), DEVICE_REGULAR_MEMORY);
+      dev_U_new = (cl_mem)gpu_state_memory.memory_malloc(ncells_ghost, sizeof(cl_state_t), const_cast<char *>("dev_U_new"), DEVICE_REGULAR_MEMORY);
+      dev_V_new = (cl_mem)gpu_state_memory.memory_malloc(ncells_ghost, sizeof(cl_state_t), const_cast<char *>("dev_V_new"), DEVICE_REGULAR_MEMORY);
+
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 0, sizeof(cl_int), &ncells);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 1, sizeof(cl_mem), &dev_celltype);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 2, sizeof(cl_mem), &dev_nlft);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 3, sizeof(cl_mem), &dev_nrht);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 4, sizeof(cl_mem), &dev_ntop);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 5, sizeof(cl_mem), &dev_nbot);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 6, sizeof(cl_mem), &dev_H);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 7, sizeof(cl_mem), &dev_U);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 8, sizeof(cl_mem), &dev_V);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions_ghost,   1, NULL, &global_work_size, &local_work_size, NULL);
+   } else { // numpe <= 1: a single boundary-condition kernel is queued
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 0, sizeof(cl_int), &ncells);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 1, sizeof(cl_mem), &dev_celltype);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 2, sizeof(cl_mem), &dev_nlft);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 3, sizeof(cl_mem), &dev_nrht);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 4, sizeof(cl_mem), &dev_ntop);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 5, sizeof(cl_mem), &dev_nbot);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 6, sizeof(cl_mem), &dev_H);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 7, sizeof(cl_mem), &dev_U);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 8, sizeof(cl_mem), &dev_V);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions,   1, NULL, &global_work_size, &local_work_size, NULL);
+   }
+#else
+   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 0, sizeof(cl_int), &ncells);
+   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 1, sizeof(cl_mem), &dev_celltype);
+   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 2, sizeof(cl_mem), &dev_nlft);
+   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 3, sizeof(cl_mem), &dev_nrht);
+   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 4, sizeof(cl_mem), &dev_ntop);
+   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 5, sizeof(cl_mem), &dev_nbot);
+   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 6, sizeof(cl_mem), &dev_H);
+   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 7, sizeof(cl_mem), &dev_U);
+   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 8, sizeof(cl_mem), &dev_V);
+   ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions,   1, NULL, &global_work_size, &local_work_size, NULL);
+#endif
+
+     /*
+     __kernel void calc_finite_difference_cl(
+                      const int     ncells,    // 0  Total number of cells.
+                      const int     lvmax,     // 1  Maximum level
+             __global       state_t *H,        // 2
+             __global       state_t *U,        // 3
+             __global       state_t *V,        // 4
+             __global       state_t *H_new,    // 5
+             __global       state_t *U_new,    // 6
+             __global       state_t *V_new,    // 7
+             __global const int     *nlft,     // 8  Array of left neighbors.
+             __global const int     *nrht,     // 9  Array of right neighbors.
+             __global const int     *ntop,     // 10  Array of top neighbors.
+             __global const int     *nbot,     // 11  Array of bottom neighbors.
+             __global const int     *level,    // 12  Array of level information.
+                      const real_t   deltaT,   // 13  Size of time step.
+             __global const real_t  *lev_dx,   // 14
+             __global const real_t  *lev_dy,   // 15
+             __local        state4_t *tile,    // 16  Tile size in state4.
+             __local        int8  *itile)      // 17  Tile size in int8.
+     */
+   cl_event calc_finite_difference_event;
+   // NOTE(review): ncells is a size_t reference but is passed with sizeof(cl_int); presumably only the low-order bytes are read -- confirm on big-endian targets.
+   real_t deltaT_local = deltaT; // the double argument converted to real_t; arg 13 is passed with sizeof(cl_real_t), presumably the same type
+   ezcl_set_kernel_arg(kernel_calc_finite_difference, 0, sizeof(cl_int),  (void *)&ncells);
+   ezcl_set_kernel_arg(kernel_calc_finite_difference, 1, sizeof(cl_int),  (void *)&levmx);
+   ezcl_set_kernel_arg(kernel_calc_finite_difference, 2, sizeof(cl_mem),  (void *)&dev_H);
+   ezcl_set_kernel_arg(kernel_calc_finite_difference, 3, sizeof(cl_mem),  (void *)&dev_U);
+   ezcl_set_kernel_arg(kernel_calc_finite_difference, 4, sizeof(cl_mem),  (void *)&dev_V);
+   ezcl_set_kernel_arg(kernel_calc_finite_difference, 5, sizeof(cl_mem),  (void *)&dev_H_new);
+   ezcl_set_kernel_arg(kernel_calc_finite_difference, 6, sizeof(cl_mem),  (void *)&dev_U_new);
+   ezcl_set_kernel_arg(kernel_calc_finite_difference, 7, sizeof(cl_mem),  (void *)&dev_V_new);
+   ezcl_set_kernel_arg(kernel_calc_finite_difference, 8, sizeof(cl_mem),  (void *)&dev_nlft);
+   ezcl_set_kernel_arg(kernel_calc_finite_difference, 9, sizeof(cl_mem),  (void *)&dev_nrht);
+   ezcl_set_kernel_arg(kernel_calc_finite_difference,10, sizeof(cl_mem),  (void *)&dev_ntop);
+   ezcl_set_kernel_arg(kernel_calc_finite_difference,11, sizeof(cl_mem),  (void *)&dev_nbot);
+   ezcl_set_kernel_arg(kernel_calc_finite_difference,12, sizeof(cl_mem),  (void *)&dev_level);
+   ezcl_set_kernel_arg(kernel_calc_finite_difference,13, sizeof(cl_real_t), (void *)&deltaT_local);
+   ezcl_set_kernel_arg(kernel_calc_finite_difference,14, sizeof(cl_mem),  (void *)&dev_levdx);
+   ezcl_set_kernel_arg(kernel_calc_finite_difference,15, sizeof(cl_mem),  (void *)&dev_levdy);
+   ezcl_set_kernel_arg(kernel_calc_finite_difference,16, local_work_size*sizeof(cl_state4_t),    NULL); // __local tile: size only, NULL data pointer
+   ezcl_set_kernel_arg(kernel_calc_finite_difference,17, local_work_size*sizeof(cl_int8),    NULL); // __local itile: size only, NULL data pointer
+
+   ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_finite_difference,   1, NULL, &global_work_size, &local_work_size, &calc_finite_difference_event);
+
+   ezcl_wait_for_events(1, &calc_finite_difference_event); // wait on the kernel's event before swapping buffers
+   ezcl_event_release(calc_finite_difference_event);
+   // The freshly computed buffers become the current state.
+   dev_H = (cl_mem)gpu_state_memory.memory_replace(dev_H, dev_H_new);
+   dev_U = (cl_mem)gpu_state_memory.memory_replace(dev_U, dev_U_new);
+   dev_V = (cl_mem)gpu_state_memory.memory_replace(dev_V, dev_V_new);
+
+   gpu_timers[STATE_TIMER_FINITE_DIFFERENCE] += (long)(cpu_timer_stop(tstart_cpu)*1.0e9); // timer result scaled by 1e9 (presumably s -> ns) and truncated to long
+}
+#endif
+
+void State::symmetry_check(const char *string, vector<int> sym_index, double eps,
+                           SIGN_RULE sign_rule, int &flag)
+{
+   // Compares H, U and V of every cell ic with its partner cell sym_index[ic]; each difference above eps is printed (prefixed by string) and adds one to flag.
+   size_t &ncells = mesh->ncells;
+   double xsign = 1.0, ysign = 1.0; // factors applied to the partner's U and V before comparing
+
+   if (sign_rule == DIAG_RULE || sign_rule == X_RULE) {
+      xsign = -1.0;
+   }
+
+   if (sign_rule == DIAG_RULE || sign_rule == Y_RULE) {
+      ysign = -1.0;
+   }
+
+   for (uint ic=0; ic<ncells; ic++) {
+      // ic is unsigned and is printed with %u; sym_index[ic] is an int and keeps %d. flag is never reset here.
+      if (fabs(H[ic] - H[sym_index[ic]]) > eps) {
+         printf("%s ic %u sym %d H[ic] %lf Hsym %lf diff %lf\n",
+                string,ic,sym_index[ic],H[ic],H[sym_index[ic]],fabs(H[ic]-H[sym_index[ic]]));
+         flag++;
+      }
+      if (fabs(U[ic] - xsign*U[sym_index[ic]]) > eps) {
+         printf("%s ic %u sym %d U[ic] %lf Usym %lf diff %lf\n",
+                string,ic,sym_index[ic],U[ic],U[sym_index[ic]],fabs(U[ic]-xsign*U[sym_index[ic]]));
+         flag++;
+      }
+      if (fabs(V[ic] - ysign*V[sym_index[ic]]) > eps) {
+         printf("%s ic %u sym %d V[ic] %lf Vsym %lf diff %lf\n",
+                string,ic,sym_index[ic],V[ic],V[sym_index[ic]],fabs(V[ic]-ysign*V[sym_index[ic]]));
+         flag++;
+      }
+   }
+
+}
+
+size_t State::calc_refine_potential(vector<int> &mpot,int &icount, int &jcount)
+{
+   // Sets mpot[ic] to 1, -1 or 0 for each real cell from relative differences in H, then returns the count produced by mesh->refine_smooth().
+  struct timeval tstart_cpu; // declared before the parallel region; started by the master thread below
+#ifdef _OPENMP
+#pragma omp parallel 
+{
+#endif
+
+  struct timeval tstart_lev2; // declared inside the parallel region; only the master thread starts and stops it
+
+#ifdef _OPENMP
+#pragma omp master
+{
+#endif
+   cpu_timer_start(&tstart_cpu);
+   if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2);
+#ifdef _OPENMP
+}
+#endif
+
+   int *nlft, *nrht, *nbot, *ntop, *level;
+   
+   size_t ncells = mesh->ncells; // only referenced by the commented-out set_bounds call further down
+   nlft  = mesh->nlft;
+   nrht  = mesh->nrht;
+   nbot  = mesh->nbot;
+   ntop  = mesh->ntop;
+   level = mesh->level;
+
+#ifdef _OPENMP
+#pragma omp master
+   {
+#endif
+   icount=0; // both counters are zeroed by the master thread, followed by a barrier
+   jcount=0;
+#ifdef _OPENMP
+   }
+#pragma omp barrier
+#endif
+
+#ifdef HAVE_MPI
+   // We need to update the ghost regions and boundary regions for the state
+   // variables since they were changed in the finite difference routine. We
+   // want to use the updated values for refinement decisions
+   if (mesh->numpe > 1) {
+      apply_boundary_conditions_local();
+#ifdef _OPENMP
+#pragma omp barrier
+#pragma omp master
+{
+#endif
+      L7_Update(&H[0], L7_STATE_T, mesh->cell_handle); // under OpenMP, executed by the master thread between two barriers
+      L7_Update(&U[0], L7_STATE_T, mesh->cell_handle);
+      L7_Update(&V[0], L7_STATE_T, mesh->cell_handle);
+#ifdef _OPENMP
+}
+#pragma omp barrier
+#endif
+      apply_boundary_conditions_ghost();
+   } else {
+      apply_boundary_conditions();
+   }
+#else
+   apply_boundary_conditions();
+#endif
+
+#ifdef _OPENMP
+#pragma omp barrier
+#endif
+/*****HIGH LEVEL OMP******/
+
+   int lowerBound, upperBound;
+   //mesh->set_bounds(ncells);
+   mesh->get_bounds(lowerBound,upperBound); // loop range [lowerBound, upperBound) comes from the mesh -- presumably this thread's share; confirm
+   for (int ic=lowerBound; ic<upperBound; ic++) {
+
+      if (mesh->celltype[ic] != REAL_CELL) continue; // mpot is only written for real cells
+
+      state_t Hic = H[ic];
+      //state_t Uic = U[ic];
+      //state_t Vic = V[ic];
+
+      int nl = nlft[ic];
+      state_t Hl = H[nl];
+      //state_t Ul = U[nl];
+      //state_t Vl = V[nl];
+
+      if (level[nl] > level[ic]){ // left neighbor is at a higher level: combine it with the cell above it
+         int nlt = ntop[nl];
+         Hl = REFINE_HALF * (Hl + H[nlt]);
+      }
+
+      int nr = nrht[ic];
+      state_t Hr = H[nr];
+      //state_t Ur = U[nr];
+      //state_t Vr = V[nr];
+
+      if (level[nr] > level[ic]){ // right neighbor is at a higher level: combine it with the cell above it
+         int nrt = ntop[nr];
+         Hr = REFINE_HALF * (Hr + H[nrt]);
+      }
+
+      int nb = nbot[ic];
+      state_t Hb = H[nb];
+      //state_t Ub = U[nb];
+      //state_t Vb = V[nb];
+
+      if (level[nb] > level[ic]){ // bottom neighbor is at a higher level: combine it with the cell to its right
+         int nbr = nrht[nb];
+         Hb = REFINE_HALF * (Hb + H[nbr]);
+      }
+
+      int nt = ntop[ic];
+      state_t Ht = H[nt];
+      //state_t Ut = U[nt];
+      //state_t Vt = V[nt];
+
+      if (level[nt] > level[ic]){ // top neighbor is at a higher level: combine it with the cell to its right
+         int ntr = nrht[nt];
+         Ht = REFINE_HALF * (Ht + H[ntr]);
+      }
+
+      state_t duplus1; //, duplus2;
+      state_t duhalf1; //, duhalf2;
+      state_t duminus1; //, duminus2;
+
+      duplus1 = Hr-Hic; // left/right differences in H
+      //duplus2 = Ur-Uic;
+      duhalf1 = Hic-Hl;
+      //duhalf2 = Uic-Ul;
+
+      state_t qmax = REFINE_NEG_THOUSAND; // running maximum of the relative differences computed below
+
+      state_t qpot = max(fabs(duplus1/Hic), fabs(duhalf1/Hic));
+      if (qpot > qmax) qmax = qpot;
+
+      duminus1 = Hic-Hl; // the same two left/right differences under other names, so qpot repeats the value above
+      //duminus2 = Uic-Ul;
+      duhalf1 = Hr-Hic;
+      //duhalf2 = Ur-Uic;
+
+      qpot = max(fabs(duminus1/Hic), fabs(duhalf1/Hic));
+      if (qpot > qmax) qmax = qpot;
+
+      duplus1 = Ht-Hic; // bottom/top differences in H
+      //duplus2 = Vt-Vic;
+      duhalf1 = Hic-Hb;
+      //duhalf2 = Vic-Vb;
+
+      qpot = max(fabs(duplus1/Hic), fabs(duhalf1/Hic));
+      if (qpot > qmax) qmax = qpot;
+
+      duminus1 = Hic-Hb; // the same two bottom/top differences again
+      //duminus2 = Vic-Vb;
+      duhalf1 = Ht-Hic;
+      //duhalf2 = Vt-Vic;
+
+      qpot = max(fabs(duminus1/Hic), fabs(duhalf1/Hic));
+      if (qpot > qmax) qmax = qpot;
+
+      mpot[ic]=0; // value kept when neither threshold test below fires
+      if (qmax > REFINE_GRADIENT && level[ic] < mesh->levmx) { // above the refine threshold and below mesh->levmx
+         mpot[ic]=1;
+      } else if (qmax < COARSEN_GRADIENT && level[ic] > 0) { // below the coarsen threshold and not at level 0
+         mpot[ic] = -1;
+      }
+      //if (mpot[ic]) printf("DEBUG cpu cell is %d mpot %d\n",ic,mpot[ic]);
+   }
+
+#ifdef _OPENMP
+#pragma omp master
+{
+#endif
+   if (TIMING_LEVEL >= 2) {
+      cpu_timers[STATE_TIMER_CALC_MPOT] += cpu_timer_stop(tstart_lev2);
+   }
+#ifdef _OPENMP
+}
+#endif
+
+#ifdef _OPENMP
+} // closes the omp parallel region opened at the top; NOTE(review): the barrier on the next line therefore sits outside it
+#pragma omp barrier
+#endif
+   int newcount = mesh->refine_smooth(mpot, icount, jcount); // called after the parallel region has closed
+   //printf("DEBUG -- after refine smooth in file %s line %d icount %d jcount %d newcount %d\n",__FILE__,__LINE__,icount,jcount,newcount);
+
+   cpu_timers[STATE_TIMER_REFINE_POTENTIAL] += cpu_timer_stop(tstart_cpu);
+
+   return(newcount); // int converted to the size_t return type
+}
+
+#ifdef HAVE_OPENCL
+size_t State::gpu_calc_refine_potential(int &icount, int &jcount)
+{
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+
+   struct timeval tstart_lev2;
+   if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2);
+
+   cl_command_queue command_queue = ezcl_get_command_queue();
+
+   size_t &ncells       = mesh->ncells;
+   int &levmx           = mesh->levmx;
+   cl_mem &dev_nlft     = mesh->dev_nlft;
+   cl_mem &dev_nrht     = mesh->dev_nrht;
+   cl_mem &dev_nbot     = mesh->dev_nbot;
+   cl_mem &dev_ntop     = mesh->dev_ntop;
+   //cl_mem &dev_mpot     = mesh->dev_mpot;
+   cl_mem &dev_i        = mesh->dev_i;
+   cl_mem &dev_j        = mesh->dev_j;
+   cl_mem &dev_level    = mesh->dev_level;
+   cl_mem &dev_celltype = mesh->dev_celltype;
+   cl_mem &dev_levdx    = mesh->dev_levdx;
+   cl_mem &dev_levdy    = mesh->dev_levdy;
+
+   assert(dev_H);
+   assert(dev_U);
+   assert(dev_V);
+   assert(dev_nlft);
+   assert(dev_nrht);
+   assert(dev_nbot);
+   assert(dev_ntop);
+   assert(dev_i);
+   assert(dev_j);
+   assert(dev_level);
+   //assert(dev_mpot);
+   //assert(dev_ioffset);
+   assert(dev_levdx);
+   assert(dev_levdy);
+
+   icount = 0;
+   jcount = 0;
+
+   size_t local_work_size = 128;
+   size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size;
+   size_t block_size = global_work_size/local_work_size;
+
+#ifdef HAVE_MPI
+   //size_t nghost_local = mesh->ncells_ghost - ncells;
+
+   if (mesh->numpe > 1) {
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 0, sizeof(cl_int), &ncells);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 1, sizeof(cl_mem), &dev_celltype);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 2, sizeof(cl_mem), &dev_nlft);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 3, sizeof(cl_mem), &dev_nrht);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 4, sizeof(cl_mem), &dev_ntop);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 5, sizeof(cl_mem), &dev_nbot);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 6, sizeof(cl_mem), &dev_H);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 7, sizeof(cl_mem), &dev_U);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 8, sizeof(cl_mem), &dev_V);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions_local,   1, NULL, &global_work_size, &local_work_size, NULL);
+
+      L7_Dev_Update(dev_H, L7_STATE_T, mesh->cell_handle);
+      L7_Dev_Update(dev_U, L7_STATE_T, mesh->cell_handle);
+      L7_Dev_Update(dev_V, L7_STATE_T, mesh->cell_handle);
+
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 0, sizeof(cl_int), &ncells);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 1, sizeof(cl_mem), &dev_celltype);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 2, sizeof(cl_mem), &dev_nlft);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 3, sizeof(cl_mem), &dev_nrht);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 4, sizeof(cl_mem), &dev_ntop);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 5, sizeof(cl_mem), &dev_nbot);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 6, sizeof(cl_mem), &dev_H);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 7, sizeof(cl_mem), &dev_U);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 8, sizeof(cl_mem), &dev_V);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions_ghost,   1, NULL, &global_work_size, &local_work_size, NULL);
+   } else {
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 0, sizeof(cl_int), &ncells);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 1, sizeof(cl_mem), &dev_celltype);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 2, sizeof(cl_mem), &dev_nlft);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 3, sizeof(cl_mem), &dev_nrht);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 4, sizeof(cl_mem), &dev_ntop);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 5, sizeof(cl_mem), &dev_nbot);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 6, sizeof(cl_mem), &dev_H);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 7, sizeof(cl_mem), &dev_U);
+      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 8, sizeof(cl_mem), &dev_V);
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions,   1, NULL, &global_work_size, &local_work_size, NULL);
+   }
+#else
+   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 0, sizeof(cl_int), &ncells);
+   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 1, sizeof(cl_mem), &dev_celltype);
+   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 2, sizeof(cl_mem), &dev_nlft);
+   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 3, sizeof(cl_mem), &dev_nrht);
+   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 4, sizeof(cl_mem), &dev_ntop);
+   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 5, sizeof(cl_mem), &dev_nbot);
+   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 6, sizeof(cl_mem), &dev_H);
+   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 7, sizeof(cl_mem), &dev_U);
+   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 8, sizeof(cl_mem), &dev_V);
+   ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions,   1, NULL, &global_work_size, &local_work_size, NULL);
+#endif
+
+#ifdef BOUNDS_CHECK
+      {
+         vector<int> nlft_tmp(mesh->ncells_ghost);
+         vector<int> nrht_tmp(mesh->ncells_ghost);
+         vector<int> nbot_tmp(mesh->ncells_ghost);
+         vector<int> ntop_tmp(mesh->ncells_ghost);
+         vector<int> level_tmp(mesh->ncells_ghost);
+         vector<state_t> H_tmp(mesh->ncells_ghost);
+         ezcl_enqueue_read_buffer(command_queue, dev_nlft,  CL_FALSE, 0, mesh->ncells_ghost*sizeof(cl_int), &nlft_tmp[0],  NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_nrht,  CL_FALSE, 0, mesh->ncells_ghost*sizeof(cl_int), &nrht_tmp[0],  NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_nbot,  CL_FALSE, 0, mesh->ncells_ghost*sizeof(cl_int), &nbot_tmp[0],  NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_ntop,  CL_TRUE,  0, mesh->ncells_ghost*sizeof(cl_int), &ntop_tmp[0],  NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_level, CL_TRUE,  0, mesh->ncells_ghost*sizeof(cl_int), &level_tmp[0], NULL);
+         ezcl_enqueue_read_buffer(command_queue, dev_H,     CL_TRUE,  0, mesh->ncells_ghost*sizeof(cl_int), &H_tmp[0],     NULL);
+         for (uint ic=0; ic<ncells; ic++){
+            int nl = nlft_tmp[ic];
+            if (nl<0 || nl>= (int)mesh->ncells_ghost) printf("%d: Warning at line %d cell %d nlft %d\n",mesh->mype,__LINE__,ic,nl);
+            if (level_tmp[nl] > level_tmp[ic]){
+               int ntl = ntop_tmp[nl];
+               if (ntl<0 || ntl>= (int)mesh->ncells_ghost) printf("%d: Warning at line %d cell %d global %d nlft %d ntop of nlft %d\n",mesh->mype,__LINE__,ic,ic+mesh->noffset,nl,ntl);
+            }
+            int nr = nrht_tmp[ic];
+            if (nr<0 || nr>= (int)mesh->ncells_ghost) printf("%d: Warning at line %d cell %d nrht %d\n",mesh->mype,__LINE__,ic,nr);
+            if (level_tmp[nr] > level_tmp[ic]){
+               int ntr = ntop_tmp[nr];
+               if (ntr<0 || ntr>= (int)mesh->ncells_ghost) printf("%d: Warning at line %d cell %d ntop of nrht %d\n",mesh->mype,__LINE__,ic,ntr);
+            }
+            int nb = nbot_tmp[ic];
+            if (nb<0 || nb>= (int)mesh->ncells_ghost) printf("%d: Warning at line %d cell %d nbot %d\n",mesh->mype,__LINE__,ic,nb);
+            if (level_tmp[nb] > level_tmp[ic]){
+               int nrb = nrht_tmp[nb];
+               if (nrb<0 || nrb>= (int)mesh->ncells_ghost) printf("%d: Warning at line %d cell %d nrht of nbot %d\n",mesh->mype,__LINE__,ic,nrb);
+            }
+            int nt = ntop_tmp[ic];
+            if (nt<0 || nt>= (int)mesh->ncells_ghost) printf("%d: Warning at line %d cell %d ntop %d\n",mesh->mype,__LINE__,ic,nt);
+            if (level_tmp[nt] > level_tmp[ic]){
+               int nrt = nrht_tmp[nt];
+               if (nrt<0 || nrt>= (int)mesh->ncells_ghost) printf("%d: Warning at line %d cell %d nrht of ntop %d\n",mesh->mype,__LINE__,ic,nrt);
+            }
+         }
+         for (uint ic=0; ic<mesh->ncells_ghost; ic++){
+            if (H_tmp[ic] < 1.0) printf("%d: Warning at line %d cell %d H %lf\n",mesh->mype,__LINE__,ic,H_tmp[ic]);
+         }
+      }
+#endif
+
+   size_t result_size = 1;
+   cl_mem dev_result     = ezcl_malloc(NULL, const_cast<char *>("dev_result"),     &result_size,        sizeof(cl_int2), CL_MEM_READ_WRITE, 0);
+   cl_mem dev_redscratch = ezcl_malloc(NULL, const_cast<char *>("dev_redscratch"), &block_size,         sizeof(cl_int2), CL_MEM_READ_WRITE, 0);
+
+   dev_mpot              = ezcl_malloc(NULL, const_cast<char *>("dev_mpot"),       &mesh->ncells_ghost, sizeof(cl_int),  CL_MEM_READ_WRITE, 0);
+
+     /*
+     __kernel void refine_potential
+              const int      ncells,     // 0  Total number of cells.
+              const int      levmx,      // 1  Maximum level
+     __global       state_t *H,          // 2
+     __global       state_t *U,          // 3
+     __global       state_t *V,          // 4
+     __global const int     *nlft,       // 5  Array of left neighbors.
+     __global const int     *nrht,       // 6  Array of right neighbors.
+     __global const int     *ntop,       // 7  Array of top neighbors.
+     __global const int     *nbot,       // 8  Array of bottom neighbors.
+     __global const int     *level,      // 9  Array of level information.
+     __global const int     *celltype,   // 10  Array of celltype information.
+     __global       int     *mpot,       // 11  Array of mesh potential information.
+     __global       int2    *redscratch, // 12
+     __global const real_t  *lev_dx,     // 13
+     __global const real_t  *lev_dy,     // 14
+     __global       int2    *result,     // 15
+     __local        state_t *tile,       // 16  Tile size in real4.
+     __local        int8    *itile)      // 17  Tile size in int8.
+     */
+
+   ezcl_set_kernel_arg(kernel_refine_potential, 0, sizeof(cl_int),  (void *)&ncells);
+   ezcl_set_kernel_arg(kernel_refine_potential, 1, sizeof(cl_int),  (void *)&levmx);
+   ezcl_set_kernel_arg(kernel_refine_potential, 2, sizeof(cl_mem),  (void *)&dev_H);
+   ezcl_set_kernel_arg(kernel_refine_potential, 3, sizeof(cl_mem),  (void *)&dev_U);
+   ezcl_set_kernel_arg(kernel_refine_potential, 4, sizeof(cl_mem),  (void *)&dev_V);
+   ezcl_set_kernel_arg(kernel_refine_potential, 5, sizeof(cl_mem),  (void *)&dev_nlft);
+   ezcl_set_kernel_arg(kernel_refine_potential, 6, sizeof(cl_mem),  (void *)&dev_nrht);
+   ezcl_set_kernel_arg(kernel_refine_potential, 7, sizeof(cl_mem),  (void *)&dev_ntop);
+   ezcl_set_kernel_arg(kernel_refine_potential, 8, sizeof(cl_mem),  (void *)&dev_nbot);
+   ezcl_set_kernel_arg(kernel_refine_potential, 9, sizeof(cl_mem),  (void *)&dev_i);
+   ezcl_set_kernel_arg(kernel_refine_potential,10, sizeof(cl_mem),  (void *)&dev_j);
+   ezcl_set_kernel_arg(kernel_refine_potential,11, sizeof(cl_mem),  (void *)&dev_level);
+   ezcl_set_kernel_arg(kernel_refine_potential,12, sizeof(cl_mem),  (void *)&dev_celltype);
+   ezcl_set_kernel_arg(kernel_refine_potential,13, sizeof(cl_mem),  (void *)&dev_levdx);
+   ezcl_set_kernel_arg(kernel_refine_potential,14, sizeof(cl_mem),  (void *)&dev_levdy);
+   ezcl_set_kernel_arg(kernel_refine_potential,15, sizeof(cl_mem),  (void *)&dev_mpot);
+   ezcl_set_kernel_arg(kernel_refine_potential,16, sizeof(cl_mem),  (void *)&dev_redscratch);
+   ezcl_set_kernel_arg(kernel_refine_potential,17, sizeof(cl_mem),  (void *)&dev_result);
+   ezcl_set_kernel_arg(kernel_refine_potential,18, local_work_size*sizeof(cl_state_t),    NULL);
+   ezcl_set_kernel_arg(kernel_refine_potential,19, local_work_size*sizeof(cl_int8),    NULL);
+
+   ezcl_enqueue_ndrange_kernel(command_queue, kernel_refine_potential, 1, NULL, &global_work_size, &local_work_size, NULL);
+
+   mesh->gpu_rezone_count2(block_size, local_work_size, dev_redscratch, dev_result);
+
+   int count[2] = {0, 0};
+   ezcl_enqueue_read_buffer(command_queue, dev_result, CL_TRUE, 0, sizeof(cl_int2), count, NULL);
+   icount  = count[0];
+   jcount  = count[1];
+   //size_t result = ncells + icount - jcount;
+
+   //int mpot_check[ncells];
+   //ezcl_enqueue_read_buffer(command_queue, dev_mpot, CL_TRUE, 0, ncells*sizeof(cl_int), mpot_check, NULL);
+   //for (int ic=0; ic<ncells; ic++){
+   //   if (mpot_check[ic]) printf("DEBUG -- cell %d mpot %d\n",ic,mpot_check[ic]);
+   //}
+
+   //printf("result = %lu after first refine potential icount %d jcount %d\n",result, icount, jcount);
+//   int which_smooth = 1;
+
+   ezcl_device_memory_delete(dev_redscratch);
+   ezcl_device_memory_delete(dev_result);
+
+   if (TIMING_LEVEL >= 2) {
+      gpu_timers[STATE_TIMER_CALC_MPOT] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
+   }
+
+   int my_result = mesh->gpu_refine_smooth(dev_mpot, icount, jcount);
+   //printf("DEBUG gpu calc refine potential %d icount %d jcount %d\n",my_result,icount,jcount);
+
+   gpu_timers[STATE_TIMER_REFINE_POTENTIAL] += (long)(cpu_timer_stop(tstart_cpu)*1.0e9);
+
+   return((size_t)my_result);
+}
+#endif
+
+// Sum H times the cell area (lev_deltax*lev_deltay at the cell's level) over
+// every REAL_CELL held by this process.  SUM_KAHAN selects the compensated
+// accumulation, SUM_REGULAR a plain one; any other selector yields 0.0.
+// When built with MPI and the mesh is parallel, the per-process result is
+// reduced over MPI_COMM_WORLD.  The elapsed time is added to
+// cpu_timers[STATE_TIMER_MASS_SUM].
+double State::mass_sum(int enhanced_precision_sum)
+{
+   size_t &ncells = mesh->ncells;
+   int *celltype  = mesh->celltype;
+   int *level     = mesh->level;
+
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+
+   double total_sum = 0.0;
+
+   if (enhanced_precision_sum == SUM_KAHAN) {
+      struct esum_type local;
+#ifdef HAVE_MPI
+      struct esum_type global;
+#endif
+      local.sum        = 0.0;
+      local.correction = 0.0;
+
+      for (int ic = 0; ic < (int)ncells; ic++) {
+         //  Exclude boundary cells.
+         if (celltype[ic] != REAL_CELL) continue;
+
+         // The running total is always local.sum + local.correction.
+         double term_with_carry = H[ic]*mesh->lev_deltax[level[ic]]*mesh->lev_deltay[level[ic]] + local.correction;
+         double advanced_sum    = local.sum + local.correction;
+         local.correction       = term_with_carry - (advanced_sum - local.sum);
+         local.sum              = advanced_sum;
+      }
+
+      // Start from the process-local answer; a parallel mesh replaces it
+      // with the reduced one below.
+      total_sum = local.sum + local.correction;
+#ifdef HAVE_MPI
+      if (mesh->parallel) {
+         MPI_Allreduce(&local, &global, 1, MPI_TWO_DOUBLES, KNUTH_SUM, MPI_COMM_WORLD);
+         total_sum = global.sum + global.correction;
+      }
+#endif
+
+   } else if (enhanced_precision_sum == SUM_REGULAR) {
+      double summer = 0.0;
+
+      for (uint ic = 0; ic < ncells; ic++) {
+         //  Exclude boundary cells.
+         if (celltype[ic] != REAL_CELL) continue;
+
+         summer += H[ic]*mesh->lev_deltax[level[ic]]*mesh->lev_deltay[level[ic]];
+      }
+
+      // Process-local answer; overwritten by the reduction on a parallel mesh.
+      total_sum = summer;
+#ifdef HAVE_MPI
+      if (mesh->parallel) {
+         MPI_Allreduce(&summer, &total_sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+      }
+#endif
+   }
+
+   cpu_timers[STATE_TIMER_MASS_SUM] += cpu_timer_stop(tstart_cpu);
+
+   return(total_sum);
+}
+
+#ifdef HAVE_OPENCL
+// Device counterpart of mass_sum(): runs a two-stage reduction kernel pair
+// over dev_H and returns the summed mass.
+//
+//   enhanced_precision_sum  non-zero selects the "epsum" kernels, which
+//                           return a (sum, correction) pair; zero selects
+//                           the plain sum kernels.
+//
+// When built with MPI the per-process result is reduced over MPI_COMM_WORLD,
+// but only for a parallel mesh, the same rule mass_sum() applies.  The
+// elapsed time is added to gpu_timers[STATE_TIMER_MASS_SUM] in nanoseconds.
+double State::gpu_mass_sum(int enhanced_precision_sum)
+{
+   struct timeval tstart_cpu;
+   cpu_timer_start(&tstart_cpu);
+
+   cl_command_queue command_queue = ezcl_get_command_queue();
+
+   size_t &ncells       = mesh->ncells;
+   cl_mem &dev_levdx    = mesh->dev_levdx;
+   cl_mem &dev_levdy    = mesh->dev_levdy;
+   cl_mem &dev_celltype = mesh->dev_celltype;
+   cl_mem &dev_level    = mesh->dev_level;
+
+   assert(dev_H);
+   assert(dev_level);
+   assert(dev_levdx);
+   assert(dev_levdy);
+   assert(dev_celltype);
+
+   size_t one = 1;
+   cl_mem dev_mass_sum, dev_redscratch;
+   double gpu_mass_sum_total;
+
+   // Round the global size up to a whole number of work groups; block_size
+   // is the resulting number of work groups.
+   size_t local_work_size = 128;
+   size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size;
+   size_t block_size     = global_work_size/local_work_size;
+
+   // NOTE(review): ncells and block_size are size_t, yet only sizeof(cl_int)
+   // bytes of each are handed to the kernels below; this presumably relies
+   // on a little-endian host -- confirm.
+
+   if (enhanced_precision_sum) {
+      dev_mass_sum = ezcl_malloc(NULL, const_cast<char *>("dev_mass_sum"), &one,    sizeof(cl_real2_t), CL_MEM_READ_WRITE, 0);
+      dev_redscratch = ezcl_malloc(NULL, const_cast<char *>("dev_redscratch"), &block_size, sizeof(cl_real2_t), CL_MEM_READ_WRITE, 0);
+
+      // Stage 1 arguments, by index: 0 cell count, 1 H, 2 level, 3 levdx,
+      // 4 levdy, 5 celltype, 6 one-element result, 7 scratch with one
+      // element per work group, 8 local tile of local_work_size elements.
+      ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 0, sizeof(cl_int), (void *)&ncells);
+      ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 1, sizeof(cl_mem), (void *)&dev_H);
+      ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 2, sizeof(cl_mem), (void *)&dev_level);
+      ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 3, sizeof(cl_mem), (void *)&dev_levdx);
+      ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 4, sizeof(cl_mem), (void *)&dev_levdy);
+      ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 5, sizeof(cl_mem), (void *)&dev_celltype);
+      ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 6, sizeof(cl_mem), (void *)&dev_mass_sum);
+      ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 7, sizeof(cl_mem), (void *)&dev_redscratch);
+      ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 8, local_work_size*sizeof(cl_real2_t), NULL);
+
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduce_epsum_mass_stage1of2, 1, NULL, &global_work_size, &local_work_size, NULL);
+
+      // Stage 2 is only enqueued when stage 1 used more than one work
+      // group; it is launched as a single work group.
+      if (block_size > 1) {
+         // Stage 2 arguments, by index: 0 number of scratch entries,
+         // 1 one-element result, 2 scratch, 3 local tile.
+         ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage2of2, 0, sizeof(cl_int), (void *)&block_size);
+         ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage2of2, 1, sizeof(cl_mem), (void *)&dev_mass_sum);
+         ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage2of2, 2, sizeof(cl_mem), (void *)&dev_redscratch);
+         ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage2of2, 3, local_work_size*sizeof(cl_real2_t), NULL);
+
+         ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduce_epsum_mass_stage2of2, 1, NULL, &local_work_size, &local_work_size, NULL);
+      }
+
+      struct esum_type local, global;
+      real2_t mass_sum;
+
+      ezcl_enqueue_read_buffer(command_queue, dev_mass_sum, CL_TRUE, 0, 1*sizeof(cl_real2_t), &mass_sum, NULL);
+
+      local.sum = mass_sum.s0;
+      local.correction = mass_sum.s1;
+      global.sum = local.sum;
+      global.correction = local.correction;
+#ifdef HAVE_MPI
+      // Reduce across ranks only for a distributed mesh, as mass_sum() does;
+      // a replicated mesh already holds the full answer on every rank.
+      if (mesh->parallel) {
+         MPI_Allreduce(&local, &global, 1, MPI_TWO_DOUBLES, KNUTH_SUM, MPI_COMM_WORLD);
+      }
+#endif
+      gpu_mass_sum_total = global.sum + global.correction;
+   } else {
+      dev_mass_sum = ezcl_malloc(NULL, const_cast<char *>("dev_mass_sum"), &one,    sizeof(cl_real_t), CL_MEM_READ_WRITE, 0);
+      dev_redscratch = ezcl_malloc(NULL, const_cast<char *>("dev_redscratch"), &block_size, sizeof(cl_real_t), CL_MEM_READ_WRITE, 0);
+
+      // Stage 1 arguments, by index: 0 cell count, 1 H, 2 level, 3 levdx,
+      // 4 levdy, 5 celltype, 6 one-element result, 7 scratch with one
+      // element per work group, 8 local tile of local_work_size elements.
+      ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 0, sizeof(cl_int), (void *)&ncells);
+      ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 1, sizeof(cl_mem), (void *)&dev_H);
+      ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 2, sizeof(cl_mem), (void *)&dev_level);
+      ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 3, sizeof(cl_mem), (void *)&dev_levdx);
+      ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 4, sizeof(cl_mem), (void *)&dev_levdy);
+      ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 5, sizeof(cl_mem), (void *)&dev_celltype);
+      ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 6, sizeof(cl_mem), (void *)&dev_mass_sum);
+      ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 7, sizeof(cl_mem), (void *)&dev_redscratch);
+      ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 8, local_work_size*sizeof(cl_real_t), NULL);
+
+      ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduce_sum_mass_stage1of2, 1, NULL, &global_work_size, &local_work_size, NULL);
+
+      // Stage 2 is only enqueued when stage 1 used more than one work
+      // group; it is launched as a single work group.
+      if (block_size > 1) {
+         // Stage 2 arguments, by index: 0 number of scratch entries,
+         // 1 one-element result, 2 scratch, 3 local tile.
+         ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage2of2, 0, sizeof(cl_int), (void *)&block_size);
+         ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage2of2, 1, sizeof(cl_mem), (void *)&dev_mass_sum);
+         ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage2of2, 2, sizeof(cl_mem), (void *)&dev_redscratch);
+         ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage2of2, 3, local_work_size*sizeof(cl_real_t), NULL);
+
+         ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduce_sum_mass_stage2of2, 1, NULL, &local_work_size, &local_work_size, NULL);
+      }
+
+      double local_sum, global_sum;
+      real_t mass_sum;
+
+      ezcl_enqueue_read_buffer(command_queue, dev_mass_sum, CL_TRUE, 0, 1*sizeof(cl_real_t), &mass_sum, NULL);
+
+      local_sum = mass_sum;
+      global_sum = local_sum;
+#ifdef HAVE_MPI
+      // Reduce across ranks only for a distributed mesh, as mass_sum() does.
+      if (mesh->parallel) {
+         MPI_Allreduce(&local_sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+      }
+#endif
+      gpu_mass_sum_total = global_sum;
+   }
+
+   // Both temporaries were allocated in whichever branch ran above.
+   ezcl_device_memory_delete(dev_redscratch);
+   ezcl_device_memory_delete(dev_mass_sum);
+
+   gpu_timers[STATE_TIMER_MASS_SUM] += (long)(cpu_timer_stop(tstart_cpu)*1.0e9);
+
+   return(gpu_mass_sum_total);
+}
+#endif
+
+#ifdef HAVE_OPENCL
+// Request the three device state buffers (dev_H, dev_U, dev_V) from
+// gpu_state_memory, each sized for ncells elements of cl_state_t.
+void State::allocate_device_memory(size_t ncells)
+{
+   const size_t state_elem_size = sizeof(cl_state_t);
+
+   dev_H = (cl_mem)gpu_state_memory.memory_malloc(ncells, state_elem_size,
+              const_cast<char *>("dev_H"), DEVICE_REGULAR_MEMORY);
+   dev_U = (cl_mem)gpu_state_memory.memory_malloc(ncells, state_elem_size,
+              const_cast<char *>("dev_U"), DEVICE_REGULAR_MEMORY);
+   dev_V = (cl_mem)gpu_state_memory.memory_malloc(ncells, state_elem_size,
+              const_cast<char *>("dev_V"), DEVICE_REGULAR_MEMORY);
+}
+#endif
+
+// Drop the current dev_H, dev_U and dev_V buffers and allocate fresh ones
+// sized for ncells elements.  The old contents are not copied.  Without
+// OpenCL support this is a no-op.
+void State::resize_old_device_memory(size_t ncells)
+{
+#ifdef HAVE_OPENCL
+   gpu_state_memory.memory_delete(dev_H);
+   gpu_state_memory.memory_delete(dev_U);
+   gpu_state_memory.memory_delete(dev_V);
+   // Same three allocations as a first-time setup.
+   allocate_device_memory(ncells);
+#else
+   // Mark the parameter as used so the build stays free of unused-parameter
+   // warnings, without a format string that mismatches size_t.
+   (void)ncells;
+#endif
+}
+
+#ifdef HAVE_MPI
+// Forward a load-balance request to the mesh, handing it this State's
+// state_memory, then call memory_reset_ptrs().
+// NOTE(review): presumably the mesh may relocate the state arrays, which is
+// why the pointers are reset afterwards -- confirm against
+// Mesh::do_load_balance_local.
+void State::do_load_balance_local(size_t &numcells){
+   mesh->do_load_balance_local(numcells, NULL, state_memory);
+   memory_reset_ptrs();
+}
+#endif
+#ifdef HAVE_OPENCL
+#ifdef HAVE_MPI
+// Forward a device load-balance request to the mesh, handing it
+// gpu_state_memory.  When the mesh call returns non-zero, dev_H, dev_U and
+// dev_V are looked up again by name in gpu_state_memory.
+void State::gpu_do_load_balance_local(size_t &numcells)
+{
+   // A zero return leaves the cached buffer handles untouched.
+   if (! mesh->gpu_do_load_balance_local(numcells, NULL, gpu_state_memory)) return;
+
+   dev_H = (cl_mem)gpu_state_memory.get_memory_ptr("dev_H");
+   dev_U = (cl_mem)gpu_state_memory.get_memory_ptr("dev_U");
+   dev_V = (cl_mem)gpu_state_memory.get_memory_ptr("dev_V");
+}
+#endif
+#endif
+
+// Elapsed time of the most recent serial (non-parallel) CPU run reported
+// through output_timing_info(); later calls divide it by their own elapsed
+// time to obtain a speed-up ratio.
+static double reference_time = 0.0;
+
+// Collect the CPU and/or GPU timers into compute, elapsed and mesh totals
+// and pass each requested device's totals to output_timer_block().
+//
+//   do_cpu_calc / do_gpu_calc  non-zero to report that device
+//   total_elapsed_time         forwarded unchanged to output_timer_block()
+void State::output_timing_info(int do_cpu_calc, int do_gpu_calc, double total_elapsed_time)
+{
+   const int parallel = mesh->parallel;
+
+   double cpu_compute = 0.0, cpu_elapsed = 0.0, cpu_mesh = 0.0;
+   double gpu_compute = 0.0, gpu_elapsed = 0.0, gpu_mesh = 0.0;
+
+   if (do_cpu_calc) {
+      cpu_compute = get_cpu_timer(STATE_TIMER_SET_TIMESTEP) +
+                    get_cpu_timer(STATE_TIMER_FINITE_DIFFERENCE) +
+                    get_cpu_timer(STATE_TIMER_REFINE_POTENTIAL) +
+                    get_cpu_timer(STATE_TIMER_REZONE_ALL) +
+                    mesh->get_cpu_timer(MESH_TIMER_CALC_NEIGHBORS) +
+                    mesh->get_cpu_timer(MESH_TIMER_LOAD_BALANCE) +
+                    get_cpu_timer(STATE_TIMER_MASS_SUM) +
+                    mesh->get_cpu_timer(MESH_TIMER_CALC_SPATIAL_COORDINATES) +
+                    mesh->get_cpu_timer(MESH_TIMER_PARTITION);
+
+      // On the CPU there is no transfer cost, so elapsed equals compute.
+      cpu_elapsed = cpu_compute;
+
+      cpu_mesh = mesh->get_cpu_timer(MESH_TIMER_CALC_NEIGHBORS) +
+                 get_cpu_timer(STATE_TIMER_REZONE_ALL) +
+                 mesh->get_cpu_timer(MESH_TIMER_REFINE_SMOOTH) +
+                 mesh->get_cpu_timer(MESH_TIMER_LOAD_BALANCE);
+   }
+
+   if (do_gpu_calc) {
+      gpu_compute = get_gpu_timer(STATE_TIMER_APPLY_BCS) +
+                    get_gpu_timer(STATE_TIMER_SET_TIMESTEP) +
+                    get_gpu_timer(STATE_TIMER_FINITE_DIFFERENCE) +
+                    get_gpu_timer(STATE_TIMER_REFINE_POTENTIAL) +
+                    get_gpu_timer(STATE_TIMER_REZONE_ALL) +
+                    mesh->get_gpu_timer(MESH_TIMER_CALC_NEIGHBORS) +
+                    mesh->get_gpu_timer(MESH_TIMER_LOAD_BALANCE) +
+                    get_gpu_timer(STATE_TIMER_MASS_SUM) +
+                    mesh->get_gpu_timer(MESH_TIMER_CALC_SPATIAL_COORDINATES) +
+                    mesh->get_gpu_timer(MESH_TIMER_COUNT_BCS);
+
+      // Device elapsed time brackets the compute time with the write and
+      // read timers.
+      gpu_elapsed = get_gpu_timer(STATE_TIMER_WRITE) + gpu_compute + get_gpu_timer(STATE_TIMER_READ);
+
+      gpu_mesh = mesh->get_gpu_timer(MESH_TIMER_CALC_NEIGHBORS) +
+                 get_gpu_timer(STATE_TIMER_REZONE_ALL) +
+                 mesh->get_gpu_timer(MESH_TIMER_REFINE_SMOOTH) +
+                 mesh->get_gpu_timer(MESH_TIMER_LOAD_BALANCE);
+   }
+
+   // A serial CPU run becomes the baseline for every later ratio.
+   if (do_cpu_calc && ! parallel) {
+      reference_time = cpu_elapsed;
+   }
+
+   // The GPU ratio takes precedence when both devices are reported.
+   double speedup = 0.0;
+   if (reference_time > 0.0) {
+      if (do_gpu_calc) {
+         speedup = reference_time/gpu_elapsed;
+      } else if (do_cpu_calc && parallel) {
+         speedup = reference_time/cpu_elapsed;
+      }
+   }
+
+   if (do_cpu_calc) {
+      output_timer_block(MESH_DEVICE_CPU, cpu_elapsed, cpu_mesh, cpu_compute, total_elapsed_time, speedup);
+   }
+   if (do_gpu_calc) {
+      output_timer_block(MESH_DEVICE_GPU, gpu_elapsed, gpu_mesh, gpu_compute, total_elapsed_time, speedup);
+   }
+}
+
+// Print the timing report for one device.  Nothing is printed unless the
+// code is built with TIMING defined.
+//
+//   device_type         MESH_DEVICE_CPU or MESH_DEVICE_GPU; also used as the
+//                       index into the two-entry label tables below
+//   elapsed_time        total time attributed to the device, in seconds
+//   mesh_time           portion of elapsed_time spent in mesh operations
+//   compute_time        device compute time
+//   total_elapsed_time  overall run time
+//   speedup_ratio       printed only when greater than zero
+//
+// For a non-parallel mesh only rank 0 reports; the other ranks return early.
+void State::output_timer_block(mesh_device_types device_type, double elapsed_time,
+   double mesh_time, double compute_time, double total_elapsed_time, double speedup_ratio)
+{
+   int mype  = mesh->mype;
+   int parallel = mesh->parallel;
+
+   int rank = mype;
+   if (! parallel) {
+      // We need to get rank info for check routines
+#ifdef HAVE_MPI
+      MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+#endif
+   }
+
+   if (! parallel && rank) return;
+
+#ifdef TIMING
+   const char *device_string = (device_type == MESH_DEVICE_CPU) ? "CPU" : "GPU";
+
+   if (rank == 0) {
+      printf("\n");
+      printf("~~~~~~~~~~~~~~~~ Device timing information ~~~~~~~~~~~~~~~~~~\n");
+   }
+
+   if (rank == 0 && parallel) {
+      printf("\n%3s: Parallel timings\n\n",device_string);
+   }
+
+   if (device_type == MESH_DEVICE_GPU) {
+      mesh->parallel_output("GPU: Write to device          time was",  get_gpu_timer(STATE_TIMER_WRITE), 0, "s");
+      mesh->parallel_output("GPU: Read from device         time was",  get_gpu_timer(STATE_TIMER_READ),  0, "s");
+   }
+
+   const char *device_compute_string[2] = {
+      "CPU: Device compute           time was",
+      "GPU: Device compute           time was"
+   };
+   mesh->parallel_output(device_compute_string[device_type], compute_time, 0, "s");
+
+   // Per-category timers; the last argument is the nesting level in the report.
+   timer_output(STATE_TIMER_SET_TIMESTEP,                  device_type, 1);
+   timer_output(STATE_TIMER_FINITE_DIFFERENCE,             device_type, 1);
+   timer_output(STATE_TIMER_REFINE_POTENTIAL,              device_type, 1);
+   timer_output(STATE_TIMER_CALC_MPOT,                     device_type, 2);
+   mesh->timer_output(MESH_TIMER_REFINE_SMOOTH,            device_type, 2);
+   timer_output(STATE_TIMER_REZONE_ALL,                    device_type, 1);
+   mesh->timer_output(MESH_TIMER_PARTITION,                device_type, 1);
+   mesh->timer_output(MESH_TIMER_CALC_NEIGHBORS,           device_type, 1);
+   if (mesh->get_calc_neighbor_type() == HASH_TABLE) {
+      mesh->timer_output(MESH_TIMER_HASH_SETUP,            device_type, 2);
+      mesh->timer_output(MESH_TIMER_HASH_QUERY,            device_type, 2);
+      if (parallel) {
+         mesh->timer_output(MESH_TIMER_FIND_BOUNDARY,      device_type, 2);
+         mesh->timer_output(MESH_TIMER_PUSH_SETUP,         device_type, 2);
+         mesh->timer_output(MESH_TIMER_PUSH_BOUNDARY,      device_type, 2);
+         mesh->timer_output(MESH_TIMER_LOCAL_LIST,         device_type, 2);
+         mesh->timer_output(MESH_TIMER_LAYER1,             device_type, 2);
+         mesh->timer_output(MESH_TIMER_LAYER2,             device_type, 2);
+         mesh->timer_output(MESH_TIMER_LAYER_LIST,         device_type, 2);
+         mesh->timer_output(MESH_TIMER_COPY_MESH_DATA,     device_type, 2);
+         mesh->timer_output(MESH_TIMER_FILL_MESH_GHOST,    device_type, 2);
+         mesh->timer_output(MESH_TIMER_FILL_NEIGH_GHOST,   device_type, 2);
+         mesh->timer_output(MESH_TIMER_SET_CORNER_NEIGH,   device_type, 2);
+         mesh->timer_output(MESH_TIMER_NEIGH_ADJUST,       device_type, 2);
+         mesh->timer_output(MESH_TIMER_SETUP_COMM,         device_type, 2);
+      }
+   } else {
+      mesh->timer_output(MESH_TIMER_KDTREE_SETUP,          device_type, 2);
+      mesh->timer_output(MESH_TIMER_KDTREE_QUERY,          device_type, 2);
+   }
+   timer_output(STATE_TIMER_MASS_SUM,                      device_type, 1);
+   if (parallel) {
+      mesh->timer_output(MESH_TIMER_LOAD_BALANCE,          device_type, 1);
+   }
+   mesh->timer_output(MESH_TIMER_CALC_SPATIAL_COORDINATES, device_type, 1);
+   if (! mesh->have_boundary) {
+      mesh->timer_output(MESH_TIMER_COUNT_BCS,             device_type, 1);
+   }
+   if (rank == 0) printf("=============================================================\n");
+
+   const char *profile_string[2] = {
+      "Profiling: Total CPU          time was",
+      "Profiling: Total GPU          time was"
+   };
+   mesh->parallel_output(profile_string[device_type], elapsed_time, 0, "s");
+   // Times above ten minutes are repeated in minutes.
+   if (elapsed_time > 600.0){
+      mesh->parallel_output("                                  or  ", elapsed_time/60.0, 0, "min");
+   }
+
+   // Report 0 percent rather than the NaN/inf of a division by a zero
+   // elapsed time.
+   double mesh_percent = (elapsed_time > 0.0) ? mesh_time/elapsed_time*100.0 : 0.0;
+
+   if (rank == 0) printf("-------------------------------------------------------------\n");
+   mesh->parallel_output("Mesh Ops (Neigh+rezone+smooth+balance) ",mesh_time, 0, "s");
+   mesh->parallel_output("Mesh Ops Percentage                    ",mesh_percent, 0, "percent");
+   if (rank == 0) printf("=============================================================\n");
+
+   mesh->parallel_output("Profiling: Total              time was",total_elapsed_time, 0, "s");
+   // The minutes line belongs to the total, so it is gated on the total.
+   if (total_elapsed_time > 600.0){
+      mesh->parallel_output("                                  or  ",total_elapsed_time/60.0, 0, "min");
+   }
+
+   if (speedup_ratio > 0.0) {
+      mesh->parallel_output("Parallel Speed-up:                    ",speedup_ratio, 0, "Reference Serial CPU");
+   }
+
+   if (rank == 0) printf("=============================================================\n");
+#endif
+}
+
+// Report one State timer through mesh->parallel_output().
+//
+//   category     which state timer to report
+//   device_type  MESH_DEVICE_CPU reads the CPU timer, anything else the GPU
+//                timer; also used as the index into the two-entry label table
+//   timer_level  nesting level; the label is indented by 2*timer_level blanks
+//
+// Only the process with mype == 0 builds the label; every other process
+// passes an empty string.
+void State::timer_output(state_timer_category category, mesh_device_types device_type, int timer_level)
+{
+   int mype = mesh->mype;
+
+   double local_time = 0.0;
+   if (device_type == MESH_DEVICE_CPU){
+      local_time = get_cpu_timer(category);
+   } else {
+      local_time = get_gpu_timer(category);
+   }
+
+   // Empty label by default.
+   char string[80] = "";
+
+   if (mype == 0) {
+      const char *blank="          ";
+
+      const char *device_string[2] = {
+         "CPU",
+         "GPU"
+      };
+
+      // Fits in the buffer: the indent is capped by the 10-character blank
+      // string and the descriptor is truncated to 30 characters.
+      sprintf(string,"%3s: %.*s%-30.30s\t", device_string[device_type],
+         2*timer_level, blank, state_timer_descriptor[category]);
+   }
+
+   mesh->parallel_output(string, local_time, timer_level, "s");
+}
+
+#ifdef HAVE_OPENCL
+// Debug aid: read the first ncells entries of dev_H, dev_U and dev_V back to
+// the host and print a line for every cell whose host value differs from the
+// device value by more than STATE_EPS.
+//
+//   string  label echoed in each message
+//   cycle   cycle number echoed in each message
+//   ncells  number of cells to compare
+void State::compare_state_gpu_global_to_cpu_global(const char* string, int cycle, uint ncells)
+{
+   cl_command_queue queue = ezcl_get_command_queue();
+
+   vector<state_t> H_dev(ncells);
+   vector<state_t> U_dev(ncells);
+   vector<state_t> V_dev(ncells);
+
+   // Only the last read passes CL_TRUE.
+   ezcl_enqueue_read_buffer(queue, dev_H, CL_FALSE, 0, ncells*sizeof(cl_state_t), &H_dev[0], NULL);
+   ezcl_enqueue_read_buffer(queue, dev_U, CL_FALSE, 0, ncells*sizeof(cl_state_t), &U_dev[0], NULL);
+   ezcl_enqueue_read_buffer(queue, dev_V, CL_TRUE,  0, ncells*sizeof(cl_state_t), &V_dev[0], NULL);
+
+   for (uint cell = 0; cell < ncells; cell++) {
+      if (fabs(H[cell]-H_dev[cell]) > STATE_EPS) {
+         printf("DEBUG %s at cycle %d H & H_check %d %lf %lf\n",string,cycle,cell,H[cell],H_dev[cell]);
+      }
+      if (fabs(U[cell]-U_dev[cell]) > STATE_EPS) {
+         printf("DEBUG %s at cycle %d U & U_check %d %lf %lf\n",string,cycle,cell,U[cell],U_dev[cell]);
+      }
+      if (fabs(V[cell]-V_dev[cell]) > STATE_EPS) {
+         printf("DEBUG %s at cycle %d V & V_check %d %lf %lf\n",string,cycle,cell,V[cell],V_dev[cell]);
+      }
+   }
+}
+#endif
+
+// Debug aid: gather this State's H, U and V from all processes and print a
+// line for every cell whose gathered value differs from state_global's value
+// by more than STATE_EPS.
+//
+//   state_global   State holding the reference H, U and V arrays
+//   string, cycle  label and cycle number echoed in each message
+//   ncells         number of local cells contributed by this process
+//   ncells_global  number of cells compared
+//   nsizes, ndispl receive counts and displacements given to MPI_Allgatherv
+//
+// Without HAVE_MPI nothing is gathered, so the reference values are compared
+// against the value-initialized check vectors.
+void State::compare_state_cpu_local_to_cpu_global(State *state_global, const char* string, int cycle, uint ncells, uint ncells_global, int *nsizes, int *ndispl)
+{
+   state_t *H_ref = state_global->H;
+   state_t *U_ref = state_global->U;
+   state_t *V_ref = state_global->V;
+
+   vector<state_t> H_gathered(ncells_global);
+   vector<state_t> U_gathered(ncells_global);
+   vector<state_t> V_gathered(ncells_global);
+
+#ifdef HAVE_MPI
+   MPI_Allgatherv(&H[0], ncells, MPI_STATE_T, &H_gathered[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD);
+   MPI_Allgatherv(&U[0], ncells, MPI_STATE_T, &U_gathered[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD);
+   MPI_Allgatherv(&V[0], ncells, MPI_STATE_T, &V_gathered[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD);
+#else
+   // Never executed; it only references the parameters that are otherwise
+   // unused in a build without MPI.
+   if (1 == 2) printf("DEBUG -- ncells %u nsizes %d ndispl %d\n",ncells, nsizes[0],ndispl[0]);
+#endif
+
+   for (uint cell = 0; cell < ncells_global; cell++) {
+      if (fabs(H_ref[cell]-H_gathered[cell]) > STATE_EPS) {
+         printf("DEBUG %s at cycle %d H & H_check %d %lf %lf\n",string,cycle,cell,H_ref[cell],H_gathered[cell]);
+      }
+      if (fabs(U_ref[cell]-U_gathered[cell]) > STATE_EPS) {
+         printf("DEBUG %s at cycle %d U & U_check %d %lf %lf\n",string,cycle,cell,U_ref[cell],U_gathered[cell]);
+      }
+      if (fabs(V_ref[cell]-V_gathered[cell]) > STATE_EPS) {
+         printf("DEBUG %s at cycle %d V & V_check %d %lf %lf\n",string,cycle,cell,V_ref[cell],V_gathered[cell]);
+      }
+   }
+}
+
+#ifdef HAVE_OPENCL
+void State::compare_state_all_to_gpu_local(State *state_global, uint ncells, uint ncells_global, int mype, int ncycle, int *nsizes, int *ndispl)
+{  // Debug aid: prints one line for every cell whose H, U or V copies (host vs. device, gathered local vs. global) differ by more than STATE_EPS.
+#ifdef HAVE_MPI
+   cl_command_queue command_queue = ezcl_get_command_queue();
+   // Host arrays and device buffers of the global state object supplied by the caller; they are the reference in checks 2-4.
+   state_t *H_global = state_global->H;
+   state_t *U_global = state_global->U;
+   state_t *V_global = state_global->V;
+   cl_mem &dev_H_global = state_global->dev_H;
+   cl_mem &dev_U_global = state_global->dev_U;
+   cl_mem &dev_V_global = state_global->dev_V;
+
+   // Check 1 (printed by every process): read this state's dev_H, dev_U, dev_V back into *_save and compare with host H, U, V.
+   vector<state_t>H_save(ncells);
+   vector<state_t>U_save(ncells);
+   vector<state_t>V_save(ncells);
+   ezcl_enqueue_read_buffer(command_queue, dev_H, CL_FALSE, 0, ncells*sizeof(cl_state_t), &H_save[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_U, CL_FALSE, 0, ncells*sizeof(cl_state_t), &U_save[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_V, CL_TRUE,  0, ncells*sizeof(cl_state_t), &V_save[0], NULL);
+   for (uint ic = 0; ic < ncells; ic++){
+      if (fabs(H[ic]-H_save[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 1 at cycle %d H & H_save %d %lf %lf \n",mype,ncycle,ic,H[ic],H_save[ic]);
+      if (fabs(U[ic]-U_save[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 1 at cycle %d U & U_save %d %lf %lf \n",mype,ncycle,ic,U[ic],U_save[ic]);
+      if (fabs(V[ic]-V_save[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 1 at cycle %d V & V_save %d %lf %lf \n",mype,ncycle,ic,V[ic],V_save[ic]);
+   }
+
+   // Check 2 (printed only when mype == 0): gather the device read-backs from all processes and compare with H_global, etc.
+   vector<state_t>H_save_global(ncells_global);
+   vector<state_t>U_save_global(ncells_global);
+   vector<state_t>V_save_global(ncells_global);
+   MPI_Allgatherv(&H_save[0], nsizes[mype], MPI_STATE_T, &H_save_global[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD);
+   MPI_Allgatherv(&U_save[0], nsizes[mype], MPI_STATE_T, &U_save_global[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD);
+   MPI_Allgatherv(&V_save[0], nsizes[mype], MPI_STATE_T, &V_save_global[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD);
+   if (mype == 0) {
+      for (uint ic = 0; ic < ncells_global; ic++){
+         if (fabs(H_global[ic]-H_save_global[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 2 at cycle %d H_global & H_save_global %d %lf %lf \n",mype,ncycle,ic,H_global[ic],H_save_global[ic]);
+         if (fabs(U_global[ic]-U_save_global[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 2 at cycle %d U_global & U_save_global %d %lf %lf \n",mype,ncycle,ic,U_global[ic],U_save_global[ic]);
+         if (fabs(V_global[ic]-V_save_global[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 2 at cycle %d V_global & V_save_global %d %lf %lf \n",mype,ncycle,ic,V_global[ic],V_save_global[ic]);
+      }
+   }
+
+   // Check 3 (printed only when mype == 0): gather the host H, U, V from all processes (reusing the *_save_global buffers) and compare with H_global, etc.
+   MPI_Allgatherv(&H[0], nsizes[mype], MPI_STATE_T, &H_save_global[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD);
+   MPI_Allgatherv(&U[0], nsizes[mype], MPI_STATE_T, &U_save_global[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD);
+   MPI_Allgatherv(&V[0], nsizes[mype], MPI_STATE_T, &V_save_global[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD);
+   if (mype == 0) {
+      for (uint ic = 0; ic < ncells_global; ic++){
+         if (fabs(H_global[ic]-H_save_global[ic]) > STATE_EPS) printf("DEBUG finite_difference 3 at cycle %d H_global & H_save_global %d %lf %lf \n",ncycle,ic,H_global[ic],H_save_global[ic]);
+         if (fabs(U_global[ic]-U_save_global[ic]) > STATE_EPS) printf("DEBUG finite_difference 3 at cycle %d U_global & U_save_global %d %lf %lf \n",ncycle,ic,U_global[ic],U_save_global[ic]);
+         if (fabs(V_global[ic]-V_save_global[ic]) > STATE_EPS) printf("DEBUG finite_difference 3 at cycle %d V_global & V_save_global %d %lf %lf \n",ncycle,ic,V_global[ic],V_save_global[ic]);
+      }
+   }
+
+   // Check 4 (printed only when mype == 0): read dev_H_global, etc. back from the device and compare with host H_global, etc.
+   ezcl_enqueue_read_buffer(command_queue, dev_H_global, CL_FALSE, 0, ncells_global*sizeof(cl_state_t), &H_save_global[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_U_global, CL_FALSE, 0, ncells_global*sizeof(cl_state_t), &U_save_global[0], NULL);
+   ezcl_enqueue_read_buffer(command_queue, dev_V_global, CL_TRUE,  0, ncells_global*sizeof(cl_state_t), &V_save_global[0], NULL);
+   if (mype == 0) {
+      for (uint ic = 0; ic < ncells_global; ic++){
+         if (fabs(H_global[ic]-H_save_global[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 4 at cycle %d H_global & H_save_global %d %lf %lf \n",mype,ncycle,ic,H_global[ic],H_save_global[ic]);
+         if (fabs(U_global[ic]-U_save_global[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 4 at cycle %d U_global & U_save_global %d %lf %lf \n",mype,ncycle,ic,U_global[ic],U_save_global[ic]);
+         if (fabs(V_global[ic]-V_save_global[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 4 at cycle %d V_global & V_save_global %d %lf %lf \n",mype,ncycle,ic,V_global[ic],V_save_global[ic]);
+      }
+   }
+#else
+   // Without MPI nothing is compared; the never-true branch below only mentions the parameters so the compiler does not warn about them.
+   if (1 == 2) printf("%d: DEBUG -- ncells %d ncells_global %d ncycle %d nsizes[0] %d ndispl %d state_global %p\n",
+      mype,ncells,ncells_global,ncycle,nsizes[0],ndispl[0],state_global);
+#endif
+}
+#endif
+
+void State::print_object_info(void)
+{
+   // Prints a summary of this State: its device buffers (OpenCL builds only), then the state memory database report.
+   printf(" ---- State object info -----\n");
+
+#ifdef HAVE_OPENCL
+   // Each device buffer is reported as: handle, number of elements, element size.
+   int nelem     = ezcl_get_device_mem_nelements(dev_H);
+   int elem_size = ezcl_get_device_mem_elsize(dev_H);
+   printf("dev_H       ptr : %p nelements %d elsize %d\n",dev_H,nelem,elem_size);
+
+   nelem     = ezcl_get_device_mem_nelements(dev_U);
+   elem_size = ezcl_get_device_mem_elsize(dev_U);
+   printf("dev_U       ptr : %p nelements %d elsize %d\n",dev_U,nelem,elem_size);
+
+   nelem     = ezcl_get_device_mem_nelements(dev_V);
+   elem_size = ezcl_get_device_mem_elsize(dev_V);
+   printf("dev_V       ptr : %p nelements %d elsize %d\n",dev_V,nelem,elem_size);
+
+   nelem     = ezcl_get_device_mem_nelements(dev_mpot);
+   elem_size = ezcl_get_device_mem_elsize(dev_mpot);
+   printf("dev_mpot    ptr : %p nelements %d elsize %d\n",dev_mpot,nelem,elem_size);
+   // (Reporting of dev_ioffset is disabled.)
+#endif
+   // Hand over to the state memory database for its own report.
+   state_memory.memory_report();
+   // (Direct reporting of the host H, U, V arrays is disabled.)
+}
+
+void State::print(void)
+{  // Writes the mesh connectivity table to mesh->fp or, when the neighbor arrays do not cover the ghost cells, the H/U/V table.
+   if (mesh->fp == NULL) {   // first use: open the per-process output file "out<mype>"
+      char filename[16];     // room for "out" + any int + NUL (a 10-byte buffer overflows once mype reaches 1000000)
+      snprintf(filename,sizeof(filename),"out%1d",mesh->mype);
+      mesh->fp=fopen(filename,"w");
+      if (mesh->fp == NULL) { perror(filename); return; }   // nothing to write to; never pass NULL to fprintf
+   }
+
+   if (mesh->mesh_memory.get_memory_size(mesh->nlft) >= mesh->ncells_ghost){
+      fprintf(mesh->fp,"%d:   index global  i     j     lev   nlft  nrht  nbot  ntop \n",mesh->mype);
+      for (uint ic=0; ic<mesh->ncells; ic++) {
+         fprintf(mesh->fp,"%d: %6d  %6d %4d  %4d   %4d  %4d  %4d  %4d  %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]);
+      }
+      for (uint ic=mesh->ncells; ic<mesh->ncells_ghost; ic++) {
+         fprintf(mesh->fp,"%d: %6d  %6d %4d  %4d   %4d  %4d  %4d  %4d  %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]);
+      }
+   } else {
+      fprintf(mesh->fp,"%d:  index     H        U         V      i     j     lev\n",mesh->mype);
+      for (uint ic=0; ic<mesh->ncells_ghost; ic++) {
+         fprintf(mesh->fp,"%d: %6d %lf %lf %lf %4d  %4d   %4d  \n", mesh->mype,ic, H[ic], U[ic], V[ic], mesh->i[ic], mesh->j[ic], mesh->level[ic]);
+      }
+   }
+}
+
+const int CRUX_STATE_VERSION = 102;  // stored as int_vals[0] by store_checkpoint() and checked by restore_checkpoint()
+const int num_int_vals       = 1;  // length of the int_vals scalar array that accompanies each state checkpoint
+
+size_t State::get_checkpoint_size(void)
+{  // Checkpoint size in bytes: three values per cell, the int_vals scalars, plus whatever the mesh reports for itself.
+#ifdef FULL_PRECISION
+   const size_t value_bytes = sizeof(double);
+#else
+   const size_t value_bytes = sizeof(float);
+#endif
+   const size_t state_bytes  = mesh->ncells*3*value_bytes;
+   const size_t scalar_bytes = num_int_vals*sizeof(int);
+   return(state_bytes + scalar_bytes + mesh->get_checkpoint_size());
+}
+
+void State::store_checkpoint(Crux *crux)
+{  // Stores the mesh checkpoint, then hands state_memory (with the version word and timers temporarily registered) to crux.
+   // Store mesh data first
+   mesh->store_checkpoint(crux);
+
+//#ifndef HAVE_MPI
+   // Scalar values stored with the state; currently only the format version
+   int int_vals[num_int_vals];
+   int_vals[0] = CRUX_STATE_VERSION;
+
+   // Register the scalars and timers in the memory database for this store only (the literal 4 and 8 look like element sizes in bytes -- TODO confirm against memory_add)
+   state_memory.memory_add(int_vals, (size_t)num_int_vals, 4, "state_int_vals", RESTART_DATA | REPLICATED_DATA);
+   state_memory.memory_add(cpu_timers, (size_t)STATE_TIMER_SIZE, 8, "state_cpu_timers", RESTART_DATA);
+   state_memory.memory_add(gpu_timers, (size_t)STATE_TIMER_SIZE, 8, "state_gpu_timers", RESTART_DATA);
+
+   crux->store_MallocPlus(state_memory);
+
+   // Unregister them again once stored; int_vals is a local array that disappears when this function returns
+   state_memory.memory_remove(int_vals);
+   state_memory.memory_remove(cpu_timers);
+   state_memory.memory_remove(gpu_timers);
+//#endif
+}
+
+void State::restore_checkpoint(Crux *crux)
+{  // Rebuilds this State from a checkpoint: mesh first, then the data registered in state_memory plus version word and timers.
+   // Terminates the program with a failure status when the stored state version is not CRUX_STATE_VERSION.
+   int storage;
+   // Restore mesh data first
+   mesh->restore_checkpoint(crux);
+   crux->restore_named_ints("storage", 7, &storage, 1);
+
+   // Create memory for restoring data into
+   int int_vals[num_int_vals];
+
+   // allocate is a state method
+   allocate(storage);
+
+   // Add to memory database for restoring checkpoint
+   state_memory.memory_add(int_vals, (size_t)num_int_vals, 4, "state_int_vals", RESTART_DATA | REPLICATED_DATA);
+   state_memory.memory_add(cpu_timers, (size_t)STATE_TIMER_SIZE, 8, "state_cpu_timers", RESTART_DATA);
+   state_memory.memory_add(gpu_timers, (size_t)STATE_TIMER_SIZE, 8, "state_gpu_timers", RESTART_DATA);
+
+   // Restore memory database
+   crux->restore_MallocPlus(state_memory);
+
+   // Check version number
+   if (int_vals[ 0] != CRUX_STATE_VERSION) {
+      printf("CRUX version mismatch for state data, version on file is %d, version in code is %d\n",
+         int_vals[0], CRUX_STATE_VERSION);
+      exit(EXIT_FAILURE);   // an unusable checkpoint is an error: do not exit with status 0
+   }
+
+#ifdef DEBUG_RESTORE_VALS
+   if (DEBUG_RESTORE_VALS) {
+      printf("\n");
+      printf("       === Restored state cpu timers ===\n");
+      for (int i = 0; i < STATE_TIMER_SIZE; i++){
+         printf("       %-30s %lg\n",state_timer_descriptor[i], cpu_timers[i]);
+      }
+      printf("       === Restored state cpu timers ===\n");
+      printf("\n");
+   }
+#endif
+   // NOTE(review): the guard below is spelled DEBUG_RESTORED_VALS, the one above DEBUG_RESTORE_VALS -- confirm whether two macros are intended.
+#ifdef DEBUG_RESTORED_VALS
+   if (DEBUG_RESTORED_VALS) {
+      printf("\n");
+      printf("       === Restored state gpu timers ===\n");
+      for (int i = 0; i < STATE_TIMER_SIZE; i++){
+         printf("       %-30s %lld\n",state_timer_descriptor[i], gpu_timers[i]);
+      }
+      printf("       === Restored state gpu_timers ===\n");
+      printf("\n");
+   }
+#endif
+
+   // Unregister the temporaries again (int_vals is a local array), then let memory_reset_ptrs() run on the restored data
+   state_memory.memory_remove(int_vals);
+   state_memory.memory_remove(cpu_timers);
+   state_memory.memory_remove(gpu_timers);
+   memory_reset_ptrs();
+}
+
+// Added overloaded print to get mesh information to print in each cycle
+// Brian Atkinson (5-29-14)
+// Writes a fresh file "iteration<N>": a mass summary followed by the mesh connectivity table or the H/U/V table.
+void State::print(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage)
+{
+   char filename[40];
+   snprintf(filename,sizeof(filename),"iteration%d",iteration);
+   if (mesh->fp != NULL) fclose(mesh->fp);   // one file per cycle: release the previous stream instead of leaking it
+   mesh->fp=fopen(filename,"w");
+   if (mesh->fp == NULL) { perror(filename); return; }   // nothing to write to; never pass NULL to fprintf
+
+   fprintf(mesh->fp,"Iteration = %d\t\tSimuation Time = %lf\n", iteration, simTime);
+   fprintf(mesh->fp,"mesh->ncells = %lu\t\tmesh->ncells_ghost = %lu\n", mesh->ncells, mesh->ncells_ghost);
+   if(iteration_mass == 0.0){   // iteration mass of exactly 0.0: only the initial mass is reported
+      fprintf(mesh->fp,"Initial Mass: %14.12lg\t\tSimulation Time: %lf\n", initial_mass, simTime);
+   }
+   else{
+      double mass_diff = iteration_mass - initial_mass;
+      fprintf(mesh->fp,"Initial Mass: %14.12lg\t\tIteration Mass: %14.12lg\n", initial_mass, iteration_mass);
+      fprintf(mesh->fp,"Mass Difference: %12.6lg\t\tMass Difference Percentage: %12.6lg%%\n", mass_diff, mass_diff_percentage);
+   }
+
+   if (mesh->mesh_memory.get_memory_size(mesh->nlft) >= mesh->ncells_ghost){
+      fprintf(mesh->fp,"%d:   index global  i     j     lev   nlft  nrht  nbot  ntop \n",mesh->mype);
+      for (uint ic=0; ic<mesh->ncells; ic++) {
+         fprintf(mesh->fp,"%d: %6d  %6d %4d  %4d   %4d  %4d  %4d  %4d  %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]);
+      }
+      for (uint ic=mesh->ncells; ic<mesh->ncells_ghost; ic++) {
+         fprintf(mesh->fp,"%d: %6d  %6d %4d  %4d   %4d  %4d  %4d  %4d  %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]);
+      }
+   } else {
+      fprintf(mesh->fp,"%d:  index     H        U         V      i     j     lev\n",mesh->mype);
+      for (uint ic=0; ic<mesh->ncells_ghost; ic++) {
+         fprintf(mesh->fp,"%d: %6d %lf %lf %lf %4d  %4d   %4d  \n", mesh->mype,ic, H[ic], U[ic], V[ic], mesh->i[ic], mesh->j[ic], mesh->level[ic]);
+      }
+   }
+}
+
+void State::print_local(int ncycle)
+{  // Writes the cycle number and a per-cell table (state values, indices and, when mesh->nlft is set, neighbors) to mesh->fp.
+   if (mesh->fp == NULL) {   // first use: open the per-process output file "out<mype>"
+      char filename[16];     // room for "out" + any int + NUL (a 10-byte buffer overflows once mype reaches 1000000)
+      snprintf(filename,sizeof(filename),"out%1d",mesh->mype);
+      mesh->fp=fopen(filename,"w");
+      if (mesh->fp == NULL) { perror(filename); return; }   // nothing to write to; never pass NULL to fprintf
+   }
+
+   fprintf(mesh->fp,"DEBUG in print_local ncycle is %d\n",ncycle);
+   if (mesh->nlft != NULL){
+      fprintf(mesh->fp,"%d:  index     H        U         V      i     j     lev   nlft   nrht   nbot   ntop\n",mesh->mype);
+      uint state_size = state_memory.get_memory_size(H);
+      for (uint ic=0; ic<mesh->ncells_ghost; ic++) {
+         if (ic >= state_size){
+            fprintf(mesh->fp,"%d: %6d                              %4d  %4d   %4d  %4d  %4d  %4d  %4d\n", mesh->mype,ic, mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]);
+         } else {
+            fprintf(mesh->fp,"%d: %6d %lf %lf %lf %4d  %4d   %4d  %4d  %4d  %4d  %4d\n", mesh->mype,ic, H[ic], U[ic], V[ic], mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]);
+         }
+      }
+   } else {
+      fprintf(mesh->fp,"%d:  index     H        U         V      i     j     lev\n",mesh->mype);
+      for (uint ic=0; ic<mesh->ncells_ghost; ic++) {
+         fprintf(mesh->fp,"%d: %6d %lf %lf %lf %4d  %4d   %4d\n", mesh->mype,ic, H[ic], U[ic], V[ic], mesh->i[ic], mesh->j[ic], mesh->level[ic]);
+      }
+   }
+}
+
+void State::print_failure_log(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage, bool got_nan){
+   // Writes "failure.log": the failure reason (NaN or mass difference), the mass summary, then the mesh or H/U/V table.
+   const char *filename = "failure.log";
+   if (mesh->fp != NULL) fclose(mesh->fp);   // release the stream that was open before instead of leaking it
+   mesh->fp=fopen(filename,"w");
+   if (mesh->fp == NULL) { perror(filename); return; }   // nothing to write to; never pass NULL to fprintf
+   double mass_diff = iteration_mass - initial_mass;
+   if(got_nan){
+      fprintf(mesh->fp,"Failed because of nan for H_sum was equal to NAN\n");
+   } else {
+      fprintf(mesh->fp,"Failed because mass difference is outside of accepted percentage\n");
+   }
+   fprintf(mesh->fp,"Iteration = %d\t\tSimuation Time = %lf\n", iteration, simTime);
+   fprintf(mesh->fp,"mesh->ncells = %lu\t\tmesh->ncells_ghost = %lu\n", mesh->ncells, mesh->ncells_ghost);
+   fprintf(mesh->fp,"Initial Mass: %14.12lg\t\tIteration Mass: %14.12lg\n", initial_mass, iteration_mass);
+   fprintf(mesh->fp,"Mass Difference: %12.6lg\t\tMass Difference Percentage: %12.6lg%%\n", mass_diff, mass_diff_percentage);
+   if (mesh->mesh_memory.get_memory_size(mesh->nlft) >= mesh->ncells_ghost){
+      fprintf(mesh->fp,"%d:   index global  i     j     lev   nlft  nrht  nbot  ntop \n",mesh->mype);
+      for (uint ic=0; ic<mesh->ncells; ic++) {
+         fprintf(mesh->fp,"%d: %6d  %6d %4d  %4d   %4d  %4d  %4d  %4d  %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]);
+      }
+      for (uint ic=mesh->ncells; ic<mesh->ncells_ghost; ic++) {
+         fprintf(mesh->fp,"%d: %6d  %6d %4d  %4d   %4d  %4d  %4d  %4d  %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]);
+      }
+   } else {
+      fprintf(mesh->fp,"%d:  index     H        U         V      i     j     lev\n",mesh->mype);
+      for (uint ic=0; ic<mesh->ncells_ghost; ic++) {
+         fprintf(mesh->fp,"%d: %6d %lf %lf %lf %4d  %4d   %4d  \n", mesh->mype,ic, H[ic], U[ic], V[ic], mesh->i[ic], mesh->j[ic], mesh->level[ic]);
+      }
+   }
+}
+
+void State::print_rollback_log(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage, int backup_attempt, int num_of_attempts, int error_status){
+   // Writes "rollback<backup_attempt>.log": the rollback reason, attempt counters, mass summary, then the mesh or H/U/V table.
+   char filename[40];
+   snprintf(filename, sizeof(filename), "rollback%d.log",backup_attempt);
+   if (mesh->fp != NULL) fclose(mesh->fp);   // release the stream that was open before instead of leaking it
+   mesh->fp=fopen(filename,"w");
+   if (mesh->fp == NULL) { perror(filename); return; }   // nothing to write to; never pass NULL to fprintf
+   double mass_diff = iteration_mass - initial_mass;
+   if(error_status == STATUS_NAN){
+      fprintf(mesh->fp,"Rolling back because of nan for H_sum was equal to NAN\n");
+   } else {
+      fprintf(mesh->fp,"Rolling back because mass difference is outside of accepted percentage\n");
+   }
+   fprintf(mesh->fp,"Rollback attempt %d of %d ---> Number of attempts left:%d\n", backup_attempt, num_of_attempts, num_of_attempts - backup_attempt);
+   fprintf(mesh->fp,"Iteration = %d\t\tSimuation Time = %lf\n", iteration, simTime);
+   fprintf(mesh->fp,"mesh->ncells = %lu\t\tmesh->ncells_ghost = %lu\n", mesh->ncells, mesh->ncells_ghost);
+   fprintf(mesh->fp,"Initial Mass: %14.12lg\t\tIteration Mass: %14.12lg\n", initial_mass, iteration_mass);
+   fprintf(mesh->fp,"Mass Difference: %12.6lg\t\tMass Difference Percentage: %12.6lg%%\n", mass_diff, mass_diff_percentage);
+   if (mesh->mesh_memory.get_memory_size(mesh->nlft) >= mesh->ncells_ghost){
+      fprintf(mesh->fp,"%d:   index global  i     j     lev   nlft  nrht  nbot  ntop \n",mesh->mype);
+      for (uint ic=0; ic<mesh->ncells; ic++) {
+         fprintf(mesh->fp,"%d: %6d  %6d %4d  %4d   %4d  %4d  %4d  %4d  %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]);
+      }
+      for (uint ic=mesh->ncells; ic<mesh->ncells_ghost; ic++) {
+         fprintf(mesh->fp,"%d: %6d  %6d %4d  %4d   %4d  %4d  %4d  %4d  %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]);
+      }
+   } else {
+      fprintf(mesh->fp,"%d:  index     H        U         V      i     j     lev\n",mesh->mype);
+      for (uint ic=0; ic<mesh->ncells_ghost; ic++) {
+         fprintf(mesh->fp,"%d: %6d %lf %lf %lf %4d  %4d   %4d  \n", mesh->mype,ic, H[ic], U[ic], V[ic], mesh->i[ic], mesh->j[ic], mesh->level[ic]);
+      }
+   }
+}
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/timer.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/timer.h
@@ -0,0 +1,72 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+#ifndef _TIMER_H
+#define _TIMER_H
+
+#include <sys/time.h>   /* struct timeval, used by both prototypes; makes this header self-contained */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Stores the current time of day (gettimeofday) in *tstart_cpu. */
+void cpu_timer_start(struct timeval *tstart_cpu);
+/* Returns the seconds, as a double, between tstart_cpu and the time of the call. */
+double cpu_timer_stop(struct timeval tstart_cpu);
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* _TIMER_H */
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/timer.c
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/timer.c
@@ -0,0 +1,114 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+#include <sys/time.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <string.h>
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "timer.h"
+
+void cpu_timer_start(struct timeval *tstart_cpu){
+   /* Stamp *tstart_cpu with the current time of day.  Inside an OpenMP
+    * parallel region only the master thread writes the stamp. */
+#ifdef _OPENMP
+   if ( omp_in_parallel() ) {
+#pragma omp master
+      {
+         gettimeofday(tstart_cpu, NULL);
+      }
+      return;
+   }
+#endif
+   gettimeofday(tstart_cpu, NULL);
+}
+
+double cpu_timer_stop(struct timeval tstart_cpu){
+   /* Returns the seconds elapsed between tstart_cpu and now.  Inside an
+    * OpenMP parallel region only the master thread reads the clock; every
+    * other thread gets 0.0 back, so result is initialized up front rather
+    * than being returned indeterminate. */
+   double result = 0.0;
+   struct timeval tstop_cpu, tresult;
+
+#ifdef _OPENMP
+   if ( omp_in_parallel() ) {
+#pragma omp master
+      {
+         gettimeofday(&tstop_cpu, NULL);
+         tresult.tv_sec = tstop_cpu.tv_sec - tstart_cpu.tv_sec;
+         tresult.tv_usec = tstop_cpu.tv_usec - tstart_cpu.tv_usec;
+         result = (double)tresult.tv_sec + (double)tresult.tv_usec*1.0e-6;
+      }
+      return(result);
+   }
+#endif
+   /* Built without OpenMP, or called outside any parallel region. */
+   gettimeofday(&tstop_cpu, NULL);
+   tresult.tv_sec = tstop_cpu.tv_sec - tstart_cpu.tv_sec;
+   tresult.tv_usec = tstop_cpu.tv_usec - tstart_cpu.tv_usec;   /* may be negative; the sum below is still correct */
+   result = (double)tresult.tv_sec + (double)tresult.tv_usec*1.0e-6;
+   return(result);
+}
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/zorder.h
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/zorder.h
@@ -0,0 +1,74 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+#ifndef _ZORDER_H
+#define _ZORDER_H
+/* Declarations of the z-order helpers; the extern "C" guard gives them C linkage when this header is included from C++. */
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+/* NOTE(review): the definitions are presumably in zorder.c (not visible here); parameter meanings are not documented in this header -- confirm there. */
+void calc_zorder(int size, int *i, int *j, int *level, int levmx, int ibase, int *z_index, int *z_order);
+unsigned long long index_to_bit(unsigned long long index, int lev, int levmx, int ibase);
+unsigned long long twobit_to_index(unsigned long long ibit, unsigned long long jbit);
+void printbits(int n);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* _ZORDER_H */
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/zorder.c
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/zorder.c
@@ -0,0 +1,148 @@
+/*
+ *  Copyright (c) 2011-2012, Los Alamos National Security, LLC.
+ *  All rights Reserved.
+ *
+ *  Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced 
+ *  under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National 
+ *  Laboratory (LANL), which is operated by Los Alamos National Security, LLC 
+ *  for the U.S. Department of Energy. The U.S. Government has rights to use, 
+ *  reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS 
+ *  ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR 
+ *  ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+ *  to produce derivative works, such modified software should be clearly marked,
+ *  so as not to confuse it with the version available from LANL.
+ *
+ *  Additionally, redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Los Alamos National Security, LLC, Los Alamos 
+ *       National Laboratory, LANL, the U.S. Government, nor the names of its 
+ *       contributors may be used to endorse or promote products derived from 
+ *       this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND 
+ *  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+ *  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
+ *  SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  CLAMR -- LA-CC-11-094
+ *  This research code is being developed as part of the 
+ *  2011 X Division Summer Workshop for the express purpose
+ *  of a collaborative code for development of ideas in
+ *  the implementation of AMR codes for Exascale platforms
+ *  
+ *  AMR implementation of the Wave code previously developed
+ *  as a demonstration code for regular grids on Exascale platforms
+ *  as part of the Supercomputing Challenge and Los Alamos 
+ *  National Laboratory
+ *  
+ *  Authors: Bob Robey       XCP-2   brobey@lanl.gov
+ *           Neal Davis              davis68@lanl.gov, davis68@illinois.edu
+ *           David Nicholaeff        dnic@lanl.gov, mtrxknight@aol.com
+ *           Dennis Trujillo         dptrujillo@lanl.gov, dptru10@gmail.com
+ * 
+ */
+
+#include <stdio.h>
+#include <math.h>
+#include "s7.h"
+#include "zorder.h"
+
+#define DEBUG 0
+
+void calc_zorder(int size, int *i, int *j, int *level, int levmx, int ibase, int *z_index, int *z_order)
+{
+   int cell;   //   Position of the cell currently being processed.
+   //   Build the interleaved key of every cell whose level is non-negative.
+   for (cell = 0; cell < size; ++cell) {
+      unsigned long long xbits, ybits;   //   Bit-spread x- and y-index.
+      if (level[cell] < 0) continue;     //   Such entries are not written here.
+      xbits = index_to_bit(i[cell], level[cell], levmx, ibase);
+      ybits = index_to_bit(j[cell], level[cell], levmx, ibase);
+      z_index[cell] = twobit_to_index(xbits, ybits);
+      z_order[cell] = cell;
+   }
+
+   //   Sort the z-ordered indices.
+   S7_Index_Sort(z_index, size, S7_INT, z_order);
+
+   if (!DEBUG) return;
+   //   Debug dump.  NOTE(review): j is printed before i although the header names i first -- confirm.
+   printf("orig index   i     j     lev     ibit       jbit       ijbit      z index  z order\n");
+   for (cell = 0; cell < size; ++cell) {
+      unsigned long long ib = index_to_bit(i[cell], level[cell], levmx, ibase);
+      unsigned long long jb = index_to_bit(j[cell], level[cell], levmx, ibase);
+      printf(" %6d   %4d  %4d   %4d   ", cell + 1, j[cell], i[cell], level[cell]);
+      printbits(jb); printf("   ");
+      printbits(ib); printf("   ");
+      printbits(ib | (jb << 1));
+      printf("   %6d     %5d\n", z_index[cell], z_order[cell]);
+   }
+}
+
+//   Spread the bits of a level-scaled index onto the even bit positions.
+//   ibase is subtracted from index; when lev < levmx the offset is multiplied
+//   by 2^(levmx - lev).  The mask steps assume the scaled offset fits in 16 bits.
+unsigned long long index_to_bit(unsigned long long index,
+                        int lev,
+                        int levmx,
+                        int ibase)
+{
+   static const unsigned long long B[] =
+      {0x55555555,  /* 01010101010101010101010101010101 */
+       0x33333333,  /* 00110011001100110011001100110011 */
+       0x0F0F0F0F,  /* 00001111000011110000111100001111 */
+       0x00FF00FF,  /* 00000000111111110000000011111111 */
+       0x0000FFFF}; /* 00000000000000001111111111111111 */
+   static const unsigned long long S[] = {1, 2, 4, 8, 16};
+   unsigned long long ibit = index - ibase;
+   //   Integer shift keeps the scaling exact; a shift of 64+ bits leaves nothing.
+   if (lev < levmx)
+      ibit = (levmx - lev < 64) ? ibit << (levmx - lev) : 0;
+   ibit = (ibit | (ibit << S[3])) & B[3];
+   ibit = (ibit | (ibit << S[2])) & B[2];
+   ibit = (ibit | (ibit << S[1])) & B[1];
+   return (ibit | (ibit << S[0])) & B[0];
+}
+
+unsigned long long twobit_to_index(unsigned long long ibit, unsigned long long jbit)
+{   //   ibit keeps its bit positions; jbit is moved up one bit and OR-ed in.
+   return ibit | (jbit << 1);
+}
+
+//   Print n as a binary number, zero-padded to a whole number of 8-bit groups.
+//   The bit pattern of n is handled as unsigned so every shift is well defined.
+void printbits(int n)
+{
+   unsigned int value = (unsigned int)n;
+   unsigned int mask  = 1u << (sizeof(n) * 8 - 1);   //   Highest bit of an int.
+   unsigned int limit = ~0u >> 8;                    //   Largest value with an empty top byte.
+
+   if (0 == value) {   //   For simplicity's sake, treat 0 as a special case.
+      printf("00000000");
+      return;
+   }
+   //   Skip leading all-zero bytes so only the relevant digits are printed.
+   while (limit >= value) {
+      mask  >>= 8;
+      limit >>= 8;
+   }
+
+   //   mask now selects the top bit of the highest non-zero byte of value.
+   while (mask > 0) {
+      putchar((value & mask) ? '1' : '0');
+      mask >>= 1;
+   }
+}
+
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/CMakeLists.txt
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(CLAMR)  # Descend into the CLAMR benchmark directory.
Index: MultiSource/Benchmarks/DOE-ProxyApps-C++/Makefile
===================================================================
--- /dev/null
+++ MultiSource/Benchmarks/DOE-ProxyApps-C++/Makefile
@@ -0,0 +1,6 @@
+# MultiSource/Benchmarks/DOE-ProxyApps-C++ Makefile: Build all subdirectories automatically
+# LEVEL is the relative path from this directory up to the top of the source tree.
+LEVEL = ../../..
+PARALLEL_DIRS = CLAMR
+
+include $(LEVEL)/Makefile.programs