Index: LICENSE.TXT =================================================================== --- LICENSE.TXT +++ LICENSE.TXT @@ -83,6 +83,7 @@ llvm-test/MultiSource/Benchmarks/ASC_Sequoia/IRSmk llvm-test/MultiSource/Benchmarks/ASC_Sequoia/sphot smg2000: llvm-test/MultiSource/Benchmarks/ASCI_Purple/SMG2000 +miniAMR: llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR XSBench: llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C/XSBench Fhourstones: llvm-test/MultiSource/Benchmarks/Fhourstones Fhourstones-3.1: llvm-test/MultiSource/Benchmarks/Fhourstones-3.1 Index: MultiSource/Benchmarks/DOE-ProxyApps-C/CMakeLists.txt =================================================================== --- MultiSource/Benchmarks/DOE-ProxyApps-C/CMakeLists.txt +++ MultiSource/Benchmarks/DOE-ProxyApps-C/CMakeLists.txt @@ -1,2 +1,2 @@ add_subdirectory(XSBench) - +add_subdirectory(miniAMR) Index: MultiSource/Benchmarks/DOE-ProxyApps-C/Makefile =================================================================== --- MultiSource/Benchmarks/DOE-ProxyApps-C/Makefile +++ MultiSource/Benchmarks/DOE-ProxyApps-C/Makefile @@ -1,7 +1,8 @@ # MultiSource/DOE-ProxyApps-C Makefile: Build all subdirectories automatically LEVEL = ../../.. -PARALLEL_DIRS = XSBench +PARALLEL_DIRS = XSBench \ + miniAMR include $(LEVEL)/Makefile.programs Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeLists.txt =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeLists.txt @@ -0,0 +1,6 @@ +set(PROG miniAMR) +list(APPEND CPPFLAGS -I.) +list(APPEND LDFLAGS -lm) +set(RUN_OPTIONS --nx 8 --ny 8 --nz 8 --num_tsteps 100) +llvm_multisource() + Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/LICENSE =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/LICENSE @@ -0,0 +1,167 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. + + Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/Makefile =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/Makefile @@ -0,0 +1,7 @@ +LEVEL = ../../../.. + +PROG = miniAMR +CPPFLAGS = -I. +LDFLAGS = -lm +RUN_OPTIONS = --nx 8 --ny 8 --nz 8 --num_tsteps 100 +include $(LEVEL)/MultiSource/Makefile.multisrc Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/README =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/README @@ -0,0 +1,296 @@ +************************************************************************** +LLVM Test-suite Note: +************************************************************************** +The original source is located at https://github.com/Mantevo/miniAMR. +Beyond this paragraph is the original README contained with the source +code. The Makefile refered to within is not utilized within the +test-suite. The test-suite builds a serial version (openmp and +mpi disabled) with its own cmake and make build system. +************************************************************************** + +miniAMR mini-application + +-------------------------------------- +Contents of this README file: +1. miniAMR overview +2. miniAMR versions +3. building miniAMR +4. running miniAMR +5. notes about the code +-------------------------------------- + +-------------------------------------- +1. miniAMR overview + + miniAMR applies a stencil calculation on a unit cube computational domain, +which is divided into blocks. The blocks all have the same number of cells +in each direction and communicate ghost values with neighboring blocks. With +adaptive mesh refinement, the blocks can represent different levels of +refinement in the larger mesh. Neighboring blocks can be at the same level +or one level different, which means that the length of cells in neighboring +blocks can differ by only a factor of two in each direction. The calculations +on the variables in each cell is an averaging of the values in the chosen +stencil. The refinement and coarsening of the blocks is driven by objects +that are pushed through the mesh. If a block intersects with the surface +or the volume of an object, then that block can be refined. There is also +an option to uniformly refine the mesh. Each cell contains a number of +variables, each of which is evaluated indepently. + +-------------------------------------- +2. miniAMR versions: + +- miniAMR_ref: + + reference version: self-contained MPI-parallel. + +- miniAMR_serial + + serial version of reference version + +------------------- +3. Building miniAMR: + + To make the code, type 'make' in the directory containing the source. +The enclosed Makefile.mpi is configured for a general MPI installation. +Other compiler or other machines will need changes in the CFLAGS +variable to correspond with the flags available for the compiler being used. + +------------------- +4. Running miniAMR: + + miniAMR can be run like this: + + % ./miniAMR.x + + where varies from system to system but usually looks something like 'mpirun -np 4 ' or similar. + + Execution is then driven entirely by the default settings, as configured in default-settings.h. Options may be listed using + + % ./miniAMR.x --help + + To run the program, there are several arguments on the command line. +The list of arguments and their defaults is as follows: + + --nx - block size in x + --ny - block size in y + --nz - block size in z + These control the size of the blocks in the mesh. All of these need to + be even and greater than zero. The default is 10 for each variable. + + --init_x - initial blocks in x + --init_y - initial blocks in y + --init_z - initial blocks in z + These control the number of the blocks on each processor in the + initial mesh. These need to be greater than zero. The default + is 1 block in each direction per processor. The initial mesh + is a unit cube regardless of the number of blocks. + + --reorder - ordering of blocks + This controls whether the blocks are ordered by the RCB algorithm + or by a natural ordering of the processors. The default is 1 which + selects the RCB ordering and the natural ordering is 0. + + --npx - number of processors in the x direction + --npy - number of processors in the y direction + --npz - number of processors in the z direction + These control the number of processors is each direction. The product + of these number has to equal the number of processors being used. The + default is 1 block in each direction. + + --max_blocks - maximun number of blocks per processor + The maximun number of blocks used per processor. This is the number of + blocks that will be allocated at the start of the run and the code will + fail if this number is exceeded. The default is 500 blocks. + + --num_refine - number of levels of refinement + This is the number of levels of refinement that blocks which are refined + will be refined to. If it is zero then the mesh will not be refined. + the default is 5 levels of refinement. + + --block_change - number of levels a block can change during refinement + This parameter controls the number of levels that a block can change + (either refining or coarsening) during a refinement step. The default + is the number of levels of refinement. + + --uniform_refine - if 1, then grid is uniformly refined + This controls whether the mesh is uniformly refined. If it is 1 then the + mesh will be uniformly refined, while if it is zero, the refinement will + be controlled by objects in the mesh. The default is 1. + + --refine_freq - frequency (in timesteps) of checking for refinement + This determines the frequency (in timesteps) between checking if + refinement is needed. The default is every 5 timesteps. + + --target_active - target number of blocks per processor + --target_max - max number of blocks per processor + --target_min - min number of blocks per processor + These allow the user to control the number of blocks per processor. + If these are zero, then no adjustment is made. If target_active is + greater than zero than the code will adjust the number of blocks to + that target after the refinement step. If target_max is greater than + zero then the number of blocks will be reduced if it exceeds this + number. Likewise, if target_min is greater than zero, than the number + of blocks will be raised if there is less than that number after the + refinement step. The default for all of these is zero. + + --inbalance - percentage inbalance to trigger inbalance + This parameter allows the user to set a percentage threshold above + which the load will be balanced amoung the processors. The value + that this is checked against is the maximum number of blocks on a + processor minus the minimum number of blocks on a processor divided + by the average. The default is zero, which means to always load + balance at each refinement step. + + --lb_opt - (0, 1, 2) determine load balance strategy + If set to 0, then load balancing is not performed. The default is + set to 1 which load balances each refinement step. Setting the + parameter to 2 results in load balancing at each stage of the + refinement step. If a processor has a large number of blocks which + are refined several steps, this allows the work (and space needed) + to be shared amoung more processors. + + --num_vars - number of variables (> 0) + The number of variables the will be calculated on and communicated. + The default is 40 variables. + + --comm_vars - number of vars to communicate together + The number of variables that will communicated together. This will + allow shorter but more variables if it is set to something less than + the total number of variables. The default is zero which will + communicate all of the variables at once. + + --num_tsteps - number of timesteps (> 0) + The number of timesteps for which the simulation will be run. The + default is 20. + + --stages_per_ts - number of comm/calc stages per timestep + The number of calculate/communicate stages per timestep. The default + is 20. + + --permute - (no argument) permute communication directions + If this is set, then the order of the communication directions will + be permuted through the six options available. The default is + to send messages in the x direction first, then y, and then z. + + --blocking_send - (no argument) Use blocking sends in the communication + routine instead of the default nonblocking sends. + + --code - change the way communication is done + The default is 0 which communicates only the ghost values that are + needed. Setting this to 1 sends all of the ghost values, and setting + this to 2 also does all of the message processing (refinement or + unrefinement) to be done on the sending side. This allows us to + more closely minic the communication behaviour of codes. + + --checksum_freq - number of stages between checksums + The number of stages between calculating checksums on the variables. + The default is 5. If it is zero, no checks are performed. + + --stencil - 7 or 27 point 3D stencil + The 3D stencil used for the calculations. It can be either 7 or 27 + and the default is 7 since the 27 point calculation will not conserve + the sum of the variables except for the case of uniform refinement. + + --error_tol - (e^{-error_tol} ; >= 0) + This determines the error tolerance for the checksums for the variables. + the tolerance is 10 to the negative power of error_tol. The default + is 8, so the default tolerance is 10^(-8). + + --report_diffusion - (>= 0) none if 0 + This determines if the checksums are printed when they are calculated. + The default is 0, which is no printing. + + --report_perf - (0 .. 15) + This determines how the performance output is displayed. The default + is YAML output (value of 1). There are four output modes and each is + controlled by a bit in the value. The YAML output (to a file called + results.yaml) is controlled by the first bit (report_perf & 1), the + text output file (results.txt) is controlled by the second bit + (report_perf & 2), the output to standard out is controlled by the + third bit (report_perf & 4), and the output of block decomposition + at each refine step is controlled by the forth bit (report_perf & 8). + These options can be combined in any way desired and zero to four + of these options can be used in any run. Setting report_perf to 0 + will result in no output. + + --refine_freq - frequency (timesteps) of refinement (0 for none) + This determines how frequently (in timesteps) the mesh is checked + and refinement is done. The default is every 5 timesteps. If + uniform refinement is turned on, the setting of refine_freq does + not matter and the mesh will be refined before the first timestep. + + --refine_ghosts - (no argument) + The default is to not use the ghost cells of a block to determine if + that block will be refined. Specifying this flag will allow those + ghost cells to be used. + + --num_objects - (>= 0) number of objects to cause refinement + The number of objects on which refinement is based. Default is zero. + + --object - type, position, movement, size, size rate of change + The object keyword has 14 arguments. The first two are integers + and the rest are floating point numbers. They are: + type - The type of object. There is 16 types of objects. They include + the surface of a rectangle (0), a solid rectangle (1), + the surface of a spheroid (2), a solid spheroid (3), + the surface of a hemispheroid (+/- with 3 cutting planes) + (4, 6, 8, 10, 12, 14), + a solid spheroid (+/- with 3 cutting planes)(5, 7, 9, 11, 13, 15), + the surface of a cylinder (20, 22, 24), + and the volume of a cylinder (21, 23, 25). + bounce - If this is 1 then an object will bounce off of the walls + when the center hits an edge of the unit cube. If it is + zero, then the object can leave the mesh. + center - Three doubles that determine the center of the object in the + x, y, and z directions. + move - Three doubles that determine the rate of movement of the center + of the object in the x, y, and z directions. The object moves + this far at each timestep. + size - The initial size of the object in the x, y, and z directions. + If any of these become negative, the object will not be used + in the calculations to determine refinement. These sizes are + from the center to the edge in the specified direction. + inc - The change in size of the object in the x, y, and z directions. + + +Examples of run scripts for a Cray XE6 that illustrate several of the options: + +One sphere moving diagonally on 27 processors: + +mpirun -np 27 -N 7 miniAMR.x --num_refine 4 --max_blocks 9000 --npx 3 --npy 3 --npz 3 --nx 8 --ny 8 --nz 8 --num_objects 1 --object 2 0 -1.71 -1.71 -1.71 0.04 0.04 0.04 1.7 1.7 1.7 0.0 0.0 0.0 --num_tsteps 100 --checksum_freq 1 + +An expanding sphere on 64 processors: + +mpirun -np 64 miniAMR.x --num_refine 4 --max_blocks 6000 --init_x 1 --init_y 1 --init_z 1 --npx 4 --npy 4 --npz 4 --nx 8 --ny 8 --nz 8 --num_objects 1 --object 2 0 -0.01 -0.01 -0.01 0.0 0.0 0.0 0.0 0.0 0.0 0.0009 0.0009 0.0009 --num_tsteps 200 --comm_vars 2 + +Two moving spheres on 16 processors: + +mpirun -np 16 miniAMR.x --num_refine 4 --max_blocks 4000 --init_x 1 --init_y 1 --init_z 1 --npx 4 --npy 2 --npz 2 --nx 8 --ny 8 --nz 8 --num_objects 2 --object 2 0 -1.10 -1.10 -1.10 0.030 0.030 0.030 1.5 1.5 1.5 0.0 0.0 0.0 --object 2 0 0.5 0.5 1.76 0.0 0.0 -0.025 0.75 0.75 0.75 0.0 0.0 0.0 --num_tsteps 100 --checksum_freq 4 --stages_per_ts 16 + +------------------- +5. The code: + + block.c Routines to split and recombine blocks + check_sum.c Calculates check_sum for the arrays + comm_block.c Communicate new location for block during refine + comm.c General routine to do interblock communication + comm_parent.c Communicate refine/unrefine information to parents/children + comm_refine.c Communicate block refine/unrefine to neighbors during refine + comm_util.c Utilities to manage communication lists + driver.c Main driver + init.c Initialization routine + main.c Main routine that reads command line and launches program + move.c Routines that check overlap of objects and blocks + pack.c Pack and unpack blocks to move + plot.c Write out block information for plotting + profile.c Write out performance data + rcb.c Load balancing routines + refine.c Routines to direct refinement step + stencil.c Perform stencil calculations + target.c Add/subtract blocks to reach a target number + util.c Utility routines for timing and allocation + +-- End README file. + +Courtenay T. Vaughan +(ctvaugh@sandia.gov) Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/block.h =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/block.h @@ -0,0 +1,130 @@ +// ************************************************************************ +// +// miniAMR: stencil computations with boundary exchange and AMR. +// +// Copyright (2014) Sandia Corporation. Under the terms of Contract +// DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government +// retains certain rights in this software. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// Questions? Contact Courtenay T. Vaughan (ctvaugh@sandia.gov) +// Richard F. Barrett (rfbarre@sandia.gov) +// +// ************************************************************************ + +typedef struct { + int number; + int level; + int refine; + int new_proc; + int parent; // if original block -1, + // else if on node, number in structure + // else (-2 - parent->number) + int parent_node; + int child_number; + int nei_refine[6]; + int nei_level[6]; /* 0 to 5 = W, E, S, N, D, U; use -2 for boundary */ + int nei[6][2][2]; /* negative if off processor (-1 - proc) */ + int cen[3]; + double ****array; +} block; +block *blocks; + +typedef struct { + int number; + int level; + int parent; // -1 if original block + int parent_node; + int child_number; + int refine; + int child[8]; // n if on node, number if not + // if negative, then onnode child is a parent (-1 - n) + int child_node[8]; + int cen[3]; +} parent; +parent *parents; + +typedef struct { + int number; // number of block + int n; // position in block array +} sorted_block; +sorted_block *sorted_list; +int *sorted_index; + +int my_pe; +int num_pes; + +int max_num_blocks; +int target_active; +int target_max; +int target_min; +int num_refine; +int uniform_refine; +int x_block_size, y_block_size, z_block_size; +int num_vars; +int comm_vars; +int init_block_x, init_block_y, init_block_z; +int reorder; +int npx, npy, npz; +int inbalance; +int refine_freq; +int report_diffusion; +int checksum_freq; +int stages_per_ts; +int error_tol; +int num_tsteps; +int stencil; +int report_perf; +int plot_freq; +int lb_opt; +int block_change; +int code; +int permute; +int nonblocking; +int refine_ghost; + +int max_num_parents; +int num_parents; +int max_active_parent; +int cur_max_level; +int *num_blocks; +int *block_start; +int num_active; +int max_active_block; +int global_active; +int x_block_half, y_block_half, z_block_half; +double tol; +double *grid_sum; +int *p8, *p2; +int mesh_size[3]; +int max_mesh_size; +int *from, *to; +int msg_len[3][4]; +int local_max_b; +int global_max_b; + +int num_objects; +typedef struct { + int type; + int bounce; + double cen[3]; + double orig_cen[3]; + double move[3]; + double orig_move[3]; + double size[3]; + double orig_size[3]; + double inc[3]; +} object; +object *objects; Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/block.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/block.c @@ -0,0 +1,388 @@ +// ************************************************************************ +// +// miniAMR: stencil computations with boundary exchange and AMR. +// +// Copyright (2014) Sandia Corporation. Under the terms of Contract +// DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government +// retains certain rights in this software. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// Questions? Contact Courtenay T. Vaughan (ctvaugh@sandia.gov) +// Richard F. Barrett (rfbarre@sandia.gov) +// +// ************************************************************************ +#include +#include + +#include "block.h" +#include "proto.h" +#include "timer.h" + +// This routine splits blocks that are being refined into 8 new blocks, +// copies the data over to the new blocks, and then disconnects the +// original block from the mesh and connects the new blocks to the +// mesh. The information in old block is also transferred to a parent +// block, which also contains information to identify its children. +void split_blocks(void) +{ + static int side[6][2][2] = { { {0, 2}, {4, 6} }, { {1, 3}, {5, 7} }, + { {0, 1}, {4, 5} }, { {2, 3}, {6, 7} }, + { {0, 1}, {2, 3} }, { {4, 5}, {6, 7} } }; + static int off[6] = {1, -1, 2, -2, 4, -4}; + int i, j, k, m, n, o, v, nl, xp, yp, zp, c, c1, other, + i1, i2, j1, j2, k1, k2, dir, fcase, pe, f, p, + level, sib[8], offset, d, half_size; + block *bp, *bp1; + parent *pp; + + // have to do this by level, else could have a block at a level split and + // its offspring try to connect to a block at a lower (that will be split) + for (m = level = 0; level <= cur_max_level; level++) { + // can not use sorted list here since this routine changes the list + for (n = 0; n < max_active_block; n++) + if (blocks[n].number >= 0 && blocks[n].level == level) { + bp = &blocks[n]; + if (bp->refine == 1) { + nl = bp->number - block_start[level]; + zp = nl/((p2[level]*npx*init_block_x)* + (p2[level]*npy*init_block_y)); + yp = (nl%((p2[level]*npx*init_block_x)* + (p2[level]*npy*init_block_y)))/ + (p2[level]*npx*init_block_x); + xp = nl%(p2[level]*npx*init_block_x); + if ((num_active + 8) > max_num_blocks) { + printf("ERROR: Need more blocks %d %d on %d\n", num_active, max_num_blocks, my_pe); + exit(-1); + } + if ((num_active + 8) > local_max_b) + local_max_b = num_active + 8; + del_sorted_list(bp->number, level); + num_active += 7; + num_blocks[level]--; + num_blocks[level+1] += 8; + for (p = 0; p < max_active_parent; p++) + if (parents[p].number < 0) + break; + if (p == max_num_parents) { + printf("ERROR: Need more parents\n"); + exit(-1); + } + if (p == max_active_parent) + max_active_parent++; + num_parents++; + num_refined++; + pp = &parents[p]; + pp->number = bp->number; + pp->level = bp->level; + pp->parent = bp->parent; + pp->parent_node = bp->parent_node; + pp->child_number = bp->child_number; + parents[pp->parent].child[pp->child_number] = -1 - p; + pp->refine = 0; + pp->cen[0] = bp->cen[0]; + pp->cen[1] = bp->cen[1]; + pp->cen[2] = bp->cen[2]; + + // Define the 8 children + for (o = 0; o < 8; o++) { + for ( ; m < max_num_blocks; m++) + if (blocks[m].number < 0) + break; + if (m == max_num_blocks) { + printf("Error: No inactive blocks available %d %d %d\n", m, num_active, max_num_blocks); + exit(-1); + } + if ((m+1) > max_active_block) + max_active_block = m+1; + bp1 = &blocks[m]; + sib[o] = m; + pp->child[o] = m; + pp->child_node[o] = my_pe; + bp1->refine = 0; + bp1->level = level + 1; + bp1->parent = p; + bp1->parent_node = my_pe; + bp1->child_number = o; + i1 = (o%2); + j1 = ((o/2)%2); + k1 = (o/4); + bp1->number = ((2*zp+k1)*(p2[level+1]*npy*init_block_y) + + (2*yp+j1))*(p2[level+1]*npx*init_block_x) + + 2*xp + i1 + block_start[level+1]; + add_sorted_list(m, bp1->number, (level+1)); + bp1->cen[0] = bp->cen[0] + + (2*i1 - 1)*p2[num_refine - level - 1]; + bp1->cen[1] = bp->cen[1] + + (2*j1 - 1)*p2[num_refine - level - 1]; + bp1->cen[2] = bp->cen[2] + + (2*k1 - 1)*p2[num_refine - level - 1]; + half_size = p2[num_refine - level - 1]; + i1 *= x_block_half; + j1 *= y_block_half; + k1 *= z_block_half; + for (v = 0; v < num_vars; v++) + for (i2 = i = 1; i <= x_block_half; i++, i2+=2) + for (j2 = j = 1; j <= y_block_half; j++, j2+=2) + for (k2 = k = 1; k <= z_block_half; k++, k2+=2) + bp1->array[v][i2 ][j2 ][k2 ] = + bp1->array[v][i2+1][j2 ][k2 ] = + bp1->array[v][i2 ][j2+1][k2 ] = + bp1->array[v][i2+1][j2+1][k2 ] = + bp1->array[v][i2 ][j2 ][k2+1] = + bp1->array[v][i2+1][j2 ][k2+1] = + bp1->array[v][i2 ][j2+1][k2+1] = + bp1->array[v][i2+1][j2+1][k2+1] = + bp->array[v][i+i1][j+j1][k+k1]/8.0; + } + + // children all defined - connect children & disconnect parent + for (c = 0; c < 6; c++) { + // deal with internal connections amoung 8 siblings + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) { + blocks[sib[side[c][i][j]+off[c]]].nei_level[c] = + level + 1; + blocks[sib[side[c][i][j]+off[c]]].nei[c][0][0] = + sib[side[c][i][j]]; + } + // deal with external connections + if (bp->nei_level[c] == -2) // external boundary + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) { + blocks[sib[side[c][i][j]]].nei_level[c] = -2; + blocks[sib[side[c][i][j]]].nei[c][0][0] = 0; + } + else if (bp->nei_level[c] == level-1) { // level less parent + if (bp->nei[c][0][0] >= 0) { // error + printf("%d ERROR: internal misconnect block %d num %d c %d %d\n", + my_pe, n, bp->number, c, bp->nei[c][0][0]); + exit(-1); + } + } else if (bp->nei_level[c] == level) { // paretn level + if (bp->nei[c][0][0] >= 0) { + other = bp->nei[c][0][0]; + c1 = (c/2)*2 + (c+1)%2; + blocks[other].nei_level[c1] = level + 1; + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) { + bp1 = &blocks[sib[side[c][i][j]]]; + bp1->nei_level[c] = level; + bp1->nei[c][0][0] = other; + blocks[other].nei[c1][i][j] = sib[side[c][i][j]]; + } + } + } else if (bp->nei_level[c] == level+1) { // child level + dir = c/2; + fcase = (c%2)*10; + c1 = (c/2)*2 + (c+1)%2; + d = 2*(c%2) - 1; + for (k = fcase+6, i = 0; i < 2; i++) + for (j = 0; j < 2; j++, k++) + if (bp->nei[c][i][j] >= 0) { + other = bp->nei[c][i][j]; + bp1 = &blocks[sib[side[c][i][j]]]; + bp1->nei_level[c] = level+1; + bp1->nei[c][0][0] = other; + blocks[other].nei_level[c1] = level + 1; + blocks[other].nei[c1][0][0] = sib[side[c][i][j]]; + } + } else { + printf("%d ERROR: misconnected b %d %d l %d nei[%d] %d\n", + my_pe, n, bp->number, level, c, bp->nei_level[c]); + exit(-1); + } + } + /* children all defined and connected - inactivate block */ + bp->number = -1; + if (n < m) + m = n; + } + } + } +} + +// This routine takes blocks that are to be coarsened and recombines them. +// Before this routine can be called, all of the child blocks need to be on +// the same processor as the parent. A new block is created and the parent +// and child blocks are inactivated. +void consolidate_blocks(void) +{ + static int side[6][2][2] = { { {0, 2}, {4, 6} }, { {1, 3}, {5, 7} }, + { {0, 1}, {4, 5} }, { {2, 3}, {6, 7} }, + { {0, 1}, {2, 3} }, { {4, 5}, {6, 7} } }; + int n, p, i, j, k, i1, j1, k1, i2, j2, k2, level, o, v, f, c, offset, + other, c1, dir, fcase, pe, nl, pos[3], d, in; + block *bp, *bp1; + parent *pp; + + if (stencil == 7) // add to face case when diags are needed + f = 0; + else + f = 1; + // assume that blocks were moved back to node with parent + for (level = cur_max_level; level >= 0; level--) + for (p = 0; p < max_active_parent; p++) + if ((pp = &parents[p])->number >= 0 && pp->level == level && + pp->refine == -1) { + for (n = 0; n < max_num_blocks; n++) + if (blocks[n].number < 0) // found inactive block + break; + if (n == max_num_blocks) { + printf("Out of free blocks in consolidate_blocks %d\n", my_pe); + exit(-1); + } else + bp = &blocks[n]; + if ((n+1) > max_active_block) + max_active_block = n+1; + if ((num_active + 1) > local_max_b) + local_max_b = num_active + 1; + num_active -= 7; + num_reformed++; + num_blocks[level]++; + num_blocks[level+1] -= 8; + bp->number = pp->number; + pp->number = -1; + bp->level = pp->level; + bp->parent = pp->parent; + bp->parent_node = pp->parent_node; + bp->child_number = pp->child_number; + parents[bp->parent].child[bp->child_number] = n; + add_sorted_list(n, bp->number, level); + bp->refine = 0; + bp->cen[0] = pp->cen[0]; + bp->cen[1] = pp->cen[1]; + bp->cen[2] = pp->cen[2]; + // Copy child arrays back to new block. + for (o = 0; o < 8; o++) { + bp1 = &blocks[pp->child[o]]; + del_sorted_list(bp1->number, (level+1)); + bp1->number = -1; + i1 = (o%2)*x_block_half; + j1 = ((o/2)%2)*y_block_half; + k1 = (o/4)*z_block_half; + for (v = 0; v < num_vars; v++) + for (i2 = i = 1; i <= x_block_half; i++, i2+=2) + for (j2 = j = 1; j <= y_block_half; j++, j2+=2) + for (k2 = k = 1; k <= z_block_half; k++, k2+=2) + bp->array[v][i+i1][j+j1][k+k1] = + bp1->array[v][i2 ][j2 ][k2 ] + + bp1->array[v][i2+1][j2 ][k2 ] + + bp1->array[v][i2 ][j2+1][k2 ] + + bp1->array[v][i2+1][j2+1][k2 ] + + bp1->array[v][i2 ][j2 ][k2+1] + + bp1->array[v][i2+1][j2 ][k2+1] + + bp1->array[v][i2 ][j2+1][k2+1] + + bp1->array[v][i2+1][j2+1][k2+1]; + } + // now figure out communication + for (c = 0; c < 6; c++) { + other = pp->child[side[c][0][0]]; // first child on this side + // four options - boundary, level of parent, level of children, + // and level of children + 1 (that are offnode and unrefining) + if (blocks[other].nei_level[c] == -2) { + // external boundary (only need to check one child) + bp->nei_level[c] = -2; + bp->nei_refine[c] = 0; + } else if (blocks[other].nei_level[c] == level) { + // same level as parent + if (blocks[other].nei[c][0][0] >= 0) { + // on node - if it gets consolidated later, it will fix + // the connections at that point + c1 = (c/2)*2 + (c+1)%2; + bp->nei[c][0][0] = blocks[other].nei[c][0][0]; + bp->nei_level[c] = level; + bp->nei_refine[c] = 0; + blocks[blocks[other].nei[c][0][0]].nei[c1][0][0] = n; + blocks[blocks[other].nei[c][0][0]].nei_level[c1] = level; + blocks[blocks[other].nei[c][0][0]].nei_refine[c1] = 0; + } + } else { + dir = c/2; + fcase = (c%2)*10; + offset = p2[num_refine - level - 1]; + for (k = fcase+6, i = 0; i < 2; i++) + for (j = 0; j < 2; j++, k++) { + other = pp->child[side[c][i][j]]; + if (blocks[other].nei[c][0][0] >= 0) { + if (blocks[other].nei_level[c] == level+2) { + printf("%d ERROR: %d con %d block %d c %d wrong level %d\n", + my_pe, p, n, other, c, level); + exit(-1); + } + c1 = (c/2)*2 + (c+1)%2; + bp->nei[c][i][j] = blocks[other].nei[c][0][0]; + bp->nei_level[c] = level + 1; + bp->nei_refine[c] = 0; + blocks[blocks[other].nei[c][0][0]].nei[c1][0][0] = + n; + blocks[blocks[other].nei[c][0][0]].nei_level[c1] = + level; + blocks[blocks[other].nei[c][0][0]].nei_refine[c1] = + 0; + } + } + } + } + } +} + +void add_sorted_list(int n, int number, int level) +{ + int i, j; + + for (i = sorted_index[level]; i < sorted_index[level+1]; i++) + if (number > sorted_list[i].number) + break; + for (j = sorted_index[num_refine+1]; j > i; j--) { + sorted_list[j].number = sorted_list[j-1].number; + sorted_list[j].n = sorted_list[j-1].n; + } + sorted_list[i].number = number; + sorted_list[i].n = n; + for (i = level+1; i <= (num_refine+1); i++) + sorted_index[i]++; +} + +void del_sorted_list(int number, int level) +{ + int i, j; + + for (i = sorted_index[level]; i < sorted_index[level+1]; i++) + if (number == sorted_list[i].number) + break; + if (number != sorted_list[i].number) { + printf("ERROR: del_sorted_list on %d - number %d not found\n", + my_pe, number); + exit(-1); + } + for (j = level+1; j <= (num_refine+1); j++) + sorted_index[j]--; + for (j = i; j < sorted_index[num_refine+1]; j++) { + sorted_list[j].number = sorted_list[j+1].number; + sorted_list[j].n = sorted_list[j+1].n; + } +} + +int find_sorted_list(int number, int level) +{ + int i; + + for (i = sorted_index[level]; i < sorted_index[level+1]; i++) + if (number == sorted_list[i].number) + return sorted_list[i].n; + printf("ERROR: find_sorted_list on %d - number %d not found\n", + my_pe, number); + exit(-1); +} Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/check_sum.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/check_sum.c @@ -0,0 +1,61 @@ +// ************************************************************************ +// +// miniAMR: stencil computations with boundary exchange and AMR. +// +// Copyright (2014) Sandia Corporation. Under the terms of Contract +// DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government +// retains certain rights in this software. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// Questions? Contact Courtenay T. Vaughan (ctvaugh@sandia.gov) +// Richard F. Barrett (rfbarre@sandia.gov) +// +// ************************************************************************ + +#include + +#include "block.h" +#include "timer.h" +#include "proto.h" + +// Generate check sum for a variable over all active blocks. +double check_sum(int var) +{ + int n, in, i, j, k; + double sum, block_sum, t1, t2; + block *bp; + + t1 = timer(); + + sum = 0.0; + for (in = 0; in < sorted_index[num_refine+1]; in++) { + n = sorted_list[in].n; + bp = &blocks[n]; + if (bp->number >= 0) { + block_sum = 0.0; + for (i = 1; i <= x_block_size; i++) + for (j = 1; j <= y_block_size; j++) + for (k = 1; k <= z_block_size; k++) + block_sum += bp->array[var][i][j][k]; + sum += block_sum; + } + } + + t2 = timer(); + timer_cs_calc += t2 - t1; + total_red++; + + return sum; +} Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/comm.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/comm.c @@ -0,0 +1,609 @@ +// ************************************************************************ +// +// miniAMR: stencil computations with boundary exchange and AMR. +// +// Copyright (2014) Sandia Corporation. Under the terms of Contract +// DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government +// retains certain rights in this software. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// Questions? Contact Courtenay T. Vaughan (ctvaugh@sandia.gov) +// Richard F. Barrett (rfbarre@sandia.gov) +// +// ************************************************************************ + +#include +#include + +#include "block.h" +#include "timer.h" +#include "proto.h" + +// The routines in this file are used in the communication of ghost values +// between blocks, both on processor and off processor. + +// Main communication routine that sends and recieves ghost values between +// blocks on different processors and directos blocks on the same processor +// to exchange their ghost values. +void comm(int start, int num_comm, int stage) +{ + int i, j, k, l, m, n, dir, o, in, which, offset, type; + int permutations[6][3] = { {0, 1, 2}, {1, 2, 0}, {2, 0, 1}, + {0, 2, 1}, {1, 0, 2}, {2, 1, 0} }; + double t1, t2, t3, t4; + block *bp; + + for (o = 0; o < 3; o++) { + if (permute) + dir = permutations[stage%6][o]; + else + dir = o; + type = dir; + t1 = timer(); + + // While values are being sent over the mesh, go through and direct + // blocks to exchange ghost values with other blocks that are on + // processor. Also apply boundary conditions for boundary of domain. + for (in = 0; in < sorted_index[num_refine+1]; in++) { + n = sorted_list[in].n; + bp = &blocks[n]; + if (bp->number >= 0) + for (l = dir*2; l < (dir*2 + 2); l++) { + if (bp->nei_level[l] == bp->level) { + t2 = timer(); + if ((m = bp->nei[l][0][0]) > n) { + on_proc_comm(n, m, l, start, num_comm); + counter_same[dir] += 2; + } + timer_comm_same[dir] += timer() - t2; + } else if (bp->nei_level[l] == (bp->level+1)) { + t2 = timer(); + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + if ((m = bp->nei[l][i][j]) > n) { + on_proc_comm_diff(n, m, l, i, j, start, num_comm); + counter_diff[dir] += 2; + } + timer_comm_diff[dir] += timer() - t2; + } else if (bp->nei_level[l] == (bp->level-1)) { + t2 = timer(); + if ((m = bp->nei[l][0][0]) > n) { + k = dir*2 + 1 - l%2; + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + if (blocks[m].nei[k][i][j] == n) { + on_proc_comm_diff(m, n, k, i, j, start, num_comm); + counter_diff[dir] += 2; + } + } + timer_comm_diff[dir] += timer() - t2; + } else if (bp->nei_level[l] == -2) { + t2 = timer(); + apply_bc(l, bp, start, num_comm); + counter_bc[dir]++; + timer_comm_bc[dir] += timer() - t2; + } else { + printf("ERROR: misconnected block\n"); + exit(-1); + } + } + } + timer_comm_dir[dir] += timer() - t1; + } +} + +// Routine that does on processor communication between two blocks that +// are at the same level of refinement. +void on_proc_comm(int n, int n1, int l, int start, int num_comm) +{ + int i, j, k, m; + int is, ie, js, je; + block *bp, *bp1; + + /* Determine direction and then exchange data across the face + */ + if (!code) { + if ((l/2) == 0) { /* West, East */ + if ((l%2) == 0) { /* West */ + bp = &blocks[n]; + bp1 = &blocks[n1]; + } else { /* East */ + bp1 = &blocks[n]; + bp = &blocks[n1]; + } + for (m = start; m < start+num_comm; m++) + for (j = 1; j <= y_block_size; j++) + for (k = 1; k <= z_block_size; k++) { + bp1->array[m][x_block_size+1][j][k] = bp->array[m][1][j][k]; + bp->array[m][0][j][k] = bp1->array[m][x_block_size][j][k]; + } + } else if ((l/2) == 1) { /* South, North */ + if ((l%2) == 0) { /* South */ + bp = &blocks[n]; + bp1 = &blocks[n1]; + } else { /* North */ + bp1 = &blocks[n]; + bp = &blocks[n1]; + } + if (stencil == 7) { + is = 1; + ie = x_block_size; + } else { + is = 0; + ie = x_block_size + 1; + } + for (m = start; m < start+num_comm; m++) + for (i = is; i <= ie; i++) + for (k = 1; k <= z_block_size; k++) { + bp1->array[m][i][y_block_size+1][k] = bp->array[m][i][1][k]; + bp->array[m][i][0][k] = bp1->array[m][i][y_block_size][k]; + } + } else if ((l/2) == 2) { /* Down, Up */ + if ((l%2) == 0) { /* Down */ + bp = &blocks[n]; + bp1 = &blocks[n1]; + } else { /* Up */ + bp1 = &blocks[n]; + bp = &blocks[n1]; + } + if (stencil == 7) { + is = 1; + ie = x_block_size; + js = 1; + je = y_block_size; + } else { + is = 0; + ie = x_block_size + 1; + js = 0; + je = y_block_size + 1; + } + for (m = start; m < start+num_comm; m++) + for (i = is; i <= ie; i++) + for (j = js; j <= je; j++) { + bp1->array[m][i][j][z_block_size+1] = bp->array[m][i][j][1]; + bp->array[m][i][j][0] = bp1->array[m][i][j][z_block_size]; + } + } + } else { /* set all ghosts */ + if ((l/2) == 0) { /* West, East */ + if ((l%2) == 0) { /* West */ + bp = &blocks[n]; + bp1 = &blocks[n1]; + } else { /* East */ + bp1 = &blocks[n]; + bp = &blocks[n1]; + } + for (m = start; m < start+num_comm; m++) + for (j = 0; j <= y_block_size+1; j++) + for (k = 0; k <= z_block_size+1; k++) { + bp1->array[m][x_block_size+1][j][k] = bp->array[m][1][j][k]; + bp->array[m][0][j][k] = bp1->array[m][x_block_size][j][k]; + } + } else if ((l/2) == 1) { /* South, North */ + if ((l%2) == 0) { /* South */ + bp = &blocks[n]; + bp1 = &blocks[n1]; + } else { /* North */ + bp1 = &blocks[n]; + bp = &blocks[n1]; + } + for (m = start; m < start+num_comm; m++) + for (i = 0; i <= x_block_size+1; i++) + for (k = 0; k <= z_block_size+1; k++) { + bp1->array[m][i][y_block_size+1][k] = bp->array[m][i][1][k]; + bp->array[m][i][0][k] = bp1->array[m][i][y_block_size][k]; + } + } else if ((l/2) == 2) { /* Down, Up */ + if ((l%2) == 0) { /* Down */ + bp = &blocks[n]; + bp1 = &blocks[n1]; + } else { /* Up */ + bp1 = &blocks[n]; + bp = &blocks[n1]; + } + for (m = start; m < start+num_comm; m++) + for (i = 0; i <= x_block_size+1; i++) + for (j = 0; j <= y_block_size+1; j++) { + bp1->array[m][i][j][z_block_size+1] = bp->array[m][i][j][1]; + bp->array[m][i][j][0] = bp1->array[m][i][j][z_block_size]; + } + } + } +} + +// Routine that does on processor communication between two blocks that are +// at different levels of refinement. The order of the blocks that are +// being input determine which block is at a higher level of refinement. +void on_proc_comm_diff(int n, int n1, int l, int iq, int jq, + int start, int num_comm) +{ + int i, j, k, m; + int i0, i1, i2, i3, j0, j1, j2, j3, k0, k1, k2, k3; + block *bp, *bp1; + + bp = &blocks[n]; + bp1 = &blocks[n1]; + + /* (iq, jq) quarter face on block n to whole face on block n1 + */ + if (!code) { + /* only have to communicate ghost values - bp is level, bp1 is level+1 - + * in 2 to 1 case get 0..block_half from one proc and + * block_half+1..block_size+1 from another + * in 1 to 2 case get 0..block_size+1 from 0..block_half+1 or + * block_half..block_size+1 with averages + */ + if ((l/2) == 0) { + if (l == 0) { /* West */ + i0 = 0; + i1 = 1; + i2 = x_block_size + 1; + i3 = x_block_size; + } else { /* East */ + i0 = x_block_size + 1; + i1 = x_block_size; + i2 = 0; + i3 = 1; + } + j1 = jq*y_block_half; + k1 = iq*z_block_half; + for (m = start; m < start+num_comm; m++) + for (j = 1; j <= y_block_half; j++) + for (k = 1; k <= z_block_half; k++) { + bp1->array[m][i2][2*j-1][2*k-1] = + bp1->array[m][i2][2*j-1][2*k ] = + bp1->array[m][i2][2*j ][2*k-1] = + bp1->array[m][i2][2*j ][2*k ] = + bp->array[m][i1][j+j1][k+k1]/4.0; + bp->array[m][i0][j+j1][k+k1] = + bp1->array[m][i3][2*j-1][2*k-1] + + bp1->array[m][i3][2*j-1][2*k ] + + bp1->array[m][i3][2*j ][2*k-1] + + bp1->array[m][i3][2*j ][2*k ]; + } + } else if ((l/2) == 1) { + if (l == 2) { /* South */ + j0 = 0; + j1 = 1; + j2 = y_block_size + 1; + j3 = y_block_size; + } else { /* North */ + j0 = y_block_size + 1; + j1 = y_block_size; + j2 = 0; + j3 = 1; + } + i1 = jq*x_block_half; + k1 = iq*z_block_half; + for (m = start; m < start+num_comm; m++) + for (i = 1; i <= x_block_half; i++) + for (k = 1; k <= z_block_half; k++) { + bp1->array[m][2*i-1][j2][2*k-1] = + bp1->array[m][2*i-1][j2][2*k ] = + bp1->array[m][2*i ][j2][2*k-1] = + bp1->array[m][2*i ][j2][2*k ] = + bp->array[m][i+i1][j1][k+k1]/4.0; + bp->array[m][i+i1][j0][k+k1] = + bp1->array[m][2*i-1][j3][2*k-1] + + bp1->array[m][2*i-1][j3][2*k ] + + bp1->array[m][2*i ][j3][2*k-1] + + bp1->array[m][2*i ][j3][2*k ]; + } + } else if ((l/2) == 2) { + if (l == 4) { /* Down */ + k0 = 0; + k1 = 1; + k2 = z_block_size + 1; + k3 = z_block_size; + } else { /* Up */ + k0 = z_block_size + 1; + k1 = z_block_size; + k2 = 0; + k3 = 1; + } + i1 = jq*x_block_half; + j1 = iq*y_block_half; + for (m = start; m < start+num_comm; m++) + for (i = 1; i <= x_block_half; i++) + for (j = 1; j <= y_block_half; j++) { + bp1->array[m][2*i-1][2*j-1][k2] = + bp1->array[m][2*i-1][2*j ][k2] = + bp1->array[m][2*i ][2*j-1][k2] = + bp1->array[m][2*i ][2*j ][k2] = + bp->array[m][i+i1][j+j1][k1]/4.0; + bp->array[m][i+i1][j+j1][k0] = + bp1->array[m][2*i-1][2*j-1][k3] + + bp1->array[m][2*i-1][2*j ][k3] + + bp1->array[m][2*i ][2*j-1][k3] + + bp1->array[m][2*i ][2*j ][k3]; + } + } + } else { /* transfer ghosts */ + if ((l/2) == 0) { + if (l == 0) { /* West */ + i0 = 0; + i1 = 1; + i2 = x_block_size + 1; + i3 = x_block_size; + } else { /* East */ + i0 = x_block_size + 1; + i1 = x_block_size; + i2 = 0; + i3 = 1; + } + j1 = jq*y_block_half; + k1 = iq*z_block_half; + j2 = y_block_size + 1; + j3 = y_block_half + 1; + k2 = z_block_size + 1; + k3 = z_block_half + 1; + for (m = start; m < start+num_comm; m++) { + bp1->array[m][i2][0][0] = bp->array[m][i1][j1][k1]/4.0; + for (k = 1; k <= z_block_half; k++) + bp1->array[m][i2][0][2*k-1] = + bp1->array[m][i2][0][2*k ] = bp->array[m][i1][j1][k+k1]/4.0; + bp1->array[m][i2][0][k2] = bp->array[m][i1][j1][k3+k1]/4.0; + if (jq == 0) { + if (iq == 0) + bp->array[m][i0][0][0 ] = bp1->array[m][i3][0][0 ]; + else + bp->array[m][i0][0][k2] = bp1->array[m][i3][0][k2]; + for (k = 1; k <= z_block_half; k++) + bp->array[m][i0][0][k+k1] = (bp1->array[m][i3][0][2*k-1] + + bp1->array[m][i3][0][2*k ]); + } + for (j = 1; j <= y_block_half; j++) { + bp1->array[m][i2][2*j-1][0] = + bp1->array[m][i2][2*j ][0] = bp->array[m][i1][j+j1][k1]/4.0; + if (iq == 0) + bp->array[m][i0][j+j1][0 ] = (bp1->array[m][i3][2*j-1][0 ] + + bp1->array[m][i3][2*j ][0 ]); + else + bp->array[m][i0][j+j1][k2] = (bp1->array[m][i3][2*j-1][k2] + + bp1->array[m][i3][2*j ][k2]); + for (k = 1; k <= z_block_half; k++) { + bp1->array[m][i2][2*j-1][2*k-1] = + bp1->array[m][i2][2*j-1][2*k ] = + bp1->array[m][i2][2*j ][2*k-1] = + bp1->array[m][i2][2*j ][2*k ] = + bp->array[m][i1][j+j1][k+k1]/4.0; + bp->array[m][i0][j+j1][k+k1] = + bp1->array[m][i3][2*j-1][2*k-1] + + bp1->array[m][i3][2*j-1][2*k ] + + bp1->array[m][i3][2*j ][2*k-1] + + bp1->array[m][i3][2*j ][2*k ]; + } + bp1->array[m][i2][2*j-1][k2] = + bp1->array[m][i2][2*j ][k2] = bp->array[m][i1][j+j1][k3+k1]/4.0; + } + bp1->array[m][i2][j2][0] = bp->array[m][i1][j3+j1][k1]/4.0; + for (k = 1; k <= z_block_half; k++) + bp1->array[m][i2][j2][2*k-1] = + bp1->array[m][i2][j2][2*k ] = bp->array[m][i1][j3+j1][k+k1]/4.0; + bp1->array[m][i2][j2][k2] = bp->array[m][i1][j3+j1][k3+k1]/4.0; + if (jq == 1) { + if (iq == 0) + bp->array[m][i0][j2][0 ] = bp1->array[m][i3][j2][0 ]; + else + bp->array[m][i0][j2][k2] = bp1->array[m][i3][j2][k2]; + for (k = 1; k <= z_block_half; k++) + bp->array[m][i0][j2][k+k1] = (bp1->array[m][i3][j2][2*k-1] + + bp1->array[m][i3][j2][2*k ]); + } + } + } else if ((l/2) == 1) { + if (l == 2) { /* South */ + j0 = 0; + j1 = 1; + j2 = y_block_size + 1; + j3 = y_block_size; + } else { /* North */ + j0 = y_block_size + 1; + j1 = y_block_size; + j2 = 0; + j3 = 1; + } + i1 = jq*x_block_half; + k1 = iq*z_block_half; + i2 = x_block_size + 1; + i3 = x_block_half + 1; + k2 = z_block_size + 1; + k3 = z_block_half + 1; + for (m = start; m < start+num_comm; m++) { + bp1->array[m][0][j2][0 ] = bp->array[m][i1][j1][k1]/4.0; + for (k = 1; k <= z_block_half; k++) + bp1->array[m][0][j2][2*k-1] = + bp1->array[m][0][j2][2*k ] = bp->array[m][i1][j1][k+k1]/4.0; + bp1->array[m][0][j2][k2] = bp->array[m][i1][j1][k3+k1]/4.0; + if (jq == 0) { + if (iq == 0) + bp->array[m][0][j0][0 ] = bp1->array[m][0][j3][0 ]; + else + bp->array[m][0][j0][k2] = bp1->array[m][0][j3][k2]; + for (k = 1; k <= z_block_half; k++) + bp->array[m][0][j0][k+k1] = (bp1->array[m][0][j3][2*k-1] + + bp1->array[m][0][j3][2*k ]); + } + for (i = 1; i <= x_block_half; i++) { + bp1->array[m][2*i-1][j2][0] = + bp1->array[m][2*i ][j2][0] = bp->array[m][i+i1][j1][k1]/4.0; + if (iq == 0) + bp->array[m][i+i1][j0][0 ] = (bp1->array[m][2*i-1][j3][0 ] + + bp1->array[m][2*i ][j3][0 ]); + else + bp->array[m][i+i1][j0][k2] = (bp1->array[m][2*i-1][j3][k2] + + bp1->array[m][2*i ][j3][k2]); + for (k = 1; k <= z_block_half; k++) { + bp1->array[m][2*i-1][j2][2*k-1] = + bp1->array[m][2*i-1][j2][2*k ] = + bp1->array[m][2*i ][j2][2*k-1] = + bp1->array[m][2*i ][j2][2*k ] = + bp->array[m][i+i1][j1][k+k1]/4.0; + bp->array[m][i+i1][j0][k+k1] = + bp1->array[m][2*i-1][j3][2*k-1] + + bp1->array[m][2*i-1][j3][2*k ] + + bp1->array[m][2*i ][j3][2*k-1] + + bp1->array[m][2*i ][j3][2*k ]; + } + bp1->array[m][2*i-1][j2][k2] = + bp1->array[m][2*i ][j2][k2] = bp->array[m][i+i1][j1][k3+k1]/4.0; + } + bp1->array[m][i2][j2][0 ] = bp->array[m][i3+i1][j1][k1]/4.0; + for (k = 1; k <= z_block_half; k++) + bp1->array[m][i2][j2][2*k-1] = + bp1->array[m][i2][j2][2*k ] = bp->array[m][i3+i1][j1][k+k1]/4.0; + bp1->array[m][i2][j2][k2] = bp->array[m][i3+i1][j1][k3+k1]/4.0; + if (jq == 1) { + if (iq == 0) + bp->array[m][i2][j0][0 ] = bp1->array[m][i2][j3][0 ]; + else + bp->array[m][i2][j0][k2] = bp1->array[m][i2][j3][k2]; + for (k = 1; k <= z_block_half; k++) + bp->array[m][i2][j0][k+k1] = (bp1->array[m][i2][j3][2*k-1] + + bp1->array[m][i2][j3][2*k ]); + } + } + } else if ((l/2) == 2) { + if (l == 4) { /* Down */ + k0 = 0; + k1 = 1; + k2 = z_block_size + 1; + k3 = z_block_size; + } else { /* Up */ + k0 = z_block_size + 1; + k1 = z_block_size; + k2 = 0; + k3 = 1; + } + i1 = jq*x_block_half; + j1 = iq*y_block_half; + i2 = x_block_size + 1; + i3 = x_block_half + 1; + j2 = y_block_size + 1; + j3 = y_block_half + 1; + for (m = start; m < start+num_comm; m++) { + bp1->array[m][0][0 ][k2] = bp->array[m][i1][j1][k1]/4.0; + for (j = 1; j <= y_block_half; j++) + bp1->array[m][0][2*j-1][k2] = + bp1->array[m][0][2*j ][k2] = bp->array[m][i1][j+j1][k1]/4.0; + bp1->array[m][0][j2][k2] = bp->array[m][i1][j3+j1][k1]/4.0; + if (jq == 0) { + if (iq == 0) + bp->array[m][0][0 ][k0] = bp1->array[m][0][0 ][k3]; + else + bp->array[m][0][j2][k0] = bp1->array[m][0][j2][k3]; + for (j = 1; j <= y_block_half; j++) + bp->array[m][0][j+j1][k0] = (bp1->array[m][0][2*j-1][k3] + + bp1->array[m][0][2*j ][k3]); + } + for (i = 1; i <= x_block_half; i++) { + bp1->array[m][2*i-1][0][k2] = + bp1->array[m][2*i ][0][k2] = bp->array[m][i+i1][j1][k1]/4.0; + if (iq == 0) + bp->array[m][i+i1][0][k0] = (bp1->array[m][2*i-1][0][k3] + + bp1->array[m][2*i ][0][k3]); + else + bp->array[m][i+i1][j2][k0] = (bp1->array[m][2*i-1][j2][k3] + + bp1->array[m][2*i ][j2][k3]); + for (j = 1; j <= y_block_half; j++) { + bp1->array[m][2*i-1][2*j-1][k2] = + bp1->array[m][2*i-1][2*j ][k2] = + bp1->array[m][2*i ][2*j-1][k2] = + bp1->array[m][2*i ][2*j ][k2] = + bp->array[m][i+i1][j+j1][k1]/4.0; + bp->array[m][i+i1][j+j1][k0] = + bp1->array[m][2*i-1][2*j-1][k3] + + bp1->array[m][2*i-1][2*j ][k3] + + bp1->array[m][2*i ][2*j-1][k3] + + bp1->array[m][2*i ][2*j ][k3]; + } + bp1->array[m][2*i-1][j2][k2] = + bp1->array[m][2*i ][j2][k2] = bp->array[m][i+i1][j3+j1][k1]/4.0; + } + bp1->array[m][i2][0 ][k2] = bp->array[m][i3+i1][j1][k1]/4.0; + for (j = 1; j <= y_block_half; j++) + bp1->array[m][i2][2*j-1][k2] = + bp1->array[m][i2][2*j ][k2] = bp->array[m][i3+i1][j+j1][k1]/4.0; + bp1->array[m][i2][j2][k2] = bp->array[m][i3+i1][j3+j1][k1]/4.0; + if (jq == 1) { + if (iq == 0) + bp->array[m][i2][0 ][k0] = bp1->array[m][i2][0 ][k3]; + else + bp->array[m][i2][j2][k0] = bp1->array[m][i2][j2][k3]; + for (j = 1; j <= y_block_half; j++) + bp->array[m][i2][j+j1][k0] = (bp1->array[m][i2][2*j-1][k3] + + bp1->array[m][i2][2*j ][k3]); + } + } + } + } +} + +// Apply reflective boundary conditions to a face of a block. +void apply_bc(int l, block *bp, int start, int num_comm) +{ + int var, i, j, k, f, t; + + t = 0; + f = 1; + if (!code && stencil == 7) + switch (l) { + case 1: t = x_block_size + 1; + f = x_block_size; + case 0: for (var = start; var < start+num_comm; var++) + for (j = 1; j <= y_block_size; j++) + for (k = 1; k <= z_block_size; k++) + bp->array[var][t][j][k] = bp->array[var][f][j][k]; + break; + case 3: t = y_block_size + 1; + f = y_block_size; + case 2: for (var = start; var < start+num_comm; var++) + for (i = 1; i <= x_block_size; i++) + for (k = 1; k <= z_block_size; k++) + bp->array[var][i][t][k] = bp->array[var][i][f][k]; + break; + case 5: t = z_block_size + 1; + f = z_block_size; + case 4: for (var = start; var < start+num_comm; var++) + for (i = 1; i <= x_block_size; i++) + for (j = 1; j <= y_block_size; j++) + bp->array[var][i][j][t] = bp->array[var][i][j][f]; + break; + } + else + switch (l) { + case 1: t = x_block_size + 1; + f = x_block_size; + case 0: for (var = start; var < start+num_comm; var++) + for (j = 0; j <= y_block_size+1; j++) + for (k = 0; k <= z_block_size+1; k++) + bp->array[var][t][j][k] = bp->array[var][f][j][k]; + break; + case 3: t = y_block_size + 1; + f = y_block_size; + case 2: for (var = start; var < start+num_comm; var++) + for (i = 0; i <= x_block_size+1; i++) + for (k = 0; k <= z_block_size+1; k++) + bp->array[var][i][t][k] = bp->array[var][i][f][k]; + break; + case 5: t = z_block_size + 1; + f = z_block_size; + case 4: for (var = start; var < start+num_comm; var++) + for (i = 0; i <= x_block_size+1; i++) + for (j = 0; j <= y_block_size+1; j++) + bp->array[var][i][j][t] = bp->array[var][i][j][f]; + break; + } +} Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/driver.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/driver.c @@ -0,0 +1,112 @@ +// ************************************************************************ +// +// miniAMR: stencil computations with boundary exchange and AMR. +// +// Copyright (2014) Sandia Corporation. Under the terms of Contract +// DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government +// retains certain rights in this software. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// Questions? Contact Courtenay T. Vaughan (ctvaugh@sandia.gov) +// Richard F. Barrett (rfbarre@sandia.gov) +// +// ************************************************************************ + +#include +#include + +#include "block.h" +#include "timer.h" +#include "proto.h" + +// Main driver for program. +void driver(void) +{ + int ts, var, start, number, stage, comm_stage; + double t1, t2, t3, t4; + double sum; + + init(); + init_profile(); + counter_malloc_init = counter_malloc; + size_malloc_init = size_malloc; + + t1 = timer(); + + if (num_refine || uniform_refine) refine(0); + t2 = timer(); + timer_refine_all += t2 - t1; + + if (plot_freq) + plot(0); + t3 = timer(); + timer_plot += t3 - t2; + + nb_min = nb_max = global_active; + + for (comm_stage = 0, ts = 1; ts <= num_tsteps; ts++) { + for (stage = 0; stage < stages_per_ts; stage++, comm_stage++) { + total_blocks += global_active; + if (global_active < nb_min) + nb_min = global_active; + if (global_active > nb_max) + nb_max = global_active; + for (start = 0; start < num_vars; start += comm_vars) { + if (start+comm_vars > num_vars) + number = num_vars - start; + else + number = comm_vars; + t3 = timer(); + comm(start, number, comm_stage); + t4 = timer(); + timer_comm_all += t4 - t3; + for (var = start; var < (start+number); var++) { + stencil_calc(var); + t3 = timer(); + timer_calc_all += t3 - t4; + if (checksum_freq && !(stage%checksum_freq)) { + sum = check_sum(var); + if (report_diffusion && !my_pe) + printf("%d var %d sum %lf old %lf diff %lf tol %lf\n", + ts, var, sum, grid_sum[var], + (fabs(sum - grid_sum[var])/grid_sum[var]), tol); + if (fabs(sum - grid_sum[var])/grid_sum[var] > tol) { + if (!my_pe) + printf("Time step %d sum %lf (old %lf) variable %d difference too large\n", ts, sum, grid_sum[var], var); + return; + } + grid_sum[var] = sum; + } + t4 = timer(); + timer_cs_all += t4 - t3; + } + } + } + + if (num_refine && !uniform_refine) { + move(); + if (!(ts%refine_freq)) + refine(ts); + } + t2 = timer(); + timer_refine_all += t2 - t4; + + t3 = timer(); + if (plot_freq && !(ts%plot_freq)) + plot(ts); + timer_plot += timer() - t3; + } + timer_all = timer() - t1; +} Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/glibc_compat_rand.h =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/glibc_compat_rand.h @@ -0,0 +1,16 @@ +/*===------------- glibc_compat_rand.h- glibc rand emulation --------------===*\ + * |* + * |* The LLVM Compiler Infrastructure + * |* + * |* This file is distributed under the University of Illinois Open Source + * |* License. See LICENSE.TXT for details. + * |* + * \*===----------------------------------------------------------------------===*/ + +#ifndef GLIBC_COMPAT_RAND_H +#define GLIBC_COMPAT_RAND_H + +int glibc_compat_rand(void); +void glibc_compat_srand(unsigned int seed); + +#endif /* GLIBC_COMPAT_RAND_H */ Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/glibc_compat_rand.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/glibc_compat_rand.c @@ -0,0 +1,60 @@ +/*===------------ glibc_compat_rand.c - glibc rand emulation --------------===*\ + * * + * * The LLVM Compiler Infrastructure + * * + * * This file is distributed under the University of Illinois Open Source + * * License. See LICENSE.TXT for details. + * * + * \*===----------------------------------------------------------------------===*/ + +#include "glibc_compat_rand.h" + +/** + * * This rand implementation is designed to emulate the implementation of + * * rand/srand in recent versions of glibc. This is used for programs which + * * require this specific rand implementation in order to pass verification + * * tests. + * * + * * For more information, see: http://www.mathstat.dal.ca/~selinger/random/ + * **/ + +#define TABLE_SIZE 344 +static unsigned int table[TABLE_SIZE]; +static int next; + +int glibc_compat_rand(void) { + /* Calculate the indices i-3 and i-31 in the circular vector. */ + int i3 = (next < 3) ? (TABLE_SIZE + next - 3) : (next - 3); + int i31 = (next < 31) ? (TABLE_SIZE + next - 31) : (next - 31); + + table[next] = table[i3] + table[i31]; + unsigned int r = table[next] >> 1; + + ++next; + if (next >= TABLE_SIZE) + next = 0; + + return r; +} + +void glibc_compat_srand(unsigned int seed) { + if (seed == 0) + seed = 1; + + table[0] = seed; + + for (int i = 1; i < 31; i++) { + int r = (16807ll * table[i - 1]) % 2147483647; + if (r < 0) + r += 2147483647; + + table[i] = r; + } + + for (int i = 31; i < 34; i++) + table[i] = table[i - 31]; + for (int i = 34; i < TABLE_SIZE; i++) + table[i] = table[i - 31] + table[i - 3]; + + next = 0; +} Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/init.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/init.c @@ -0,0 +1,152 @@ +// ************************************************************************ +// +// miniAMR: stencil computations with boundary exchange and AMR. +// +// Copyright (2014) Sandia Corporation. Under the terms of Contract +// DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government +// retains certain rights in this software. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// Questions? Contact Courtenay T. Vaughan (ctvaugh@sandia.gov) +// Richard F. Barrett (rfbarre@sandia.gov) +// +// ************************************************************************ + +#include +#include + +#include "block.h" +#include "proto.h" + +#ifndef NO_GLIBC_COMPAT_RAND +#include "glibc_compat_rand.h" + +#define rand glibc_compat_rand +#define srand glibc_compat_srand +#endif + + +// Initialize the problem and setup initial blocks. +void init(void) +{ + int n, var, i, j, k, l, m, o, size, dir, i1, i2, j1, j2, k1, k2, ib, jb, kb; + int start[num_pes], pos[3][num_pes], pos1[npx][npy][npz], set, + num, npx1, npy1, npz1, pes, fact, fac[25], nfac, f; + block *bp; + + tol = pow(10.0, ((double) -error_tol)); + + p2[0] = p8[0] = 1; + for (i = 0; i < (num_refine+1); i++) { + p8[i+1] = p8[i]*8; + p2[i+1] = p2[i]*2; + sorted_index[i] = 0; + } + sorted_index[num_refine+1] = 0; + block_start[0] = 0; + local_max_b = global_max_b = init_block_x*init_block_y*init_block_z; + num = num_pes*global_max_b; + for (i = 1; i <= num_refine; i++) { + block_start[i] = block_start[i-1] + num; + num *= 8; + num_blocks[i] = 0; + } + + x_block_half = x_block_size/2; + y_block_half = y_block_size/2; + z_block_half = z_block_size/2; + + max_active_block = init_block_x*init_block_y*init_block_z; + num_active = max_active_block; + global_active = num_active*num_pes; + num_parents = max_active_parent = 0; + size = p2[num_refine+1]; /* block size is p2[num_refine+1-level] + * smallest block is size p2[1], so can find + * its center */ + mesh_size[0] = npx*init_block_x*size; + mesh_size[1] = npy*init_block_y*size; + mesh_size[2] = npz*init_block_z*size; + + for (o = n = k = 0; k < init_block_z; k++) + for (j = 0; j < init_block_y; j++) + for (i = 0; i < init_block_x; i++, n++) { + bp = &blocks[o]; + bp->level = 0; + bp->number = n; + bp->parent = -1; + bp->cen[0] = i*size + size/2; + bp->cen[1] = j*size + size/2; + bp->cen[2] = k*size + size/2; + add_sorted_list(o, n, 0); + for (var = 0; var < num_vars; var++) + for (ib = 1; ib <= x_block_size; ib++) + for (jb = 1; jb <= y_block_size; jb++) + for (kb = 1; kb <= z_block_size; kb++) + bp->array[var][ib][jb][kb] = + ((double) rand())/((double) RAND_MAX); + if (i == 0) { /* 0 boundary */ + bp->nei_level[0] = -2; + bp->nei[0][0][0] = 0; + } else { /* neighbor on core */ + bp->nei_level[0] = 0; + bp->nei[0][0][0] = o - 1; + } + bp->nei_refine[0] = 0; + if (i == (init_block_x - 1)) { + bp->nei_level[1] = -2; + bp->nei[1][0][0] = 0; + } else { /* neighbor on core */ + bp->nei_level[1] = 0; + bp->nei[1][0][0] = o + 1; + } + bp->nei_refine[1] = 0; + if (j == 0) { + bp->nei_level[2] = -2; + bp->nei[2][0][0] = 0; + } else { /* neighbor on core */ + bp->nei_level[2] = 0; + bp->nei[2][0][0] = o - init_block_x; + } + bp->nei_refine[2] = 0; + if (j == (init_block_y - 1)) { + bp->nei_level[3] = -2; + bp->nei[3][0][0] = 0; + } else { /* neighbor on core */ + bp->nei_level[3] = 0; + bp->nei[3][0][0] = o + init_block_x; + } + bp->nei_refine[3] = 0; + if (k == 0) { + bp->nei_level[4] = -2; + bp->nei[4][0][0] = 0; + } else { /* neighbor on core */ + bp->nei_level[4] = 0; + bp->nei[4][0][0] = o - init_block_x*init_block_y; + } + bp->nei_refine[4] = 0; + if (k == (init_block_z - 1)) { + bp->nei_level[5] = -2; + bp->nei[5][0][0] = 0; + } else { /* neighbor on core */ + bp->nei_level[5] = 0; + bp->nei[5][0][0] = o + init_block_x*init_block_y; + } + bp->nei_refine[5] = 0; + o++; + } + + for (var = 0; var < num_vars; var++) + grid_sum[var] = check_sum(var); +} Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/main.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/main.c @@ -0,0 +1,365 @@ +// ************************************************************************ +// +// miniAMR: stencil computations with boundary exchange and AMR. +// +// Copyright (2014) Sandia Corporation. Under the terms of Contract +// DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government +// retains certain rights in this software. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// Questions? Contact Courtenay T. Vaughan (ctvaugh@sandia.gov) +// Richard F. Barrett (rfbarre@sandia.gov) +// +// ************************************************************************ + +#include +#include +#include + +#include "block.h" +#include "timer.h" +#include "proto.h" + +int main(int argc, char** argv) +{ + int i, ierr, object_num; + int params[35]; + double *objs; +#include "param.h" + + my_pe = 0; + num_pes = 1; + + counter_malloc = 0; + size_malloc = 0.0; + + /* set initial values */ + for (i = 1; i < argc; i++) + if (!strcmp(argv[i], "--max_blocks")) + max_num_blocks = atoi(argv[++i]); + else if (!strcmp(argv[i], "--target_active")) + target_active = atoi(argv[++i]); + else if (!strcmp(argv[i], "--target_max")) + target_max = atoi(argv[++i]); + else if (!strcmp(argv[i], "--target_min")) + target_min = atoi(argv[++i]); + else if (!strcmp(argv[i], "--num_refine")) + num_refine = atoi(argv[++i]); + else if (!strcmp(argv[i], "--block_change")) + block_change = atoi(argv[++i]); + else if (!strcmp(argv[i], "--uniform_refine")) + uniform_refine = atoi(argv[++i]); + else if (!strcmp(argv[i], "--nx")) + x_block_size = atoi(argv[++i]); + else if (!strcmp(argv[i], "--ny")) + y_block_size = atoi(argv[++i]); + else if (!strcmp(argv[i], "--nz")) + z_block_size = atoi(argv[++i]); + else if (!strcmp(argv[i], "--num_vars")) + num_vars = atoi(argv[++i]); + else if (!strcmp(argv[i], "--comm_vars")) + comm_vars = atoi(argv[++i]); + else if (!strcmp(argv[i], "--init_x")) + init_block_x = atoi(argv[++i]); + else if (!strcmp(argv[i], "--init_y")) + init_block_y = atoi(argv[++i]); + else if (!strcmp(argv[i], "--init_z")) + init_block_z = atoi(argv[++i]); + else if (!strcmp(argv[i], "--refine_freq")) + refine_freq = atoi(argv[++i]); + else if (!strcmp(argv[i], "--report_diffusion")) + report_diffusion = atoi(argv[++i]); + else if (!strcmp(argv[i], "--error_tol")) + error_tol = atoi(argv[++i]); + else if (!strcmp(argv[i], "--num_tsteps")) + num_tsteps = atoi(argv[++i]); + else if (!strcmp(argv[i], "--stages_per_ts")) + stages_per_ts = atoi(argv[++i]); + else if (!strcmp(argv[i], "--checksum_freq")) + checksum_freq = atoi(argv[++i]); + else if (!strcmp(argv[i], "--stencil")) + stencil = atoi(argv[++i]); + else if (!strcmp(argv[i], "--permute")) + permute = 1; + else if (!strcmp(argv[i], "--report_perf")) + report_perf = atoi(argv[++i]); + else if (!strcmp(argv[i], "--plot_freq")) + plot_freq = atoi(argv[++i]); + else if (!strcmp(argv[i], "--code")) + code = atoi(argv[++i]); + else if (!strcmp(argv[i], "--refine_ghost")) + refine_ghost = 1; + else if (!strcmp(argv[i], "--num_objects")) { + num_objects = atoi(argv[++i]); + objects = (object *) ma_malloc(num_objects*sizeof(object), + __FILE__, __LINE__); + object_num = 0; + } else if (!strcmp(argv[i], "--object")) { + if (object_num >= num_objects) { + printf("object number greater than num_objects\n"); + exit(-1); + } + objects[object_num].type = atoi(argv[++i]); + objects[object_num].bounce = atoi(argv[++i]); + objects[object_num].cen[0] = atof(argv[++i]); + objects[object_num].cen[1] = atof(argv[++i]); + objects[object_num].cen[2] = atof(argv[++i]); + objects[object_num].move[0] = atof(argv[++i]); + objects[object_num].move[1] = atof(argv[++i]); + objects[object_num].move[2] = atof(argv[++i]); + objects[object_num].size[0] = atof(argv[++i]); + objects[object_num].size[1] = atof(argv[++i]); + objects[object_num].size[2] = atof(argv[++i]); + objects[object_num].inc[0] = atof(argv[++i]); + objects[object_num].inc[1] = atof(argv[++i]); + objects[object_num].inc[2] = atof(argv[++i]); + object_num++; + } else if (!strcmp(argv[i], "--help")) { + print_help_message(); + exit(0); + } else { + printf("** Error ** Unknown input parameter %s\n", argv[i]); + print_help_message(); + exit(-1); + } + if (check_input()) + exit(-1); + + if (!block_change) + block_change = num_refine; + + for (object_num = 0; object_num < num_objects; object_num++) + for (i = 0; i < 3; i++) { + objects[object_num].orig_cen[i] = objects[object_num].cen[i]; + objects[object_num].orig_move[i] = objects[object_num].move[i]; + objects[object_num].orig_size[i] = objects[object_num].size[i]; + } + + allocate(); + + driver(); + + profile(); + + deallocate(); + + exit(0); +} + +// =================================== print_help_message ==================== + +void print_help_message(void) +{ + printf("(Optional) command line input is of the form: \n\n"); + + printf("--nx - block size x (even && > 0)\n"); + printf("--ny - block size y (even && > 0)\n"); + printf("--nz - block size z (even && > 0)\n"); + printf("--init_x - initial blocks in x (> 0)\n"); + printf("--init_y - initial blocks in y (> 0)\n"); + printf("--init_z - initial blocks in z (> 0)\n"); + printf("--reorder - ordering of blocks if initial number > 1\n"); + printf("--max_blocks - maximun number of blocks per core\n"); + printf("--num_refine - (>= 0) number of levels of refinement\n"); + printf("--block_change - (>= 0) number of levels a block can change in a timestep\n"); + printf("--uniform_refine - if 1, then grid is uniformly refined\n"); + printf("--refine_freq - frequency (in timesteps) of checking for refinement\n"); + printf("--target_active - (>= 0) target number of blocks per core, none if 0\n"); + printf("--target_max - (>= 0) max number of blocks per core, none if 0\n"); + printf("--target_min - (>= 0) min number of blocks per core, none if 0\n"); + printf("--num_vars - number of variables (> 0)\n"); + printf("--comm_vars - number of vars to communicate together\n"); + printf("--num_tsteps - number of timesteps (> 0)\n"); + printf("--stages_per_ts - number of comm/calc stages per timestep\n"); + printf("--checksum_freq - number of stages between checksums\n"); + printf("--stencil - 7 or 27 point (27 will not work with refinement (except uniform))\n"); + printf("--error_tol - (e^{-error_tol} ; >= 0) \n"); + printf("--report_diffusion - (>= 0) none if 0\n"); + printf("--report_perf - 0, 1, 2\n"); + printf("--refine_freq - frequency (timesteps) of plotting (0 for none)\n"); + printf("--code - closely minic communication of different codes\n"); + printf(" 0 minimal sends, 1 send ghosts, 2 send ghosts and process on send\n"); + printf("--permute - altenates directions in communication\n"); + printf("--refine_ghost - use full extent of block (including ghosts) to determine if block is refined\n"); + printf("--num_objects - (>= 0) number of objects to cause refinement\n"); + printf("--object - type, position, movement, size, size rate of change\n"); + + printf("All associated settings are integers except for objects\n"); +} + +// =================================== allocate ============================== + +void allocate(void) +{ + int i, j, k, m, n; + + num_blocks = (int *) ma_malloc((num_refine+1)*sizeof(int), + __FILE__, __LINE__); + num_blocks[0] = num_pes*init_block_x*init_block_y*init_block_z; + num_blocks[0] = init_block_x*init_block_y*init_block_z; + + blocks = (block *) ma_malloc(max_num_blocks*sizeof(block), + __FILE__, __LINE__); + + for (n = 0; n < max_num_blocks; n++) { + blocks[n].number = -1; + blocks[n].array = (double ****) ma_malloc(num_vars*sizeof(double ***), + __FILE__, __LINE__); + for (m = 0; m < num_vars; m++) { + blocks[n].array[m] = (double ***) + ma_malloc((x_block_size+2)*sizeof(double **), + __FILE__, __LINE__); + for (i = 0; i < x_block_size+2; i++) { + blocks[n].array[m][i] = (double **) + ma_malloc((y_block_size+2)*sizeof(double *), + __FILE__, __LINE__); + for (j = 0; j < y_block_size+2; j++) + blocks[n].array[m][i][j] = (double *) + ma_malloc((z_block_size+2)*sizeof(double), + __FILE__, __LINE__); + } + } + } + + sorted_list = (sorted_block *)ma_malloc(max_num_blocks*sizeof(sorted_block), + __FILE__, __LINE__); + sorted_index = (int *) ma_malloc((num_refine+2)*sizeof(int), + __FILE__, __LINE__); + + max_num_parents = max_num_blocks; // Guess at number needed + parents = (parent *) ma_malloc(max_num_parents*sizeof(parent), + __FILE__, __LINE__); + for (n = 0; n < max_num_parents; n++) + parents[n].number = -1; + + grid_sum = (double *)ma_malloc(num_vars*sizeof(double), __FILE__, __LINE__); + + p8 = (int *) ma_malloc((num_refine+2)*sizeof(int), __FILE__, __LINE__); + p2 = (int *) ma_malloc((num_refine+2)*sizeof(int), __FILE__, __LINE__); + block_start = (int *) ma_malloc((num_refine+1)*sizeof(int), + __FILE__, __LINE__); +} + +// =================================== deallocate ============================ + +void deallocate(void) +{ + int i, j, m, n; + + for (n = 0; n < max_num_blocks; n++) { + for (m = 0; m < num_vars; m++) { + for (i = 0; i < x_block_size+2; i++) { + for (j = 0; j < y_block_size+2; j++) + free(blocks[n].array[m][i][j]); + free(blocks[n].array[m][i]); + } + free(blocks[n].array[m]); + } + free(blocks[n].array); + } + free(blocks); + + free(sorted_list); + free(sorted_index); + + free(objects); + + free(grid_sum); + + free(p8); + free(p2); +} + +int check_input(void) +{ + int error = 0; + + if (init_block_x < 1 || init_block_y < 1 || init_block_z < 1) { + printf("initial blocks on processor must be positive\n"); + error = 1; + } + if (max_num_blocks < init_block_x*init_block_y*init_block_z) { + printf("max_num_blocks not large enough\n"); + error = 1; + } + if (x_block_size < 1 || y_block_size < 1 || z_block_size < 1) { + printf("block size must be positive\n"); + error = 1; + } + if (((x_block_size/2)*2) != x_block_size) { + printf("block size in x direction must be even\n"); + error = 1; + } + if (((y_block_size/2)*2) != y_block_size) { + printf("block size in y direction must be even\n"); + error = 1; + } + if (((z_block_size/2)*2) != z_block_size) { + printf("block size in z direction must be even\n"); + error = 1; + } + if (target_active && target_max) { + printf("Only one of target_active and target_max can be used\n"); + error = 1; + } + if (target_active && target_min) { + printf("Only one of target_active and target_min can be used\n"); + error = 1; + } + if (target_active < 0 || target_active > max_num_blocks) { + printf("illegal value for target_active\n"); + error = 1; + } + if (target_max < 0 || target_max > max_num_blocks || + target_max < target_active) { + printf("illegal value for target_max\n"); + error = 1; + } + if (target_min < 0 || target_min > max_num_blocks || + target_min > target_active || target_min > target_max) { + printf("illegal value for target_min\n"); + error = 1; + } + if (num_refine < 0) { + printf("number of refinement levels must be non-negative\n"); + error = 1; + } + if (block_change < 0) { + printf("number of refinement levels must be non-negative\n"); + error = 1; + } + if (num_vars < 1) { + printf("number of variables must be positive\n"); + error = 1; + } + if (num_pes != npx*npy*npz) { + printf("number of processors used does not match number allocated\n"); + error = 1; + } + if (stencil != 7 && stencil != 27) { + printf("illegal value for stencil\n"); + error = 1; + } + if (stencil == 27 && num_refine && !uniform_refine) + printf("WARNING: 27 point stencil with non-uniform refinement: answers may diverge\n"); + if (comm_vars == 0 || comm_vars > num_vars) + comm_vars = num_vars; + if (code < 0 || code > 2) { + printf("code must be 0, 1, or 2\n"); + error = 1; + } + + return (error); +} Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/miniAMR.reference_output =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/miniAMR.reference_output @@ -0,0 +1,172 @@ +Number of blocks at level 0 at timestep 0 is 1 +Number of blocks at level 1 at timestep 0 is 0 +Number of blocks at level 2 at timestep 0 is 0 +Number of blocks at level 3 at timestep 0 is 0 +Number of blocks at level 4 at timestep 0 is 0 +Number of blocks at level 5 at timestep 0 is 0 + +Number of blocks at level 0 at timestep 5 is 1 +Number of blocks at level 1 at timestep 5 is 0 +Number of blocks at level 2 at timestep 5 is 0 +Number of blocks at level 3 at timestep 5 is 0 +Number of blocks at level 4 at timestep 5 is 0 +Number of blocks at level 5 at timestep 5 is 0 + +Number of blocks at level 0 at timestep 10 is 1 +Number of blocks at level 1 at timestep 10 is 0 +Number of blocks at level 2 at timestep 10 is 0 +Number of blocks at level 3 at timestep 10 is 0 +Number of blocks at level 4 at timestep 10 is 0 +Number of blocks at level 5 at timestep 10 is 0 + +Number of blocks at level 0 at timestep 15 is 1 +Number of blocks at level 1 at timestep 15 is 0 +Number of blocks at level 2 at timestep 15 is 0 +Number of blocks at level 3 at timestep 15 is 0 +Number of blocks at level 4 at timestep 15 is 0 +Number of blocks at level 5 at timestep 15 is 0 + +Number of blocks at level 0 at timestep 20 is 1 +Number of blocks at level 1 at timestep 20 is 0 +Number of blocks at level 2 at timestep 20 is 0 +Number of blocks at level 3 at timestep 20 is 0 +Number of blocks at level 4 at timestep 20 is 0 +Number of blocks at level 5 at timestep 20 is 0 + +Number of blocks at level 0 at timestep 25 is 1 +Number of blocks at level 1 at timestep 25 is 0 +Number of blocks at level 2 at timestep 25 is 0 +Number of blocks at level 3 at timestep 25 is 0 +Number of blocks at level 4 at timestep 25 is 0 +Number of blocks at level 5 at timestep 25 is 0 + +Number of blocks at level 0 at timestep 30 is 1 +Number of blocks at level 1 at timestep 30 is 0 +Number of blocks at level 2 at timestep 30 is 0 +Number of blocks at level 3 at timestep 30 is 0 +Number of blocks at level 4 at timestep 30 is 0 +Number of blocks at level 5 at timestep 30 is 0 + +Number of blocks at level 0 at timestep 35 is 1 +Number of blocks at level 1 at timestep 35 is 0 +Number of blocks at level 2 at timestep 35 is 0 +Number of blocks at level 3 at timestep 35 is 0 +Number of blocks at level 4 at timestep 35 is 0 +Number of blocks at level 5 at timestep 35 is 0 + +Number of blocks at level 0 at timestep 40 is 1 +Number of blocks at level 1 at timestep 40 is 0 +Number of blocks at level 2 at timestep 40 is 0 +Number of blocks at level 3 at timestep 40 is 0 +Number of blocks at level 4 at timestep 40 is 0 +Number of blocks at level 5 at timestep 40 is 0 + +Number of blocks at level 0 at timestep 45 is 1 +Number of blocks at level 1 at timestep 45 is 0 +Number of blocks at level 2 at timestep 45 is 0 +Number of blocks at level 3 at timestep 45 is 0 +Number of blocks at level 4 at timestep 45 is 0 +Number of blocks at level 5 at timestep 45 is 0 + +Number of blocks at level 0 at timestep 50 is 1 +Number of blocks at level 1 at timestep 50 is 0 +Number of blocks at level 2 at timestep 50 is 0 +Number of blocks at level 3 at timestep 50 is 0 +Number of blocks at level 4 at timestep 50 is 0 +Number of blocks at level 5 at timestep 50 is 0 + +Number of blocks at level 0 at timestep 55 is 1 +Number of blocks at level 1 at timestep 55 is 0 +Number of blocks at level 2 at timestep 55 is 0 +Number of blocks at level 3 at timestep 55 is 0 +Number of blocks at level 4 at timestep 55 is 0 +Number of blocks at level 5 at timestep 55 is 0 + +Number of blocks at level 0 at timestep 60 is 1 +Number of blocks at level 1 at timestep 60 is 0 +Number of blocks at level 2 at timestep 60 is 0 +Number of blocks at level 3 at timestep 60 is 0 +Number of blocks at level 4 at timestep 60 is 0 +Number of blocks at level 5 at timestep 60 is 0 + +Number of blocks at level 0 at timestep 65 is 1 +Number of blocks at level 1 at timestep 65 is 0 +Number of blocks at level 2 at timestep 65 is 0 +Number of blocks at level 3 at timestep 65 is 0 +Number of blocks at level 4 at timestep 65 is 0 +Number of blocks at level 5 at timestep 65 is 0 + +Number of blocks at level 0 at timestep 70 is 1 +Number of blocks at level 1 at timestep 70 is 0 +Number of blocks at level 2 at timestep 70 is 0 +Number of blocks at level 3 at timestep 70 is 0 +Number of blocks at level 4 at timestep 70 is 0 +Number of blocks at level 5 at timestep 70 is 0 + +Number of blocks at level 0 at timestep 75 is 1 +Number of blocks at level 1 at timestep 75 is 0 +Number of blocks at level 2 at timestep 75 is 0 +Number of blocks at level 3 at timestep 75 is 0 +Number of blocks at level 4 at timestep 75 is 0 +Number of blocks at level 5 at timestep 75 is 0 + +Number of blocks at level 0 at timestep 80 is 1 +Number of blocks at level 1 at timestep 80 is 0 +Number of blocks at level 2 at timestep 80 is 0 +Number of blocks at level 3 at timestep 80 is 0 +Number of blocks at level 4 at timestep 80 is 0 +Number of blocks at level 5 at timestep 80 is 0 + +Number of blocks at level 0 at timestep 85 is 1 +Number of blocks at level 1 at timestep 85 is 0 +Number of blocks at level 2 at timestep 85 is 0 +Number of blocks at level 3 at timestep 85 is 0 +Number of blocks at level 4 at timestep 85 is 0 +Number of blocks at level 5 at timestep 85 is 0 + +Number of blocks at level 0 at timestep 90 is 1 +Number of blocks at level 1 at timestep 90 is 0 +Number of blocks at level 2 at timestep 90 is 0 +Number of blocks at level 3 at timestep 90 is 0 +Number of blocks at level 4 at timestep 90 is 0 +Number of blocks at level 5 at timestep 90 is 0 + +Number of blocks at level 0 at timestep 95 is 1 +Number of blocks at level 1 at timestep 95 is 0 +Number of blocks at level 2 at timestep 95 is 0 +Number of blocks at level 3 at timestep 95 is 0 +Number of blocks at level 4 at timestep 95 is 0 +Number of blocks at level 5 at timestep 95 is 0 + +Number of blocks at level 0 at timestep 100 is 1 +Number of blocks at level 1 at timestep 100 is 0 +Number of blocks at level 2 at timestep 100 is 0 +Number of blocks at level 3 at timestep 100 is 0 +Number of blocks at level 4 at timestep 100 is 0 +Number of blocks at level 5 at timestep 100 is 0 + + + ================ Start report =================== + + Mantevo miniAMR + version 1.0 provisional + +serial run on 1 rank +initial blocks per rank 1 x 1 x 1 +block size 8 x 8 x 8 +Maximum number of blocks per rank is 500 +Number of levels of refinement is 5 +Blocks can change by 5 levels per refinement step + +Blocks will be refined by 0 objects + + +Number of timesteps is 100 +Communicaion/computation stages per timestep is 20 +Will perform checksums every 5 stages +Will refine every 5 timesteps +Will not plot results +Calculate on 40 variables with 7 point stencil +Communicate 40 variables at a time +Error tolorance for variable sums is 10^(-8) +exit 0 Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/move.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/move.c @@ -0,0 +1,614 @@ +// ************************************************************************ +// +// miniAMR: stencil computations with boundary exchange and AMR. +// +// Copyright (2014) Sandia Corporation. Under the terms of Contract +// DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government +// retains certain rights in this software. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// Questions? Contact Courtenay T. Vaughan (ctvaugh@sandia.gov) +// Richard F. Barrett (rfbarre@sandia.gov) +// +// ************************************************************************ + +#include +#include + +#include "block.h" +#include "proto.h" + +// This routine moves the objects that determine the refinement and checks +// the blocks against those objects to determine which blocks will refine. +void move(void) +{ + int i, j; + + for (i = 0; i < num_objects; i++) + for (j = 0; j < 3; j++) { + objects[i].cen[j] += objects[i].move[j]; + if (objects[i].bounce) + if (objects[i].cen[j] >= 1.0) { + objects[i].cen[j] = 2.0 - objects[i].cen[j]; + objects[i].move[j] = -objects[i].move[j]; + } else if (objects[i].cen[j] <= 0.0) { + objects[i].cen[j] = 0.0 - objects[i].cen[j]; + objects[i].move[j] = -objects[i].move[j]; + } + objects[i].size[j] += objects[i].inc[j]; + } +} + +void check_objects(void) +{ + int n, sz, in, c; + double cor[3][2]; /* extent of block */ + block *bp; + parent *pp; + +/* only need to check corners to refine + * * if boundary is in block refine if not level max_level block.refine = 1 + * * else if not level 0 then block.refine = -1 (unrefine) + * * types of objects: spheroid, solid spheroid, rectangle, solid rectangle, + * * hemispheroid, and solid hemispheroid (in six orientations), + * * cylinder, solid cylinder (in three directions) + * * (later) diamond, solid diamond */ + + for (in = 0; in < sorted_index[num_refine+1]; in++) { + n = sorted_list[in].n; + if ((bp = &blocks[n])->number >= 0) { + sz = p2[num_refine - bp->level]; /* half size of block */ + cor[0][0] = ((double) (bp->cen[0] - sz))/((double) mesh_size[0]); + cor[0][1] = ((double) (bp->cen[0] + sz))/((double) mesh_size[0]); + cor[1][0] = ((double) (bp->cen[1] - sz))/((double) mesh_size[1]); + cor[1][1] = ((double) (bp->cen[1] + sz))/((double) mesh_size[1]); + cor[2][0] = ((double) (bp->cen[2] - sz))/((double) mesh_size[2]); + cor[2][1] = ((double) (bp->cen[2] + sz))/((double) mesh_size[2]); + if (refine_ghost) { + cor[0][0] -= 2.0*(((double) sz)/((double) x_block_size))/ + ((double) mesh_size[0]); + cor[0][1] += 2.0*(((double) sz)/((double) x_block_size))/ + ((double) mesh_size[0]); + cor[1][0] -= 2.0*(((double) sz)/((double) y_block_size))/ + ((double) mesh_size[1]); + cor[1][1] += 2.0*(((double) sz)/((double) y_block_size))/ + ((double) mesh_size[1]); + cor[2][0] -= 2.0*(((double) sz)/((double) z_block_size))/ + ((double) mesh_size[2]); + cor[2][1] += 2.0*(((double) sz)/((double) z_block_size))/ + ((double) mesh_size[2]); + } + if (check_block(cor)) + bp->refine = 1; + else if (refine_ghost && bp->level) { + /* check if this block would unrefine, but its parent would then + * refine. Then leave it alone */ + sz = p2[num_refine - bp->level + 1]; /* half size of parent */ + cor[0][0] -= (((double) sz)/((double) x_block_size))/ + ((double) mesh_size[0]); + cor[0][1] += (((double) sz)/((double) x_block_size))/ + ((double) mesh_size[0]); + cor[1][0] -= (((double) sz)/((double) y_block_size))/ + ((double) mesh_size[1]); + cor[1][1] += (((double) sz)/((double) y_block_size))/ + ((double) mesh_size[1]); + cor[2][0] -= (((double) sz)/((double) z_block_size))/ + ((double) mesh_size[2]); + cor[2][1] += (((double) sz)/((double) z_block_size))/ + ((double) mesh_size[2]); + if (check_block(cor)) + bp->refine = 0; + } + + /* if at max refinement, then can not refine */ + if ((bp->level == num_refine && bp->refine == 1) || !bp->refine) { + bp->refine = 0; + if (bp->parent != -1 && bp->parent_node == my_pe) { + pp = &parents[bp->parent]; + pp->refine = 0; + for (c = 0; c < 8; c++) + if (pp->child_node[c] == my_pe && pp->child[c] >= 0) + if (blocks[pp->child[c]].refine == -1) + blocks[pp->child[c]].refine = 0; + } + } + /* if 0 level, we can not unrefine */ + if (!bp->level && bp->refine == -1) + bp->refine = 0; + } + } +} + +int check_block(double cor[3][2]) +{ + int o, tmp, ca, c1, c2, intersect, + xc, xv, yc, yv, zc, zv; /* where is center of object to block */ + object *op; + + intersect = 0; + for (o = 0; o < num_objects; o++) { + op = &objects[o]; + if (intersect > 0 || + op->size[0] < 0.0 || op->size[1] < 0.0 || op->size[2] < 0) + /* skip since already determined that it will be refined or + * can not be or object has size less than zero */ + ; + else if (op->type == 0) { /* surface of rectangle */ + if (cor[0][1] > (op->cen[0] - op->size[0]) && + cor[0][0] < (op->cen[0] + op->size[0])) { + /* some portion of block intersects with rectangle in x + * 4 cases - the two that straddle can be treated the same + * and the two that do not can also be treated the same */ + if ((cor[0][0] < (op->cen[0] - op->size[0]) && + cor[0][1] < (op->cen[0] + op->size[0])) || + (cor[0][0] > (op->cen[0] - op->size[0]) && + cor[0][1] > (op->cen[0] + op->size[0]))) { + /* one of rectangle boundary between block sides */ + if (cor[1][1] > (op->cen[1] - op->size[1]) && + cor[1][0] < (op->cen[1] + op->size[1]) && + cor[2][1] > (op->cen[2] - op->size[2]) && + cor[2][0] < (op->cen[2] + op->size[2])) + /* some portion of block intersects rectangle in y,z */ + intersect = 1; + } else { + /* rectangle in block (or vice-versa) in x */ + if (cor[1][1] > (op->cen[1] - op->size[1]) && + cor[1][0] < (op->cen[1] + op->size[1])) { + /* some portion of block intersects rectangle in y */ + if ((cor[1][0] < (op->cen[1] - op->size[1]) && + cor[1][1] < (op->cen[1] + op->size[1])) || + (cor[1][0] > (op->cen[1] - op->size[1]) && + cor[1][1] > (op->cen[1] + op->size[1]))) + if (cor[2][1] > (op->cen[2] - op->size[2]) && + cor[2][0] < (op->cen[2] + op->size[2])) + /* portion of block intersects rectangle in z */ + intersect = 1; + } else { + /* rectangle in block (or vice-versa) in x and y */ + if (cor[2][1] > (op->cen[2] - op->size[2]) && + cor[2][0] < (op->cen[2] + op->size[2])) { + if ((cor[2][0] < (op->cen[2] - op->size[2]) && + cor[2][1] < (op->cen[2] + op->size[2])) || + (cor[2][0] > (op->cen[2] - op->size[2]) && + cor[2][1] > (op->cen[2] + op->size[2]))) + /* portion of block intersects rectangle in z */ + intersect = 1; + } /* else case need not be considered */ + } + } + } + } else if (op->type == 1) { /* solid rectangle */ + if (cor[0][1] > (op->cen[0] - op->size[0]) && + cor[0][0] < (op->cen[0] + op->size[0]) && + cor[1][1] > (op->cen[1] - op->size[1]) && + cor[1][0] < (op->cen[1] + op->size[1]) && + cor[2][1] > (op->cen[2] - op->size[2]) && + cor[2][0] < (op->cen[2] + op->size[2])) + intersect = 1; + } else if (op->type >= 2 && op->type <= 14 && !(op->type%2)) { + /* boundary of spheroid or hemispheroid */ + /* determine where center is and nearest and furthest + * verticies and then determine if boundary is between them + * 1 = (x/a)^2 + (y/b)^2 + (z/c)^2 is boundary */ + tmp = intersect; + xc = yc = zc = 0; + if (op->cen[0] < cor[0][0]) + xv = 0; + else if (op->cen[0] > cor[0][1]) + xv = 1; + else { + xc = 1; + if (op->cen[0] < (cor[0][0] + cor[0][1])/2.0) + xv = 0; + else + xv = 1; + } + if (op->cen[1] < cor[1][0]) + yv = 0; + else if (op->cen[1] > cor[1][1]) + yv = 1; + else { + yc = 1; + if (op->cen[1] < (cor[1][0] + cor[1][1])/2.0) + yv = 0; + else + yv = 1; + } + if (op->cen[2] < cor[2][0]) + zv = 0; + else if (op->cen[2] > cor[2][1]) + zv = 1; + else { + zc = 1; + if (op->cen[2] < (cor[2][0] + cor[2][1])/2.0) + zv = 0; + else + zv = 1; + } + if (xc) { + if (yc) { + if (zc) { /* xc, yc, zc */ + if ((((cor[0][1-xv] - op->cen[0])/op->size[0])* + ((cor[0][1-xv] - op->cen[0])/op->size[0]) + + ((cor[1][1-yv] - op->cen[1])/op->size[1])* + ((cor[1][1-yv] - op->cen[1])/op->size[1]) + + ((cor[2][1-zv] - op->cen[2])/op->size[2])* + ((cor[2][1-zv] - op->cen[2])/op->size[2])) + > 1.0) + intersect = 1; + } else { /* xc, yc, !zc */ + if ((fabs(cor[2][zv] - op->cen[2]) < op->size[2]) && + ((((cor[0][1-xv] - op->cen[0])/op->size[0])* + ((cor[0][1-xv] - op->cen[0])/op->size[0]) + + ((cor[1][1-yv] - op->cen[1])/op->size[1])* + ((cor[1][1-yv] - op->cen[1])/op->size[1]) + + ((cor[2][1-zv] - op->cen[2])/op->size[2])* + ((cor[2][1-zv] - op->cen[2])/op->size[2])) > 1.0)) + intersect = 1; + } + } else { + if (zc) { /* xc, !yc, zc */ + if ((fabs(cor[1][yv] - op->cen[1]) < op->size[1]) && + ((((cor[0][1-xv] - op->cen[0])/op->size[0])* + ((cor[0][1-xv] - op->cen[0])/op->size[0]) + + ((cor[1][1-yv] - op->cen[1])/op->size[1])* + ((cor[1][1-yv] - op->cen[1])/op->size[1]) + + ((cor[2][1-zv] - op->cen[2])/op->size[2])* + ((cor[2][1-zv] - op->cen[2])/op->size[2])) > 1.0)) + intersect = 1; + } else { /* xc, !yc, !zc */ + if (((((cor[1][yv] - op->cen[1])/op->size[1])* + ((cor[1][yv] - op->cen[1])/op->size[1]) + + ((cor[2][zv] - op->cen[2])/op->size[2])* + ((cor[2][zv] - op->cen[2])/op->size[2])) < 1.0) && + ((((cor[0][1-xv] - op->cen[0])/op->size[0])* + ((cor[0][1-xv] - op->cen[0])/op->size[0]) + + ((cor[1][1-yv] - op->cen[1])/op->size[1])* + ((cor[1][1-yv] - op->cen[1])/op->size[1]) + + ((cor[2][1-zv] - op->cen[2])/op->size[2])* + ((cor[2][1-zv] - op->cen[2])/op->size[2])) > 1.0)) + intersect = 1; + } + } + } else { + if (yc) { + if (zc) { /* !xc, yc, zc */ + if ((fabs(cor[0][xv] - op->cen[0]) < op->size[0]) && + ((((cor[0][1-xv] - op->cen[0])/op->size[0])* + ((cor[0][1-xv] - op->cen[0])/op->size[0]) + + ((cor[1][1-yv] - op->cen[1])/op->size[1])* + ((cor[1][1-yv] - op->cen[1])/op->size[1]) + + ((cor[2][1-zv] - op->cen[2])/op->size[2])* + ((cor[2][1-zv] - op->cen[2])/op->size[2])) > 1.0)) + intersect = 1; + } else { /* !xc, yc, !zc */ + if (((((cor[0][xv] - op->cen[0])/op->size[0])* + ((cor[0][xv] - op->cen[0])/op->size[0]) + + ((cor[2][zv] - op->cen[2])/op->size[2])* + ((cor[2][zv] - op->cen[2])/op->size[2])) < 1.0) && + ((((cor[0][1-xv] - op->cen[0])/op->size[0])* + ((cor[0][1-xv] - op->cen[0])/op->size[0]) + + ((cor[1][1-yv] - op->cen[1])/op->size[1])* + ((cor[1][1-yv] - op->cen[1])/op->size[1]) + + ((cor[2][1-zv] - op->cen[2])/op->size[2])* + ((cor[2][1-zv] - op->cen[2])/op->size[2])) > 1.0)) + intersect = 1; + } + } else { + if (zc) { /* !xc, !yc, zc */ + if (((((cor[0][xv] - op->cen[0])/op->size[0])* + ((cor[0][xv] - op->cen[0])/op->size[0]) + + ((cor[1][yv] - op->cen[1])/op->size[1])* + ((cor[1][yv] - op->cen[1])/op->size[1])) < 1.0) && + ((((cor[0][1-xv] - op->cen[0])/op->size[0])* + ((cor[0][1-xv] - op->cen[0])/op->size[0]) + + ((cor[1][1-yv] - op->cen[1])/op->size[1])* + ((cor[1][1-yv] - op->cen[1])/op->size[1]) + + ((cor[2][1-zv] - op->cen[2])/op->size[2])* + ((cor[2][1-zv] - op->cen[2])/op->size[2])) > 1.0)) + intersect = 1; + } else { /* !xc, !yc, !zc */ + if (((((cor[0][xv] - op->cen[0])/op->size[0])* + ((cor[0][xv] - op->cen[0])/op->size[0]) + + ((cor[1][yv] - op->cen[1])/op->size[1])* + ((cor[1][yv] - op->cen[1])/op->size[1]) + + ((cor[2][zv] - op->cen[2])/op->size[2])* + ((cor[2][zv] - op->cen[2])/op->size[2])) < 1.0) && + ((((cor[0][1-xv] - op->cen[0])/op->size[0])* + ((cor[0][1-xv] - op->cen[0])/op->size[0]) + + ((cor[1][1-yv] - op->cen[1])/op->size[1])* + ((cor[1][1-yv] - op->cen[1])/op->size[1]) + + ((cor[2][1-zv] - op->cen[2])/op->size[2])* + ((cor[2][1-zv] - op->cen[2])/op->size[2])) > 1.0)) + intersect = 1; + } + } + } + if (op->type != 2 && intersect) { + /* exclude halfplane of spheroid to make hemispheroid */ + if (op->type == 4 && cor[0][1] < op->cen[0]) + intersect = tmp; + else if (op->type == 6 && cor[0][0] > op->cen[0]) + intersect = tmp; + else if (op->type == 8 && cor[1][1] < op->cen[1]) + intersect = tmp; + else if (op->type == 10 && cor[1][0] > op->cen[1]) + intersect = tmp; + else if (op->type == 12 && cor[2][1] < op->cen[2]) + intersect = tmp; + else if (op->type == 14 && cor[2][0] > op->cen[2]) + intersect = tmp; + } + } else if (op->type >= 3 && op->type <= 15 && op->type%2) { + /* solid spheroid or hemispheroid */ + /* determine if center is in block - if so then refine, + * if not determine nearest vertix and see if that is in + * - if so refine + * 1 = (x/a)^2 + (y/b)^2 + (z/c)^2 is boundary */ + tmp = intersect; + xc = yc = zc = 0; + if (op->cen[0] < cor[0][0]) + xv = 0; + else if (op->cen[0] > cor[0][1]) + xv = 1; + else + xc = 1; + if (op->cen[1] < cor[1][0]) + yv = 0; + else if (op->cen[1] > cor[1][1]) + yv = 1; + else + yc = 1; + if (op->cen[2] < cor[2][0]) + zv = 0; + else if (op->cen[2] > cor[2][1]) + zv = 1; + else + zc = 1; + if (xc) { + if (yc) { + if (zc) /* xc, yc, zc */ + intersect = 1; + else { /* xc, yc, !zc */ + if (fabs(cor[2][zv] - op->cen[2]) < op->size[2]) + intersect = 1; + } + } else { + if (zc) { /* xc, !yc, zc */ + if (fabs(cor[1][yv] - op->cen[1]) < op->size[1]) + intersect = 1; + } else { /* xc, !yc, !zc */ + if ((((cor[1][yv] - op->cen[1])/op->size[1])* + ((cor[1][yv] - op->cen[1])/op->size[1]) + + ((cor[2][zv] - op->cen[2])/op->size[2])* + ((cor[2][zv] - op->cen[2])/op->size[2])) < 1.0) + intersect = 1; + } + } + } else { + if (yc) { + if (zc) { /* !xc, yc, zc */ + if (fabs(cor[0][xv] - op->cen[0]) < op->size[0]) + intersect = 1; + } else { /* !xc, yc, !zc */ + if ((((cor[0][xv] - op->cen[0])/op->size[0])* + ((cor[0][xv] - op->cen[0])/op->size[0]) + + ((cor[2][zv] - op->cen[2])/op->size[2])* + ((cor[2][zv] - op->cen[2])/op->size[2])) < 1.0) + intersect = 1; + } + } else { + if (zc) { /* !xc, !yc, zc */ + if ((((cor[0][xv] - op->cen[0])/op->size[0])* + ((cor[0][xv] - op->cen[0])/op->size[0]) + + ((cor[1][yv] - op->cen[1])/op->size[1])* + ((cor[1][yv] - op->cen[1])/op->size[1])) < 1.0) + intersect = 1; + } else { /* !xc, !yc, !zc */ + if ((((cor[0][xv] - op->cen[0])/op->size[0])* + ((cor[0][xv] - op->cen[0])/op->size[0]) + + ((cor[1][yv] - op->cen[1])/op->size[1])* + ((cor[1][yv] - op->cen[1])/op->size[1]) + + ((cor[2][zv] - op->cen[2])/op->size[2])* + ((cor[2][zv] - op->cen[2])/op->size[2])) < 1.0) + intersect = 1; + } + } + } + if (op->type != 3 && intersect) { + /* exclude halfplane of spheroid to make hemispheroid */ + if (op->type == 5 && cor[0][1] < op->cen[0]) + intersect = tmp; + else if (op->type == 7 && cor[0][0] > op->cen[0]) + intersect = tmp; + else if (op->type == 9 && cor[1][1] < op->cen[1]) + intersect = tmp; + else if (op->type == 11 && cor[1][0] > op->cen[1]) + intersect = tmp; + else if (op->type == 13 && cor[2][1] < op->cen[2]) + intersect = tmp; + else if (op->type == 15 && cor[2][0] > op->cen[2]) + intersect = tmp; + } + } else if (op->type == 20 || op->type == 22 || op->type == 24) { + /* boundary of cylinder, ca is axis of cylinder */ + if (op->type == 20) { + ca = 0; + c1 = 1; + c2 = 2; + } else if (op->type == 22) { + ca = 1; + c1 = 2; + c2 = 0; + } else { + ca = 2; + c1 = 0; + c2 = 1; + } + if (cor[ca][1] > (op->cen[ca] - op->size[ca]) && + cor[ca][0] < (op->cen[ca] + op->size[ca])) { + /* some part of block between planes that define ends */ + /* use y and z for directions perpendicular to axis */ + yc = zc = 0; + if (op->cen[c1] < cor[c1][0]) + yv = 0; + else if (op->cen[c1] > cor[c1][1]) + yv = 1; + else { + yc = 1; + if (op->cen[c1] < (cor[c1][0] + cor[c1][1])/2.0) + yv = 0; + else + yv = 1; + } + if (op->cen[c2] < cor[c2][0]) + zv = 0; + else if (op->cen[c2] > cor[c2][1]) + zv = 1; + else { + zc = 1; + if (op->cen[c2] < (cor[c2][0] + cor[c2][1])/2.0) + zv = 0; + else + zv = 1; + } + if ((cor[0][0] < (op->cen[0] - op->size[0]) && + cor[0][1] < (op->cen[0] + op->size[0])) || + (cor[0][0] > (op->cen[0] - op->size[0]) && + cor[0][1] > (op->cen[0] + op->size[0]))) { + /* block overlaps cylinder ends in aixs dir */ + if (yc) { + if (zc) { + intersect = 1; + } else { + if (fabs(cor[c2][zv] - op->cen[c2]) < op->size[c2]) + intersect = 1; + } + } else { + if (zc) { + if (fabs(cor[c1][yv] - op->cen[c1]) < op->size[c1]) + intersect = 1; + } else { + if ((((cor[c1][yv] - op->cen[c1])/op->size[c1])* + ((cor[c1][yv] - op->cen[c1])/op->size[c1]) + + ((cor[c2][zv] - op->cen[c2])/op->size[c2])* + ((cor[c2][zv] - op->cen[c2])/op->size[c2])) + < 1.0) + intersect = 1; + } + } + } else { + /* block in cylinder or cylinder in block in aixs dir */ + /* in c1 c2 plane need block point in and out of circle */ + if (yc) { + if (zc) { + if ((((cor[c1][1-yv] - op->cen[c1])/op->size[c1])* + ((cor[c1][1-yv] - op->cen[c1])/op->size[c1]) + + ((cor[c2][1-zv] - op->cen[c2])/op->size[c2])* + ((cor[c2][1-zv] - op->cen[c2])/op->size[c2])) + > 1.0) + intersect = 1; + } else { + if ((fabs(cor[c2][zv]-op->cen[c2]) < op->size[c2])&& + ((((cor[c1][1-yv] - op->cen[c1])/op->size[c1])* + ((cor[c1][1-yv] - op->cen[c1])/op->size[c1]) + + ((cor[c2][1-zv] - op->cen[c2])/op->size[c2])* + ((cor[c2][1-zv] - op->cen[c2])/op->size[c2])) + > 1.0)) + intersect = 1; + } + } else { + if (zc) { + if ((fabs(cor[c1][yv]-op->cen[c1]) < op->size[c1])&& + ((((cor[c1][1-yv] - op->cen[c1])/op->size[c1])* + ((cor[c1][1-yv] - op->cen[c1])/op->size[c1]) + + ((cor[c2][1-zv] - op->cen[c2])/op->size[c2])* + ((cor[c2][1-zv] - op->cen[c2])/op->size[c2])) + > 1.0)) + intersect = 1; + } else { + if (((((cor[c1][yv] - op->cen[c1])/op->size[c1])* + ((cor[c1][yv] - op->cen[c1])/op->size[c1]) + + ((cor[c2][zv] - op->cen[c2])/op->size[c2])* + ((cor[c2][zv] - op->cen[c2])/op->size[c2])) + < 1.0) && + ((((cor[c1][1-yv] - op->cen[c1])/op->size[c1])* + ((cor[c1][1-yv] - op->cen[c1])/op->size[c1]) + + ((cor[c2][1-zv] - op->cen[c2])/op->size[c2])* + ((cor[c2][1-zv] - op->cen[c2])/op->size[c2])) + > 1.0)) + intersect = 1; + } + } + } + } + } else if (op->type == 21 || op->type == 23 || op->type == 25) { + /* volume of cylinder, ca is axis of cylinder */ + if (op->type == 21) { + ca = 0; + c1 = 1; + c2 = 2; + } else if (op->type == 23) { + ca = 1; + c1 = 2; + c2 = 0; + } else { + ca = 2; + c1 = 0; + c2 = 1; + } + if (cor[ca][1] > (op->cen[ca] - op->size[ca]) && + cor[ca][0] < (op->cen[ca] + op->size[ca])) { + /* some part of block between planes that define ends */ + /* use y and z for directions perpendicular to axis */ + yc = zc = 0; + if (op->cen[c1] < cor[c1][0]) + yv = 0; + else if (op->cen[c1] > cor[c1][1]) + yv = 1; + else + yc = 1; + if (op->cen[c2] < cor[c2][0]) + zv = 0; + else if (op->cen[c2] > cor[c2][1]) + zv = 1; + else + zc = 1; + if (yc) { + if (zc) { + intersect = 1; + } else { + if (fabs(cor[c2][zv] - op->cen[c2]) < op->size[c2]) + intersect = 1; + } + } else { + if (zc) { + if (fabs(cor[c1][yv] - op->cen[c1]) < op->size[c1]) + intersect = 1; + } else { + if ((((cor[c1][yv] - op->cen[c1])/op->size[c1])* + ((cor[c1][yv] - op->cen[c1])/op->size[c1]) + + ((cor[c2][zv] - op->cen[c2])/op->size[c2])* + ((cor[c2][zv] - op->cen[c2])/op->size[c2])) < 1.0) + intersect = 1; + } + } + } + } else { + printf("undefined object %d\n", op->type); + } + } + return(intersect); +} Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/param.h =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/param.h @@ -0,0 +1,61 @@ +// ************************************************************************ +// +// miniAMR: stencil computations with boundary exchange and AMR. +// +// Copyright (2014) Sandia Corporation. Under the terms of Contract +// DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government +// retains certain rights in this software. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// Questions? Contact Courtenay T. Vaughan (ctvaugh@sandia.gov) +// Richard F. Barrett (rfbarre@sandia.gov) +// +// ************************************************************************ + +max_num_blocks = 500; +target_active = 0; +target_max = 0; +target_min = 0; +num_refine = 5; +uniform_refine = 0; +x_block_size = 10; +y_block_size = 10; +z_block_size = 10; +num_vars = 40; +comm_vars = 0; +init_block_x = 1; +init_block_y = 1; +init_block_z = 1; +reorder = 1; +npx = 1; +npy = 1; +npz = 1; +inbalance = 0; +refine_freq = 5; +report_diffusion = 0; +error_tol = 8; +num_tsteps = 20; +stages_per_ts = 20; +checksum_freq = 5; +stencil = 7; +report_perf = 12; +plot_freq = 0; +num_objects = 0; +lb_opt = 1; +block_change = 0; +code = 0; +permute = 0; +nonblocking = 1; +refine_ghost = 0; Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/plot.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/plot.c @@ -0,0 +1,65 @@ +// ************************************************************************ +// +// miniAMR: stencil computations with boundary exchange and AMR. +// +// Copyright (2014) Sandia Corporation. Under the terms of Contract +// DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government +// retains certain rights in this software. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// Questions? Contact Courtenay T. Vaughan (ctvaugh@sandia.gov) +// Richard F. Barrett (rfbarre@sandia.gov) +// +// ************************************************************************ + +#include +#include + +#include "block.h" +#include "proto.h" + +// Write block information (level and center) to plot file. +void plot(int ts) +{ + int i, j, n, total_num_blocks, *buf, buf_size, size; + char fname[20]; + block *bp; + FILE *fp; + + fname[0] = 'p'; + fname[1] = 'l'; + fname[2] = 'o'; + fname[3] = 't'; + fname[4] = '.'; + for (n = 1, j = 0; n < num_tsteps; j++, n *= 10) ; + for (n = 1, i = 0; i <= j; i++, n *= 10) + fname[5+j-i] = (char) ('0' + (ts/n)%10); + fname[6+j] = '\0'; + fp = fopen(fname, "w"); + + total_num_blocks = 0; + for (i = 0; i <= num_refine; i++) + total_num_blocks += num_blocks[i]; + fprintf(fp, "%d %d %d %d %d\n", total_num_blocks, num_refine, + npx*init_block_x, npy*init_block_y, + npz*init_block_z); + buf_size = 0; + fprintf(fp, "%d\n", num_active); + for (n = 0; n < max_active_block; n++) + if ((bp = &blocks[n])->number >= 0) + fprintf(fp, "%d %d %d %d\n", bp->level, bp->cen[0], + bp->cen[1], bp->cen[2]); + fclose(fp); +} Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/profile.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/profile.c @@ -0,0 +1,678 @@ +// ************************************************************************ +// +// miniAMR: stencil computations with boundary exchange and AMR. +// +// Copyright (2014) Sandia Corporation. Under the terms of Contract +// DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government +// retains certain rights in this software. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// Questions? Contact Courtenay T. Vaughan (ctvaugh@sandia.gov) +// Richard F. Barrett (rfbarre@sandia.gov) +// +// ************************************************************************ + +#include +#include + +#include "block.h" +#include "proto.h" +#include "timer.h" + +// Profiling output. +void profile(void) +{ + int i; + double total_gflops, gflops_rank, total_fp_ops, total_fp_adds, + total_fp_divs; + object *op; + char *version = "1.0 provisional"; + FILE *fp; + + calculate_results(); + total_fp_divs = ((double) total_blocks)*((double) x_block_size)* + ((double) y_block_size)*((double) z_block_size); + if (stencil == 7) + total_fp_adds = 6*total_fp_divs; + else + total_fp_adds = 26*total_fp_divs; + total_fp_ops = total_fp_divs + total_fp_adds; + total_gflops = total_fp_ops/(average[38]*1024.0*1024.0*1024.0); + gflops_rank = total_gflops/((double) num_pes); + + if (!my_pe) { + if (report_perf & 1) { + fp = fopen("results.yaml", "w"); + fprintf(fp, "code: miniAMR\n"); + fprintf(fp, "version: %s\n", version); + fprintf(fp, "ranks: %d\n", num_pes); + fprintf(fp, "init_block_x: %d\n", init_block_x); + fprintf(fp, "init_block_y: %d\n", init_block_y); + fprintf(fp, "init_block_z: %d\n", init_block_z); + fprintf(fp, "x_block_size: %d\n", x_block_size); + fprintf(fp, "y_block_size: %d\n", y_block_size); + fprintf(fp, "z_block_size: %d\n", z_block_size); + fprintf(fp, "permute: %d\n", permute); + fprintf(fp, "max_blocks_allowed: %d\n", max_num_blocks); + fprintf(fp, "code: %d\n", code); + fprintf(fp, "num_refine: %d\n", num_refine); + fprintf(fp, "block_change: %d\n", block_change); + fprintf(fp, "refine_ghost: %d\n", refine_ghost); + fprintf(fp, "uniform_refine: %d\n", uniform_refine); + fprintf(fp, "num_objects: %d\n", num_objects); + for (i = 0; i < num_objects; i++) { + op = &objects[i]; + fprintf(fp, "obj%dtype: %d\n", i, op->type); + fprintf(fp, "obj%dbounce: %d\n", i, op->bounce); + fprintf(fp, "obj%dcenter_x: %lf\n", i, op->cen[0]); + fprintf(fp, "obj%dcenter_y: %lf\n", i, op->cen[1]); + fprintf(fp, "obj%dcenter_z: %lf\n", i, op->cen[2]); + fprintf(fp, "obj%dmove_x: %lf\n", i, op->move[0]); + fprintf(fp, "obj%dmove_y: %lf\n", i, op->move[1]); + fprintf(fp, "obj%dmove_z: %lf\n", i, op->move[2]); + fprintf(fp, "obj%dsize_x: %lf\n", i, op->size[0]); + fprintf(fp, "obj%dsize_y: %lf\n", i, op->size[1]); + fprintf(fp, "obj%dsize_z: %lf\n", i, op->size[2]); + fprintf(fp, "obj%dinc_x: %lf\n", i, op->inc[0]); + fprintf(fp, "obj%dinc_y: %lf\n", i, op->inc[1]); + fprintf(fp, "obj%dinc_z: %lf\n", i, op->inc[2]); + } + fprintf(fp, "num_tsteps: %d\n", num_tsteps); + fprintf(fp, "stages_per_timestep: %d\n", stages_per_ts); + fprintf(fp, "checksum_freq: %d\n", checksum_freq); + fprintf(fp, "refine_freq: %d\n", refine_freq); + fprintf(fp, "plot_freq: %d\n", plot_freq); + fprintf(fp, "num_vars: %d\n", num_vars); + fprintf(fp, "stencil: %d\n", stencil); + fprintf(fp, "comm_vars: %d\n", comm_vars); + fprintf(fp, "error_tol: %d\n", error_tol); + + fprintf(fp, "total_time: %lf\n", average[0]); + fprintf(fp, "memory_used: %lf\n", average[111]); + fprintf(fp, "compute_time: %lf\n", average[38]); + fprintf(fp, "total_gflops: %lf\n", total_gflops); + fprintf(fp, "ave_gflops: %lf\n", gflops_rank); + + fprintf(fp, "total_comm: %lf\n", average[37]); + fprintf(fp, " total_exch_same: %lf\n", average[5]); + fprintf(fp, " total_exch_diff: %lf\n", average[6]); + fprintf(fp, " total_apply_bc: %lf\n", average[7]); + fprintf(fp, " total_face_exch_same: %lf\n", average[75]); + fprintf(fp, " total_face_exch_diff: %lf\n", average[76]); + fprintf(fp, " total_face_bc_apply: %lf\n", average[74]); + + fprintf(fp, " x_comm: %lf\n", average[10]); + fprintf(fp, " x_exch_same: %lf\n", average[14]); + fprintf(fp, " x_exch_diff: %lf\n", average[15]); + fprintf(fp, " x_apply_bc: %lf\n", average[16]); + fprintf(fp, " x_face_exch_same: %lf\n", average[84]); + fprintf(fp, " x_face_exch_diff: %lf\n", average[85]); + fprintf(fp, " x_face_bc_apply: %lf\n", average[83]); + + fprintf(fp, " y_comm: %lf\n", average[19]); + fprintf(fp, " y_exch_same: %lf\n", average[23]); + fprintf(fp, " y_exch_diff: %lf\n", average[24]); + fprintf(fp, " y_apply_bc: %lf\n", average[25]); + fprintf(fp, " y_face_exch_same: %lf\n", average[93]); + fprintf(fp, " y_face_exch_diff: %lf\n", average[94]); + fprintf(fp, " y_face_bc_apply: %lf\n", average[92]); + + fprintf(fp, " z_comm: %lf\n", average[28]); + fprintf(fp, " z_exch_same: %lf\n", average[32]); + fprintf(fp, " z_exch_diff: %lf\n", average[33]); + fprintf(fp, " z_apply_bc: %lf\n", average[34]); + fprintf(fp, " z_face_exch_same: %lf\n", average[102]); + fprintf(fp, " z_face_exch_diff: %lf\n", average[103]); + fprintf(fp, " z_face_bc_apply: %lf\n", average[101]); + + fprintf(fp, "gridsum_time: %lf\n", average[39]); + fprintf(fp, " gridsum_calc: %lf\n", average[41]); + + fprintf(fp, "refine_time: %lf\n", average[42]); + fprintf(fp, " total_blocks_ts: %lf\n", + ((double) total_blocks)/((double) (num_tsteps*stages_per_ts))); + fprintf(fp, " total_blocks_ts_min: %d\n", nb_min); + fprintf(fp, " total_blocks_ts_max: %d\n", nb_max); + fprintf(fp, " blocks_split: %lf\n", average[104]); + fprintf(fp, " blocks_reformed: %lf\n", average[105]); + fprintf(fp, " time_compare_obj: %lf\n", average[43]); + fprintf(fp, " time_mark_refine: %lf\n", average[44]); + fprintf(fp, " time_split_block: %lf\n", average[46]); + fprintf(fp, " time_total_coarsen: %lf\n", average[47]); + fprintf(fp, " time_misc: %lf\n", average[45]); + + fprintf(fp, "plot_time: %lf\n", average[67]); + + fclose(fp); + } + + if (report_perf & 2) { + fp = fopen("results.txt", "w"); + + fprintf(fp, "\n ================ Start report ===================\n\n"); + fprintf(fp, " Mantevo miniAMR\n"); + fprintf(fp, " version %s\n\n", version); + + fprintf(fp, "serial run on 1 rank\n"); + fprintf(fp, "initial blocks per rank %d x %d x %d\n", init_block_x, + init_block_y, init_block_z); + fprintf(fp, "block size %d x %d x %d\n", x_block_size, y_block_size, + z_block_size); + if (permute) + fprintf(fp, "Order of exchanges permuted\n"); + fprintf(fp, "Maximum number of blocks per rank is %d\n", + max_num_blocks); + if (target_active) + fprintf(fp, "Target number of blocks per rank is %d\n", + target_active); + if (target_max) + fprintf(fp, "Target max number of blocks per rank is %d\n", + target_max); + if (target_min) + fprintf(fp, "Target min number of blocks per rank is %d\n", + target_min); + if (code) + fprintf(fp, "Code set to code %d\n", code); + fprintf(fp, "Number of levels of refinement is %d\n", num_refine); + fprintf(fp, "Blocks can change by %d levels per refinement step\n", + block_change); + if (refine_ghost) + fprintf(fp, "Ghost cells will be used determine is block is refined\n"); + if (uniform_refine) + fprintf(fp, "\nBlocks will be uniformly refined\n"); + else { + fprintf(fp, "\nBlocks will be refined by %d objects\n\n", num_objects); + for (i = 0; i < num_objects; i++) { + op = &objects[i]; + if (op->type == 0) + fprintf(fp, "Object %d is the surface of a rectangle\n", i); + else if (op->type == 1) + fprintf(fp, "Object %d is the volume of a rectangle\n", i); + else if (op->type == 2) + fprintf(fp, "Object %d is the surface of a spheroid\n", i); + else if (op->type == 3) + fprintf(fp, "Object %d is the volume of a spheroid\n", i); + else if (op->type == 4) + fprintf(fp, "Object %d is the surface of x+ hemispheroid\n", i); + else if (op->type == 5) + fprintf(fp, "Object %d is the volume of x+ hemispheroid\n", i); + else if (op->type == 6) + fprintf(fp, "Object %d is the surface of x- hemispheroid\n", i); + else if (op->type == 7) + fprintf(fp, "Object %d is the volume of x- hemispheroid\n", i); + else if (op->type == 8) + fprintf(fp, "Object %d is the surface of y+ hemispheroid\n", i); + else if (op->type == 9) + fprintf(fp, "Object %d is the volume of y+ hemispheroid\n", i); + else if (op->type == 10) + fprintf(fp, "Object %d is the surface of y- hemispheroid\n", i); + else if (op->type == 11) + fprintf(fp, "Object %d is the volume of y- hemispheroid\n", i); + else if (op->type == 12) + fprintf(fp, "Object %d is the surface of z+ hemispheroid\n", i); + else if (op->type == 13) + fprintf(fp, "Object %d is the volume of z+ hemispheroid\n", i); + else if (op->type == 14) + fprintf(fp, "Object %d is the surface of z- hemispheroid\n", i); + else if (op->type == 15) + fprintf(fp, "Object %d is the volume of z- hemispheroid\n", i); + else if (op->type == 20) + fprintf(fp, "Object %d is the surface of x axis cylinder\n", i); + else if (op->type == 21) + fprintf(fp, "Object %d is the volune of x axis cylinder\n", i); + else if (op->type == 22) + fprintf(fp, "Object %d is the surface of y axis cylinder\n", i); + else if (op->type == 23) + fprintf(fp, "Object %d is the volune of y axis cylinder\n", i); + else if (op->type == 24) + fprintf(fp, "Object %d is the surface of z axis cylinder\n", i); + else if (op->type == 25) + fprintf(fp, "Object %d is the volune of z axis cylinder\n", i); + if (op->bounce == 0) + fprintf(fp, "Oject may leave mesh\n"); + else + fprintf(fp, "Oject center will bounce off of walls\n"); + fprintf(fp, "Center starting at %lf %lf %lf\n", + op->orig_cen[0], op->orig_cen[1], op->orig_cen[2]); + fprintf(fp, "Center end at %lf %lf %lf\n", + op->cen[0], op->cen[1], op->cen[2]); + fprintf(fp, "Moving at %lf %lf %lf per timestep\n", + op->orig_move[0], op->orig_move[1], op->orig_move[2]); + fprintf(fp, " Rate relative to smallest cell size %lf %lf %lf\n", + op->orig_move[0]*((double) (mesh_size[0]*x_block_size)), + op->orig_move[1]*((double) (mesh_size[1]*y_block_size)), + op->orig_move[2]*((double) (mesh_size[2]*z_block_size))); + fprintf(fp, "Initial size %lf %lf %lf\n", + op->orig_size[0], op->orig_size[1], op->orig_size[2]); + fprintf(fp, "Final size %lf %lf %lf\n", + op->size[0], op->size[1], op->size[2]); + fprintf(fp, "Size increasing %lf %lf %lf per timestep\n", + op->inc[0], op->inc[1], op->inc[2]); + fprintf(fp, " Rate relative to smallest cell size %lf %lf %lf\n\n", + op->inc[0]*((double) (mesh_size[0]*x_block_size)), + op->inc[1]*((double) (mesh_size[1]*y_block_size)), + op->inc[2]*((double) (mesh_size[2]*z_block_size))); + } + } + fprintf(fp, "\nNumber of timesteps is %d\n", num_tsteps); + fprintf(fp, "Communicaion/computation stages per timestep is %d\n", + stages_per_ts); + fprintf(fp, "Will perform checksums every %d stages\n", checksum_freq); + fprintf(fp, "Will refine every %d timesteps\n", refine_freq); + if (plot_freq) + fprintf(fp, "Will plot results every %d timesteps\n", plot_freq); + else + fprintf(fp, "Will not plot results\n"); + fprintf(fp, "Calculate on %d variables with %d point stencil\n", + num_vars, stencil); + fprintf(fp, "Communicate %d variables at a time\n", comm_vars); + fprintf(fp, "Error tolorance for variable sums is 10^(-%d)\n", error_tol); + + fprintf(fp, "\nTotal time for test: (sec): %lf\n\n", average[0]); + + fprintf(fp, "\nNumber of malloc calls: %lf\n", average[110]); + fprintf(fp, "\nAmount malloced: %lf\n", average[111]); + + fprintf(fp, "---------------------------------------------\n"); + fprintf(fp, " Computational Performance\n"); + fprintf(fp, "---------------------------------------------\n\n"); + fprintf(fp, " Time: ave, stddev, min, max (sec): %lf\n\n", + average[38]); + fprintf(fp, " total GFLOPS: %lf\n", total_gflops); + fprintf(fp, " Average GFLOPS per rank: %lf\n\n", gflops_rank); + fprintf(fp, " Total floating point ops: %lf\n\n", total_fp_ops); + fprintf(fp, " Adds: %lf\n", total_fp_adds); + fprintf(fp, " Divides: %lf\n\n", total_fp_divs); + + fprintf(fp, "---------------------------------------------\n"); + fprintf(fp, " Interblock communication\n"); + fprintf(fp, "---------------------------------------------\n\n"); + fprintf(fp, " Time: ave, stddev, min, max (sec): %lf\n\n", + average[37]); + for (i = 0; i < 4; i++) { + if (i == 0) + fprintf(fp, "\nTotal communication:\n\n"); + else if (i == 1) + fprintf(fp, "\nX direction communication statistics:\n\n"); + else if (i == 2) + fprintf(fp, "\nY direction communication statistics:\n\n"); + else + fprintf(fp, "\nZ direction communication statistics:\n\n"); + fprintf(fp, " average stddev minimum maximum\n"); + fprintf(fp, " Total time : %lf\n", average[1+9*i]); + fprintf(fp, " Exchange same level : %lf\n", average[5+9*i]); + fprintf(fp, " Exchange diff level : %lf\n", average[6+9*i]); + fprintf(fp, " Apply BC : %lf\n", average[7+9*i]); + + fprintf(fp, " Faces exchanged same : %lf\n", average[75+9*i]); + fprintf(fp, " Faces exchanged diff : %lf\n", average[76+9*i]); + fprintf(fp, " Faces with BC applied : %lf\n", average[74+9*i]); + } + + fprintf(fp, "\n---------------------------------------------\n"); + fprintf(fp, " Gridsum performance\n"); + fprintf(fp, "---------------------------------------------\n\n"); + fprintf(fp, " Time: ave, stddev, min, max (sec): %lf\n\n", + average[39]); + fprintf(fp, " calc: ave, stddev, min, max (sec): %lf\n\n", + average[41]); + fprintf(fp, " total number: %d\n", total_red); + fprintf(fp, " number per timestep: %d\n\n", num_vars); + + fprintf(fp, "---------------------------------------------\n"); + fprintf(fp, " Mesh Refinement\n"); + fprintf(fp, "---------------------------------------------\n\n"); + fprintf(fp, " Time: ave, stddev, min, max (sec): %lf\n\n", + average[42]); + fprintf(fp, " Number of refinement steps: %d\n\n", nrs); + fprintf(fp, " Total blocks : %ld\n", total_blocks); + fprintf(fp, " Blocks/timestep ave, min, max : %lf %d %d\n", + ((double) total_blocks)/((double) (num_tsteps*stages_per_ts)), + nb_min, nb_max); + fprintf(fp, " Max blocks on a processor at any time: %d\n", + global_max_b); + fprintf(fp, " total blocks split : %lf\n", average[104]); + fprintf(fp, " total blocks reformed : %lf\n\n", average[105]); + + fprintf(fp, " Time:\n"); + fprintf(fp, " compare objects : %lf\n", average[43]); + fprintf(fp, " mark refine/coarsen : %lf\n", average[44]); + fprintf(fp, " split blocks : %lf\n", average[46]); + fprintf(fp, " total coarsen blocks: %lf\n", average[47]); + fprintf(fp, " misc time : %lf\n", average[45]); + if (target_active) { + fprintf(fp, " total target active : %lf\n", average[52]); + fprintf(fp, " reduce blocks : %lf\n", average[53]); + fprintf(fp, " decide and comm : %lf\n", average[54]); + fprintf(fp, " coarsen blocks : %lf\n", average[58]); + fprintf(fp, " add blocks : %lf\n", average[59]); + fprintf(fp, " decide and comm : %lf\n", average[60]); + fprintf(fp, " split blocks : %lf\n", average[61]); + } + + fprintf(fp, "---------------------------------------------\n"); + fprintf(fp, " Plot\n"); + fprintf(fp, "---------------------------------------------\n\n"); + fprintf(fp, " Time: ave, stddev, min, max (sec): %lf\n\n", + average[67]); + fprintf(fp, " Number of plot steps: %d\n", nps); + fprintf(fp, "\n ================== End report ===================\n"); + + fclose(fp); + } + + if (report_perf & 4) { + printf("\n ================ Start report ===================\n\n"); + printf(" Mantevo miniAMR\n"); + printf(" version %s\n\n", version); + + printf("serial run on 1 rank\n"); + printf("initial blocks per rank %d x %d x %d\n", init_block_x, + init_block_y, init_block_z); + printf("block size %d x %d x %d\n", x_block_size, y_block_size, + z_block_size); + if (permute) + printf("Order of exchanges permuted\n"); + printf("Maximum number of blocks per rank is %d\n", max_num_blocks); + if (target_active) + printf("Target number of blocks per rank is %d\n", target_active); + if (target_max) + printf("Target max number of blocks per rank is %d\n", target_max); + if (target_min) + printf("Target min number of blocks per rank is %d\n", target_min); + if (code) + printf("Code set to code %d\n", code); + printf("Number of levels of refinement is %d\n", num_refine); + printf("Blocks can change by %d levels per refinement step\n", + block_change); + if (refine_ghost) + printf("Ghost cells will be used determine is block is refined\n"); + if (uniform_refine) + printf("\nBlocks will be uniformly refined\n"); + else { + printf("\nBlocks will be refined by %d objects\n\n", num_objects); + for (i = 0; i < num_objects; i++) { + op = &objects[i]; + if (op->type == 0) + printf("Object %d is the surface of a rectangle\n", i); + else if (op->type == 1) + printf("Object %d is the volume of a rectangle\n", i); + else if (op->type == 2) + printf("Object %d is the surface of a spheroid\n", i); + else if (op->type == 3) + printf("Object %d is the volume of a spheroid\n", i); + else if (op->type == 4) + printf("Object %d is the surface of x+ hemispheroid\n", i); + else if (op->type == 5) + printf("Object %d is the volume of x+ hemispheroid\n", i); + else if (op->type == 6) + printf("Object %d is the surface of x- hemispheroid\n", i); + else if (op->type == 7) + printf("Object %d is the volume of x- hemispheroid\n", i); + else if (op->type == 8) + printf("Object %d is the surface of y+ hemispheroid\n", i); + else if (op->type == 9) + printf("Object %d is the volume of y+ hemispheroid\n", i); + else if (op->type == 10) + printf("Object %d is the surface of y- hemispheroid\n", i); + else if (op->type == 11) + printf("Object %d is the volume of y- hemispheroid\n", i); + else if (op->type == 12) + printf("Object %d is the surface of z+ hemispheroid\n", i); + else if (op->type == 13) + printf("Object %d is the volume of z+ hemispheroid\n", i); + else if (op->type == 14) + printf("Object %d is the surface of z- hemispheroid\n", i); + else if (op->type == 15) + printf("Object %d is the volume of z- hemispheroid\n", i); + else if (op->type == 20) + printf("Object %d is the surface of x axis cylinder\n", i); + else if (op->type == 21) + printf("Object %d is the volune of x axis cylinder\n", i); + else if (op->type == 22) + printf("Object %d is the surface of y axis cylinder\n", i); + else if (op->type == 23) + printf("Object %d is the volune of y axis cylinder\n", i); + else if (op->type == 24) + printf("Object %d is the surface of z axis cylinder\n", i); + else if (op->type == 25) + printf("Object %d is the volune of z axis cylinder\n", i); + if (op->bounce == 0) + printf("Oject may leave mesh\n"); + else + printf("Oject center will bounce off of walls\n"); + printf("Center starting at %lf %lf %lf\n", + op->orig_cen[0], op->orig_cen[1], op->orig_cen[2]); + printf("Center end at %lf %lf %lf\n", + op->cen[0], op->cen[1], op->cen[2]); + printf("Moving at %lf %lf %lf per timestep\n", + op->orig_move[0], op->orig_move[1], op->orig_move[2]); + printf(" Rate relative to smallest cell size %lf %lf %lf\n", + op->orig_move[0]*((double) (mesh_size[0]*x_block_size)), + op->orig_move[1]*((double) (mesh_size[1]*y_block_size)), + op->orig_move[2]*((double) (mesh_size[2]*z_block_size))); + printf("Initial size %lf %lf %lf\n", + op->orig_size[0], op->orig_size[1], op->orig_size[2]); + printf("Final size %lf %lf %lf\n", + op->size[0], op->size[1], op->size[2]); + printf("Size increasing %lf %lf %lf per timestep\n", + op->inc[0], op->inc[1], op->inc[2]); + printf(" Rate relative to smallest cell size %lf %lf %lf\n\n", + op->inc[0]*((double) (mesh_size[0]*x_block_size)), + op->inc[1]*((double) (mesh_size[1]*y_block_size)), + op->inc[2]*((double) (mesh_size[2]*z_block_size))); + } + } + printf("\nNumber of timesteps is %d\n", num_tsteps); + printf("Communicaion/computation stages per timestep is %d\n", + stages_per_ts); + printf("Will perform checksums every %d stages\n", checksum_freq); + printf("Will refine every %d timesteps\n", refine_freq); + if (plot_freq) + printf("Will plot results every %d timesteps\n", plot_freq); + else + printf("Will not plot results\n"); + printf("Calculate on %d variables with %d point stencil\n", + num_vars, stencil); + printf("Communicate %d variables at a time\n", comm_vars); + printf("Error tolorance for variable sums is 10^(-%d)\n", error_tol); +/* + printf("\nTotal time for test (sec): %lf\n\n", average[0]); + + printf("\nNumber of malloc calls: %lf\n", average[110]); + printf("\nAmount malloced: %lf\n", average[111]); + + printf("---------------------------------------------\n"); + printf(" Computational Performance\n"); + printf("---------------------------------------------\n\n"); + printf(" Time: ave, stddev, min, max (sec): %lf\n\n", average[38]); + printf(" total GFLOPS: %lf\n", total_gflops); + printf(" Average GFLOPS per rank: %lf\n\n", gflops_rank); + printf(" Total floating point ops: %lf\n\n", total_fp_ops); + printf(" Adds: %lf\n", total_fp_adds); + printf(" Divides: %lf\n\n", total_fp_divs); + + printf("---------------------------------------------\n"); + printf(" Interblock communication\n"); + printf("---------------------------------------------\n\n"); + printf(" Time: ave, stddev, min, max (sec): %lf\n\n", average[37]); + for (i = 0; i < 4; i++) { + if (i == 0) + printf("\nTotal communication:\n\n"); + else if (i == 1) + printf("\nX direction communication statistics:\n\n"); + else if (i == 2) + printf("\nY direction communication statistics:\n\n"); + else + printf("\nZ direction communication statistics:\n\n"); + printf(" Total time : %lf\n", average[1+9*i]); + printf(" Exchange same level : %lf\n", average[5+9*i]); + printf(" Exchange diff level : %lf\n", average[6+9*i]); + printf(" Apply BC : %lf\n", average[7+9*i]); + + printf(" Faces exchanged same : %lf\n", average[75+9*i]); + printf(" Faces exchanged diff : %lf\n", average[76+9*i]); + printf(" Faces with BC applied : %lf\n", average[74+9*i]); + } + + printf("\n---------------------------------------------\n"); + printf(" Gridsum performance\n"); + printf("---------------------------------------------\n\n"); + printf(" Time: ave, stddev, min, max (sec): %lf\n\n", average[39]); + printf(" calc: ave, stddev, min, max (sec): %lf\n\n", + average[41]); + printf(" total number: %d\n", total_red); + printf(" number per timestep: %d\n\n", num_vars); + + printf("---------------------------------------------\n"); + printf(" Mesh Refinement\n"); + printf("---------------------------------------------\n\n"); + printf(" Time: ave, stddev, min, max (sec): %lf\n\n", average[42]); + printf(" Number of refinement steps: %d\n\n", nrs); + printf(" Total blocks : %ld\n", total_blocks); + printf(" Blocks/timestep ave, min, max : %lf %d %d\n", + ((double) total_blocks)/((double) (num_tsteps*stages_per_ts)), + nb_min, nb_max); + printf(" Max blocks on a processor at any time: %d\n", + global_max_b); + printf(" total blocks split : %lf\n", average[104]); + printf(" total blocks reformed : %lf\n\n", average[105]); + + printf(" Time:\n"); + printf(" compare objects : %lf\n", average[43]); + printf(" mark refine/coarsen : %lf\n", average[44]); + printf(" split blocks : %lf\n", average[46]); + printf(" total coarsen blocks: %lf\n", average[47]); + printf(" misc time : %lf\n", average[45]); + if (target_active) { + printf(" total target active : %lf\n", average[52]); + printf(" reduce blocks : %lf\n", average[53]); + printf(" decide and comm : %lf\n", average[54]); + printf(" coarsen blocks : %lf\n", average[58]); + printf(" add blocks : %lf\n", average[59]); + printf(" decide and comm : %lf\n", average[60]); + printf(" split blocks : %lf\n", average[61]); + } + + printf("---------------------------------------------\n"); + printf(" Plot\n"); + printf("---------------------------------------------\n\n"); + printf(" Time: ave, stddev, min, max (sec): %lf\n\n", average[67]); + printf(" Number of plot steps: %d\n", nps); + printf("\n ================== End report ===================\n"); +*/ + } + } +} + +void calculate_results(void) +{ + double results[128]; + int i; + + results[0] = timer_all; + for (i = 0; i < 9; i++) + results[i+1] = 0.0; + for (i = 0; i < 3; i++) { + results[1] += results[10+9*i] = timer_comm_dir[i]; + results[5] += results[14+9*i] = timer_comm_same[i]; + results[6] += results[15+9*i] = timer_comm_diff[i]; + results[7] += results[16+9*i] = timer_comm_bc[i]; + } + results[37] = timer_comm_all; + results[38] = timer_calc_all; + results[39] = timer_cs_all; + results[41] = timer_cs_calc; + results[42] = timer_refine_all; + results[43] = timer_refine_co; + results[44] = timer_refine_mr; + results[45] = timer_refine_cc; + results[46] = timer_refine_sb; + results[47] = timer_cb_all; + results[52] = timer_target_all; + results[53] = timer_target_rb; + results[54] = timer_target_dc; + results[58] = timer_target_cb; + results[59] = timer_target_ab; + results[60] = timer_target_da; + results[61] = timer_target_sb; + results[67] = timer_plot; + for (i = 0; i < 9; i++) + results[68+i] = 0.0; + for (i = 0; i < 3; i++) { + results[74] += results[83+9*i] = (double) counter_bc[i]; + results[75] += results[84+9*i] = (double) counter_same[i]; + results[76] += results[85+9*i] = (double) counter_diff[i]; + } + results[104] = (double) num_refined; + results[105] = (double) num_reformed; + results[110] = (double) counter_malloc; + results[111] = size_malloc; + results[112] = (double) counter_malloc_init; + results[113] = size_malloc_init; + results[114] = (double) (counter_malloc - counter_malloc_init); + results[115] = size_malloc - size_malloc_init; + + for (i = 0; i < 128; i++) + average[i] = results[i]; +} + +void init_profile(void) +{ + int i; + + timer_all = 0.0; + + timer_comm_all = 0.0; + for (i = 0; i < 3; i++) { + timer_comm_dir[i] = 0.0; + timer_comm_same[i] = 0.0; + timer_comm_diff[i] = 0.0; + timer_comm_bc[i] = 0.0; + } + + timer_calc_all = 0.0; + + timer_cs_all = 0.0; + timer_cs_calc = 0.0; + + timer_refine_all = 0.0; + timer_refine_co = 0.0; + timer_refine_mr = 0.0; + timer_refine_cc = 0.0; + timer_refine_sb = 0.0; + timer_cb_all = 0.0; + timer_target_all = 0.0; + timer_target_rb = 0.0; + timer_target_dc = 0.0; + timer_target_cb = 0.0; + timer_target_ab = 0.0; + timer_target_da = 0.0; + timer_target_sb = 0.0; + + timer_plot = 0.0; + + total_blocks = 0; + nrs = 0; + nps = 0; + num_refined = 0; + num_reformed = 0; + for (i = 0; i < 3; i++) { + counter_bc[i] = 0; + counter_same[i] = 0; + counter_diff[i] = 0; + } + total_red = 0; +} Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/proto.h =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/proto.h @@ -0,0 +1,92 @@ +// ************************************************************************ +// +// miniAMR: stencil computations with boundary exchange and AMR. +// +// Copyright (2014) Sandia Corporation. Under the terms of Contract +// DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government +// retains certain rights in this software. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// Questions? Contact Courtenay T. Vaughan (ctvaugh@sandia.gov) +// Richard F. Barrett (rfbarre@sandia.gov) +// +// ************************************************************************ + +#include + +// main.c +void print_help_message(void); +void allocate(void); +void deallocate(void); +int check_input(void); + +// block.c +void split_blocks(void); +void consolidate_blocks(void); +void add_sorted_list(int, int, int); +void del_sorted_list(int, int); +int find_sorted_list(int, int); + +// check_sum.c +double check_sum(int); + +// comm.c +void comm(int, int, int); +void on_proc_comm(int, int, int, int, int); +void on_proc_comm_diff(int, int, int, int, int, int, int); +void apply_bc(int, block *, int, int); + +// driver.c +void driver(void); + +// init.c +void init(void); + +// move.c +void move(void); +void check_objects(void); +int check_block(double cor[3][2]); + +// plot.c +void plot(int); + +// profile.c +void profile(void); +void calculate_results(void); +void init_profile(void); + +// refine.c +void refine(int); +int refine_level(void); +void reset_all(void); + +// stencil.c +void stencil_calc(int); + +// target.c +int reduce_blocks(); +void add_blocks(); +void zero_refine(void); + +// util.c +double timer(void); +void *ma_malloc(size_t, char *, int); + +// debug.c +void print_par(void); +void print_comm(int); +void print_block(int, int); +void print_blocks(int); +void print_parents(int); Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/refine.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/refine.c @@ -0,0 +1,303 @@ +// ************************************************************************ +// +// miniAMR: stencil computations with boundary exchange and AMR. +// +// Copyright (2014) Sandia Corporation. Under the terms of Contract +// DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government +// retains certain rights in this software. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// Questions? Contact Courtenay T. Vaughan (ctvaugh@sandia.gov) +// Richard F. Barrett (rfbarre@sandia.gov) +// +// ************************************************************************ + +#include + +#include "block.h" +#include "proto.h" +#include "timer.h" + +// This file contains routines that determine which blocks are going to +// be refined and which are going to be coarsened. +void refine(int ts) +{ + int i, j, n, in, min_b, max_b, sum_b, num_refine_step, num_split, + nm_r, nm_c, nm_t; + double ratio, t1, t2, t3, t4, t5; + block *bp; + + t4 = 0.0; + t1 = timer(); + + if (ts) + num_refine_step = block_change; + else + num_refine_step = num_refine; + + for (i = 0; i < num_refine_step; i++) { + for (j = num_refine; j >= 0; j--) + if (num_blocks[j]) { + cur_max_level = j; + break; + } + reset_all(); + if (uniform_refine) { + for (in = 0; in < sorted_index[num_refine+1]; in++) { + n = sorted_list[in].n; + bp = &blocks[n]; + if (bp->number >= 0) + if (bp->level < num_refine) + bp->refine = 1; + else + bp->refine = 0; + } + } else { + t2 = timer(); + check_objects(); + timer_refine_co += timer() - t2; + t4 += timer() - t2; + } + + t2 = timer(); + num_split = refine_level(); + t5 = timer(); + timer_refine_mr += t5 - t2; + t4 += t5 - t2; + + t2 = timer(); + split_blocks(); + t5 = timer(); + timer_refine_sb += t5 - t2; + t4 += t5 - t2; + + t2 = timer(); + consolidate_blocks(); + t5 = timer(); + timer_cb_all += t5 - t2; + t4 += t5 - t2; + } + + if (target_active || target_max || target_min) { + if (!my_pe) { + for (j = 0; j <= num_refine; j++) + printf("Number of blocks at level %d before target %d is %d\n", + j, ts, num_blocks[j]); + printf("\n"); + } + t2 = timer(); + global_active = num_blocks[0]; + for (i = 1; i <= num_refine; i++) + global_active += num_blocks[i]; + // Will not be able to get to target in all cases, but can get to + // a range of target +- 3 since can add or subtract in units of + // 7 blocks. + if ((target_active && global_active > num_pes*target_active + 3) || + (target_max && global_active > num_pes*target_max)) + nm_t += reduce_blocks(); + else if ((target_active && global_active < num_pes*target_active - 3) || + (target_min && global_active < num_pes*target_min)) + add_blocks(); + t5 = timer(); + timer_target_all += t5 - t2; + t4 += t5 - t2; + } + t2 = timer(); + if (num_active > local_max_b) + local_max_b = num_active; + if (local_max_b > global_max_b) + global_max_b = local_max_b; + for (j = 0; j <= num_refine; j++) { + if (!j) + global_active = num_blocks[0]; + else + global_active += num_blocks[j]; + if (!my_pe && report_perf & 8) + printf("Number of blocks at level %d at timestep %d is %d\n", + j, ts, num_blocks[j]); + } + if (!my_pe && report_perf & 8) printf("\n"); + t4 += timer() - t2; + + t5 = timer(); + timer_refine_cc += t5 - t1 - t4; +} + +int refine_level(void) +{ + int level, nei, n, i, j, b, c, c1, change, unrefine, sib, p, in; + block *bp, *bp1; + parent *pp; + + /* block states: + * 1 block should be refined + * -1 block could be unrefined + * 0 block at level 0 and can not be unrefined or + * at max level and can not be refined + */ + +// get list of neighbor blocks (indirect links in from blocks) + + for (level = cur_max_level; level >= 0; level--) { + /* check for blocks at this level that will refine + their neighbors at this level can not unrefine + their neighbors at a lower level must refine + */ + do { + change = 0; + for (in = sorted_index[level]; in < sorted_index[level+1]; in++) { + n = sorted_list[in].n; + bp = &blocks[n]; + if (bp->number >= 0 && bp->level == level) { + if (bp->refine == 1) { + if (bp->parent != -1 && bp->parent_node == my_pe) { + pp = &parents[bp->parent]; + if (pp->refine == -1) + pp->refine = 0; + for (b = 0; b < 8; b++) + if (pp->child_node[b] == my_pe && pp->child[b] >= 0) + if (blocks[pp->child[b]].refine == -1) { + blocks[pp->child[b]].refine = 0; + change++; + } + } + for (i = 0; i < 6; i++) + /* neighbors in level above taken care of already */ + /* neighbors in this level can not unrefine */ + if (bp->nei_level[i] == level) { + if ((nei = bp->nei[i][0][0]) >= 0) { /* on core */ + if (blocks[nei].refine == -1) { + blocks[nei].refine = 0; + change++; + if ((p = blocks[nei].parent) != -1 && + blocks[nei].parent_node == my_pe) { + if ((pp = &parents[p])->refine == -1) + pp->refine = 0; + for (b = 0; b < 8; b++) + if (pp->child_node[b] == my_pe && + pp->child[b] >= 0) + if (blocks[pp->child[b]].refine == -1) { + blocks[pp->child[b]].refine = 0; + change++; + } + } + } + } + /* neighbors in level below must refine */ + } else if (bp->nei_level[i] == level-1) + if ((nei = bp->nei[i][0][0]) >= 0) { + if (blocks[nei].refine != 1) { + blocks[nei].refine = 1; + change++; + } + } + } else if (bp->refine == -1) { + // check if block can be unrefined + for (i = 0; i < 6; i++) + if (bp->nei_level[i] == level+1) { + bp->refine = 0; + change++; + if ((p = bp->parent) != -1 && + bp->parent_node == my_pe) { + if ((pp = &parents[p])->refine == -1) + pp->refine = 0; + for (b = 0; b < 8; b++) + if (pp->child_node[b] == my_pe && + pp->child[b] >= 0 && + blocks[pp->child[b]].refine == -1) + blocks[pp->child[b]].refine = 0; + } + } + } + } + } + } while (change); + + /* Check for blocks at this level that will remain at this level + their neighbors at a lower level can not unrefine + */ + do { + change = 0; + for (in = sorted_index[level]; in < sorted_index[level+1]; in++) { + n = sorted_list[in].n; + bp = &blocks[n]; + if (bp->number >= 0) + if (bp->level == level && bp->refine == 0) + for (c = 0; c < 6; c++) + if (bp->nei_level[c] == level-1) { + if ((nei = bp->nei[c][0][0]) >= 0) { + if (blocks[nei].refine == -1) { + blocks[nei].refine = 0; + change++; + if ((p = blocks[nei].parent) != -1 && + blocks[nei].parent_node == my_pe) + if ((pp = &parents[p])->refine == -1) { + pp->refine = 0; + for (b = 0; b < 8; b++) + if (pp->child_node[b] == my_pe && + pp->child[b] >= 0 && + blocks[pp->child[b]].refine == -1) + blocks[pp->child[b]].refine = 0; + } + } + } + } else if (bp->nei_level[c] == level) { + if ((nei = bp->nei[c][0][0]) >= 0) + blocks[nei].nei_refine[(c/2)*2+(c+1)%2] = 0; + } else if (bp->nei_level[c] == level+1) { + c1 = (c/2)*2 + (c+1)%2; + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + if ((nei = bp->nei[c][i][j]) >= 0) + blocks[nei].nei_refine[c1] = 0; + } + } + } while (change); + } + + for (i = in = 0; in < sorted_index[num_refine+1]; in++) + if (blocks[sorted_list[in].n].refine == 1) + i++; + + return(i); +} + +// Reset the neighbor lists on blocks so that matching them against objects +// can set those which can be refined. +void reset_all(void) +{ + int n, c, in; + block *bp; + parent *pp; + + for (in = 0; in < sorted_index[num_refine+1]; in++) { + n = sorted_list[in].n; + if ((bp= &blocks[n])->number >= 0) + bp->refine = -1; + } + + for (n = 0; n < max_active_parent; n++) + if ((pp = &parents[n])->number >= 0) { + pp->refine = -1; + for (c = 0; c < 8; c++) + if (pp->child[c] < 0) + pp->refine = 0; + if (pp->refine == 0) + for (c = 0; c < 8; c++) + if (pp->child[c] >= 0) + if (blocks[pp->child[c]].refine == -1) + blocks[pp->child[c]].refine = 0; + } +} Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/stencil.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/stencil.c @@ -0,0 +1,102 @@ +// ************************************************************************ +// +// miniAMR: stencil computations with boundary exchange and AMR. +// +// Copyright (2014) Sandia Corporation. Under the terms of Contract +// DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government +// retains certain rights in this software. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// Questions? Contact Courtenay T. Vaughan (ctvaugh@sandia.gov) +// Richard F. Barrett (rfbarre@sandia.gov) +// +// ************************************************************************ + +#include "block.h" +#include "proto.h" + +// This routine does the stencil calculations. +void stencil_calc(int var) +{ + int n, i, j, k, in; + double sb, sm, sf, work[x_block_size+2][y_block_size+2][z_block_size+2]; + block *bp; + + if (stencil == 7) { + for (in = 0; in < sorted_index[num_refine+1]; in++) { + n = sorted_list[in].n; + bp = &blocks[n]; + if (bp->number >= 0) { + for (i = 1; i <= x_block_size; i++) + for (j = 1; j <= y_block_size; j++) + for (k = 1; k <= z_block_size; k++) + work[i][j][k] = (bp->array[var][i-1][j ][k ] + + bp->array[var][i ][j-1][k ] + + bp->array[var][i ][j ][k-1] + + bp->array[var][i ][j ][k ] + + bp->array[var][i ][j ][k+1] + + bp->array[var][i ][j+1][k ] + + bp->array[var][i+1][j ][k ])/7.0; + for (i = 1; i <= x_block_size; i++) + for (j = 1; j <= y_block_size; j++) + for (k = 1; k <= z_block_size; k++) + bp->array[var][i][j][k] = work[i][j][k]; + } + } + } else { + for (in = 0; in < sorted_index[num_refine+1]; in++) { + n = sorted_list[in].n; + bp = &blocks[n]; + if (bp->number >= 0) { + for (i = 1; i <= x_block_size; i++) + for (j = 1; j <= y_block_size; j++) + for (k = 1; k <= z_block_size; k++) { + sb = bp->array[var][i-1][j-1][k-1] + + bp->array[var][i-1][j-1][k ] + + bp->array[var][i-1][j-1][k+1] + + bp->array[var][i-1][j ][k-1] + + bp->array[var][i-1][j ][k ] + + bp->array[var][i-1][j ][k+1] + + bp->array[var][i-1][j+1][k-1] + + bp->array[var][i-1][j+1][k ] + + bp->array[var][i-1][j+1][k+1]; + sm = bp->array[var][i ][j-1][k-1] + + bp->array[var][i ][j-1][k ] + + bp->array[var][i ][j-1][k+1] + + bp->array[var][i ][j ][k-1] + + bp->array[var][i ][j ][k ] + + bp->array[var][i ][j ][k+1] + + bp->array[var][i ][j+1][k-1] + + bp->array[var][i ][j+1][k ] + + bp->array[var][i ][j+1][k+1]; + sf = bp->array[var][i+1][j-1][k-1] + + bp->array[var][i+1][j-1][k ] + + bp->array[var][i+1][j-1][k+1] + + bp->array[var][i+1][j ][k-1] + + bp->array[var][i+1][j ][k ] + + bp->array[var][i+1][j ][k+1] + + bp->array[var][i+1][j+1][k-1] + + bp->array[var][i+1][j+1][k ] + + bp->array[var][i+1][j+1][k+1]; + work[i][j][k] = (sb + sm + sf)/27.0; + } + for (i = 1; i <= x_block_size; i++) + for (j = 1; j <= y_block_size; j++) + for (k = 1; k <= z_block_size; k++) + bp->array[var][i][j][k] = work[i][j][k]; + } + } + } +} Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/target.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/target.c @@ -0,0 +1,134 @@ +// ************************************************************************ +// +// miniAMR: stencil computations with boundary exchange and AMR. +// +// Copyright (2014) Sandia Corporation. Under the terms of Contract +// DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government +// retains certain rights in this software. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// Questions? Contact Courtenay T. Vaughan (ctvaugh@sandia.gov) +// Richard F. Barrett (rfbarre@sandia.gov) +// +// ************************************************************************ + +#include "block.h" +#include "proto.h" +#include "timer.h" + +// This file contains routines that modify the number of blocks so that the +// number is close (+- 3) to the target number of blocks for the problem. +int reduce_blocks() +{ + int l, i, j, p, c, num_comb, comb, num_parents, nm_t; + double t1, t2, t3; + parent *pp; + + nm_t = 0; + t3 = 0.0; + t1 = timer(); + + zero_refine(); + if (target_active) + num_comb = (global_active - num_pes*target_active + 3)/7; + else + num_comb = (global_active - num_pes*target_active)/7; + + for (comb = 0, l = num_refine-1; comb < num_comb; l--) { + for (p = 0; p < max_active_parent; p++) + if ((pp = &parents[p])->number >= 0) + if (pp->level == l) + num_parents++; + + for (p = 0; p < max_active_parent && comb < num_comb; p++) + if ((pp = &parents[p])->number >= 0) + if (pp->level == l) { + pp->refine = -1; + comb++; + for (c = 0; c < 8; c++) + if (pp->child_node[c] == my_pe && pp->child[c] >= 0) + blocks[pp->child[c]].refine = -1; + } + + t2 = timer() - t2; + consolidate_blocks(); + t3 += timer() - t2; + } + timer_target_rb += timer() - t1; + timer_target_dc += timer() - t1 - t3; + timer_target_cb += t3; + + return(nm_t); +} + +void add_blocks() +{ + int l, i, j, n, in, num_split, split; + double t1, t2, t3; + block *bp; + + t3 = 0.0; + t1 = timer(); + + if (target_active) + num_split = (num_pes*target_active + 3 - global_active)/7; + else + num_split = (num_pes*target_active - global_active)/7; + + for (split = l = 0; split < num_split; l++) { + zero_refine(); + for (j = num_refine; j >= 0; j--) + if (num_blocks[j]) { + cur_max_level = j; + break; + } + for (in = 0; split < num_split && in < sorted_index[num_refine+1]; in++) { + n = sorted_list[in].n; + if ((bp = &blocks[n])->number >= 0) + if (bp->level == l) { + bp->refine = 1; + split++; + } + } + + t2 = timer(); + split_blocks(); + t3 += timer() - t2; + } + timer_target_ab += timer() - t1; + timer_target_da += timer() - t1 - t3; + timer_target_sb += t3; +} + +void zero_refine(void) +{ + int n, c, in; + block *bp; + parent *pp; + + for (in = 0; in < sorted_index[num_refine+1]; in++) { + n = sorted_list[in].n; + if ((bp= &blocks[n])->number >= 0) { + bp->refine = 0; + for (c = 0; c < 6; c++) + if (bp->nei_level[c] >= 0) + bp->nei_refine[c] = 0; + } + } + + for (n = 0; n < max_active_parent; n++) + if ((pp = &parents[n])->number >= 0) + pp->refine = 0; +} Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/timer.h =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/timer.h @@ -0,0 +1,75 @@ +// ************************************************************************ +// +// miniAMR: stencil computations with boundary exchange and AMR. +// +// Copyright (2014) Sandia Corporation. Under the terms of Contract +// DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government +// retains certain rights in this software. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// Questions? Contact Courtenay T. Vaughan (ctvaugh@sandia.gov) +// Richard F. Barrett (rfbarre@sandia.gov) +// +// ************************************************************************ + +double average[128]; +double stddev[128]; +double minimum[128]; +double maximum[128]; + +double timer_all; + +double timer_comm_all; +double timer_comm_dir[3]; +double timer_comm_same[3]; +double timer_comm_diff[3]; +double timer_comm_bc[3]; + +double timer_calc_all; + +double timer_cs_all; +double timer_cs_calc; + +double timer_refine_all; +double timer_refine_co; +double timer_refine_mr; +double timer_refine_sb; +double timer_refine_cc; +double timer_cb_all; +double timer_target_all; +double timer_target_rb; +double timer_target_dc; +double timer_target_cb; +double timer_target_ab; +double timer_target_da; +double timer_target_sb; + +double timer_plot; + +long total_blocks; +int nb_min; +int nb_max; +int nrs; +int nps; +int num_refined; +int num_reformed; +int counter_bc[3]; +int counter_same[3]; +int counter_diff[3]; +int counter_malloc; +double size_malloc; +int counter_malloc_init; +double size_malloc_init; +int total_red; Index: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/util.c =================================================================== --- /dev/null +++ MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/util.c @@ -0,0 +1,55 @@ +// ************************************************************************ +// +// miniAMR: stencil computations with boundary exchange and AMR. +// +// Copyright (2014) Sandia Corporation. Under the terms of Contract +// DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government +// retains certain rights in this software. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// Questions? Contact Courtenay T. Vaughan (ctvaugh@sandia.gov) +// Richard F. Barrett (rfbarre@sandia.gov) +// +// ************************************************************************ + +#include +#include +#include + +#include "block.h" +#include "proto.h" +#include "timer.h" + +double timer(void) +{ + return((((double) clock())/((double) CLOCKS_PER_SEC))); +} + +void *ma_malloc(size_t size, char *file, int line) +{ + void *ptr; + + ptr = (void *) malloc(size); + + if (ptr == NULL) { + printf("NULL pointer from malloc call in %s at %d\n", file, line); + exit(-1); + } + + counter_malloc++; + size_malloc += (double) size; + + return(ptr); +}