diff --git a/Fortran/CMakeLists.txt b/Fortran/CMakeLists.txt --- a/Fortran/CMakeLists.txt +++ b/Fortran/CMakeLists.txt @@ -7,3 +7,4 @@ # This file should only contain add_subdirectory(...) one for each test # directory add_subdirectory(UnitTests) +add_subdirectory(SNAP) diff --git a/Fortran/SNAP/CMakeLists.txt b/Fortran/SNAP/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/CMakeLists.txt @@ -0,0 +1,13 @@ +################################################################################ +# +# CMakeLists.txt +# +# Author: Anthony Cabrera: +# Contact: cabreraam AT ieee DOT org +# Description: CMakeLists.txt replacement for SNAP Makefile +# +################################################################################ + +cmake_minimum_required(VERSION 3.18) + +add_subdirectory(src) diff --git a/Fortran/SNAP/LICENSE.md b/Fortran/SNAP/LICENSE.md new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/LICENSE.md @@ -0,0 +1,43 @@ +# License +Los Alamos National Security, LLC owns the copyright to "SNAP: SN (Discrete +Ordinates) Application Proxy, Version 1.x (C13087)". The license is BSD with +standard clauses regarding indicating modifications before future +redistribution: + +Copyright (c) 2013, Los Alamos National Security, LLC All rights reserved. + +Copyright 2013. Los Alamos National Security, LLC. This software was produced +under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National +Laboratory (LANL), which is operated by Los Alamos National Security, LLC for +the U.S. Department of Energy. The U.S. Government has rights to use, +reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS +ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR +ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified +to produce derivative works, such modified software should be clearly marked, +so as not to confuse it with the version available from LANL. 
+ +Additionally, redistribution and use in source and binary forms, with or +without modification, are permitted provided that the following conditions +are met: + ++ Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. ++ Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. ++ Neither the name of Los Alamos National Security, LLC, Los Alamos National +Laboratory, LANL, the U.S. Government, nor the names of its contributors may +be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY LOS ALAMOS NATIONAL SECURITY, LLC AND +CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT +NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL +SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/Fortran/SNAP/README.md b/Fortran/SNAP/README.md new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/README.md @@ -0,0 +1,418 @@ +SNAP: SN (Discrete Ordinates) Application Proxy +=============================================== + +llvm-test-suite +----------- +This repository was modified to serve as a Fortran regression test in the llvm-test-suite + +Documents that describe the code base can be found in the original repository's +[docs](https://github.com/lanl/SNAP/tree/main/docs) repository. + + +Description +----------- + +SNAP serves as a proxy application to model the performance of a modern discrete ordinates neutral particle transport application. SNAP may be considered an update to [Sweep3D](http://www.ccs3.lanl.gov/PAL/software.shtml), intended for hybrid computing architectures. It is modeled off the Los Alamos National Laboratory code PARTISN. PARTISN solves the linear Boltzmann transport equation (TE), a governing equation for determining the number of neutral particles (e.g., neutrons and gamma rays) in a multi-dimensional phase space. SNAP itself is not a particle transport application; SNAP incorporates no actual physics in its available data, nor does it use numerical operators specifically designed for particle transport. Rather, SNAP mimics the computational workload, memory requirements, and communication patterns of PARTISN. The equation it solves has been composed to use the same number of operations, use the same data layout, and load elements of the arrays in approximately the same order. Although the equation SNAP solves looks similar to the TE, it has no real world relevance. + +The solution to the time-dependent TE is a "flux" function of seven independent variables: three spatial (3-D spatial mesh), two angular (set of discrete ordinates, directions in which particles travel), one energy (particle speeds binned into "groups"), and one temporal. 
PARTISN, and therefore SNAP, uses domain decomposition over these dimensions to coherently distribute the data and the tasks associated with solving the equation. The parallelization strategy is expected to be the most efficient compromise between computing resources and the iterative strategy necessary to converge the flux. + +The iterative strategy is comprised of a set of two nested loops. These nested loops are performed for each step of a time-dependent calculation, wherein any particular time step requires information from the preceding one. No parallelization is performed over the temporal domain. However, for time-dependent calculations two copies of the unknown flux must be stored, each copy an array of the six remaining dimensions. The outer iterative loop involves solving for the flux over the energy domain with updated information about coupling among the energy groups. Typical calculations require tens to hundreds of groups, making the energy domain suitable for threading with the node's (or nodes') provided accelerator. The inner loop involves sweeping across the entire spatial mesh along each discrete direction of the angular domain. The spatial mesh may be immensely large. Therefore, SNAP spatially decomposes the problem across nodes and communicates needed information according to the KBA method. KBA is a transport-specific application of general parallel wavefront methods. Nested threads, spawned by the energy group threads, are available to use in one of two ways. Per one approach, nested threads may be used to further parallelize the work to sweep different energy groups assigned to a main-level thread. This option is still experimental and has only been implemented to work in the case of using a single MPI process. 
Alternatively, nested threads are used to perform "mini KBA" sweeps by concurrently operating on cells lying on the same diagonal of spatial sub-domains already decomposed across the distributed memory architecture (i.e., different MPI ranks). Lastly, although KBA efficiency is improved by pipelining operations according to the angle, current chipsets operate best with vectorized operations. During a mesh sweep, SNAP operations are vectorized over angles to take advantage of the modern hardware. + +SNAP should be tested with problem sizes that accurately reflect the types of calculations PARTISN frequently handles. The spatial domain shall be decomposed to 2,000--4,000 cells per node (MPI rank). Each node will own all the energy groups and angles for that group of cells; typical calculations feature 10--100 energy groups and as few as 100 to as many as 2,000 angles. Moreover, sufficient memory must be provided to store two full copies of the solution vector for time-dependent calculations. The preceding parameters assume current trends in available per core memory. Significant advances or detriments affecting this assumption shall require reconsideration of appropriate parameters per compute node. + +Compilation +----------- + +SNAP has been written to the Fortran 90/95 standard primarily. The retrieval of command line arguments, which contain file names, is handled with a standard Fortran 2003 intrinsic subroutine. It has been successfully built with, but not necessarily limited to, gfortran and ifort. Moreover, the code has been built with the profiling tool [Byfl](https://github.com/losalamos/byfl). The accompanying Makefile provides sample build options for gfortran and ifort. The build system depends on the availability of MPI. Both example builds assume the usage of mpif90 from an MPI installation. Builds may be selected by switching the COMPILER option in the Makefile or choosing one with the "make COMPILER=[]" command. 
The builds also assume the availability of OpenMP. Compiling SNAP without MPI or OpenMP will require modification to the source code to remove related subroutine calls and directives. + +MPI implementations typically suggest using a "wrapper" compiler to compile the code. SNAP has been built and tested with OpenMPI and MPICH. OpenMPI allows one to set the underlying Fortran compiler with the environment variable OMPI_FC, where the variable is set to the (path and) compiler of choice, e.g., ifort, gfortran, etc. + +The makefile currently is set up for several build options using different MPI wrappers and Fortran compilers. One example uses: + + FORTRAN = mpif90 + COMPILER = ifort + +and testing has been performed with + + OMPI_FC = [path]/ifort + +Fortran compilation flags can be set according to the underlying compiler. The current flags are set for the ifort compiler and using OpenMP for parallel threading. + + TARGET = isnap + FFLAGS = -O3 -[q]openmp -align array32byte -fp-model fast -fp-speculation fast -xHost + FFLAG2 = + +where `FFLAG2` is reserved for additional flags that may need to be applied differently, depending on the compiler. To make SNAP with these default settings, simply type + + make + +on the command line within the SNAP directory. + +A debugging version of SNAP can be built by typing + + make OPT=no + +on the command line. The unoptimized, debugging version of SNAP features bounds checking, back-tracing an error, and the necessary debug compiler flags. With ifort, these flags appear as: + + FFLAGS = -O0 -[q]openmp -g -check bounds -traceback -warn unused + FFLAG2 = + +The values for these compilation variables have been modified for various Fortran compilers and the Makefile provides details of what has been used previously. These lines are commented out for clarity at this time and to ensure that changes to the build system are made carefully before attempting to rebuild with a different compiler. 
+ +The SNAP directory can be cleaned up of its module and object files if the user desires with: + + make clean + +This removes all the `*.mod` and `*.o` files, as well as `*.bc` files from Byfl builds. Moreover, it will enforce complete recompilation of all files upon the next instance of `make` or `make OPT=no`. Currently, there is no separate directory for the compilation files of separate optimized and unoptimized builds. The user must do a `make clean` before building the code if the previous build used the opposite command. + +Pre-processing has been added for the inclusion/exclusion of MPI and OpenMP. To build without MPI, OpenMP, or both, use the command lines, respectively: + + make MPI=no + make OPENMP=no + make MPI=no OPENMP=no + +Default make settings will build with MPI and OpenMP included. These options are further available with unoptimized settings, OPT=no. + +Lastly, a line count report is generated with: + + make count + +The line count report excludes blank lines and comments. It counts the number of code lines in all `*.f90` files and sums the results. The information is printed to the `Lines` file. + +Usage +----- + +When SNAP is built with MPI, to execute SNAP, use the following command: + + mpirun -n [#] [path]/snap [infile] [outfile] + +This command will automatically run with the number of threads specified by the input file, which is used to set the number of OpenMP threads, overwriting any environment variable to set the number of threads. Testing has shown that to ensure proper concurrency of work, the above command can be modified to + + mpirun -cpus-per-proc [#threads] -np [#procs] [path]/snap [infile] [outfile] + +The command line is read for the input/output file names. If one of the names is missing, the code will not execute. Moreover, the output file overwrites any pre-existing files of the same name. 
+ +The specific command to invoke a run with MPI and the corresponding options may be dependent on the specific machine used to execute. Most notably, the "aprun" command is used on Cray systems. + +Sample Input +------------ + +The following is a sample input of a SNAP job. Several other examples are provided as part of the small set of regression testing. For more information about the valid range of values and descriptions of the input variables, please see the user manual. + + ! Input from namelist + &invar + npey=2 + npez=2 + ichunk=2 + nthreads=2 + nnested=1 + ndimen=3 + nx=6 + lx=0.6 + ny=6 + ly=0.6 + nz=6 + lz=0.6 + nmom=1 + nang=10 + ng=4 + epsi=1.0E-4 + iitm=5 + oitm=30 + timedep=0 + tf=1.0 + nsteps=1 + mat_opt=0 + src_opt=0 + scatp=0 + it_det=0 + fluxp=0 + fixup=1 + soloutp=1 + kplane=0 + popout=0 + swp_typ=0 + / + +Sample Output +------------- + +The following is the corresponding output to the above case. A brief outline of the output file contents is version and run time information, echo of input (or default) values of the namelist variables, echo of relevant parameters after setup, iteration monitor, mid-plane flux output, and the timing summary. Warning and error messages may be printed throughout the output file to alert the user to some problem with the execution. Unlike errors, warnings do not cause program termination. + + SNAP: SN (Discrete Ordinates) Application Proxy + Version Number.. 1.05 + Version Date.. 
02-19-2015 + Ran on 2-20-2015 at time 10:53:26 + + ******************************************************************************** + + keyword Input Echo - Values from input or + + default + ******************************************************************************** + + NML=invar + npey= 2 + npez= 2 + ichunk= 2 + nthreads= 2 + nnested= 1 + ndimen= 3 + nx= 6 + ny= 6 + nz= 6 + lx= 6.0000E-01 + ly= 6.0000E-01 + lz= 6.0000E-01 + nmom= 1 + nang= 10 + ng= 4 + mat_opt= 0 + src_opt= 0 + scatp= 0 + epsi= 1.0000E-04 + iitm= 5 + oitm= 30 + timedep= 0 + tf= 1.0000E+00 + nsteps= 1 + swp_typ= 0 + it_det= 0 + soloutp= 1 + kplane= 0 + popout= 0 + fluxp= 0 + fixup= 1 + + ******************************************************************************** + + keyword Calculation Run-time Parameters Echo + ******************************************************************************** + + Geometry + ndimen = 3 + nx = 6 + ny = 6 + nz = 6 + lx = 6.0000E-01 + ly = 6.0000E-01 + lz = 6.0000E-01 + dx = 1.0000E-01 + dy = 1.0000E-01 + dz = 1.0000E-01 + + Sn + nmom = 1 + nang = 10 + noct = 8 + + w = 1.2500E-02 ... 
uniform weights + + mu eta xi + 5.00000000E-02 9.50000000E-01 3.08220700E-01 + 1.50000000E-01 8.50000000E-01 5.04975247E-01 + 2.50000000E-01 7.50000000E-01 6.12372436E-01 + 3.50000000E-01 6.50000000E-01 6.74536878E-01 + 4.50000000E-01 5.50000000E-01 7.03562364E-01 + 5.50000000E-01 4.50000000E-01 7.03562364E-01 + 6.50000000E-01 3.50000000E-01 6.74536878E-01 + 7.50000000E-01 2.50000000E-01 6.12372436E-01 + 8.50000000E-01 1.50000000E-01 5.04975247E-01 + 9.50000000E-01 5.00000000E-02 3.08220700E-01 + + Material Map + mat_opt = 0 --> nmat = 1 + Base material (default for every cell) = 1 + + Source Map + src_opt = 0 + Source strength per cell (where applied) = 1.0 + Source map: + Starting cell: ( 1, 1, 1 ) + Ending cell: ( 6, 6, 6 ) + + Pseudo Cross Sections Data + ng = 4 + + Material 1 + Group Total Absorption Scattering + 1 1.000000E+00 5.000000E-01 5.000000E-01 + 2 1.010000E+00 5.050000E-01 5.050000E-01 + 3 1.020000E+00 5.100000E-01 5.100000E-01 + 4 1.030000E+00 5.150000E-01 5.150000E-01 + + Solution Control Parameters + epsi = 1.0000E-04 + iitm = 5 + oitm = 30 + timedep = 0 + swp_typ = 0 + it_det = 0 + soloutp = 1 + kplane = 0 + popout = 0 + fluxp = 0 + fixup = 1 + + + Parallelization Parameters + npey = 2 + npez = 2 + nthreads = 2 + + Thread Support Level + 0 - MPI_THREAD_SINGLE + 1 - MPI_THREAD_FUNNELED + 2 - MPI_THREAD_SERIALIZED + 3 - MPI_THREAD_MULTIPLE + thread_level = 0 + + .FALSE. nested threading + nnested = 1 + + Parallel Computational Efficiency = 0.8889 + + ******************************************************************************** + + keyword Iteration Monitor + ******************************************************************************** + Outer + 1 Dfmxo= 3.5528E-01 No. Inners= 17 + 2 Dfmxo= 1.7376E-01 No. Inners= 14 + 3 Dfmxo= 8.6338E-03 No. Inners= 9 + + No. Outers= 3 No. 
Inners= 40 + + ******************************************************************************** + + keyword Scalar Flux Solution + ******************************************************************************** + + *********************************** + Group= 1 Z Mid-Plane= 4 + *********************************** + + y x 1 x 2 x 3 x 4 x 5 x 6 + 6 1.8403E-01 2.3461E-01 2.4743E-01 2.4743E-01 2.3461E-01 1.8403E-01 + 5 2.3461E-01 2.9818E-01 3.1572E-01 3.1572E-01 2.9818E-01 2.3461E-01 + 4 2.4743E-01 3.1572E-01 3.3604E-01 3.3604E-01 3.1572E-01 2.4743E-01 + 3 2.4743E-01 3.1572E-01 3.3604E-01 3.3604E-01 3.1572E-01 2.4743E-01 + 2 2.3461E-01 2.9818E-01 3.1572E-01 3.1572E-01 2.9818E-01 2.3461E-01 + 1 1.8403E-01 2.3461E-01 2.4743E-01 2.4743E-01 2.3461E-01 1.8403E-01 + + ******************************************************************************** + + + *********************************** + Group= 2 Z Mid-Plane= 4 + *********************************** + + y x 1 x 2 x 3 x 4 x 5 x 6 + 6 1.8434E-01 2.3510E-01 2.4797E-01 2.4797E-01 2.3510E-01 1.8434E-01 + 5 2.3510E-01 2.9891E-01 3.1654E-01 3.1654E-01 2.9891E-01 2.3510E-01 + 4 2.4797E-01 3.1654E-01 3.3697E-01 3.3697E-01 3.1654E-01 2.4797E-01 + 3 2.4797E-01 3.1654E-01 3.3697E-01 3.3697E-01 3.1654E-01 2.4797E-01 + 2 2.3510E-01 2.9891E-01 3.1654E-01 3.1654E-01 2.9891E-01 2.3510E-01 + 1 1.8434E-01 2.3510E-01 2.4797E-01 2.4797E-01 2.3510E-01 1.8434E-01 + + ******************************************************************************** + + + *********************************** + Group= 3 Z Mid-Plane= 4 + *********************************** + + y x 1 x 2 x 3 x 4 x 5 x 6 + 6 1.8990E-01 2.4282E-01 2.5648E-01 2.5648E-01 2.4282E-01 1.8990E-01 + 5 2.4282E-01 3.0956E-01 3.2828E-01 3.2828E-01 3.0956E-01 2.4282E-01 + 4 2.5648E-01 3.2828E-01 3.4996E-01 3.4996E-01 3.2828E-01 2.5648E-01 + 3 2.5648E-01 3.2828E-01 3.4996E-01 3.4996E-01 3.2828E-01 2.5648E-01 + 2 2.4282E-01 3.0956E-01 3.2828E-01 3.2828E-01 3.0956E-01 2.4282E-01 + 1 1.8990E-01 
2.4282E-01 2.5648E-01 2.5648E-01 2.4282E-01 1.8990E-01 + + ******************************************************************************** + + + *********************************** + Group= 4 Z Mid-Plane= 4 + *********************************** + + y x 1 x 2 x 3 x 4 x 5 x 6 + 6 2.2018E-01 2.8475E-01 3.0277E-01 3.0277E-01 2.8475E-01 2.2018E-01 + 5 2.8475E-01 3.6725E-01 3.9202E-01 3.9202E-01 3.6725E-01 2.8475E-01 + 4 3.0277E-01 3.9202E-01 4.2062E-01 4.2062E-01 3.9202E-01 3.0277E-01 + 3 3.0277E-01 3.9202E-01 4.2062E-01 4.2062E-01 3.9202E-01 3.0277E-01 + 2 2.8475E-01 3.6725E-01 3.9202E-01 3.9202E-01 3.6725E-01 2.8475E-01 + 1 2.2018E-01 2.8475E-01 3.0277E-01 3.0277E-01 2.8475E-01 2.2018E-01 + + ******************************************************************************** + + keyword Timing Summary + ******************************************************************************** + + Code Section Time (seconds) + ************** **************** + Parallel Setup 9.7394E-04 + Input 4.7112E-04 + Setup 7.1216E-04 + Solve 7.6568E-03 + Parameter Setup 2.8968E-04 + Outer Source 3.0041E-05 + Inner Iterations 7.0901E-03 + Inner Source 5.4121E-05 + Transport Sweeps 6.6264E-03 + Inner Misc Ops 4.0960E-04 + Solution Misc Ops 2.4700E-04 + Output 6.4492E-04 + Total Execution time 3.4652E-02 + + Grind Time (nanoseconds) 1.1078E+01 + + ******************************************************************************** + + +Additional outputs in the form of `slgg` and `flux` files are available when requested according to the `scatp` and `fluxp` input variables, respectively. + +License +------- + +Los Alamos National Security, LLC owns the copyright to "SNAP: SN (Discrete Ordinates) Application Proxy, Version 1.x (C13087)". The license is BSD with standard clauses regarding indicating modifications before future redistribution: + +Copyright (c) 2013, Los Alamos National Security, LLC +All rights reserved. + +Copyright 2013. Los Alamos National Security, LLC. 
This software was produced under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National Laboratory (LANL), which is operated by Los Alamos National Security, LLC for the U.S. Department of Energy. The U.S. Government has rights to use, reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified to produce derivative works, such modified software should be clearly marked, so as not to confuse it with the version available from LANL. + +Additionally, redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +* Neither the name of Los Alamos National Security, LLC, Los Alamos National Laboratory, LANL, the U.S. Government, nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY LOS ALAMOS NATIONAL SECURITY, LLC AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL LOS ALAMOS NATIONAL SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Classification +-------------- + +SNAP is Unclassified and contains no Unclassified Controlled Nuclear Information. It has been assigned Los Alamos Computer Code number LA-CC-13-016. + +Authors +------- + +Joe Zerr, rzerr _ at _ lanl.gov + +Randal Baker, rsb _ at _ lanl.gov + +Additional Contacts +------------------- + +Mike Lang, mlang _ at _ lanl.gov + +Josip Loncaric, josip _ at _ lanl.gov + +Last Modification to this Readme +-------------------------------- + +03/03/2016 + diff --git a/Fortran/SNAP/qasnap/mms_src/2d_mms_st.inp b/Fortran/SNAP/qasnap/mms_src/2d_mms_st.inp new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/qasnap/mms_src/2d_mms_st.inp @@ -0,0 +1,26 @@ +! 
Input from namelist +&invar + nthreads=4 + npey=1 + ndimen=2 + nx=20 + lx=0.2 + ny=20 + ly=0.2 + nmom=4 + nang=12 + ng=10 + mat_opt=0 + src_opt=3 + timedep=0 + it_det=1 + tf=1.0 + nsteps=10 + iitm=5 + oitm=100 + epsi=1.E-4 + fluxp=2 + scatp=1 + fixup=0 + angcpy=2 +/ diff --git a/Fortran/SNAP/src/CMakeLists.txt b/Fortran/SNAP/src/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/CMakeLists.txt @@ -0,0 +1,176 @@ +################################################################################ +# +# CMakeLists.txt +# +# Author: Anthony Cabrera: +# Contact: cabreraam AT ieee DOT org +# Description: CMakeLists.txt replacement for SNAP Makefile +# +################################################################################ + +################################################################################ +# +# Options and Variables +# +################################################################################ + +set(SNAP_USER_DEFS "" CACHE STRING "Semi-colon separted preprocessor pound defines") + +set(FP_TOLERANCE "0.001" CACHE STRING "FP tolerance value") + +set(SNAP_COMPILER_OPTIONS_LLVM_FLANG + "" + CACHE + STRING + "Compiler options for LLVMFlang" +) + +################################################################################ +# +# Setting up build target +# +################################################################################ + +set(SNAP_SOURCE_FILES + global.f90 + snap_main.f90 + utils.f90 + version.f90 + plib.F90 + geom.f90 + sn.f90 + data.f90 + control.f90 + input.f90 + setup.f90 + dealloc.f90 + translv.f90 + solvar.f90 + outer.f90 + expxs.f90 + inner.f90 + sweep.f90 + octsweep.f90 + dim1_sweep.f90 + dim3_sweep.f90 + output.f90 + time.F90 + mms.f90 + analyze.f90 + thrd_comm.f90 + mkba_sweep.f90 + CACHE + STRING + "List of SNAP source files" +) + +add_executable(snap + ${SNAP_SOURCE_FILES} +) + +target_compile_options(snap + PRIVATE + $<$:${SNAP_COMPILER_OPTIONS_LLVM_FLANG}> +) +target_compile_definitions(snap 
+ PRIVATE + ${SNAP_USER_DEFS} +) + +macro(llvm_test_verify_hash_program_output _file) + llvm_test_verify(%b/HashProgramOutput.sh ${_file}) +endmacro() + +# This if-body takes care of all of the llvm-test suite stuff. It is modeled +# after the llvm_multisource function in the llvm-test-suite, which takes the +# following form: +#function(llvm_multisource target) +# set(sources ${ARGN}) +# if(NOT sources) +# file(GLOB sources *.c *.cpp *.cc *.f *.F *.f90 *.F90) +# endif() +# llvm_test_executable_no_test(${target} ${sources}) +# llvm_test_traditional(${target}) +# llvm_add_test_for_target(${target}) +#endfunction() +# Because +# - I could not directly use those pre-existing functions/macros, +# - I wanted to reference the work I'd already done with the SNAP CMake infra +# I will show how my implementation below corresponds to llvm_multisource. Look +# for comments that take the form # start +if(PROJECT_NAME STREQUAL "test-suite") + set(target "snap") + + # start llvm_test_executable_no_test_fortran + # Note: we ignore TEST_SUITE_PROFILE_USE. Will this break LLVM Flang? if we + # aded that flag? 
+ llvm_codesign(snap) + set_property(GLOBAL APPEND PROPERTY TEST_SUITE_TARGETS snap) + test_suite_add_build_dependencies(snap) + + if(TEST_SUITE_LLVM_SIZE) + add_custom_command(TARGET ${target} POST_BUILD + COMMAND ${TEST_SUITE_LLVM_SIZE} --format=sysv $ + > $.size) + endif() + + # start llvm_test_traditional(${target}) + # isolate path for the input file that we use + set(TEST_INPUT_DIR + "${CMAKE_CURRENT_SOURCE_DIR}/../qasnap/mms_src" + ) + message(STATUS "TEST_INPUT_DIR - ${TEST_INPUT_DIR}") + set(TEST_INPUT_FILENAME "2d_mms_st.inp") + set(TEST_OUTPUT_FILENAME "snap-output") + llvm_test_data(${target} + MUST_COPY + SOURCE_DIR + ${TEST_INPUT_DIR} + ${TEST_INPUT_FILENAME} + ) + # make run command include the new input we just copied + list(APPEND RUN_OPTIONS + ${TEST_INPUT_FILENAME} + ${TEST_OUTPUT_FILENAME} + ) + list(INSERT RUN_OPTIONS 0 WORKDIR %S) + # here, we take the options for the RUN: command that we want to craft, and + # pass it to the llvm_test_run function + llvm_test_run(${RUN_OPTIONS}) + + # Hash if we've been asked to. + if(HASH_PROGRAM_OUTPUT) + llvm_test_verify_hash_program_output(%o) + endif() + + # Deal with output that we want to compare to + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/snap.reference_output) + set(REFERENCE_OUTPUT + snap.reference_output + ) + set(DIFFPROG_BASE %b/${FPCMP}) + set(FP_TOL_SUFFIX "-r ${FP_TOLERANCE}") + set(FP_ABSTOL_SUFFIX "-a ${FP_ABSTOLERANCE}") + set(DIFFPROG + ${DIFFPROG_BASE} + "$<$:${FP_TOL_SUFFIX}>" + "$<$:${FP_ABSTOL_SUFFIX}>" + ) + + llvm_test_verify(${DIFFPROG} %o %S/${REFERENCE_OUTPUT}) + llvm_test_data(${target} ${REFERENCE_OUTPUT}) + else() + message("-- No reference output found for test ${name}") + endif() + set(TESTSCRIPT "${TESTSCRIPT}" PARENT_SCOPE) + + # this creates the .test file for you! + # start llvm_add_test_for_target(${target}) + # don't need to make any mods to this, woo! + llvm_add_test_for_target(snap) + + # lit.local.cfg has since been added to the src directory. 
Additionally, + # we'll need to copy that file over to the binary directory. + file(COPY lit.local.cfg DESTINATION "${CMAKE_CURRENT_BINARY_DIR}") +endif() + diff --git a/Fortran/SNAP/src/analyze.f90 b/Fortran/SNAP/src/analyze.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/analyze.f90 @@ -0,0 +1,112 @@ +!----------------------------------------------------------------------- +! +! MODULE: analyze_module +!> @brief +!> This module contains subroutines for analyzing data. +! +!----------------------------------------------------------------------- + +MODULE analyze_module + + USE global_module, ONLY: i_knd, r_knd, ounit + + USE control_module, ONLY: timedep + + USE geom_module, ONLY: dx, dy, dz, nx, ny, nz, ndimen + + USE data_module, ONLY: v, ng + + USE solvar_module, ONLY: flux0, pop + + USE plib_module, ONLY: iproc, root, comm_snap, rtsum + + IMPLICIT NONE + + PUBLIC + + SAVE + + + CONTAINS + + + SUBROUTINE analyze_pop_calc ( cy, time ) + +!----------------------------------------------------------------------- +! +! Compute the neutron spectrum: the population per group integrated over +! volume. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: cy + + REAL(r_knd), INTENT(IN) :: time +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: g + + REAL(r_knd) :: vol + + REAL(r_knd), DIMENSION(nx,ny,nz,ng) :: tmp +!_______________________________________________________________________ +! +! Integrate flux over volume of local spatial sub-domain +!_______________________________________________________________________ + + vol = dx + IF ( ndimen > 1 ) THEN + vol = vol*dy + IF ( ndimen > 2 ) vol = vol*dz + END IF + tmp = vol*flux0 + + DO g = 1, ng + pop(g) = SUM( tmp(:,:,:,g) ) + END DO +!_______________________________________________________________________ +! 
+! Integrate over global volume with MPI reduction (to root only, not +! all_reduce) +!_______________________________________________________________________ + + CALL rtsum ( pop, ng, comm_snap, root ) +!_______________________________________________________________________ +! +! Divide volume integrated flux by velocity to get particle density +! per unit energy. Print out results. Note, only root process has +! correct value because the reduction is not global. Can easily +! change this though. +!_______________________________________________________________________ + + IF ( iproc == root ) THEN + + IF ( timedep == 1 ) THEN + pop = pop/v + ELSE + WRITE( ounit, 352 ) + END IF + + WRITE( ounit, 350 ) cy, time + DO g = 1, ng + WRITE( ounit, 351 ) g, pop(g) + END DO + WRITE( ounit, * ) + + END IF +!_______________________________________________________________________ + + 350 FORMAT( 2X, 'Particle spectrum, cycle (time)', I4, ' ( ', & + ES11.4, ' )' ) + 351 FORMAT( 8X, I4, ES13.6 ) + 352 FORMAT( 2X, 'Static calc: v(g) not set. Use dummy value of 1.' ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE analyze_pop_calc + + +END MODULE analyze_module diff --git a/Fortran/SNAP/src/control.f90 b/Fortran/SNAP/src/control.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/control.f90 @@ -0,0 +1,139 @@ +!----------------------------------------------------------------------- +! +! MODULE: control_module +!> @brief +!> This module contains the variables that control SNAP's solver +!> routines. This includes the time-dependent variables. It also +!> includes some KBA scheduling variables. +! 
+!----------------------------------------------------------------------- + +MODULE control_module + + USE global_module, ONLY: i_knd, r_knd, l_knd, zero, one + + IMPLICIT NONE + + PUBLIC + + SAVE +!_______________________________________________________________________ +! +! Module Input Variables +! +! epsi - convergence criterion +! iitm - max inner iterations +! oitm - max outer iterations +! timedep - 0/1=no/yes perform a time-dependent calculation +! tf - final time +! nsteps - number of time steps to cover the ts -> tf range +! +! swp_typ - 0/1=standard order/mini-KBA sweep +! multiswp - 0/1=no/yes concurrent octant mesh sweeps (corner starts) +! +! angcpy - 1/2 copies of the time-edge angular flux +! +! it_det - 0/1=no/yes full iteration details +! soloutp - 0/1=no/yes print single k-plane solution to output file +! kplane - 0/1+=default mid-plane/k-plane index to print with soloutp +! popout - 0/1/2=no/final only/every cycle print population data to +! output file +! fluxp - 0/1/2=print none/scalar flux/all flux moments to file +! +! fixup - 0/1=no/yes perform flux fixup +!_______________________________________________________________________ + + INTEGER(i_knd) :: iitm=5, oitm=100, timedep=0, nsteps=1, swp_typ=0, & + multiswp=1, angcpy=1, it_det=0, soloutp=0, kplane=0, popout=0, & + fluxp=0, fixup=1 + + REAL(r_knd) :: epsi=1.0E-4_r_knd, tf=zero +!_______________________________________________________________________ +! +! Run-time variables +! +! dt - time-step size +! +! tolr - parameter, small number used for determining how to +! compute flux error +! dfmxi(ng) - max error of inner iteration +! dfmxo - max error of outer iteration +! +! inrdone(ng) - logical for inners being complete +! otrdone - logical for outers being complete +! update_ptr - true/false update the ptr_out array +! +! ncor - number of corners from which sweeps begin +! nops - number of chunks a process performs per sweep +! last_oct - last octant to be swept +! 
corner_sch(2,4) - corner scheduling control array +! yzstg(4) - KBA stage in yz plane per starting corner +! corner_loop_order(4) - multisweep corner loop prioritized order +!_______________________________________________________________________ + + LOGICAL(l_knd) :: otrdone, update_ptr + + LOGICAL(l_knd), ALLOCATABLE, DIMENSION(:) :: inrdone + + INTEGER(i_knd) :: ncor, nops, last_oct, corner_sch(2,4), yzstg(4), & + corner_loop_order(4) + + REAL(r_knd) :: dt, dfmxo + + REAL(r_knd), PARAMETER :: tolr=1.0E-12_r_knd + + REAL(r_knd), ALLOCATABLE, DIMENSION(:) :: dfmxi + + + CONTAINS + + + SUBROUTINE control_allocate ( ng, ndpwds, ierr ) + +!----------------------------------------------------------------------- +! +! Allocate control module variables. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: ng + + INTEGER(i_knd), INTENT(INOUT) :: ndpwds + + INTEGER(i_knd), INTENT(OUT) :: ierr +!_______________________________________________________________________ + + ierr = 0 + ALLOCATE( dfmxi(ng), inrdone(ng), STAT=ierr ) + IF ( ierr /= 0 ) RETURN + + dfmxi = -one + inrdone = .FALSE. + dfmxo = -one + otrdone = .FALSE. + update_ptr = .FALSE. + + ndpwds = ndpwds + SIZE( dfmxi ) + SIZE( inrdone ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE control_allocate + + + SUBROUTINE control_deallocate + +!----------------------------------------------------------------------- +! +! Deallocate control module arrays. +! 
+!----------------------------------------------------------------------- +!_______________________________________________________________________ + + DEALLOCATE( dfmxi, inrdone ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE control_deallocate + + +END MODULE control_module diff --git a/Fortran/SNAP/src/data.f90 b/Fortran/SNAP/src/data.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/data.f90 @@ -0,0 +1,186 @@ +!----------------------------------------------------------------------- +! +! MODULE: data_module +!> @brief +!> This module contains the variables and setup subroutines for the mock +!> cross section data. It establishes the number of groups and +!> constructs the cross section arrays. +! +!----------------------------------------------------------------------- + +MODULE data_module + + USE global_module, ONLY: i_knd, r_knd, zero + + IMPLICIT NONE + + PUBLIC + + SAVE +!_______________________________________________________________________ +! +! Module Input Variables +! +! ng - number of groups +! mat_opt - material layout, 0/1/2=homogeneous/center/corner, with +! two materials, and material 2 nowhere/center/corner +! src_opt - source layout, 0/1/2=homogenous/src-center/src-corner= +! source everywhere/center of problem/corner, strength=10.0 +! scatp - 0/1=no/yes print the full scattering matrix to file 'slgg' +!_______________________________________________________________________ + + INTEGER(i_knd) :: ng=1, mat_opt=0, src_opt=0, scatp=0 +!_______________________________________________________________________ +! +! Run-time variables +! +! v(ng) - mock velocity array +! nmat - number of materials +! mat(nx,ny,nz) - material identifier array +! +! qi(nx,ny,nz,ng) - fixed source array for src_opt<3 +! qim(nang,nx,ny,nz,noct,ng) - fixed source array for src_opt>=3 +! +! sigt(nmat,ng) - total interaction +! 
siga(nmat,ng) - absorption +! sigs(nmat,ng) - scattering, total +! slgg(nmat,nmom,ng,ng) - scattering matrix, all moments/groups +! vdelt(ng) - time-absorption coefficient +!_______________________________________________________________________ + + INTEGER(i_knd) :: nmat=1 + + INTEGER(i_knd), ALLOCATABLE, DIMENSION(:,:,:) :: mat + + REAL(r_knd), ALLOCATABLE, DIMENSION(:) :: v, vdelt + + REAL(r_knd), ALLOCATABLE, DIMENSION(:,:) :: sigt, siga, sigs + + REAL(r_knd), ALLOCATABLE, DIMENSION(:,:,:,:) :: qi, slgg + + REAL(r_knd), ALLOCATABLE, DIMENSION(:,:,:,:,:,:) :: qim + + + CONTAINS + + + SUBROUTINE data_allocate ( nx, ny, nz, nmom, nang, noct, timedep, & + ndpwds, istat ) + +!----------------------------------------------------------------------- +! +! Allocate data_module arrays. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: nx, ny, nz, nmom, nang, noct, timedep + + INTEGER(i_knd), INTENT(INOUT) :: ndpwds, istat +!_______________________________________________________________________ +! +! Establish number of materials according to mat_opt +!_______________________________________________________________________ + + IF ( mat_opt > 0 ) nmat = 2 +!_______________________________________________________________________ +! +! Allocate velocities +!_______________________________________________________________________ + + istat = 0 + + IF ( timedep == 1 ) THEN + ALLOCATE( v(ng), STAT=istat ) + ELSE + ALLOCATE( v(0), STAT=istat ) + END IF + IF ( istat /= 0 ) RETURN + + v = zero +!_______________________________________________________________________ +! +! Allocate the material identifier array. ny and nz are 1 if not +! 2-D/3-D. +!_______________________________________________________________________ + + ALLOCATE( mat(nx,ny,nz), STAT=istat ) + IF ( istat /= 0 ) RETURN + + mat = 1 +!_______________________________________________________________________ +! +! Allocate the fixed source array. 
If src_opt < 3, allocate the qi +! array, not the qim. Do the opposite (store the full angular copy) of +! the source, qim, if src_opt>=3 (MMS). Allocate array not used to 0. +! ny and nz are 1 if not 2-D/3-D. +!_______________________________________________________________________ + + IF ( src_opt < 3 ) THEN + ALLOCATE( qi(nx,ny,nz,ng), qim(0,0,0,0,0,0), STAT=istat ) + IF ( istat /= 0 ) RETURN + qi = zero + ELSE + ALLOCATE( qi(nx,ny,nz,ng), qim(nang,nx,ny,nz,noct,ng), & + STAT=istat ) + IF ( istat /= 0 ) RETURN + qi = zero + qim = zero + END IF +!_______________________________________________________________________ +! +! Allocate mock cross sections +!_______________________________________________________________________ + + ALLOCATE( sigt(nmat,ng), siga(nmat,ng), sigs(nmat,ng), & + slgg(nmat,nmom,ng,ng), STAT=istat ) + IF ( istat /= 0 ) RETURN + + sigt = zero + siga = zero + sigs = zero + slgg = zero +!_______________________________________________________________________ +! +! Allocate the vdelt array +!_______________________________________________________________________ + + ALLOCATE( vdelt(ng), STAT=istat ) + IF ( istat /= 0 ) RETURN + + vdelt = zero +!_______________________________________________________________________ +! +! Add up words allocated +!_______________________________________________________________________ + + ndpwds = ndpwds + SIZE( v ) + SIZE( mat ) + SIZE( qi ) + & + SIZE( qim ) + SIZE( sigt ) + SIZE( siga ) + SIZE( sigs ) + & + SIZE( slgg ) + SIZE( vdelt ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE data_allocate + + + SUBROUTINE data_deallocate + +!----------------------------------------------------------------------- +! +! Deallocate data_module arrays. +! 
+!----------------------------------------------------------------------- +!_______________________________________________________________________ + + DEALLOCATE( v ) + DEALLOCATE( mat ) + DEALLOCATE( qi, qim ) + DEALLOCATE( sigt, siga, sigs ) + DEALLOCATE( slgg ) + DEALLOCATE( vdelt ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE data_deallocate + + +END MODULE data_module diff --git a/Fortran/SNAP/src/dealloc.f90 b/Fortran/SNAP/src/dealloc.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/dealloc.f90 @@ -0,0 +1,79 @@ +!----------------------------------------------------------------------- +! +! MODULE: dealloc_module +!> @brief +!> This module controls the deallocation process of run-time arrays. +! +!----------------------------------------------------------------------- + +MODULE dealloc_module + + USE global_module, ONLY: i_knd + + USE sn_module, ONLY: sn_deallocate + + USE data_module, ONLY: data_deallocate + + USE mms_module, ONLY: mms_deallocate + + USE geom_module, ONLY: geom_deallocate + + USE solvar_module, ONLY: solvar_deallocate + + USE control_module, ONLY: control_deallocate + + IMPLICIT NONE + + PUBLIC + + + CONTAINS + + + SUBROUTINE dealloc_input ( flg ) + +!----------------------------------------------------------------------- +! +! Call for the data deallocation from individual deallocation +! subroutines. Covers the allocations from input. +! 
+!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: flg +!_______________________________________________________________________ + + CALL sn_deallocate + + IF ( flg > 1 ) CALL data_deallocate + + IF ( flg > 2 ) CALL control_deallocate + + IF ( flg > 3 ) CALL mms_deallocate +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE dealloc_input + + + SUBROUTINE dealloc_solve ( swp_typ, flg ) + +!----------------------------------------------------------------------- +! +! Call for the data deallocation from individual deallocation +! subroutines. Covers the allocations from the solve (geometry and +! solution-variable arrays). +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: swp_typ, flg +!_______________________________________________________________________ + + CALL geom_deallocate ( swp_typ ) + + IF ( flg > 1 ) CALL solvar_deallocate +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE dealloc_solve + + +END MODULE dealloc_module diff --git a/Fortran/SNAP/src/dim1_sweep.f90 b/Fortran/SNAP/src/dim1_sweep.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/dim1_sweep.f90 @@ -0,0 +1,237 @@ +!----------------------------------------------------------------------- +! +! MODULE: dim1_sweep_module +!> @brief +!> This module contains the 1D mesh sweep logic. +! 
+!----------------------------------------------------------------------- + +MODULE dim1_sweep_module + + USE global_module, ONLY: i_knd, r_knd, zero, two, one, half + + USE geom_module, ONLY: nx, hi + + USE sn_module, ONLY: cmom, nang, mu, w + + USE data_module, ONLY: src_opt, qim + + USE control_module, ONLY: fixup, tolr, update_ptr, it_det + + USE plib_module, ONLY: ichunk + + IMPLICIT NONE + + PUBLIC :: dim1_sweep + + + CONTAINS + + + SUBROUTINE dim1_sweep ( id, d1, d2, d3, d4, oct, g, psii, qtot, ec, & + vdelt, ptr_in, ptr_out, dinv, flux0, fluxm, wmu, flkx, t_xs, & + fixup_counter ) + +!----------------------------------------------------------------------- +! +! 1-D slab mesh sweeper. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: id, d1, d2, d3, d4, oct, g + + REAL(r_knd), INTENT(IN) :: vdelt + + REAL(r_knd), DIMENSION(nang), INTENT(IN) :: wmu + + REAL(r_knd), DIMENSION(nang), INTENT(INOUT) :: psii + + REAL(r_knd), DIMENSION(nx), INTENT(IN) :: t_xs + + REAL(r_knd), DIMENSION(nx), INTENT(INOUT) :: flux0 + + REAL(r_knd), DIMENSION(nx+1), INTENT(INOUT) :: flkx + + REAL(r_knd), DIMENSION(4), INTENT(INOUT) :: fixup_counter + + REAL(r_knd), DIMENSION(cmom,ichunk), INTENT(IN) :: qtot + + REAL(r_knd), DIMENSION(cmom-1,nx), INTENT(INOUT) :: fluxm + + REAL(r_knd), DIMENSION(nang,cmom), INTENT(IN) :: ec + + REAL(r_knd), DIMENSION(nang,ichunk), INTENT(IN) :: dinv + + REAL(r_knd), DIMENSION(d1,d2,d3,d4), INTENT(IN) :: ptr_in + + REAL(r_knd), DIMENSION(d1,d2,d3,d4), INTENT(OUT) :: ptr_out +!_______________________________________________________________________ +! +! 
Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: ilo, ihi, ist, i, l + + REAL(r_knd) :: sum_hv + + REAL(r_knd), DIMENSION(nang) :: psi, pc, den + + REAL(r_knd), DIMENSION(nang,2) :: hv, fxhv + + REAL(r_knd), DIMENSION(nang,nx) :: qm +!_______________________________________________________________________ +! +! Set up the mms source if necessary. +!_______________________________________________________________________ + + qm = zero + IF ( src_opt == 3 ) qm = qim(:,:,1,1,oct,g) +!_______________________________________________________________________ +! +! Set up the sweep order in the i-direction. ilo here set according +! to direction--different than ilo in octsweep. Setup the fixup +! counter. +!_______________________________________________________________________ + + IF ( id == 1 ) THEN + ilo = nx; ihi = 1; ist = -1 + ELSE + ilo = 1; ihi = nx; ist = 1 + END IF + + fxhv = zero +!_______________________________________________________________________ +! +! Sweep the i cells. Set the boundary condition. +!_______________________________________________________________________ + + i_loop: DO i = ilo, ihi, ist + + IF ( i == ilo ) psii = zero +!_______________________________________________________________________ +! +! Compute the angular source. MMS source scales linearly with time. +! Note that for 1-D ichunk=nx, so i index on qtot is ok. +!_______________________________________________________________________ + + psi = qtot(1,i) + qm(:,i) + + DO l = 2, cmom + psi = psi + ec(:,l)*qtot(l,i) + END DO +!_______________________________________________________________________ +! +! Compute the numerator for the update formula +!_______________________________________________________________________ + + pc = psi + psii*mu*hi + IF ( vdelt /= zero ) pc = pc + vdelt*ptr_in(:,i,1,1) +!_______________________________________________________________________ +! +! Compute the solution of the center. 
Use DD for edges. Use fixup +! if requested. Note for 1-D ichunk=nx, so i index of dinv is ok. +!_______________________________________________________________________ + + IF ( fixup == 0 ) THEN + + psi = pc*dinv(:,i) + + psii = two*psi - psii + IF ( vdelt/=zero .AND. update_ptr ) & + ptr_out(:,i,1,1) = two*psi - ptr_in(:,i,1,1) + + ELSE +!_______________________________________________________________________ +! +! Multi-pass set to zero + rebalance fixup. Determine angles that +! will need fixup first. +!_______________________________________________________________________ + + hv = one; sum_hv = SUM( hv ) + + pc = pc * dinv(:,i) + + fixup_loop: DO + + fxhv(:,1) = two*pc - psii + IF ( vdelt /= zero ) fxhv(:,2) = two*pc - ptr_in(:,i,1,1) + + WHERE ( fxhv < zero ) hv = zero +!_______________________________________________________________________ +! +! Exit loop when all angles are fixed up +!_______________________________________________________________________ + + IF ( sum_hv == SUM( hv ) ) EXIT fixup_loop + sum_hv = SUM( hv ) +!_______________________________________________________________________ +! +! Recompute balance equation numerator and denominator and get +! new cell average flux +!_______________________________________________________________________ + + pc = psii*mu*hi*(one+hv(:,1)) + IF ( vdelt /= zero ) & + pc = pc + vdelt*ptr_in(:,i,1,1)*(one+hv(:,2)) + pc = psi + half*pc + + den = t_xs(i) + mu*hi*hv(:,1) + vdelt*hv(:,2) + + WHERE( pc <= zero ) den = zero + + WHERE( den < tolr ) + pc = zero + den = one + END WHERE + + pc = pc / den + + END DO fixup_loop +!_______________________________________________________________________ +! +! 
Fixup done: update counts and compute edges with resolved center +!_______________________________________________________________________ + + IF ( it_det == 1 ) THEN + fixup_counter(1) = fixup_counter(1) + nang - SUM( HV(:,1) ) + IF ( vdelt /= zero ) & + fixup_counter(4) = fixup_counter(4) + nang - SUM( HV(:,2) ) + END IF + + psi = pc + + psii = fxhv(:,1) * hv(:,1) + IF ( vdelt/=zero .AND. update_ptr ) & + ptr_out(:,i,1,1) = fxhv(:,2) * hv(:,2) + + END IF +!_______________________________________________________________________ +! +! Compute the flux moments +!_______________________________________________________________________ + + psi = w*psi + + flux0(i) = flux0(i) + SUM( psi ) + DO l = 1, cmom-1 + fluxm(l,i) = fluxm(l,i) + SUM( ec(:,l+1)*psi ) + END DO +!_______________________________________________________________________ +! +! Compute dummy leakages (not used elsewhere currently) +!_______________________________________________________________________ + + flkx(i+id-1) = flkx(i+id-1) + ist*SUM( wmu*psii ) +!_______________________________________________________________________ +! +! Finish the loop +!_______________________________________________________________________ + + END DO i_loop +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE dim1_sweep + + +END MODULE dim1_sweep_module diff --git a/Fortran/SNAP/src/dim3_sweep.f90 b/Fortran/SNAP/src/dim3_sweep.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/dim3_sweep.f90 @@ -0,0 +1,424 @@ +!----------------------------------------------------------------------- +! +! MODULE: dim3_sweep_module +!> @brief +!> This module contains the 2D and 3D mesh sweep logic. +! 
+!----------------------------------------------------------------------- + +MODULE dim3_sweep_module + + USE global_module, ONLY: i_knd, l_knd, r_knd, zero, two, one, half + + USE plib_module, ONLY: ichunk, firsty, lasty, firstz, lastz + + USE geom_module, ONLY: nx, hi, hj, hk, ndimen, ny, nz, nc + + USE sn_module, ONLY: cmom, nang, mu, eta, xi, w + + USE data_module, ONLY: src_opt, qim + + USE control_module, ONLY: fixup, tolr, update_ptr, multiswp, it_det + + USE thrd_comm_module, ONLY: sweep_recv_bdry, sweep_send_bdry + + IMPLICIT NONE + + PUBLIC :: dim3_sweep + + + CONTAINS + + + SUBROUTINE dim3_sweep ( ich, id, d1, d2, d3, d4, jd, kd, oct, g, t, & + iop, reqs, szreq, psii, psij, psik, qtot, ec, vdelt, ptr_in, & + ptr_out, dinv, flux0, fluxm, jb_in, jb_out, kb_in, kb_out, wmu, & + weta, wxi, flkx, flky, flkz, t_xs, fixup_counter ) + +!----------------------------------------------------------------------- +! +! 3-D slab mesh sweeper. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: ich, id, d1, d2, d3, d4, jd, kd, oct,& + g, t, iop, szreq + + INTEGER(i_knd), DIMENSION(szreq), INTENT(INOUT) :: reqs + + REAL(r_knd), INTENT(IN) :: vdelt + + REAL(r_knd), DIMENSION(nang), INTENT(IN) :: wmu, weta, wxi + + REAL(r_knd), DIMENSION(4), INTENT(INOUT) :: fixup_counter + + REAL(r_knd), DIMENSION(nang,cmom), INTENT(IN) :: ec + + REAL(r_knd), DIMENSION(nang,ny,nz), INTENT(INOUT) :: psii + + REAL(r_knd), DIMENSION(nang,ichunk,nz), INTENT(INOUT) :: psij, & + jb_in, jb_out + + REAL(r_knd), DIMENSION(nang,ichunk,ny), INTENT(INOUT) :: psik, & + kb_in, kb_out + + REAL(r_knd), DIMENSION(nx,ny,nz), INTENT(IN) :: t_xs + + REAL(r_knd), DIMENSION(nx,ny,nz), INTENT(INOUT) :: flux0 + + REAL(r_knd), DIMENSION(nx+1,ny,nz), INTENT(INOUT) :: flkx + + REAL(r_knd), DIMENSION(nx,ny+1,nz), INTENT(INOUT) :: flky + + REAL(r_knd), DIMENSION(nx,ny,nz+1), INTENT(INOUT) :: flkz + + REAL(r_knd), DIMENSION(nang,ichunk,ny,nz), INTENT(IN) :: 
dinv + + REAL(r_knd), DIMENSION(cmom-1,nx,ny,nz), INTENT(INOUT) :: fluxm + + REAL(r_knd), DIMENSION(cmom,ichunk,ny,nz), INTENT(IN) :: qtot + + REAL(r_knd), DIMENSION(d1,d2,d3,d4), INTENT(IN) :: ptr_in + + REAL(r_knd), DIMENSION(d1,d2,d3,d4), INTENT(OUT) :: ptr_out +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: ist, iclo, ichi, jst, jlo, jhi, kst, klo, khi, k,& + j, ic, i, l, ibl, ibr, ibb, ibt, ibf, ibk + + LOGICAL(l_knd) :: receive + + REAL(r_knd) :: sum_hv + + REAL(r_knd), DIMENSION(nang) :: psi, pc, den + + REAL(r_knd), DIMENSION(nang,4) :: hv, fxhv +!_______________________________________________________________________ +! +! Set the receive control flag to true. Will set to false within the +! loop for the first cell. Pushing the receive to this point helps +! overlap communication and computation. +!_______________________________________________________________________ + + receive = multiswp /= 1 +!_______________________________________________________________________ +! +! Set up the sweep order given octant info. +!_______________________________________________________________________ + + IF ( id == 1 ) THEN + ist = -1; iclo = ichunk; ichi = 1 + ELSE + ist = +1; iclo = 1; ichi = ichunk + END IF + + IF ( jd == 1 ) THEN + jst = -1; jlo = ny; jhi = 1 + ELSE + jst = +1; jlo = 1; jhi = ny + END IF + + IF ( kd == 1 ) THEN + kst = -1; klo = nz; khi = 1 + ELSE + kst = +1; klo = 1; khi = nz + END IF +!_______________________________________________________________________ +! +! Initialize the fixup counter +!_______________________________________________________________________ + + fxhv = zero +!_______________________________________________________________________ +! +! 
Loop over the cells using bounds/stride above +!_______________________________________________________________________ + + k_loop: DO k = klo, khi, kst + j_loop: DO j = jlo, jhi, jst + ic_loop: DO ic = iclo, ichi, ist +!_______________________________________________________________________ +! +! Index ic refers to location in current ichunk, ich. Compute the +! global i index for x-direction. +!_______________________________________________________________________ + + i = (ich-1)*ichunk + ic +!_______________________________________________________________________ +! +! Dummy operation to match real transport code where nx is not +! always evenly divided by ichunk +!_______________________________________________________________________ + + IF ( i > nx ) THEN + jb_out(:,ic,k) = zero + kb_out(:,ic,j) = zero + CYCLE ic_loop + END IF +!_______________________________________________________________________ +! +! Compute the angular source. Add the MMS contribution if necessary. +!_______________________________________________________________________ + + psi = qtot(1,ic,j,k) + + DO l = 2, cmom + psi = psi + ec(:,l)*qtot(l,ic,j,k) + END DO + + IF ( src_opt == 3 ) psi = psi + qim(:,i,j,k,oct,g) +!_______________________________________________________________________ +! +! Left/right boundary conditions, always vacuum. Dummy operations +! may be ignored. +!_______________________________________________________________________ + + ibl = 0 + ibr = 0 + IF ( i==nx .AND. ist==-1 ) THEN + psii(:,j,k) = zero + ELSE IF ( i==1 .AND. ist==1 ) THEN + SELECT CASE ( ibl ) + CASE ( 0 ) + psii(:,j,k) = zero + CASE ( 1 ) + psii(:,j,k) = zero + END SELECT + END IF +!_______________________________________________________________________ +! +! Process boundaries in y- and z-directions come from preset +! boundary conditions (vacuum) or from upstream neighbors. Must call +! to receive any data. If upstream is boundary, no communication. +! 
Only do this at first cell of chunk but receive all incoming data +! for chunk. Resetting to .FALSE. skips unneeded communication call +! for all other cells. +!_______________________________________________________________________ + + IF ( receive ) THEN + CALL sweep_recv_bdry ( g, jd, kd, iop, t, reqs, szreq, nc, & + nang, ichunk, ny, nz, jb_in, kb_in ) + receive = .FALSE. + END IF +!_______________________________________________________________________ +! +! Top/bottom boundary conditions for chunk. Either received from +! upstream neighbor or set to zero (always vacuum) if no upstream +! neighbor. +!_______________________________________________________________________ + + ibb = 0 + ibt = 0 + IF ( j == jlo ) THEN + IF ( jd==1 .AND. lasty ) THEN + psij(:,ic,k) = zero + ELSE IF ( jd==2 .AND. firsty ) THEN + SELECT CASE ( ibb ) + CASE ( 0 ) + psij(:,ic,k) = zero + CASE ( 1 ) + psij(:,ic,k) = zero + END SELECT + ELSE + psij(:,ic,k) = jb_in(:,ic,k) + END IF + END IF +!_______________________________________________________________________ +! +! Front/back boundary conditions. Either received from upstream +! neighbor or set to zero (always vacuum) if no upstream neighbor. +!_______________________________________________________________________ + + ibf = 0 + ibk = 0 + IF ( k == klo ) THEN + IF ( (kd==1 .AND. lastz) .OR. ndimen<3 ) THEN + psik(:,ic,j) = zero + ELSE IF ( kd==2 .AND. firstz ) THEN + SELECT CASE ( ibf ) + CASE ( 0 ) + psik(:,ic,j) = zero + CASE ( 1 ) + psik(:,ic,j) = zero + END SELECT + ELSE + psik(:,ic,j) = kb_in(:,ic,j) + END IF + END IF +!_______________________________________________________________________ +! +! 
Compute initial solution +!_______________________________________________________________________ + + IF ( vdelt /= zero ) THEN + pc = ( psi + psii(:,j,k)*mu*hi + psij(:,ic,k)*eta*hj + & + psik(:,ic,j)*xi*hk + ptr_in(:,i,j,k)*vdelt ) * dinv(:,ic,j,k) + ELSE + pc = ( psi + psii(:,j,k)*mu*hi + psij(:,ic,k)*eta*hj + & + psik(:,ic,j)*xi*hk ) * dinv(:,ic,j,k) + END IF +!_______________________________________________________________________ +! +! Compute outgoing edges with diamond difference, no negative flux +! fixup +!_______________________________________________________________________ + + IF ( fixup == 0 ) THEN + + psi = pc + + psii(:,j,k) = two*psi - psii(:,j,k) + psij(:,ic,k) = two*psi - psij(:,ic,k) + IF ( ndimen == 3 ) psik(:,ic,j) = two*psi - psik(:,ic,j) + IF ( vdelt/=zero .AND. update_ptr ) & + ptr_out(:,i,j,k) = two*psi - ptr_in(:,i,j,k) + + ELSE +!_______________________________________________________________________ +! +! Use negative flux fixup. Compute outgoing edges. If negative, +! set to zero and rebalance. Multi-pass until all negativities +! eliminated. Initialize counters and determine angles that will +! need fixup first. +!_______________________________________________________________________ + + hv = one + sum_hv = SUM( hv ) + + fixup_loop: DO + + fxhv(:,1) = two*pc - psii(:,j,k) + fxhv(:,2) = two*pc - psij(:,ic,k) + IF ( ndimen == 3 ) fxhv(:,3) = two*pc - psik(:,ic,j) + IF ( vdelt /= zero ) fxhv(:,4) = two*pc - ptr_in(:,i,j,k) + + WHERE ( fxhv < zero ) hv = zero +!_______________________________________________________________________ +! +! Exit loop when all angles are fixed up, i.e., no change in hv +!_______________________________________________________________________ + + IF ( sum_hv == SUM( hv ) ) EXIT fixup_loop + sum_hv = SUM( hv ) +!_______________________________________________________________________ +! +! Recompute balance equation numerator and denominator and get +! 
new cell average flux +!_______________________________________________________________________ + + IF ( vdelt /= zero ) THEN + pc = psi + half * ( psii(:,j,k)*mu*hi*(one+hv(:,1)) + & + psij(:,ic,k)*eta*hj*(one+hv(:,2)) + & + psik(:,ic,j)*xi*hk*(one+hv(:,3)) + & + ptr_in(:,i,j,k)*vdelt*(one+hv(:,4)) ) + den = t_xs(i,j,k) + mu*hi*hv(:,1) + eta*hj*hv(:,2) + & + xi*hk*hv(:,3) + vdelt*hv(:,4) + ELSE + pc = psi + half * ( psii(:,j,k)*mu*hi*(one+hv(:,1)) + & + psij(:,ic,k)*eta*hj*(one+hv(:,2)) + & + psik(:,ic,j)*xi*hk*(one+hv(:,3)) ) + den = t_xs(i,j,k) + mu*hi*hv(:,1) + eta*hj*hv(:,2) + & + xi*hk*hv(:,3) + END IF + + WHERE( pc <= zero ) den = zero + + WHERE( den < tolr ) + pc = zero + den = one + END WHERE + + pc = pc / den + + END DO fixup_loop +!_______________________________________________________________________ +! +! Fixup done: update counts and compute edges with resolved center +!_______________________________________________________________________ + + IF ( it_det == 1 ) THEN + fixup_counter(1) = fixup_counter(1) + nang - SUM( HV(:,1) ) + fixup_counter(2) = fixup_counter(2) + nang - SUM( HV(:,2) ) + IF ( ndimen == 3 ) & + fixup_counter(3) = fixup_counter(3) + nang - SUM( HV(:,3) ) + IF ( vdelt /= zero ) & + fixup_counter(4) = fixup_counter(4) + nang - SUM( HV(:,4) ) + END IF + + psi = pc + + psii(:,j,k) = fxhv(:,1) * hv(:,1) + psij(:,ic,k) = fxhv(:,2) * hv(:,2) + IF ( ndimen == 3 ) psik(:,ic,j) = fxhv(:,3) * hv(:,3) + IF ( vdelt/=zero .AND. update_ptr ) & + ptr_out(:,i,j,k) = fxhv(:,4) * hv(:,4) + + END IF +!_______________________________________________________________________ +! +! Save edge fluxes (dummy if checks for unused non-vacuum BCs) +!_______________________________________________________________________ + + IF ( j == jhi ) THEN + IF ( jd==2 .AND. lasty ) THEN + CONTINUE + ELSE IF ( jd==1 .AND. firsty ) THEN + IF ( ibb == 1 ) CONTINUE + ELSE + jb_out(:,ic,k) = psij(:,ic,k) + END IF + END IF + + IF ( k == khi ) THEN + IF ( kd==2 .AND. 
lastz ) THEN + CONTINUE + ELSE IF ( kd==1 .AND. firstz ) THEN + IF ( ibf == 1 ) CONTINUE + ELSE + kb_out(:,ic,j) = psik(:,ic,j) + END IF + END IF +!_______________________________________________________________________ +! +! Compute dummy leakages (not used elsewhere currently) +!_______________________________________________________________________ + + flkx(i+id-1,j,k) = flkx(i+id-1,j,k) + ist*SUM( wmu*psii(:,j,k) ) + flky(i,j+jd-1,k) = flky(i,j+jd-1,k) + jst*SUM( weta*psij(:,ic,k) ) + flkz(i,j,k+kd-1) = flkz(i,j,k+kd-1) + kst*SUM( wxi*psik(:,ic,j) ) +!_______________________________________________________________________ +! +! Compute the flux moments +!_______________________________________________________________________ + + psi = w*psi + + flux0(i,j,k) = flux0(i,j,k) + SUM( psi ) + DO l = 1, cmom-1 + fluxm(l,i,j,k) = fluxm(l,i,j,k) + SUM( ec(:,l+1)*psi ) + END DO +!_______________________________________________________________________ +! +! Finish the loops +!_______________________________________________________________________ + + END DO ic_loop + END DO j_loop + END DO k_loop +!_______________________________________________________________________ +! +! Send data to downstream neighbors +!_______________________________________________________________________ + + IF ( multiswp == 0 ) CALL sweep_send_bdry ( g, jd, kd, iop, t, & + reqs, szreq, nc, nang, ichunk, ny, nz, jb_out, kb_out ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE dim3_sweep + + +END MODULE dim3_sweep_module diff --git a/Fortran/SNAP/src/expxs.f90 b/Fortran/SNAP/src/expxs.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/expxs.f90 @@ -0,0 +1,140 @@ +!----------------------------------------------------------------------- +! +! MODULE: expxs_module +!> @brief +!> This module contains the subroutines for expanding a cross section to +!> a full spatial map. 
+! +!----------------------------------------------------------------------- + +MODULE expxs_module + + USE global_module, ONLY: i_knd, r_knd, zero + + USE geom_module, ONLY: nx, ny, nz + + USE sn_module, ONLY: nmom, cmom, lma + + USE data_module, ONLY: nmat + + IMPLICIT NONE + + PUBLIC + + INTERFACE expxs_reg + MODULE PROCEDURE expxs_reg_1d, expxs_reg_3d + END INTERFACE + + SAVE + + + CONTAINS + + + SUBROUTINE expxs_reg_1d ( xs, map, cs ) + +!----------------------------------------------------------------------- +! +! Expand one of the sig*(nmat,ng) arrays to a 1D spatial mapping. xs is +! a generic cross section array, map is the material map, and cs is the +! cross section expanded to the mesh. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), DIMENSION(nx), INTENT(IN) :: map + + REAL(r_knd), DIMENSION(nmat), INTENT(IN) :: xs + + REAL(r_knd), DIMENSION(nx), INTENT(OUT) :: cs +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: i +!_______________________________________________________________________ + + DO i = 1, nx + cs(i) = xs(map(i)) + END DO +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE expxs_reg_1d + + + SUBROUTINE expxs_reg_3d ( xs, map, cs ) + +!----------------------------------------------------------------------- +! +! Expand one of the sig*(nmat,ng) arrays to a 3D spatial mapping. xs is +! a generic cross section array, map is the material map, and cs is the +! cross section expanded to the mesh. +! 
+!----------------------------------------------------------------------- + + INTEGER(i_knd), DIMENSION(nx,ny,nz), INTENT(IN) :: map + + REAL(r_knd), DIMENSION(nmat), INTENT(IN) :: xs + + REAL(r_knd), DIMENSION(nx,ny,nz), INTENT(OUT) :: cs +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: i, j, k +!_______________________________________________________________________ + + DO k = 1, nz + DO j = 1, ny + DO i = 1, nx + cs(i,j,k) = xs(map(i,j,k)) + END DO + END DO + END DO +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE expxs_reg_3d + + + SUBROUTINE expxs_slgg ( scat, map, cs ) + +!----------------------------------------------------------------------- +! +! Expand the slgg(nmat,nmom,ng,ng) array to a 1D spatial mapping. scat +! is the slgg matrix for a single h->g group coupling, map is the +! material map, and cs is the scattering matrix expanded to the mesh. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), DIMENSION(nx), INTENT(IN) :: map + + REAL(r_knd), DIMENSION(nmat,nmom), INTENT(IN) :: scat + + REAL(r_knd), DIMENSION(cmom-1,nx), INTENT(OUT) :: cs +!_______________________________________________________________________ +! +! 
Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: l, i, mom +!_______________________________________________________________________ + + cs = zero + + DO i = 1, nx + mom = 1 + DO l = 2, nmom + cs(mom:mom+lma(l)-1,i) = scat(map(i),l) + mom = mom + lma(l) + END DO + END DO +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE expxs_slgg + + +END MODULE expxs_module diff --git a/Fortran/SNAP/src/geom.f90 b/Fortran/SNAP/src/geom.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/geom.f90 @@ -0,0 +1,306 @@ +!----------------------------------------------------------------------- +! +! MODULE: geom_module +!> @brief +!> This module contains the variables that relate to the geometry of the +!> problem and the subroutines necessary to allocate and deallocate +!> geometry related data as necessary. +! +!----------------------------------------------------------------------- + +MODULE geom_module + + USE global_module, ONLY: i_knd, r_knd, zero, one, two + + IMPLICIT NONE + + PUBLIC + + SAVE +!_______________________________________________________________________ +! +! Module Input Variables +! +! ndimen - number of spatial dimensions 1/2/3 +! nx - number of x-dir spatial cells (global) +! ny - number of y-dir spatial cells (global on input, reset to +! per PE in setup) +! nz - number of z-dir spatial cells (global on input, reset to +! per PE in setup) +! lx - total length of x domain +! ly - total length of y domain +! lz - total length of z domain +!_______________________________________________________________________ + + INTEGER(i_knd) :: ndimen=1, nx=4, ny=1, nz=1 + + REAL(r_knd) :: lx=one, ly=one, lz=one +!_______________________________________________________________________ +! +! Run-time variables +! +! ny_gl - global number of y-dir spatial cells +! 
nz_gl - global number of z-dir spatial cells +! jlb - global index of local lower y bound +! jub - global index of local upper y bound +! klb - global index of local lower z bound +! kub - global index of local upper z bound +! +! dx - x width of spatial cell +! dy - y width of spatial cell +! dz - z width of spatial cell +! +! nc - number of i-chunks, nx/ichunk +! +! jdim - number of directions to sweep in j-direction: 1D=1, else=2 +! kdim - number of directions to sweep in z-direction: 3D=2, else=1 +! +! hi - Spatial DD x-coefficient +! hj - Spatial DD y-coefficient +! hk - Spatial DD z-coefficient +! +! dinv(nang,ichunk,ny,nz,nc,ng) - Sweep denominator pre-computed +! +! ndiag - number of diagonals of mini-KBA sweeps in nested threading +!_______________________________________________________________________ + + INTEGER(i_knd) :: ny_gl, nz_gl, jlb, jub, klb, kub, nc, ndiag=0, & + jdim, kdim + + REAL(r_knd) :: dx, dy, dz, hi, hj, hk + + REAL(r_knd), ALLOCATABLE, DIMENSION(:,:,:,:,:,:) :: dinv +!_______________________________________________________________________ +! +! Derived data types for mini-KBA diagonals +! +! cell_id_type - type for holding ijk indices of a cell on a given +! diagonal +! ic, j, k - ijk integer indices of cell_id_type +! diag_type - type for holding diagonal information +! len - number of cells on a diagonal line/plane +! cell_id(len) - array of cell ijk indices +! diag(ndiag) - array of diagonal lines/planes information +!_______________________________________________________________________ + + TYPE cell_id_type + INTEGER(i_knd) :: ic, j, k + END TYPE cell_id_type + + TYPE diag_type + INTEGER(i_knd) :: len + TYPE(cell_id_type), ALLOCATABLE, DIMENSION(:) :: cell_id + END TYPE diag_type + + TYPE(diag_type), ALLOCATABLE, DIMENSION(:) :: diag + + + CONTAINS + + + SUBROUTINE geom_allocate ( nang, ng, swp_typ, ichunk, ndpwds, ierr ) + +!----------------------------------------------------------------------- +! +! 
Allocate the geometry-related solution arrays. Setup the diag_type +! data structures for mini-KBA if requested. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: nang, ng, swp_typ, ichunk + + INTEGER(i_knd), INTENT(INOUT) :: ndpwds + + INTEGER(i_knd), INTENT(OUT) :: ierr +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: i, j, k, nn, ing + + INTEGER(i_knd), ALLOCATABLE, DIMENSION(:) :: indx +!_______________________________________________________________________ + + ierr = 0 + + ALLOCATE( dinv(nang,ichunk,ny,nz,nc,ng), STAT=ierr ) + IF ( ierr /= 0 ) RETURN + + hi = zero + hj = zero + hk = zero + dinv = zero + + ndpwds = ndpwds + SIZE( dinv ) +!_______________________________________________________________________ +! +! If mini-KBA on spatial chunks is selected, set up the diagonal +! 'diag' derived data type, allocate many diagonals, and set indices +! for cells. Because each chunk is same size, can create just one. +!_______________________________________________________________________ + + IF ( swp_typ == 1 ) THEN + + ndiag = ichunk + ny + nz - 2 + + ALLOCATE( diag(ndiag), indx(ndiag), STAT=ierr ) + IF ( ierr /= 0 ) RETURN + + diag%len = 0 + indx = 0 +!_______________________________________________________________________ +! +! Cells of same diagonal all have same value according to i+j+k-2 +! formula. Use that to compute len for each diagonal. Use ichunk. +!_______________________________________________________________________ + + DO k = 1, nz + DO j = 1, ny + DO i = 1, ichunk + nn = i + j + k - 2 + diag(nn)%len = diag(nn)%len + 1 + END DO + END DO + END DO +!_______________________________________________________________________ +! +! 
Next allocate cell_id array within diag type according to len +!_______________________________________________________________________ + + DO nn = 1, ndiag + ing = diag(nn)%len + ALLOCATE( diag(nn)%cell_id(ing), STAT=ierr ) + IF ( ierr /= 0 ) RETURN + ndpwds = ndpwds + SIZE( diag(nn)%cell_id ) + END DO +!_______________________________________________________________________ +! +! Lastly, set each cell's actual ijk indices in this diagonal map +!_______________________________________________________________________ + + DO k = 1, nz + DO j = 1, ny + DO i = 1, ichunk + nn = i + j + k - 2 + indx(nn) = indx(nn) + 1 + ing = indx(nn) + diag(nn)%cell_id(ing)%ic = i + diag(nn)%cell_id(ing)%j = j + diag(nn)%cell_id(ing)%k = k + END DO + END DO + END DO + + DEALLOCATE( indx ) + + END IF +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE geom_allocate + + + SUBROUTINE geom_deallocate ( swp_typ ) + +!----------------------------------------------------------------------- +! +! Deallocate the geometry-related solution arrays. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: swp_typ +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: i +!_______________________________________________________________________ +! +! Deallocate the sweep parameters +!_______________________________________________________________________ + + DEALLOCATE( dinv ) +!_______________________________________________________________________ +! +! 
Deallocate the diagonal related arrays with swp_typ +!_______________________________________________________________________ + + IF ( swp_typ == 1 ) THEN + DO i = 1, ndiag + DEALLOCATE( diag(i)%cell_id ) + END DO + DEALLOCATE( diag ) + END IF +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE geom_deallocate + + + SUBROUTINE geom_param_calc ( nang, ichunk, mu, eta, xi, cs, vd, d ) + +!----------------------------------------------------------------------- +! +! Calculate the DD spatial coefficients hi, hj, hk for all angles at +! the start of each time step. Compute the pre-computed/inverted dinv. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: nang, ichunk + + REAL(r_knd), INTENT(IN) :: vd + + REAL(r_knd), DIMENSION(nang), INTENT(IN) :: mu, eta, xi + + REAL(r_knd), DIMENSION(nx,ny,nz), INTENT(IN) :: cs + + REAL(r_knd), DIMENSION(nang,ichunk,ny,nz,nc), INTENT(OUT) :: d +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: k, j, ii, ich, i, m +!_______________________________________________________________________ +! +! Set the DD coefficients +!_______________________________________________________________________ + + hi = two/dx + IF ( ndimen > 1 ) THEN + hj = two/dy + IF ( ndimen > 2 ) THEN + hk = two/dz + END IF + END IF +!_______________________________________________________________________ +! +! 
Compute the inverted denominator, saved for sweep +!_______________________________________________________________________ + + DO k = 1, nz + DO j = 1, ny + ii = 0 + ich = 1 + DO i = 1, nx + ii = ii + 1 + DO m = 1, nang + d(m,ii,j,k,ich) = one / ( cs(i,j,k) + vd + hi*mu(m) + & + hj*eta(m) + hk*xi(m) ) + END DO + IF ( ii == ichunk ) THEN + ii = 0 + ich = ich + 1 + END IF + END DO + END DO + END DO +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE geom_param_calc + + +END MODULE geom_module diff --git a/Fortran/SNAP/src/global.f90 b/Fortran/SNAP/src/global.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/global.f90 @@ -0,0 +1,53 @@ +!----------------------------------------------------------------------- +! +! MODULE: global_module +!> @brief +!> Global variables for data types, file names and unit numbers, and +!> commonly used numbers +! +!----------------------------------------------------------------------- + +MODULE global_module + + IMPLICIT NONE + + PUBLIC + + SAVE +!_______________________________________________________________________ +! +! Numerical kinds. +!_______________________________________________________________________ + + INTEGER, PARAMETER :: l_knd = KIND( .TRUE. ) + INTEGER, PARAMETER :: i_knd = SELECTED_INT_KIND( 8 ) + INTEGER, PARAMETER :: r_knd = SELECTED_REAL_KIND( 13 ) +!_______________________________________________________________________ +! +! File names and unit numbers. +! +! ifile - input file name +! iunit - input file unit number +! ofile - output file name +! ounit - output file unit number +!_______________________________________________________________________ + + CHARACTER(LEN=64) :: ifile, ofile + + INTEGER(i_knd), PARAMETER :: iunit=10, ounit=11 +!_______________________________________________________________________ +! +! Commonly used numbers. 
+!_______________________________________________________________________ + + REAL(r_knd), PARAMETER :: zero = 0.0_r_knd + REAL(r_knd), PARAMETER :: half = 0.5_r_knd + REAL(r_knd), PARAMETER :: one = 1.0_r_knd + REAL(r_knd), PARAMETER :: two = 2.0_r_knd + + REAL(r_knd), PARAMETER :: pi = 3.14159265358979_r_knd +!_______________________________________________________________________ +!_______________________________________________________________________ + + +END MODULE global_module diff --git a/Fortran/SNAP/src/inner.f90 b/Fortran/SNAP/src/inner.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/inner.f90 @@ -0,0 +1,341 @@ +!----------------------------------------------------------------------- +! +! MODULE: inner_module +!> @brief +!> This module controls the inner iterations. Inner iterations include +!> the KBA mesh sweep, which is parallelized via MPI and vectorized over +!> angles in a given octant. Inner source computed here and inner +!> convergence is checked. +! +!----------------------------------------------------------------------- + +MODULE inner_module + + USE global_module, ONLY: i_knd, r_knd, one, ounit, zero + + USE geom_module, ONLY: nx, ny, nz, nc + + USE sn_module, ONLY: nmom, cmom, lma, nang, noct + + USE data_module, ONLY: ng + + USE control_module, ONLY: epsi, tolr, dfmxi, inrdone, it_det + + USE solvar_module, ONLY: q2grp0, q2grpm, s_xs, flux0, flux0pi, fluxm,& + qtot, fixup_counter + + USE sweep_module, ONLY: sweep + + USE time_module, ONLY: tinrsrc, tsweeps, wtime + + USE plib_module, ONLY: nthreads, glmax, comm_snap, iproc, root, ichunk + + USE thrd_comm_module, ONLY: assign_thrd_set + + IMPLICIT NONE + + PRIVATE + + PUBLIC :: inner + + + CONTAINS + + + SUBROUTINE inner ( inno, iits, t, do_grp, ng_per_thrd, nnstd_used, & + grp_act ) + +!----------------------------------------------------------------------- +! +! Do a single inner iteration for all groups. Calculate the total source +! 
for each group and sweep the mesh over octants. Repeat for all groups +! unless the group is converged according to inrdone(g). +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: inno, t + + INTEGER(i_knd), INTENT(INOUT) :: ng_per_thrd, nnstd_used + + INTEGER(i_knd), DIMENSION(ng), INTENT(OUT) :: iits + + INTEGER(i_knd), DIMENSION(ng), INTENT(INOUT) :: do_grp + + INTEGER(i_knd), DIMENSION(ng,nthreads), INTENT(INOUT) :: grp_act +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: n, g + + REAL(r_knd) :: t1, t2, t3, t4 +!_______________________________________________________________________ +! +! Compute the inner source and add it to fixed + out-of-group sources. +! No need to do inner operations if the group's inners are converged. +!_______________________________________________________________________ + + !$OMP MASTER + + CALL wtime ( t1 ) + + do_grp = 1 + WHERE( inrdone ) do_grp = 0 + + CALL assign_thrd_set ( do_grp, ng, ng_per_thrd, ny*nz, nnstd_used, & + grp_act ) + + !$OMP END MASTER + !$OMP BARRIER + + CALL inner_src ( ng_per_thrd, nnstd_used, grp_act(:,t) ) + !$OMP BARRIER + + !$OMP MASTER + CALL wtime ( t2 ) + tinrsrc = tinrsrc + t2 - t1 + !$OMP END MASTER +!_______________________________________________________________________ +! +! With source computed, set previous copy of flux; new flux moments +! iterates computed during sweep. +!_______________________________________________________________________ + + DO n = 1, ng_per_thrd + g = grp_act(n,t) + IF ( g == 0 ) EXIT + flux0pi(:,:,:,g) = flux0(:,:,:,g) + END DO +!_______________________________________________________________________ +! +! Call for the transport sweep. Check convergence, using threads. 
+!_______________________________________________________________________ + + !$OMP MASTER + CALL wtime ( t3 ) + !$OMP END MASTER + + !$OMP BARRIER + + CALL sweep ( t, do_grp, ng_per_thrd, nnstd_used, grp_act ) + + !$OMP BARRIER + + !$OMP MASTER + + CALL wtime ( t4 ) + tsweeps = tsweeps + t4 - t3 +!_______________________________________________________________________ +! +! Check inner convergence. Apply nested threads to group sets. +!_______________________________________________________________________ + + do_grp = 1 + WHERE( inrdone ) do_grp = 0 + + CALL assign_thrd_set ( do_grp, ng, ng_per_thrd, 0, nnstd_used, & + grp_act ) + + !$OMP END MASTER + !$OMP BARRIER +!_______________________________________________________________________ +! +! Thread group loops for computing local difference (df) array. +!_______________________________________________________________________ + + !$OMP PARALLEL DO NUM_THREADS(nnstd_used) IF(nnstd_used>1) & + !$OMP& SCHEDULE(STATIC,1) DEFAULT(SHARED) PRIVATE(n,g) & + !$OMP& PROC_BIND(CLOSE) + DO n = 1, ng_per_thrd + g = grp_act(n,t) + IF ( g == 0 ) CYCLE + CALL inner_df_calc ( inno, iits(g), flux0pi(:,:,:,g), & + flux0(:,:,:,g), dfmxi(g) ) + END DO + !$OMP END PARALLEL DO +!_______________________________________________________________________ +! +! All procs then reduce dfmxi for all groups, determine which groups +! are converged and print requested info +!_______________________________________________________________________ + + !$OMP BARRIER + !$OMP MASTER + + CALL glmax ( dfmxi, ng, comm_snap ) + WHERE( dfmxi <= epsi ) inrdone = .TRUE. + + IF ( iproc==root .AND. 
it_det==1 ) THEN + DO g = 1, ng + fixup_counter(:,1,g) = fixup_counter(:,1,g) / & + ( REAL(nx,r_knd) * REAL(ny,r_knd) * REAL(nz,r_knd) * & + REAL(nang,r_knd) * REAL(noct,r_knd) ) + WRITE( ounit, 221 ) g, iits(g), dfmxi(g), & + (fixup_counter(n,1,g),n=1,4) + END DO + END IF + + !$OMP END MASTER +!_______________________________________________________________________ + + 221 FORMAT( 4X, 'Group ', I3, 4X, ' Inner ', I5, 4X, ' Dfmxi ', & + ES11.4, ' Fixup x/y/z/t ', 4(ES9.2,1x) ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE inner + + + SUBROUTINE inner_src ( ng_per_thrd, nnstd_used, grp_act ) + +!----------------------------------------------------------------------- +! +! Compute the inner source, i.e., the within-group scattering source. +! Thread over groups and use nested threads on i-lines when available. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: ng_per_thrd, nnstd_used + + INTEGER(i_knd), DIMENSION(ng), INTENT(IN) :: grp_act +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: n, g, k, j +!_______________________________________________________________________ +! +! Loop over each set of groups. Use nested threads if available. 
+!_______________________________________________________________________ + + !$OMP PARALLEL NUM_THREADS(nnstd_used) IF(nnstd_used>1) & + !$OMP& DEFAULT(SHARED) PRIVATE(n,g,k,j) PROC_BIND(CLOSE) + DO n = 1, ng_per_thrd + + g = grp_act(n) + IF ( g == 0 ) EXIT + + !$OMP DO SCHEDULE(STATIC,1) COLLAPSE(2) + DO k = 1, nz + DO j = 1, ny + CALL inner_src_calc ( j, k, s_xs(:,:,:,:,g), flux0(:,j,k,g), & + fluxm(:,:,j,k,g), q2grp0(:,j,k,g), q2grpm(:,:,j,k,g), & + qtot(:,:,:,:,:,g) ) + END DO + END DO + !$OMP END DO NOWAIT + + END DO + !$OMP END PARALLEL +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE inner_src + + + SUBROUTINE inner_src_calc ( j, k, sxs_g, f0, fm, qo0, qom, q ) + +!----------------------------------------------------------------------- +! +! Compute the within-group scattering for a given group. Add it to fixed +! and out-of-group sources. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: j, k + + REAL(r_knd), DIMENSION(nx), INTENT(IN) :: f0, qo0 + + REAL(r_knd), DIMENSION(cmom-1,nx), INTENT(IN) :: fm, qom + + REAL(r_knd), DIMENSION(nx,ny,nz,nmom), INTENT(IN) :: sxs_g + + REAL(r_knd), DIMENSION(cmom,ichunk,ny,nz,nc), INTENT(OUT) :: q +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: ic, ich, i, mom, l +!_______________________________________________________________________ +! +! Loop over i cells. Set the first source moment with flux0 (f0). Then +! set remaining source moments with fluxm (fm) and combination of +! higher scattering orders. 
+!_______________________________________________________________________ + + ic = 0 + ich = 1 + + DO i = 1, nx + + ic = ic + 1 + + q(1,ic,j,k,ich) = qo0(i) + f0(i)*sxs_g(i,j,k,1) + + mom = 1 + DO l = 2, nmom + q(mom+1:mom+lma(l),ic,j,k,ich) = qom(mom:mom+lma(l)-1,i) + & + fm(mom:mom+lma(l)-1,i)*sxs_g(i,j,k,l) + mom = mom + lma(l) + END DO + + IF ( ic == ichunk ) THEN + ic = 0 + ich = ich + 1 + END IF + + END DO +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE inner_src_calc + + + SUBROUTINE inner_df_calc ( inno, iits, flux0pi, flux0, dfmxi ) + +!----------------------------------------------------------------------- +! +! Check for inner iteration convergence using the flux array. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: inno + + INTEGER(i_knd), INTENT(OUT) :: iits + + REAL(r_knd), INTENT(OUT) :: dfmxi + + REAL(r_knd), DIMENSION(nx,ny,nz), INTENT(INOUT) :: flux0pi + + REAL(r_knd), DIMENSION(nx,ny,nz), INTENT(IN) :: flux0 +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + REAL(r_knd), DIMENSION(nx,ny,nz) :: df +!_______________________________________________________________________ +! +! Compute max for individual group, all spatial extent. 
+!_______________________________________________________________________ + + iits = inno + + df = one + WHERE( ABS( flux0pi ) < tolr ) + flux0pi = one + df = zero + END WHERE + df = ABS( flux0/flux0pi - df ) + + dfmxi = MAXVAL( df ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE inner_df_calc + + +END MODULE inner_module diff --git a/Fortran/SNAP/src/input.f90 b/Fortran/SNAP/src/input.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/input.f90 @@ -0,0 +1,684 @@ +!----------------------------------------------------------------------- +! +! MODULE: input_module +!> @brief +!> This module controls the reading of the input file and checking the +!> input parameters for acceptable values. +! +!----------------------------------------------------------------------- + +MODULE input_module + + USE global_module, ONLY: i_knd, r_knd, iunit, ounit, zero, one + + USE plib_module, ONLY: npey, npez, ichunk, nthreads, iproc, root, & + comm_snap, bcast, nproc, barrier, nnested + + USE geom_module, ONLY: ndimen, nx, ny, nz, lx, ly, lz + + USE sn_module, ONLY: nang, nmom + + USE data_module, ONLY: ng, mat_opt, src_opt, scatp + + USE control_module, ONLY: epsi, iitm, oitm, timedep, tf, nsteps, & + swp_typ, multiswp, angcpy, it_det, fluxp, fixup, soloutp, kplane, & + popout + + USE utils_module, ONLY: print_error, stop_run + + USE time_module, ONLY: tinp, wtime + + IMPLICIT NONE + + PRIVATE + + PUBLIC :: input_read + + + CONTAINS + + + SUBROUTINE input_read + +!----------------------------------------------------------------------- +! +! Read the input file. +! +!----------------------------------------------------------------------- +!_______________________________________________________________________ +! +! 
Local variables +!_______________________________________________________________________ + + CHARACTER(LEN=64) :: error + + INTEGER(i_knd) :: ierr + + REAL(r_knd) :: t1, t2 + + NAMELIST / invar / npey, npez, ichunk, nthreads, ndimen, nx, ny, & + nz, lx, ly, lz, nmom, nang, ng, epsi, iitm, oitm, timedep, tf, & + nsteps, mat_opt, src_opt, scatp, swp_typ, multiswp, it_det, & + fluxp, fixup, nnested, soloutp, kplane, popout, angcpy +!_______________________________________________________________________ +! +! Read the input file. Echo to output file. Call for an input variable +! check. Only root reads, echoes, checks input. +!_______________________________________________________________________ + + CALL wtime ( t1 ) + + ierr = 0 + + IF ( iproc == root ) READ( iunit, NML=invar, IOSTAT=ierr ) + CALL bcast ( ierr, comm_snap, root ) + IF ( ierr /= 0 ) THEN + error = '***ERROR: READ_INPUT: Problem reading input file' + CALL print_error ( 0, error ) + CALL print_error ( ounit, error ) + CALL stop_run ( 0, 0, 0, 0 ) + END IF + + IF ( iproc == root ) THEN + CALL input_echo + CALL input_check ( ierr ) + END IF + + CALL bcast ( ierr, comm_snap, root ) + IF ( ierr /= 0 ) THEN + error = '***ERROR: READ_INPUT: Input file errors' + CALL print_error ( ounit, error ) + CALL stop_run ( 0, 0, 0, 0 ) + END IF +!_______________________________________________________________________ +! +! Broadcast the data to all processes. +!_______________________________________________________________________ + + CALL input_var_bcast + + CALL wtime ( t2 ) + tinp = t2 - t1 +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE input_read + + + SUBROUTINE input_echo + +!----------------------------------------------------------------------- +! +! Print the input variables to the output file. +! 
+!----------------------------------------------------------------------- +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + CHARACTER(LEN=1) :: star='*' + + INTEGER(i_knd) :: i +!_______________________________________________________________________ + + WRITE( ounit, 121 ) ( star, i = 1, 80 ) + WRITE( ounit, 122 ) ( star, i = 1, 80 ) + WRITE( ounit, 123 ) + WRITE( ounit, 124 ) npey, npez, ichunk, nthreads, nnested + WRITE( ounit, 125 ) ndimen, nx, ny, nz + WRITE( ounit, 126 ) lx, ly, lz + WRITE( ounit, 127 ) nmom, nang + WRITE( ounit, 128 ) ng, mat_opt, src_opt, scatp + WRITE( ounit, 129 ) epsi, iitm, oitm, timedep, tf, nsteps, swp_typ,& + multiswp, angcpy, it_det, soloutp, kplane, popout, fluxp, fixup + WRITE( ounit, 121 ) ( star, i = 1, 80 ) +!_______________________________________________________________________ + + 121 FORMAT( /, 80A, / ) + 122 FORMAT( 10X, & + 'keyword Input Echo - Values from input or default', /,& + 80A, / ) + + 123 FORMAT( 2X, 'NML=invar' ) + + 124 FORMAT( 5X, 'npey= ', I5, /, & + 5X, 'npez= ', I5, /, & + 5X, 'ichunk= ', I5, /, & + 5X, 'nthreads= ', I5, /, & + 5X, 'nnested= ', I3 ) + + 125 FORMAT( 5X, 'ndimen= ', I2, /, & + 5X, 'nx= ', I5, /, & + 5X, 'ny= ', I5, /, & + 5X, 'nz= ', I5 ) + + 126 FORMAT( 5X, 'lx= ', ES11.4, /, & + 5X, 'ly= ', ES11.4, /, & + 5X, 'lz= ', ES11.4 ) + + 127 FORMAT( 5X, 'nmom= ', I3, /, & + 5X, 'nang= ', I4 ) + + 128 FORMAT( 5X, 'ng= ', I4, /, & + 5X, 'mat_opt= ', I2, /, & + 5X, 'src_opt= ', I2, /, & + 5X, 'scatp= ', I2 ) + + 129 FORMAT( 5X, 'epsi= ', ES11.4, /, & + 5X, 'iitm= ', I3, /, & + 5X, 'oitm= ', I4, /, & + 5X, 'timedep= ', I2, /, & + 5X, 'tf= ', ES11.4, /, & + 5X, 'nsteps= ', I5, /, & + 5X, 'swp_typ= ', I2, /, & + 5X, 'multiswp= ', I2, /, & + 5X, 'angcpy= ', I2, /, & + 5X, 'it_det= ', I2, /, & + 5X, 'soloutp= ', I2, /, & + 5X, 'kplane= ', I4, /, & + 5X, 'popout= ', I2, /, & + 5X, 
'fluxp= ', I2, /, & + 5X, 'fixup= ', I2 ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE input_echo + + + SUBROUTINE input_check ( ierr ) + +!----------------------------------------------------------------------- +! +! Checks input for valid entries +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(INOUT) :: ierr +!_______________________________________________________________________ +! +! Local Variables +!_______________________________________________________________________ + + CHARACTER(LEN=64) :: error +!_______________________________________________________________________ +! +! Check the parallel environment variables +!_______________________________________________________________________ + + ierr = 0 +!_______________________________________________________________________ +! +! Parallel processing inputs. +!_______________________________________________________________________ + + IF ( npey < 1 ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: NPEY must be positive' + CALL print_error ( ounit, error ) + END IF + + IF ( npez < 1 ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: NPEZ must be positive' + CALL print_error ( ounit, error ) + END IF + + IF ( ndimen>1 .AND. MOD( ny, npey )/=0 ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: NPEY must divide evenly into NY' + CALL print_error ( ounit, error ) + END IF + + IF ( ndimen>2 .AND. MOD( nz, npez )/=0 ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: NPEZ must divide evenly into NZ' + CALL print_error ( ounit, error ) + END IF + + IF ( ndimen<2 .AND. npey/=1 ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: NPEY must be 1 if not 2-D problem' + CALL print_error ( ounit, error ) + END IF + + IF ( ndimen<3 .AND. 
npez/=1 ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: NPEZ must be 1 if not 3-D problem' + CALL print_error ( ounit, error ) + END IF + + IF ( npey*npez /= nproc ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: NPEY*NPEZ must equal MPI NPROC' + CALL print_error ( ounit, error ) + END IF + + IF ( ichunk > nx ) THEN + ichunk = nx + error = '*WARNING: INPUT_CHECK: ICHUNK cannot exceed NX; ' // & + 'reset to NX' + CALL print_error ( ounit, error ) + END IF + + IF ( ndimen==1 .AND. ichunk/=nx ) ichunk = nx + + IF ( MOD( nx, ichunk ) /= 0 ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: ICHUNK must divide evenly into NX' + CALL print_error ( ounit, error ) + END IF + + IF ( nthreads < 1 ) THEN + nthreads = 1 + error = '*WARNING: INPUT_CHECK: NTHREADS must be positive; ' // & + 'reset to 1' + CALL print_error ( ounit, error ) + END IF + + IF ( nnested < 1 ) THEN + nnested = 1 + error = '*WARNING: INPUT_CHECK: NNESTED must be positive; ' // & + 'reset to 1' + CALL print_error ( ounit, error ) + END IF + + IF ( multiswp/=0 .AND. nproc==1 ) THEN + multiswp = 0 + error = '*WARNING: INPUT_CHECK: MULTISWP reset to 0 for serial' + CALL print_error ( ounit, error ) + END IF +!_______________________________________________________________________ +! +! Geometry inputs. +!_______________________________________________________________________ + + IF ( ndimen<1 .OR. ndimen>3 ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: NDIMEN must be 1, 2, or 3' + CALL print_error ( ounit, error ) + END IF + +! IF ( nx < 4 ) THEN +! ierr = ierr + 1 +! error = '***ERROR: INPUT_CHECK: NX must be at least 4' +! CALL print_error ( ounit, error ) +! END IF + +! IF ( ny<4 .AND. ndimen>1 ) THEN +! ierr = ierr + 1 +! error = '***ERROR: INPUT_CHECK: NY must be at least 4' +! CALL print_error ( ounit, error ) +! END IF + +! IF ( nz<4 .AND. ndimen>2 ) THEN +! ierr = ierr + 1 +! error = '***ERROR: INPUT_CHECK: NZ must be at least 4' +! 
CALL print_error ( ounit, error ) +! END IF + + IF ( lx <= zero ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: LX must be positive' + CALL print_error ( ounit, error ) + END IF + + IF ( ly <= zero ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: LY must be positive' + CALL print_error ( ounit, error ) + END IF + + IF ( lz <= zero ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: LZ must be positive' + CALL print_error ( ounit, error ) + END IF + + IF ( ndimen<2 .AND. ( ny>1 .OR. ly/=one ) ) THEN + ny = 1 + ly = zero + error = '*WARNING: INPUT_CHECK: NY and LY reset for 1-D problem' + CALL print_error ( ounit, error ) + END IF + + IF ( ndimen<3 .AND. ( nz>1 .OR. lz/=one ) ) THEN + nz = 1 + lz = zero + error = '*WARNING: INPUT_CHECK: NZ and LZ reset for 1/2-D problem' + CALL print_error ( ounit, error ) + END IF +!_______________________________________________________________________ +! +! Sn inputs. +!_______________________________________________________________________ + + IF ( nmom<1 .OR. nmom>4 ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: NMOM must be positive and ' // & + 'less than 5' + CALL print_error ( ounit, error ) + END IF + + IF ( nang < 1 ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: NANG must be positive' + CALL print_error ( ounit, error ) + END IF +!_______________________________________________________________________ +! +! Data inputs. +!_______________________________________________________________________ + + IF ( ng < 1 ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: NG must be positive' + CALL print_error ( ounit, error ) + END IF + + IF ( nthreads > ng ) THEN + nthreads = ng + error = '*WARNING: INPUT_CHECK: NTHREADS should be <= NG; ' // & + 'reset to NG' + CALL print_error ( ounit, error ) + END IF + + IF ( mat_opt<0 .OR. 
mat_opt>2) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: MAT_OPT must be 0/1/2' + CALL print_error ( ounit, error ) + END IF + + IF ( src_opt<0 .OR. src_opt>3) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: SRC_OPT must be 0/1/2/3' + CALL print_error ( ounit, error ) + END IF + +! IF ( src_opt==3 .AND. mat_opt/=0 ) THEN +! mat_opt = 0 +! error = '*WARNING: INPUT_CHECK: MAT_OPT must be 0 for SRC_OPT=3' +! CALL print_error ( ounit, error ) +! END IF + + IF ( scatp/=0 .AND. scatp/=1 ) THEN + scatp = 0 + error = '*WARNING: INPUT_CHECK: SCATP must be 0/1; reset to 0' + CALL print_error ( ounit, error ) + END IF +!_______________________________________________________________________ +! +! Control inputs. +!_______________________________________________________________________ + + IF ( epsi<=zero .OR. epsi>=1.0E-2_r_knd ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: EPSI must be positive, less ' // & + 'than 1.0E-2' + CALL print_error ( ounit, error ) + END IF + + IF ( iitm < 1 ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: IITM must be positive' + CALL print_error ( ounit, error ) + END IF + + IF ( oitm < 1 ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: OITM must be positive' + CALL print_error ( ounit, error ) + END IF + + IF ( timedep/=0 .AND. timedep/=1 ) THEN + timedep = 0 + error = '*WARNING: INPUT_CHECK: TIMEDEP must be 0/1; reset to 0' + CALL print_error ( ounit, error ) + END IF + + IF ( timedep==1 .AND. tf<=zero ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: TF must be positive' + CALL print_error ( ounit, error ) + END IF + + IF ( timedep==1 .AND. nsteps<1 ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: NSTEPS must be positive' + CALL print_error ( ounit, error ) + END IF + + IF ( timedep==0 .AND. nsteps/=1 ) THEN + nsteps = 1 + error = '*WARNING: INPUT_CHECK: NSTEPS reset to 1 for ' // & + 'static calc' + CALL print_error ( ounit, error ) + END IF + + IF ( swp_typ/=0 .AND. 
swp_typ/=1 ) THEN + swp_typ = 0 + error = '*WARNING: INPUT_CHECK: SWP_TYP must equal 0/1; ' // & + 'reset to 0' + CALL print_error ( ounit, error ) + END IF + + IF ( swp_typ/=0 .AND. ndimen==1 ) THEN + swp_typ = 0 + error = '*WARNING: INPUT_CHECK: SWP_TYP must be 0 for 1-D; ' // & + 'reset to 0' + CALL print_error ( ounit, error ) + END IF + + IF ( nnested>1 .AND. swp_typ==0 ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: SWP_TYP must be 1 for nested' // & + ' threading' + CALL print_error ( ounit, error ) + END IF + +! IF ( nproc>1 .AND. nnested>1 .AND. swp_typ==0 ) THEN +! ierr = ierr + 1 +! error = '***ERROR: INPUT_CHECK: SWP_TYP=0 + NNESTED>1 ' // & +! 'requires NPROC=1' +! CALL print_error ( ounit, error ) +! END IF + + IF ( multiswp/=0 .AND. multiswp/=1 ) THEN + multiswp = 0 + error = '*WARNING: INPUT_CHECK: MULTISWP must equal 0/1; ' // & + 'reset to 0' + CALL print_error ( ounit, error ) + END IF + + IF ( ndimen==1 .AND. multiswp==1 ) THEN + multiswp = 0 + error = '*WARNING: INPUT_CHECK: MULTISWP must equal 0 in 1D;' // & + ' reset to 0' + CALL print_error ( ounit, error ) + END IF + + IF ( multiswp==1 .AND. swp_typ==1 ) THEN + ierr = ierr + 1 + error = '***ERROR: INPUT_CHECK: MULTISWP must equal 0 if ' // & + 'SWP_TYP is 1' + CALL print_error ( ounit, error ) + END IF + + IF ( angcpy/=1 .AND. angcpy/=2 ) THEN + angcpy = 1 + error = '*WARNING: INPUT_CHECK: ANGCPY must be 1 or 2; ' // & + 'reset to 1' + CALL print_error ( ounit, error ) + END IF + + IF ( it_det/=0 .AND. it_det/=1 ) THEN + it_det = 0 + error = '*WARNING: INPUT_CHECK: IT_DET must equal 0/1; ' // & + 'reset to 0' + CALL print_error ( ounit, error ) + END IF + + IF ( fluxp<0 .OR. fluxp>2 ) THEN + fluxp = 0 + error = '*WARNING: INPUT_CHECK: FLUXP must equal 0/1/2; ' // & + 'reset to 0' + CALL print_error ( ounit, error ) + END IF + + IF ( fixup/=0 .AND. 
fixup/=1 ) THEN + fixup = 0 + error = '*WARNING: INPUT_CHECK: FIXUP must equal 0/1; ' // & + 'reset to 0' + CALL print_error ( ounit, error ) + END IF + + IF ( soloutp/=0 .AND. soloutp/=1 ) THEN + soloutp = 0 + error = '*WARNING: INPUT_CHECK: SOLOUTP must equal 0/1; ' // & + 'reset to 0' + CALL print_error ( ounit, error ) + END IF + + IF ( kplane<0 .OR. kplane>nz ) THEN + kplane = 0 + error = '*WARNING: INPUT_CHECK: KPLANE must be in the range ' // & + '0 to NZ; reset to 0' + CALL print_error ( ounit, error ) + END IF + + IF ( popout<0 .OR. popout>2 ) THEN + popout = 0 + error = '*WARNING: INPUT_CHECK: POPOUT must equal 0/1/2; ' // & + 'reset to 0' + call print_error ( ounit, error ) + END IF +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE input_check + + + SUBROUTINE input_var_bcast + +!----------------------------------------------------------------------- +! +! Broadcast input variables from root to all other procs +! +!----------------------------------------------------------------------- +!_______________________________________________________________________ +! +! Local Variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: ilen, dlen + + INTEGER(i_knd), DIMENSION(30) :: ipak + + REAL(r_knd), DIMENSION(15) :: dpak +!_______________________________________________________________________ +! +! 
Pack the input variables into arrays before broadcasting +!_______________________________________________________________________ + + ipak = 0 + dpak = zero + + IF ( iproc == root ) THEN + + ipak(1) = npey + ipak(2) = npez + ipak(3) = ichunk + ipak(4) = nthreads + ipak(5) = ndimen + ipak(6) = nx + ipak(7) = ny + ipak(8) = nz + ipak(9) = nmom + ipak(10) = nang + ipak(11) = ng + ipak(12) = timedep + ipak(13) = nsteps + ipak(14) = iitm + ipak(15) = oitm + ipak(16) = mat_opt + ipak(17) = src_opt + ipak(18) = scatp + ipak(19) = swp_typ + ipak(20) = multiswp + ipak(21) = angcpy + ipak(22) = it_det + ipak(23) = fluxp + ipak(24) = fixup + ipak(25) = nnested + ipak(26) = soloutp + ipak(27) = kplane + ipak(28) = popout + + dpak(1) = lx + dpak(2) = ly + dpak(3) = lz + dpak(4) = tf + dpak(5) = epsi + + END IF +!_______________________________________________________________________ +! +! Broadcast data +!_______________________________________________________________________ + + ilen = SIZE( ipak ) + dlen = SIZE( dpak ) + CALL bcast ( ipak, ilen, comm_snap, root ) + CALL bcast ( dpak, dlen, comm_snap, root ) +!_______________________________________________________________________ +! +! 
Unpack +!_______________________________________________________________________ + + IF ( iproc /= root ) THEN + + npey = ipak(1) + npez = ipak(2) + ichunk = ipak(3) + nthreads = ipak(4) + ndimen = ipak(5) + nx = ipak(6) + ny = ipak(7) + nz = ipak(8) + nmom = ipak(9) + nang = ipak(10) + ng = ipak(11) + timedep = ipak(12) + nsteps = ipak(13) + iitm = ipak(14) + oitm = ipak(15) + mat_opt = ipak(16) + src_opt = ipak(17) + scatp = ipak(18) + swp_typ = ipak(19) + multiswp = ipak(20) + angcpy = ipak(21) + it_det = ipak(22) + fluxp = ipak(23) + fixup = ipak(24) + nnested = ipak(25) + soloutp = ipak(26) + kplane = ipak(27) + popout = ipak(28) + + lx = dpak(1) + ly = dpak(2) + lz = dpak(3) + tf = dpak(4) + epsi = dpak(5) + + END IF + + CALL barrier ( comm_snap ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE input_var_bcast + + +END MODULE input_module diff --git a/Fortran/SNAP/src/lit.local.cfg b/Fortran/SNAP/src/lit.local.cfg new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/lit.local.cfg @@ -0,0 +1,2 @@ +config.traditional_output = True +config.single_source = True diff --git a/Fortran/SNAP/src/mkba_sweep.f90 b/Fortran/SNAP/src/mkba_sweep.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/mkba_sweep.f90 @@ -0,0 +1,481 @@ +!----------------------------------------------------------------------- +! +! MODULE: mkba_sweep_module +!> @brief +!> This module contains the 2D and 3D mesh sweep logic using mini-KBA. +! 
+!-----------------------------------------------------------------------
+
+MODULE mkba_sweep_module
+
+  USE global_module, ONLY: i_knd, r_knd, zero, two, one, half
+
+  USE plib_module, ONLY: ichunk, firsty, lasty, firstz, lastz, &
+    nnested
+
+  USE geom_module, ONLY: nx, hi, hj, hk, ndimen, ny, nz, ndiag, diag, nc
+
+  USE sn_module, ONLY: cmom, nang, mu, eta, xi, w, noct
+
+  USE data_module, ONLY: src_opt, qim
+
+  USE control_module, ONLY: fixup, tolr, update_ptr, angcpy, it_det
+
+  USE thrd_comm_module, ONLY: sweep_recv_bdry, sweep_send_bdry
+
+  IMPLICIT NONE
+
+  PUBLIC :: mkba_sweep
+
+
+  CONTAINS
+
+
+  SUBROUTINE mkba_sweep ( d1, d2, d3, d4, d5, jd, kd, g, t, nnstd_used,&
+    reqs, szreq, psii, psij, psik, qtot, ec, vdelt, ptr_in, ptr_out, &
+    dinv, flux0, fluxm, jb_in, jb_out, kb_in, kb_out, wmu, weta, wxi, &
+    flkx, flky, flkz, t_xs, fixup_counter )
+
+!-----------------------------------------------------------------------
+!
+! 3-D slab mesh sweeper (one group g, one y/z octant pair jd/kd).
+!
+!-----------------------------------------------------------------------
+
+    INTEGER(i_knd), INTENT(IN) :: d1, d2, d3, d4, d5, jd, kd, g, t, &
+      nnstd_used, szreq
+
+    INTEGER(i_knd), DIMENSION(szreq), INTENT(INOUT) :: reqs
+
+    REAL(r_knd), INTENT(IN) :: vdelt
+
+    REAL(r_knd), DIMENSION(nang), INTENT(IN) :: wmu, weta, wxi
+
+    REAL(r_knd), DIMENSION(4), INTENT(INOUT) :: fixup_counter
+
+    REAL(r_knd), DIMENSION(nang,cmom,noct), INTENT(IN) :: ec
+
+    REAL(r_knd), DIMENSION(nang,ny,nz), INTENT(INOUT) :: psii
+
+    REAL(r_knd), DIMENSION(nang,ichunk,nz), INTENT(INOUT) :: psij, &
+      jb_in, jb_out
+
+    REAL(r_knd), DIMENSION(nang,ichunk,ny), INTENT(INOUT) :: psik, &
+      kb_in, kb_out
+
+    REAL(r_knd), DIMENSION(nx,ny,nz), INTENT(IN) :: t_xs
+
+    REAL(r_knd), DIMENSION(nx,ny,nz), INTENT(INOUT) :: flux0
+
+    REAL(r_knd), DIMENSION(nx+1,ny,nz), INTENT(INOUT) :: flkx
+
+    REAL(r_knd), DIMENSION(nx,ny+1,nz), INTENT(INOUT) :: flky
+
+    REAL(r_knd), DIMENSION(nx,ny,nz+1), INTENT(INOUT) :: flkz
+
+    REAL(r_knd), DIMENSION(nang,ichunk,ny,nz,nc), INTENT(IN) :: dinv
+
+    REAL(r_knd), DIMENSION(cmom-1,nx,ny,nz), INTENT(INOUT) :: fluxm
+
+    REAL(r_knd), DIMENSION(cmom,ichunk,ny,nz,nc), INTENT(IN) :: qtot
+
+    REAL(r_knd), DIMENSION(d1,d2,d3,d4,d5), INTENT(INOUT) :: ptr_in
+
+    REAL(r_knd), DIMENSION(d1,d2,d3,d4,d5), INTENT(OUT) :: ptr_out
+!_______________________________________________________________________
+!
+! Local variables
+!_______________________________________________________________________
+
+    INTEGER(i_knd) :: nedg, ist, jst, jlo, jhi, kst, klo, khi, d, n, &
+      ic, i, j, k, l, iop, ich, id, oct, ibl, ibr, ibb, ibt, ibf, ibk
+
+    REAL(r_knd) :: sum_hv, sum_hv_n
+
+    REAL(r_knd), DIMENSION(nang) :: psi, pc, den
+
+    REAL(r_knd), DIMENSION(nang,4) :: hv, fxhv
+!_______________________________________________________________________
+!
+! Set the number of edges subject to fixup; zero the fixup edge array
+!_______________________________________________________________________
+
+    nedg = 2
+    IF ( ndimen == 3 ) nedg = 3
+    IF ( vdelt /= zero ) nedg = nedg + 1
+    fxhv = zero
+!_______________________________________________________________________
+!
+! Set up the y-dir and z-dir sweep order given octant info.
+!_______________________________________________________________________
+
+    IF ( jd == 1 ) THEN
+      jst = -1; jlo = ny; jhi = 1
+    ELSE
+      jst = +1; jlo = 1; jhi = ny
+    END IF
+
+    IF ( kd == 1 ) THEN
+      kst = -1; klo = nz; khi = 1
+    ELSE
+      kst = +1; klo = 1; khi = nz
+    END IF
+!_______________________________________________________________________
+!
+! Loop over the spatial work chunks in +/- x-directions.
+!_______________________________________________________________________
+
+    iop_loop: DO iop = 1, 2*nc
+!_______________________________________________________________________
+!
+! Determine chunk index, octant, and finish setting up sweep order.
+!_______________________________________________________________________
+
+      id = 1 + (iop - 1)/nc
+      oct = id + 2*(jd - 1) + 4*(kd - 1)
+
+      IF ( id == 1 ) THEN
+        ich = nc - iop + 1
+        ist = -1
+      ELSE
+        ich = iop - nc
+        ist = +1
+      END IF
+!_______________________________________________________________________
+!
+! Process boundaries in y- and z-directions come from preset
+! boundary conditions (vacuum) or from upstream neighbors. Must call
+! to receive any data. If upstream is boundary, no communication.
+! The master of the nested threaded region calls; i.e., all main
+! level threads.
+!_______________________________________________________________________
+
+      CALL sweep_recv_bdry ( g, jd, kd, iop, t, reqs, szreq, nc, nang, &
+        ichunk, ny, nz, jb_in, kb_in )
+!_______________________________________________________________________
+!
+! Loop over cells along the diagonals. Nested threads break up each
+! diagonal's work.
+!_______________________________________________________________________
+
+      diagonal_loop: DO d = 1, ndiag
+
+  !$OMP PARALLEL DO NUM_THREADS(nnstd_used) IF(nnstd_used>1) &
+  !$OMP& SCHEDULE(STATIC,1) PROC_BIND(CLOSE) DEFAULT(SHARED) &
+  !$OMP& PRIVATE(n,ic,i,j,k,l,psi,pc,hv,sum_hv,fxhv,sum_hv_n,den)
+        line_loop: DO n = 1, diag(d)%len
+!_______________________________________________________________________
+!
+! Set the chunk indices and the global i index
+!_______________________________________________________________________
+
+          ic = diag(d)%cell_id(n)%ic
+          IF ( ist < 0 ) ic = ichunk - ic + 1
+
+          i = (ich-1)*ichunk + ic
+
+          j = diag(d)%cell_id(n)%j
+          IF ( jst < 0 ) j = ny - j + 1
+
+          k = diag(d)%cell_id(n)%k
+          IF ( kst < 0 ) k = nz - k + 1
+!_______________________________________________________________________
+!
+! Dummy operation to match real transport code where nx is not
+! always evenly divided by ichunk
+!_______________________________________________________________________
+
+          IF ( i > nx ) THEN
+            jb_out(:,ic,k) = zero
+            kb_out(:,ic,j) = zero
+            CYCLE line_loop
+          END IF
+!_______________________________________________________________________
+!
+! Compute the angular source. Add the MMS contribution if
+! necessary.
+!_______________________________________________________________________
+
+          psi = qtot(1,ic,j,k,ich)
+
+          DO l = 2, cmom
+            !DIR$ VECTOR ALIGNED
+            psi = psi + ec(:,l,oct)*qtot(l,ic,j,k,ich)
+          END DO
+
+          IF ( src_opt == 3 ) psi = psi + qim(:,i,j,k,oct,g)
+!_______________________________________________________________________
+!
+! Left/right boundary conditions, always vacuum. Dummy
+! operations may be ignored.
+!_______________________________________________________________________
+
+          ibl = 0
+          ibr = 0
+          IF ( i==nx .AND. ist==-1 ) THEN
+            psii(:,j,k) = zero
+          ELSE IF ( i==1 .AND. ist==1 ) THEN
+            SELECT CASE ( ibl )
+            CASE ( 0 )
+              psii(:,j,k) = zero
+            CASE ( 1 )
+              psii(:,j,k) = zero
+            END SELECT
+          END IF
+!_______________________________________________________________________
+!
+! Top/bottom boundary conditions for chunk. Either received from
+! upstream neighbor or set to zero (always vacuum) if no
+! upstream neighbor.
+!_______________________________________________________________________
+
+          ibb = 0
+          ibt = 0
+          IF ( j == jlo ) THEN
+            IF ( jd==1 .AND. lasty ) THEN
+              psij(:,ic,k) = zero
+            ELSE IF ( jd==2 .AND. firsty ) THEN
+              SELECT CASE ( ibb )
+              CASE ( 0 )
+                psij(:,ic,k) = zero
+              CASE ( 1 )
+                psij(:,ic,k) = zero
+              END SELECT
+            ELSE
+              psij(:,ic,k) = jb_in(:,ic,k)
+            END IF
+          END IF
+!_______________________________________________________________________
+!
+! Front/back boundary conditions. Vacuum at global boundaries,
+! but set to some incoming flux from neighboring proc.
+!_______________________________________________________________________
+
+          ibf = 0
+          ibk = 0
+          IF ( k == klo ) THEN
+            IF ( (kd==1 .AND. lastz) .OR. ndimen<3 ) THEN
+              psik(:,ic,j) = zero
+            ELSE IF ( kd==2 .AND. firstz ) THEN
+              SELECT CASE ( ibf )
+              CASE ( 0 )
+                psik(:,ic,j) = zero
+              CASE ( 1 )
+                psik(:,ic,j) = zero
+              END SELECT
+            ELSE
+              psik(:,ic,j) = kb_in(:,ic,j)
+            END IF
+          END IF
+!_______________________________________________________________________
+!
+! Compute the initial solution
+!_______________________________________________________________________
+
+          IF ( vdelt /= zero ) THEN
+            !DIR$ VECTOR ALIGNED
+            pc = ( psi + psii(:,j,k)*mu*hi + psij(:,ic,k)*eta*hj + &
+              psik(:,ic,j)*xi*hk + ptr_in(:,i,j,k,oct)*vdelt ) * &
+              dinv(:,ic,j,k,ich)
+          ELSE
+            !DIR$ VECTOR ALIGNED
+            pc = ( psi + psii(:,j,k)*mu*hi + psij(:,ic,k)*eta*hj + &
+              psik(:,ic,j)*xi*hk ) * dinv(:,ic,j,k,ich)
+          END IF
+!_______________________________________________________________________
+!
+! Compute outgoing edges with diamond difference, no negative
+! flux fixup
+!_______________________________________________________________________
+
+          IF ( fixup == 0 ) THEN
+
+            psi = pc
+
+            psii(:,j,k) = two*psi - psii(:,j,k)
+            psij(:,ic,k) = two*psi - psij(:,ic,k)
+            IF ( ndimen == 3 ) psik(:,ic,j) = two*psi - psik(:,ic,j)
+            IF ( vdelt/=zero .AND. update_ptr ) THEN
+              IF ( angcpy == 1 ) THEN
+                ptr_in(:,i,j,k,oct) = two*psi - ptr_in(:,i,j,k,oct)
+              ELSE
+                ptr_out(:,i,j,k,oct) = two*psi - ptr_in(:,i,j,k,oct)
+              END IF
+            END IF
+
+          ELSE
+!_______________________________________________________________________
+!
+! Use negative flux fixup. Compute outgoing edges. If
+! negative, set to zero and rebalance. Multi-pass until all
+! negativities eliminated. Initialize counters and determine
+! angles that will need fixup first.
+!_______________________________________________________________________
+
+            hv = one
+            sum_hv = nedg * nang
+
+            fixup_loop: DO
+
+              fxhv(:,1) = two*pc - psii(:,j,k)
+              WHERE( fxhv(:,1) < zero ) hv(:,1) = zero
+              sum_hv_n = SUM( hv(:,1) )
+
+              fxhv(:,2) = two*pc - psij(:,ic,k)
+              WHERE( fxhv(:,2) < zero ) hv(:,2) = zero
+              sum_hv_n = sum_hv_n + SUM( hv(:,2) )
+
+              IF ( ndimen == 3 ) THEN
+                fxhv(:,3) = two*pc - psik(:,ic,j)
+                WHERE( fxhv(:,3) < zero ) hv(:,3) = zero
+                sum_hv_n = sum_hv_n + SUM( hv(:,3) )
+              END IF
+
+              IF ( vdelt /= zero ) THEN
+                fxhv(:,4) = two*pc - ptr_in(:,i,j,k,oct)
+                WHERE( fxhv(:,4) < zero ) hv(:,4) = zero
+                sum_hv_n = sum_hv_n + SUM( hv(:,4) )
+              END IF
+!_______________________________________________________________________
+!
+! Exit loop when all angles are fixed up, i.e., no hv change
+!_______________________________________________________________________
+
+              IF ( sum_hv == sum_hv_n ) EXIT fixup_loop
+              sum_hv = sum_hv_n
+!_______________________________________________________________________
+!
+! Recompute balance equation numerator and denominator and
+! get new cell average flux
+!_______________________________________________________________________
+
+              IF ( vdelt /= zero ) THEN
+                pc = psi + half * ( psii(:,j,k)*mu*hi*(one+hv(:,1)) + &
+                  psij(:,ic,k)*eta*hj*(one+hv(:,2)) +&
+                  psik(:,ic,j)*xi*hk*(one+hv(:,3)) + &
+                  ptr_in(:,i,j,k,oct)*vdelt*(one+hv(:,4)) )
+                den = t_xs(i,j,k) + mu*hi*hv(:,1) + eta*hj*hv(:,2) + &
+                  xi*hk*hv(:,3) + vdelt*hv(:,4)
+              ELSE
+                pc = psi + half * ( psii(:,j,k)*mu*hi*(one+hv(:,1)) + &
+                  psij(:,ic,k)*eta*hj*(one+hv(:,2)) +&
+                  psik(:,ic,j)*xi*hk*(one+hv(:,3)) )
+                den = t_xs(i,j,k) + mu*hi*hv(:,1) + eta*hj*hv(:,2) + &
+                  xi*hk*hv(:,3)
+              END IF
+
+              WHERE( pc <= zero ) den = zero
+
+              WHERE( den < tolr )
+                pc = zero
+                den = one
+              END WHERE
+
+              pc = pc / den
+
+            END DO fixup_loop
+!_______________________________________________________________________
+!
+! Fixup done: update counts and compute edges with resolved
+! center
+!_______________________________________________________________________
+
+            IF ( it_det == 1 ) THEN
+              fixup_counter(1) = fixup_counter(1) + nang - SUM( HV(:,1) )
+              fixup_counter(2) = fixup_counter(2) + nang - SUM( HV(:,2) )
+              IF ( ndimen == 3 ) fixup_counter(3) = &
+                fixup_counter(3) + nang - SUM( HV(:,3) )
+              IF ( vdelt/=zero ) fixup_counter(4) = &
+                fixup_counter(4) + nang - SUM( HV(:,4) )
+            END IF
+
+            psi = pc
+
+            psii(:,j,k) = fxhv(:,1) * hv(:,1)
+            psij(:,ic,k) = fxhv(:,2) * hv(:,2)
+            IF ( ndimen == 3 ) psik(:,ic,j) = fxhv(:,3) * hv(:,3)
+            IF ( vdelt/=zero .AND. update_ptr ) THEN
+              IF ( angcpy == 1 ) THEN
+                ptr_in(:,i,j,k,oct) = fxhv(:,4) * hv(:,4)
+              ELSE
+                ptr_out(:,i,j,k,oct) = fxhv(:,4) * hv(:,4)
+              END IF
+            END IF
+
+          END IF
+!_______________________________________________________________________
+!
+! Save edge fluxes (dummy if checks for unused non-vacuum BCs)
+!_______________________________________________________________________
+
+          IF ( j == jhi ) THEN
+            IF ( jd==2 .AND. lasty ) THEN
+              CONTINUE
+            ELSE IF ( jd==1 .AND. firsty ) THEN
+              IF ( ibb == 1 ) CONTINUE
+            ELSE
+              jb_out(:,ic,k) = psij(:,ic,k)
+            END IF
+          END IF
+
+          IF ( k == khi ) THEN
+            IF ( kd==2 .AND. lastz ) THEN
+              CONTINUE
+            ELSE IF ( kd==1 .AND. firstz ) THEN
+              IF ( ibf == 1 ) CONTINUE
+            ELSE
+              kb_out(:,ic,j) = psik(:,ic,j)
+            END IF
+          END IF
+!_______________________________________________________________________
+!
+! Compute dummy leakages (not used elsewhere currently)
+!_______________________________________________________________________
+
+          flkx(i+id-1,j,k) = flkx(i+id-1,j,k) + &
+            ist*SUM( wmu*psii(:,j,k) )
+          flky(i,j+jd-1,k) = flky(i,j+jd-1,k) + &
+            jst*SUM( weta*psij(:,ic,k) )
+          flkz(i,j,k+kd-1) = flkz(i,j,k+kd-1) + &
+            kst*SUM( wxi*psik(:,ic,j) )
+!_______________________________________________________________________
+!
+! Compute the flux moments
+!_______________________________________________________________________
+
+          psi = w*psi
+
+          flux0(i,j,k) = flux0(i,j,k) + SUM( psi )
+          DO l = 1, cmom-1
+            fluxm(l,i,j,k) = fluxm(l,i,j,k) + SUM( ec(:,l+1,oct)*psi )
+          END DO
+!_______________________________________________________________________
+!
+! Finish the loops
+!_______________________________________________________________________
+
+        END DO line_loop
+  !$OMP END PARALLEL DO
+
+      END DO diagonal_loop
+!_______________________________________________________________________
+!
+! Send data to downstream neighbors. Master of threaded region means
+! all main level threads do call for send.
+!_______________________________________________________________________
+
+      CALL sweep_send_bdry ( g, jd, kd, iop, t, reqs, szreq, nc, nang, &
+        ichunk, ny, nz, jb_out, kb_out )
+!_______________________________________________________________________
+!
+! Close the chunks loop
+!_______________________________________________________________________
+
+    END DO iop_loop
+!_______________________________________________________________________
+!_______________________________________________________________________
+
+  END SUBROUTINE mkba_sweep
+
+
+END MODULE mkba_sweep_module
diff --git a/Fortran/SNAP/src/mms.f90 b/Fortran/SNAP/src/mms.f90
new file mode 100644
--- /dev/null
+++ b/Fortran/SNAP/src/mms.f90
@@ -0,0 +1,570 @@
+!-----------------------------------------------------------------------
+!
+! MODULE: mms_module
+!> @brief
+!> This module contains all the setup and verification subroutines for
+!> code verification with MMS.
+!
+!----------------------------------------------------------------------- + +MODULE mms_module + + USE global_module, ONLY: i_knd, r_knd, ounit, zero, pi, one, half + + USE geom_module, ONLY: nx, ny, nz, lx, ly, lz, dx, dy, dz, ndimen, & + ny_gl, nz_gl + + USE sn_module, ONLY: cmom, nang, mu, eta, xi, w, noct, ec, nmom, lma + + USE data_module, ONLY: src_opt, ng, sigt, slgg, qim, qi, mat, v + + USE control_module, ONLY: timedep, tf, dt, tolr + + USE plib_module, ONLY: yproc, zproc, iproc, root, glmax, glmin, & + comm_snap, glsum + + IMPLICIT NONE + + PRIVATE + + PUBLIC :: mms_setup, mms_deallocate, mms_verify_1 + + SAVE +!_______________________________________________________________________ +! +! Module variables +! +! ref_flux(nx,ny,nz,ng) - Manufactured solution +! ref_fluxm(cmom-1,nx,ny,nz,ng) - Manufactured solution moments +! +! a - i function constant +! b - j function constant +! c - k function constant +! +! ib(nx+1) - i cell boundaries +! jb(ny+1) - j cell boundaries +! kb(nz+1) - k cell boundaries +! +!_______________________________________________________________________ + + REAL(r_knd) :: a, b, c + + REAL(r_knd), ALLOCATABLE, DIMENSION(:) :: ib, jb, kb + + REAL(r_knd), ALLOCATABLE, DIMENSION(:,:,:,:), PUBLIC :: ref_flux + + REAL(r_knd), ALLOCATABLE, DIMENSION(:,:,:,:,:), PUBLIC :: ref_fluxm + + + CONTAINS + + + SUBROUTINE mms_setup ( ierr, error ) + +!----------------------------------------------------------------------- +! +! This subroutine controls the MMS setup, including computing the MMS +! source and the reference solution for comparison with/verification of +! the SNAP-computed source +! +!----------------------------------------------------------------------- + + CHARACTER(LEN=64), INTENT(OUT) :: error + + INTEGER(i_knd), INTENT(OUT) :: ierr +!_______________________________________________________________________ +! +! 
Start by allocating the reference, manufactured solution +!_______________________________________________________________________ + + ierr = 0 + error = ' ' + CALL mms_allocate ( ierr, error ) + CALL glmax ( ierr, comm_snap ) + IF ( ierr /= 0 ) THEN + error = '***ERROR: MMS_ALLOCATE: Error allocating MMS arrays' + RETURN + END IF +!_______________________________________________________________________ +! +! Set up cell boundaries +!_______________________________________________________________________ + + ib(1) = zero + jb(1) = yproc*ny*dy + kb(1) = zproc*nz*dz + a = zero; b = zero; c = zero + + CALL mms_cells +!_______________________________________________________________________ +! +! Compute the cell-average manufactured solution according to src_opt. +! Compute for tfinal if timedep==1 +! +! src_opt = 3 --> f = t*g * sin(pi*x/lx) * sin(pi*y/ly) * sin(pi*z/lz) +! +! Then compute the static MMS source. Will apply time dependence +! directly in dim#_sweep. +!_______________________________________________________________________ + + IF ( src_opt == 3 ) THEN + CALL mms_flux_1 + CALL mms_src_1 + IF ( timedep == 1 ) CALL mms_flux_1_2 + END IF +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE mms_setup + + + SUBROUTINE mms_allocate ( ierr, error ) + +!----------------------------------------------------------------------- +! +! Allocate MMS arrays. +! 
+!----------------------------------------------------------------------- + + CHARACTER(LEN=64), INTENT(OUT) :: error + + INTEGER(i_knd), INTENT(OUT) :: ierr +!_______________________________________________________________________ + + ierr = 0 + error = ' ' + + ALLOCATE( ref_flux(nx,ny,nz,ng), ref_fluxm(cmom-1,nx,ny,nz,ng), & + STAT=ierr ) + IF ( ierr /= 0 ) RETURN + + ref_flux = zero + ref_fluxm = zero + + ALLOCATE( ib(nx+1), jb(ny+1), kb(nz+1), STAT= ierr ) + IF ( ierr /= 0 ) RETURN + + ib = zero + jb = zero + kb = zero +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE mms_allocate + + + SUBROUTINE mms_deallocate + +!----------------------------------------------------------------------- +! +! Deallocate MMS arrays. +! +!----------------------------------------------------------------------- +!_______________________________________________________________________ + + IF ( ALLOCATED( ref_flux ) ) DEALLOCATE( ref_flux ) + IF ( ALLOCATED( ref_fluxm ) ) DEALLOCATE( ref_fluxm ) + IF ( ALLOCATED( ib ) ) DEALLOCATE( ib ) + IF ( ALLOCATED( jb ) ) DEALLOCATE( jb ) + IF ( ALLOCATED( kb ) ) DEALLOCATE( kb ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE mms_deallocate + + + SUBROUTINE mms_cells + +!----------------------------------------------------------------------- +! +! Compute and store the cell boundaries arrays +! +!----------------------------------------------------------------------- +!_______________________________________________________________________ +! +! 
Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: i, j, k +!_______________________________________________________________________ + + DO i = 1, nx + ib(i+1) = ib(i) + dx + END DO + + IF ( ndimen > 1 ) THEN + + DO j = 1, ny + jb(j+1) = jb(j) + dy + END DO + + IF ( ndimen > 2 ) THEN + DO k = 1, nz + kb(k+1) = kb(k) + dz + END DO + END IF + + END IF + + IF ( src_opt == 3 ) THEN + a = pi/lx + IF ( ndimen > 1 ) b = pi/ly + IF ( ndimen > 2 ) c = pi/lz + END IF +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE mms_cells + + + SUBROUTINE mms_flux_1 + +!----------------------------------------------------------------------- +! +! Manufactured solution is +! t*g * sin(pi*x/lx) * sin(pi*y/ly) * sin(pi*z/lz). +! +! Where t = 1 for static, g = 1 for one group, y and z terms dropped for +! 1-D. Compute the cell-average value for the static solution over all +! cells. +! +!----------------------------------------------------------------------- +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: i, j, k, g, l, n + + REAL(r_knd), DIMENSION(cmom-1) :: p + + REAL(r_knd), DIMENSION(nx) :: tx + + REAL(r_knd), DIMENSION(ny) :: ty + + REAL(r_knd), DIMENSION(nz) :: tz +!_______________________________________________________________________ +! +! 
Get all the integrations done by dimension separately +!_______________________________________________________________________ + + CALL mms_trigint ( 'COS', nx, a, dx, ib, tx ) + IF ( ndimen > 1 ) THEN + CALL mms_trigint ( 'COS', ny, b, dy, jb, ty ) + IF ( ndimen > 2 ) THEN + CALL mms_trigint ( 'COS', nz, c, dz, kb, tz ) + ELSE + tz = one + END IF + ELSE + ty = one + tz = one + END IF +!_______________________________________________________________________ +! +! Combine all dimensions +!_______________________________________________________________________ + + DO g = 1, ng + DO k = 1, nz + DO j = 1, ny + DO i = 1, nx + ref_flux(i,j,k,g) = REAL( g, r_knd) * tx(i) * ty(j) * tz(k) + END DO + END DO + END DO + END DO +!_______________________________________________________________________ +! +! Compute the angular coefficients for the moments +!_______________________________________________________________________ + + p = zero + DO n = 1, noct + DO l = 1, cmom-1 + p(l) = p(l) + SUM( w*ec(:,l+1,n) ) + END DO + END DO +!_______________________________________________________________________ +! +! Apply these coefficients to the angularly independent manufactured +! solution to get the moments +!_______________________________________________________________________ + + DO l = 1, cmom-1 + ref_fluxm(l,:,:,:,:) = p(l)*ref_flux + END DO +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE mms_flux_1 + + + SUBROUTINE mms_trigint ( trig, lc, d, del, cb, fn ) + +!----------------------------------------------------------------------- +! +! Perform the loop to do trig function integration --> sin/cos terms +! 
+!----------------------------------------------------------------------- + + CHARACTER(LEN=3), INTENT(IN) :: trig + + INTEGER(i_knd), INTENT(IN) :: lc + + REAL(r_knd), INTENT(IN) :: d, del + + REAL(r_knd), DIMENSION(lc+1), INTENT(IN) :: cb + + REAL(r_knd), DIMENSION(lc), INTENT(OUT) :: fn +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: i +!_______________________________________________________________________ +! +! Do the integration, divide by delta and constant. In case of sine +! the constant isn't needed because these are streaming terms. +!_______________________________________________________________________ + + fn = zero + + IF ( trig == 'COS' ) THEN + DO i = 1, lc + fn(i) = COS( d*cb(i) ) - COS( d*cb(i+1) ) + END DO + fn = fn / (d*del) + ELSE IF ( trig == 'SIN' ) THEN + DO i = 1, lc + fn(i) = SIN( d*cb(i+1) ) - SIN( d*cb(i) ) + END DO + fn = fn / (del) + ELSE + RETURN + END IF +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE mms_trigint + + + SUBROUTINE mms_src_1 + +!----------------------------------------------------------------------- +! +! Compute the MMS source for the manufactured solution above. Compute +! the source up to the number of moments specified by the user. The +! source must be the cell-average. Does not need to include the time +! coefficient, which is done in octsweep. +! +!----------------------------------------------------------------------- +!_______________________________________________________________________ +! +! 
Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: i, j, k, g, m, gp, l, ll, lm, n, is, js, ks, id, & + jd, kd + + REAL(r_knd), DIMENSION(nx) :: cx, sx + + REAL(r_knd), DIMENSION(ny) :: cy, sy + + REAL(r_knd), DIMENSION(nz) :: cz, sz +!_______________________________________________________________________ +! +! Get the needed integrations. Need both sine and cosine for each +! dimension. +!_______________________________________________________________________ + + CALL mms_trigint ( 'COS', nx, a, dx, ib, cx ) + CALL mms_trigint ( 'SIN', nx, a, dx, ib, sx ) + + IF ( ndimen > 1 ) THEN + + CALL mms_trigint ( 'COS', ny, b, dy, jb, cy ) + CALL mms_trigint ( 'SIN', ny, b, dy, jb, sy ) + + IF ( ndimen > 2 ) THEN + CALL mms_trigint ( 'COS', nz, c, dz, kb, cz ) + CALL mms_trigint ( 'SIN', nz, c, dz, kb, sz ) + ELSE + cz = one + sz = zero + END IF + + ELSE + + cy = one + sy = zero + cz = one + sz = zero + + END IF +!_______________________________________________________________________ +! +! Start computing angular MMS source. Loop over dimensions according +! to allocation. Start with total interaction and streaming terms. +! Then loop over scattering terms. If threads are available, use them +! on this larger group loop to help speed up job. 
+!_______________________________________________________________________ + + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) DEFAULT(SHARED) & + !$OMP& PRIVATE(g,kd,ks,jd,js,id,is,n,k,j,i,m,gp,lm,l,ll) + DO g = 1, ng + DO kd = 1, MAX( ndimen-1, 1 ) + ks = -one + IF ( kd == 2 ) ks = one + DO jd = 1, MIN( ndimen, 2 ) + js = -one + IF ( jd == 2 ) js = one + DO id = 1, 2 + is = -one + IF ( id == 2 ) is = one + n = 4*(kd - 1) + 2*(jd - 1) + id + DO k = 1, nz + DO j = 1, ny + DO i = 1, nx + DO m = 1, nang + qim(m,i,j,k,n,g) = qim(m,i,j,k,n,g) + & + REAL( g, r_knd ) * is*mu(m)*sx(i)*cy(j)*cz(k) + & + sigt(mat(i,j,k),g)*ref_flux(i,j,k,g) + IF ( ndimen > 1 ) THEN + qim(m,i,j,k,n,g) = qim(m,i,j,k,n,g) + & + REAL( g, r_knd ) * js*eta(m)*cx(i)*sy(j)*cz(k) + END IF + IF ( ndimen > 2 ) THEN + qim(m,i,j,k,n,g) = qim(m,i,j,k,n,g) + & + REAL( g, r_knd ) * ks*xi(m)*cx(i)*cy(j)*sz(k) + END IF + DO gp = 1, ng + qim(m,i,j,k,n,g) = qim(m,i,j,k,n,g) - & + slgg(mat(i,j,k),1,gp,g) * ref_flux(i,j,k,gp) + lm = 2 + DO l = 2, nmom + DO ll = 1, lma(l) + qim(m,i,j,k,n,g) = qim(m,i,j,k,n,g) - ec(m,lm,n)*& + slgg(mat(i,j,k),l,gp,g)*ref_fluxm(lm-1,i,j,k,g) + lm = lm + 1 + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO + !$OMP END PARALLEL DO +!_______________________________________________________________________ +! +! Time-dependent problems have a time-independent source term that +! can be stored in qi. +!_______________________________________________________________________ + + IF ( timedep == 1 ) THEN + DO g = 1, ng + qi(:,:,:,g) = ref_flux(:,:,:,g) / v(g) + END DO + END IF +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE mms_src_1 + + + SUBROUTINE mms_flux_1_2 + +!----------------------------------------------------------------------- +! +! Now that static source is computed, can scale reference solution to +! that of final time step. 
(Source is linearly scaled in time in +! octsweep.) +!----------------------------------------------------------------------- +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + REAL(r_knd) :: t +!_______________________________________________________________________ +! +! Compute the time at final time step center. Multiply with ref_flux. +! No need for ref_fluxm since only ref_flux is checked in mms_verify. +!_______________________________________________________________________ + + t = tf - half*dt + ref_flux = t*ref_flux +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE mms_flux_1_2 + + + SUBROUTINE mms_verify_1 ( flux ) + +!----------------------------------------------------------------------- +! +! Verify the final solution is near the reference solution. +! +!----------------------------------------------------------------------- + + REAL(r_knd), DIMENSION(nx,ny,nz,ng), INTENT(IN) :: flux +!_______________________________________________________________________ +! +! 
Local variables +!_______________________________________________________________________ + + CHARACTER(LEN=1) :: star='*' + + INTEGER(i_knd) :: i + + REAL(r_knd) :: dfmx, dfmn, dfsm + + REAL(r_knd), DIMENSION(nx,ny,nz,ng) :: df +!_______________________________________________________________________ + + df = one + WHERE( ABS( ref_flux ) < tolr ) + ref_flux = one + df = zero + END WHERE + df = ABS( flux/ref_flux - df ) + + dfmx = MAXVAL( df ) + CALL glmax ( dfmx, comm_snap ) + + dfmn = MINVAL( df ) + CALL glmin ( dfmn, comm_snap ) + + dfsm = SUM( df ) + CALL glsum ( dfsm, comm_snap ) + dfsm = dfsm / REAL( nx*ny_gl*nz_gl*ng ) + + IF ( iproc == root ) THEN + WRITE( ounit, 421 ) ( star, i = 1, 80 ) + WRITE( ounit, 422 ) dfmx + WRITE( ounit, 425 ) dfmn + WRITE( ounit, 426 ) dfsm + WRITE( ounit, 428 ) ( star, i = 1, 80 ) + END IF +!_______________________________________________________________________ + + 421 FORMAT( 10X, 'MMS Verification', /, 80A ) + 422 FORMAT( /, 4X, 'Manufactured/Computed Solutions Max Diff=', & + ES13.6 ) + 425 FORMAT( /, 4X, 'Manufactured/Computed Solutions Min Diff=', & + ES13.6 ) + 426 FORMAT( /, 4X, 'Manufactured/Computed Solutions Avg Diff=', & + ES13.6 ) + 428 FORMAT( /, 80A, / ) +!_______________________________________________________________________ + + END SUBROUTINE mms_verify_1 + + +END MODULE mms_module diff --git a/Fortran/SNAP/src/octsweep.f90 b/Fortran/SNAP/src/octsweep.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/octsweep.f90 @@ -0,0 +1,178 @@ +!----------------------------------------------------------------------- +! +! MODULE: octsweep_module +!> @brief +!> This module controls the setup and calls for sweeping a single octant +!> pair. It calls for the actual sweep logic depending on the spatial +!> dimensionality of the problem. +! 
+!----------------------------------------------------------------------- + +MODULE octsweep_module + + USE global_module, ONLY: i_knd + + USE geom_module, ONLY: nc, ndimen, dinv, nx, ny, nz + + USE sn_module, ONLY: ec, nang, wmu, weta, wxi, noct + + USE data_module, ONLY: vdelt, ng + + USE control_module, ONLY: timedep, swp_typ, multiswp + + USE solvar_module, ONLY: psii, qtot, ptr_in, ptr_out, psij, psik, & + flux0, fluxm, jb_in, jb_out, kb_in, kb_out, fixup_counter, flkx, & + flky, flkz, t_xs + + USE thrd_comm_module, ONLY: no_op_lock_control + + USE dim1_sweep_module, ONLY: dim1_sweep + + USE dim3_sweep_module, ONLY: dim3_sweep + + USE mkba_sweep_module, ONLY: mkba_sweep + + IMPLICIT NONE + + PUBLIC :: octsweep + + + CONTAINS + + + SUBROUTINE octsweep ( g, jd, kd, t, reqs, szreq, ic_op, cor, & + nnstd_used ) + +!----------------------------------------------------------------------- +! +! Call for the appropriate sweeper depending on the dimensionality. Let +! the actual sweep routine know the octant info to get the order right. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: g, jd, kd, t, szreq, ic_op, cor, & + nnstd_used + + INTEGER(i_knd), DIMENSION(szreq), INTENT(INOUT) :: reqs +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: iop, d1, d2, d3, d4, d5, d6, id, oct, ich, llim, & + ulim +!_______________________________________________________________________ +! +! If g=0, this thread has no work to do, but it cannot exit sweeps +! immediately, because it must properly set it's own lock and unset +! the subsequent thread's lock. Call for no-op controls. Note that +! this is a nested threaded region, so master refers to the nested +! threads. All main level threads will do this. 
+!_______________________________________________________________________ + + IF ( g == 0 ) THEN + DO iop = 1, 2*nc + CALL no_op_lock_control ( t ) + CALL no_op_lock_control ( t ) + END DO + RETURN + END IF +!_______________________________________________________________________ +! +! Set ptr_in and ptr_out dimensions dependent on timedep because they +! are not allocated if static problem. +!_______________________________________________________________________ + + d1 = 0; d2 = 0; d3 = 0; d4 = 0; d5 = 0; d6 = 0 + IF ( timedep == 1 ) THEN + d1 = nang; d2 = nx; d3 = ny; d4 = nz; d5 = noct; d6 = g + END IF +!_______________________________________________________________________ +! +! For swp_typ=0 order, set the iop here and call for the sweep of +! that chunk. Loop contains twice the number of chunks: half for +! negative x-dir sweep, half for positive x-dir sweep. +!_______________________________________________________________________ + + IF ( swp_typ == 0 ) THEN +!_______________________________________________________________________ +! +! For multiswp=0, set loops over all the chunks in the x-domain, +! including the octant pair. Otherwise, multiswp scheduling handles +! sweeping all chunks. +!_______________________________________________________________________ + + IF ( multiswp == 0 ) THEN + llim = 1; ulim = 2*nc + ELSE + llim = ic_op; ulim = ic_op + END IF + + iop_loop: DO iop = llim, ulim +!_______________________________________________________________________ +! +! Determine octant and chunk index. +!_______________________________________________________________________ + + id = 1 + (iop-1)/nc + oct = id + 2*(jd - 1) + 4*(kd - 1) + + IF ( id == 1 ) THEN + ich = nc - iop + 1 + ELSE + ich = iop - nc + END IF + + IF ( timedep == 1 ) d5 = oct +!_______________________________________________________________________ +! +! Call 1-D or multi-D sweeper. 
+!_______________________________________________________________________ + + IF ( ndimen == 1 ) THEN + + CALL dim1_sweep ( id, d1, d2, d3, d4, oct, g, & + psii(:,1,1,cor,g), qtot(:,:,1,1,ich,g), ec(:,:,oct), & + vdelt(g), ptr_in(:,:,:,:,d5,d6), ptr_out(:,:,:,:,d5,d6), & + dinv(:,:,1,1,ich,g), flux0(:,1,1,g), fluxm(:,:,1,1,g), & + wmu, flkx(:,1,1,g), t_xs(:,1,1,g), fixup_counter(:,cor,g) ) + + ELSE + + CALL dim3_sweep ( ich, id, d1, d2, d3, d4, jd, kd, oct, g, & + t, iop, reqs, szreq, psii(:,:,:,cor,g), psij(:,:,:,g), & + psik(:,:,:,g), qtot(:,:,:,:,ich,g), ec(:,:,oct), & + vdelt(g), ptr_in(:,:,:,:,d5,d6), ptr_out(:,:,:,:,d5,d6), & + dinv(:,:,:,:,ich,g), flux0(:,:,:,g), fluxm(:,:,:,:,g), & + jb_in(:,:,:,cor,g), jb_out(:,:,:,cor,g), & + kb_in(:,:,:,cor,g), kb_out(:,:,:,cor,g), wmu, weta, wxi, & + flkx(:,:,:,g), flky(:,:,:,g), flkz(:,:,:,g), t_xs(:,:,:,g),& + fixup_counter(:,cor,g) ) + + END IF + + END DO iop_loop +!_______________________________________________________________________ +! +! Use the mini-kba swp_typ=1 sweeper that self-contains iop loop. 
+!_______________________________________________________________________ + + ELSE + + CALL mkba_sweep ( d1, d2, d3, d4, d5, jd, kd, g, t, nnstd_used, & + reqs, szreq, psii(:,:,:,cor,g), psij(:,:,:,g), psik(:,:,:,g), & + qtot(:,:,:,:,:,g), ec, vdelt(g), ptr_in(:,:,:,:,:,d6), & + ptr_out(:,:,:,:,:,d6), dinv(:,:,:,:,:,g), flux0(:,:,:,g), & + fluxm(:,:,:,:,g), jb_in(:,:,:,cor,g), jb_out(:,:,:,cor,g), & + kb_in(:,:,:,cor,g), kb_out(:,:,:,cor,g), wmu, weta, wxi, & + flkx(:,:,:,g), flky(:,:,:,g), flkz(:,:,:,g), t_xs(:,:,:,g), & + fixup_counter(:,cor,g) ) + + END IF +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE octsweep + + +END MODULE octsweep_module diff --git a/Fortran/SNAP/src/outer.f90 b/Fortran/SNAP/src/outer.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/outer.f90 @@ -0,0 +1,356 @@ +!----------------------------------------------------------------------- +! +! MODULE: outer_module +!> @brief +!> This module controls the outer iterations. Outer iterations are +!> threaded over the energy dimension and represent a Jacobi iteration +!> strategy. Includes setting the outer source. Checking outer iteration +!> convergence. +! 
+!----------------------------------------------------------------------- + +MODULE outer_module + + USE global_module, ONLY: i_knd, r_knd, one, zero + + USE geom_module, ONLY: nx, ny, nz + + USE sn_module, ONLY: nmom, cmom + + USE data_module, ONLY: ng, mat, slgg, qi, nmat + + USE solvar_module, ONLY: q2grp0, q2grpm, flux0, fluxm, flux0po + + USE control_module, ONLY: inrdone, tolr, dfmxo, epsi, otrdone + + USE inner_module, ONLY: inner + + USE time_module, ONLY: totrsrc, tinners, wtime + + USE plib_module, ONLY: nthreads, glmax, comm_snap + + USE expxs_module, ONLY: expxs_reg, expxs_slgg + + USE thrd_comm_module, ONLY: assign_thrd_set + + IMPLICIT NONE + + PRIVATE + + PUBLIC :: outer + + + CONTAINS + + + SUBROUTINE outer ( iits, otno, t, do_grp, ng_per_thrd, nnstd_used, & + grp_act, iitm_i ) + +!----------------------------------------------------------------------- +! +! Do a single outer iteration. Sets the out-of-group sources, performs +! inners for all groups. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: otno, t, iitm_i + + INTEGER(i_knd), INTENT(INOUT) :: ng_per_thrd, nnstd_used + + INTEGER(i_knd), DIMENSION(ng), INTENT(OUT) :: iits + + INTEGER(i_knd), DIMENSION(ng), INTENT(INOUT) :: do_grp + + INTEGER(i_knd), DIMENSION(ng,nthreads), INTENT(INOUT) :: grp_act +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: inno, g, n + + REAL(r_knd) :: t1, t2, t3, t4, dft(nthreads) +!_______________________________________________________________________ +! +! Compute the outer source: sum of fixed + out-of-group sources. +! Each thread t will do a set of energy groups. Nested threads will +! work over local ny*nz span. In many instances, changing do_grp +! will not change grp_act, but the action better resembles production. 
+!_______________________________________________________________________ + + !$OMP MASTER + + CALL wtime ( t1 ) + + inrdone = .FALSE. + iits = 0 + + DO g = 1, ng + do_grp(g) = ng - g + 1 + END DO + + CALL assign_thrd_set ( do_grp, ng, ng_per_thrd, ny*nz, nnstd_used, & + grp_act ) + + !$OMP END MASTER + !$OMP BARRIER + + CALL outer_src ( ng_per_thrd, nnstd_used, grp_act(:,t) ) + !$OMP BARRIER + + !$OMP MASTER + CALL wtime ( t2 ) + totrsrc = totrsrc + t2 - t1 + !$OMP END MASTER +!_______________________________________________________________________ +! +! Zero out the inner iterations group count. Save the flux for +! comparison. Initialize inrdone. +!_______________________________________________________________________ + + DO n = 1, ng_per_thrd + g = grp_act(n,t) + IF ( g == 0 ) EXIT + flux0po(:,:,:,g) = flux0(:,:,:,g) + END DO +!_______________________________________________________________________ +! +! Start the inner iterations +!_______________________________________________________________________ + + !$OMP MASTER + CALL wtime ( t3 ) + !$OMP END MASTER + + !$OMP BARRIER + + inner_loop: DO inno = 1, iitm_i + + CALL inner ( inno, iits, t, do_grp, ng_per_thrd, nnstd_used, & + grp_act ) + !$OMP BARRIER + + IF ( ALL( inrdone ) ) EXIT inner_loop + + END DO inner_loop + + !$OMP MASTER + + CALL wtime ( t4 ) + tinners = tinners + t4 - t3 +!_______________________________________________________________________ +! +! Check outer convergence. Apply nested threads to group sets. +!_______________________________________________________________________ + + do_grp = 1 + CALL assign_thrd_set ( do_grp, ng, ng_per_thrd, 0, nnstd_used, & + grp_act ) + + dfmxo = -HUGE( one ) + + !$OMP END MASTER + !$OMP BARRIER +!_______________________________________________________________________ +! +! Thread to speed up computation of df by looping over groups. Rejoin +! threads and then determine max error. 
+!_______________________________________________________________________ + + dft(t) = -one + + !$OMP PARALLEL DO NUM_THREADS(nnstd_used) IF(nnstd_used>1) & + !$OMP& SCHEDULE(STATIC,1) DEFAULT(SHARED) PRIVATE(n,g) & + !$OMP& PROC_BIND(CLOSE) + DO n = 1, ng_per_thrd + g = grp_act(n,t) + IF ( g == 0 ) CYCLE + CALL outer_df_calc ( flux0po(:,:,:,g), flux0(:,:,:,g), dft(t) ) + END DO + !$OMP END PARALLEL DO + + !$OMP CRITICAL + dfmxo = MAX( dfmxo, dft(t) ) + !$OMP END CRITICAL + !$OMP BARRIER + + !$OMP MASTER + + CALL glmax ( dfmxo, comm_snap ) + + IF ( dfmxo<=100.0_r_knd*epsi .AND. ALL( inrdone ) .AND. otno/=1 ) & + otrdone = .TRUE. + + !$OMP END MASTER +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE outer + + + SUBROUTINE outer_src ( ng_per_thrd, nnstd_used, grp_act ) + +!----------------------------------------------------------------------- +! +! Loop over groups to compute each one's outer loop source. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: ng_per_thrd, nnstd_used + + INTEGER(i_knd), DIMENSION(ng), INTENT(IN) :: grp_act +!_______________________________________________________________________ +! +! Local Variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: n, g, k, j +!_______________________________________________________________________ +! +! Loop over each set of groups. Use nested threads if available. 
+!_______________________________________________________________________ + + !$OMP PARALLEL NUM_THREADS(nnstd_used) IF(nnstd_used>1) & + !$OMP& DEFAULT(SHARED) PRIVATE(n,g,k,j) PROC_BIND(CLOSE) + DO n = 1, ng_per_thrd + + g = grp_act(n) + IF ( g == 0 ) EXIT + + !$OMP DO SCHEDULE(STATIC,1) COLLAPSE(2) + DO k = 1, nz + DO j = 1, ny + CALL outer_src_calc ( g, j, k, qi(:,j,k,g), slgg(:,:,:,g), & + mat(:,j,k), q2grp0(:,j,k,g), q2grpm(:,:,j,k,g) ) + END DO + END DO + !$OMP END DO NOWAIT + + END DO + !$OMP END PARALLEL +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE outer_src + + + SUBROUTINE outer_src_calc ( g, j, k, qi0, sxs_g, map, qo0, qom ) + +!----------------------------------------------------------------------- +! +! Compute the scattering source for all cells and moments. This routine +! is called for each group. It computes source components from all the +! other groups, which are looped over here. It skips computing the +! source from itself, which is captured in the inner iterations. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: g, j, k + + INTEGER(i_knd), DIMENSION(nx), INTENT(IN) :: map + + REAL(r_knd), DIMENSION(nx), INTENT(IN) :: qi0 + + REAL(r_knd), DIMENSION(nx), INTENT(OUT) :: qo0 + + REAL(r_knd), DIMENSION(cmom-1,nx), INTENT(OUT) :: qom + + REAL(r_knd), DIMENSION(nmat,nmom,ng), INTENT(IN) :: sxs_g +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: gp + + REAL(r_knd), DIMENSION(nx) :: cs0 + + REAL(r_knd), DIMENSION(cmom-1,nx) :: csm +!_______________________________________________________________________ +! +! Initialize sources. Include isotropic inhomogeneous source. 
+!_______________________________________________________________________ + + qo0 = qi0 + qom = zero +!_______________________________________________________________________ +! +! Loop over originating groups, gp. Skip own group. +!_______________________________________________________________________ + + DO gp = 1, ng + + IF ( gp == g ) CYCLE +!_______________________________________________________________________ +! +! Expand isotropic cross sections to fine mesh for current gp->g. +! Add out of group scattering source. +!_______________________________________________________________________ + + CALL expxs_reg ( sxs_g(:,1,gp), map, cs0 ) + + qo0 = qo0 + cs0*flux0(:,j,k,gp) +!_______________________________________________________________________ +! +! Expand anisotropic cross sections to fine mesh for gp->g. Add +! components to group scattering source moments. Close the loop. +!_______________________________________________________________________ + + IF ( nmom == 1 ) CYCLE + + CALL expxs_slgg ( sxs_g(:,:,gp), map, csm ) + + qom = qom + csm*fluxm(:,:,j,k,gp) + + END DO +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE outer_src_calc + + + SUBROUTINE outer_df_calc ( flux0po, flux0, dft ) + +!----------------------------------------------------------------------- +! +! Check for convergence of outer iterations. Use only the zeroth moment +! data (flux0/flux0po). +! +!----------------------------------------------------------------------- + + REAL(r_knd), INTENT(INOUT) :: dft + + REAL(r_knd), DIMENSION(nx,ny,nz), INTENT(INOUT) :: flux0po + + REAL(r_knd), DIMENSION(nx,ny,nz), INTENT(IN) :: flux0 +!_______________________________________________________________________ +! +! 
Local variables +!_______________________________________________________________________ + + REAL(r_knd) :: dfg + + REAL(r_knd), DIMENSION(nx,ny,nz) :: df +!_______________________________________________________________________ +! +! Thread to speed up computation of df by looping over groups. Rejoin +! threads and then determine max error. +!_______________________________________________________________________ + + df = one + WHERE( ABS( flux0po ) < tolr ) + flux0po = one + df = zero + END WHERE + df = ABS( flux0/flux0po - df ) + + dfg = MAXVAL( df ) + + dft = MAX( dft, dfg ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE outer_df_calc + + +END MODULE outer_module diff --git a/Fortran/SNAP/src/output.f90 b/Fortran/SNAP/src/output.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/output.f90 @@ -0,0 +1,378 @@ +!----------------------------------------------------------------------- +! +! MODULE: output_module +!> @brief +!> This module controls final solution output. +! +!----------------------------------------------------------------------- + +MODULE output_module + + USE global_module, ONLY: i_knd, r_knd, ounit, zero + + USE geom_module, ONLY: nx, ny, nz, nz_gl, ny_gl + + USE sn_module, ONLY: cmom + + USE data_module, ONLY: ng, src_opt + + USE control_module, ONLY: fluxp, soloutp, kplane + + USE utils_module, ONLY: open_file, close_file, print_error, stop_run + + USE solvar_module, ONLY: flux0, fluxm + + USE mms_module, ONLY: mms_verify_1 + + USE time_module, ONLY: tout, wtime + + USE plib_module, ONLY: iproc, root, zproc, bcast, comm_snap, & + comm_space, sproc, cartrank, npey, psend, precv, barrier + + IMPLICIT NONE + + PRIVATE + + PUBLIC :: output + + + CONTAINS + + + SUBROUTINE output + +!----------------------------------------------------------------------- +! +! Print the scalar flux output to the output file. +! 
+!----------------------------------------------------------------------- +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + CHARACTER(LEN=1) :: star='*' + + CHARACTER(LEN=64) :: error + + INTEGER(i_knd) :: i, j, k, g, is, ii, ierr, klb, kub, mtag, jp, & + kloc, rank, jlb, jub + + INTEGER(i_knd), DIMENSION(2) :: co + + REAL(r_knd) :: t1, t2 + + REAL(r_knd), ALLOCATABLE, DIMENSION(:,:) :: fprnt +!_______________________________________________________________________ + + CALL wtime ( t1 ) + + ierr = 0 + error = ' ' +!_______________________________________________________________________ +! +! Get global indices of local PE bounds. +!_______________________________________________________________________ + + klb = zproc*nz + 1 + kub = (zproc+1) * nz +!_______________________________________________________________________ +! +! Only if requested, print solution at mid-plane. +!_______________________________________________________________________ + + IF ( soloutp == 1 ) THEN + + IF ( iproc == root ) THEN + WRITE( ounit, 301 ) ( star, i = 1, 80 ) + ALLOCATE( fprnt(nx,ny_gl), STAT=ierr ) + fprnt = zero + END IF + + CALL bcast ( ierr, comm_snap, root ) + IF ( ierr /= 0 ) THEN + error = '***ERROR: OUTPUT: Allocation error' + CALL print_error ( ounit, error ) + CALL stop_run ( 1, 4, 2, 0 ) + END IF +!_______________________________________________________________________ +! +! Choose the mid-plane. If user specified a k-plane via kplane +! input, use that value instead. Determine k local index. +!_______________________________________________________________________ + + k = nz_gl/2 + 1 + IF ( kplane /= 0 ) k = kplane + kloc = MOD( k-1, nz ) + 1 +!_______________________________________________________________________ +! +! Loops over groups. Send/Recv message. Print flux. 
+!_______________________________________________________________________ + + g_loop: DO g = 1, ng +!_______________________________________________________________________ +! +! If global k index is within proc's bounds, send message to root +! of flux on that plane +!_______________________________________________________________________ + + IF ( klb<=k .AND. k<=kub ) THEN + mtag = g + CALL output_send ( mtag, flux0(:,:,kloc,g) ) + END IF +!_______________________________________________________________________ +! +! Presets the printed flux to its own value if there is only one +! proc or npez=1. Receives messages in order for proper printing. +!_______________________________________________________________________ + + IF ( iproc == root ) THEN + + co(1) = (k-1)/nz + fprnt(:,1:ny) = flux0(:,:,kloc,g) + WRITE( ounit, 302 ) ( star, i = 1, 35 ), g, k, & + ( star, i = 1, 35 ) + + DO jp = 0, npey-1 + jlb = jp*ny + 1 + jub = (jp+1) * ny + co(2) = jp + CALL cartrank ( co, rank, comm_space ) + mtag = g + CALL output_recv ( mtag, rank, fprnt(:,jlb:jub) ) + END DO + + DO i = 1, nx, 6 + is = i + 6 - 1 + IF ( is > nx ) is = nx + WRITE( ounit, FMT=303, ADVANCE='NO' ) + DO ii = i, is + WRITE( ounit, FMT=304, ADVANCE='NO' ) ii + END DO + WRITE( ounit, FMT=305, ADVANCE='YES' ) + DO j = ny_gl, 1, -1 + WRITE( ounit, 306 ) j, ( fprnt(ii,j), ii = i, is ) + END DO + END DO + + WRITE( ounit, 307 ) ( star, i = 1, 80 ) + + END IF + + END DO g_loop +!_______________________________________________________________________ +! +! Cleanup +!_______________________________________________________________________ + + IF ( iproc == root ) DEALLOCATE( fprnt ) + + END IF +!_______________________________________________________________________ +! +! 
Print flux to file if requested +!_______________________________________________________________________ + + call barrier ( comm_snap ) + + IF ( fluxp > 0 ) CALL output_flux_file ( klb, kub ) +!_______________________________________________________________________ +! +! If MMS solution, verify compared to ref_flux +!_______________________________________________________________________ + + IF ( src_opt == 3 ) CALL mms_verify_1 ( flux0 ) + + CALL wtime ( t2 ) + tout = t2 - t1 +!_______________________________________________________________________ + + 301 FORMAT( 10X, 'keyword Scalar Flux Solution', /, 80A ) + 302 FORMAT( /, 1X, 35A, /, 2X, 'Group= ', I3, 2X, ' Z Mid-Plane= ',& + I4, /, 1X, 35A ) + 303 FORMAT( /, 5X, 'y' ) + 304 FORMAT( 4X, 'x ', I4, 2X ) + 305 FORMAT( 1X ) + 306 FORMAT( 2X, I4, 6(1X, ES11.4) ) + 307 FORMAT( /, 80A, / ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE output + + + SUBROUTINE output_send ( mtag, fprnt ) + +!----------------------------------------------------------------------- +! +! Send root chunk of flux data for printing +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: mtag + + REAL(r_knd), DIMENSION(nx,ny), INTENT(IN) :: fprnt +!_______________________________________________________________________ + + CALL psend ( root, sproc, nx, ny, fprnt, comm_space, mtag ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE output_send + + + SUBROUTINE output_recv ( mtag, proc, fprnt ) + +!----------------------------------------------------------------------- +! +! Receive flux message for output +! 
+!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: mtag, proc + + REAL(r_knd), DIMENSION(nx,ny), INTENT(INOUT) :: fprnt +!_______________________________________________________________________ + + CALL precv ( proc, sproc, nx, ny, fprnt, comm_space, mtag ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE output_recv + + + SUBROUTINE output_flux_file ( klb, kub ) + +!----------------------------------------------------------------------- +! +! Print fluxes to file. Either print just first moment or all moments. +! Root does printing. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: klb, kub +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + CHARACTER(LEN=16) :: fn='flux' + + CHARACTER(LEN=64) :: error + + INTEGER(i_knd) :: i, j, k, g, l, ierr, fu=16, mtag, jp, jlb, jub, & + kloc, rank + + INTEGER(i_knd), DIMENSION(2) :: co + + REAL(r_knd), ALLOCATABLE, DIMENSION(:,:) :: fprnt +!_______________________________________________________________________ +! +! 
Root opens the file, allocates the space for fprnt
+!_______________________________________________________________________
+
+    ierr = 0
+    error = ' '
+    CALL open_file ( fu, fn, 'REPLACE', 'WRITE', ierr, error )
+    CALL bcast ( ierr, comm_snap, root )
+    IF ( ierr /= 0 ) THEN
+      CALL print_error (ounit, error )
+      CALL stop_run ( 1, 4, 2, 0 )
+    END IF
+
+    IF ( iproc == root ) THEN
+      ALLOCATE( fprnt(nx,ny_gl), STAT=ierr )
+      fprnt = zero
+    END IF
+    CALL bcast ( ierr, comm_snap, root )
+    IF ( ierr /= 0 ) THEN
+      error = '***ERROR: OUTPUT_FLUX_FILE: Allocation error'
+      CALL print_error ( ounit, error )
+      CALL stop_run ( 1, 4, 2, 0 )
+    END IF
+!_______________________________________________________________________
+!
+!   Root does all printing. Start with file header comment.
+!_______________________________________________________________________
+
+    IF ( iproc == root ) THEN
+      IF ( fluxp == 1 ) THEN
+        WRITE( fu, 321 )
+      ELSE
+        WRITE( fu, 322 )
+      END IF
+    END IF
+!_______________________________________________________________________
+!
+!   Print data per moment, looping over all planes and groups; store a
+!   k-plane/group of data in one array and print it all at once.
+!   NOTE(review): root uses flux0 below even for l>1 (cf. fluxm send)
+!_______________________________________________________________________
+
+    DO l = 1, MAX( 1, (fluxp-1)*cmom )
+
+      IF ( iproc == root ) WRITE( fu, 323 ) l
+
+      DO g = 1, ng
+        DO k = 1, nz_gl
+
+          kloc = MOD( k-1, nz ) + 1
+          IF ( klb<=k .AND. 
k<=kub ) THEN + mtag = l*ng*nz + (g-1)*nz + kloc + IF ( l == 1 ) THEN + CALL output_send ( mtag, flux0(:,:,kloc,g) ) + ELSE + CALL output_send ( mtag, fluxm(l-1,:,:,kloc,g) ) + END IF + END IF + + + IF ( iproc == root ) THEN + co(1) = (k-1)/nz + fprnt(:,1:ny) = flux0(:,:,kloc,g) + DO jp = 0, npey-1 + jlb = jp*ny + 1 + jub = (jp+1) * ny + co(2) = jp + CALL cartrank ( co, rank, comm_space ) + mtag = l*ng*nz + (g-1)*nz + kloc + CALL output_recv ( mtag, rank, fprnt(:,jlb:jub) ) + END DO + WRITE( fu, 324 ) ( ( fprnt(i,j), i = 1, nx ), j = 1, ny_gl ) + END IF + + END DO + END DO + + END DO +!_______________________________________________________________________ +! +! Cleanup +!_______________________________________________________________________ + + IF ( iproc == root ) DEALLOCATE( fprnt ) + + CALL close_file ( fu, ierr, error ) + CALL bcast ( ierr, comm_snap, root ) + IF ( ierr /= 0 ) THEN + CALL print_error ( ounit, error ) + CALL stop_run ( 1, 4, 2, 0 ) + END IF +!_______________________________________________________________________ + + 321 FORMAT( 1X, 'flux(nx,ny,nz,ng) echo', /, 1X, 'Column-order ', & + 'loops: x-cells (fastest), y-cells, z-cells, groups ', & + '(slowest)' ) + 322 FORMAT( 1X, 'flux(nx,ny,nz,ng) and fluxm(cmom-1,nx,ny,nz,ng)', & + ' echo', /, 1X, 'Column-order loops by moment:', /, & + 1X, 'x-cells (fastest), y-cells, z-cells, groups, ', & + 'moments (slowest)' ) + 323 FORMAT( /, 2X, 'Moment = ', I1 ) + 324 FORMAT( 4(2X, ES17.10) ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE output_flux_file + + +END MODULE output_module diff --git a/Fortran/SNAP/src/plib.F90 b/Fortran/SNAP/src/plib.F90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/plib.F90 @@ -0,0 +1,1420 @@ +!----------------------------------------------------------------------- +! +! 
MODULE: plib_module +!> @brief +!> This module contains the variables that control parallel +!> decomposition and the subroutines for parallel environment setup. +!> Only module that requires MPI library interaction except for +!> time_module (MPI_WTIME). +! +!----------------------------------------------------------------------- + +MODULE plib_module + + USE global_module, ONLY: i_knd, r_knd, l_knd + + USE time_module, ONLY: wtime + +#ifdef MPI + USE mpi +#endif + + IMPLICIT NONE + + PUBLIC + + INTERFACE glmax + MODULE PROCEDURE glmax_i, glmax_d, glmax_d_1d + END INTERFACE glmax + + INTERFACE glmin + MODULE PROCEDURE glmin_i, glmin_d + END INTERFACE glmin + + INTERFACE glsum + MODULE PROCEDURE glsum_d + END INTERFACE glsum + + INTERFACE rtsum + MODULE PROCEDURE rtsum_d_1d + END INTERFACE rtsum + + INTERFACE bcast + MODULE PROCEDURE bcast_i_scalar, bcast_i_1d, bcast_d_scalar, & + bcast_d_1d + END INTERFACE bcast + + INTERFACE psend + MODULE PROCEDURE psend_d_2d, psend_d_3d + END INTERFACE psend + + INTERFACE isend + MODULE PROCEDURE isend_d_3d + END INTERFACE isend + + INTERFACE precv + MODULE PROCEDURE precv_d_2d, precv_d_3d + END INTERFACE precv + + INTERFACE irecv + MODULE PROCEDURE irecv_d_3d + END INTERFACE irecv + + SAVE +!_______________________________________________________________________ +! +! Module Input Variables +! +! npey - Number of MPI processes in the y-direction +! npez - Number of MPI processes in the z-direction +! ichunk - Size of work chunks in the x-direction +! nthreads - Number of OpenMP threads for energy parallelism +! nnested - number of nested threads +!_______________________________________________________________________ + + INTEGER(i_knd) :: npey=1, npez=1, ichunk=4, nthreads=1, nnested=1 +!_______________________________________________________________________ +! +! Run-time variables +! +! Note: all ranks are zero based +! +! root - root process for comm_snap, 0 +! +! nproc - Number of MPI processes +! 
iproc - Rank of calling process in base communicator +! +! comm_snap - base communicator, duplicated from MPI_COMM_WORLD +! comm_space - SDD communicator, ndimen-1 grid for 2-D (x-y) or +! 3-D (x-y-z) problems. Non-existent for 1-D (x) problems. +! sproc - Rank of calling process in comm_space +! +! ycomm - y-dimension process communicator +! zcomm - z-dimension process communicator +! yproc - PE column in SDD 2-D PE mesh (comm_space) +! zproc - PE row in SDD 2-D PE mesh (comm_space) +! firsty - logical determining if lowest yproc +! lasty - logical determining if highest yproc +! firstz - logical determining if lowest zproc +! lastz - logical determining if highest zproc +! ylop - rank of preceding yproc in ycomm +! yhip - rank of succeeding yproc in ycomm +! zlop - rank of preceding zproc in zcomm +! zhip - rank of succeeding zproc in zcomm +! +! thread_level - level of MPI thread support +! thread_single - MPI_THREAD_SINGLE +! thread_funneled - MPI_THREAD_FUNNELED +! thread_serialized - MPI_THREAD_SERIALIZED +! thread_multiple - MPI_THREAD_MULTIPLE +! lock(nthreads) - OpenMP lock for each thread +! +! do_nested - true/false use nested threading +! +! use_lock - true/false apply lock to threads MPI communications +! during sweep +! +! pce - Parallel computational efficiency of the run +!_______________________________________________________________________ + + INTEGER(i_knd), PARAMETER :: root=0 + + INTEGER(i_knd) :: nproc, iproc, comm_snap, comm_space, sproc, ycomm, & + zcomm, yproc, zproc, ylop, yhip, zlop, zhip, thread_level, & + thread_single, thread_funneled, thread_serialized, thread_multiple,& + max_threads + + LOGICAL(l_knd) :: firsty, lasty, firstz, lastz, do_nested, & + use_lock=.FALSE. + + REAL(r_knd) :: pce + +#ifdef OPENMP + INCLUDE 'omp_lib.h' + + INTEGER(OMP_LOCK_KIND), ALLOCATABLE, DIMENSION(:) :: lock +#endif + + + CONTAINS + + +#ifdef MPI + + SUBROUTINE pinit ( t1 ) + +!----------------------------------------------------------------------- +! 
+! Initialize the MPI process environment. Replicate MPI_COMM_WORLD to +! local communicator. Get each process its rank and total size. +! +!----------------------------------------------------------------------- + + REAL(r_knd), INTENT(OUT) :: t1 +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: ierr +!_______________________________________________________________________ +! +! Set thread support levels +!_______________________________________________________________________ + + thread_single = MPI_THREAD_SINGLE + thread_funneled = MPI_THREAD_FUNNELED + thread_serialized = MPI_THREAD_SERIALIZED + thread_multiple = MPI_THREAD_MULTIPLE +!_______________________________________________________________________ +! +! Initialize MPI and return available thread support level. Prefer +! thread_multiple but require thread_serialized. Abort for insufficient +! thread support. Do not want to use MPI_COMM_WORLD everywhere, so +! duplicate to comm_snap. Start the timer. +!_______________________________________________________________________ + + CALL MPI_INIT_THREAD ( thread_serialized, thread_level, ierr ) + + CALL wtime ( t1 ) + + CALL MPI_COMM_DUP ( MPI_COMM_WORLD, comm_snap, ierr ) +!_______________________________________________________________________ +! +! Get the communicator size and each process rank within the main +! communicator +!_______________________________________________________________________ + + CALL MPI_COMM_SIZE ( comm_snap, nproc, ierr ) + CALL MPI_COMM_RANK ( comm_snap, iproc, ierr ) +!_______________________________________________________________________ +! +! Put a barrier for every process to reach this point. 
+!_______________________________________________________________________ + + CALL barrier ( comm_snap ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE pinit + + + SUBROUTINE barrier ( comm ) + +!----------------------------------------------------------------------- +! +! MPI barrier +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: comm +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: ierr +!_______________________________________________________________________ + + CALL MPI_BARRIER ( comm, ierr ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE barrier + + + SUBROUTINE pcomm_set + +!----------------------------------------------------------------------- +! +! Setup the SDD communicator. +! +!----------------------------------------------------------------------- +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: ierr + + INTEGER(i_knd), DIMENSION(2) :: dims + + LOGICAL(l_knd) :: reorder + + LOGICAL(l_knd), DIMENSION(2) :: periodic, remain +!_______________________________________________________________________ +! +! Use MPI routines to create a Cartesian topology +!_______________________________________________________________________ + + dims(1) = npez + dims(2) = npey + periodic = .FALSE. + reorder = .TRUE. + + CALL MPI_CART_CREATE ( comm_snap, 2, dims, periodic, reorder, & + comm_space, ierr ) +!_______________________________________________________________________ +! +! 
Set up the sub-communicators of the cartesian grid +!_______________________________________________________________________ + + remain(1) = .FALSE. + remain(2) = .TRUE. + CALL MPI_CART_SUB ( comm_space, remain, ycomm, ierr ) + + remain(1) = .TRUE. + remain(2) = .FALSE. + CALL MPI_CART_SUB ( comm_space, remain, zcomm, ierr ) +!_______________________________________________________________________ +! +! Get comm_space, ycomm, and zcomm ranks +!_______________________________________________________________________ + + CALL MPI_COMM_RANK ( comm_space, sproc, ierr ) + CALL MPI_COMM_RANK ( ycomm, yproc, ierr ) + CALL MPI_COMM_RANK ( zcomm, zproc, ierr ) +!_______________________________________________________________________ +! +! Set some variables used during solution +!_______________________________________________________________________ + + IF ( yproc == 0 ) THEN + firsty = .TRUE. + ylop = yproc + ELSE + firsty = .FALSE. + ylop = yproc - 1 + END IF + + IF ( yproc == npey-1 ) THEN + lasty = .TRUE. + yhip = yproc + ELSE + lasty = .FALSE. + yhip = yproc + 1 + END IF + + IF ( zproc == 0 ) THEN + firstz = .TRUE. + zlop = zproc + ELSE + firstz = .FALSE. + zlop = zproc - 1 + END IF + + IF ( zproc == npez-1 ) THEN + lastz = .TRUE. + zhip = zproc + ELSE + lastz = .FALSE. + zhip = zproc + 1 + END IF +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE pcomm_set + + + SUBROUTINE pend + +!----------------------------------------------------------------------- +! +! Call to end MPI processes +! +!----------------------------------------------------------------------- +!_______________________________________________________________________ +! +! 
Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: ierr +!_______________________________________________________________________ + + CALL MPI_FINALIZE ( ierr ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE pend + + + SUBROUTINE glmax_i ( value, comm ) + +!----------------------------------------------------------------------- +! +! All reduce global max value (integer). Use specified communicator. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: comm + + INTEGER(i_knd), INTENT(INOUT) :: value +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: ierr, x +!_______________________________________________________________________ + + IF ( comm == MPI_COMM_NULL ) RETURN + CALL MPI_ALLREDUCE ( value, x, 1, MPI_INTEGER, MPI_MAX, comm, ierr ) + value = x +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE glmax_i + + + SUBROUTINE glmax_d ( value, comm ) + +!----------------------------------------------------------------------- +! +! All reduce global max value (double precision float). Use specified +! communicator. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: comm + + REAL(r_knd), INTENT(INOUT) :: value +!_______________________________________________________________________ +! +! 
Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: ierr + + REAL(r_knd) :: x +!_______________________________________________________________________ + + IF ( comm == MPI_COMM_NULL ) RETURN + CALL MPI_ALLREDUCE ( value, x, 1, MPI_DOUBLE_PRECISION, MPI_MAX, & + comm, ierr ) + value = x +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE glmax_d + + + SUBROUTINE glmax_d_1d ( value, dlen, comm ) + +!----------------------------------------------------------------------- +! +! All reduce global max value (double precision float) for 1-d array. +! Use specified communicator. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: dlen, comm + + REAL(r_knd), DIMENSION(dlen), INTENT(INOUT) :: value +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: ierr + + REAL(r_knd), DIMENSION(dlen) :: x +!_______________________________________________________________________ + + IF ( comm == MPI_COMM_NULL ) RETURN + CALL MPI_ALLREDUCE ( value, x, dlen, MPI_DOUBLE_PRECISION, MPI_MAX,& + comm, ierr ) + value = x +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE glmax_d_1d + + + SUBROUTINE glmin_i ( value, comm ) + +!----------------------------------------------------------------------- +! +! All reduce global min value (integer). Use specified communicator. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: comm + + INTEGER(i_knd), INTENT(INOUT) :: value +!_______________________________________________________________________ +! +! 
Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: ierr, x +!_______________________________________________________________________ + + IF ( comm == MPI_COMM_NULL ) RETURN + CALL MPI_ALLREDUCE ( value, x, 1, MPI_INTEGER, MPI_MIN, comm, ierr ) + value = x +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE glmin_i + + + SUBROUTINE glmin_d ( value, comm ) + +!----------------------------------------------------------------------- +! +! All reduce global min value (double precision float). Use specified +! communicator. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: comm + + REAL(r_knd), INTENT(INOUT) :: value +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: ierr + + REAL(r_knd) :: x +!_______________________________________________________________________ + + IF ( comm == MPI_COMM_NULL ) RETURN + CALL MPI_ALLREDUCE ( value, x, 1, MPI_DOUBLE_PRECISION, MPI_MIN, & + comm, ierr ) + value = x +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE glmin_d + + + SUBROUTINE glsum_d ( value, comm ) + +!----------------------------------------------------------------------- +! +! All reduce global sum value (double precision float). Use specified +! communicator. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: comm + + REAL(r_knd), INTENT(INOUT) :: value +!_______________________________________________________________________ +! +! 
Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: ierr + + REAL(r_knd) :: x +!_______________________________________________________________________ + + IF ( comm == MPI_COMM_NULL ) RETURN + CALL MPI_ALLREDUCE ( value, x, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & + comm, ierr ) + value = x +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE glsum_d + + + SUBROUTINE rtsum_d_1d ( value, dlen, comm, rtproc ) + +!----------------------------------------------------------------------- +! +! Normal reduce-to-root sum (double precision float) for 1-d array. Use +! specified communicator. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: dlen, comm, rtproc + + REAL(r_knd), DIMENSION(dlen), INTENT(INOUT) :: value +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: ierr + + REAL(r_knd), DIMENSION(dlen) :: x +!_______________________________________________________________________ + + IF ( comm == MPI_COMM_NULL ) RETURN + CALL MPI_REDUCE ( value, x, dlen, MPI_DOUBLE_PRECISION, MPI_SUM, & + rtproc, comm, ierr ) + value = x +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE rtsum_d_1d + + + SUBROUTINE bcast_i_scalar ( value, comm, bproc ) + +!----------------------------------------------------------------------- +! +! Broadcast (integer scalar). Use specified communicator and casting +! proc. +! 
+!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: comm, bproc + + INTEGER(i_knd), INTENT(INOUT) :: value +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: ierr +!_______________________________________________________________________ + + IF ( nproc == 1 ) RETURN + IF ( comm == MPI_COMM_NULL ) RETURN + + CALL MPI_BCAST ( value, 1, MPI_INTEGER, bproc, comm, ierr ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE bcast_i_scalar + + + SUBROUTINE bcast_i_1d ( value, ilen, comm, bproc ) + +!----------------------------------------------------------------------- +! +! Broadcast (integer 1d array). Use specified communicator and casting +! proc. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: ilen, comm, bproc + + INTEGER(i_knd), DIMENSION(ilen), INTENT(INOUT) :: value +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: ierr +!_______________________________________________________________________ + + IF ( nproc == 1 ) RETURN + IF ( comm == MPI_COMM_NULL ) RETURN + + CALL MPI_BCAST ( value, ilen, MPI_INTEGER, bproc, comm, & + ierr ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE bcast_i_1d + + + SUBROUTINE bcast_d_scalar ( value, comm, bproc ) + +!----------------------------------------------------------------------- +! +! Broadcast (double precision float scalar). Use specified communicator +! and casting proc. +! 
+!-----------------------------------------------------------------------
+
+    INTEGER(i_knd), INTENT(IN) :: comm, bproc
+
+    REAL(r_knd), INTENT(INOUT) :: value
+!_______________________________________________________________________
+!
+!   Local variables
+!_______________________________________________________________________
+
+    INTEGER(i_knd) :: ierr
+!_______________________________________________________________________
+
+    IF ( nproc == 1 ) RETURN
+    IF ( comm == MPI_COMM_NULL ) RETURN
+
+    CALL MPI_BCAST ( value, 1, MPI_DOUBLE_PRECISION, bproc, comm, ierr )
+!_______________________________________________________________________
+!_______________________________________________________________________
+
+  END SUBROUTINE bcast_d_scalar
+
+
+  SUBROUTINE bcast_d_1d ( value, dlen, comm, bproc )
+
+!-----------------------------------------------------------------------
+!
+! Broadcast (double precision float 1d array). Use specified
+! communicator and casting proc.
+!
+!-----------------------------------------------------------------------
+
+    INTEGER(i_knd), INTENT(IN) :: dlen, comm, bproc
+
+    REAL(r_knd), DIMENSION(dlen), INTENT(INOUT) :: value
+!_______________________________________________________________________
+!
+!   Local variables
+!_______________________________________________________________________
+
+    INTEGER(i_knd) :: ierr
+!_______________________________________________________________________
+
+    IF ( nproc == 1 ) RETURN
+    IF ( comm == MPI_COMM_NULL ) RETURN
+
+    CALL MPI_BCAST ( value, dlen, MPI_DOUBLE_PRECISION, bproc, comm, &
+      ierr )
+!_______________________________________________________________________
+!_______________________________________________________________________
+
+  END SUBROUTINE bcast_d_1d
+
+
+  SUBROUTINE psend_d_2d ( proc, myproc, d1, d2, value, comm, mtag )
+
+!-----------------------------------------------------------------------
+!
+! Send a rank-2 double precision array.
+!
+!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: proc, myproc, d1, d2, comm, mtag + + REAL(r_knd), DIMENSION(d1,d2), INTENT(IN) :: value +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: dlen, ierr +!_______________________________________________________________________ + + IF ( proc==myproc .OR. comm==MPI_COMM_NULL ) RETURN + + dlen = d1*d2 + + CALL MPI_SEND ( value, dlen, MPI_DOUBLE_PRECISION, proc, mtag, & + comm, ierr ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE psend_d_2d + + + SUBROUTINE psend_d_3d ( proc, myproc, d1, d2, d3, value, comm, mtag ) + +!----------------------------------------------------------------------- +! +! Send a rank-3 double precision array. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: proc, myproc, d1, d2, d3, comm, mtag + + REAL(r_knd), DIMENSION(d1,d2,d3), INTENT(IN) :: value +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: dlen, ierr +!_______________________________________________________________________ + + IF ( proc==myproc .OR. comm==MPI_COMM_NULL ) RETURN + + dlen = d1*d2*d3 + + CALL MPI_SEND ( value, dlen, MPI_DOUBLE_PRECISION, proc, mtag, & + comm, ierr ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE psend_d_3d + + + SUBROUTINE isend_d_3d ( proc, myproc, d1, d2, d3, value, comm, mtag, & + req ) + +!----------------------------------------------------------------------- +! +! 
Non-blocking-send a rank-3 double precision array.
+!
+!-----------------------------------------------------------------------
+
+    INTEGER(i_knd), INTENT(IN) :: proc, myproc, d1, d2, d3, comm, mtag
+
+    INTEGER(i_knd), INTENT(INOUT) :: req
+
+    REAL(r_knd), DIMENSION(d1,d2,d3), INTENT(IN) :: value
+!_______________________________________________________________________
+!
+!   Local variables
+!_______________________________________________________________________
+
+    INTEGER(i_knd) :: dlen, ierr
+!_______________________________________________________________________
+
+    IF ( proc==myproc .OR. comm==MPI_COMM_NULL ) RETURN
+
+    dlen = d1*d2*d3
+
+    CALL MPI_ISEND ( value, dlen, MPI_DOUBLE_PRECISION, proc, mtag, &
+      comm, req, ierr )
+!_______________________________________________________________________
+!_______________________________________________________________________
+
+  END SUBROUTINE isend_d_3d
+
+
+  SUBROUTINE precv_d_2d ( proc, myproc, d1, d2, value, comm, mtag )
+
+!-----------------------------------------------------------------------
+!
+! Receive a rank-2 double precision array.
+!
+!-----------------------------------------------------------------------
+
+    INTEGER(i_knd), INTENT(IN) :: proc, myproc, d1, d2, comm, mtag
+
+    REAL(r_knd), DIMENSION(d1,d2), INTENT(INOUT) :: value
+!_______________________________________________________________________
+!
+!   Local variables
+!_______________________________________________________________________
+
+    INTEGER(i_knd) :: dlen, ierr, istat(MPI_STATUS_SIZE)
+!_______________________________________________________________________
+
+    IF ( proc==myproc .OR. 
comm==MPI_COMM_NULL ) RETURN
+
+    dlen = d1*d2
+
+    CALL MPI_RECV ( value, dlen, MPI_DOUBLE_PRECISION, proc, mtag, &
+      comm, istat, ierr )
+!_______________________________________________________________________
+!_______________________________________________________________________
+
+  END SUBROUTINE precv_d_2d
+
+
+  SUBROUTINE precv_d_3d ( proc, myproc, d1, d2, d3, value, comm, mtag )
+
+!-----------------------------------------------------------------------
+!
+! Receive a rank-3 double precision array.
+!
+!-----------------------------------------------------------------------
+
+    INTEGER(i_knd), INTENT(IN) :: proc, myproc, d1, d2, d3, comm, mtag
+
+    REAL(r_knd), DIMENSION(d1,d2,d3), INTENT(INOUT) :: value
+!_______________________________________________________________________
+!
+!   Local variables
+!_______________________________________________________________________
+
+    INTEGER(i_knd) :: dlen, ierr, istat(MPI_STATUS_SIZE)
+!_______________________________________________________________________
+
+    IF ( proc==myproc .OR. comm==MPI_COMM_NULL ) RETURN
+
+    dlen = d1*d2*d3
+
+    CALL MPI_RECV ( value, dlen, MPI_DOUBLE_PRECISION, proc, mtag, &
+      comm, istat, ierr )
+!_______________________________________________________________________
+!_______________________________________________________________________
+
+  END SUBROUTINE precv_d_3d
+
+
+  SUBROUTINE irecv_d_3d ( proc, myproc, d1, d2, d3, value, comm, mtag, &
+    req )
+
+!-----------------------------------------------------------------------
+!
+! Non-blocking-receive a rank-3 double precision array.
+!
+!-----------------------------------------------------------------------
+
+    INTEGER(i_knd), INTENT(IN) :: proc, myproc, d1, d2, d3, comm, mtag
+
+    INTEGER(i_knd), INTENT(INOUT) :: req
+
+    REAL(r_knd), DIMENSION(d1,d2,d3), INTENT(INOUT) :: value
+!_______________________________________________________________________
+!
+! 
Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: dlen, ierr +!_______________________________________________________________________ + + IF ( proc==myproc .OR. comm==MPI_COMM_NULL ) RETURN + + dlen = d1*d2*d3 + + CALL MPI_IRECV ( value, dlen, MPI_DOUBLE_PRECISION, proc, mtag, & + comm, req, ierr ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE irecv_d_3d + + + SUBROUTINE cartrank ( coord, rank, comm ) + +!----------------------------------------------------------------------- +! +! Return the rank of a proc defined by the coordinates of the Cartesian +! communicator. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(OUT) :: rank + + INTEGER(i_knd), INTENT(IN) :: comm + + INTEGER(i_knd), DIMENSION(2), INTENT(IN) :: coord +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: ierr +!_______________________________________________________________________ + + IF ( comm == MPI_COMM_NULL ) RETURN + + CALL MPI_CART_RANK ( comm, coord, rank, ierr ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE cartrank + + + SUBROUTINE waitinit ( req, d1 ) + +!----------------------------------------------------------------------- +! +! Initialize asynchronous communication request array to null state. +! 
+!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: d1 + + INTEGER(i_knd), DIMENSION(d1), INTENT(OUT) :: req +!_______________________________________________________________________ + + req = MPI_REQUEST_NULL +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE waitinit + + + SUBROUTINE waits ( req ) + +!----------------------------------------------------------------------- +! +! Wait for all asynchronous communications encapsulated in the req array +! to finish. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(INOUT) :: req +!_______________________________________________________________________ +! +! Local variables. +!_______________________________________________________________________ + + INTEGER(i_knd) :: info + + INTEGER(i_knd), DIMENSION(MPI_STATUS_SIZE) :: stat +!_______________________________________________________________________ + + CALL MPI_WAIT ( req, stat, info ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE waits + + + SUBROUTINE waitall ( req, d1 ) + +!----------------------------------------------------------------------- +! +! Wait for all asynchronous communications encapsulated in the req array +! to finish. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: d1 + + INTEGER(i_knd), DIMENSION(d1), INTENT(INOUT) :: req +!_______________________________________________________________________ +! +! 
Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: info + + INTEGER(i_knd), DIMENSION(MPI_STATUS_SIZE,d1) :: stat +!_______________________________________________________________________ + + CALL MPI_WAITALL ( d1, req, stat, info ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE waitall + + + SUBROUTINE waitsome ( req, indx, d1, outcount ) + +!----------------------------------------------------------------------- +! +! Wait for some asynchronous communications encapsulated in the req +! array to finish. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: d1 + + INTEGER(i_knd), INTENT(OUT) :: outcount + + INTEGER(i_knd), DIMENSION(d1), INTENT(INOUT) :: req, indx +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: info + + INTEGER(i_knd), DIMENSION(MPI_STATUS_SIZE,d1) :: stat +!_______________________________________________________________________ +!_______________________________________________________________________ + + CALL MPI_WAITSOME ( d1, req, outcount, indx, stat, info ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE waitsome + + + SUBROUTINE tests ( req ) + +!----------------------------------------------------------------------- +! +! Test if the asynchronous communications encapsulated in the req handle +! has finished. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(INOUT) :: req +!_______________________________________________________________________ +! +! Local variables. 
+!_______________________________________________________________________ + + INTEGER(i_knd) :: info + + INTEGER(i_knd), DIMENSION(MPI_STATUS_SIZE) :: stat + + LOGICAL(l_knd) :: flag +!_______________________________________________________________________ + + CALL MPI_TEST ( req, flag, stat, info ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE tests + + + SUBROUTINE testsome ( req, indx, d1, outcount ) + +!----------------------------------------------------------------------- +! +! Test if some asynchronous communications encapsulated in the req array +! finished. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: d1 + + INTEGER(i_knd), INTENT(OUT) :: outcount + + INTEGER(i_knd), DIMENSION(d1), INTENT(INOUT) :: req, indx +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: info + + INTEGER(i_knd), DIMENSION(MPI_STATUS_SIZE,d1) :: stat +!_______________________________________________________________________ +!_______________________________________________________________________ + + CALL MPI_TESTSOME ( d1, req, outcount, indx, stat, info ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE testsome + +#else + + SUBROUTINE pinit ( t1 ) + REAL(r_knd), INTENT(OUT) :: t1 + INTEGER(i_knd) :: ierr + CALL wtime ( t1 ) + thread_single = 0 + thread_funneled = 0 + thread_serialized = 0 + thread_multiple = 0 + comm_snap = 0 + nproc = 1 + iproc = 0 + END SUBROUTINE pinit + + + SUBROUTINE barrier ( comm ) + INTEGER(i_knd), INTENT(IN) :: comm + END SUBROUTINE barrier + + + SUBROUTINE pcomm_set + comm_space = 0 + ycomm = 0 + zcomm = 0 + sproc = 0 + 
yproc = 0
    zproc = 0
    firsty = .TRUE.
    ylop = 0
    lasty = .TRUE.
    yhip = 0
    firstz = .TRUE.
    zlop = 0
    lastz = .TRUE.
    zhip = 0
  END SUBROUTINE pcomm_set


  SUBROUTINE pend
  END SUBROUTINE pend


  SUBROUTINE glmax_i ( value, comm )
    INTEGER(i_knd), INTENT(IN) :: comm
    INTEGER(i_knd), INTENT(IN) :: value
  END SUBROUTINE glmax_i


  SUBROUTINE glmax_d ( value, comm )
    INTEGER(i_knd), INTENT(IN) :: comm
    REAL(r_knd), INTENT(IN) :: value
  END SUBROUTINE glmax_d


  SUBROUTINE glmax_d_1d ( value, dlen, comm )
    INTEGER(i_knd), INTENT(IN) :: dlen, comm
    REAL(r_knd), DIMENSION(dlen), INTENT(IN) :: value
  END SUBROUTINE glmax_d_1d


  SUBROUTINE glmin_i ( value, comm )
    INTEGER(i_knd), INTENT(IN) :: comm
    INTEGER(i_knd), INTENT(IN) :: value
  END SUBROUTINE glmin_i


  SUBROUTINE glmin_d ( value, comm )
    INTEGER(i_knd), INTENT(IN) :: comm
    REAL(r_knd), INTENT(IN) :: value
  END SUBROUTINE glmin_d


  SUBROUTINE glsum_d ( value, comm )
    INTEGER(i_knd), INTENT(IN) :: comm
    REAL(r_knd), INTENT(IN) :: value
  END SUBROUTINE glsum_d


  SUBROUTINE rtsum_d_1d ( value, dlen, comm, rtproc )
    INTEGER(i_knd), INTENT(IN) :: dlen, comm, rtproc
    REAL(r_knd), DIMENSION(dlen), INTENT(IN) :: value
  END SUBROUTINE rtsum_d_1d


  SUBROUTINE bcast_i_scalar ( value, comm, bproc )
    INTEGER(i_knd), INTENT(IN) :: comm, bproc
    INTEGER(i_knd), INTENT(IN) :: value
  END SUBROUTINE bcast_i_scalar


  SUBROUTINE bcast_i_1d ( value, ilen, comm, bproc )
    INTEGER(i_knd), INTENT(IN) :: ilen, comm, bproc
    INTEGER(i_knd), DIMENSION(ilen), INTENT(IN) :: value
  END SUBROUTINE bcast_i_1d


  SUBROUTINE bcast_d_scalar ( value, comm, bproc )
    INTEGER(i_knd), INTENT(IN) :: comm, bproc
    REAL(r_knd), INTENT(IN) :: value
  END SUBROUTINE bcast_d_scalar


  SUBROUTINE bcast_d_1d ( value, dlen, comm, bproc )
    INTEGER(i_knd), INTENT(IN) :: dlen, comm, bproc
    REAL(r_knd), DIMENSION(dlen), INTENT(IN) :: value
  END SUBROUTINE bcast_d_1d


  SUBROUTINE psend_d_2d ( proc, myproc, d1, d2, value, comm, mtag )
    INTEGER(i_knd), INTENT(IN) :: proc, myproc, d1, d2, comm, mtag
    REAL(r_knd), DIMENSION(d1,d2), INTENT(IN) :: value
  END SUBROUTINE psend_d_2d


  SUBROUTINE psend_d_3d ( proc, myproc, d1, d2, d3, value, comm, mtag )
    INTEGER(i_knd), INTENT(IN) :: proc, myproc, d1, d2, d3, comm, mtag
    REAL(r_knd), DIMENSION(d1,d2,d3), INTENT(IN) :: value
  END SUBROUTINE psend_d_3d


  SUBROUTINE isend_d_3d ( proc, myproc, d1, d2, d3, value, comm, mtag, &
    req )
    INTEGER(i_knd), INTENT(IN) :: proc, myproc, d1, d2, d3, comm, mtag
    INTEGER(i_knd), INTENT(IN) :: req
    REAL(r_knd), DIMENSION(d1,d2,d3), INTENT(IN) :: value
  END SUBROUTINE isend_d_3d


  SUBROUTINE precv_d_2d ( proc, myproc, d1, d2, value, comm, mtag )
    INTEGER(i_knd), INTENT(IN) :: proc, myproc, d1, d2, comm, mtag
    REAL(r_knd), DIMENSION(d1,d2), INTENT(IN) :: value
  END SUBROUTINE precv_d_2d


  SUBROUTINE precv_d_3d ( proc, myproc, d1, d2, d3, value, comm, mtag )
    INTEGER(i_knd), INTENT(IN) :: proc, myproc, d1, d2, d3, comm, mtag
    REAL(r_knd), DIMENSION(d1,d2,d3), INTENT(IN) :: value
  END SUBROUTINE precv_d_3d


  SUBROUTINE irecv_d_3d ( proc, myproc, d1, d2, d3, value, comm, mtag, &
    req )
    INTEGER(i_knd), INTENT(IN) :: proc, myproc, d1, d2, d3, comm, mtag
    INTEGER(i_knd), INTENT(IN) :: req
    REAL(r_knd), DIMENSION(d1,d2,d3), INTENT(IN) :: value
  END SUBROUTINE irecv_d_3d


  SUBROUTINE cartrank ( coord, rank, comm )
    INTEGER(i_knd), INTENT(OUT) :: rank
    INTEGER(i_knd), INTENT(IN) :: comm
    INTEGER(i_knd), DIMENSION(2), INTENT(IN) :: coord
    rank = 0
  END SUBROUTINE cartrank


  SUBROUTINE waitinit ( req, d1 )
    INTEGER(i_knd), INTENT(IN) :: d1
    INTEGER(i_knd), DIMENSION(d1), INTENT(IN) :: req
  END SUBROUTINE waitinit


  SUBROUTINE waits ( req )
    INTEGER(i_knd), INTENT(IN) :: req
  END SUBROUTINE waits


  SUBROUTINE waitall ( req, d1 )
    INTEGER(i_knd), INTENT(IN) :: d1
    INTEGER(i_knd), DIMENSION(d1), INTENT(IN) :: req
  END SUBROUTINE waitall


  SUBROUTINE waitsome ( req, indx, d1, outcount )
    INTEGER(i_knd), INTENT(IN) :: d1, outcount
    INTEGER(i_knd), DIMENSION(d1), INTENT(IN) :: req, indx
  END SUBROUTINE waitsome


  SUBROUTINE tests ( req )
    INTEGER(i_knd), INTENT(IN) :: req
  END SUBROUTINE tests


  SUBROUTINE testsome ( req, indx, d1, outcount )
    INTEGER(i_knd), INTENT(IN) :: d1, outcount
    INTEGER(i_knd), DIMENSION(d1), INTENT(IN) :: req, indx
  END SUBROUTINE testsome

#endif


  SUBROUTINE pinit_omp ( ierr, error )

!-----------------------------------------------------------------------
!
! Setup the number of OpenMP threads. Check if any proc is exceeding
! max threads. Reset and report if so.
!
!-----------------------------------------------------------------------

    CHARACTER(LEN=64), INTENT(OUT) :: error

    INTEGER(i_knd), INTENT(OUT) :: ierr
!_______________________________________________________________________
!
! Local variables
!_______________________________________________________________________

    INTEGER(i_knd) :: max_threads
!_______________________________________________________________________

    ierr = 0
    error = ' '

#ifdef OPENMP

    max_threads = OMP_GET_MAX_THREADS()

    IF ( nthreads > max_threads ) THEN
      ierr = 1
      nthreads = max_threads
    END IF

! Reduce the flag across all ranks so every proc reports consistently

    CALL glmax ( ierr, comm_snap )
    IF ( ierr /= 0 ) THEN
      error = '*WARNING: PINIT_OMP: NTHREADS>MAX_THREADS; reset to MAX_THREADS'
    END IF
!_______________________________________________________________________
!
! Setup for nested threading. OMP_SET_NESTED is retired in newer
! OpenMP specs; max active levels expresses the same intent.
!_______________________________________________________________________

    do_nested = nnested > 1
!   CALL OMP_SET_NESTED ( do_nested )
    IF ( do_nested ) THEN
      CALL OMP_SET_MAX_ACTIVE_LEVELS ( 2 )
    ELSE
      CALL OMP_SET_MAX_ACTIVE_LEVELS ( 1 )
    END IF
!_______________________________________________________________________
!
! Create an array of locks, one for each thread, that will be used
! to control threaded communications
!_______________________________________________________________________

    CALL plock_omp ( 'init', nthreads )

#else

    max_threads = 1
    nthreads = 1
    do_nested = .FALSE.

#endif

!_______________________________________________________________________
!_______________________________________________________________________

  END SUBROUTINE pinit_omp


#ifdef OPENMP

  SUBROUTINE plock_omp ( dowhat, nlock )

!-----------------------------------------------------------------------
!
! Operate on an OpenMP lock. For 'init'/'destroy', nlock is the number
! of locks in the module lock array; for 'set'/'unset', nlock is the
! index of the single lock to operate on.
!
!-----------------------------------------------------------------------

    CHARACTER(LEN=*), INTENT(IN) :: dowhat

    INTEGER(i_knd), INTENT(IN) :: nlock
!_______________________________________________________________________
!
! Local variables
!_______________________________________________________________________

    INTEGER(i_knd) :: i
!_______________________________________________________________________

    SELECT CASE ( dowhat )

      CASE ( 'init' )
        ALLOCATE( lock(nlock) )
        DO i = 1, nlock
          CALL OMP_INIT_LOCK ( lock(i) )
        END DO
! Locks are only needed when several ranks and threads communicate
! without MPI_THREAD_MULTIPLE support
        use_lock = nproc>1 .AND. nthreads>1 .AND.                      &
          thread_level/=thread_multiple

      CASE ( 'set' )
        CALL OMP_SET_LOCK ( lock(nlock) )

      CASE ( 'unset' )
        CALL OMP_UNSET_LOCK ( lock(nlock) )

      CASE ( 'destroy' )
        DO i = 1, nlock
          CALL OMP_DESTROY_LOCK ( lock(i) )
        END DO
        DEALLOCATE( lock )

      CASE DEFAULT
        RETURN

    END SELECT
!_______________________________________________________________________
!_______________________________________________________________________

  END SUBROUTINE plock_omp


  FUNCTION thread_num ()

!-----------------------------------------------------------------------
!
! Return thread number of caller, [0, nthreads-1]. Maintains separation
! of main code and OpenMP by placing here.
!
!-----------------------------------------------------------------------

    INTEGER(i_knd) :: thread_num
!_______________________________________________________________________

    thread_num = OMP_GET_THREAD_NUM()
!_______________________________________________________________________
!_______________________________________________________________________

  END FUNCTION thread_num

#else

! Serial (no-OpenMP) stubs with matching interfaces.

  SUBROUTINE plock_omp ( dowhat, nlock )
    CHARACTER(LEN=*), INTENT(IN) :: dowhat
    INTEGER(i_knd), INTENT(IN) :: nlock
  END SUBROUTINE plock_omp


  FUNCTION thread_num ()
    INTEGER(i_knd) :: thread_num
    thread_num = 0
  END FUNCTION thread_num

#endif


END MODULE plib_module
diff --git a/Fortran/SNAP/src/setup.f90 b/Fortran/SNAP/src/setup.f90
new file mode 100644
--- /dev/null
+++ b/Fortran/SNAP/src/setup.f90
@@ -0,0 +1,953 @@
!-----------------------------------------------------------------------
!
! MODULE: setup_module
!> @brief
!> This module controls problem setup, including geometry setup, angular
!> domain setup, data setup, material and source layouts. Calls for
!> data allocation.
!
!-----------------------------------------------------------------------

MODULE setup_module

  USE global_module, ONLY: i_knd, r_knd, ounit, zero, half, one, two

  USE geom_module, ONLY: ndimen, nx, ny, nz, lx, ly, lz, dx, dy, dz,   &
    ny_gl, nz_gl, jlb, jub, klb, kub, nc, jdim, kdim

  USE sn_module, ONLY: nang, mu, eta, xi, w, nmom, noct, sn_allocate,  &
    sn_expcoeff

  USE data_module, ONLY: ng, v, mat_opt, mat, src_opt, scatp, qi, nmat,&
    sigt, siga, sigs, slgg, data_allocate

  USE control_module, ONLY: epsi, iitm, oitm, timedep, tf, nsteps, dt, &
    swp_typ, it_det, fluxp, fixup, soloutp, kplane, popout, ncor, nops,&
    last_oct, corner_sch, multiswp, yzstg, corner_loop_order, angcpy,  &
    control_allocate

  USE mms_module, ONLY: mms_setup

  USE utils_module, ONLY: print_error, stop_run, open_file, close_file

  USE time_module, ONLY: tset, wtime

  USE plib_module, ONLY: npey, npez, glmax, comm_snap, yproc, zproc,   &
    iproc, root, nthreads, thread_level, thread_single,                &
    thread_funneled, thread_serialized, thread_multiple, nnested,      &
    do_nested, ichunk, pce

  IMPLICIT NONE

  PRIVATE

  PUBLIC :: setup


  CONTAINS


  SUBROUTINE setup ( ndpwds )

!-----------------------------------------------------------------------
!
! Control the setup process.
!
!-----------------------------------------------------------------------

    INTEGER(i_knd), INTENT(INOUT) :: ndpwds
!_______________________________________________________________________
!
! Local variables
!_______________________________________________________________________

    CHARACTER(LEN=64) :: error

    INTEGER(i_knd) :: flg, ierr, i, idle, hnpy, hnpz, mgpt, stgs, mis, &
      mie, mjs, mje, mks, mke, qis, qie, qjs, qje, qks, qke

    REAL(r_knd) :: t1, t2
!_______________________________________________________________________
!
! First put input ny and nz into ny_gl and nz_gl respectively. Use ny
! and nz for local sizes. Determine global indices of local bounds.
! Set the number of spatial work chunks, nc. Set number of sweep
! directions in y/z, jdim/kdim.
!_______________________________________________________________________

    CALL wtime ( t1 )

! NOTE(review): integer divisions below assume npey|ny_gl, npez|nz_gl,
! and ichunk|nx -- presumably validated by input checking elsewhere;
! confirm against the input module.

    ny_gl = ny
    nz_gl = nz
    ny = ny_gl / npey
    nz = nz_gl / npez

    jlb = yproc*ny + 1
    jub = (yproc+1) * ny
    klb = zproc*nz + 1
    kub = (zproc+1) * nz

    nc = nx/ichunk

    jdim = MIN( ndimen, 2 )
    kdim = MAX( ndimen-1, 1 )
    ncor = jdim*kdim
!_______________________________________________________________________
!
! Allocate needed arrays
!_______________________________________________________________________

    CALL setup_alloc ( ndpwds, flg, ierr, error )
    IF ( ierr /= 0 ) THEN
      CALL print_error ( ounit, error )
      CALL stop_run ( 1, flg, 0, 0 )
    END IF
!_______________________________________________________________________
!
! Set up the corner schedule for 3-D problems. Choose the order that
! minimizes the number of stages based on npey and npez.
!_______________________________________________________________________

    corner_sch = 0
    IF ( ndimen == 3 ) THEN
      IF ( npey <= npez ) THEN
        last_oct = 6
        corner_sch(:,1) = (/ 1, 1 /)
        corner_sch(:,2) = (/ 2, 1 /)
        corner_sch(:,3) = (/ 2, 2 /)
        corner_sch(:,4) = (/ 1, 2 /)
      ELSE
        last_oct = 4
        corner_sch(:,1) = (/ 1, 1 /)
        corner_sch(:,2) = (/ 1, 2 /)
        corner_sch(:,3) = (/ 2, 2 /)
        corner_sch(:,4) = (/ 2, 1 /)
      END IF
    ELSE
      last_oct = 2**ndimen
      corner_sch(:,1) = (/ 1, 1 /)
      corner_sch(:,2) = (/ 2, 1 /)
    END IF
!_______________________________________________________________________
!
! Set up the concurrent octant mesh sweeps control information. Force
! normal sweeper swp_typ=0 for now. Set nops. Compute an array that
! determines each KBA stage's relative position in the full sweep,
! yzstg, and the number of stages per octant pair. Set up the corner
! loop priority to avoid wasted request comparisons on messages not
! likely to be received.
!_______________________________________________________________________

    yzstg = 0
    corner_loop_order = 0

    IF ( multiswp == 1 ) THEN

      nops = ng*ncor*2*nc
      swp_typ=0

      yzstg(1) = npey - (yproc + 1) + npez - (zproc + 1)
      yzstg(2) = yproc + npez - (zproc + 1)
      yzstg(3) = npey - (yproc + 1) + zproc
      yzstg(4) = yproc + zproc

! Pick the corner whose sweep reaches this proc earliest

      IF ( ndimen == 1 ) THEN
        i = 1
      ELSE IF ( ndimen == 2 ) THEN
        i = MINLOC( yzstg(1:2), 1 )
      ELSE
        i = MINLOC( yzstg, 1 )
      END IF

      SELECT CASE ( i )
        CASE ( 1 )
          IF ( ndimen == 1 ) THEN
            corner_loop_order = (/ 1, 0, 0, 0 /)
          ELSE IF ( ndimen == 2 ) THEN
            corner_loop_order = (/ 1, 2, 0, 0 /)
          ELSE
            corner_loop_order = (/ 1, 2, 3, 4 /)
          END IF
        CASE ( 2 )
          IF ( ndimen == 2 ) THEN
            corner_loop_order = (/ 2, 1, 0, 0 /)
          ELSE
            corner_loop_order = (/ 2, 1, 4, 3 /)
          END IF
        CASE ( 3 )
          corner_loop_order = (/ 3, 4, 1, 2 /)
        CASE ( 4 )
          corner_loop_order = (/ 4, 3, 2, 1 /)
      END SELECT

    END IF
!_______________________________________________________________________
!
! Compute PCE. The corner sweeps PCE is a guess as to the best-case
! scenario: effectively that additional corner starts a dimension
! compared to a single corner start.
!_______________________________________________________________________

    IF ( multiswp == 0 ) THEN
      IF ( npey <= npez ) THEN
        idle = 3*(npey-1) + 2*(npez-1)
      ELSE
        idle = 2*(npey-1) + 3*(npez-1)
      END IF
    ELSE
      hnpy = CEILING( REAL( npey, r_knd ) / two )
      hnpz = CEILING( REAL( npez, r_knd ) / two )
      idle = 2 * ( hnpy + hnpz - 2 )
    END IF

    mgpt = CEILING( REAL( ng, r_knd ) / REAL( nthreads, r_knd ) )
    stgs = nc*noct*mgpt + idle

    pce = REAL( nc*noct*ng, r_knd ) / REAL( nthreads*stgs, r_knd )
!_______________________________________________________________________
!
! Progress through setups. _delta sets cell and step sizes, _vel sets
! velocity array, _angle sets the ordinates/weights, _mat sets the
! material identifiers, _src sets fixed source, _data sets the
! mock cross section arrays, and sn_expcoeff sets up the scattering
! expansion basis function array.
!_______________________________________________________________________

    CALL setup_delta

    CALL setup_vel

    CALL setup_angle

    CALL setup_mat ( mis, mie, mjs, mje, mks, mke )

    CALL setup_data

    CALL sn_expcoeff ( ndimen )

    CALL setup_src ( qis, qie, qjs, qje, qks, qke, ierr, error )
    IF ( ierr /= 0 ) THEN
      CALL print_error ( ounit, error )
      CALL stop_run ( 1, 3, 0, 0 )
    END IF
!_______________________________________________________________________
!
! Echo the data from this module to the output file. If requested via
! scatp, print the full scattering matrix to file.
!_______________________________________________________________________

    IF ( iproc == root ) THEN
      CALL setup_echo ( mis, mie, mjs, mje, mks, mke, qis, qie, qjs,   &
        qje, qks, qke )
      IF ( scatp == 1 ) CALL setup_scatp ( ierr, error )
    END IF

    CALL glmax ( ierr, comm_snap )
    IF ( ierr /= 0 ) THEN
      CALL print_error ( ounit, error )
      CALL stop_run ( 1, 4, 0, 0 )
    END IF

    CALL wtime ( t2 )
    tset = t2 - t1
!_______________________________________________________________________
!_______________________________________________________________________

  END SUBROUTINE setup


  SUBROUTINE setup_alloc ( ndpwds, flg, ierr, error )

!-----------------------------------------------------------------------
!
! Call for individual allocation routines to size the run-time arrays.
!
!-----------------------------------------------------------------------

    CHARACTER(LEN=64), INTENT(OUT) :: error

    INTEGER(i_knd), INTENT(INOUT) :: ndpwds

    INTEGER(i_knd), INTENT(OUT) :: flg, ierr
!_______________________________________________________________________

    flg = 0
    ierr = 0
    error = ' '

    CALL sn_allocate ( ndimen, ndpwds, ierr )
    CALL glmax ( ierr, comm_snap )
    IF ( ierr /= 0 ) THEN
      error = '***ERROR: SETUP_ALLOC: Allocation error in SN_ALLOCATE'
      RETURN
    END IF

    CALL data_allocate ( nx, ny, nz, nmom, nang, noct, timedep,        &
      ndpwds, ierr )
    CALL glmax ( ierr, comm_snap )
    IF ( ierr /= 0 ) THEN
      flg = 1
      error = '***ERROR: SETUP_ALLOC: Allocation error in DATA_ALLOCATE'
      RETURN
    END IF

    CALL control_allocate ( ng, ndpwds, ierr )
    CALL glmax ( ierr, comm_snap )
    IF ( ierr /= 0 ) THEN
      flg = 2
! NOTE(review): message prefix reads CONTROL_ALLOC although it is
! issued from SETUP_ALLOC -- confirm intent before changing the string
      error = '***ERROR: CONTROL_ALLOC: Allocation error of control '//&
        'arrays'
      RETURN
    END IF

!_______________________________________________________________________
!_______________________________________________________________________

  END SUBROUTINE setup_alloc


  SUBROUTINE setup_delta

!-----------------------------------------------------------------------
!
! Spatial cell and time step size computation.
!
!-----------------------------------------------------------------------
!_______________________________________________________________________
!_______________________________________________________________________

    dx = lx / REAL( nx, r_knd )
    IF ( ndimen > 1 ) dy = ly / REAL( ny_gl, r_knd )
    IF ( ndimen > 2 ) dz = lz / REAL( nz_gl, r_knd )

    IF ( timedep == 1 ) dt = tf / REAL( nsteps, r_knd )
!_______________________________________________________________________
!_______________________________________________________________________

  END SUBROUTINE setup_delta


  SUBROUTINE setup_vel

!-----------------------------------------------------------------------
!
! Setup a simple mock velocity array for time-dependent calcs
!
!-----------------------------------------------------------------------
!_______________________________________________________________________
!
! Local variables
!_______________________________________________________________________

    INTEGER(i_knd) :: g, t
!_______________________________________________________________________
!
! Loop over groups. Set velocities simply according to ng.
!_______________________________________________________________________

    IF ( timedep == 0 ) RETURN

    DO g = 1, ng
      t = ng - g + 1
      v(g) = REAL( t, r_knd )
    END DO
!_______________________________________________________________________
!_______________________________________________________________________

  END SUBROUTINE setup_vel


  SUBROUTINE setup_angle

!-----------------------------------------------------------------------
!
! Create the mock quadrature sets for 1-D, 2-D, and 3-D problems.
!
!-----------------------------------------------------------------------
!_______________________________________________________________________
!
! Local variables
!_______________________________________________________________________

    INTEGER(i_knd) :: m

    REAL(r_knd) :: dm

    REAL(r_knd), DIMENSION(nang) :: t
!_______________________________________________________________________
!
! Arrays allocated according to dimensionality. So do loops by ndimen.
!_______________________________________________________________________

    dm = one/REAL( nang, r_knd )
    t = zero

    mu(1) = half*dm
    DO m = 2, nang
      mu(m) = mu(m-1) + dm
    END DO

    IF ( ndimen > 1 ) THEN
      eta(1) = one - half*dm
      DO m = 2, nang
        eta(m) = eta(m-1) - dm
      END DO

      IF ( ndimen > 2 ) THEN
! xi completes the unit direction vector: mu^2 + eta^2 + xi^2 = 1
        t = mu**2 + eta**2
        DO m = 1, nang
          xi(m) = SQRT( one - t(m) )
        END DO
      END IF

    END IF
!_______________________________________________________________________
!
! Give all weights same value.
!_______________________________________________________________________

    IF ( ndimen == 1 ) THEN
      w = half / REAL( nang, r_knd )
    ELSE IF ( ndimen == 2 ) THEN
      w = 0.25_r_knd / REAL( nang, r_knd )
    ELSE
      w = 0.125_r_knd / REAL( nang, r_knd )
    END IF
!_______________________________________________________________________
!_______________________________________________________________________

  END SUBROUTINE setup_angle


  SUBROUTINE setup_mat ( i1, i2, j1, j2, k1, k2 )

!-----------------------------------------------------------------------
!
! Setup the material according to mat_opt.
!
! There are only two materials max; one per cell. mat_opt defines the
! material layout and has a similar meaning for 1-D, 2-D, 3-D problems.
!
! 0 = Homogeneous (mat1) problem, regardless of dimension
! 1 = Center. mat1 is the base. 1-D: 25%mat1/50%mat2/25%mat1
!     2-D: same as 1-D but in two dimensions, so mat2 region is a square
!     3-D: same as 1-D but in three dimensions, so mat2 is a cube
! 2 = Corner. Same concept as 1, but move slab/square/cube to origin
!
! Return starting/ending indices for printing an echo.
!
!-----------------------------------------------------------------------

    INTEGER(i_knd), INTENT(OUT) :: i1, i2, j1, j2, k1, k2
!_______________________________________________________________________
!
! Local variables
!_______________________________________________________________________

    INTEGER(i_knd) :: i, j, k, jj, kk
!_______________________________________________________________________
!
! Form the base with mat1. Use dimension and mat_opt to determine
! the rest of the layout.
!_______________________________________________________________________

    mat = 1

    i1 = 1; i2 = 1
    j1 = 1; j2 = 1
    k1 = 1; k2 = 1
    IF ( mat_opt == 1 ) THEN
      i1 = nx/4 + 1
      i2 = 3*nx/4
      IF ( ndimen > 1 ) THEN
        j1 = ny_gl/4 + 1
        j2 = 3*ny_gl/4
        IF ( ndimen > 2 ) THEN
          k1 = nz_gl/4 + 1
          k2 = 3*nz_gl/4
        END IF
      END IF
    ELSE IF ( mat_opt == 2 ) THEN
      i2 = nx/2
      IF ( ndimen > 1 ) THEN
        j2 = ny_gl/2
        IF ( ndimen > 2 ) k2 = nz_gl/2
      END IF
    END IF

! Indices above are global; map each owned j/k to local jj/kk

    IF ( mat_opt > 0 ) THEN
      DO k = k1, k2
        IF ( klb<=k .AND. k<=kub ) THEN
          kk = MOD( k-1, nz ) + 1
          DO j = j1, j2
            IF ( jlb<=j .AND. j<=jub ) THEN
              jj = MOD( j-1, ny ) + 1
              DO i = i1, i2
                mat(i,jj,kk) = 2
              END DO
            END IF
          END DO
        END IF
      END DO
    END IF
!_______________________________________________________________________
!_______________________________________________________________________

  END SUBROUTINE setup_mat


  SUBROUTINE setup_src ( i1, i2, j1, j2, k1, k2, ierr, error )

!-----------------------------------------------------------------------
!
! Setup the source according to src_opt.
!
! Source is either on at strength 1.0 or off, per cell. src_opt defines
! the layout and has a similar meaning for 1-D, 2-D, 3-D problems.
!
! 0 = Source everywhere, regardless of dimension
! 1 = Source occupies the slab/square/cube center of the problem
!     1-D: 25%/50%/25% = source off/on/off
!     2-D: same as 1-D but in two dimensions
!     3-D: same as 1-D but in three dimensions
! 2 = Corner. Same concept as 1, but move slab/square/cube to origin
! 3 = MMS option, f = sin(pi*x/lx)
!     All options isotropic source
!
! Return starting/ending indices for printing an echo.
!
!-----------------------------------------------------------------------

    CHARACTER(LEN=64), INTENT(OUT) :: error

    INTEGER(i_knd), INTENT(OUT) :: i1, i2, j1, j2, k1, k2, ierr
!_______________________________________________________________________
!
! Local variables
!_______________________________________________________________________

    INTEGER(i_knd) :: i, j, k, jj, kk
!_______________________________________________________________________
!
! Form the base by setting the src to 0 to start. Then assign src to
! spatial cells according to ndimen and src_opt.
!_______________________________________________________________________

    IF ( src_opt == 3 ) THEN
      CALL mms_setup ( ierr, error )
      RETURN
    END IF
!_______________________________________________________________________
!
! If src_opt is 0, source is everywhere
!_______________________________________________________________________

    i1 = 1; i2 = nx
    j1 = 1; j2 = ny_gl
    k1 = 1; k2 = nz_gl
!_______________________________________________________________________
!
! IF src_opt is not 0, reset indices for source's spatial range
!_______________________________________________________________________

    IF ( src_opt == 1 ) THEN
      i1 = nx/4 + 1
      i2 = 3*nx/4
      IF ( ndimen > 1 ) THEN
        j1 = ny_gl/4 + 1
        j2 = 3*ny_gl/4
        IF ( ndimen > 2 ) THEN
          k1 = nz_gl/4 + 1
          k2 = 3*nz_gl/4
        END IF
      END IF
    ELSE IF ( src_opt == 2 ) THEN
      i2 = nx/2
      IF ( ndimen > 1 ) THEN
        j2 = ny_gl/2
        IF ( ndimen > 2 ) k2 = nz_gl/2
      END IF
    END IF
!_______________________________________________________________________
!
! Indices are all known, so set the source to unity for that range
!_______________________________________________________________________

    DO k = k1, k2
      IF ( klb<=k .AND. k<=kub ) THEN
        kk = MOD( k-1, nz ) + 1
        DO j = j1, j2
          IF ( jlb<=j .AND. j<=jub ) THEN
            jj = MOD( j-1, ny ) + 1
            DO i = i1, i2
              qi(i,jj,kk,:) = one
            END DO
          END IF
        END DO
      END IF
    END DO
!_______________________________________________________________________
!_______________________________________________________________________

  END SUBROUTINE setup_src


  SUBROUTINE setup_data

!-----------------------------------------------------------------------
!
! Setup the data arrays according to simple rules/functions to scale
! to arbitrary number of groups, moments. Fixed at a library for two
! materials only. Fixed group-to-group coupling properties.
!
!-----------------------------------------------------------------------
!_______________________________________________________________________
!
! Local variables
!_______________________________________________________________________

    INTEGER(i_knd) :: g, n

    REAL(r_knd) :: t
!_______________________________________________________________________
!
! Set the group 1 data for material 1 and material 2 (if present).
!_______________________________________________________________________

    sigt(1,1) = one
    siga(1,1) = half
    sigs(1,1) = half

    IF ( nmat == 2 ) THEN
      sigt(2,1) = two
      siga(2,1) = 0.8_r_knd
      sigs(2,1) = 1.2_r_knd
    END IF
!_______________________________________________________________________
!
! Scale these values for increasing number of groups by adding 0.01
! to sigt for each additional group. Split that 0.01 evenly among
! siga and sigs.
!_______________________________________________________________________

    DO g = 2, ng
      sigt(:,g) = sigt(:,g-1) + 0.01_r_knd
      siga(:,g) = siga(:,g-1) + 0.005_r_knd
      sigs(:,g) = sigs(:,g-1) + 0.005_r_knd
    END DO
!_______________________________________________________________________
!
! For material 1, upscattering from a given group to all above groups
! is 10% of total scattering. 20% is in group. 70% is down-scattering.
! For group 1/ng, no up/down-scattering, so self-scattering takes the
! remaining fraction.
!_______________________________________________________________________

    DO g = 1, ng

      IF ( ng == 1 ) THEN
        slgg(1,1,1,1) = sigs(1,g)
        EXIT
      END IF

      slgg(1,1,g,g) = 0.2_r_knd*sigs(1,g)

      IF ( g > 1 ) THEN
        t = one / REAL( g-1, r_knd )
        slgg(1,1,g,1:(g-1)) = 0.1_r_knd*sigs(1,g)*t
      ELSE
        slgg(1,1,1,1) = 0.3_r_knd*sigs(1,1)
      END IF

      IF ( g < ng ) THEN
        t = one / REAL( ng-g, r_knd )
        slgg(1,1,g,(g+1):ng) = 0.7_r_knd*sigs(1,g)*t
      ELSE
        slgg(1,1,ng,ng) = 0.9_r_knd*sigs(1,ng)
      END IF

    END DO

    IF ( nmat == 2 ) THEN
!_______________________________________________________________________
!
! Repeat for material 2. Up-scattering is 10%. In-scattering is 50%.
! Down-scattering is 40%.
!_______________________________________________________________________

      DO g = 1, ng

        IF ( ng == 1 ) THEN
          slgg(2,1,1,1) = sigs(2,g)
          EXIT
        END IF

        slgg(2,1,g,g) = 0.5_r_knd*sigs(2,g)

        IF ( g > 1 ) THEN
          t = one / REAL( g-1, r_knd )
          slgg(2,1,g,1:(g-1)) = 0.1_r_knd*sigs(2,g)*t
        ELSE
          slgg(2,1,1,1) = 0.6_r_knd*sigs(2,1)
        END IF

        IF ( g < ng ) THEN
          t = one / REAL( ng-g, r_knd )
          slgg(2,1,g,(g+1):ng) = 0.4_r_knd*sigs(2,g)*t
        ELSE
          slgg(2,1,ng,ng) = 0.9_r_knd*sigs(2,ng)
        END IF

      END DO

    END IF
!_______________________________________________________________________
!
! Set group-to-group scattering moments. Allowed up to 4 moments. For
! material 1, start with the base above. Divide by 10 to get next
! moment's data. Repeat that by 2 continuously, up to nmom or 4.
!_______________________________________________________________________

    IF ( nmom > 1 ) THEN
      slgg(1,2,:,:) = 0.1_r_knd*slgg(1,1,:,:)
      DO n = 3, nmom
        slgg(1,n,:,:) = half*slgg(1,n-1,:,:)
      END DO
    END IF
!_______________________________________________________________________
!
! Similar procedure for material 2, but first multiply by 0.8. Then
! reduce the data magnitudes by 0.6 for each successive moment.
!_______________________________________________________________________

    IF ( nmat==2 .AND. nmom>1 ) THEN
      slgg(2,2,:,:) = 0.8_r_knd*slgg(2,1,:,:)
      DO n = 3, nmom
        slgg(2,n,:,:) = 0.6_r_knd*slgg(2,n-1,:,:)
      END DO
    END IF
!_______________________________________________________________________
!_______________________________________________________________________

  END SUBROUTINE setup_data


  SUBROUTINE setup_echo ( mis, mie, mjs, mje, mks, mke, qis, qie, qjs, &
    qje, qks, qke )

!-----------------------------------------------------------------------
!
! Print run-time variables setup in this module to the output file.
!
!-----------------------------------------------------------------------

    INTEGER(i_knd), INTENT(IN) :: mis, mie, mjs, mje, mks, mke, qis,   &
      qie, qjs, qje, qks, qke
!_______________________________________________________________________
!
! Local variables
!_______________________________________________________________________

    CHARACTER(LEN=1) :: star='*'

    INTEGER(i_knd) :: i, j
!_______________________________________________________________________

    WRITE( ounit, 131 ) ( star, i = 1, 80 )

    WRITE( ounit, 132 )
    WRITE( ounit, 133 ) ndimen, nx, ny_gl, nz_gl, lx, ly, lz, dx, dy, dz

    WRITE( ounit, 134 )
    WRITE( ounit, 135 ) nmom, nang, noct, w(1)
    WRITE( ounit, 136 )
    SELECT CASE ( ndimen )
      CASE ( 1 )
        WRITE( ounit, 137 ) ( mu(i), i = 1, nang )
      CASE ( 2 )
        WRITE( ounit, 138 ) ( mu(i), eta(i), i = 1, nang )
      CASE ( 3 )
        WRITE( ounit, 139 ) ( mu(i), eta(i), xi(i), i = 1, nang )
    END SELECT

    WRITE( ounit, 140 )
    WRITE( ounit, 141 ) mat_opt, nmat
    WRITE( ounit, 142 )
    IF( mat_opt > 0 ) WRITE( ounit, 143 ) mis, mjs, mks, mie, mje, mke

    WRITE( ounit, 144 )
    WRITE( ounit, 145 ) src_opt
    IF ( src_opt < 3 ) THEN
      WRITE( ounit, 146 )
      WRITE( ounit, 147 ) qis, qjs, qks, qie, qje, qke
    ELSE
      WRITE( ounit, 1471 )
    END
IF + + WRITE( ounit, 148 ) + WRITE( ounit, 149 ) ng + DO j = 1, nmat + WRITE( ounit, 150 ) j + WRITE( ounit, 151 ) + WRITE( ounit, 152 ) ( i, sigt(j,i), siga(j,i), sigs(j,i), & + i = 1, ng ) + END DO + + IF ( timedep == 1 ) THEN + WRITE( ounit, 153 ) + WRITE( ounit, 154 ) tf, nsteps, dt + WRITE( ounit, 155 ) + WRITE( ounit, 156 ) ( i, v(i), i = 1, ng ) + END IF + + WRITE( ounit, 157 ) + WRITE( ounit, 158 ) epsi, iitm, oitm, timedep, swp_typ, multiswp, & + angcpy, it_det, soloutp, kplane, popout, fluxp, fixup + + WRITE( ounit, 181 ) + WRITE( ounit, 182 ) npey, npez, nthreads + WRITE( ounit, 183 ) thread_single, thread_funneled, & + thread_serialized, thread_multiple + WRITE( ounit, 184 ) thread_level + IF ( do_nested ) THEN + WRITE( ounit, 185 ) nnested + ELSE + WRITE( ounit, 186 ) nnested + END IF + WRITE ( ounit, 187 ) pce + + WRITE( ounit, 159 ) ( star, i = 1, 80 ) +!_______________________________________________________________________ + + 131 FORMAT( 10X, 'keyword Calculation Run-time Parameters Echo', /,& + 80A, / ) + + 132 FORMAT( 2X, 'Geometry' ) + 133 FORMAT( 4X, 'ndimen = ', I1, /, 4X, 'nx = ', I5, /, 4X, & + 'ny = ', I5, /, 4X, 'nz = ', I5, /, 4X, 'lx = ', & + ES11.4, /, 4X, 'ly = ', ES11.4, /, 4X, 'lz = ', ES11.4,& + /, 4X, 'dx = ', ES11.4, /, 4X, 'dy = ', ES11.4, /, 4X, & + 'dz = ', ES11.4, / ) + + 134 FORMAT( 2X, 'Sn' ) + 135 FORMAT( 4X, 'nmom = ', I1, /, 4X, 'nang = ', I4, /, 4X, & + 'noct = ', I1, /, /, 4X, 'w = ', ES11.4, & + ' ... 
uniform weights', / ) + 136 FORMAT( 4X, ' mu eta xi') + 137 FORMAT( 4X, ES15.8 ) + 138 FORMAT( 4X, ES15.8, 2X, ES15.8 ) + 139 FORMAT( 4X, ES15.8, 2X, ES15.8, 2X, ES15.8 ) + + 140 FORMAT( /, 2X, 'Material Map' ) + 141 FORMAT( 4X, 'mat_opt = ', I1, ' --> nmat = ', I1 ) + 142 FORMAT( 4X, 'Base material (default for every cell) = 1' ) + 143 FORMAT( 4X, 'Material 2 present:', /, 8X, 'Starting cell: ( ', & + I5, ', ', I5, ', ', I5, ' )', /, 8X, & + 'Ending cell: ( ', I5, ', ', I5, ', ', I5, ' )' ) + + 144 FORMAT( /, 2X, 'Source Map' ) + 145 FORMAT( 4X, 'src_opt = ', I1 ) + 146 FORMAT( 4X, 'Source strength per cell (where applied) = 1.0' ) + 147 FORMAT( 4X, 'Source map:', /, 8X, 'Starting cell: ( ', I5, & + ', ', I5, ', ', I5, ' )', /, 8X, 'Ending cell: ( ', & + I5, ', ', I5, ', ', I5, ' )', / ) + 1471 FORMAT( 4X, 'MMS-generated source', / ) + + 148 FORMAT( 2X, 'Pseudo Cross Sections Data' ) + 149 FORMAT( 4X, 'ng = ', I3 ) + 150 FORMAT( /, 4X, 'Material ', I1 ) + 151 FORMAT( 4X, 'Group Total Absorption ', & + 'Scattering' ) + 152 FORMAT( 5X, I3, 6X, ES13.6, 3X, ES13.6, 3X, ES13.6 ) + + 153 FORMAT( /, 2X, 'Time-Dependent Calculation Data' ) + 154 FORMAT( 4X, 'tf = ', ES11.4, /, 4X, 'nsteps = ', I4, /, 4X, & + 'dt = ', ES11.4, / ) + 155 FORMAT( 4X, 'Group Speed' ) + 156 FORMAT( 5X, I3, 6X, ES11.4 ) + + 157 FORMAT( /, 2X, 'Solution Control Parameters' ) + 158 FORMAT( 4X, 'epsi = ', ES11.4, /, 4X, 'iitm = ', I3, /, & + 4X, 'oitm = ', I4, /, 4X, 'timedep = ', I1, /, & + 4X, 'swp_typ = ', I1, /, 4X, 'multiswp = ', I1, /, & + 4X, 'angcpy = ', I1, /, 4X, 'it_det = ', I1, /, & + 4X, 'soloutp = ', I1, /, 4X, 'kplane = ', I4, /, & + 4X, 'popout = ', I1, /, 4X, 'fluxp = ', I1, /, & + 4X, 'fixup = ', I1, / ) + + 181 FORMAT( /, 2X, 'Parallelization Parameters' ) + 182 FORMAT( 4X, 'npey = ', I5, /, 4X, 'npez = ', I5, /, 4X, & + 'nthreads = ', I4, / ) + 183 FORMAT( 10X, 'Thread Support Level', /, & + 10X, I2, ' - MPI_THREAD_SINGLE', /, & + 10X, I2, ' - MPI_THREAD_FUNNELED', /, & + 10X, 
I2, ' - MPI_THREAD_SERIALIZED', /, & + 10X, I2, ' - MPI_THREAD_MULTIPLE' ) + 184 FORMAT( 4X, 'thread_level = ', I2, / ) + 185 FORMAT( 4X, '.TRUE. nested threading', /, 6X, 'nnested = ', & + I4, / ) + 186 FORMAT( 4X, '.FALSE. nested threading', /, 6X, 'nnested = ', & + I4, / ) + 187 FORMAT( 4X, 'Parallel Computational Efficiency = ', F6.4, / ) + + 159 FORMAT( 80A, / ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE setup_echo + + + SUBROUTINE setup_scatp ( ierr, error ) + +!----------------------------------------------------------------------- +! +! Print the slgg (scattering matrix) array to special file 'slgg'. +! +!----------------------------------------------------------------------- + + CHARACTER(LEN=64), INTENT(OUT) :: error + + INTEGER(i_knd), INTENT(OUT) :: ierr +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + CHARACTER(LEN=16) :: fn='slgg' + + INTEGER(i_knd) :: l, g1, g2, n, fu=15 +!_______________________________________________________________________ +! +! Open the 'slgg' file +!_______________________________________________________________________ + + ierr = 0 + error = ' ' + CALL open_file ( fu, fn, 'REPLACE', 'WRITE', ierr, error ) + IF ( ierr /= 0 ) RETURN +!_______________________________________________________________________ +! +! Write out the matrix in standard Fortran column-ordering +!_______________________________________________________________________ + + WRITE( fu, 161 ) + WRITE( fu, 162 ) ( ( ( ( slgg(n,l,g1,g2), n = 1, nmat ), & + l = 1, nmom ), g1 = 1, ng ), g2 = 1, ng ) +!_______________________________________________________________________ +! +! 
Close file +!_______________________________________________________________________ + + CALL close_file ( fu, ierr, error ) +!_______________________________________________________________________ + + 161 FORMAT( 'slgg(nmat,nmom,ng,ng) echo', /, 'Column-order loops:',& + ' Mats (fastest ), Moments, Groups, Groups (slowest)' ) + 162 FORMAT( 2X, ES15.8, 2X, ES15.8, 2X, ES15.8, 2X, ES15.8 ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE setup_scatp + + +END MODULE setup_module diff --git a/Fortran/SNAP/src/sn.f90 b/Fortran/SNAP/src/sn.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/sn.f90 @@ -0,0 +1,231 @@ +!----------------------------------------------------------------------- +! +! MODULE: sn_module +!> @brief +!> This module contains the variables and setup subroutines related to +!> the mock discrete ordinates treatment in SNAP. +! +!----------------------------------------------------------------------- + +MODULE sn_module + + USE global_module, ONLY: i_knd, r_knd, zero, one + + IMPLICIT NONE + + PUBLIC + + SAVE +!_______________________________________________________________________ +! +! Module Input Variables +! +! nang - number of discrete ordinates per octant +! nmom - scattering order +!_______________________________________________________________________ + + INTEGER(i_knd) :: nang=1, nmom=1 +!_______________________________________________________________________ +! +! Run-time variables +! +! cmom - computational number of moments according to nmom & ndimen +! noct - number of directional octants +! mu(nang) - x direction cosines +! eta(nang) - y direction cosines +! xi(nang) - z direction cosines +! w(nang) - angle weights +! +! wmu(nang) - w*mu +! weta(nang) - w*eta +! wxi(nang) - w*xi +! +! ec(nang,cmom,noct) - Scattering expansion coefficients +! 
lma(nmom) - number of 'm' moments per order l +!_______________________________________________________________________ + + INTEGER(i_knd) :: cmom, noct + + INTEGER(i_knd), ALLOCATABLE, DIMENSION(:) :: lma + + REAL(r_knd), ALLOCATABLE, DIMENSION(:) :: mu, eta, xi, w, wmu, weta, & + wxi + + REAL(r_knd), ALLOCATABLE, DIMENSION(:,:,:) :: ec + + + CONTAINS + + + SUBROUTINE sn_allocate ( ndimen, ndpwds, istat ) + +!----------------------------------------------------------------------- +! +! Allocate sn_module arrays. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: ndimen + + INTEGER(i_knd), INTENT(INOUT) :: ndpwds, istat +!_______________________________________________________________________ +! +! Allocate cosines and weights +!_______________________________________________________________________ + + cmom = nmom + noct = 2 + + ALLOCATE( mu(nang), w(nang), wmu(nang), eta(nang), weta(nang), & + xi(nang), wxi(nang), STAT=istat ) + IF ( istat > 0 ) RETURN + + w = zero + mu = zero; wmu = zero + eta = zero; weta = zero + xi = zero; wxi = zero + + IF ( ndimen > 1 ) THEN + cmom = nmom*(nmom+1)/2 + noct = 4 + END IF + + IF ( ndimen > 2 ) THEN + cmom = nmom**2 + noct = 8 + END IF + + ALLOCATE( ec(nang,cmom,noct), lma(nmom), STAT=istat ) + IF ( istat > 0 ) RETURN + + ec = zero + lma = 0 + + ndpwds = ndpwds + SIZE( mu ) + SIZE( w ) + SIZE( wmu ) + & + SIZE( eta ) + SIZE( weta ) + SIZE( xi ) + SIZE( wxi ) + & + SIZE( ec ) + SIZE( lma ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE sn_allocate + + + SUBROUTINE sn_deallocate + +!----------------------------------------------------------------------- +! +! Deallocate sn_module arrays. +! 
+!----------------------------------------------------------------------- +!_______________________________________________________________________ + + DEALLOCATE( mu, w, eta, xi, wmu, weta, wxi, ec, lma ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE sn_deallocate + + + SUBROUTINE sn_expcoeff ( ndimen ) + +!----------------------------------------------------------------------- +! +! Compute and store the scattering expansion coefficients. Coefficient +! polynomial is (mu*eta*xi)^l, where l is the moment, starting at 0. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: ndimen +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: id, jd, kd, is, js, ks, l, oct, m, mom +!_______________________________________________________________________ +! +! Set the coefficient as simply the angle raised to a power equal to +! the moment-1. Set octant and loop according to dimension. 
+!_______________________________________________________________________ + + SELECT CASE ( ndimen ) + + CASE ( 1 ) + wmu = w*mu + lma = 1 + DO id = 1, 2 + is = -1 + IF ( id == 2 ) is = 1 + ec(:,1,id) = one + DO l = 2, nmom + ec(:,l,id) = (is*mu)**(2*l-3) + END DO + END DO + + CASE ( 2 ) + + wmu = w*mu + weta = w*eta + + DO l = 1, nmom + lma(l) = l + END DO + + DO jd = 1, 2 + js = -1 + IF ( jd == 2 ) js = 1 + DO id = 1, 2 + is = -1 + IF ( id == 2 ) is = 1 + oct = 2*(jd-1) + id + ec(:,1,oct) = one + mom = 2 + DO l = 2, nmom + DO m = 1, l + ec(:,mom,oct) = (is*mu)**(2*l-3)*(js*eta)**(m-1) + mom = mom + 1 + END DO + END DO + END DO + END DO + + CASE ( 3 ) + + wmu = w*mu + weta = w*eta + wxi = w*xi + + DO l = 1, nmom + lma(l) = 2*l - 1 + END DO + + DO kd = 1, 2 + ks = -1 + IF ( kd == 2 ) ks = 1 + DO jd = 1, 2 + js = -1 + IF ( jd == 2 ) js = 1 + DO id = 1, 2 + is = -1 + IF ( id == 2 ) is = 1 + oct = 4*(kd-1) + 2*(jd-1) + id + ec(:,1,oct) = one + mom = 2 + DO l = 2, nmom + DO m = 1, 2*l-1 + ec(:,mom,oct) = (is*mu)**(2*l-3)*(ks*xi*js*eta)**(m-1) + mom = mom + 1 + END DO + END DO + END DO + END DO + END DO + + END SELECT +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE sn_expcoeff + + +END MODULE sn_module diff --git a/Fortran/SNAP/src/snap.reference_output b/Fortran/SNAP/src/snap.reference_output new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/snap.reference_output @@ -0,0 +1,25 @@ + *WARNING: INPUT_CHECK: MULTISWP reset to 0 for serial + + *WARNING: INPUT_CHECK: NSTEPS reset to 1 for static calc + + keyword Iteration Monitor +******************************************************************************** + ******************** + Outer 1 + 1 Dfmxo= 9.3095E+00 No. Inners= 50 + ******************** + Outer 2 + 2 Dfmxo= 2.9000E+01 No. Inners= 39 + ******************** + Outer 3 + 3 Dfmxo= 1.3260E-01 No. 
Inners= 31 + ******************** + Outer 4 + 4 Dfmxo= 3.4390E-03 No. Inners= 21 + + No. Outers= 4 No. Inners= 141 + +******************************************************************************** + + Success! Done in a SNAP! +exit 0 diff --git a/Fortran/SNAP/src/snap_main.f90 b/Fortran/SNAP/src/snap_main.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/snap_main.f90 @@ -0,0 +1,250 @@ +PROGRAM snap_main + +!----------------------------------------------------------------------- +! +! SNAP - SN Application Proxy +! +! Parallel programming model based on PARTISN +! +! +! SNAP: SN (Discrete Ordinates) Application Proxy +! Version 1.x (C13087) +! LA-CC-13-016 +! +! This code is Unclassified, and contains no Unclassified Controlled +! Nuclear Information +! +! Copyright (c) 2013, Triad National Security, LLC +! All rights reserved. +! +! This program was produced under U.S. Government contract +! 89233218CNA000001 for Los Alamos National Laboratory (LANL), which +! is operated by Triad National Security, LLC for the U.S. Department +! of Energy/National Nuclear Security Administration. +! +! All rights in the program are reserved by Triad National Security, +! LLC and the U.S. Department of Energy/National Nuclear Security +! Administration. The Government is granted for itself and others +! acting on its behalf a nonexclusive, paid-up, irrevocable worldwide +! license in this material to reproduce, prepare derivative works, +! distribute copies to the public, perform publicly and display +! publicly, and to permit others to do so. +! +! NEITHER THE GOVERNMENT NOR TRIAD NATIONAL SECURITY, LLC MAKES ANY +! WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LIABILITY FOR THE USE +! OF THIS SOFTWARE. +! +! This is open source software; you can redistribute it and/or +! modify it under the terms of the 3-Clause BSD License. If software +! is modified to produce derivative works, such modified software +! 
should be clearly marked, so as not to confuse it with the version +! available from LANL. Full text of the 3-Clause BSD License is given +! below. +! +! +! 3-Clause BSD License: +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! 2. Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! 3. Neither the name of Triad National Security, LLC, Los Alamos +! National Laboratory, LANL, the U.S. Government, nor the names of +! its contributors may be used to endorse or promote products +! derived from this software without specific prior written +! permission. +! +! THIS SOFTWARE IS PROVIDED BY TRIAD NATIONAL SECURITY, LLC AND +! CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, +! INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +! MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +! DISCLAIMED. IN NO EVENT SHALL TRIAD NATIONAL SECURITY, LLC OR +! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +! LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +! USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +! OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +! SUCH DAMAGE. +! 
+!----------------------------------------------------------------------- + + USE global_module, ONLY: i_knd, r_knd, ifile, ofile, iunit, ounit, one + + USE utils_module + + USE version_module, ONLY: version_print + + USE input_module, ONLY: input_read + + USE setup_module, ONLY: setup + + USE output_module, ONLY: output + + USE time_module, ONLY: tsnap, wtime, tparset, time_summ, tgrind + + USE dealloc_module + + USE plib_module, ONLY: pinit, iproc, root, comm_snap, bcast, & + pcomm_set, pinit_omp + + USE control_module, ONLY: otrdone, swp_typ + + IMPLICIT NONE +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + CHARACTER(LEN=1) :: star='*' + + CHARACTER(LEN=64) :: error + + INTEGER(i_knd) :: ierr, i, ndpwds + + REAL(r_knd) :: t1, t2, t3, t4, t5 +!_______________________________________________________________________ +! +! Perform calls that set up the parallel environment in MPI and +! OpenMP. Also starts the timer. Update parallel setup time. +!_______________________________________________________________________ + + ierr = 0 + ndpwds = 0 + error = ' ' + + CALL pinit ( t1 ) + + CALL wtime ( t2 ) + tparset = tparset + t2 - t1 +!_______________________________________________________________________ +! +! Read the command line arguments to get i/o file names. Open the two +! files. 
+!_______________________________________________________________________ + + CALL cmdarg ( ierr, error ) + CALL bcast ( ierr, comm_snap, root ) + IF ( ierr /= 0 ) THEN + CALL print_error ( 0, error ) + CALL stop_run ( 0, 0, 0, 0 ) + END IF + + CALL open_file ( iunit, ifile, 'OLD', 'READ', ierr, error ) + CALL bcast ( ierr, comm_snap, root ) + IF ( ierr /= 0 ) THEN + CALL print_error ( 0, error ) + CALL stop_run ( 0, 0, 0, 0 ) + END IF + + CALL open_file ( ounit, ofile, 'REPLACE', 'WRITE', ierr, error ) + CALL bcast ( ierr, comm_snap, root ) + IF ( ierr /= 0 ) THEN + CALL print_error ( 0, error ) + CALL stop_run ( 0, 0, 0, 0 ) + END IF +!_______________________________________________________________________ +! +! Write code version and execution time to output. +!_______________________________________________________________________ + + IF ( iproc == root ) CALL version_print +!_______________________________________________________________________ +! +! Read input +!_______________________________________________________________________ + + CALL input_read + + CALL close_file ( iunit, ierr, error ) + CALL bcast ( ierr, comm_snap, root ) + IF ( ierr /= 0 ) THEN + CALL print_error ( ounit, error ) + CALL stop_run ( 0, 0, 0, 0 ) + END IF +!_______________________________________________________________________ +! +! Get nthreads for each proc. Print the warning about resetting nthreads +! if necessary. Don't stop run. Set up the SDD MPI topology. +!_______________________________________________________________________ + + CALL wtime ( t3 ) + + CALL pinit_omp ( ierr, error ) + IF ( ierr /= 0 ) CALL print_error ( 0, error ) + + CALL pcomm_set + + CALL wtime ( t4 ) + tparset = tparset + t4 - t3 +!_______________________________________________________________________ +! +! Setup problem +!_______________________________________________________________________ + + CALL setup ( ndpwds ) +!_______________________________________________________________________ +! +! 
Call for the problem solution +!_______________________________________________________________________ + + CALL translv ( ndpwds ) +!_______________________________________________________________________ +! +! Output the results. Print the timing summary. +!_______________________________________________________________________ + + CALL output + IF ( iproc == root ) CALL time_summ +!_______________________________________________________________________ +! +! Final cleanup: deallocate, close output file, end the program +!_______________________________________________________________________ + + CALL dealloc_input ( 4 ) + CALL dealloc_solve ( swp_typ, 2 ) + + CALL wtime ( t5 ) + tsnap = t5 - t1 + + IF ( iproc == root ) THEN + WRITE( ounit, 501 ) tsnap +! WRITE( ounit, 502 ) tgrind, ( star, i = 1, 80 ) + WRITE( ounit, 502 ) tgrind + WRITE( ounit, 504 ) one/tgrind, ( star, i = 1, 80 ) + WRITE( ounit, 503 ) ndpwds, ( star, i = 1, 80 ) + END IF + + CALL close_file ( ounit, ierr, error ) + CALL bcast ( ierr, comm_snap, root ) + IF ( ierr /= 0 ) THEN + CALL print_error ( 0, error ) + CALL stop_run ( 1, 0, 0, 0 ) + END IF + + IF ( otrdone ) THEN + CALL stop_run ( 1, 0, 0, 1 ) + ELSE + CALL stop_run ( 1, 0, 0, 2 ) + END IF +!_______________________________________________________________________ + + 501 FORMAT( 2X, 'Total Execution time', T41, ES11.4, / ) + 502 FORMAT( 2X, 'Grind Time (nanoseconds)', 8X, ES11.4, / ) + 504 FORMAT( 2X, 'Inverse Grind Time FOM (1/ns)', 8X, ES11.4, /, /, & + 80A ) + 503 FORMAT( /, 'Allocated words (double precision) per rank', & + T61, I15, /, /, 80A ) +!_______________________________________________________________________ +!_______________________________________________________________________ + +END PROGRAM snap_main diff --git a/Fortran/SNAP/src/solvar.f90 b/Fortran/SNAP/src/solvar.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/solvar.f90 @@ -0,0 +1,308 @@ 
+!----------------------------------------------------------------------- +! +! MODULE: solvar_module +!> @brief +!> This module contains several variables that are used in the solution +!> process, including their allocation and deallocation. Also includes +!> initialization of sweep parameters. +! +!----------------------------------------------------------------------- + +MODULE solvar_module + + USE global_module, ONLY: i_knd, r_knd, zero + + USE plib_module, ONLY: ichunk + + USE geom_module, ONLY: nx, ny, nz, nc + + USE sn_module, ONLY: nang, noct, nmom, cmom + + USE data_module, ONLY: ng + + USE control_module, ONLY: timedep, angcpy, ncor, multiswp + + IMPLICIT NONE + + PUBLIC + + SAVE +!_______________________________________________________________________ +! +! Module variables +! +! ptr_in(nang,nx,ny,nz,noct,ng) - Incoming time-edge flux pointer +! ptr_out(nang,nx,ny,nz,noct,ng) - Outgoing time-edge flux pointer +! +! flux0(nx,ny,nz,ng) - Scalar flux moments array +! flux0po(nx,ny,nz,ng) - Previous outer copy of scalar flux array +! flux0pi(nx,ny,nz,ng) - Previous inner copy of scalar flux array +! fluxm(cmom-1,nx,ny,nz,ng) - Flux moments array +! +! q2grp0(nx,ny,nz,ng) - Isotropic out-of-group + fixed sources +! q2grpm(cmom-1,nx,ny,nz,ng) - Anisotropic out-of-group + fixed sources +! qtot(cmom,ichunk,ny,nz,nc,ng) - Total source: q2grp0 + q2grpm + +! within-group source +! +! t_xs(nx,ny,nz,ng) - Total cross section on mesh +! a_xs(nx,ny,nz,ng) - Absorption cross section on mesh +! s_xs(nx,ny,nz,nmom,ng) - In-group scattering cross section on mesh +! +! psii(nang,ny,nz,ncor,ng) - Working psi_x array +! psij(nang,ichunk,nz,ng) - Working psi_y array +! psik(nang,ichunk,ny,ng) - Working psi_z array +! +! jb_in(nang,ichunk,nz,ncor,ng) - y-dir boundary flux in from comm +! jb_out(nang,ichunk,nz,ncor,ng) - y-dir boundary flux out to comm +! kb_in(nang,ichunk,ny,ncor,ng) - z-dir boundary flux in from comm +! 
kb_out(nang,ichunk,ny,ncor,ng) - z-dir boundary flux out to comm +! +! fixup_counter(4,ncor,ng) - count spatial and time edge fixups +! +! flkx(nx+1,ny,nz,ng) - x-dir leakage array +! flky(nx,ny+1,nz,ng) - y-dir leakage array +! flkz(nx,ny,nz+1,ng) - z-dir leakage array +! +! fmin(ng) - dummy flux min +! fmax(ng) - dummy flux max +! +! pop(ng) - particle population spectrum +! +!_______________________________________________________________________ + + REAL(r_knd), ALLOCATABLE, DIMENSION(:) :: fmin, fmax, pop + + REAL(r_knd), ALLOCATABLE, DIMENSION(:,:,:) :: fixup_counter + + REAL(r_knd), ALLOCATABLE, DIMENSION(:,:,:,:) :: flux0, flux0po, & + flux0pi, q2grp0, t_xs, a_xs, psij, psik, flkx, flky, flkz + + REAL(r_knd), ALLOCATABLE, DIMENSION(:,:,:,:,:) :: q2grpm, fluxm, & + s_xs, psii, jb_in, jb_out, kb_in, kb_out + + REAL(r_knd), ALLOCATABLE, DIMENSION(:,:,:,:,:,:) :: qtot + + REAL(r_knd), DIMENSION(:,:,:,:,:,:), POINTER :: ptr_in, ptr_out + + + CONTAINS + + + SUBROUTINE solvar_allocate ( ndpwds, ierr ) + +!----------------------------------------------------------------------- +! +! Allocate solution arrays. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(INOUT) :: ndpwds + + INTEGER(i_knd), INTENT(OUT) :: ierr +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: szcor +!_______________________________________________________________________ +! +! Allocate ptr_in/out if needed. If angcpy=1, only allocate for one +! copy and point the other pointer at it. If angcpy=2, allocate for +! two copies. Provide an initial condition of zero. This may be +! changed in the future if necessary. 
+!_______________________________________________________________________ + + ierr = 0 + + NULLIFY( ptr_in, ptr_out ) + + IF ( timedep == 1 ) THEN + IF ( angcpy == 1 ) THEN + ALLOCATE( ptr_in(nang,nx,ny,nz,noct,ng), STAT=ierr ) + ELSE + ALLOCATE( ptr_in(nang,nx,ny,nz,noct,ng), & + ptr_out(nang,nx,ny,nz,noct,ng), STAT=ierr ) + END IF + ELSE + ALLOCATE( ptr_in(0,0,0,0,0,0), ptr_out(0,0,0,0,0,0), STAT=ierr ) + END IF + IF ( ierr /= 0 ) RETURN + + IF ( timedep == 1 ) THEN + ptr_in = zero + IF ( angcpy == 1 ) THEN + ptr_out => ptr_in + ELSE + ptr_out = zero + END IF + END IF +!_______________________________________________________________________ +! +! Allocate the flux moments arrays. Keep an old copy. If isotropic, +! allocate fluxm as a dummy array to make passing contiguous pieces of +! it in argument lists possible even in debug mode. There are better +! ways to do this, but want to keep data structures simple for others +! to change as they want easily. +!_______________________________________________________________________ + + ALLOCATE( flux0(nx,ny,nz,ng), flux0po(nx,ny,nz,ng), & + flux0pi(nx,ny,nz,ng), STAT=ierr ) + IF ( ierr /= 0 ) RETURN + + IF ( cmom > 1 ) THEN + ALLOCATE( fluxm(cmom-1,nx,ny,nz,ng), STAT=ierr ) + IF ( ierr /= 0 ) RETURN + ELSE + ALLOCATE( fluxm(0:0,nx,ny,nz,ng), STAT=ierr ) + IF ( ierr /= 0 ) RETURN + END IF + + flux0 = zero + flux0po = zero + flux0pi = zero + fluxm = zero +!_______________________________________________________________________ +! +! Allocate the source arrays. Do the same thing for q2grpm as was done +! to fluxm above. 
+!_______________________________________________________________________ + + ALLOCATE( q2grp0(nx,ny,nz,ng), qtot(cmom,ichunk,ny,nz,nc,ng), & + STAT=ierr ) + IF ( ierr /= 0 ) RETURN + + IF ( cmom > 1 ) THEN + ALLOCATE( q2grpm(cmom-1,nx,ny,nz,ng), STAT=ierr ) + IF ( ierr /= 0 ) RETURN + ELSE + ALLOCATE( q2grpm(0:0,nx,ny,nz,ng), STAT=ierr ) + IF ( ierr /= 0 ) RETURN + END IF + + q2grp0 = zero + q2grpm = zero + qtot = zero +!_______________________________________________________________________ +! +! Allocate the cross section expanded to spatial mesh arrays +!_______________________________________________________________________ + + ALLOCATE( t_xs(nx,ny,nz,ng), a_xs(nx,ny,nz,ng), & + s_xs(nx,ny,nz,nmom,ng), STAT=ierr ) + IF ( ierr /= 0 ) RETURN + + t_xs = zero + a_xs = zero + s_xs = zero +!_______________________________________________________________________ +! +! Working arrays +!_______________________________________________________________________ + + IF ( multiswp == 0 ) THEN + szcor = 1 + ELSE + szcor = ncor + END IF + + ALLOCATE( psii(nang,ny,nz,szcor,ng), psij(nang,ichunk,nz,ng), & + psik(nang,ichunk,ny,ng), STAT=ierr ) + IF ( ierr /= 0 ) RETURN + + psii = zero + psij = zero + psik = zero +!_______________________________________________________________________ +! +! PE boundary flux arrays +!_______________________________________________________________________ + + ALLOCATE( jb_in(nang,ichunk,nz,szcor,ng), & + jb_out(nang,ichunk,nz,szcor,ng), kb_in(nang,ichunk,ny,szcor,ng), & + kb_out(nang,ichunk,ny,szcor,ng), STAT=ierr ) + IF ( ierr /= 0 ) RETURN + + jb_in = zero + jb_out = zero + kb_in = zero + kb_out = zero +!_______________________________________________________________________ +! +! Fixup counter +!_______________________________________________________________________ + + ALLOCATE( fixup_counter(4,szcor,ng), STAT=ierr ) + IF ( ierr /= 0 ) RETURN + + fixup_counter = zero +!_______________________________________________________________________ +! 
+! Leakage arrays +!_______________________________________________________________________ + + ALLOCATE( flkx(nx+1,ny,nz,ng), flky(nx,ny+1,nz,ng), & + flkz(nx,ny,nz+1,ng), STAT=ierr ) + IF ( ierr /= 0 ) RETURN + + flkx = zero + flky = zero + flkz = zero +!_______________________________________________________________________ +! +! Flux extremes and particle population spectrum +!_______________________________________________________________________ + + ALLOCATE( fmin(ng), fmax(ng), pop(ng), STAT=ierr ) + IF ( ierr /= 0 ) RETURN + + fmin = zero + fmax = zero + pop = zero +!_______________________________________________________________________ +! +! Add up allocated words +!_______________________________________________________________________ + + ndpwds = ndpwds + SIZE( ptr_in ) + SIZE( flux0 ) + & + SIZE( flux0po ) + SIZE( flux0pi ) + SIZE( fluxm ) + & + SIZE( q2grp0 ) + SIZE( q2grpm ) + SIZE( qtot ) + SIZE( t_xs ) + & + SIZE( a_xs ) + SIZE( s_xs ) + SIZE( psii ) + SIZE( psij ) + & + SIZE( psik ) + SIZE( jb_in ) + SIZE( jb_out ) + SIZE( kb_in ) + & + SIZE( kb_out ) + SIZE( fixup_counter ) + SIZE( flkx ) + & + SIZE( flky ) + SIZE( flkz ) + SIZE( fmin ) + SIZE( fmax ) + & + SIZE( pop ) + + IF ( angcpy==2 .OR. timedep==0 ) ndpwds = ndpwds + SIZE( ptr_out ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE solvar_allocate + + + SUBROUTINE solvar_deallocate + +!----------------------------------------------------------------------- +! +! Deallocate solve_module arrays. +! +!----------------------------------------------------------------------- +!_______________________________________________________________________ + + DEALLOCATE( ptr_in ) + IF ( angcpy==2 .OR. 
timedep==0 ) DEALLOCATE( ptr_out ) + DEALLOCATE( flux0, flux0po, flux0pi, fluxm ) + DEALLOCATE( q2grp0, q2grpm, qtot ) + DEALLOCATE( t_xs, a_xs, s_xs ) + DEALLOCATE( psii, psij, psik ) + DEALLOCATE( jb_in, jb_out, kb_in, kb_out ) + DEALLOCATE( fixup_counter ) + DEALLOCATE( flkx, flky, flkz ) + DEALLOCATE( fmin, fmax, pop ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE solvar_deallocate + + +END MODULE solvar_module diff --git a/Fortran/SNAP/src/sweep.f90 b/Fortran/SNAP/src/sweep.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/sweep.f90 @@ -0,0 +1,312 @@ +!----------------------------------------------------------------------- +! +! MODULE: sweep_module +!> @brief +!> This module controls the mesh sweep scheduling. It directs the flow +!> of KBA stages for different octants, groups, spatial work chunks. +! +!----------------------------------------------------------------------- + +MODULE sweep_module + + USE global_module, ONLY: i_knd, l_knd, zero, one + + USE data_module, ONLY: ng + + USE geom_module, ONLY: nc, ny, nz, jdim, kdim, ndiag + + USE sn_module, ONLY: nang + + USE control_module, ONLY: inrdone, ncor, corner_sch, nops, yzstg, & + corner_loop_order, multiswp + + USE octsweep_module, ONLY: octsweep + + USE solvar_module, ONLY: flkx, flky, flkz, fmin, fmax, flux0, fluxm, & + jb_in, kb_in, jb_out, kb_out, fixup_counter + + USE plib_module, ONLY: nthreads, waitinit, ichunk, npey, npez + + USE thrd_comm_module, ONLY: assign_thrd_set, multiswp_recv_bdry, & + multiswp_test_pick, multiswp_send_bdry + + IMPLICIT NONE + + PRIVATE + + PUBLIC :: sweep + + SAVE + + + CONTAINS + + + SUBROUTINE sweep ( t, do_grp, ng_per_thrd, nnstd_used, grp_act ) + +!----------------------------------------------------------------------- +! +! Driver for the mesh sweeps. Manages the loops over octant pairs. +! 
+!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: t + + INTEGER(i_knd), INTENT(INOUT) :: ng_per_thrd, nnstd_used + + INTEGER(i_knd), DIMENSION(ng), INTENT(INOUT) :: do_grp + + INTEGER(i_knd), DIMENSION(ng,nthreads), INTENT(INOUT) :: grp_act + +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: cor, jd, kd, g, nc_tot, nstg, n, nulreq, & + dummy(2)=(/0, 0/) + + INTEGER(i_knd), DIMENSION(ncor) :: mtag, ic_op, gc, g_op + + INTEGER(i_knd), DIMENSION(2*ncor) :: reqr, reqs + + LOGICAL(l_knd), DIMENSION(ncor) :: done, callrecv +!_______________________________________________________________________ +! +! Assign the work to threads. Main level threads always applied to +! energy groups. Apply nested threads additionally to groups if +! swp_typ is 0. Apply nested threads to mini-KBA if swp_typ is 1. +!_______________________________________________________________________ + + !$OMP MASTER + + do_grp = 1 + WHERE ( inrdone ) do_grp = 0 + + CALL assign_thrd_set ( do_grp, ng, ng_per_thrd, ndiag, nnstd_used, & + grp_act ) + + !$OMP END MASTER + !$OMP BARRIER +!_______________________________________________________________________ +! +! Clean up and initialize some values. +!_______________________________________________________________________ + + nc_tot = 0 + + clean_loop: DO n = 1, ng_per_thrd + + g = grp_act(n,t) + IF ( g == 0 ) EXIT clean_loop + + fixup_counter(:,:,g) = zero + + fmin(g) = HUGE( one ) + fmax(g) = zero + + flkx(:,:,:,g) = zero + flky(:,:,:,g) = zero + flkz(:,:,:,g) = zero + + flux0(:,:,:,g) = zero + fluxm(:,:,:,:,g) = zero + + if ( multiswp == 1 ) nc_tot = nc_tot + nc + + END DO clean_loop +!_______________________________________________________________________ +! +! Single sweep, using non-blocking send, blocking receive. Each thread +! 
initializes the reqs (send request) array for asynchronous sends. +!_______________________________________________________________________ + + IF ( multiswp == 0 ) THEN + + CALL waitinit ( reqs, SIZE( reqs ) ) +!_______________________________________________________________________ +! +! Loop over octant pairs; set the starting corner, i.e., the +! direction in y and z. Spawn the nested threads if used. +!_______________________________________________________________________ + + cor_loop: DO cor = 1, ncor + + jd = corner_sch(1,cor) + kd = corner_sch(2,cor) +!_______________________________________________________________________ +! +! Loop over the groups assigned to each thread. +!_______________________________________________________________________ + + grp_loop: DO n = 1, ng_per_thrd + + g = grp_act(n,t) +!_______________________________________________________________________ +! +! Sweep all the chunks of an octant pair (+/- x-dir). +!_______________________________________________________________________ + + CALL octsweep ( g, jd, kd, t, reqs, SIZE( reqs ), 0, 1, & + nnstd_used ) +!_______________________________________________________________________ +! +! End the loops. Destroy the task set. +!_______________________________________________________________________ + + END DO grp_loop + + END DO cor_loop + + ELSE +!_______________________________________________________________________ +! +! Multiple, simultaneious sweeps from all starting corners, using +! completely asynchronous sends and receives and tests to check +! communication completeness. +!_______________________________________________________________________ + + CALL waitinit ( reqr, SIZE( reqr ) ) + CALL waitinit ( reqs, SIZE( reqs ) ) + nulreq = reqs(1) + + nc_tot = nc_tot*2 + nstg = nc_tot + npey + npez - 2 +!_______________________________________________________________________ +! +! Initialize all the counters that will control the concurrent +! octant sweeps. +! +! 
done(ncor) - flag for when all groups done for a starting +! corner +! callrecv(ncor) - which starting corner needs to call for another +! message +! mtag(ncor) - message tag for each starting corner +! ic_op(ncor) - chunk counter for each corner over octant-pair +! gc(ncor) - group corresponding to each starting corner +! g_op(ncor) - counter for each corner over octant-pair and +! all groups +!_______________________________________________________________________ + + done = .FALSE. + callrecv = .TRUE. + + ic_op = 1 + gc = 1 + g_op = 1 +!_______________________________________________________________________ +! +! If there are more threads than groups with work left to do, call +! it done immediately. +!_______________________________________________________________________ + + IF ( grp_act(1,t) == 0 ) done = .TRUE. +!_______________________________________________________________________ +! +! Loop over all chunks: all groups, all octants. Simultaneous sweeps +! from starting corners (2 in 2D, 4 in 3D). Exit when all work done. +!_______________________________________________________________________ + + done_loop: DO + + IF ( ALL( done ) ) EXIT +!_______________________________________________________________________ +! +! Call to non-blocking receive for the corner that has +! callrecv=true: first pass that is all starting corners; +! subsequent passes only one corner is true. +!_______________________________________________________________________ + + CALL multiswp_recv_bdry ( jdim, kdim, ncor, ng, ic_op, & + grp_act(:,t), gc, callrecv, mtag, reqr, SIZE( reqr ), nang, & + ichunk, ny, nz, jb_in, kb_in ) +!_______________________________________________________________________ +! +! Determine next corner to be swept according to available +! message, safe buffer space, and remaining stages for given +! direction. 
+!_______________________________________________________________________ + + CALL multiswp_test_pick ( ncor, corner_loop_order, yzstg, nstg,& + nulreq, g_op, reqr, SIZE( reqr ), done, reqs, SIZE( reqs ), & + cor ) +!_______________________________________________________________________ +! +! Set up the sweep for this corner. +!_______________________________________________________________________ + + g = grp_act(gc(cor),t) + + SELECT CASE ( cor ) + CASE ( 1 ) + jd = 1; kd = 1 + CASE ( 2 ) + jd = 2; kd = 1 + CASE ( 3 ) + jd = 1; kd = 2 + CASE ( 4 ) + jd = 2; kd = 2 + END SELECT +!_______________________________________________________________________ +! +! Sweep the task defined by the group, chunk, octant. Send the +! outgoing data downstream. +!_______________________________________________________________________ + + CALL octsweep ( g, jd, kd, 0, dummy, 2, ic_op(cor), cor, 0 ) + + CALL multiswp_send_bdry ( ncor, jd, kd, nang, ichunk, ny, nz, & + mtag(cor), reqs(cor), reqs(ncor+cor), jb_out(:,:,:,cor,g), & + kb_out(:,:,:,cor,g) ) +!_______________________________________________________________________ +! +! Increment the counters, and reset control variables. +!_______________________________________________________________________ + + IF ( ic_op(cor) /= 2*nc ) THEN + ic_op(cor) = ic_op(cor) + 1 + ELSE + ic_op(cor) = 1 + gc(cor) = gc(cor) + 1 + END IF + + IF ( g_op(cor) /= nc_tot ) THEN + g_op(cor) = g_op(cor) + 1 + callrecv(cor) = .TRUE. + ELSE + done(cor) = .TRUE. + END IF +!_______________________________________________________________________ +! +! End loop over all operations. End multiswp selection. +!_______________________________________________________________________ + + END DO done_loop + + END IF +!_______________________________________________________________________ +! +! Sum fixup count over corners (if multiswp is on). Calculate dummy +! min and max scalar fluxes (not used elsewhere currently). 
+!_______________________________________________________________________ + + edit_loop: DO n = 1, ng_per_thrd + + g = grp_act(n,t) + IF ( g == 0 ) EXIT edit_loop + + IF ( multiswp == 1 ) fixup_counter(:,1,g) = & + fixup_counter(:,1,g) + fixup_counter(:,2,g) + & + fixup_counter(:,3,g) + fixup_counter(:,4,g) + + fmin(g) = MINVAL( flux0 ) + fmax(g) = MAXVAL( flux0 ) + + END DO edit_loop +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE sweep + + +END MODULE sweep_module diff --git a/Fortran/SNAP/src/thrd_comm.f90 b/Fortran/SNAP/src/thrd_comm.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/thrd_comm.f90 @@ -0,0 +1,637 @@ +!----------------------------------------------------------------------- +! +! MODULE: thrd_comm_module +!> @brief +!> This module contains the subroutines that setup/handle the +!> communications in the presence of threads. This includes making +!> thread sets that allows synchronized, ordered work as wide as the +!> number of threads; assigning groups to threads within these thread +!> sets; and the communication routines themselves. +! +!----------------------------------------------------------------------- + +MODULE thrd_comm_module + + USE global_module, ONLY: i_knd, l_knd, r_knd + + USE plib_module, ONLY: nthreads, nnested, plock_omp, ycomm, zcomm, & + precv, ylop, yhip, zlop, zhip, firsty, firstz, lasty, lastz, & + use_lock, waitall, isend, yproc, zproc, irecv, waitsome, testsome, & + waits, tests + + IMPLICIT NONE + + PRIVATE + + PUBLIC :: assign_thrd_set, no_op_lock_control, sweep_recv_bdry, & + sweep_send_bdry, multiswp_recv_bdry, multiswp_test_pick, & + multiswp_send_bdry, lock_control + + SAVE +!_______________________________________________________________________ +! +! Run-time variables +! +! 
g_off - group offset for message tags +!_______________________________________________________________________ + + INTEGER(i_knd), PARAMETER :: g_off = 2**14, c_off = 2**12 + + + CONTAINS + + + SUBROUTINE assign_thrd_set ( do_task, tlen, tasks_per_thrd, nlen, & + nstd_set_size, task_act ) + +!----------------------------------------------------------------------- +! +! Set up thread sets and assign tasks (i.e., groups) to threads. +! tasks_per_thrd: number of tasks each thread will perform +! nstd_set_size: number of nested threads that will be spawned +! task_act: the array that tells each thread what to do +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: tlen, nlen + + INTEGER(i_knd), INTENT(OUT) :: tasks_per_thrd, nstd_set_size + + INTEGER(i_knd), DIMENSION(tlen), INTENT(INOUT) :: do_task + + INTEGER(i_knd), DIMENSION(tlen,nthreads), INTENT(OUT) :: task_act +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: thrd_set_size, ntasks, n, t, next +!_______________________________________________________________________ +! +! Initialize outgoing variables. Set thread set size and number of +! groups per set. Return if no tasks. +!_______________________________________________________________________ + + nstd_set_size = 0 + tasks_per_thrd = 0 + + ntasks = COUNT( do_task > 0 ) + thrd_set_size = MIN( ntasks, nthreads ) + + IF ( thrd_set_size == 0 ) THEN + tasks_per_thrd = 0 + nstd_set_size = 0 + RETURN + END IF + + tasks_per_thrd = (ntasks-1)/thrd_set_size + 1 + + task_act = 0 +!_______________________________________________________________________ +! +! 
Assign tasks to threads in the set sequentially +!_______________________________________________________________________ + + n_loop: DO n = 1, tasks_per_thrd + DO t = 1, thrd_set_size + next = MAXLOC( do_task, 1 ) + IF ( do_task(next) == 0 ) EXIT n_loop + task_act(n,t) = next + do_task(next) = 0 + END DO + END DO n_loop +!_______________________________________________________________________ +! +! Set the number of nested threads. If provided nlen is zero, apply +! nested threads to main thread tasks. If nlen is non-zero, set the +! nested thread set size according to width of nested work. +!_______________________________________________________________________ + + IF ( nlen == 0 ) THEN + nstd_set_size = MIN( tasks_per_thrd, nnested ) + ELSE + nstd_set_size = MIN( nlen, nnested ) + END IF +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE assign_thrd_set + + + SUBROUTINE no_op_lock_control ( t ) + +!----------------------------------------------------------------------- +! +! Control threaded communication that lack thread_multiple thread level +! with locks. If a thread does not have group sweep to perform, must +! still set/unset locks. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: t +!_______________________________________________________________________ +! +! If locks are unnecessary, return immediately +!_______________________________________________________________________ + + IF ( .NOT.use_lock ) RETURN +!_______________________________________________________________________ +! +! Each thread sets its own lock. If a thread's lock is already set, it +! must wait until another thread unlocks it: work is suspended until +! it can set its lock and proceed. Then thread unsets next thread's +! lock. 
+!_______________________________________________________________________ + + CALL lock_control ( 'set', t ) + + CALL lock_control ( 'unset', t ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE no_op_lock_control + + + SUBROUTINE sweep_recv_bdry ( g, jd, kd, iop, t, reqs, szreq, nc, & + nang, ichunk, ny, nz, jb_in, kb_in ) + +!----------------------------------------------------------------------- +! +! Receive jb_in and kb_in flux from upstream boundaries +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: g, jd, kd, iop, t, szreq, nc, nang, & + ichunk, ny, nz + + INTEGER(i_knd), DIMENSION(szreq), INTENT(INOUT) :: reqs + + REAL(r_knd), DIMENSION(nang,ichunk,nz), INTENT(OUT) :: jb_in + + REAL(r_knd), DIMENSION(nang,ichunk,ny), INTENT(OUT) :: kb_in +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: yp_rcv, zp_rcv, mtag + + LOGICAL(l_knd) :: incomingy, outgoingy, incomingz, outgoingz +!_______________________________________________________________________ +! +! Determine sender of message and set tag if a message is to be +! received (g/=0). Tag is set to offset by group, octant, and spatial +! work chunk. +!_______________________________________________________________________ + + IF ( jd == 1 ) THEN + yp_rcv = yhip + incomingy = .NOT.lasty + outgoingy = .NOT.firsty + ELSE + yp_rcv = ylop + incomingy = .NOT.firsty + outgoingy = .NOT.lasty + END IF + + IF ( kd == 1 ) THEN + zp_rcv = zhip + incomingz = .NOT.lastz + outgoingz = .NOT.firstz + ELSE + zp_rcv = zlop + incomingz = .NOT.firstz + outgoingz = .NOT.lastz + END IF + + mtag = g*g_off + 2*nc*(jd-1 + 2*(kd-1)) + iop +!_______________________________________________________________________ +! +! 
If locks are used to control threaded communications, each thread +! sets its own lock. If a thread's lock is already set, it must wait +! until another thread unlocks it: work is suspended until it can set +! its lock and proceed. +!_______________________________________________________________________ + + IF ( use_lock ) CALL lock_control ( 'set', t ) +!_______________________________________________________________________ +! +! Call to receive upstream data. Call for a wait on previous +! asynchronous send operations. +!_______________________________________________________________________ + + IF ( incomingy ) CALL precv ( yp_rcv, yproc, nang, ichunk, nz, & + jb_in, ycomm, mtag ) + IF ( incomingz ) CALL precv ( zp_rcv, zproc, nang, ichunk, ny, & + kb_in, zcomm, mtag ) + IF ( outgoingy .OR. outgoingz ) CALL waitall ( reqs, szreq ) +!_______________________________________________________________________ +! +! If locks are used, unset the lock of another thread to allow it to +! perform its communications +!_______________________________________________________________________ + + IF ( use_lock ) CALL lock_control ( 'unset', t ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE sweep_recv_bdry + + + SUBROUTINE sweep_send_bdry ( g, jd, kd, iop, t, reqs, szreq, nc, & + nang, ichunk, ny, nz, jb_out, kb_out ) + +!----------------------------------------------------------------------- +! +! Send flux to downstream boundaries +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: g, jd, kd, iop, t, szreq, nc, nang, & + ichunk, ny, nz + + INTEGER(i_knd), DIMENSION(szreq), INTENT(INOUT) :: reqs + + REAL(r_knd), DIMENSION(nang,ichunk,nz), INTENT(IN) :: jb_out + + REAL(r_knd), DIMENSION(nang,ichunk,ny), INTENT(IN) :: kb_out +!_______________________________________________________________________ +! 
+! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: yp_snd, zp_snd, mtag + + LOGICAL(l_knd) :: outgoingy, outgoingz +!_______________________________________________________________________ +! +! Determine recipient of message and set tag Tag is set to offset by +! group, octant, and spatial work chunk. +!_______________________________________________________________________ + + IF ( jd == 1 ) THEN + yp_snd = ylop + outgoingy = .NOT.firsty + ELSE + yp_snd = yhip + outgoingy = .NOT.lasty + END IF + + IF ( kd == 1 ) THEN + zp_snd = zlop + outgoingz = .NOT.firstz + ELSE + zp_snd = zhip + outgoingz = .NOT.lastz + END IF + + mtag = g*g_off + 2*nc*(jd-1 + 2*(kd-1)) + iop +!_______________________________________________________________________ +! +! If locks are used to control threaded communications, each thread +! sets its own lock. If a thread's lock is already set, it must wait +! until another thread unlocks it: work is suspended until it can set +! its lock and proceed. +!_______________________________________________________________________ + + IF ( use_lock ) CALL lock_control ( 'set', t ) +!_______________________________________________________________________ +! +! Call to send data downstream. Use non-blocking send. +!_______________________________________________________________________ + + IF ( outgoingy ) CALL isend ( yp_snd, yproc, nang, ichunk, nz, & + jb_out, ycomm, mtag, reqs(1) ) + IF ( outgoingz ) CALL isend ( zp_snd, zproc, nang, ichunk, ny, & + kb_out, zcomm, mtag, reqs(2) ) +!_______________________________________________________________________ +! +! If locks are used, unset the lock of another thread to allow it to +! 
perform its communications +!_______________________________________________________________________ + + IF ( use_lock ) CALL lock_control ( 'unset', t ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE sweep_send_bdry + + + SUBROUTINE multiswp_recv_bdry ( jdim, kdim, ncor, ng, ic_op, & + grp_act, gc, callrecv, mtag, reqr, szreq, nang, ichunk, ny, nz, & + jb_in, kb_in ) + +!----------------------------------------------------------------------- +! +! Receive jb_in and kb_in flux from upstream boundaries for one or all +! starting corner sweeps. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: jdim, kdim, ncor, ng, szreq, nang, & + ichunk, ny, nz + + INTEGER(i_knd), DIMENSION(ncor), INTENT(IN) :: ic_op, gc + + INTEGER(i_knd), DIMENSION(ng), INTENT(IN) :: grp_act + + INTEGER(i_knd), DIMENSION(ncor), INTENT(OUT) :: mtag + + INTEGER(i_knd), DIMENSION(szreq), INTENT(INOUT) :: reqr + + LOGICAL(l_knd), DIMENSION(ncor), INTENT(INOUT) :: callrecv + + REAL(r_knd), DIMENSION(nang,ichunk,nz,ncor,ng), INTENT(OUT) :: jb_in + + REAL(r_knd), DIMENSION(nang,ichunk,ny,ncor,ng), INTENT(OUT) :: kb_in +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + INTEGER(i_knd) :: icor, jd, kd, g, yp_rcv, zp_rcv +!_______________________________________________________________________ +!_______________________________________________________________________ + + IF ( ncor == 1 ) RETURN +!_______________________________________________________________________ +! +! 
Loop over corners, check callrecv flag, set tag +!_______________________________________________________________________ + + icor = 0 + + DO kd = 1, kdim + DO jd = 1, jdim + + icor = icor + 1 + IF ( .NOT.callrecv(icor) ) CYCLE + callrecv(icor) = .FALSE. + g = grp_act(gc(icor)) + mtag(icor) = g_off*g + c_off*(icor-1) + ic_op(icor) +!_______________________________________________________________________ +! +! Set up the communication basics +!_______________________________________________________________________ + + IF ( jd == 1 ) THEN + yp_rcv = yhip + ELSE + yp_rcv = ylop + END IF + + IF ( kd == 1 ) THEN + zp_rcv = zhip + ELSE + zp_rcv = zlop + END IF +!_______________________________________________________________________ +! +! Make asynchronous calls to receive data +!_______________________________________________________________________ + + IF ( use_lock ) CALL plock_omp ( 'set', 1 ) + + CALL irecv ( yp_rcv, yproc, nang, ichunk, nz, & + jb_in(:,:,:,icor,g), ycomm, mtag(icor), reqr(icor) ) + + CALL irecv ( zp_rcv, zproc, nang, ichunk, ny, & + kb_in(:,:,:,icor,g), zcomm, mtag(icor), & + reqr(ncor+icor) ) + + IF ( use_lock ) CALL plock_omp ( 'unset', 1 ) +!_______________________________________________________________________ +! +! End the loops +!_______________________________________________________________________ + + END DO + END DO +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE multiswp_recv_bdry + + + SUBROUTINE multiswp_test_pick ( ncor, corner_loop_order, yzstg, nstg,& + nulreq, g_op, reqr, szreqr, done, reqs, szreqs, cor ) + +!----------------------------------------------------------------------- +! +! Determine the next corner that shall be swept. Wait until a message is +! received, indicating that the corresponding corner sweep is set to +! advance. Then check that the send buffers are also clear. If yes, +! proceed with sweep. 
If not, use skip_corner to find a different corner +! to sweep. Exit once a suitable corner is found with received incoming +! information and send buffers safe to overwrite. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: ncor, nstg, nulreq, szreqr, szreqs + + INTEGER(i_knd), INTENT(OUT) :: cor + + INTEGER(i_knd), DIMENSION(ncor), INTENT(IN) :: corner_loop_order, & + yzstg, g_op + + INTEGER(i_knd), DIMENSION(szreqr), INTENT(INOUT) :: reqr + + INTEGER(i_knd), DIMENSION(szreqs), INTENT(INOUT) :: reqs + + LOGICAL(l_knd), DIMENSION(ncor), INTENT(IN) :: done +!_______________________________________________________________________ +! +! Local variables. +!_______________________________________________________________________ + + INTEGER(i_knd) :: i, j, outcount + + INTEGER(i_knd), DIMENSION(ncor) :: tcor, skip_cor + + INTEGER(i_knd), DIMENSION(szreqr) :: indx +!_______________________________________________________________________ +!_______________________________________________________________________ + + IF ( ncor == 1 ) THEN + cor = 1 + RETURN + END IF +!_______________________________________________________________________ +! +! Loop over send and receive buffers to find suitable next task +!_______________________________________________________________________ + + cor = 0 + skip_cor = 0 + + send_cor_loop: DO + + indx = 0 + tcor = -1 +!_______________________________________________________________________ +! +! Loop over starting corners to see if any messages arrived, +! according to reqr values. Determine remaining number of stages for +! 
prioritizing next task +!_______________________________________________________________________ + + recv_cor_loop: DO + + IF ( use_lock ) CALL plock_omp ( 'set', 1 ) + + CALL testsome ( reqr, indx, szreqr, outcount ) + + IF ( use_lock ) CALL plock_omp ( 'unset', 1 ) + + DO j = 1, ncor + i = corner_loop_order(j) + IF ( skip_cor(i) == 1 ) CYCLE + IF ( done(i) ) CYCLE + IF ( reqr(i)==nulreq .AND. reqr(ncor+i)==nulreq ) & + tcor(i) = nstg - g_op(i) - yzstg(i) + END DO +!_______________________________________________________________________ +! +! Take corner with most stages to go or check against allowed +! repeat of corner. Clean the skip_cor flag if need to re-test. +!_______________________________________________________________________ + + IF ( ANY( tcor >= 0 ) ) THEN + cor = MAXLOC( tcor, 1 ) + EXIT recv_cor_loop + ELSE + skip_cor = 0 + END IF + + END DO recv_cor_loop +!_______________________________________________________________________ +! +! For selected corner, test if the send buffer is also free. Exit if +! so and return to receive side if not. +!_______________________________________________________________________ + + IF ( use_lock ) CALL plock_omp ( 'set', 1 ) + + CALL tests ( reqs(cor) ) + CALL tests ( reqs(ncor+cor) ) + + IF ( use_lock ) CALL plock_omp ( 'unset', 1 ) + + IF ( reqs(cor)==nulreq .AND. reqs(ncor+cor)==nulreq ) THEN + EXIT send_cor_loop + ELSE + skip_cor(cor) = 1 + END If + + END DO send_cor_loop +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE multiswp_test_pick + + + SUBROUTINE multiswp_send_bdry ( ncor, jd, kd, nang, ichunk, ny, nz, & + mtag, reqy, reqz, jb_out, kb_out ) + +!----------------------------------------------------------------------- +! +! Transmit outgoing edge fluxes to downstream PE's. +! 
+!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: ncor, jd, kd, nang, ichunk, ny, nz, & + mtag + + INTEGER(i_knd), INTENT(INOUT) :: reqy, reqz + + REAL(r_knd), DIMENSION(nang,ichunk,nz), INTENT(IN) :: jb_out + + REAL(r_knd), DIMENSION(nang,ichunk,ny), INTENT(IN) :: kb_out + +!----------------------------------------------------------------------- +! +! Local variables. +! +!----------------------------------------------------------------------- + + INTEGER(i_knd) :: yp_snd, zp_snd +!_______________________________________________________________________ +!_______________________________________________________________________ + + IF ( ncor == 1 ) RETURN +!_______________________________________________________________________ +! +! Set up the communication basics +!_______________________________________________________________________ + + IF ( jd == 1 ) THEN + yp_snd = ylop + ELSE + yp_snd = yhip + END IF + + IF ( kd == 1 ) THEN + zp_snd = zlop + ELSE + zp_snd = zhip + END IF +!_______________________________________________________________________ +! +! Make asynchronous calls to send data +!_______________________________________________________________________ + + IF ( use_lock ) CALL plock_omp ( 'set', 1 ) + + CALL isend ( yp_snd, yproc, nang, ichunk, nz, jb_out, ycomm, mtag, & + reqy ) + + CALL isend ( zp_snd, zproc, nang, ichunk, ny, kb_out, zcomm, mtag, & + reqz ) + + IF ( use_lock ) CALL plock_omp ( 'unset', 1 ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE multiswp_send_bdry + + + SUBROUTINE lock_control ( op, t ) + +!----------------------------------------------------------------------- +! +! Set/unset array of locks used by threads with MPI_THREAD_SERIALIZED. +! 
+!----------------------------------------------------------------------- + + CHARACTER(LEN=*), INTENT(IN) :: op + + INTEGER(i_knd), INTENT(IN) :: t +!_______________________________________________________________________ +!_______________________________________________________________________ + + SELECT CASE ( op ) + + CASE ( 'set' ) + + !$OMP BARRIER + IF ( t < nthreads ) CALL plock_omp ( 'set', t+1 ) + !$OMP BARRIER + CALL plock_omp ( 'set', t ) + + CASE ( 'unset' ) + + IF ( t < nthreads ) CALL plock_omp ( 'unset', t+1 ) + CALL plock_omp ( 'unset', t ) + + CASE DEFAULT + + RETURN + + END SELECT +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE lock_control + + +END MODULE thrd_comm_module diff --git a/Fortran/SNAP/src/time.F90 b/Fortran/SNAP/src/time.F90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/time.F90 @@ -0,0 +1,131 @@ +!----------------------------------------------------------------------- +! +! MODULE: time_module +!> @brief +!> This module contains the variables that measure SNAP execution +!> times for different pieces of code and the subroutine used to get the +!> time. It also has the timing summary print. +! +!----------------------------------------------------------------------- + +MODULE time_module + + USE global_module, ONLY: r_knd, i_knd, zero, ounit + +#ifdef MPI + USE mpi +#endif + + IMPLICIT NONE + + PUBLIC + + SAVE +!_______________________________________________________________________ +! +! Run-time variables +! +! tsnap - total SNAP run time +! tparset - parallel environment setup time +! tinp - input run time +! tset - setup run time +! tslv - total solution run time +! tparam - time for setting up solve parameters +! totrsrc - time for outer source computations +! tinners - total time spent on inner iterations +! tinrsrc - time for inner source computations +! 
tsweeps - time for transport sweeps, including angular source compute
+!   tinrmisc - time for miscellaneous inner ops
+!   tslvmisc - time for miscellaneous solution ops
+!   tout - output run time
+!   tgrind - transport grind time
+!_______________________________________________________________________
+
+ REAL(r_knd) :: tsnap=zero, tparset=zero, tinp=zero, tset=zero, &
+ tslv=zero, tparam=zero, totrsrc=zero, tinners=zero, tinrsrc=zero, &
+ tsweeps=zero, tinrmisc=zero, tslvmisc=zero, tout=zero, tgrind=zero
+
+
+ CONTAINS
+
+
+ SUBROUTINE wtime ( time )
+
+!-----------------------------------------------------------------------
+!
+! Get the current time
+!
+!-----------------------------------------------------------------------
+
+ REAL(r_knd), INTENT(OUT) :: time
+!_______________________________________________________________________
+
+#ifdef MPI
+ time = MPI_WTIME()
+#else
+ CALL CPU_TIME ( time )
+#endif
+!_______________________________________________________________________
+!_______________________________________________________________________
+
+ END SUBROUTINE wtime
+
+
+ SUBROUTINE time_summ
+
+!-----------------------------------------------------------------------
+!
+! Print the timing summary to the output file.
+!
+!-----------------------------------------------------------------------
+!_______________________________________________________________________
+!
+! 
Local variables +!_______________________________________________________________________ + + CHARACTER(LEN=1) :: star='*' + + INTEGER(i_knd) :: i +!_______________________________________________________________________ + + tinrmisc = tinners - ( tinrsrc + tsweeps ) + tslvmisc = tslv - ( tparam + totrsrc + tinners ) + + WRITE( ounit, 401 ) ( star, i = 1, 80 ) + WRITE( ounit, 402 ) + WRITE( ounit, 403 ) tparset + WRITE( ounit, 404 ) tinp + WRITE( ounit, 405 ) tset + WRITE( ounit, 406 ) tslv + WRITE( ounit, 407 ) tparam + WRITE( ounit, 408 ) totrsrc + WRITE( ounit, 409 ) tinners + WRITE( ounit, 410 ) tinrsrc + WRITE( ounit, 411 ) tsweeps + WRITE( ounit, 412 ) tinrmisc + WRITE( ounit, 413 ) tslvmisc + WRITE( ounit, 414 ) tout +!_______________________________________________________________________ + + 401 FORMAT( 10X, 'keyword Timing Summary', /, 80A, / ) + 402 FORMAT( 2X, 'Code Section', T41, 'Time (seconds)', /, & + 1X, '**************', T40, '****************' ) + 403 FORMAT( 4X, 'Parallel Setup', T41, ES11.4 ) + 404 FORMAT( 4X, 'Input', T41, ES11.4 ) + 405 FORMAT( 4X, 'Setup', T41, ES11.4 ) + 406 FORMAT( 4X, 'Solve', T41, ES11.4 ) + 407 FORMAT( 7X, 'Parameter Setup', T41, ES11.4 ) + 408 FORMAT( 7X, 'Outer Source', T41, ES11.4 ) + 409 FORMAT( 7X, 'Inner Iterations', T41, ES11.4 ) + 410 FORMAT( 10X, 'Inner Source', T41, ES11.4 ) + 411 FORMAT( 10X, 'Transport Sweeps', T41, ES11.4 ) + 412 FORMAT( 10X, 'Inner Misc Ops', T41, ES11.4 ) + 413 FORMAT( 7X, 'Solution Misc Ops', T41, ES11.4 ) + 414 FORMAT( 4X, 'Output', T41, ES11.4 ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE time_summ + + +END MODULE time_module diff --git a/Fortran/SNAP/src/translv.f90 b/Fortran/SNAP/src/translv.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/translv.f90 @@ -0,0 +1,396 @@ +SUBROUTINE translv ( ndpwds ) + 
+!----------------------------------------------------------------------- +! +! Solution driver. Contains the time and outer loops. Calls for outer +! iteration work. Checks convergence and handles eventual output. +! +!----------------------------------------------------------------------- + + USE global_module, ONLY: i_knd, r_knd, ounit, zero, half, one, two + + USE plib_module, ONLY: glmax, comm_snap, iproc, root, ichunk, & + nthreads, thread_num + + USE geom_module, ONLY: geom_allocate, dinv, geom_param_calc, nx, & + ny_gl, nz_gl + + USE sn_module, ONLY: nang, noct, mu, eta, xi, nmom + + USE data_module, ONLY: ng, v, vdelt, mat, sigt, siga, slgg, src_opt, & + qim + + USE control_module, ONLY: nsteps, timedep, dt, oitm, otrdone, dfmxo, & + it_det, popout, swp_typ, angcpy, update_ptr, iitm + + USE utils_module, ONLY: print_error, stop_run + + USE solvar_module, ONLY: solvar_allocate, ptr_in, ptr_out, t_xs, & + a_xs, s_xs, flux0, fluxm + + USE expxs_module, ONLY: expxs_reg + + USE outer_module, ONLY: outer + + USE time_module, ONLY: tslv, wtime, tgrind, tparam + + USE thrd_comm_module, ONLY: assign_thrd_set + + USE analyze_module, ONLY: analyze_pop_calc + + IMPLICIT NONE + +!----------------------------------------------------------------------- +! +! Dummy arguments +! +!----------------------------------------------------------------------- + + INTEGER, INTENT(INOUT) :: ndpwds +!_______________________________________________________________________ +! +! 
Local variables +!_______________________________________________________________________ + + CHARACTER(LEN=1) :: star='*' + + CHARACTER(LEN=64) :: error + + INTEGER(i_knd) :: cy, otno, ierr, g, l, i, tot_iits, cy_iits, & + out_iits, t, ng_per_thrd, n, nnstd_used + + INTEGER(i_knd), DIMENSION(ng) :: iits, do_grp + + INTEGER(i_knd), DIMENSION(ng,nthreads) :: grp_act + + REAL(r_knd) :: sf, time, t1, t2, t3, t4, t5, tmp + + REAL(r_knd), DIMENSION(:,:,:,:,:,:), POINTER :: ptr_tmp +!_______________________________________________________________________ +! +! Call for data allocations. Some allocations depend on the problem +! type being requested. +!_______________________________________________________________________ + + CALL wtime ( t1 ) + + ierr = 0 + error = ' ' + + CALL geom_allocate ( nang, ng, swp_typ, ichunk, ndpwds, ierr ) + CALL glmax ( ierr, comm_snap ) + IF ( ierr /= 0 ) THEN + error = '***ERROR: GEOM_ALLOC: Allocation error of sweep parameters' + CALL print_error ( ounit, error ) + CALL stop_run ( 1, 4, 0, 0 ) + END IF + + CALL solvar_allocate ( ndpwds, ierr ) + CALL glmax ( ierr, comm_snap ) + IF ( ierr /= 0 ) THEN + error = '***ERROR: SOLVAR_ALLOC: Allocation error of solution ' // & + 'arrays' + CALL print_error ( ounit, error ) + CALL stop_run ( 1, 4, 1, 0 ) + END IF + + CALL wtime ( t2 ) + tparam = tparam + t2 - t1 +!_______________________________________________________________________ +! +! The time loop solves the problem for nsteps. If static, there is +! only one step, and it does not have any time-absorption or -source +! terms. Set the pointers to angular flux arrays. Set time to one for +! static for proper multiplication in octsweep. 
+!_______________________________________________________________________ + + IF ( iproc == root ) THEN + WRITE( *, 201) ( star, i = 1, 80 ) + WRITE( ounit, 201) ( star, i = 1, 80 ) + END IF + + tot_iits = 0 + + time_loop: DO cy = 1, nsteps +!_______________________________________________________________________ +! +! Initialize the single threaded region that will span the entirety +! of the transport solution per time step. +!_______________________________________________________________________ + + !$OMP PARALLEL NUM_THREADS(nthreads) IF(nthreads>1) PROC_BIND(SPREAD)& + !$OMP& DEFAULT(SHARED) PRIVATE(t,g,otno) + + t = thread_num() + 1 +!_______________________________________________________________________ +! +! A single thread does initial setup. +!_______________________________________________________________________ + + !$OMP MASTER + + CALL wtime ( t3 ) + + vdelt = zero + time = one + update_ptr = .TRUE. + IF ( timedep == 1 ) THEN + IF ( iproc == root ) THEN + WRITE( *, 202 ) ( star, i = 1, 30 ), cy + WRITE( ounit, 202 ) ( star, i = 1, 30 ), cy + END IF + vdelt = two / ( dt * v ) + time = dt * ( REAL( cy, r_knd ) - half ) + IF ( angcpy == 1 ) update_ptr = .FALSE. + END IF + + IF ( cy>1 .AND. src_opt==3 ) & + sf = REAL( 2*cy - 1, r_knd ) / REAL( 2*cy-3, r_knd ) + + IF ( cy > 1 ) THEN + ptr_tmp => ptr_out + ptr_out => ptr_in + ptr_in => ptr_tmp + NULLIFY( ptr_tmp ) + END IF +!_______________________________________________________________________ +! +! Some additional parameter setup work will be done with threads over +! the groups. Each thread gets a set of groups. Nested threads, if +! available will further parallelize the work over each set. +!_______________________________________________________________________ + + do_grp = 1 + CALL assign_thrd_set ( do_grp, ng, ng_per_thrd, 0, nnstd_used, & + grp_act ) + + !$OMP END MASTER + !$OMP BARRIER +!_______________________________________________________________________ +! +! 
Prepare some cross sections: total, in-group scattering, absorption. +! Keep in the time loop for better consistency with PARTISN. Set up +! geometric sweep parameters. Reset flux moments to zero at start of +! each time step. +!_______________________________________________________________________ + + !$OMP PARALLEL DO SCHEDULE(STATIC,1) NUM_THREADS(nnstd_used) & + !$OMP& IF(nnstd_used>1) PROC_BIND(CLOSE) DEFAULT(SHARED) & + !$OMP& PRIVATE(n,g,l) + DO n = 1, ng_per_thrd + + g = grp_act(n,t) + IF ( g == 0 ) CYCLE + + CALL expxs_reg ( siga(:,g), mat, a_xs(:,:,:,g) ) + CALL expxs_reg ( sigt(:,g), mat, t_xs(:,:,:,g) ) + CALL geom_param_calc ( nang, ichunk, mu, eta, xi, t_xs(:,:,:,g), & + vdelt(g), dinv(:,:,:,:,:,g) ) + DO l = 1, nmom + CALL expxs_reg ( slgg(:,l,g,g), mat, s_xs(:,:,:,l,g) ) + END DO + + flux0(:,:,:,g) = zero + fluxm(:,:,:,:,g) = zero +!_______________________________________________________________________ +! +! Scale the manufactured source for time. +!_______________________________________________________________________ + + IF ( src_opt == 3 ) THEN + IF ( cy == 1 ) THEN + qim(:,:,:,:,:,g) = time*qim(:,:,:,:,:,g) + ELSE + qim(:,:,:,:,:,g) = qim(:,:,:,:,:,g)*sf + END IF + END IF + + END DO + !$OMP END PARALLEL DO +!_______________________________________________________________________ +! +! Using Jacobi iterations in energy, and the work in the outer loop +! will be parallelized with threads. +!_______________________________________________________________________ + + !$OMP MASTER + + otrdone = .FALSE. + + cy_iits = 0 + + IF ( iproc==root .AND. it_det==0 ) THEN + WRITE( *, 203 ) + WRITE( ounit, 203 ) + END IF + + CALL wtime ( t4 ) + tparam = tparam + t4 - t3 + + !$OMP END MASTER + !$OMP BARRIER + + outer_loop: DO otno = 1, oitm + + !$OMP MASTER + IF ( iproc==root .AND. 
it_det==1 ) THEN + WRITE( *, 204 ) ( star, i = 1, 20 ), otno + WRITE( ounit, 204 ) ( star, i = 1, 20 ), otno + END IF + !$OMP END MASTER +!_______________________________________________________________________ +! +! Perform an outer iteration. Add up inners. Check convergence. +!_______________________________________________________________________ + + CALL outer ( iits, otno, t, do_grp, ng_per_thrd, nnstd_used, & + grp_act, iitm ) + !$OMP BARRIER + + !$OMP MASTER + + out_iits = SUM( iits ) + cy_iits = cy_iits + out_iits + + IF ( iproc == root ) THEN + WRITE( *, 205 ) otno, dfmxo, out_iits + WRITE( ounit, 205 ) otno, dfmxo, out_iits + END IF + + !$OMP END MASTER + !$OMP BARRIER + + IF ( otrdone ) EXIT outer_loop + + END DO outer_loop +!_______________________________________________________________________ +! +! Manually call for another outer if angcpy is 1 to update the +! outgoing time-edge angular flux. This is done regardless of +! convergence. +!_______________________________________________________________________ + + IF ( angcpy == 1 ) THEN + + IF ( otno > oitm ) THEN + otno = oitm + 1 + ELSE + otno = otno + 1 + END IF + + !$OMP MASTER + update_ptr = .TRUE. + do_grp = 1 + CALL assign_thrd_set ( do_grp, ng, ng_per_thrd, 0, nnstd_used, & + grp_act ) + IF ( iproc==root .AND. it_det==1 ) THEN + WRITE( *, 204 ) ( star, i = 1, 20 ), otno + WRITE( ounit, 204 ) ( star, i = 1, 20 ), otno + END IF + !$OMP END MASTER + !$OMP BARRIER + + CALL outer ( iits, otno, t, do_grp, ng_per_thrd, nnstd_used, & + grp_act, 1 ) + !$OMP BARRIER + + !$OMP MASTER + out_iits = SUM( iits ) + cy_iits = cy_iits + out_iits + IF ( iproc == root ) THEN + WRITE( *, 205 ) otno, dfmxo, out_iits + WRITE( ounit, 205 ) otno, dfmxo, out_iits + END IF + !$OMP END MASTER + !$OMP BARRIER + + END IF +!_______________________________________________________________________ +! +! Master thread computes and prints the particle spectrum every cycle. 
+!_______________________________________________________________________ + + !$OMP MASTER + + IF ( popout == 2 ) CALL analyze_pop_calc ( cy, time ) +!_______________________________________________________________________ +! +! Print the time cycle details. Add time cycle iterations. End the +! main parallel thread region. +!_______________________________________________________________________ + + IF ( iproc == root ) THEN + IF ( timedep == 1 ) THEN + IF ( otrdone ) THEN + WRITE( *, 206 ) cy, time, otno, cy_iits + WRITE( ounit, 206 ) cy, time, otno, cy_iits + ELSE + WRITE( *, 207 ) cy, time, otno-1, cy_iits + WRITE( ounit, 207 ) cy, time, otno-1, cy_iits + END IF + ELSE + IF ( otrdone ) THEN + WRITE( *, 208 ) otno, cy_iits + WRITE( ounit, 208 ) otno, cy_iits + ELSE + WRITE( *, 209 ) otno-1, cy_iits + WRITE( ounit, 209 ) otno-1, cy_iits + END IF + END IF + END IF + + tot_iits = tot_iits + cy_iits + + !$OMP END MASTER + + !$OMP END PARALLEL + + END DO time_loop +!_______________________________________________________________________ +! +! Compute and print the particle spectrum only at end of calc. +!_______________________________________________________________________ + + IF ( popout == 1 ) CALL analyze_pop_calc ( cy, time ) +!_______________________________________________________________________ +! +! Final prints. 
+!_______________________________________________________________________ + + IF ( iproc == root ) THEN + IF ( timedep == 1 ) THEN + WRITE( *, 210 ) ( star, i = 1, 30 ), tot_iits + WRITE( ounit, 210 ) ( star, i = 1, 30 ), tot_iits + END IF + WRITE( *, 211 ) ( star, i = 1, 80 ) + WRITE( ounit, 211 ) ( star, i = 1, 80 ) + END IF + + CALL wtime ( t5 ) + tslv = t5 - t1 + tmp = REAL( nx, r_knd ) * REAL( ny_gl, r_knd ) * REAL( nz_gl, r_knd )& + * REAL( nang, r_knd ) * REAL( noct, r_knd ) & + * REAL( tot_iits, r_knd ) + tgrind = tslv*1.0E9_r_knd / tmp +!_______________________________________________________________________ + + 201 FORMAT( 10X, 'keyword Iteration Monitor', /, 80A ) + 202 FORMAT( /, 1X, 30A, /, 2X, 'Time Cycle ', I3 ) + 203 FORMAT( 2X, 'Outer' ) + 204 FORMAT( 1X, 20A, /, 2X, 'Outer ', I3 ) + 205 FORMAT( 2X, I3, 4X, 'Dfmxo=', ES11.4, 4X, 'No. Inners=', I5 ) + 206 FORMAT( /, 2X, 'Cycle=', I4, 4X, 'Time=', ES11.4, 4X, 'No. ', & + 'Outers=', I4, 4X, 'No. Inners=', I5 ) + 207 FORMAT( /, 2X, '*WARNING: Unconverged outer iterations', /, 2X, & + 'Cycle=', I4, 4X, 'Time=', ES11.4, 4X, 'No. Outers=', I4, & + 4X, 'No. Inners=', I5, / ) + 208 FORMAT( /, 2X, 'No. Outers=', I4, 4X, 'No. Inners=', I5 ) + 209 FORMAT( /, 2X, '*WARNING: Unconverged outer iteration!', /, 2X, & + 'No. Outers=', I4, 4X, 'No. Inners=', I5, / ) + 210 FORMAT( /, 1X, 30A, /, 2X, 'Total inners for all time steps, ', & + 'outers = ', I6 ) + 211 FORMAT( /, 80A, / ) +!_______________________________________________________________________ +!_______________________________________________________________________ + +END SUBROUTINE translv diff --git a/Fortran/SNAP/src/utils.f90 b/Fortran/SNAP/src/utils.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/utils.f90 @@ -0,0 +1,246 @@ +!----------------------------------------------------------------------- +! +! 
MODULE: utils_module +!> @brief +!> This module contains utility subroutines for handling file +!> open/close, errors, command line reading, and program termination. +! +!----------------------------------------------------------------------- + +MODULE utils_module + + USE global_module, ONLY: i_knd, ifile, ofile + + USE dealloc_module + + USE plib_module, ONLY: iproc, root, pend, plock_omp, nthreads + + USE control_module, ONLY: swp_typ + + IMPLICIT NONE + + PUBLIC + + + CONTAINS + + + SUBROUTINE cmdarg ( ierr, error ) + +!----------------------------------------------------------------------- +! +! Read the command line for the input and output file names. +! +!----------------------------------------------------------------------- + + CHARACTER(LEN=64), INTENT(OUT) :: error + + INTEGER(i_knd), INTENT(OUT) :: ierr +!_______________________________________________________________________ +! +! Local variables +!_______________________________________________________________________ + + CHARACTER(LEN=64) :: arg + + INTEGER(i_knd) :: narg, n +!_______________________________________________________________________ +! +! Return if not root. Loop over the first two command line arguments +! to get i/o file names. +!_______________________________________________________________________ + + IF ( iproc /= root ) RETURN + + ierr = 0 + error = '' + + narg = COMMAND_ARGUMENT_COUNT ( ) + + IF ( narg /= 2 ) THEN + ierr = 1 + error = '***ERROR: CMDARG: Missing command line entry' + RETURN + END IF + + DO n = 1, 2 + + CALL GET_COMMAND_ARGUMENT ( n, arg ) + arg = ADJUSTL( arg ) + IF ( arg(1:1)=='-' .OR. arg(1:1)=='<' .OR. 
arg(1:1)=='>' ) THEN + ierr = 1 + error = '***ERROR: CMDARG: Bad command line entry, arg:' + WRITE( error, '(A,A,I2)') TRIM( error ), ' ', n + ELSE IF ( n == 1 ) THEN + ifile = arg + ELSE IF ( n == 2 ) THEN + ofile = arg + END IF + + END DO +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE cmdarg + + + SUBROUTINE open_file ( funit, fname, fstat, faction, ierr, error ) + +!----------------------------------------------------------------------- +! +! Open a file. +! +!----------------------------------------------------------------------- + + CHARACTER(LEN=*), INTENT(IN) :: fstat, faction + + CHARACTER(LEN=*), INTENT(IN) :: fname + + CHARACTER(LEN=64), INTENT(OUT) :: error + + INTEGER(i_knd), INTENT(IN) :: funit + + INTEGER(i_knd), INTENT(OUT) :: ierr +!_______________________________________________________________________ +! +! Local variables. +!_______________________________________________________________________ + + CHARACTER(LEN=64) :: tname +!_______________________________________________________________________ +! +! Return if not root. Open the file with specified unit, name, status, +! action. +!_______________________________________________________________________ + + ierr = 0 + error = '' + + IF ( iproc /= root ) RETURN + + tname = TRIM( fname ) + + OPEN( UNIT=funit, FILE=tname, STATUS=fstat, ACCESS='SEQUENTIAL', & + ACTION=faction, IOSTAT=ierr ) + + IF ( ierr /= 0 ) THEN + error = '***ERROR: OPEN_FILE: Unable to open file, unit:' + WRITE( error, '(A,A,I2)') TRIM( error ), ' ', funit + END IF +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE open_file + + + SUBROUTINE close_file ( funit, ierr, error ) + +!----------------------------------------------------------------------- +! +! Close a file. +! 
+!----------------------------------------------------------------------- + + CHARACTER(LEN=64), INTENT(OUT) :: error + + INTEGER(i_knd), INTENT(IN) :: funit + + INTEGER(i_knd), INTENT(OUT) :: ierr +!_______________________________________________________________________ +! +! Close the file of specified unit number. +!_______________________________________________________________________ + + ierr = 0 + error = '' + + IF ( iproc /= root ) RETURN + + CLOSE( UNIT=funit, IOSTAT=ierr ) + + IF ( ierr /= 0 ) THEN + error = '***ERROR: CLOSE_FILE: Unable to close file, unit:' + WRITE( error, '(A,A,I2)') TRIM( error ), ' ', funit + END IF +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE close_file + + + SUBROUTINE print_error ( funit, error ) + +!----------------------------------------------------------------------- +! +! Print an error message to standard out or to file. +! +!----------------------------------------------------------------------- + + CHARACTER(LEN=*), INTENT(IN) :: error + + INTEGER(i_knd), INTENT(IN) :: funit +!_______________________________________________________________________ +! +! Print the error message. +!_______________________________________________________________________ + + IF ( iproc /= root ) RETURN + + WRITE( *, 101 ) error + IF ( funit > 0 ) WRITE( funit, 101 ) error +!_______________________________________________________________________ + + 101 FORMAT( 3X, A, / ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE print_error + + + SUBROUTINE stop_run ( flg1, flg2, flg3, flg4 ) + +!----------------------------------------------------------------------- +! +! Safely end program execution. +! 
+!----------------------------------------------------------------------- + + INTEGER(i_knd), INTENT(IN) :: flg1, flg2, flg3, flg4 +!_______________________________________________________________________ +! +! Local Variables +!_______________________________________________________________________ +!_______________________________________________________________________ +! +! Deallocate if necessary. Depends on flg1, 0/1=no/yes deallocate. +!_______________________________________________________________________ + + IF ( flg1 > 0 ) CALL plock_omp ( 'destroy', nthreads ) + + IF ( flg2 > 0 ) CALL dealloc_input ( flg2 ) + + IF ( flg3 > 0 ) CALL dealloc_solve ( swp_typ, flg3 ) + + IF ( iproc == root ) THEN + IF ( flg4 == 0 ) THEN + WRITE( *, '(1X,A)') 'Aww SNAP. Program failed. Try again.' + ELSE IF ( flg4 == 1 ) THEN + WRITE( *, '(1X,A)') 'Success! Done in a SNAP!' + ELSE IF ( flg4 == 2 ) THEN + WRITE( *, '(1X,A)') 'Oh SNAP. That did not converge. But ' // & + 'take a look at the Timing Summary anyway!' + END IF + END IF + + CALL pend + + CALL EXIT ( 0 ) + + !STOP +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE stop_run + + +END MODULE utils_module diff --git a/Fortran/SNAP/src/version.f90 b/Fortran/SNAP/src/version.f90 new file mode 100644 --- /dev/null +++ b/Fortran/SNAP/src/version.f90 @@ -0,0 +1,76 @@ +!----------------------------------------------------------------------- +! +! MODULE: version_module +!> @brief +!> This module handles version information. +! +!----------------------------------------------------------------------- + +MODULE version_module + + USE global_module, ONLY: i_knd, ounit + + IMPLICIT NONE + + PUBLIC + + SAVE +!_______________________________________________________________________ +! +! SNAP release number and release date. 
+!_______________________________________________________________________ + + INTEGER(i_knd) :: version = 112 + + CHARACTER(LEN=8) :: cvers = '1.12' + + CHARACTER(LEN=10) :: vdate = '09-07-2021' +!_______________________________________________________________________ +!_______________________________________________________________________ + + + CONTAINS + + + SUBROUTINE version_print + +!----------------------------------------------------------------------- +! +! Print version information. +! +!----------------------------------------------------------------------- +!_______________________________________________________________________ +! +! Local variables. +!_______________________________________________________________________ + + CHARACTER(LEN=12) :: rc(3) + + INTEGER(i_knd), DIMENSION(8) :: dttm +!_______________________________________________________________________ +! +! Call intrinsics to get current date and time. Print to output file +! information about the code. +!_______________________________________________________________________ + + CALL DATE_AND_TIME (rc(1), rc(2), rc(3), dttm) + + WRITE( ounit, 111 ) + WRITE( ounit, 112 ) cvers + WRITE( ounit, 113 ) vdate + WRITE( ounit, FMT=114, ADVANCE='NO' ) dttm(2), dttm(3), dttm(1) + WRITE( ounit, 115 ) dttm(5), dttm(6), dttm(7) +!_______________________________________________________________________ + + 111 FORMAT( 1X, 'SNAP: SN (Discrete Ordinates) Application Proxy' ) + 112 FORMAT( 1X, 'Version Number.. ', A5 ) + 113 FORMAT( 1X, 'Version Date.. ', A10 ) + 114 FORMAT( 1X, 'Ran on ', I2, '-', I2, '-', I4 ) + 115 FORMAT( ' at time ', I2, ':', I2, ':', I2 ) +!_______________________________________________________________________ +!_______________________________________________________________________ + + END SUBROUTINE version_print + + +END MODULE version_module