Index: test-suite/trunk/LICENSE.TXT =================================================================== --- test-suite/trunk/LICENSE.TXT +++ test-suite/trunk/LICENSE.TXT @@ -87,6 +87,7 @@ XSBench: llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C/XSBench HPCCG: llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C++/HPCCG PENNANT: llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT +miniFE: llvm-test/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE Fhourstones: llvm-test/MultiSource/Benchmarks/Fhourstones Fhourstones-3.1: llvm-test/MultiSource/Benchmarks/Fhourstones-3.1 McCat: llvm-test/MultiSource/Benchmarks/McCat Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CMakeLists.txt =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CMakeLists.txt +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(HPCCG) add_subdirectory(PENNANT) +add_subdirectory(miniFE) Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/Makefile =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/Makefile +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/Makefile @@ -1,6 +1,6 @@ # MultiSource/DOE-ProxyApps-C++ Makefile: Build all subdirectories automatically LEVEL = ../../.. -PARALLEL_DIRS = HPCCG PENNANT +PARALLEL_DIRS = HPCCG PENNANT miniFE include $(LEVEL)/Makefile.programs Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Box.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Box.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Box.hpp @@ -0,0 +1,55 @@ +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#ifndef _Box_hpp_ +#define _Box_hpp_ + +/** + * a 'Box' is 3 pairs of ints, where each pair specifies a lower + * and upper bound for one of the 3 spatial dimensions. + * + * This struct stores the 3 pairs as a simple array of 6 ints, + * but defines the bracket operator so that it can be referenced + * using 2-dimensional array notation like this: + * int xmin = box[0][0]; int xmax = box[0][1]; + * int ymin = box[1][0]; int ymax = box[1][1]; + * int zmin = box[2][0]; int zmax = box[2][1]; + */ +struct Box { + int ranges[6]; +#ifdef __CUDACC__ +__host__ __device__ __inline__ +#endif + int* operator[](int xyz) { return &ranges[xyz*2]; } +#ifdef __CUDACC__ +__host__ __device__ __inline__ +#endif + const int* operator[](int xyz) const { return &ranges[xyz*2]; } +}; + +#endif + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxIterator.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxIterator.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxIterator.hpp @@ -0,0 +1,142 @@ +#ifndef _BoxTraverser_hpp_ +#define _BoxTraverser_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +namespace miniFE { + +/** Class for traversing a 3-dimensional 'box' of indices. + + //One way to traverse a 'box[3][2]' is to use a triply-nested for-loop: + for(int z=box[2][0]; z= box_[0][1]) { + x = box_[0][0]; + ++y; + if (y >= box_[1][1]) { + y = box_[1][0]; + ++z; + if (z >= box_[2][1]) { + z = box_[2][1]; + y = box_[1][1]; + x = box_[0][1]; + } + } + } + return *this; + } + + BoxIterator operator++(int) + { + BoxIterator temp = *this; + ++(*this); + return temp; + } + + bool operator==(const BoxIterator& rhs) const + { + return x == rhs.x && y == rhs.y && z == rhs.z; + } + + bool operator!=(const BoxIterator& rhs) const + { + return !(this->operator==(rhs)); + } + + int x; + int y; + int z; + +private: + BoxIterator(const Box& box, bool at_end = false) + : x(box[0][0]), + y(box[1][0]), + z(box[2][0]), + box_() + { + box_[0][0] = box[0][0]; box_[0][1] = box[0][1]; + box_[1][0] = box[1][0]; box_[1][1] = box[1][1]; + box_[2][0] = box[2][0]; box_[2][1] = box[2][1]; + if (at_end) { + x = box[0][1]; + y = box[1][1]; + z = box[2][1]; + } + } + + Box box_; +};//class BoxTraverser + +}//namespace miniFE + +#endif + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxPartition.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxPartition.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxPartition.hpp @@ -0,0 +1,103 @@ +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#ifndef _BoxPartition_hpp_ +#define _BoxPartition_hpp_ + +#include + +/** \brief Recursively split a box into (up-ip) sub-boxes + */ +void box_partition( int ip , int up , int axis , + const Box& box , + Box* p_box ); + +/** \brief Partition a { [ix,jx) X [iy,jy) X [iz,jz) } box. + * + * Use recursive coordinate bisection to partition a box + * into np disjoint sub-boxes. Allocate (via malloc) and + * populate the sub-boxes, mapping the local (x,y,z) to + * a local ordinal, and mappings for the send-recv messages + * to update the ghost cells. + * + * usage: + * + * my_nx = pbox[my_p][0][1] - pbox[my_p][0][0] ; + * my_ny = pbox[my_p][1][1] - pbox[my_p][1][0] ; + * my_nz = pbox[my_p][2][1] - pbox[my_p][2][0] ; + * + * for ( x = -ghost ; x < my_nx + ghost ; ++x ) { + * for ( y = -ghost ; y < my_ny + ghost ; ++y ) { + * for ( z = -ghost ; z < my_nz + ghost ; ++z ) { + * const int x_global = x + pbox[my_p][0][0] ; + * const int y_global = y + pbox[my_p][1][0] ; + * const int z_global = z + pbox[my_p][2][0] ; + * + * const int local_ordinal = + * box_map_local( pbox[my_p], ghost, map_local_id, x, y, z ); + * + * if ( 0 <= local_ordinal ) { + * } + * } + * + * for ( i = 1 ; i < np ; ++i ) { + * const int recv_processor = ( my_p + i ) % np ; + * const int recv_ordinal_begin = map_recv_pc[i]; + * const int recv_ordinal_end = map_recv_pc[i+1]; + * } + * + * for ( i = 1 ; i < np ; ++i ) { + * const int send_processor = ( my_p + i ) % np ; + * const int send_map_begin = map_send_pc[i]; + * const int send_map_end = map_send_pc[i+1]; + * for ( j = send_map_begin ; j < send_map_end ; ++j ) { + * send_ordinal = map_send_id[j] ; + * } + * } + */ +void box_partition_rcb( + const int np /**< [in] Number of partitions */ , + const int my_p /**< [in] My partition rank */ , + const Box& root_box /**< [in] 3D Box to partition */ , + const int ghost /**< [in] Ghost cell boundary */ , + Box* pbox /**< [out] Partition's 3D boxes */ , + int ** map_local_id /**< [out] Map local cells */ , + int ** map_recv_pc /**< [out] Receive spans per processor */ , + int ** map_send_pc /**< [out] Send prefix counts per processor */ , + int ** map_send_id /**< [out] Send message ordinals */ ); + +/* \brief Map a local (x,y,z) to a local ordinal. + */ +int box_map_local( const Box& box_local , + const int ghost , + const int map_local_id[] , + const int local_x , + const int local_y , + const int local_z ); + +#endif + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxPartition.cpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxPartition.cpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/BoxPartition.cpp @@ -0,0 +1,503 @@ +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include +#include + +#include +#include + +/*--------------------------------------------------------------------*/ + +static int box_map_local_entry( const Box& box , + const int ghost , + int local_x , + int local_y , + int local_z ) +{ + const int nx = 2 * ghost + box[0][1] - box[0][0] ; + const int ny = 2 * ghost + box[1][1] - box[1][0] ; + const int nz = 2 * ghost + box[2][1] - box[2][0] ; + int result = -1 ; + + local_x += ghost ; + local_y += ghost ; + local_z += ghost ; + + if ( 0 <= local_x && local_x < nx && + 0 <= local_y && local_y < ny && + 0 <= local_z && local_z < nz ) { + + result = local_z * ny * nx + local_y * nx + local_x ; + } + return result ; +} + +int box_map_local( const Box& box_local, + const int ghost , + const int box_local_map[] , + const int local_x , + const int local_y , + const int local_z ) +{ + int result = box_map_local_entry(box_local,ghost,local_x,local_y,local_z); + + if ( 0 <= result ) { + result = box_local_map[ result ]; + } + + return result ; +} + +/*--------------------------------------------------------------------*/ +/* Recursively split a box into into (up-ip) sub-boxes */ + +void box_partition( int ip , int up , int axis , + const Box& box, + Box* p_box ) +{ + const int np = up - ip ; + if ( 1 == np ) { + p_box[ip][0][0] = box[0][0] ; p_box[ip][0][1] = box[0][1] ; + p_box[ip][1][0] = box[1][0] ; p_box[ip][1][1] = box[1][1] ; + p_box[ip][2][0] = box[2][0] ; p_box[ip][2][1] = box[2][1] ; + } + else { + const int n = box[ axis ][1] - box[ axis ][0] ; + const int np_low = np / 2 ; /* Rounded down */ + const int np_upp = np - np_low ; + + const int n_upp = (int) (((double) n) * ( ((double)np_upp) / ((double)np))); + const int n_low = n - n_upp ; + const int next_axis = ( axis + 2 ) % 3 ; + + if ( np_low ) { /* P = [ip,ip+np_low) */ + Box dbox ; + dbox[0][0] = box[0][0] ; dbox[0][1] = box[0][1] ; + dbox[1][0] = box[1][0] ; dbox[1][1] = box[1][1] ; + dbox[2][0] = box[2][0] ; dbox[2][1] = box[2][1] ; + + dbox[ axis ][1] = dbox[ axis ][0] + n_low ; + + box_partition( ip, ip + np_low, next_axis, dbox, p_box ); + } + + if ( np_upp ) { /* P = [ip+np_low,ip+np_low+np_upp) */ + Box dbox; + dbox[0][0] = box[0][0] ; dbox[0][1] = box[0][1] ; + dbox[1][0] = box[1][0] ; dbox[1][1] = box[1][1] ; + dbox[2][0] = box[2][0] ; dbox[2][1] = box[2][1] ; + + ip += np_low ; + dbox[ axis ][0] += n_low ; + dbox[ axis ][1] = dbox[ axis ][0] + n_upp ; + + box_partition( ip, ip + np_upp, next_axis, dbox, p_box ); + } + } +} + +/*--------------------------------------------------------------------*/ + +static int box_disjoint( const Box& a , const Box& b) +{ + return a[0][1] <= b[0][0] || b[0][1] <= a[0][0] || + a[1][1] <= b[1][0] || b[1][1] <= a[1][0] || + a[2][1] <= b[2][0] || b[2][1] <= a[2][0] ; +} + +static void resize_int( int ** a , int * allocLen , int newLen ) +{ + int k = 32; + while ( k < newLen ) { k <<= 1 ; } + if ( NULL == *a ) + { *a = (int*)malloc( sizeof(int)*(*allocLen = k) ); } + else if ( *allocLen < k ) + { *a = (int*)realloc(*a , sizeof(int)*(*allocLen = k)); } +} + +static void box_partition_maps( + const int np , + const int my_p , + const Box* pbox, + const int ghost , + int ** map_local_id , + int ** map_recv_pc , + int ** map_send_pc , + int ** map_send_id ) +{ + const Box& my_box = pbox[my_p] ; + + const int my_ix = my_box[0][0] ; + const int my_iy = my_box[1][0] ; + const int my_iz = my_box[2][0] ; + const int my_nx = my_box[0][1] - my_box[0][0] ; + const int my_ny = my_box[1][1] - my_box[1][0] ; + const int my_nz = my_box[2][1] - my_box[2][0] ; + + const int my_use_nx = 2 * ghost + my_nx ; + const int my_use_ny = 2 * ghost + my_ny ; + const int my_use_nz = 2 * ghost + my_nz ; + + const int id_length = my_use_nx * my_use_ny * my_use_nz ; + + int * local_id = (int *) malloc( id_length * sizeof(int) ); + int * recv_pc = (int *) malloc( ( np + 1 ) * sizeof(int) ); + int * send_pc = (int *) malloc( ( np + 1 ) * sizeof(int) ); + + int * send_id = NULL ; + int send_id_size = 0 ; + + int iLocal , iSend ; + int i ; + + Box my_use_box; + + my_use_box[0][0] = my_box[0][0] - ghost ; + my_use_box[0][1] = my_box[0][1] + ghost ; + my_use_box[1][0] = my_box[1][0] - ghost ; + my_use_box[1][1] = my_box[1][1] + ghost ; + my_use_box[2][0] = my_box[2][0] - ghost ; + my_use_box[2][1] = my_box[2][1] + ghost ; + + for ( i = 0 ; i < id_length ; ++i ) { local_id[i] = -1 ; } + + iSend = 0 ; + iLocal = 0 ; + + /* The vector space is partitioned by processors */ + + for ( i = 0 ; i < np ; ++i ) { + const int ip = ( i + my_p ) % np ; + recv_pc[i] = iLocal ; + send_pc[i] = iSend ; + + if ( ! box_disjoint( my_use_box , pbox[ip] ) ) { + const int p_ix = pbox[ip][0][0] ; + const int p_iy = pbox[ip][1][0] ; + const int p_iz = pbox[ip][2][0] ; + const int p_ex = pbox[ip][0][1] ; + const int p_ey = pbox[ip][1][1] ; + const int p_ez = pbox[ip][2][1] ; + + int local_x , local_y , local_z ; + + /* Run the span of global cells that my processor uses */ + + for ( local_z = -ghost ; local_z < my_nz + ghost ; ++local_z ) { + for ( local_y = -ghost ; local_y < my_ny + ghost ; ++local_y ) { + for ( local_x = -ghost ; local_x < my_nx + ghost ; ++local_x ) { + + const int global_z = local_z + my_iz ; + const int global_y = local_y + my_iy ; + const int global_x = local_x + my_ix ; + + const int entry = + box_map_local_entry(my_box,ghost,local_x,local_y,local_z); + + if ( entry < 0 ) { abort(); } + + if ( p_iz <= global_z && global_z < p_ez && + p_iy <= global_y && global_y < p_ey && + p_ix <= global_x && global_x < p_ex ) { + + /* This ordinal is owned by processor 'ip' */ + + local_id[ entry ] = iLocal++ ; + +#if defined(DEBUG_PRINT) +if ( my_p != ip ) { + fprintf(stdout," (%d,%d,%d) : P%d recv at local %d from P%d\n", + global_x,global_y,global_z,my_p,local_id[entry],ip); + fflush(stdout); +} +#endif + } + + /* If in my ownership and used by the other processor */ + if ( my_p != ip && + /* In my ownership: */ + ( 0 <= local_z && local_z < my_nz && + 0 <= local_y && local_y < my_ny && + 0 <= local_x && local_x < my_nx ) && + /* In other processors usage: */ + ( p_iz - ghost <= global_z && global_z < p_ez + ghost && + p_iy - ghost <= global_y && global_y < p_ey + ghost && + p_ix - ghost <= global_x && global_x < p_ex + ghost ) ) { + + resize_int( & send_id , & send_id_size , (iSend + 1) ); + send_id[ iSend ] = local_id[ entry ] ; + ++iSend ; + +#if defined(DEBUG_PRINT) +{ + fprintf(stdout," (%d,%d,%d) : P%d send at local %d to P%d\n", + global_x,global_y,global_z,my_p,local_id[entry],ip); + fflush(stdout); +} +#endif + } + } + } + } + } + } + recv_pc[np] = iLocal ; + send_pc[np] = iSend ; + + *map_local_id = local_id ; + *map_recv_pc = recv_pc ; + *map_send_pc = send_pc ; + *map_send_id = send_id ; +} + +void box_partition_rcb( const int np , + const int my_p , + const Box& root_box, + const int ghost , + Box** pbox, + int ** map_local_id , + int ** map_recv_pc , + int ** map_send_pc , + int ** map_send_id ) +{ + *pbox = new Box[ np ]; + + box_partition( 0 , np , 2 , root_box , *pbox ); + + box_partition_maps( np , my_p , *pbox , ghost , + map_local_id , map_recv_pc , + map_send_pc , map_send_id ); +} + +/*--------------------------------------------------------------------*/ + +#ifdef UNIT_TEST + +static int box_contain( const Box& a , const Box& b ) +{ + return a[0][0] <= b[0][0] && b[0][1] <= a[0][1] && + a[1][0] <= b[1][0] && b[1][1] <= a[1][1] && + a[2][0] <= b[2][0] && b[2][1] <= a[2][1] ; +} + +static void box_print( FILE * fp , const Box& a ) +{ + fprintf(fp,"{ [ %d , %d ) , [ %d , %d ) , [ %d , %d ) }", + a[0][0] , a[0][1] , + a[1][0] , a[1][1] , + a[2][0] , a[2][1] ); +} + +static void test_box( const Box& box , const int np ) +{ + const int ncell_box = box[0][1] * box[1][1] * box[2][1] ; + int ncell_total = 0 ; + int ncell_min = ncell_box ; + int ncell_max = 0 ; + std::vector pbox(np); + int i , j ; + + box_partition( 0 , np , 2 , box , &pbox[0] ); + + for ( i = 0 ; i < np ; ++i ) { + const int ncell = ( pbox[i][0][1] - pbox[i][0][0] ) * + ( pbox[i][1][1] - pbox[i][1][0] ) * + ( pbox[i][2][1] - pbox[i][2][0] ); + + if ( ! box_contain( box , pbox[i] ) ) { + fprintf(stdout," OUT OF BOUNDS pbox[%d/%d] = ",i,np); + box_print(stdout,pbox[i]); + fprintf(stdout,"\n"); + abort(); + } + + for ( j = i + 1 ; j < np ; ++j ) { + if ( ! box_disjoint( pbox[i] , pbox[j] ) ) { + fprintf(stdout," NOT DISJOINT pbox[%d/%d] = ",i,np); + box_print(stdout, pbox[i]); + fprintf(stdout,"\n"); + fprintf(stdout," pbox[%d/%d] = ",j,np); + box_print(stdout, pbox[j]); + fprintf(stdout,"\n"); + abort(); + } + } + ncell_total += ncell ; + + if ( ncell_max < ncell ) { ncell_max = ncell ; } + if ( ncell < ncell_min ) { ncell_min = ncell ; } + } + + if ( ncell_total != ncell_box ) { + fprintf(stdout," WRONG CELL COUNT NP = %d\n",np); + abort(); + } + fprintf(stdout,"NP = %d, total = %d, avg = %d, min = %d, max = %d\n", + np,ncell_box,ncell_box/np,ncell_min,ncell_max); +} + +/*--------------------------------------------------------------------*/ + +static void test_maps( const Box& root_box , const int np ) +{ + const int ghost = 1 ; + const int nx_global = root_box[0][1] - root_box[0][0] ; + const int ny_global = root_box[1][1] - root_box[1][0] ; + int ieq , i , j ; + std::vector pbox(np); + int **local_values ; + int **map_local_id ; + int **map_recv_pc ; + int **map_send_pc ; + int **map_send_id ; + + box_partition( 0 , np , 2 , root_box , &pbox[0] ); + + local_values = (int **) malloc( sizeof(int*) * np ); + map_local_id = (int **) malloc( sizeof(int*) * np ); + map_recv_pc = (int **) malloc( sizeof(int*) * np ); + map_send_pc = (int **) malloc( sizeof(int*) * np ); + map_send_id = (int **) malloc( sizeof(int*) * np ); + + /* Set each local value to the global equation number */ + + for ( ieq = i = 0 ; i < np ; ++i ) { + const Box& mybox = pbox[i] ; + const int nx = mybox[0][1] - mybox[0][0] ; + const int ny = mybox[1][1] - mybox[1][0] ; + const int nz = mybox[2][1] - mybox[2][0] ; + int ix , iy , iz ; + + /* Generate the partition maps for this rank */ + box_partition_maps( np , i , &pbox[0] , ghost , + & map_local_id[i] , & map_recv_pc[i] , + & map_send_pc[i] , & map_send_id[i] ); + + local_values[i] = (int *) malloc( sizeof(int) * map_recv_pc[i][np] ); + + for ( iz = -ghost ; iz < nz + ghost ; ++iz ) { + for ( iy = -ghost ; iy < ny + ghost ; ++iy ) { + for ( ix = -ghost ; ix < nx + ghost ; ++ix ) { + const int ieq = box_map_local(mybox,ghost,map_local_id[i],ix,iy,iz); + + if ( 0 <= ieq ) { + const int ix_global = ix + mybox[0][0] ; + const int iy_global = iy + mybox[1][0] ; + const int iz_global = iz + mybox[2][0] ; + + if ( root_box[0][0] <= ix_global && ix_global < root_box[0][1] && + root_box[1][0] <= iy_global && iy_global < root_box[1][1] && + root_box[2][0] <= iz_global && iz_global < root_box[2][1] ) { + + local_values[i][ ieq ] = ix_global + + iy_global * nx_global + + iz_global * nx_global * ny_global ; + } + else { + local_values[i][ ieq ] = -1 ; + } + } + } + } + } + } + + /* Pair-wise compare the local values */ + /* i == receiving processor rank */ + /* ip == sending processor rank */ + /* j == receiving processor data entry for message from 'ip' */ + /* jp == sending processor data entry for message to 'i' */ + + for ( i = 0 ; i < np ; ++i ) { + for ( j = 1 ; j < np ; ++j ) { + const int ip = ( i + j ) % np ; + const int jp = ( i + np - ip ) % np ; + const int nrecv = map_recv_pc[i] [j+1] - map_recv_pc[i] [j] ; + const int nsend = map_send_pc[ip][jp+1] - map_send_pc[ip][jp] ; + int k ; + if ( nrecv != nsend ) { + fprintf(stderr,"P%d recv %d from P%d\n",i,nrecv,ip); + fprintf(stderr,"P%d send %d to P%d\n",ip,nsend,i); + abort(); + } + for ( k = 0 ; k < nrecv ; ++k ) { + const int irecv = map_recv_pc[i][j] + k ; + const int isend = map_send_pc[ip][jp] + k ; + const int val_irecv = local_values[i][irecv] ; + const int val_isend = local_values[ip][ map_send_id[ip][isend] ] ; + if ( val_irecv != val_isend ) { + fprintf(stderr,"P%d recv[%d] = %d , from P%d\n",i,k,val_irecv,ip); + fprintf(stderr,"P%d send[%d] = %d , to P%d\n",ip,k,val_isend,i); + abort(); + } + } + } + } + + for ( i = 0 ; i < np ; ++i ) { + free( map_local_id[i] ); + free( map_recv_pc[i] ); + free( map_send_pc[i] ); + free( map_send_id[i] ); + free( local_values[i] ); + } + free( map_send_id ); + free( map_send_pc ); + free( map_recv_pc ); + free( map_local_id ); + free( local_values ); +} + +/*--------------------------------------------------------------------*/ + +int main( int argc , char * argv[] ) +{ + int np_max = 256 ; + Box box = { 0 , 64 , 0 , 64 , 0 , 64 }; + int np = 0 ; + + switch( argc ) { + case 3: + sscanf(argv[1],"%d",&np); + sscanf(argv[2],"%dx%dx%d",& box[0][1] , & box[1][1] , & box[2][1] ); + if ( 0 < np ) { test_box( box , np ); } + if ( 0 < np ) { test_maps( box , np ); } + break ; + default: + for ( np = 1 ; np <= np_max ; ++np ) { + test_box( box , np ); + test_maps( box , np ); + } + break ; + } + return 0 ; +} + +#endif + + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeLists.txt =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeLists.txt +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeLists.txt @@ -0,0 +1,5 @@ +set(PROG miniFE) +list(APPEND CXXFLAGS -I. -DMINIFE_SCALAR=double -DMINIFE_LOCAL_ORDINAL=int -DMINIFE_GLOBAL_ORDINAL=int -DMINIFE_CSR_MATRIX) +list(APPEND LDFLAGS -lm) +set(RUN_OPTIONS -nx 64 -ny 64 -nz 64) +llvm_multisource() Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CSRMatrix.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CSRMatrix.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CSRMatrix.hpp @@ -0,0 +1,134 @@ +#ifndef _CSRMatrix_hpp_ +#define _CSRMatrix_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include +#include +#include +#ifdef HAVE_MPI +#include +#endif + +namespace miniFE { + +template +struct +CSRMatrix { + CSRMatrix() + : has_local_indices(false), + rows(), row_offsets(), row_offsets_external(), + packed_cols(), packed_coefs(), + num_cols(0) +#ifdef HAVE_MPI + ,external_index(), external_local_index(), elements_to_send(), + neighbors(), recv_length(), send_length(), send_buffer(), request() +#endif + { + } + + ~CSRMatrix() + {} + + typedef Scalar ScalarType; + typedef LocalOrdinal LocalOrdinalType; + typedef GlobalOrdinal GlobalOrdinalType; + + bool has_local_indices; + std::vector rows; + std::vector row_offsets; + std::vector row_offsets_external; + std::vector packed_cols; + std::vector packed_coefs; + LocalOrdinal num_cols; + +#ifdef HAVE_MPI + std::vector external_index; + std::vector external_local_index; + std::vector elements_to_send; + std::vector neighbors; + std::vector recv_length; + std::vector send_length; + std::vector send_buffer; + std::vector request; +#endif + + size_t num_nonzeros() const + { + return row_offsets[row_offsets.size()-1]; + } + + void reserve_space(unsigned nrows, unsigned ncols_per_row) + { + rows.resize(nrows); + row_offsets.resize(nrows+1); + packed_cols.reserve(nrows * ncols_per_row); + packed_coefs.reserve(nrows * ncols_per_row); + } + + void get_row_pointers(GlobalOrdinalType row, size_t& row_length, + GlobalOrdinalType*& cols, + ScalarType*& coefs) + { + ptrdiff_t local_row = -1; + //first see if we can get the local-row index using fast direct lookup: + if (rows.size() >= 1) { + ptrdiff_t idx = row - rows[0]; + if (idx < rows.size() && rows[idx] == row) { + local_row = idx; + } + } + + //if we didn't get the local-row index using direct lookup, try a + //more expensive binary-search: + if (local_row == -1) { + typename std::vector::iterator row_iter = + std::lower_bound(rows.begin(), rows.end(), row); + + //if we still haven't found row, it's not local so jump out: + if (row_iter == rows.end() || *row_iter != row) { + row_length = 0; + return; + } + + local_row = row_iter - rows.begin(); + } + + LocalOrdinalType offset = row_offsets[local_row]; + row_length = row_offsets[local_row+1] - offset; + cols = &packed_cols[offset]; + coefs = &packed_coefs[offset]; + } +}; + +}//namespace miniFE + +#endif + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/ELLMatrix.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/ELLMatrix.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/ELLMatrix.hpp @@ -0,0 +1,139 @@ +#ifndef _ELLMatrix_hpp_ +#define _ELLMatrix_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include +#include +#include +#ifdef HAVE_MPI +#include +#endif + +namespace miniFE { + +template +struct +ELLMatrix { + ELLMatrix() + : has_local_indices(false), + rows(), + cols(), coefs(), + num_cols(0), + num_cols_per_row(0) +#ifdef HAVE_MPI + ,external_index(), external_local_index(), elements_to_send(), + neighbors(), recv_length(), send_length(), send_buffer(), request() +#endif + { + } + + ~ELLMatrix() + {} + + typedef Scalar ScalarType; + typedef LocalOrdinal LocalOrdinalType; + typedef GlobalOrdinal GlobalOrdinalType; + + bool has_local_indices; + std::vector rows; + std::vector cols; + std::vector coefs; + LocalOrdinal num_cols; + LocalOrdinal num_cols_per_row; + +#ifdef HAVE_MPI + std::vector external_index; + std::vector external_local_index; + std::vector elements_to_send; + std::vector neighbors; + std::vector recv_length; + std::vector send_length; + std::vector send_buffer; + std::vector request; +#endif + + size_t num_nonzeros() const + { + return rows.size()*num_cols_per_row; + } + + void reserve_space(unsigned nrows, unsigned ncols_per_row) + { + rows.resize(nrows); + cols.resize(nrows * ncols_per_row); + coefs.resize(nrows * ncols_per_row); + num_cols_per_row = ncols_per_row; + } + + void get_row_pointers(GlobalOrdinalType row, size_t& row_length, + GlobalOrdinalType*& cols_ptr, + ScalarType*& coefs_ptr) + { + ptrdiff_t local_row = -1; + //first see if we can get the local-row index using fast direct lookup: + if (rows.size() >= 1) { + ptrdiff_t idx = row - rows[0]; + if (idx < rows.size() && rows[idx] == row) { + local_row = idx; + } + } + + //if we didn't get the local-row index using direct lookup, try a + //more expensive binary-search: + if (local_row == -1) { + typename std::vector::iterator row_iter = + std::lower_bound(rows.begin(), rows.end(), row); + + //if we still haven't found row, it's not local so jump out: + if (row_iter == rows.end() || *row_iter != row) { + row_length = 0; + return; + } + + local_row = row_iter - rows.begin(); + } + + cols_ptr = &cols[local_row*num_cols_per_row]; + coefs_ptr = &coefs[local_row*num_cols_per_row]; + + int idx = num_cols_per_row-1; + while(idx>=0) { + if (cols_ptr[idx] != 0) break; + --idx; + } + row_length = idx+1; + } +}; + +}//namespace miniFE + +#endif + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/ElemData.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/ElemData.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/ElemData.hpp @@ -0,0 +1,64 @@ +#ifndef _ElemData_hpp_ +#define _ElemData_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include + +namespace miniFE { + +template +struct ElemData { + ElemData() : nodes_per_elem(Hex8::numNodesPerElem) {} + ~ElemData(){} + + const size_t nodes_per_elem; + GlobalOrdinal elem_node_ids[Hex8::numNodesPerElem]; + Scalar grad_vals[Hex8::numGaussPointsPerDim * Hex8::numGaussPointsPerDim * Hex8::numGaussPointsPerDim * Hex8::numNodesPerElem * Hex8::spatialDim]; + Scalar elem_node_coords[Hex8::numNodesPerElem*Hex8::spatialDim]; + Scalar elem_diffusion_matrix[(Hex8::numNodesPerElem*(Hex8::numNodesPerElem+1))/2]; + Scalar elem_source_vector[Hex8::numNodesPerElem]; +}; + +template +struct ElemDataPtr { + ElemDataPtr() : nodes_per_elem(Hex8::numNodesPerElem) {} + ~ElemDataPtr(){} + + const size_t nodes_per_elem; + GlobalOrdinal elem_node_ids[Hex8::numNodesPerElem]; + Scalar grad_vals[Hex8::numGaussPointsPerDim * Hex8::numGaussPointsPerDim * Hex8::numGaussPointsPerDim * Hex8::numNodesPerElem * Hex8::spatialDim]; + Scalar elem_node_coords[(Hex8::numNodesPerElem*(Hex8::spatialDim+1))/2]; + Scalar* elem_diffusion_matrix; + Scalar* elem_source_vector; +}; + +}//namespace miniFE + +#endif + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/GetNodesCoords.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/GetNodesCoords.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/GetNodesCoords.hpp @@ -0,0 +1,51 @@ +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#ifndef _GETNODESCOORDS_HPP_ +#define _GETNODESCOORDS_HPP_ + +#include +#include + +template +struct GetNodesCoords { + const miniFE::simple_mesh_description* mesh; + GlobalOrdinal* elemIDs; + GlobalOrdinal* node_ordinals; + Scalar* elem_node_coords; + +inline void operator()(int i) +{ + unsigned nnodes = miniFE::Hex8::numNodesPerElem; + GlobalOrdinal elemID = elemIDs[i]; + GlobalOrdinal* node_ords = node_ordinals+i*nnodes; + Scalar* node_coords = elem_node_coords+i*nnodes*miniFE::Hex8::spatialDim; + get_elem_nodes_and_coords(*mesh, elemID, node_ords, node_coords); +} +}; + +#endif Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8.hpp @@ -0,0 +1,417 @@ +#ifndef _Hex8_hpp_ +#define _Hex8_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#ifndef KERNEL_PREFIX +#define KERNEL_PREFIX +#endif + +#include +#include +#include + +namespace miniFE { + +namespace Hex8 { + +template +KERNEL_PREFIX void shape_fns(const Scalar* x, Scalar* values_at_nodes) +{ + //assumptions: values_at_nodes has length numNodesPerElem + // x has length 3 (hard-coded spatialDim) + + const Scalar u = 1.0 - x[0]; + const Scalar v = 1.0 - x[1]; + const Scalar w = 1.0 - x[2]; + + const Scalar up1 = 1.0 + x[0]; + const Scalar vp1 = 1.0 + x[1]; + const Scalar wp1 = 1.0 + x[2]; + + values_at_nodes[0] = 0.125 * u * v * w;//(1-x)*(1-y)*(1-z) + values_at_nodes[1] = 0.125 * up1 * v * w;//(1+x)*(1-y)*(1-z) + values_at_nodes[2] = 0.125 * up1 * vp1 * w;//(1+x)*(1+y)*(1-z) + values_at_nodes[3] = 0.125 * u * vp1 * w;//(1-x)*(1+y)*(1-z) + values_at_nodes[4] = 0.125 * u * v * wp1;//(1-x)*(1-y)*(1+z) + values_at_nodes[5] = 0.125 * up1 * v * wp1;//(1+x)*(1-y)*(1+z) + values_at_nodes[6] = 0.125 * up1 * vp1 * wp1;//(1+x)*(1+y)*(1+z) + values_at_nodes[7] = 0.125 * u * vp1 * wp1;//(1-x)*(1+y)*(1+z) +} + +template +KERNEL_PREFIX void gradients(const Scalar* x, Scalar* values_per_fn) +{ + //assumptions values_per_fn has length 24 (numNodesPerElem*spatialDim) + // spatialDim == 3 + + const Scalar u = 1.0 - x[0]; + const Scalar v = 1.0 - x[1]; + const Scalar w = 1.0 - x[2]; + + const Scalar up1 = 1.0 + x[0]; + const Scalar vp1 = 1.0 + x[1]; + const Scalar wp1 = 1.0 + x[2]; + +//fn 0 + values_per_fn[0] = -0.125 * v * w; + values_per_fn[1] = -0.125 * u * w; + values_per_fn[2] = -0.125 * u * v; +//fn 1 + values_per_fn[3] = 0.125 * v * w; + values_per_fn[4] = -0.125 * up1 * w; + values_per_fn[5] = -0.125 * up1 * v; +//fn 2 + values_per_fn[6] = 0.125 * vp1 * w; + values_per_fn[7] = 0.125 * up1 * w; + values_per_fn[8] = -0.125 * up1 * vp1; +//fn 3 + values_per_fn[9] = -0.125 * vp1 * w; + values_per_fn[10] = 0.125 * u * w; + values_per_fn[11] = -0.125 * u * vp1; +//fn 4 + values_per_fn[12] = -0.125 * v * wp1; + values_per_fn[13] = -0.125 * u * wp1; + values_per_fn[14] = 0.125 * u * v; +//fn 5 + values_per_fn[15] = 0.125 * v * wp1; + values_per_fn[16] = -0.125 * up1 * wp1; + values_per_fn[17] = 0.125 * up1 * v; +//fn 6 + values_per_fn[18] = 0.125 * vp1 * wp1; + values_per_fn[19] = 0.125 * up1 * wp1; + values_per_fn[20] = 0.125 * up1 * vp1; +//fn 7 + values_per_fn[21] = -0.125 * vp1 * wp1; + values_per_fn[22] = 0.125 * u * wp1; + values_per_fn[23] = 0.125 * u * vp1; +} + +template +KERNEL_PREFIX void gradients_and_detJ(const Scalar* elemNodeCoords, + const Scalar* grad_vals, + Scalar& detJ) +{ +/** + pt is the point at which the jacobian is to be computed. +*/ + + //assumptions on the lengths of input arguments: + //elemNodeCoords has length numNodesPerElem*spatialDim, + //grad_vals has length numNodesPerElem*spatialDim + + const Scalar zero = 0; + + Scalar J00 = zero; + Scalar J01 = zero; + Scalar J02 = zero; + + Scalar J10 = zero; + Scalar J11 = zero; + Scalar J12 = zero; + + Scalar J20 = zero; + Scalar J21 = zero; + Scalar J22 = zero; + + size_t i_X_spatialDim = 0; + for(size_t i=0; i +KERNEL_PREFIX void gradients_and_invJ_and_detJ(const Scalar* elemNodeCoords, + const Scalar* grad_vals, + Scalar* invJ, + Scalar& detJ) +{ +/** + pt is the point at which the jacobian is to be computed. +*/ + + //assumptions on the lengths of input arguments: + //pt has length spatialDim, + //elemNodeCoords has length numNodesPerElem*spatialDim, + //grad_vals has length numNodesPerElem*spatialDim, and + //J has length spatialDim*spatialDim + + const Scalar zero = 0; + + // + //First we compute the jacobian J: + // + Scalar J00 = zero; + Scalar J01 = zero; + Scalar J02 = zero; + + Scalar J10 = zero; + Scalar J11 = zero; + Scalar J12 = zero; + + Scalar J20 = zero; + Scalar J21 = zero; + Scalar J22 = zero; + + size_t i_X_spatialDim = 0; + for(size_t i=0; i +KERNEL_PREFIX void diffusionMatrix_symm(const Scalar* elemNodeCoords, + const Scalar* grad_vals, + Scalar* elem_mat) +{ + int len = (numNodesPerElem * (numNodesPerElem+1))/2; + const Scalar zero = 0; + miniFE::fill(elem_mat, elem_mat+len, zero); + + Scalar gpts[numGaussPointsPerDim]; + Scalar gwts[numGaussPointsPerDim]; + + gauss_pts(numGaussPointsPerDim, gpts, gwts); + + const Scalar k = 1.0; + Scalar detJ = 0.0; + + Scalar dpsidx[numNodesPerElem], dpsidy[numNodesPerElem], dpsidz[numNodesPerElem]; + + Scalar invJ[spatialDim*spatialDim]; + + //The following nested loop implements equations 3.4.5 and 3.4.7 on page 88 + //of Reddy & Gartling, "The Finite Element Method in Heat Transfer and Fluid + //Dynamics", 2nd edition, + //to compute the element diffusion matrix for the steady conduction equation. + + Scalar pt[spatialDim]; + +#ifdef MINIFE_DEBUG + Scalar volume = zero; +#endif + + size_t gv_offset = 0; + for(size_t ig=0; ig 1.e-7) { +// std::cout << "element volume is "< +KERNEL_PREFIX void sourceVector(const Scalar* elemNodeCoords, + const Scalar* grad_vals, + Scalar* elem_vec) +{ + int len = numNodesPerElem; + const Scalar zero = 0; + miniFE::fill(elem_vec, elem_vec+len, zero); + + Scalar gpts[numGaussPointsPerDim]; + Scalar gwts[numGaussPointsPerDim]; + + Scalar psi[numNodesPerElem]; + + gauss_pts(numGaussPointsPerDim, gpts, gwts); + + Scalar Q = 1.0; + + Scalar pt[spatialDim]; + + size_t gv_offset = 0; + for(size_t ig=0; ig +#include +#include + +namespace miniFE { + +template +void compute_gradient_values(Scalar* grad_vals) +{ + Scalar gpts[Hex8::numGaussPointsPerDim]; + Scalar gwts[Hex8::numGaussPointsPerDim]; + + gauss_pts(Hex8::numGaussPointsPerDim, gpts, gwts); + + Scalar pt[Hex8::spatialDim]; + + Scalar* grad_vals_ptr = grad_vals; + for(size_t ig=0; ig +void +compute_element_matrix_and_vector(ElemData& elem_data) +{ + Hex8::diffusionMatrix_symm(elem_data.elem_node_coords, elem_data.grad_vals, + elem_data.elem_diffusion_matrix); + Hex8::sourceVector(elem_data.elem_node_coords, elem_data.grad_vals, + elem_data.elem_source_vector); +} + +template +void +compute_element_matrix_and_vector(ElemDataPtr& elem_data) +{ + Hex8::diffusionMatrix_symm(elem_data.elem_node_coords, elem_data.grad_vals, + elem_data.elem_diffusion_matrix); + Hex8::sourceVector(elem_data.elem_node_coords, elem_data.grad_vals, + elem_data.elem_source_vector); +} + +}//namespace miniFE + +#endif + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8_box_utils.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8_box_utils.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8_box_utils.hpp @@ -0,0 +1,173 @@ +#ifndef _Hex8_box_utils_hpp_ +#define _Hex8_box_utils_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include + +#include +#include +#include +#include + +namespace miniFE { + + +template +void get_hex8_node_ids(int nx, int ny, + GlobalOrdinal node0, + GlobalOrdinal* elem_node_ids) +{ +//Given box dimensions nx and ny, and a starting node +//(local-node-0 for a hex8), compute the other nodes +//of the hex8 using the exodus ordering convention. + elem_node_ids[0] = node0; + elem_node_ids[1] = node0 + 1; + elem_node_ids[2] = node0 + nx + 1; + elem_node_ids[3] = node0 + nx; + elem_node_ids[4] = node0 + nx*ny; + elem_node_ids[5] = node0 + 1 + nx*ny; + elem_node_ids[6] = node0 + nx + nx*ny + 1; + elem_node_ids[7] = node0 + nx + nx*ny; +} + +template +void get_hex8_node_coords_3d(Scalar x, Scalar y, Scalar z, + Scalar hx, Scalar hy, Scalar hz, + Scalar* elem_node_coords) +{ + //Input: x,y,z are the coordinates of local-node 0 for a Hex8. + //'hx', 'hy', 'hz' are the lengths of the sides of the element + //in each direction. + + elem_node_coords[0] = x; + elem_node_coords[1] = y; + elem_node_coords[2] = z; + + elem_node_coords[3] = x + hx; + elem_node_coords[4] = y; + elem_node_coords[5] = z; + + elem_node_coords[6] = x + hx; + elem_node_coords[7] = y + hy; + elem_node_coords[8] = z; + + elem_node_coords[9] = x; + elem_node_coords[10] = y + hy; + elem_node_coords[11] = z; + + elem_node_coords[12] = x; + elem_node_coords[13] = y; + elem_node_coords[14] = z + hz; + + elem_node_coords[15] = x + hx; + elem_node_coords[16] = y; + elem_node_coords[17] = z + hz; + + elem_node_coords[18] = x + hx; + elem_node_coords[19] = y + hy; + elem_node_coords[20] = z + hz; + + elem_node_coords[21] = x; + elem_node_coords[22] = y + hy; + elem_node_coords[23] = z + hz; +} + +template +void +get_elem_nodes_and_coords(const simple_mesh_description& mesh, + GlobalOrdinal elemID, + GlobalOrdinal* node_ords, Scalar* node_coords) +{ + int global_nodes_x = mesh.global_box[0][1]+1; + int global_nodes_y = mesh.global_box[1][1]+1; + int global_nodes_z = mesh.global_box[2][1]+1; + + if (elemID < 0) { + //I don't think this can happen, but check for the sake of paranoia... + throw std::runtime_error("get_elem_nodes_and_coords ERROR, negative elemID"); + } + + int elem_int_x, elem_int_y, elem_int_z; + get_int_coords(elemID, global_nodes_x-1, global_nodes_y-1, global_nodes_z-1, + elem_int_x, elem_int_y, elem_int_z); + GlobalOrdinal nodeID = get_id(global_nodes_x, global_nodes_y, global_nodes_z, elem_int_x, elem_int_y, elem_int_z); + +#ifdef MINIFE_DEBUG_VERBOSE + std::cout<<"\nelemID: "<(nodeID, global_nodes_x,global_nodes_y,global_nodes_z, + ix,iy,iz); + Scalar hx = 1.0/global_elems_x; + Scalar hy = 1.0/global_elems_y; + Scalar hz = 1.0/global_elems_z; + get_hex8_node_coords_3d(ix, iy, iz, hx, hy, hz, node_coords); +#ifdef MINIFE_DEBUG_VERBOSE + int offset = 0; + for(int i=0; i +void +get_elem_nodes_and_coords(const simple_mesh_description& mesh, + GlobalOrdinal elemID, + ElemData& elem_data) +{ + get_elem_nodes_and_coords(mesh, elemID, elem_data.elem_node_ids, elem_data.elem_node_coords); +} + +}//namespace miniFE + +#endif + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8_enums.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8_enums.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Hex8_enums.hpp @@ -0,0 +1,52 @@ +#ifndef _Hex8_enums_hpp_ +#define _Hex8_enums_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +namespace miniFE { + +namespace Hex8 { + +// !!!!!!! +//Important note: there are places in miniFE code where +//loops over spatialDim are unrolled (spatialDim is assumed to be 3). +//Thus, changing this enum is not enough to make miniFE code +//work for spatialDim values other than 3. +// !!!!!!! +enum { + spatialDim = 3, + numNodesPerElem = 8, + numGaussPointsPerDim = 2 +}; + +}//namespace Hex8 + +}//namespace miniFE + +#endif + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Makefile =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Makefile +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Makefile @@ -0,0 +1,7 @@ +LEVEL = ../../../.. + +PROG = miniFE +CXXFLAGS = -I. -DMINIFE_SCALAR=double -DMINIFE_LOCAL_ORDINAL=int -DMINIFE_GLOBAL_ORDINAL=int -DMINIFE_CSR_MATRIX +LDFLAGS = -lm +RUN_OPTIONS = -nx 64 -ny 64 -nz 64 +include $(LEVEL)/MultiSource/Makefile.multisrc Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/MatrixCopyOp.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/MatrixCopyOp.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/MatrixCopyOp.hpp @@ -0,0 +1,60 @@ +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#ifndef _MatrixCopyOp_hpp_ +#define _MatrixCopyOp_hpp_ + +template +struct MatrixCopyOp { + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType; + typedef typename MatrixType::LocalOrdinalType LocalOrdinalType; + typedef typename MatrixType::ScalarType ScalarType; + + const GlobalOrdinalType* src_rows; + const LocalOrdinalType* src_rowoffsets; + const GlobalOrdinalType* src_cols; + const ScalarType* src_coefs; + + GlobalOrdinalType* dest_rows; + LocalOrdinalType* dest_rowoffsets; + GlobalOrdinalType* dest_cols; + ScalarType* dest_coefs; + int n; + + inline void operator()(int i) + { + dest_rows[i] = src_rows[i]; + dest_rowoffsets[i] = src_rowoffsets[i]; + for(int j=src_rowoffsets[i]; j +#include + +#include +#include + +#include + +template +void sort_if_needed(GlobalOrdinal* list, + GlobalOrdinal list_len) +{ + bool need_to_sort = false; + for(GlobalOrdinal i=list_len-1; i>=1; --i) { + if (list[i] < list[i-1]) { + need_to_sort = true; + break; + } + } + + if (need_to_sort) { + std::sort(list,list+list_len); + } +} + +template +struct MatrixInitOp { +}; + +template<> +struct MatrixInitOp > { + MatrixInitOp(const std::vector& rows_vec, + const std::vector& row_offsets_vec, + const std::vector& row_coords_vec, + int global_nx, int global_ny, int global_nz, + MINIFE_GLOBAL_ORDINAL global_n_rows, + const miniFE::simple_mesh_description& input_mesh, + miniFE::CSRMatrix& matrix) + : rows(&rows_vec[0]), + row_offsets(&row_offsets_vec[0]), + row_coords(&row_coords_vec[0]), + global_nodes_x(global_nx), + global_nodes_y(global_ny), + global_nodes_z(global_nz), + global_nrows(global_n_rows), + mesh(&input_mesh), + dest_rows(&matrix.rows[0]), + dest_rowoffsets(&matrix.row_offsets[0]), + dest_cols(&matrix.packed_cols[0]), + dest_coefs(&matrix.packed_coefs[0]), + n(matrix.rows.size()) + { + if (matrix.packed_cols.capacity() != matrix.packed_coefs.capacity()) { + std::cout<<"Warning, packed_cols.capacity ("<* mesh; + + inline void operator()(int i) + { + dest_rows[i] = rows[i]; + int offset = row_offsets[i]; + dest_rowoffsets[i] = offset; + int ix = row_coords[i*3]; + int iy = row_coords[i*3+1]; + int iz = row_coords[i*3+2]; + GlobalOrdinalType nnz = 0; + for(int sz=-1; sz<=1; ++sz) { + for(int sy=-1; sy<=1; ++sy) { + for(int sx=-1; sx<=1; ++sx) { + GlobalOrdinalType col_id = + miniFE::get_id(global_nodes_x, global_nodes_y, global_nodes_z, + ix+sx, iy+sy, iz+sz); + if (col_id >= 0 && col_id < global_nrows) { + GlobalOrdinalType col = mesh->map_id_to_row(col_id); + if (col >= global_nrows) { + std::cout << "mesh->map_id_to_row produced col="< +void +sum_into_row(int row_len, + GlobalOrdinal* row_indices, + Scalar* row_coefs, + int num_inputs, + const GlobalOrdinal* input_indices, + const Scalar* input_coefs) +{ + for(size_t i=0; i +void +sum_into_row(typename MatrixType::GlobalOrdinalType row, + size_t num_indices, + const typename MatrixType::GlobalOrdinalType* col_inds, + const typename MatrixType::ScalarType* coefs, + MatrixType& mat) +{ + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::ScalarType Scalar; + + size_t row_len = 0; + GlobalOrdinal* mat_row_cols = NULL; + Scalar* mat_row_coefs = NULL; + + mat.get_row_pointers(row, row_len, mat_row_cols, mat_row_coefs); + if (row_len == 0) return; + + sum_into_row(row_len, mat_row_cols, mat_row_coefs, num_indices, col_inds, coefs); +} + +template +void +sum_in_symm_elem_matrix(size_t num, + const typename MatrixType::GlobalOrdinalType* indices, + const typename MatrixType::ScalarType* coefs, + MatrixType& mat) +{ + typedef typename MatrixType::ScalarType Scalar; + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + +//indices is length num (which should be nodes-per-elem) +//coefs is the upper triangle of the element diffusion matrix +//which should be length num*(num+1)/2 +//std::cout< +void +sum_in_elem_matrix(size_t num, + const typename MatrixType::GlobalOrdinalType* indices, + const typename MatrixType::ScalarType* coefs, + MatrixType& mat) +{ + size_t offset = 0; + + for(size_t i=0; i +void +sum_into_global_linear_system(ElemData& elem_data, + MatrixType& A, VectorType& b) +{ + sum_in_symm_elem_matrix(elem_data.nodes_per_elem, elem_data.elem_node_ids, + elem_data.elem_diffusion_matrix, A); + sum_into_vector(elem_data.nodes_per_elem, elem_data.elem_node_ids, + elem_data.elem_source_vector, b); +} + +#ifdef MINIFE_HAVE_TBB +template +void +sum_in_elem_matrix(size_t num, + const typename MatrixType::GlobalOrdinalType* indices, + const typename MatrixType::ScalarType* coefs, + LockingMatrix& mat) +{ + size_t offset = 0; + + for(size_t i=0; i +void +sum_into_global_linear_system(ElemData& elem_data, + LockingMatrix& A, LockingVector& b) +{ + sum_in_elem_matrix(elem_data.nodes_per_elem, elem_data.elem_node_ids, + elem_data.elem_diffusion_matrix, A); + sum_into_vector(elem_data.nodes_per_elem, elem_data.elem_node_ids, + elem_data.elem_source_vector, b); +} +#endif + +template +void +add_to_diagonal(typename MatrixType::ScalarType value, MatrixType& mat) +{ + for(size_t i=0; i +double +parallel_memory_overhead_MB(const MatrixType& A) +{ + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + double mem_MB = 0; + +#ifdef HAVE_MPI + double invMB = 1.0/(1024*1024); + mem_MB = invMB*A.external_index.size()*sizeof(GlobalOrdinal); + mem_MB += invMB*A.external_local_index.size()*sizeof(GlobalOrdinal); + mem_MB += invMB*A.elements_to_send.size()*sizeof(GlobalOrdinal); + mem_MB += invMB*A.neighbors.size()*sizeof(int); + mem_MB += invMB*A.recv_length.size()*sizeof(LocalOrdinal); + mem_MB += invMB*A.send_length.size()*sizeof(LocalOrdinal); + + double tmp = mem_MB; + MPI_Allreduce(&tmp, &mem_MB, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); +#endif + + return mem_MB; +} + +template +void rearrange_matrix_local_external(MatrixType& A) +{ + //This function will rearrange A so that local entries are contiguous at the front + //of A's memory, and external entries are contiguous at the back of A's memory. + // + //A.row_offsets will describe where the local entries occur, and + //A.row_offsets_external will describe where the external entries occur. + + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + typedef typename MatrixType::ScalarType Scalar; + + size_t nrows = A.rows.size(); + std::vector tmp_row_offsets(nrows*2); + std::vector tmp_row_offsets_external(nrows*2); + + LocalOrdinal num_local_nz = 0; + LocalOrdinal num_extern_nz = 0; + + //First sort within each row of A, so that local entries come + //before external entries within each row. + //tmp_row_offsets describe the locations of the local entries, and + //tmp_row_offsets_external describe the locations of the external entries. + // + for(size_t i=0; i ext_cols(num_extern_nz); + std::vector ext_coefs(num_extern_nz); + std::vector ext_offsets(nrows+1); + LocalOrdinal offset = 0; + for(size_t i=0; i +void +zero_row_and_put_1_on_diagonal(MatrixType& A, typename MatrixType::GlobalOrdinalType row) +{ + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + typedef typename MatrixType::ScalarType Scalar; + + size_t row_len = 0; + GlobalOrdinal* cols = NULL; + Scalar* coefs = NULL; + A.get_row_pointers(row, row_len, cols, coefs); + + for(size_t i=0; i +void +impose_dirichlet(typename MatrixType::ScalarType prescribed_value, + MatrixType& A, + VectorType& b, + int global_nx, + int global_ny, + int global_nz, + const std::set& bc_rows) +{ + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + typedef typename MatrixType::ScalarType Scalar; + + GlobalOrdinal first_local_row = A.rows.size()>0 ? A.rows[0] : 0; + GlobalOrdinal last_local_row = A.rows.size()>0 ? A.rows[A.rows.size()-1] : -1; + + typename std::set::const_iterator + bc_iter = bc_rows.begin(), bc_end = bc_rows.end(); + for(; bc_iter!=bc_end; ++bc_iter) { + GlobalOrdinal row = *bc_iter; + if (row >= first_local_row && row <= last_local_row) { + size_t local_row = row - first_local_row; + b.coefs[local_row] = prescribed_value; + zero_row_and_put_1_on_diagonal(A, row); + } + } + + for(size_t i=0; i +typename TypeTraits::magnitude_type +matvec_and_dot(MatrixType& A, + VectorType& x, + VectorType& y) +{ + timer_type t0 = mytimer(); + exchange_externals(A, x); + exchtime += mytimer()-t0; + + typedef typename TypeTraits::magnitude_type magnitude; + typedef typename MatrixType::ScalarType ScalarType; + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType; + typedef typename MatrixType::LocalOrdinalType LocalOrdinalType; + + int n = A.rows.size(); + const LocalOrdinalType* Arowoffsets = &A.row_offsets[0]; + const GlobalOrdinalType* Acols = &A.packed_cols[0]; + const ScalarType* Acoefs = &A.packed_coefs[0]; + const ScalarType* xcoefs = &x.coefs[0]; + ScalarType* ycoefs = &y.coefs[0]; + ScalarType beta = 0; + + magnitude result = 0; + + for(int row=0; row::mpi_type(); + MPI_Allreduce(&local_dot, &global_dot, 1, mpi_dtype, MPI_SUM, MPI_COMM_WORLD); + return global_dot; +#else + return result; +#endif +} + +//------------------------------------------------------------------------ +//Compute matrix vector product y = A*x where: +// +// A - input matrix +// x - input vector +// y - result vector +// +#if defined(MINIFE_CSR_MATRIX) +template +struct matvec_std { +void operator()(MatrixType& A, + VectorType& x, + VectorType& y) +{ + exchange_externals(A, x); + + typedef typename MatrixType::ScalarType ScalarType; + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType; + typedef typename MatrixType::LocalOrdinalType LocalOrdinalType; + + int n = A.rows.size(); + const LocalOrdinalType* Arowoffsets = &A.row_offsets[0]; + const GlobalOrdinalType* Acols = &A.packed_cols[0]; + const ScalarType* Acoefs = &A.packed_coefs[0]; + const ScalarType* xcoefs = &x.coefs[0]; + ScalarType* ycoefs = &y.coefs[0]; + ScalarType beta = 0; + + for(int row=0; row +struct TypeTraits { + typedef double magnitude_type; + + static const char* name() {return "double";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_DOUBLE;} +#endif +}; + +template<> +struct TypeTraits { + typedef int magnitude_type; + + static const char* name() {return "int";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_INT;} +#endif +}; + +template<> +struct TypeTraits { + typedef long int magnitude_type; + + static const char* name() {return "long int";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_LONG;} +#endif +}; + +#ifndef MINIFE_NO_LONG_LONG + +template<> +struct TypeTraits { + typedef long long magnitude_type; + + static const char* name() {return "long long";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_LONG_LONG;} +#endif +}; + +#endif + +template<> +struct TypeTraits { + typedef unsigned magnitude_type; + + static const char* name() {return "unsigned";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_UNSIGNED;} +#endif +}; + +template<> +struct TypeTraits > { + typedef float magnitude_type; + + static const char* name() {return "std::complex";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_COMPLEX;} +#endif +}; + +template<> +struct TypeTraits > { + typedef double magnitude_type; + + static const char* name() {return "std::complex";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_DOUBLE_COMPLEX;} +#endif +}; + +}//namespace miniFE + +#endif + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Vector.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Vector.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Vector.hpp @@ -0,0 +1,67 @@ +#ifndef _Vector_hpp_ +#define _Vector_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include + +namespace miniFE { + + +template +struct Vector { + typedef Scalar ScalarType; + typedef LocalOrdinal LocalOrdinalType; + typedef GlobalOrdinal GlobalOrdinalType; + + Vector(GlobalOrdinal startIdx, LocalOrdinal local_sz) + : startIndex(startIdx), + local_size(local_sz), + coefs(local_size) + { + for(size_t i=0; i < local_size; ++i) { + coefs[i] = 0; + } + } + + ~Vector() + { + } + + GlobalOrdinal startIndex; + LocalOrdinal local_size; + std::vector coefs; +}; + + +}//namespace miniFE + +#endif + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Vector_functions.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Vector_functions.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/Vector_functions.hpp @@ -0,0 +1,231 @@ +#ifndef _Vector_functions_hpp_ +#define _Vector_functions_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +#ifdef MINIFE_HAVE_TBB +#include +#endif + +#include +#include + +namespace miniFE { + + +template +void write_vector(const std::string& filename, + const VectorType& vec) +{ + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + std::ostringstream osstr; + osstr << filename << "." << numprocs << "." << myproc; + std::string full_name = osstr.str(); + std::ofstream ofs(full_name.c_str()); + + typedef typename VectorType::ScalarType ScalarType; + + const std::vector& coefs = vec.coefs; + for(int p=0; p +void sum_into_vector(size_t num_indices, + const typename VectorType::GlobalOrdinalType* indices, + const typename VectorType::ScalarType* coefs, + VectorType& vec) +{ + typedef typename VectorType::GlobalOrdinalType GlobalOrdinal; + typedef typename VectorType::ScalarType Scalar; + + GlobalOrdinal first = vec.startIndex; + GlobalOrdinal last = first + vec.local_size - 1; + + std::vector& vec_coefs = vec.coefs; + + for(size_t i=0; i last) continue; + size_t idx = indices[i] - first; + vec_coefs[idx] += coefs[i]; + } +} + +#ifdef MINIFE_HAVE_TBB +template +void sum_into_vector(size_t num_indices, + const typename VectorType::GlobalOrdinalType* indices, + const typename VectorType::ScalarType* coefs, + LockingVector& vec) +{ + vec.sum_in(num_indices, indices, coefs); +} +#endif + +//------------------------------------------------------------ +//Compute the update of a vector with the sum of two scaled vectors where: +// +// w = alpha*x + beta*y +// +// x,y - input vectors +// +// alpha,beta - scalars applied to x and y respectively +// +// w - output vector +// +template +void + waxpby(typename VectorType::ScalarType alpha, const VectorType& x, + typename VectorType::ScalarType beta, const VectorType& y, + VectorType& w) +{ + typedef typename VectorType::ScalarType ScalarType; + +#ifdef MINIFE_DEBUG + if (y.local_size < x.local_size || w.local_size < x.local_size) { + std::cerr << "miniFE::waxpby ERROR, y and w must be at least as long as x." << std::endl; + return; + } +#endif + + int n = x.coefs.size(); + const ScalarType* xcoefs = &x.coefs[0]; + const ScalarType* ycoefs = &y.coefs[0]; + ScalarType* wcoefs = &w.coefs[0]; + + for(int i=0; i +void + fused_waxpby(typename VectorType::ScalarType alpha, const VectorType& x, + typename VectorType::ScalarType beta, const VectorType& y, + VectorType& w, + typename VectorType::ScalarType alpha2, const VectorType& x2, + typename VectorType::ScalarType beta2, const VectorType& y2, + VectorType& w2) +{ + typedef typename VectorType::ScalarType ScalarType; + +#ifdef MINIFE_DEBUG + if (y.local_size < x.local_size || w.local_size < x.local_size) { + std::cerr << "miniFE::waxpby ERROR, y and w must be at least as long as x." << std::endl; + return; + } +#endif + + int n = x.coefs.size(); + const ScalarType* xcoefs = &x.coefs[0]; + const ScalarType* ycoefs = &y.coefs[0]; + ScalarType* wcoefs = &w.coefs[0]; + + const ScalarType* x2coefs = &x2.coefs[0]; + const ScalarType* y2coefs = &y2.coefs[0]; + ScalarType* w2coefs = &w2.coefs[0]; + + for(int i=0; i +typename TypeTraits::magnitude_type + dot(const Vector& x, + const Vector& y) +{ + int n = x.coefs.size(); + +#ifdef MINIFE_DEBUG + if (y.local_size < n) { + std::cerr << "miniFE::dot ERROR, y must be at least as long as x."<::magnitude_type magnitude; + + const Scalar* xcoefs = &x.coefs[0]; + const Scalar* ycoefs = &y.coefs[0]; + magnitude result = 0; + for(int i=0; i::mpi_type(); + MPI_Allreduce(&local_dot, &global_dot, 1, mpi_dtype, MPI_SUM, MPI_COMM_WORLD); + return global_dot; +#else + return result; +#endif +} + +}//namespace miniFE + +#endif + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Doc.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Doc.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Doc.hpp @@ -0,0 +1,122 @@ +//@HEADER +// ************************************************************************ +// +// Mantevo: A collection of mini-applications for HPC +// Copyright (2008) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +// Changelog +// +// Version 0.1 +// - Initial version. +// +///////////////////////////////////////////////////////////////////////// + +#ifndef YAML_DOC_H +#define YAML_DOC_H +#include +#include +#include "YAML_Element.hpp" + +//! The Mantevo YAML_Doc class for the uniform collecting and reporting of performance data for mini-applications + +/*! + +The YAML_Doc class works in conjuction with the YAML_Element class to facilitate easy collecting and reporting of YAML-formatted +data that can be then registered with the Mantevo results collection website. + +\code + +//EXAMPLE CODE FOR GENERATING YAML + + YAML_Doc doc("hpccg","1.0"); + doc.add("final_residual",1.4523e-13); + doc.add("time","4.893"); + +//note: the following line will remove the data (4.890) associated with "time" + doc.get("time")->add("total",4.243); + +//note: the following line will likewise remove the data (1.243) associated with "time" + doc.get("time")->get("total")->add("time",2.457); + doc.get("time")->get("total")->add("flops",4.88e5); + doc.get("time")->add("ddot",1.243); + doc.get("time")->add("sparsemv",""); + doc.get("time")->get("sparsemv")->add("time",0.3445); + doc.get("time")->get("sparsemv")->add("overhead",""); + doc.get("time")->get("sparsemv")->get("overhead")->add("time",0.0123); + doc.get("time")->get("sparsemv")->get("overhead")->add("percentage",0.034); + cout << doc.generateYAML() << endl; + return 0; + +\endcode + +Below is the output generated by the above code: + +\verbatim + +final_residual: 1.4523e-13 +time: + total: + time: 2.457 + flops: 4.88e5 + ddot: 1.243 + sparsemv: + time: 0.3445 + overhead: + time: 0.0123 + percentage: 0.034 + +\endverbatim + +\note {No value is allowed to be attached to a key that has children. If children are added to a key, the value is simply set to "".} + +*/ +class YAML_Doc: public YAML_Element { + public: + //! Constructor: accepts mini-application name and version as strings, optionally accepts directory and file name for printing results. + /*! + The sole constructor for this class accepts and name and version number for the mini-application as well as optional directory + and file name information for results that are generated by the generateYAML() method. + \param miniApp_Name (in) string containing name of the mini-application + \param miniApp_Version (in) string containing the version of the mini-application + \param destination_Directory (in, optional) path of diretory where results file will be stored, relative to current working directory. + If this value is not supplied, the results file will be stored in the current working directory. If the directory does not exist + it will be created. + \param destination_FileName (in, optional) root name of the results file. A suffix of ".yaml" will be automatically appended. If no + file name is specified the filename will be constructed by concatenating the miniAppName + miniAppVersion + ".yaml" strings. + */ + YAML_Doc(const std::string& miniApp_Name, const std::string& miniApp_Version, const std::string& destination_Directory = "", const std::string& destination_FileName = ""); + //! Destructor + ~YAML_Doc(); + //! Generate YAML results to standard out and to a file using specified directory and filename, using current directory and miniAppName + miniAppVersion + ".yaml" by default + std::string generateYAML(); + +protected: + std::string miniAppName; + std::string miniAppVersion; + std::string destinationDirectory; + std::string destinationFileName; +}; +#endif /* YAML_DOC_H */ + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Doc.cpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Doc.cpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Doc.cpp @@ -0,0 +1,77 @@ +#include +#include +#include +#include +#include +#include +#include +#ifdef REDSTORM +#include +#include +#include +#endif +#include "YAML_Doc.hpp" +using namespace std; + +//set the microapp_name and version which will become part of the YAML doc. +YAML_Doc::YAML_Doc(const std::string& miniApp_Name, const std::string& miniApp_Version, const std::string& destination_Directory, const std::string& destination_FileName){ + miniAppName = miniApp_Name; + miniAppVersion = miniApp_Version; + destinationDirectory = destination_Directory; + destinationFileName = destination_FileName; +} + +//inherits the destructor from YAML_Element +YAML_Doc::~YAML_Doc(void){ +} + +/* +* generates YAML from the elements of the document and saves it +* to a file +*/ +string YAML_Doc::generateYAML(){ + string yaml; + yaml = yaml + "Mini-Application Name: " + miniAppName + "\n"; + yaml = yaml + "Mini-Application Version: " + miniAppVersion + "\n"; + for(size_t i=0; iprintYAML(""); + } + + time_t rawtime; + tm * ptm; + time ( &rawtime ); + ptm = localtime(&rawtime); + char sdate[25]; + //use tm_mon+1 because tm_mon is 0 .. 11 instead of 1 .. 12 + sprintf (sdate,"%04d:%02d:%02d-%02d:%02d:%02d",ptm->tm_year + 1900, ptm->tm_mon+1, + ptm->tm_mday, ptm->tm_hour, ptm->tm_min,ptm->tm_sec); + + string filename; + if (destinationFileName=="") + filename = miniAppName + "-" + miniAppVersion + "_"; + else + filename = destinationFileName; + filename = filename + string(sdate) + ".yaml"; + if (destinationDirectory!="" && destinationDirectory!=".") { + string mkdir_cmd = "mkdir " + destinationDirectory; +#ifdef REDSTORM + mkdir(destinationDirectory.c_str(),0755); +#else + system(mkdir_cmd.c_str()); +#endif + filename = destinationDirectory + "/" + destinationFileName; + } + else + filename = "./" + filename; + +#ifdef GENERATE_YAML + ofstream myfile; + myfile.open(filename.c_str()); + myfile << yaml; + myfile.close(); +#endif + + return yaml; +} + + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Element.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Element.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Element.hpp @@ -0,0 +1,86 @@ +//@HEADER +// ************************************************************************ +// +// Mantevo: A collection of mini-applications for HPC +// Copyright (2008) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +// Changelog +// +// Version 0.1 +// - Initial version. +// +///////////////////////////////////////////////////////////////////////// + +#ifndef YAML_ELEMENT_H +#define YAML_ELEMENT_H +#include +#include +//! The Mantevo YAML_Element class for registering key-value pairs of performance data + +/*! + Mantevo mini-applications generate a collection of performance data for each run of the executable. YAML_Element, and + the related YAML_Doc class, provide a uniform facility for gathering and reporting this data using the YAML text format. +*/ +class YAML_Element { + public: + + //! Default constructor. + YAML_Element (){key="";value="";} + //! Construct with known key-value pair + YAML_Element (const std::string& key_arg, const std::string& value_arg); + //! Destructor + ~YAML_Element (); + //! Key accessor method + std::string getKey(){return key;} + //! Add a child element to an element list associated with this element, value of type double + YAML_Element* add(const std::string& key_arg, double value_arg); + //! Add a child element to an element list associated with this element, value of type int + YAML_Element* add(const std::string& key_arg, int value_arg); +#ifndef MINIFE_NO_LONG_LONG + //! Add a child element to an element list associated with this element, value of type long long + YAML_Element* add(const std::string& key_arg, long long value_arg); +#endif + //! Add a child element to an element list associated with this element, value of type size_t + YAML_Element* add(const std::string& key_arg, size_t value_arg); + //! Add a child element to an element list associated with this element, value of type string + YAML_Element* add(const std::string& key_arg, const std::string& value_arg); + //! get the element in the list with the given key + YAML_Element* get(const std::string& key_arg); + std::string printYAML(std::string space); + +protected: + std::string key; + std::string value; + std::vector children; + +private: + std::string convert_double_to_string(double value_arg); + std::string convert_int_to_string(int value_arg); +#ifndef MINIFE_NO_LONG_LONG + std::string convert_long_long_to_string(long long value_arg); +#endif + std::string convert_size_t_to_string(size_t value_arg); +}; +#endif /* YAML_ELEMENT_H */ Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Element.cpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Element.cpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/YAML_Element.cpp @@ -0,0 +1,120 @@ +#include +#include +#include +#include +#include "YAML_Element.hpp" +using namespace std; +YAML_Element::YAML_Element(const std::string& key_arg, const std::string& value_arg){ + key = key_arg; + value = value_arg; +} + +YAML_Element::~YAML_Element(){ + for (size_t i=0; ivalue = ""; + string converted_value = convert_double_to_string(value_arg); + YAML_Element* element = new YAML_Element(key_arg,converted_value); + children.push_back(element); + return element; +} + +YAML_Element* YAML_Element::add(const std::string& key_arg, int value_arg) { + this->value = ""; + string converted_value = convert_int_to_string(value_arg); + YAML_Element* element = new YAML_Element(key_arg,converted_value); + children.push_back(element); + return element; +} + +#ifndef MINIFE_NO_LONG_LONG + +YAML_Element* YAML_Element::add(const std::string& key_arg, long long value_arg) { + this->value = ""; + string converted_value = convert_long_long_to_string(value_arg); + YAML_Element* element = new YAML_Element(key_arg,converted_value); + children.push_back(element); + return element; +} + +#endif + +YAML_Element* YAML_Element::add(const std::string& key_arg, size_t value_arg) { + this->value = ""; + string converted_value = convert_size_t_to_string(value_arg); + YAML_Element* element = new YAML_Element(key_arg,converted_value); + children.push_back(element); + return element; +} + +YAML_Element* YAML_Element::add(const std::string& key_arg, const std::string& value_arg) { + this->value = ""; + YAML_Element* element = new YAML_Element(key_arg, value_arg); + children.push_back(element); + return element; +} + +/* +* returns pointer to the YAML_Element for the given key. +* I, cam, believe an exception should be thrown if there is no +* element in the vector for the specified key +*/ +YAML_Element* YAML_Element::get(const std::string& key_arg) { + for (size_t i=0; igetKey() == key_arg){ + return children[i]; + } + } + return 0; +} + +/* +* prints a line of a YAML document. Correct YAML depends on +* correct spacing; the parameter space should be the proper +* amount of space for the parent element +*/ +string YAML_Element::printYAML(std::string space){ + string yaml_line = space + key + ": " + value + "\n"; + for(int i=0; i<2; i++) space = space + " "; + for(size_t i=0; iprintYAML(space); + } + return yaml_line; +} + +string YAML_Element::convert_double_to_string(double value_arg){ + stringstream strm; + strm << value_arg; + return strm.str(); +} +string YAML_Element::convert_int_to_string(int value_arg){ + stringstream strm; + strm << value_arg; + return strm.str(); +} + +#ifndef MINIFE_NO_LONG_LONG + +string YAML_Element::convert_long_long_to_string(long long value_arg){ + stringstream strm; + strm << value_arg; + return strm.str(); +} + +#endif + +string YAML_Element::convert_size_t_to_string(size_t value_arg){ + stringstream strm; + strm << value_arg; + return strm.str(); +} Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/analytic_soln.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/analytic_soln.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/analytic_soln.hpp @@ -0,0 +1,116 @@ +#ifndef _analytic_soln_hpp_ +#define _analytic_soln_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include + +#ifndef MINIFE_SCALAR +#define MINIFE_SCALAR double; +#endif + +namespace miniFE { + +typedef MINIFE_SCALAR Scalar; + +// The 'soln' function below computes the analytic solution for +// steady state temperature in a brick-shaped domain (formally called +// a rectangular parallelepiped). The inputs to the function are +// the x,y,z coordinates of the point at which temperature is to be +// computed, and the number of terms p,q in the series expansion. +// +// The equations used for the temperature solution are equations 9 and 10 +// in section 6.2 of Carslaw & Jaeger, "Conduction of Heat in Solids". +// +// The paralellepiped being used is defined by this domain: +// 0 <= x <= 1.0 +// 0 <= y <= 1.0 +// 0 <= z <= 1.0 +// +// With boundary conditions prescribing the temperature to be 1.0 on +// the x==1.0 face, and 0.0 on all other faces. +// +// Thus, in the equations from Carslaw & Jaeger, the following constants +// are used: +// +// a == b == c == 1.0 (the extents of the domain) +// v1 == 0.0 (temperature at x == 0.0) +// v2 == 1.0 (temperature at x == 1.0) +// + +const Scalar PI = 3.141592653589793238462; +const Scalar PI_SQR = PI*PI; +const Scalar term0 = 16.0/(PI_SQR); + +inline Scalar fcn_l(int p, int q) +{ + return std::sqrt((2*p+1)*(2*p+1)*PI_SQR + (2*q+1)*(2*q+1)*PI_SQR); +} + +inline Scalar fcn(int n, Scalar u) +{ + return (2*n+1)*PI*u; +} + +inline Scalar soln(Scalar x, Scalar y, Scalar z, int max_p, int max_q) +{ + Scalar sum = 0; + for(int p=0; p<=max_p; ++p) { + const Scalar p21y = fcn(p, y); + const Scalar sin_py = std::sin(p21y)/(2*p+1); + for(int q=0; q<=max_q; ++q) { + const Scalar q21z = fcn(q, z); + const Scalar sin_qz = std::sin(q21z)/(2*q+1); + + const Scalar l = fcn_l(p, q); + + const Scalar sinh1 = std::sinh(l*x); + const Scalar sinh2 = std::sinh(l); + + const Scalar tmp = (sinh1*sin_py)*(sin_qz/sinh2); + + //if the scalar l gets too big, sinh(l) becomes inf. + //if that happens, tmp is a NaN. + //crude check for NaN: + //if tmp != tmp, tmp is NaN + if (tmp == tmp) { + sum += tmp; + } + else { + //if we got a NaN, break out of this inner loop and go to + //the next iteration of the outer loop. + break; + } + } + } + return term0*sum; +} + +}//namespace miniFE + +#endif /* _analytic_soln_hpp_ */ Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/assemble_FE_data.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/assemble_FE_data.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/assemble_FE_data.hpp @@ -0,0 +1,78 @@ +#ifndef _assemble_FE_data_hpp_ +#define _assemble_FE_data_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include +#include + +#include + +namespace miniFE { + +template +void +assemble_FE_data(const simple_mesh_description& mesh, + MatrixType& A, + VectorType& b, + Parameters& params) +{ + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + + int global_elems_x = mesh.global_box[0][1]; + int global_elems_y = mesh.global_box[1][1]; + int global_elems_z = mesh.global_box[2][1]; + + Box local_elem_box; + copy_box(mesh.local_box, local_elem_box); + + if (get_num_ids(local_elem_box) < 1) { + return; + } + + // + //We want the element-loop to loop over our (processor-local) domain plus a + //ghost layer, so we can assemble the complete linear-system without doing + //any communication. + // + int ghost = 1; + if (local_elem_box[0][0] > 0) local_elem_box[0][0] -= ghost; + if (local_elem_box[1][0] > 0) local_elem_box[1][0] -= ghost; + if (local_elem_box[2][0] > 0) local_elem_box[2][0] -= ghost; + if (local_elem_box[0][1] < global_elems_x) local_elem_box[0][1] += ghost; + if (local_elem_box[1][1] < global_elems_y) local_elem_box[1][1] += ghost; + if (local_elem_box[2][1] < global_elems_z) local_elem_box[2][1] += ghost; + + perform_element_loop(mesh, local_elem_box, A, b, params); +} + +}//namespace miniFE + +#endif + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/box_utils.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/box_utils.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/box_utils.hpp @@ -0,0 +1,320 @@ +#ifndef _box_utils_hpp_ +#define _box_utils_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +#include +#include + +namespace miniFE { + +inline void copy_box(const Box& from_box, Box& to_box) +{ + for(int i=0; i<3; ++i) { + to_box[i][0] = from_box[i][0]; + to_box[i][1] = from_box[i][1]; + } +} + +template +#ifdef __CUDACC__ +__host__ __device__ __inline__ +#endif +void get_int_coords(GlobalOrdinal ID, int nx, int ny, int nz, + int& x, int& y, int& z) +{ + z = ID/(nx*ny); + y = (ID%(nx*ny))/nx; + x = ID%nx; +} + +template +#ifdef __CUDACC__ +__host__ __device__ __inline__ +#endif +void get_coords(GlobalOrdinal ID, int nx, int ny, int nz, + Scalar& x, Scalar& y, Scalar& z) +{ + const int xdiv = nx>1 ? nx-1 : 1; + const int ydiv = ny>1 ? ny-1 : 1; + const int zdiv = nz>1 ? nz-1 : 1; + +//This code assumes that ID is 0-based. +// +//compute coordinates that lie on (or in) the unit cube. +//that's why we're dividing by nz,ny,nx: + z = (1.0*(ID/(nx*ny)))/zdiv; + y = 1.0*((ID%(nx*ny))/nx)/ydiv; + x = 1.0*(ID%nx)/xdiv; +} + +template +GlobalOrdinal get_num_ids(const Box& box) +{ + int nx = box[0][1] - box[0][0]; + int ny = box[1][1] - box[1][0]; + int nz = box[2][1] - box[2][0]; + GlobalOrdinal tmp = nx*ny; + tmp *= nz; + return tmp; +} + +template +#ifdef __CUDACC__ +__host__ __device__ __inline__ +#endif +GlobalOrdinal get_id(int nx, int ny, int nz, + int x, int y, int z) +{ + if (x<0 || y<0 || z<0) return -1; + if (x>=nx || y>=ny || z>=nz) return -1; + + //form x + nx*y + nx*ny*z: + + GlobalOrdinal tmp = nx*ny; + tmp *= z; + tmp = x + nx * y + tmp; + return tmp; +} + +template +void get_ids(int nx, int ny, int nz, + const Box& box, + std::vector& ids, + bool include_ghost_layer=false) +{ + ids.clear(); + int minz = box[2][0]; + int maxz = box[2][1]; + int miny = box[1][0]; + int maxy = box[1][1]; + int minx = box[0][0]; + int maxx = box[0][1]; + + if (include_ghost_layer) { + if (minz > 0) minz--; + if (miny > 0) miny--; + if (minx > 0) minx--; + if (maxz < nz) maxz++; + if (maxy < ny) maxy++; + if (maxx < nx) maxx++; + } + + size_t ids_size = ((maxz - minz) * (maxy - miny)) * (maxx - minx); + ids.reserve(ids_size); + + for(int z=minz; z(nx, ny, nz, x, y, z)); + } + } + } +} + +template +void get_ghost_ids(int nx, int ny, int nz, + const Box& box, + std::vector& ids) +{ + ids.clear(); + int minz,maxz,miny,maxy,minx,maxx; + int orig_minz = minz = box[2][0]; + int orig_maxz = maxz = box[2][1]; + int orig_miny = miny = box[1][0]; + int orig_maxy = maxy = box[1][1]; + int orig_minx = minx = box[0][0]; + int orig_maxx = maxx = box[0][1]; + + if (minz > 0) minz--; + if (miny > 0) miny--; + if (minx > 0) minx--; + if (maxz < nz) maxz++; + if (maxy < ny) maxy++; + if (maxx < nx) maxx++; + + for(int z=minz; z= orig_maxx); + bool y_in_ghost_layer = (y < orig_miny) || (y >= orig_maxy); + bool z_in_ghost_layer = (z < orig_minz) || (z >= orig_maxz); + //we are in the ghost layer if any one of x,y,z are in the ghost layer + if (!x_in_ghost_layer && !y_in_ghost_layer && !z_in_ghost_layer) continue; + ids.push_back(get_id(nx, ny, nz, x, y, z)); + } + } + } +} + + inline void print_box(int myproc, const char* name, const Box& box, + const char* name2, const Box& box2) +{ + std::cout << "proc " << myproc << " "< box2[0][0] && box1[0][1] < box2[0][1]) || // range contains other + (box2[0][0] > box1[0][0] && box2[0][1] < box1[0][1]) || // range contains other + (box1[0][0] > box2[0][0] && box1[0][0] < box2[0][1]) || // min contained in rng + (box2[0][0] > box1[0][0] && box2[0][0] < box1[0][1]); // min contained in rng + if (!x_neighbor) { + x_neighbor = (box1[0][1] == box2[0][0]-1) || (box1[0][0] == box2[0][1]+1); + } + + bool y_neighbor = (box1[1][1] == box2[1][0]) || (box1[1][0] == box2[1][1]) || // min matches max + (box1[1][0] == box2[1][0]) || (box1[1][1] == box2[1][1]) || // mins or maxs match + (box1[1][0] > box2[1][0] && box1[1][1] < box2[1][1]) || // range contains other + (box2[1][0] > box1[1][0] && box2[1][1] < box1[1][1]) || // range contains other + (box1[1][0] > box2[1][0] && box1[1][0] < box2[1][1]) || // min contained in rng + (box2[1][0] > box1[1][0] && box2[1][0] < box1[1][1]); // min contained in rng + if (!y_neighbor) { + y_neighbor = (box1[1][1] == box2[1][0]-1) || (box1[1][0] == box2[1][1]+1); + } + + bool z_neighbor = (box1[2][1] == box2[2][0]) || (box1[2][0] == box2[2][1]) || // min matches max + (box1[2][0] == box2[2][0]) || (box1[2][1] == box2[2][1]) || // mins or maxs match + (box1[2][0] > box2[2][0] && box1[2][1] < box2[2][1]) || // range contains other + (box2[2][0] > box1[2][0] && box2[2][1] < box1[2][1]) || // range contains other + (box1[2][0] > box2[2][0] && box1[2][0] < box2[2][1]) || // min contained in rng + (box2[2][0] > box1[2][0] && box2[2][0] < box1[2][1]); // min contained in rng + if (!z_neighbor) { + z_neighbor = (box1[2][1] == box2[2][0]-1) || (box1[2][0] == box2[2][1]+1); + } + + return x_neighbor && y_neighbor && z_neighbor; +} + +template +void create_map_id_to_row(int global_nx, int global_ny, int global_nz, + const Box& box, + std::map& id_to_row) +{ + GlobalOrdinal num_my_ids = get_num_ids(box); + + typename std::vector all_ids; + bool include_ghost_layer = false; + get_ids(global_nx, global_ny, global_nz, box, all_ids, include_ghost_layer); + + GlobalOrdinal my_first_row = 0; + typename std::vector global_offsets; + std::vector all_boxes; + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + + GlobalOrdinal local_num_ids = num_my_ids; + global_offsets.resize(numprocs); + MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); + MPI_Allgather(&local_num_ids, 1, mpi_dtype, &global_offsets[0], 1, mpi_dtype, MPI_COMM_WORLD); + GlobalOrdinal offset = 0; + for(int i=0; i(&box.ranges[0]); + MPI_Allgather(local_box_ranges, 6, MPI_INT, &all_boxes[0], 6, MPI_INT, MPI_COMM_WORLD); +#endif + + if (all_ids.size() > 0) { + id_to_row.insert(std::make_pair(all_ids[0], my_first_row)); + } + + for(size_t i=1; i 0) { + id_to_row.insert(std::make_pair(all_ids[0], first_row)); + } + for(size_t j=1; j::iterator iter = id_to_row.begin(), end = id_to_row.end(); +//for(; iter!=end; ++iter) { +// std::cout<<"proc "<first<<" :: "<second< +#include + +#include +#include + +#include + +namespace miniFE { + +template +void print_vec(const std::vector& vec, const std::string& name) +{ + for(size_t i=0; i +bool breakdown(typename VectorType::ScalarType inner, + const VectorType& v, + const VectorType& w) +{ + typedef typename VectorType::ScalarType Scalar; + typedef typename TypeTraits::magnitude_type magnitude; + +//This is code that was copied from Aztec, and originally written +//by my hero, Ray Tuminaro. +// +//Assuming that inner = (inner product of v and w), +//v and w are considered orthogonal if +// |inner| < 100 * ||v||_2 * ||w||_2 * epsilon + + magnitude vnorm = std::sqrt(dot(v,v)); + magnitude wnorm = std::sqrt(dot(w,w)); + return std::abs(inner) <= 100*vnorm*wnorm*std::numeric_limits::epsilon(); +} + +template +void +cg_solve(OperatorType& A, + const VectorType& b, + VectorType& x, + Matvec matvec, + typename OperatorType::LocalOrdinalType max_iter, + typename TypeTraits::magnitude_type& tolerance, + typename OperatorType::LocalOrdinalType& num_iters, + typename TypeTraits::magnitude_type& normr, + timer_type* my_cg_times) +{ + typedef typename OperatorType::ScalarType ScalarType; + typedef typename OperatorType::GlobalOrdinalType GlobalOrdinalType; + typedef typename OperatorType::LocalOrdinalType LocalOrdinalType; + typedef typename TypeTraits::magnitude_type magnitude_type; + + timer_type t0 = 0, tWAXPY = 0, tDOT = 0, tMATVEC = 0, tMATVECDOT = 0; + timer_type total_time = mytimer(); + + int myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#ifdef USE_MPI_PCONTROL + MPI_Pcontrol(1); +#endif +#endif + + if (!A.has_local_indices) { + std::cerr << "miniFE::cg_solve ERROR, A.has_local_indices is false, needs to be true. This probably means " + << "miniFE::make_local_matrix(A) was not called prior to calling miniFE::cg_solve." + << std::endl; + return; + } + + size_t nrows = A.rows.size(); + LocalOrdinalType ncols = A.num_cols; + + VectorType r(b.startIndex, nrows); + VectorType p(0, ncols); + VectorType Ap(b.startIndex, nrows); + + normr = 0; + magnitude_type rtrans = 0; + magnitude_type oldrtrans = 0; + + LocalOrdinalType print_freq = max_iter/10; + if (print_freq>50) print_freq = 50; + if (print_freq<1) print_freq = 1; + + ScalarType one = 1.0; + ScalarType zero = 0.0; + + TICK(); waxpby(one, x, zero, x, p); TOCK(tWAXPY); + +// print_vec(p.coefs, "p"); + + TICK(); + matvec(A, p, Ap); + TOCK(tMATVEC); + + TICK(); waxpby(one, b, -one, Ap, r); TOCK(tWAXPY); + + TICK(); rtrans = dot(r, r); TOCK(tDOT); + +//std::cout << "rtrans="<add("Global Nrows",global_num_rows); + ydoc.get("Matrix attributes")->add("Global NNZ",global_nnz); + + //compute how much memory the matrix occupies: + //num-bytes = sizeof(GlobalOrdinal)*global_nrows for A.rows + // + sizeof(LocalOrdinal)*global_nrows for A.rows_offsets + // + sizeof(GlobalOrdinal)*global_nnz for A.packed_cols + // + sizeof(Scalar)*global_nnz for A.packed_coefs + + double invGB = 1.0/(1024*1024*1024); + double memGB = invGB*global_nrows*sizeof(GlobalOrdinal); + memGB += invGB*global_nrows*sizeof(LocalOrdinal); + memGB += invGB*global_nnz*sizeof(GlobalOrdinal); + memGB += invGB*global_nnz*sizeof(Scalar); + ydoc.get("Matrix attributes")->add("Global Memory (GB)",memGB); + + ydoc.get("Matrix attributes")->add("Pll Memory Overhead (MB)",mem_overhead_MB); + + size_t min_num_rows = min_nrows; + size_t max_num_rows = max_nrows; + ydoc.get("Matrix attributes")->add("Rows per proc MIN",min_num_rows); + ydoc.get("Matrix attributes")->add("Rows per proc MAX",max_num_rows); + ydoc.get("Matrix attributes")->add("Rows per proc AVG",avg_nrows); + ydoc.get("Matrix attributes")->add("NNZ per proc MIN",min_nnz); + ydoc.get("Matrix attributes")->add("NNZ per proc MAX",max_nnz); + ydoc.get("Matrix attributes")->add("NNZ per proc AVG",avg_nnz); + } + + return global_nnz; +} + +}//namespace miniFE + +#endif + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/driver.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/driver.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/driver.hpp @@ -0,0 +1,412 @@ +#ifndef _driver_hpp_ +#define _driver_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifdef MINIFE_CSR_MATRIX +#include +#elif defined(MINIFE_ELL_MATRIX) +#include +#else +#include +#endif + +#include + +#include + +#include +#include + +#include + +#include +#include +#include +#include +#if MINIFE_KERNELS != 0 +#include +#endif +#include +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +#define RUN_TIMED_FUNCTION(msg, fn, time_inc, time_total) \ +{ \ + /*if (myproc==0) { \ + std::cout.width(30); \ + std::cout << msg; \ + std::cout.flush(); \ + }*/ \ + timer_type rtf_t0 = mytimer(); \ + fn; \ + time_inc = mytimer() - rtf_t0; \ + time_total += time_inc; \ + /*if (myproc==0) { \ + std::cout << time_inc << "s, total time: " << time_total << std::endl; \ + }*/ \ +} + +//This program assembles finite-element matrices into a global matrix and +//vector, then solves the linear-system using Conjugate Gradients. +//Each finite-element is a hexahedron with 8 vertex-nodes. +// +//Notes: +//- In finite-element terms, the box dimensions are in elements, not nodes. +// In other words, a 2x2x2 box describes 8 elements, each of which has 8 nodes, +// so it is a 3x3x3 node domain (27 nodes). +// The assembled linear system will have 1 equation for each finite element node. +// +//- The coordinate origin is at the corner of the global box where x=0, +// y=0, z=0, and the box extends along the positive x-axis, positive y-axis, +// and the positive z-axis. +// +//- Some aspects of matrix-structure generation and finite-element assembly +// are convenient to do using global node identifiers. +// A global identifier for each node is obtained from coordinates plus +// global box dimensions. See the function 'get_id' in box_utils.hpp. +// +//- Each node corresponds to a row in the matrix. The RCB partitioning method +// we use to split the global box among processors results in some +// processors owning non-contiguous blocks of global node identifiers. +// Since it is convenient for matrices and vectors to store contiguously- +// numbered blocks of rows, we map global node identifiers to a separate +// space of row numbers such that each processor's nodes correspond to a +// contiguous block of row numbers. +// + +namespace miniFE { + +template +int +driver(const Box& global_box, Box& my_box, + Parameters& params, YAML_Doc& ydoc) +{ + int global_nx = global_box[0][1]; + int global_ny = global_box[1][1]; + int global_nz = global_box[2][1]; + + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + if (params.load_imbalance > 0) { + add_imbalance(global_box, my_box, params.load_imbalance, ydoc); + } + + float largest_imbalance = 0, std_dev = 0; + compute_imbalance(global_box, my_box, largest_imbalance, + std_dev, ydoc, true); + + + //Create a representation of the mesh: + //Note that 'simple_mesh_description' is a virtual or conceptual + //mesh that doesn't actually store mesh data. +#ifdef TIME_IT + if (myproc==0) { + std::cout.width(30); + std::cout << "creating/filling mesh..."; + std::cout.flush(); + } +#endif + + timer_type t_start = mytimer(); + timer_type t0 = mytimer(); + + simple_mesh_description mesh(global_box, my_box); + + timer_type mesh_fill = mytimer() - t0; + timer_type t_total = mytimer() - t_start; + +#ifdef TIME_IT + if (myproc==0) { + std::cout << mesh_fill << "s, total time: " << t_total << std::endl; + } +#endif + + //next we will generate the matrix structure. + + //Declare matrix object: + +#if defined(MINIFE_ELL_MATRIX) + typedef ELLMatrix MatrixType; +#else + typedef CSRMatrix MatrixType; +#endif + + MatrixType A; + + timer_type gen_structure; + RUN_TIMED_FUNCTION("generating matrix structure...", + generate_matrix_structure(mesh, A), + gen_structure, t_total); + + GlobalOrdinal local_nrows = A.rows.size(); + GlobalOrdinal my_first_row = local_nrows > 0 ? A.rows[0] : -1; + + Vector b(my_first_row, local_nrows); + Vector x(my_first_row, local_nrows); + + //Assemble finite-element sub-matrices and sub-vectors into the global + //linear system: + + timer_type fe_assembly; + RUN_TIMED_FUNCTION("assembling FE data...", + assemble_FE_data(mesh, A, b, params), + fe_assembly, t_total); + + if (myproc == 0) { + ydoc.add("Matrix structure generation",""); + ydoc.get("Matrix structure generation")->add("Mat-struc-gen Time",gen_structure); + ydoc.add("FE assembly",""); + ydoc.get("FE assembly")->add("FE assembly Time",fe_assembly); + } + +#ifdef MINIFE_DEBUG + write_matrix("A_prebc.mtx", A); + write_vector("b_prebc.vec", b); +#endif + + //Now apply dirichlet boundary-conditions + //(Apply the 0-valued surfaces first, then the 1-valued surface last.) + + timer_type dirbc_time; + RUN_TIMED_FUNCTION("imposing Dirichlet BC...", + impose_dirichlet(0.0, A, b, global_nx+1, global_ny+1, global_nz+1, mesh.bc_rows_0), dirbc_time, t_total); + RUN_TIMED_FUNCTION("imposing Dirichlet BC...", + impose_dirichlet(1.0, A, b, global_nx+1, global_ny+1, global_nz+1, mesh.bc_rows_1), dirbc_time, t_total); + +#ifdef MINIFE_DEBUG + write_matrix("A.mtx", A); + write_vector("b.vec", b); +#endif + + //Transform global indices to local, set up communication information: + + timer_type make_local_time; + RUN_TIMED_FUNCTION("making matrix indices local...", + make_local_matrix(A), + make_local_time, t_total); + +#ifdef MINIFE_DEBUG + write_matrix("A_local.mtx", A); + write_vector("b_local.vec", b); +#endif + + size_t global_nnz = compute_matrix_stats(A, myproc, numprocs, ydoc); + + //Prepare to perform conjugate gradient solve: + + LocalOrdinal max_iters = 200; + LocalOrdinal num_iters = 0; + typedef typename TypeTraits::magnitude_type magnitude; + magnitude rnorm = 0; + magnitude tol = std::numeric_limits::epsilon(); + + timer_type cg_times[NUM_TIMERS]; + + typedef Vector VectorType; + + t_total = mytimer() - t_start; + + bool matvec_with_comm_overlap = params.mv_overlap_comm_comp==1; + + int verify_result = 0; + +#if MINIFE_KERNELS != 0 + if (myproc==0) { + std::cout.width(30); + std::cout << "Starting kernel timing loops ..." << std::endl; + } + + max_iters = 500; + x.coefs[0] = 0.9; + if (matvec_with_comm_overlap) { + time_kernels(A, b, x, matvec_overlap(), max_iters, rnorm, cg_times); + } + else { + time_kernels(A, b, x, matvec_std(), max_iters, rnorm, cg_times); + } + num_iters = max_iters; + std::string title("Kernel timings"); +#else + if (myproc==0) { + std::cout << "Starting CG solver ... " << std::endl; + } + + if (matvec_with_comm_overlap) { +#ifdef MINIFE_CSR_MATRIX + rearrange_matrix_local_external(A); + cg_solve(A, b, x, matvec_overlap(), max_iters, tol, + num_iters, rnorm, cg_times); +#else + std::cout << "ERROR, matvec with overlapping comm/comp only works with CSR matrix."<(), max_iters, tol, + num_iters, rnorm, cg_times); + if (myproc == 0) { + std::cout << "Final Resid Norm: " << rnorm << std::endl; + } + + if (params.verify_solution > 0) { + double tolerance = 0.06; + bool verify_whole_domain = false; + #ifdef MINIFE_DEBUG + verify_whole_domain = true; + #endif + if (myproc == 0) { + if (verify_whole_domain) std::cout << "verifying solution..." << std::endl; + else std::cout << "verifying solution at ~ (0.5, 0.5, 0.5) ..." << std::endl; + } + verify_result = verify_solution(mesh, x, tolerance, verify_whole_domain); + } + } + +#ifdef MINIFE_DEBUG + write_vector("x.vec", x); +#endif + std::string title("CG solve"); +#endif + + if (myproc == 0) { + ydoc.get("Global Run Parameters")->add("ScalarType",TypeTraits::name()); + ydoc.get("Global Run Parameters")->add("GlobalOrdinalType",TypeTraits::name()); + ydoc.get("Global Run Parameters")->add("LocalOrdinalType",TypeTraits::name()); + ydoc.add(title,""); + ydoc.get(title)->add("Iterations",num_iters); + ydoc.get(title)->add("Final Resid Norm",rnorm); + + GlobalOrdinal global_nrows = global_nx; + global_nrows *= global_ny*global_nz; + + //flops-per-mv, flops-per-dot, flops-per-waxpy: + double mv_flops = global_nnz*2.0; + double dot_flops = global_nrows*2.0; + double waxpy_flops = global_nrows*3.0; + +#if MINIFE_KERNELS == 0 +//if MINIFE_KERNELS == 0 then we did a CG solve, and in that case +//there were num_iters+1 matvecs, num_iters*2 dots, and num_iters*3+2 waxpys. + mv_flops *= (num_iters+1); + dot_flops *= (2*num_iters); + waxpy_flops *= (3*num_iters+2); +#else +//if MINIFE_KERNELS then we did one of each operation per iteration. + mv_flops *= num_iters; + dot_flops *= num_iters; + waxpy_flops *= num_iters; +#endif + + double total_flops = mv_flops + dot_flops + waxpy_flops; + + double mv_mflops = -1; + if (cg_times[MATVEC] > 1.e-4) + mv_mflops = 1.e-6 * (mv_flops/cg_times[MATVEC]); + + double dot_mflops = -1; + if (cg_times[DOT] > 1.e-4) + dot_mflops = 1.e-6 * (dot_flops/cg_times[DOT]); + + double waxpy_mflops = -1; + if (cg_times[WAXPY] > 1.e-4) + waxpy_mflops = 1.e-6 * (waxpy_flops/cg_times[WAXPY]); + + double total_mflops = -1; + if (cg_times[TOTAL] > 1.e-4) + total_mflops = 1.e-6 * (total_flops/cg_times[TOTAL]); + + ydoc.get(title)->add("WAXPY Time",cg_times[WAXPY]); + ydoc.get(title)->add("WAXPY Flops",waxpy_flops); + if (waxpy_mflops >= 0) + ydoc.get(title)->add("WAXPY Mflops",waxpy_mflops); + else + ydoc.get(title)->add("WAXPY Mflops","inf"); + + ydoc.get(title)->add("DOT Time",cg_times[DOT]); + ydoc.get(title)->add("DOT Flops",dot_flops); + if (dot_mflops >= 0) + ydoc.get(title)->add("DOT Mflops",dot_mflops); + else + ydoc.get(title)->add("DOT Mflops","inf"); + + ydoc.get(title)->add("MATVEC Time",cg_times[MATVEC]); + ydoc.get(title)->add("MATVEC Flops",mv_flops); + if (mv_mflops >= 0) + ydoc.get(title)->add("MATVEC Mflops",mv_mflops); + else + ydoc.get(title)->add("MATVEC Mflops","inf"); + +#ifdef MINIFE_FUSED + ydoc.get(title)->add("MATVECDOT Time",cg_times[MATVECDOT]); + ydoc.get(title)->add("MATVECDOT Flops",mv_flops); + if (mv_mflops >= 0) + ydoc.get(title)->add("MATVECDOT Mflops",mv_mflops); + else + ydoc.get(title)->add("MATVECDOT Mflops","inf"); +#endif + +#if MINIFE_KERNELS == 0 + ydoc.get(title)->add("Total",""); + ydoc.get(title)->get("Total")->add("Total CG Time",cg_times[TOTAL]); + ydoc.get(title)->get("Total")->add("Total CG Flops",total_flops); + if (total_mflops >= 0) + ydoc.get(title)->get("Total")->add("Total CG Mflops",total_mflops); + else + ydoc.get(title)->get("Total")->add("Total CG Mflops","inf"); + ydoc.get(title)->add("Time per iteration",cg_times[TOTAL]/num_iters); +#endif + } + + return verify_result; +} + +}//namespace miniFE + +#endif + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/exchange_externals.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/exchange_externals.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/exchange_externals.hpp @@ -0,0 +1,269 @@ +#ifndef _exchange_externals_hpp_ +#define _exchange_externals_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +#include + +#include + +namespace miniFE { + +template +void +exchange_externals(MatrixType& A, + VectorType& x) +{ +#ifdef HAVE_MPI +#ifdef MINIFE_DEBUG + std::ostream& os = outstream(); + os << "entering exchange_externals\n"; +#endif + + int numprocs = 1; + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + + if (numprocs < 2) return; + + typedef typename MatrixType::ScalarType Scalar; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + + // Extract Matrix pieces + + int local_nrow = A.rows.size(); + int num_neighbors = A.neighbors.size(); + const std::vector& recv_length = A.recv_length; + const std::vector& send_length = A.send_length; + const std::vector& neighbors = A.neighbors; + const std::vector& elements_to_send = A.elements_to_send; + + std::vector& send_buffer = A.send_buffer; + + // + // first post receives, these are immediate receives + // Do not wait for result to come, will do that at the + // wait call below. + // + + int MPI_MY_TAG = 99; + + std::vector& request = A.request; + + // + // Externals are at end of locals + // + + std::vector& x_coefs = x.coefs; + Scalar* x_external = &(x_coefs[local_nrow]); + + MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); + + // Post receives first + for(int i=0; i x.coefs.size()) { + os << "error, out-of-range. x.coefs.size()=="< exch_ext_requests; +#endif + +template +void +begin_exchange_externals(MatrixType& A, + VectorType& x) +{ +#ifdef HAVE_MPI + + int numprocs = 1, myproc = 0; + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + + if (numprocs < 2) return; + + typedef typename MatrixType::ScalarType Scalar; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + + // Extract Matrix pieces + + int local_nrow = A.rows.size(); + int num_neighbors = A.neighbors.size(); + const std::vector& recv_length = A.recv_length; + const std::vector& send_length = A.send_length; + const std::vector& neighbors = A.neighbors; + const std::vector& elements_to_send = A.elements_to_send; + + std::vector send_buffer(elements_to_send.size(), 0); + + // + // first post receives, these are immediate receives + // Do not wait for result to come, will do that at the + // wait call below. + // + + int MPI_MY_TAG = 99; + + exch_ext_requests.resize(num_neighbors); + + // + // Externals are at end of locals + // + + std::vector& x_coefs = x.coefs; + Scalar* x_external = &(x_coefs[local_nrow]); + + MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); + + // Post receives first + for(int i=0; i +inline +KERNEL_PREFIX void gauss_pts(int N, Scalar* pts, Scalar* wts) +{ + const Scalar x2 = 0.577350269; // 1.0/sqrt(3.0) + const Scalar x3 = 0.77459667; // sqrt(3.0/5.0) + const Scalar w1 = 0.55555556; // 5.0/9.0 + const Scalar w2 = 0.88888889; // 8.0/9.0 + + switch(N) { + case 1: + pts[0] = 0.0; wts[0] = 2.0; + break; + case 2: + pts[0] = -x2; wts[0] = 1.0; + pts[1] = x2; wts[1] = 1.0; + break; + case 3: + pts[0] = -x3; wts[0] = w1; + pts[1] = 0.0; wts[1] = w2; + pts[2] = x3; wts[2] = w1; + break; + default: + break; + } +} + +}//namespace miniFE + +#endif + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/generate_matrix_structure.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/generate_matrix_structure.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/generate_matrix_structure.hpp @@ -0,0 +1,154 @@ +#ifndef _generate_matrix_structure_hpp_ +#define _generate_matrix_structure_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include +#include +#include +#include + +#include +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +namespace miniFE { + +template +int +generate_matrix_structure(const simple_mesh_description& mesh, + MatrixType& A) +{ + int myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + int threw_exc = 0; + try { + + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + + int global_nodes_x = mesh.global_box[0][1]+1; + int global_nodes_y = mesh.global_box[1][1]+1; + int global_nodes_z = mesh.global_box[2][1]+1; + Box box; + copy_box(mesh.local_box, box); + + //num-owned-nodes in each dimension is num-elems+1 + //only if num-elems > 0 in that dimension *and* + //we are at the high end of the global range in that dimension: + if (box[0][1] > box[0][0] && box[0][1] == mesh.global_box[0][1]) ++box[0][1]; + if (box[1][1] > box[1][0] && box[1][1] == mesh.global_box[1][1]) ++box[1][1]; + if (box[2][1] > box[2][0] && box[2][1] == mesh.global_box[2][1]) ++box[2][1]; + + GlobalOrdinal global_nrows = global_nodes_x; + global_nrows *= global_nodes_y*global_nodes_z; + + GlobalOrdinal nrows = get_num_ids(box); + try { + A.reserve_space(nrows, 27); + } + catch(std::exception& exc) { + std::ostringstream osstr; + osstr << "One of A.rows.resize, A.row_offsets.resize, A.packed_cols.reserve or A.packed_coefs.reserve: nrows=" < rows(nrows); + std::vector row_offsets(nrows+1); + std::vector row_coords(nrows*3); + + unsigned roffset = 0; + GlobalOrdinal nnz = 0; + + for(int iz=box[2][0]; iz(global_nodes_x, global_nodes_y, global_nodes_z, + ix, iy, iz); + rows[roffset] = mesh.map_id_to_row(row_id); + row_coords[roffset*3] = ix; + row_coords[roffset*3+1] = iy; + row_coords[roffset*3+2] = iz; + row_offsets[roffset++] = nnz; + + for(int sz=-1; sz<=1; ++sz) { + for(int sy=-1; sy<=1; ++sy) { + for(int sx=-1; sx<=1; ++sx) { + GlobalOrdinal col_id = + get_id(global_nodes_x, global_nodes_y, global_nodes_z, + ix+sx, iy+sy, iz+sz); + if (col_id >= 0 && col_id < global_nrows) { + ++nnz; + } + } + } + } + + } + } + } + + if (roffset != nrows) { + throw std::runtime_error("ERROR in generate_matrix_structure, roffset != nrows."); + } + row_offsets[roffset] = nnz; + + init_matrix(A, rows, row_offsets, row_coords, + global_nodes_x, global_nodes_y, global_nodes_z, global_nrows, mesh); + } + catch(...) { + std::cout << "proc " << myproc << " threw an exception in generate_matrix_structure, probably due to running out of memory." << std::endl; + threw_exc = 1; + } +#ifdef HAVE_MPI + int global_throw = 0; + MPI_Allreduce(&threw_exc, &global_throw, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + threw_exc = global_throw; +#endif + if (threw_exc) { + return 1; + } + + return 0; +} + +}//namespace miniFE + +#endif + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/imbalance.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/imbalance.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/imbalance.hpp @@ -0,0 +1,298 @@ +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#ifndef _imbalance_hpp_ +#define _imbalance_hpp_ + +#include + +#ifdef HAVE_MPI +#include +#endif + +#include +#include +#include + +namespace miniFE { + +const int X = 0; +const int Y = 1; +const int Z = 2; +const int NONE = 3; + +const int LOWER = 0; +const int UPPER = 1; + +template +void +compute_imbalance(const Box& global_box, + const Box& local_box, + float& largest_imbalance, + float& std_dev, + YAML_Doc& doc, + bool record_in_doc) +{ + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + GlobalOrdinal local_nrows = get_num_ids(local_box); + GlobalOrdinal min_nrows = 0, max_nrows = 0, global_nrows = 0; + int min_proc = myproc, max_proc = myproc; + get_global_min_max(local_nrows, global_nrows, min_nrows, min_proc, + max_nrows, max_proc); + + float avg_nrows = global_nrows; + avg_nrows /= numprocs; + + //largest_imbalance will be the difference between the min (or max) + //rows-per-processor and avg_nrows, represented as a percentage: + largest_imbalance = percentage_difference(min_nrows, avg_nrows); + + float tmp = percentage_difference(max_nrows, avg_nrows); + if (tmp > largest_imbalance) largest_imbalance = tmp; + + std_dev = compute_std_dev_as_percentage(local_nrows, avg_nrows); + + if (myproc == 0 && record_in_doc) { + doc.add("Rows-per-proc Load Imbalance",""); + doc.get("Rows-per-proc Load Imbalance")->add("Largest (from avg, %)",largest_imbalance); + doc.get("Rows-per-proc Load Imbalance")->add("Std Dev (%)",std_dev); + } +} + +std::pair +decide_how_to_grow(const Box& global_box, const Box& local_box) +{ + std::pair result(NONE,UPPER); + + if (local_box[Z][UPPER] < global_box[Z][UPPER]) { + result.first = Z; + result.second = UPPER; + return result; + } + if (local_box[Z][LOWER] > global_box[Z][LOWER]) { + result.first = Z; + result.second = LOWER; + return result; + } + if (local_box[Y][UPPER] < global_box[Y][UPPER]) { + result.first = Y; + result.second = UPPER; + return result; + } + if (local_box[Y][LOWER] > global_box[Y][LOWER]) { + result.first = Y; + result.second = LOWER; + return result; + } + if (local_box[X][UPPER] < global_box[X][UPPER]) { + result.first = X; + result.second = UPPER; + return result; + } + if (local_box[X][LOWER] > global_box[X][LOWER]) { + result.first = X; + result.second = LOWER; + return result; + } + return result; +} + +std::pair +decide_how_to_shrink(const Box& global_box, const Box& local_box) +{ + std::pair result(NONE,UPPER); + + if (local_box[Z][UPPER] < global_box[Z][UPPER] && local_box[Z][UPPER]-local_box[Z][LOWER] > 2) { + result.first = Z; + result.second = UPPER; + return result; + } + if (local_box[Z][LOWER] > global_box[Z][LOWER] && local_box[Z][UPPER]-local_box[Z][LOWER] > 2) { + result.first = Z; + result.second = LOWER; + return result; + } + if (local_box[Y][UPPER] < global_box[Y][UPPER] && local_box[Y][UPPER]-local_box[Y][LOWER] > 2) { + result.first = Y; + result.second = UPPER; + return result; + } + if (local_box[Y][LOWER] > global_box[Y][LOWER] && local_box[Y][UPPER]-local_box[Y][LOWER] > 2) { + result.first = Y; + result.second = LOWER; + return result; + } + if (local_box[X][UPPER] < global_box[X][UPPER] && local_box[X][UPPER]-local_box[X][LOWER] > 2) { + result.first = X; + result.second = UPPER; + return result; + } + if (local_box[X][LOWER] > global_box[X][LOWER] && local_box[X][UPPER]-local_box[X][LOWER] > 2) { + result.first = X; + result.second = LOWER; + return result; + } + return result; +} + +template +void +add_imbalance(const Box& global_box, + Box& local_box, + float imbalance, + YAML_Doc& doc) +{ + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + if (numprocs == 1) { + return; + } + + float cur_imbalance = 0, cur_std_dev = 0; + compute_imbalance(global_box, local_box, + cur_imbalance, cur_std_dev, doc, false); + + while (cur_imbalance < imbalance) { + GlobalOrdinal local_nrows = get_num_ids(local_box); + GlobalOrdinal min_nrows = 0, max_nrows = 0, global_nrows = 0; + int min_proc = myproc, max_proc = myproc; + get_global_min_max(local_nrows, global_nrows, min_nrows, min_proc, + max_nrows, max_proc); + + std::pair grow(NONE,UPPER); + int grow_axis_val = -1; + std::pair shrink(NONE,UPPER); + int shrink_axis_val = -1; + + if (myproc == max_proc) { + grow = decide_how_to_grow(global_box, local_box); + if (grow.first != NONE) { + grow_axis_val = local_box[grow.first][grow.second]; + } + } + if (myproc == min_proc) { + shrink = decide_how_to_shrink(global_box, local_box); + if (shrink.first != NONE) { + shrink_axis_val = local_box[shrink.first][shrink.second]; + } + } + + int grow_info[8] = {grow.first, grow.second, + local_box[X][0], local_box[X][1], + local_box[Y][0], local_box[Y][1], + local_box[Z][0], local_box[Z][1]}; + + int shrink_info[8] = {shrink.first, shrink.second, + local_box[X][0], local_box[X][1], + local_box[Y][0], local_box[Y][1], + local_box[Z][0], local_box[Z][1]}; +#ifdef HAVE_MPI + MPI_Bcast(&grow_info[0], 8, MPI_INT, max_proc, MPI_COMM_WORLD); + MPI_Bcast(&shrink_info[0], 8, MPI_INT, min_proc, MPI_COMM_WORLD); +#endif + + int grow_axis = grow_info[0]; + int grow_end = grow_info[1]; + int shrink_axis = shrink_info[0]; + int shrink_end = shrink_info[1]; + int grow_incr = 1; + if (grow_end == LOWER) grow_incr = -1; + int shrink_incr = -1; + if (shrink_end == LOWER) shrink_incr = 1; + if (grow_axis != NONE) grow_axis_val = grow_info[2+grow_axis*2+grow_end]; + if (shrink_axis != NONE) shrink_axis_val = shrink_info[2+shrink_axis*2+shrink_end]; + + if (grow_axis == NONE && shrink_axis == NONE) break; + + bool grow_status = grow_axis==NONE ? false : true; + if (grow_axis != NONE) { + if ((grow_incr == 1 && local_box[grow_axis][0] == grow_axis_val) || + (grow_incr == -1 && local_box[grow_axis][1] == grow_axis_val)) { + if (local_box[grow_axis][1] - local_box[grow_axis][0] < 2) { + grow_status = false; + } + } + } + + bool shrink_status = shrink_axis==NONE ? false : true; + if (shrink_axis != NONE) { + if ((shrink_incr == 1 && local_box[shrink_axis][0] == shrink_axis_val) || + (shrink_incr == -1 && local_box[shrink_axis][1] == shrink_axis_val)) { + if (local_box[shrink_axis][1] - local_box[shrink_axis][0] < 2) { + shrink_status = false; + } + } + } + +#ifdef HAVE_MPI + int statusints[2] = { grow_status ? 0 : 1, shrink_status ? 0 : 1 }; + int globalstatus[2] = { 0, 0 }; + MPI_Allreduce(&statusints, &globalstatus, 2, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + grow_status = globalstatus[0]>0 ? false : true; + shrink_status = globalstatus[1]>0 ? false : true; +#endif + + if (grow_status == false && shrink_status == false) break; + + if (grow_status && grow_axis != NONE) { + if (local_box[grow_axis][0] == grow_axis_val) { + local_box[grow_axis][0] += grow_incr; + } + + if (local_box[grow_axis][1] == grow_axis_val) { + local_box[grow_axis][1] += grow_incr; + } + } + + if (shrink_status && shrink_axis != NONE) { + if (local_box[shrink_axis][0] == shrink_axis_val) { + local_box[shrink_axis][0] += shrink_incr; + } + + if (local_box[shrink_axis][1] == shrink_axis_val) { + local_box[shrink_axis][1] += shrink_incr; + } + } + + compute_imbalance(global_box, local_box, + cur_imbalance, cur_std_dev, doc, false); + } +} + +}//namespace miniFE + +#endif + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/main.cpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/main.cpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/main.cpp @@ -0,0 +1,248 @@ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include +#include +#include +#include + +#ifdef MINIFE_REPORT_RUSAGE +#include +#include +#endif + +#include + +#include + +#ifdef HAVE_MPI +#include +#endif + +#include +#include +#include +#include +#include +#include +#include + +#if MINIFE_INFO != 0 +#include +#else +#include +#endif + +//The following macros should be specified as compile-macros in the +//makefile. They are defaulted here just in case... +#ifndef MINIFE_SCALAR +#define MINIFE_SCALAR double +#endif +#ifndef MINIFE_LOCAL_ORDINAL +#define MINIFE_LOCAL_ORDINAL int +#endif +#ifndef MINIFE_GLOBAL_ORDINAL +#define MINIFE_GLOBAL_ORDINAL int +#endif + +// ************************************************************************ + +void add_params_to_yaml(YAML_Doc& doc, miniFE::Parameters& params); +void add_configuration_to_yaml(YAML_Doc& doc, int numprocs, int numthreads); +void add_timestring_to_yaml(YAML_Doc& doc); + +// +//We will create a 'box' of size nx X ny X nz, partition it among processors, +//then call miniFE::driver which will use the partitioned box as the domain +//from which to assemble finite-element matrices into a global matrix and +//vector, then solve the linear-system using Conjugate Gradients. +// + +int main(int argc, char** argv) { + miniFE::Parameters params; + miniFE::get_parameters(argc, argv, params); + + int numprocs = 1, myproc = 0; + miniFE::initialize_mpi(argc, argv, numprocs, myproc); + +#ifdef HAVE_MPI +#ifdef USE_MPI_PCONTROL + MPI_Pcontrol(0); +#endif +#endif + + miniFE::timer_type start_time = miniFE::mytimer(); + +#ifdef MINIFE_DEBUG + outstream(numprocs, myproc); +#endif + + //make sure each processor has the same parameters: + miniFE::broadcast_parameters(params); + + + Box global_box = { 0, params.nx, 0, params.ny, 0, params.nz }; + std::vector local_boxes(numprocs); + + box_partition(0, numprocs, 2, global_box, &local_boxes[0]); + + Box& my_box = local_boxes[myproc]; + + MINIFE_GLOBAL_ORDINAL num_my_ids = miniFE::get_num_ids(my_box); + MINIFE_GLOBAL_ORDINAL min_ids = num_my_ids; + +#ifdef HAVE_MPI + MPI_Datatype mpi_dtype = miniFE::TypeTraits::mpi_type(); + MPI_Allreduce(&num_my_ids, &min_ids, 1, mpi_dtype, MPI_MIN, MPI_COMM_WORLD); +#endif + + if (min_ids == 0) { + std::cout<<"One or more processors have 0 equations. Not currently supported. Exiting."<. + //To run miniFE with float instead of double, or 'long long' instead of int, + //etc., change these template-parameters by changing the macro definitions in + //the makefile or on the make command-line. + + int return_code = + miniFE::driver< MINIFE_SCALAR, MINIFE_LOCAL_ORDINAL, MINIFE_GLOBAL_ORDINAL>(global_box, my_box, params, doc); + + miniFE::timer_type total_time = miniFE::mytimer() - start_time; + +#ifdef MINIFE_REPORT_RUSAGE + struct rusage get_mem; + getrusage(RUSAGE_SELF, &get_mem); + + long long int rank_rss = get_mem.ru_maxrss; + long long int global_rss = 0; + long long int max_rss = 0; + +#ifdef HAVE_MPI + MPI_Reduce(&rank_rss, &global_rss, 1, + MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&rank_rss, &max_rss, 1, + MPI_LONG_LONG, MPI_MAX, 0, MPI_COMM_WORLD); + if (myproc == 0) { + doc.add("Global All-RSS (kB)", global_rss); + doc.add("Global Max-RSS (kB)", max_rss); + } +#else + doc.add("RSS (kB)", rank_rss); +#endif +#endif + + if (myproc == 0) { + doc.add("Total Program Time",total_time); + doc.generateYAML(); + } + + miniFE::finalize_mpi(); + + return return_code; +} + +void add_params_to_yaml(YAML_Doc& doc, miniFE::Parameters& params) +{ + doc.add("Global Run Parameters",""); + doc.get("Global Run Parameters")->add("dimensions",""); + doc.get("Global Run Parameters")->get("dimensions")->add("nx",params.nx); + doc.get("Global Run Parameters")->get("dimensions")->add("ny",params.ny); + doc.get("Global Run Parameters")->get("dimensions")->add("nz",params.nz); + doc.get("Global Run Parameters")->add("load_imbalance", params.load_imbalance); + if (params.mv_overlap_comm_comp == 1) { + std::string val("1 (yes)"); + doc.get("Global Run Parameters")->add("mv_overlap_comm_comp", val); + } + else { + std::string val("0 (no)"); + doc.get("Global Run Parameters")->add("mv_overlap_comm_comp", val); + } +} + +void add_configuration_to_yaml(YAML_Doc& doc, int numprocs, int numthreads) +{ + doc.get("Global Run Parameters")->add("number of processors", numprocs); + + doc.add("Platform",""); + doc.get("Platform")->add("hostname",MINIFE_HOSTNAME); + doc.get("Platform")->add("kernel name",MINIFE_KERNEL_NAME); + doc.get("Platform")->add("kernel release",MINIFE_KERNEL_RELEASE); + doc.get("Platform")->add("processor",MINIFE_PROCESSOR); + + doc.add("Build",""); + doc.get("Build")->add("CXX",MINIFE_CXX); +#if MINIFE_INFO != 0 + doc.get("Build")->add("compiler version",MINIFE_CXX_VERSION); +#endif + doc.get("Build")->add("CXXFLAGS",MINIFE_CXXFLAGS); + std::string using_mpi("no"); +#ifdef HAVE_MPI + using_mpi = "yes"; +#endif + doc.get("Build")->add("using MPI",using_mpi); +} + +void add_timestring_to_yaml(YAML_Doc& doc) +{ + std::time_t rawtime; + struct tm * timeinfo; + std::time(&rawtime); + timeinfo = std::localtime(&rawtime); + std::ostringstream osstr; + osstr.fill('0'); + osstr << timeinfo->tm_year+1900 << "-"; + osstr.width(2); osstr << timeinfo->tm_mon+1 << "-"; + osstr.width(2); osstr << timeinfo->tm_mday << ", "; + osstr.width(2); osstr << timeinfo->tm_hour << "-"; + osstr.width(2); osstr << timeinfo->tm_min << "-"; + osstr.width(2); osstr << timeinfo->tm_sec; + std::string timestring = osstr.str(); + doc.add("Run Date/Time",timestring); +} + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/make_local_matrix.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/make_local_matrix.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/make_local_matrix.hpp @@ -0,0 +1,447 @@ +#ifndef _make_local_matrix_hpp_ +#define _make_local_matrix_hpp_ +#include + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include + +#include + +#ifdef HAVE_MPI +#include +#endif + +namespace miniFE { + +template +void +make_local_matrix(MatrixType& A) +{ +#ifdef HAVE_MPI + int numprocs = 1, myproc = 0; + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + + if (numprocs < 2) { + A.num_cols = A.rows.size(); + A.has_local_indices = true; + return; + } + + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + typedef typename MatrixType::ScalarType Scalar; + + std::map externals; + LocalOrdinal num_external = 0; + + //Extract Matrix pieces + + size_t local_nrow = A.rows.size(); + GlobalOrdinal start_row = local_nrow>0 ? A.rows[0] : -1; + GlobalOrdinal stop_row = local_nrow>0 ? A.rows[local_nrow-1] : -1; + + // We need to convert the index values for the rows on this processor + // to a local index space. We need to: + // - Determine if each index reaches to a local value or external value + // - If local, subtract start_row from index value to get local index + // - If external, find out if it is already accounted for. + // - If so, then do nothing, + // - otherwise + // - add it to the list of external indices, + // - find out which processor owns the value. + // - Set up communication for sparse MV operation + + /////////////////////////////////////////// + // Scan the indices and transform to local + /////////////////////////////////////////// + + std::vector& external_index = A.external_index; + + for(size_t i=0; i tmp_buffer(numprocs, 0); // Temp buffer space needed below + + // Build list of global index offset + + std::vector global_index_offsets(numprocs, 0); + + tmp_buffer[myproc] = start_row; // This is my start row + + // This call sends the start_row of each ith processor to the ith + // entry of global_index_offsets on all processors. + // Thus, each processor knows the range of indices owned by all + // other processors. + // Note: There might be a better algorithm for doing this, but this + // will work... + + MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); + MPI_Allreduce(&tmp_buffer[0], &global_index_offsets[0], numprocs, mpi_dtype, + MPI_SUM, MPI_COMM_WORLD); + + // Go through list of externals and find the processor that owns each + std::vector external_processor(num_external); + + for(LocalOrdinal i=0; i=0; --j) { + if (global_index_offsets[j] <= cur_ind && global_index_offsets[j] >= 0) { + external_processor[i] = j; + break; + } + } + } + + ///////////////////////////////////////////////////////////////////////// + // Sift through the external elements. For each newly encountered external + // point assign it the next index in the sequence. Then look for other + // external elements who are updated by the same node and assign them the next + // set of index numbers in the sequence (ie. elements updated by the same node + // have consecutive indices). + ///////////////////////////////////////////////////////////////////////// + + size_t count = local_nrow; + std::vector& external_local_index = A.external_local_index; + external_local_index.assign(num_external, -1); + + for(LocalOrdinal i=0; i new_external_processor(num_external, 0); + + for(int i=0; i No external elements are updated by + // processor i. + // tmp_neighbors[i] = x ==> (x-1)/numprocs elements are updated from + // processor i. + /// + //////////////////////////////////////////////////////////////////////// + + std::vector tmp_neighbors(numprocs, 0); + + int num_recv_neighbors = 0; + int length = 1; + + for(LocalOrdinal i=0; i recv_list; + recv_list.push_back(new_external_processor[0]); + for(LocalOrdinal i=1; i send_list(num_send_neighbors, 0); + + // + // first post receives, these are immediate receives + // Do not wait for result to come, will do that at the + // wait call below. + // + int MPI_MY_TAG = 99; + + std::vector request(num_send_neighbors); + for(int i=0; i new_external(num_external); + for(LocalOrdinal i=0; i lengths(num_recv_neighbors); + + ++MPI_MY_TAG; + + // First post receives + + for(int i=0; i& neighbors = A.neighbors; + std::vector& recv_length = A.recv_length; + std::vector& send_length = A.send_length; + + neighbors.resize(num_recv_neighbors, 0); + A.request.resize(num_recv_neighbors); + recv_length.resize(num_recv_neighbors, 0); + send_length.resize(num_recv_neighbors, 0); + + LocalOrdinal j = 0; + for(int i=0; i= A.rows.size()) { + std::cout<<"start_row: "< +#ifdef __CUDACC__ + __host__ __device__ +#endif +KERNEL_PREFIX void fill(Scalar* begin, Scalar* end, const Scalar& val) +{ + while(begin != end) {*begin++ = val;} +} + +template +KERNEL_PREFIX void inverse_and_determinant3x3(const Scalar* J, Scalar* invJ, Scalar& detJ) +{ + //hardwired "3x3" in function-name allows us to assume + //that J and invJ have length 9: + + Scalar J00 = J[0]; + Scalar J01 = J[1]; + Scalar J02 = J[2]; + + Scalar J10 = J[3]; + Scalar J11 = J[4]; + Scalar J12 = J[5]; + + Scalar J20 = J[6]; + Scalar J21 = J[7]; + Scalar J22 = J[8]; + + Scalar term0 = J22*J11 - J21*J12; + Scalar term1 = J22*J01 - J21*J02; + Scalar term2 = J12*J01 - J11*J02; + + detJ = J00*term0 - J10*term1 + J20*term2; + + Scalar inv_detJ = 1.0/detJ; + + invJ[0] = term0*inv_detJ; + invJ[1] = -term1*inv_detJ; + invJ[2] = term2*inv_detJ; + + invJ[3] = -(J22*J10 - J20*J12)*inv_detJ; + invJ[4] = (J22*J00 - J20*J02)*inv_detJ; + invJ[5] = -(J12*J00 - J10*J02)*inv_detJ; + + invJ[6] = (J21*J10 - J20*J11)*inv_detJ; + invJ[7] = -(J21*J00 - J20*J01)*inv_detJ; + invJ[8] = (J11*J00 - J10*J01)*inv_detJ; +} + +template +KERNEL_PREFIX void matmat3x3(const Scalar* A, const Scalar* B, Scalar* C) +{ + //hardwired "3x3" in function-name allows us to assume args have length 9: + //A,B,C are all assumed to be ordered such that columns are contiguous. + + const Scalar zero = 0; + miniFE::fill(C, C+9, zero); + + for(int i=0; i<3; ++i) { + for(int j=0; j<3; ++j) { + C[i+j*3] = A[i+0]*B[j*3+0] + + A[i+3]*B[j*3+1] + + A[i+6]*B[j*3+2]; + } + } +} + +template +KERNEL_PREFIX Scalar determinant3x3(const Scalar* J) +{ + //hardwired "3x3" in function-name allows us to assume that J has length 9: + + Scalar J00 = J[0]; + Scalar J01 = J[1]; + Scalar J02 = J[2]; + + Scalar J10 = J[3]; + Scalar J11 = J[4]; + Scalar J12 = J[5]; + + Scalar J20 = J[6]; + Scalar J21 = J[7]; + Scalar J22 = J[8]; + + Scalar term0 = J22*J11 - J21*J12; + Scalar term1 = J22*J01 - J21*J02; + Scalar term2 = J12*J01 - J11*J02; + + Scalar detJ = J00*term0 - J10*term1 + J20*term2; + + return detJ; +} + +template +KERNEL_PREFIX void matmat3x3_X_3xn(const Scalar* A, int n, const Scalar* B, Scalar* C) +{ + //A is 3x3, B is 3xn. So C is also 3xn. + //A,B,C are all assumed to be ordered such that columns are contiguous. + + Scalar* Cj = C; + const Scalar* Bj = B; + for(int j=0; j +KERNEL_PREFIX void matTransMat3x3_X_3xn(const Scalar* A, int n, const Scalar* B, Scalar* C) +{ + //A is 3x3, B is 3xn. So C is also 3xn. + //A,B,C are all assumed to be ordered such that columns are contiguous. + + Scalar* Cj = C; + const Scalar* Bj = B; + for(int j=0; j +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +namespace miniFE { + +///////////////////////////////////////////////////////////////////////// + +// Function to return time in seconds. +// If compiled with no flags, return CPU time (user and system). +// If compiled with -DWALL, returns elapsed time. + +///////////////////////////////////////////////////////////////////////// + +#if defined(HAVE_MPI) && defined(USE_MPI_WTIME) + +timer_type mytimer() +{ + return((timer_type) MPI_Wtime()); +} + + +#elif defined(UseClock) + +#include +timer_type mytimer(void) +{ + clock_t t1; + static clock_t t0=0; + static timer_type CPS = CLOCKS_PER_SEC; + timer_type d; + + if (t0 == 0) t0 = clock(); + t1 = clock() - t0; + d = t1 / CPS; + return(d); +} + +#elif defined(WALL) + +#include +#include +#include +timer_type mytimer(void) +{ + struct timeval tp; + static long start=0, startu; + if (!start) + { + gettimeofday(&tp, NULL); + start = tp.tv_sec; + startu = tp.tv_usec; + return(0.0); + } + gettimeofday(&tp, NULL); + return( ((timer_type) (tp.tv_sec - start)) + (tp.tv_usec-startu)/1000000.0 ); +} + +#elif defined(UseTimes) + +#include +#include +#include +timer_type mytimer(void) +{ + struct tms ts; + static timer_type ClockTick=0.0; + + if (ClockTick == 0.0) ClockTick = (timer_type) sysconf(_SC_CLK_TCK); + times(&ts); + return( (timer_type) ts.tms_utime / ClockTick ); +} + +#else + +#include +#include +#include +timer_type mytimer(void) +{ +//This function now uses gettimeofday instead of getrusage. See note below. +// + struct timeval tv; + struct timezone tz; + gettimeofday(&tv, &tz); + return ( (timer_type)tv.tv_sec + tv.tv_usec/1000000.0 ); + +//The below use of 'getrusage' is not used because it doesn't do the right thing +//for the case of using threads. It adds up the time spent in multiple threads, +//rather than giving elapsed time. +// +// struct rusage ruse; +// getrusage(RUSAGE_SELF, &ruse); +// return( (timer_type)(ruse.ru_utime.tv_sec+ruse.ru_utime.tv_usec / 1000000.0) ); +} + +#endif + +}//namespace miniFE + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/outstream.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/outstream.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/outstream.hpp @@ -0,0 +1,45 @@ +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#ifndef _outstream_hpp_ +#define _outstream_hpp_ + +#include +#include + +inline +std::ostream& outstream(int np=1, int p=0) +{ + static bool first = true; + static std::ostringstream oss; + if (first) oss << "minife_debug."< +#include + +//Parameter-parsing Utilities: +// +//The functions declared below are intended to assist with parsing +//input-parameters which may be command-line arguments and/or lines in a +//text file. +// +// Scenario: You want your program to accept parameters that are specified +// as command-line arguments and/or as lines in a text file (such +// as a YAML output file). i.e., your program can be run like this: +// % program.exe foo=3.14159 bar: 42 +// or +// % program.exe input_file=params.txt +// or +// % program.exe foo=3.14159 input_file = params.txt +// +//Example: +// Here is example code to obtain parameters using the 3 functions +// 'read_args_into_string', 'read_file_into_string' and 'parse_parameter': +// +// std::string arg_string; +// +// //put command-line-arguments into 'arg_string': +// read_args_into_string(argc, argv, arg_string); +// +// //do the command-line-arguments specify an 'input_file'? +// std::string filename = +// parse_parameter(arg_string,"input_file","none-specified"); +// +// if (filename != "none-specified") { +// std::string tmp; +// read_file_into_string(filename, tmp); +// arg_string += tmp; +// } +// +// //now parse the parameters: +// float foo = parse_parameter(arg_string, "foo", -9.9); +// int bar = parse_parameter(arg_string, "bar", -1); +// +//See the comments below for parse_parameter, for formatting requirements of +//named parameter-value pairs. +// + +namespace Mantevo { + +/** + * Concatenate command-line arguments into a single string. + * + * Note: this function is purely serial. If argc and argv have different + * values on different MPI processes, then you need to resolve that by + * broadcasting arg_string's contents. + */ +void read_args_into_string(int argc, char** argv, std::string& arg_string); + +/** + * Read the contents of a text-file into a single string. + * + * Note: this function is purely serial. If you want file_contents on multiple + * MPI processes, you need to broadcast it (or call this function on each + * MPI process...). + */ +void read_file_into_string(const std::string& filename, + std::string& file_contents); + +/** + * Parse a named parameter value from input 'arg_string'. + * + * Search 'arg_string' for an occurrence of param_name and attempt to parse + * a value into the return-type. If param_name is not found, then default_value + * is returned. + * + * Example: + * arg_string = "foo = 3.14159"; + * float foo = parse_parameter(arg_string, "foo", -999.9); + * //foo should now contain the value 3.14159; if 'foo' was not found in + * //arg_string, then -999.9 would have been returned. + * + * Other legal name-value separators are ':' and ' '. Extra spaces are also ok, + * e.g. "foo : 3.114159". + * + * Note that if a YAML file is read into a string, that would be a valid input + * string for this function. + */ +template +T parse_parameter(const std::string& arg_string, + const std::string& param_name, + const T& default_value) +{ + std::string::size_type pos = arg_string.find(param_name); + if (pos == std::string::npos) { + //if param_name is not found in arg_string, return default_value: + return default_value; + } + + pos += param_name.size(); + + if (arg_string.size() <= pos) return default_value; + + //skip past ' ', '=' or ':': + while(pos < arg_string.size() && + (arg_string[pos] == ' ' || + arg_string[pos] == '=' || + arg_string[pos] == ':')) + { + ++pos; + } + + if (arg_string[pos] == '=' || arg_string[pos] == ':') ++pos; + + std::string str = arg_string.substr(pos); + + std::istringstream isstr(str); + + T return_val = default_value; + + //parse value into return_val: + isstr >> return_val; + + //if parse failed, return default_value: + if (!isstr) return default_value; + + return return_val; +} + +}//namespace Mantevo + +#endif + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/param_utils.cpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/param_utils.cpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/param_utils.cpp @@ -0,0 +1,58 @@ +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include + +#include +#include + +namespace Mantevo { + +//------------------------------------------------------------- +void read_args_into_string(int argc, char** argv, std::string& arg_string) +{ + arg_string = argv[0]; + for(int i=1; i +#include +#include +#include +#include +#include + +namespace miniFE { + +template +void +perform_element_loop(const simple_mesh_description& mesh, + const Box& local_elem_box, + MatrixType& A, VectorType& b, + Parameters& /*params*/) +{ + typedef typename MatrixType::ScalarType Scalar; + + int global_elems_x = mesh.global_box[0][1]; + int global_elems_y = mesh.global_box[1][1]; + int global_elems_z = mesh.global_box[2][1]; + + //We will iterate the local-element-box (local portion of the mesh), and + //get element-IDs in preparation for later assembling the FE operators + //into the global sparse linear-system. + + GlobalOrdinal num_elems = get_num_ids(local_elem_box); + std::vector elemIDs(num_elems); + + BoxIterator iter = BoxIterator::begin(local_elem_box); + BoxIterator end = BoxIterator::end(local_elem_box); + + for(size_t i=0; iter != end; ++iter, ++i) { + elemIDs[i] = get_id(global_elems_x, global_elems_y, global_elems_z, + iter.x, iter.y, iter.z); +//#ifdef MINIFE_DEBUG +//std::cout << "elem ID " << elemIDs[i] << " ("< elem_data; + + compute_gradient_values(elem_data.grad_vals); + + timer_type t_gn = 0, t_ce = 0, t_si = 0; + timer_type t0 = 0; + for(size_t i=0; i +#include +#include + +namespace miniFE { + +template +class simple_mesh_description { +public: + simple_mesh_description(const Box& global_box_in, const Box& local_box_in) + { + Box local_node_box; + for(int i=0; i<3; ++i) { + global_box[i][0] = global_box_in[i][0]; + global_box[i][1] = global_box_in[i][1]; + local_box[i][0] = local_box_in[i][0]; + local_box[i][1] = local_box_in[i][1]; + local_node_box[i][0] = local_box_in[i][0]; + local_node_box[i][1] = local_box_in[i][1]; + //num-owned-nodes == num-elems+1 in this dimension if the elem box is not empty + //and we are at the high end of the global range in that dimension: + if (local_box_in[i][1] > local_box_in[i][0] && local_box_in[i][1] == global_box[i][1]) local_node_box[i][1] += 1; + } + + int max_node_x = global_box[0][1]+1; + int max_node_y = global_box[1][1]+1; + int max_node_z = global_box[2][1]+1; + create_map_id_to_row(max_node_x, max_node_y, max_node_z, local_node_box, + map_ids_to_rows); + + //As described in analytic_soln.hpp, + //we will impose a 0 boundary-condition on faces x=0, y=0, z=0, y=1, z=1 + //we will impose a 1 boundary-condition on face x=1 + +#ifdef MINIFE_DEBUG +std::cout< 0) --miny; + if (local_node_box[Z][0] > 0) --minz; + if (local_node_box[Y][1] < max_node_y) ++maxy; + if (local_node_box[Z][1] < max_node_z) ++maxz; + + for(int iz=minz; iz(max_node_x, max_node_y, max_node_z, + 0, iy, iz); +#ifdef MINIFE_DEBUG +std::cout<<"x=0 BC, node "< 0) --minx; + if (local_node_box[Z][0] > 0) --minz; + if (local_node_box[X][1] < max_node_x) ++maxx; + if (local_node_box[Z][1] < max_node_z) ++maxz; + + for(int iz=minz; iz(max_node_x, max_node_y, max_node_z, + ix, 0, iz); +#ifdef MINIFE_DEBUG +std::cout<<"y=0 BC, node "< 0) --minz; + if (local_node_box[Y][0] > 0) --miny; + if (local_node_box[Z][1] < max_node_z) ++maxz; + if (local_node_box[Y][1] < max_node_y) ++maxy; + + for(int iy=miny; iy(max_node_x, max_node_y, max_node_z, + x1, iy, iz); + GlobalOrdinal row = map_id_to_row(nodeID); +#ifdef MINIFE_DEBUG +std::cout<<"x=1 BC, node "< 0) --minz; + if (local_node_box[X][0] > 0) --minx; + if (local_node_box[Z][1] < max_node_z) ++maxz; + if (local_node_box[X][1] < max_node_x) ++maxx; + + for(int ix=minx; ix(max_node_x, max_node_y, max_node_z, + ix, y1, iz); +#ifdef MINIFE_DEBUG +std::cout<<"y=1 BC, node "< 0) --miny; + if (local_node_box[X][0] > 0) --minx; + if (local_node_box[Y][1] < max_node_y) ++maxy; + if (local_node_box[X][1] < max_node_x) ++maxx; + + for(int ix=minx; ix(max_node_x, max_node_y, max_node_z, + ix, iy, z1); +#ifdef MINIFE_DEBUG +std::cout<<"z=1 BC, node "<::const_iterator mend = map_ids_to_rows.end(); + --mend; + return mend->second; + } + std::set bc_rows_0; + std::set bc_rows_1; + std::map map_ids_to_rows; + Box global_box; + Box local_box; +};//class simple_mesh_description + +}//namespace miniFE + +#endif Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/time_kernels.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/time_kernels.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/time_kernels.hpp @@ -0,0 +1,139 @@ +#ifndef _time_kernels_hpp_ +#define _time_kernels_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include + +#include +#include + +#ifdef MINIFE_HAVE_CUDA +#include +#endif + +namespace miniFE { + +template +void +time_kernels(OperatorType& A, + const VectorType& b, + VectorType& x, + Matvec matvec, + typename OperatorType::LocalOrdinalType max_iter, + typename OperatorType::ScalarType& xdotp, + timer_type* my_kern_times) +{ + typedef typename OperatorType::ScalarType ScalarType; + typedef typename OperatorType::LocalOrdinalType OrdinalType; + typedef typename TypeTraits::magnitude_type magnitude_type; + + timer_type t0 = 0, tWAXPY = 0, tDOT = 0, tMATVEC = 0; + + int myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + if (!A.has_local_indices) { + std::cerr << "miniFE::time_kernels ERROR, A.has_local_indices is false, needs to be true. This probably means " + << "miniFE::make_local_matrix(A) was not called prior to calling miniFE::time_kernels." + << std::endl; + return; + } + + OrdinalType nrows = A.rows.size(); + OrdinalType ncols = A.num_cols; + + VectorType p(0, ncols, b.compute_node); + + ScalarType one = 1.0; + ScalarType zero = 0.0; + + typedef typename VectorType::ComputeNodeType ComputeNodeType; + ComputeNodeType& compute_node = x.compute_node; + + //The following lines that create and initialize buffers are no-ops in many + //cases, but perform actual allocations and copies if a off-cpu device such as + //a GPU is being used by compute_node. + + //Do any required allocations for buffers that will be needed during CG: + ScalarType* d_x = compute_node.get_buffer(&x.coefs[0], x.coefs.size()); + ScalarType* d_p = compute_node.get_buffer(&p.coefs[0], p.coefs.size()); + ScalarType* d_b = compute_node.get_buffer(&b.coefs[0], b.coefs.size()); + OrdinalType* d_Arowoff = compute_node.get_buffer(&A.row_offsets[0], A.row_offsets.size()); + OrdinalType* d_Acols = compute_node.get_buffer(&A.packed_cols[0], A.packed_cols.size()); + ScalarType* d_Acoefs = compute_node.get_buffer(&A.packed_coefs[0], A.packed_coefs.size()); + + //Copy data to buffers that need to be initialized from input data: + compute_node.copy_to_buffer(&x.coefs[0], x.coefs.size(), d_x); + compute_node.copy_to_buffer(&b.coefs[0], b.coefs.size(), d_b); + compute_node.copy_to_buffer(&A.row_offsets[0], A.row_offsets.size(), d_Arowoff); + compute_node.copy_to_buffer(&A.packed_cols[0], A.packed_cols.size(), d_Acols); + compute_node.copy_to_buffer(&A.packed_coefs[0], A.packed_coefs.size(), d_Acoefs); + + TICK(); + for(OrdinalType i=0; i +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +#include +#include + +namespace miniFE { + +void get_parameters(int argc, char** argv, Parameters& params); + +void broadcast_parameters(Parameters& params); + +void initialize_mpi(int argc, char** argv, int& numprocs, int& myproc); + +void finalize_mpi(); + +template +Scalar percentage_difference(Scalar value, Scalar average) +{ + //result will be the difference between value and average, represented as + //a percentage of average. + //Examples: + // if value=100 and average=50, result is 100% + // if value=500 and average=400, result is 25% + + //Note: if average is 0, result is undefined. We'll return -1.0; + + Scalar result = std::abs(value-average); + if (std::abs(average) > 1.e-5) { + result /= average; + result *= 100; + } + else result = -1; + + return result; +} + +template +void get_global_min_max(GlobalOrdinal local_n, + GlobalOrdinal& global_n, + GlobalOrdinal& min_n, + int& min_proc, + GlobalOrdinal& max_n, + int& max_proc) +{ +//Given a local_n, compute global_n, min/max, etc. All computed results +//will be returned on all processors. +// + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + std::vector all_n(numprocs, 0); + all_n[myproc] = local_n; +#ifdef HAVE_MPI + std::vector tmp(all_n); + MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); + MPI_Allreduce(&tmp[0], &all_n[0], numprocs, mpi_dtype, MPI_MAX, MPI_COMM_WORLD); +#endif + + global_n = 0; + min_n= 5*local_n; + min_proc = 0; + max_n= 0; + max_proc = 0; + + for(int i=0; i= max_n) { + max_n = all_n[i]; + max_proc = i; + } + } +} + +template +Scalar compute_std_dev_as_percentage(Scalar local_nrows, + Scalar avg_nrows) +{ +//compute and return a standard deviation for the deviation of local_nrows from the average. +//the std. dev. will be expressed as a percentage of avg_nrows. +// +//Input argument local_nrows is really a integer, but taking it as a floating-point scalar is +//harmless. +// +#ifdef HAVE_MPI + int numprocs = 1, myproc = 0; + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); + +//If it's significantly more efficient, we may consider using MPI_Gather below instead of +//MPI_Allgather. We really only need to compute std.dev. on proc 0... +// +//(But for now, use MPI_Allgather and compute on all procs.) + + std::vector all_nrows(numprocs, 0); + MPI_Allgather(&local_nrows, 1, mpi_dtype, &all_nrows[0], 1, mpi_dtype, MPI_COMM_WORLD); + + //turn all_nrows contents into deviations, add to sum-of-squares-of-deviations: + Scalar sum_sqr_dev = 0; + for(size_t i=0; i1 ? std::sqrt(tmp1/(numprocs-1)) : 0; + + //std_dev is now the standard deviation of rows-per-processor with respect + //to avg_nrows. + //Next turn std_dev into a percentage of avg_nrows: + std_dev /= avg_nrows; + std_dev *= 100; + return std_dev; +#else + return 0; +#endif +} + +template +GlobalOrdinal find_row_for_id(GlobalOrdinal id, + const std::map& ids_to_rows) +{ + typename std::map::const_iterator + iter = ids_to_rows.lower_bound(id); + + if (iter == ids_to_rows.end() || iter->first != id) { + if (ids_to_rows.size() > 0) { + --iter; + } + else { + std::cout << "ERROR, failed to map id to row."<first == id) { + return iter->second; + } + + if (iter == ids_to_rows.begin() && iter->first > id) { + std::cout << "ERROR, id:" << id << ", ids_to_rows.begin(): " << iter->first<first; + + if (offset < 0) { + std::cout << "ERROR, negative offset in find_row_for_id for id="<second + offset; +} + +}//namespace miniFE + +#endif + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/utils.cpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/utils.cpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/utils.cpp @@ -0,0 +1,136 @@ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +#ifdef MINIFE_HAVE_TPI +#include +#endif + +#ifdef MINIFE_HAVE_TBB +#include +#endif + +#include +#include +#include + +namespace miniFE { + +//------------------------------------------------------------- +void get_parameters(int argc, char** argv, Parameters& params) +{ + std::string argstring; + Mantevo::read_args_into_string(argc, argv, argstring); + + std::string garbage("garbage"); + std::string filename = + Mantevo::parse_parameter(argstring, "input_file", garbage); + + if (filename != garbage) { + Mantevo::read_file_into_string(filename, argstring); + } + + params.nx = Mantevo::parse_parameter(argstring, "nx", 10); + params.ny = Mantevo::parse_parameter(argstring, "ny", params.nx); + params.nz = Mantevo::parse_parameter(argstring, "nz", params.ny); + params.load_imbalance = + Mantevo::parse_parameter(argstring, "load_imbalance", 0); + params.numthreads = Mantevo::parse_parameter(argstring, "numthreads", 1); + params.mv_overlap_comm_comp = Mantevo::parse_parameter(argstring, "mv_overlap_comm_comp", 0); + params.use_locking = Mantevo::parse_parameter(argstring, "use_locking", 0); + params.name = Mantevo::parse_parameter(argstring, "name",""); + params.elem_group_size = Mantevo::parse_parameter(argstring, "elem_group_size", 1); + params.use_elem_mat_fields = Mantevo::parse_parameter(argstring, "use_elem_mat_fields", 1); + params.verify_solution = Mantevo::parse_parameter(argstring, "verify_solution", 0); + params.device = Mantevo::parse_parameter(argstring, "device", 0); + params.num_devices = Mantevo::parse_parameter(argstring, "num_devices", 2); + params.skip_device = Mantevo::parse_parameter(argstring, "skip_device", 9999); + params.numa = Mantevo::parse_parameter(argstring, "numa", 1); +} + +//------------------------------------------------------------- +void broadcast_parameters(Parameters& params) +{ +#ifdef HAVE_MPI + const int num_int_params = 13; + int iparams[num_int_params] = {params.nx, params.ny, params.nz, params.numthreads, params.mv_overlap_comm_comp, params.use_locking, + params.elem_group_size, params.use_elem_mat_fields, params.verify_solution, + params.device, params.num_devices,params.skip_device,params.numa}; + MPI_Bcast(&iparams[0], num_int_params, MPI_INT, 0, MPI_COMM_WORLD); + params.nx = iparams[0]; + params.ny = iparams[1]; + params.nz = iparams[2]; + params.numthreads = iparams[3]; + params.mv_overlap_comm_comp = iparams[4]; + params.use_locking = iparams[5]; + params.elem_group_size = iparams[6]; + params.use_elem_mat_fields = iparams[7]; + params.verify_solution = iparams[8]; + params.device = iparams[9]; + params.num_devices = iparams[10]; + params.skip_device = iparams[11]; + params.numa = iparams[12]; + + float fparams[1] = {params.load_imbalance}; + MPI_Bcast(&fparams[0], 1, MPI_FLOAT, 0, MPI_COMM_WORLD); + params.load_imbalance = fparams[0]; + +#endif +} + +//------------------------------------------------------------- +void initialize_mpi(int argc, char** argv, int& numprocs, int& myproc) +{ +#ifdef HAVE_MPI + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#else + numprocs = 1; + myproc = 0; +#endif +} + +//------------------------------------------------------------- +void finalize_mpi() +{ +#ifdef HAVE_MPI + MPI_Finalize(); +#endif +} + +}//namespace miniFE + Index: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/verify_solution.hpp =================================================================== --- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/verify_solution.hpp +++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/verify_solution.hpp @@ -0,0 +1,179 @@ +#ifndef _verify_solution_hpp_ +#define _verify_solution_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include +#include +#include +#include + +#include +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +namespace miniFE { + +template +struct err_info { + Scalar err; + Scalar computed; + Scalar analytic; + Scalar coords[3]; +}; + +template +int +verify_solution(const simple_mesh_description& mesh, + const VectorType& x, double tolerance, bool verify_whole_domain = false) +{ + typedef typename VectorType::GlobalOrdinalType GlobalOrdinal; + typedef typename VectorType::ScalarType Scalar; + + int global_nodes_x = mesh.global_box[0][1]+1; + int global_nodes_y = mesh.global_box[1][1]+1; + int global_nodes_z = mesh.global_box[2][1]+1; + Box box; + copy_box(mesh.local_box, box); + + //num-owned-nodes in each dimension is num-elems+1 + //only if num-elems > 0 in that dimension *and* + //we are at the high end of the global range in that dimension: + if (box[0][1] > box[0][0] && box[0][1] == mesh.global_box[0][1]) ++box[0][1]; + if (box[1][1] > box[1][0] && box[1][1] == mesh.global_box[1][1]) ++box[1][1]; + if (box[2][1] > box[2][0] && box[2][1] == mesh.global_box[2][1]) ++box[2][1]; + + std::vector rows; + std::vector row_coords; + + int roffset = 0; + for(int iz=box[2][0]; iz(global_nodes_x, global_nodes_y, global_nodes_z, + ix, iy, iz); + Scalar x, y, z; + get_coords(row_id, global_nodes_x, global_nodes_y, global_nodes_z, x, y, z); + + bool verify_this_point = false; + if (verify_whole_domain) verify_this_point = true; + else if (std::abs(x - 0.5) < 0.05 && std::abs(y - 0.5) < 0.05 && std::abs(z - 0.5) < 0.05) { + verify_this_point = true; + } + + if (verify_this_point) { + rows.push_back(roffset); + row_coords.push_back(x); + row_coords.push_back(y); + row_coords.push_back(z); + } + + ++roffset; + } + } + } + + int return_code = 0; + + const int num_terms = 300; + + err_info max_error; + max_error.err = 0.0; + + for(size_t i=0; i max_error.err) { + max_error.err = err; + max_error.computed = computed_soln; + max_error.analytic = analytic_soln; + max_error.coords[0] = x; + max_error.coords[1] = y; + max_error.coords[2] = z; + } + } + + Scalar local_max_err = max_error.err; + Scalar global_max_err = 0; +#ifdef HAVE_MPI + MPI_Allreduce(&local_max_err, &global_max_err, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); +#else + global_max_err = local_max_err; +#endif + + if (local_max_err == global_max_err) { + if (max_error.err > tolerance) { + std::cout << "max absolute error is "< tolerance) { + return_code = 1; + } + + return return_code; +} + +}//namespace miniFE + +#endif +