Please use GitHub pull requests for new patches. Phabricator shutdown timeline
Changeset View
Changeset View
Standalone View
Standalone View
MicroBenchmarks/LCALS/SubsetCRawLoops/RawSubsetCbenchmarks.cxx
- This file was added.
//
// See README-LCALS_license.txt for access and distribution restrictions
//
//
// Source file containing LCALS "C" subset raw loops using the google
// benchmark library.
//
#include <benchmark/benchmark.h> | |||||
#include "../LCALSSuite.hxx" | |||||
static void BM_HYDRO_1D_RAW(benchmark::State& state) { | |||||
/* | |||||
******************************************************************* | |||||
* Kernel 1 -- hydro fragment | |||||
******************************************************************* | |||||
* DO 1 L = 1,Loop | |||||
* DO 1 k = 1,n | |||||
* 1 X(k)= Q + Y(k)*(R*ZX(k+10) + T*ZX(k+11)) | |||||
*/ | |||||
LoopData& loop_data = getLoopData(); | |||||
loopInit(HYDRO_1D); | |||||
Real_ptr x = loop_data.array_1D_Real[0]; | |||||
Real_ptr y = loop_data.array_1D_Real[1]; | |||||
Real_ptr z = loop_data.array_1D_Real[2]; | |||||
const Real_type q = loop_data.scalar_Real[0]; | |||||
const Real_type r = loop_data.scalar_Real[1]; | |||||
const Real_type t = loop_data.scalar_Real[2]; | |||||
for (auto _ : state) { | |||||
for (Index_type k=0 ; k< state.range(0) ; k++ ) { | |||||
x[k] = q + y[k]*( r*z[k+10] + t*z[k+11] ); | |||||
} | |||||
} | |||||
} | |||||
BENCHMARK(BM_HYDRO_1D_RAW)->Arg(171)->Arg(5001)-> | |||||
Arg(44217)->Unit(benchmark::kMicrosecond); | |||||
static void BM_ICCG_RAW(benchmark::State& state) { | |||||
/* | |||||
******************************************************************* | |||||
* Kernel 2 -- ICCG excerpt (Incomplete Cholesky Conj. Gradient) | |||||
******************************************************************* | |||||
* DO 200 L= 1,Loop | |||||
* II= n | |||||
* IPNTP= 0 | |||||
*222 IPNT= IPNTP | |||||
* IPNTP= IPNTP+II | |||||
* II= II/2 | |||||
* i= IPNTP+1 | |||||
CDIR$ IVDEP | |||||
* DO 2 k= IPNT+2,IPNTP,2 | |||||
* i= i+1 | |||||
* 2 X(i)= X(k) - V(k)*X(k-1) - V(k+1)*X(k+1) | |||||
* IF( II.GT.1) GO TO 222 | |||||
*200 CONTINUE | |||||
*/ | |||||
LoopData& loop_data = getLoopData(); | |||||
loopInit(ICCG); | |||||
Real_ptr x = loop_data.array_1D_Nx4_Real[0]; | |||||
Real_ptr v = loop_data.array_1D_Nx4_Real[1]; | |||||
Index_type ii, ipnt, ipntp, i; | |||||
for (auto _ : state) { | |||||
ii = state.range(0); | |||||
ipntp = 0; | |||||
do { | |||||
ipnt = ipntp; | |||||
ipntp += ii; | |||||
ii /= 2; | |||||
i = ipntp ; | |||||
for (Index_type k=ipnt+1 ; k<ipntp ; k=k+2 ) { | |||||
i++; | |||||
x[i] = x[k] - v[k ]*x[k-1] - v[k+1]*x[k+1]; | |||||
} | |||||
} while ( ii>0 ); | |||||
} | |||||
} | |||||
BENCHMARK(BM_ICCG_RAW)->Arg(171)->Arg(5001)-> | |||||
Arg(44217)->Unit(benchmark::kMicrosecond); | |||||
static void BM_INNER_PROD_RAW(benchmark::State& state) { | |||||
/* | |||||
******************************************************************* | |||||
* Kernel 3 -- inner product | |||||
******************************************************************* | |||||
* DO 3 L= 1,Loop | |||||
* Q= 0.0 | |||||
* DO 3 k= 1,n | |||||
* 3 Q= Q + Z(k)*X(k) | |||||
*/ | |||||
LoopData& loop_data = getLoopData(); | |||||
loopInit(INNER_PROD); | |||||
Real_ptr x = loop_data.array_1D_Real[0]; | |||||
Real_ptr z = loop_data.array_1D_Real[1]; | |||||
Real_type q = 0.0; | |||||
Real_type val = 0.0; | |||||
for (auto _ : state) { | |||||
q = 0.0; | |||||
for (Index_type k=0 ; k< state.range(0); k++ ) { | |||||
benchmark::DoNotOptimize(q += z[k]*x[k]); | |||||
} | |||||
} | |||||
} | |||||
BENCHMARK(BM_INNER_PROD_RAW)->Arg(171)->Arg(5001)-> | |||||
Arg(44217)->Unit(benchmark::kMicrosecond); | |||||
static void BM_BAND_LIN_EQ_RAW(benchmark::State& state) { | |||||
/* | |||||
******************************************************************* | |||||
* Kernel 4 -- banded linear equations | |||||
******************************************************************* | |||||
* m= (1001-7)/2 | |||||
* DO 444 L= 1,Loop | |||||
* DO 444 k= 7,1001,m | |||||
* lw= k-6 | |||||
* temp= X(k-1) | |||||
CDIR$ IVDEP | |||||
* DO 4 j= 5,n,5 | |||||
* temp = temp - XZ(lw)*Y(j) | |||||
* 4 lw= lw+1 | |||||
* X(k-1)= Y(5)*temp | |||||
*444 CONTINUE | |||||
*/ | |||||
LoopData& loop_data = getLoopData(); | |||||
loopInit(BAND_LIN_EQ); | |||||
Real_ptr x = loop_data.array_1D_Real[0]; | |||||
Real_ptr y = loop_data.array_1D_Real[1]; | |||||
Index_type lw; | |||||
Real_type temp; | |||||
for (auto _ : state) { | |||||
Index_type m = ( 1001-7 )/2; | |||||
for ( Index_type k=6 ; k<1001 ; k=k+m ) { | |||||
lw = k - 6; | |||||
temp = x[k-1]; | |||||
for (Index_type j=4 ; j< state.range(0) ; j=j+5 ) { | |||||
temp -= x[lw]*y[j]; | |||||
lw++; | |||||
} | |||||
x[k-1] = y[4]*temp; | |||||
} | |||||
} | |||||
} | |||||
BENCHMARK(BM_BAND_LIN_EQ_RAW)->Arg(171)->Arg(5001)-> | |||||
Arg(44217)->Unit(benchmark::kMicrosecond); | |||||
static void BM_TRIDIAG_ELIM_RAW(benchmark::State& state) { | |||||
/* | |||||
******************************************************************* | |||||
* Kernel 5 -- tri-diagonal elimination, below diagonal | |||||
******************************************************************* | |||||
* DO 5 L = 1,Loop | |||||
* DO 5 i = 2,n | |||||
* 5 X(i)= Z(i)*(Y(i) - X(i-1)) | |||||
*/ | |||||
LoopData& loop_data = getLoopData(); | |||||
loopInit(TRIDIAG_ELIM); | |||||
Real_ptr x = loop_data.array_1D_Real[0]; | |||||
Real_ptr y = loop_data.array_1D_Real[1]; | |||||
Real_ptr z = loop_data.array_1D_Real[2]; | |||||
for (auto _ : state) { | |||||
for ( Index_type i=1 ; i< state.range(0) ; i++ ) { | |||||
x[i] = z[i]*( y[i] - x[i-1] ); | |||||
} | |||||
} | |||||
} | |||||
BENCHMARK(BM_TRIDIAG_ELIM_RAW)->Arg(171)->Arg(5001)-> | |||||
Arg(44217)->Unit(benchmark::kMicrosecond); | |||||
static void BM_EOS_RAW(benchmark::State& state) { | |||||
/* | |||||
******************************************************************* | |||||
* Kernel 7 -- equation of state fragment | |||||
******************************************************************* | |||||
* DO 7 L= 1,Loop | |||||
* DO 7 k= 1,n | |||||
* X(k)= U(k ) + R*( Z(k ) + R*Y(k )) + | |||||
* . T*( U(k+3) + R*( U(k+2) + R*U(k+1)) + | |||||
* . T*( U(k+6) + Q*( U(k+5) + Q*U(k+4)))) | |||||
* 7 CONTINUE | |||||
*/ | |||||
LoopData& loop_data = getLoopData(); | |||||
loopInit(EOS); | |||||
Real_ptr x = loop_data.array_1D_Real[0]; | |||||
Real_ptr y = loop_data.array_1D_Real[1]; | |||||
Real_ptr z = loop_data.array_1D_Real[2]; | |||||
Real_ptr u = loop_data.array_1D_Real[3]; | |||||
const Real_type q = loop_data.scalar_Real[0]; | |||||
const Real_type r = loop_data.scalar_Real[1]; | |||||
const Real_type t = loop_data.scalar_Real[2]; | |||||
for (auto _ : state) { | |||||
for ( Index_type k=0 ; k< state.range(0) ; k++ ) { | |||||
x[k] = u[k] + r*( z[k] + r*y[k] ) + | |||||
t*( u[k+3] + r*( u[k+2] + r*u[k+1] ) + | |||||
t*( u[k+6] + q*( u[k+5] + q*u[k+4] ) ) ); | |||||
} | |||||
} | |||||
} | |||||
BENCHMARK(BM_EOS_RAW)->Arg(171)->Arg(5001)-> | |||||
Arg(44217)->Unit(benchmark::kMicrosecond); | |||||
static void BM_ADI_RAW(benchmark::State& state) { | |||||
/* | |||||
******************************************************************* | |||||
* Kernel 8 -- ADI integration | |||||
******************************************************************* | |||||
* DO 8 L = 1,Loop | |||||
* nl1 = 1 | |||||
* nl2 = 2 | |||||
* DO 8 kx = 2,3 | |||||
CDIR$ IVDEP | |||||
* DO 8 ky = 2,n | |||||
* DU1(ky)=U1(kx,ky+1,nl1) - U1(kx,ky-1,nl1) | |||||
* DU2(ky)=U2(kx,ky+1,nl1) - U2(kx,ky-1,nl1) | |||||
* DU3(ky)=U3(kx,ky+1,nl1) - U3(kx,ky-1,nl1) | |||||
* U1(kx,ky,nl2)=U1(kx,ky,nl1) +A11*DU1(ky) +A12*DU2(ky) +A13*DU3(ky) | |||||
* . + SIG*(U1(kx+1,ky,nl1) -2.*U1(kx,ky,nl1) +U1(kx-1,ky,nl1)) | |||||
* U2(kx,ky,nl2)=U2(kx,ky,nl1) +A21*DU1(ky) +A22*DU2(ky) +A23*DU3(ky) | |||||
* . + SIG*(U2(kx+1,ky,nl1) -2.*U2(kx,ky,nl1) +U2(kx-1,ky,nl1)) | |||||
* U3(kx,ky,nl2)=U3(kx,ky,nl1) +A31*DU1(ky) +A32*DU2(ky) +A33*DU3(ky) | |||||
* . + SIG*(U3(kx+1,ky,nl1) -2.*U3(kx,ky,nl1) +U3(kx-1,ky,nl1)) | |||||
* 8 CONTINUE | |||||
*/ | |||||
LoopData& loop_data = getLoopData(); | |||||
loopInit(ADI); | |||||
Real_ptr du1 = loop_data.array_1D_Real[0]; | |||||
Real_ptr du2 = loop_data.array_1D_Real[1]; | |||||
Real_ptr du3 = loop_data.array_1D_Real[2]; | |||||
Real_ptr** u1 = loop_data.array_3D_2xNx4_Real[0]; | |||||
Real_ptr** u2 = loop_data.array_3D_2xNx4_Real[1]; | |||||
Real_ptr** u3 = loop_data.array_3D_2xNx4_Real[2]; | |||||
const Real_type sig = loop_data.scalar_Real[0]; | |||||
const Real_type a11 = loop_data.scalar_Real[1]; | |||||
const Real_type a12 = loop_data.scalar_Real[2]; | |||||
const Real_type a13 = loop_data.scalar_Real[3]; | |||||
const Real_type a21 = loop_data.scalar_Real[4]; | |||||
const Real_type a22 = loop_data.scalar_Real[5]; | |||||
const Real_type a23 = loop_data.scalar_Real[6]; | |||||
const Real_type a31 = loop_data.scalar_Real[7]; | |||||
const Real_type a32 = loop_data.scalar_Real[8]; | |||||
const Real_type a33 = loop_data.scalar_Real[9]; | |||||
Index_type nl1 = 0; | |||||
Index_type nl2 = 1; | |||||
Index_type kx; | |||||
for (auto _ : state) { | |||||
for ( kx=1 ; kx<3 ; kx++ ) { | |||||
for (Index_type ky=1 ; ky< state.range(0) ; ky++ ) { | |||||
du1[ky] = u1[nl1][ky+1][kx] - u1[nl1][ky-1][kx]; | |||||
du2[ky] = u2[nl1][ky+1][kx] - u2[nl1][ky-1][kx]; | |||||
du3[ky] = u3[nl1][ky+1][kx] - u3[nl1][ky-1][kx]; | |||||
u1[nl2][ky][kx]= | |||||
u1[nl1][ky][kx]+a11*du1[ky]+a12*du2[ky]+a13*du3[ky] + sig* | |||||
(u1[nl1][ky][kx+1]-2.0*u1[nl1][ky][kx]+u1[nl1][ky][kx-1]); | |||||
u2[nl2][ky][kx]= | |||||
u2[nl1][ky][kx]+a21*du1[ky]+a22*du2[ky]+a23*du3[ky] + sig* | |||||
(u2[nl1][ky][kx+1]-2.0*u2[nl1][ky][kx]+u2[nl1][ky][kx-1]); | |||||
u3[nl2][ky][kx]= | |||||
u3[nl1][ky][kx]+a31*du1[ky]+a32*du2[ky]+a33*du3[ky] + sig* | |||||
(u3[nl1][ky][kx+1]-2.0*u3[nl1][ky][kx]+u3[nl1][ky][kx-1]); | |||||
} | |||||
} | |||||
} | |||||
} | |||||
BENCHMARK(BM_ADI_RAW)->Arg(171)->Arg(5001)-> | |||||
Arg(44217)->Unit(benchmark::kMicrosecond); | |||||
static void BM_INT_PREDICT_RAW(benchmark::State& state) { | |||||
/* | |||||
******************************************************************* | |||||
* Kernel 9 -- integrate predictors | |||||
******************************************************************* | |||||
* DO 9 L = 1,Loop | |||||
* DO 9 i = 1,n | |||||
* PX( 1,i)= DM28*PX(13,i) + DM27*PX(12,i) + DM26*PX(11,i) + | |||||
* . DM25*PX(10,i) + DM24*PX( 9,i) + DM23*PX( 8,i) + | |||||
* . DM22*PX( 7,i) + C0*(PX( 5,i) + PX( 6,i))+ PX( 3,i) | |||||
* 9 CONTINUE | |||||
*/ | |||||
LoopData& loop_data = getLoopData(); | |||||
loopInit(INT_PREDICT); | |||||
Real_ptr* px = loop_data.array_2D_Nx25_Real[0]; | |||||
const Real_type dm22 = loop_data.scalar_Real[0]; | |||||
const Real_type dm23 = loop_data.scalar_Real[1]; | |||||
const Real_type dm24 = loop_data.scalar_Real[2]; | |||||
const Real_type dm25 = loop_data.scalar_Real[3]; | |||||
const Real_type dm26 = loop_data.scalar_Real[4]; | |||||
const Real_type dm27 = loop_data.scalar_Real[5]; | |||||
const Real_type dm28 = loop_data.scalar_Real[6]; | |||||
const Real_type c0 = loop_data.scalar_Real[7]; | |||||
for (auto _ : state) { | |||||
for (Index_type i=0 ; i< state.range(0) ; i++ ) { | |||||
px[i][0] = dm28*px[i][12] + dm27*px[i][11] + dm26*px[i][10] + | |||||
dm25*px[i][ 9] + dm24*px[i][ 8] + dm23*px[i][ 7] + | |||||
dm22*px[i][ 6] + c0*( px[i][ 4] + px[i][ 5]) + px[i][ 2]; | |||||
} | |||||
} | |||||
} | |||||
BENCHMARK(BM_INT_PREDICT_RAW)->Arg(171)->Arg(5001)-> | |||||
Arg(44217)->Unit(benchmark::kMicrosecond); | |||||
static void BM_DIFF_PREDICT_RAW(benchmark::State& state) { | |||||
/* | |||||
******************************************************************* | |||||
* Kernel 10 -- difference predictors | |||||
******************************************************************* | |||||
* DO 10 L= 1,Loop | |||||
* DO 10 i= 1,n | |||||
* AR = CX(5,i) | |||||
* BR = AR - PX(5,i) | |||||
* PX(5,i) = AR | |||||
* CR = BR - PX(6,i) | |||||
* PX(6,i) = BR | |||||
* AR = CR - PX(7,i) | |||||
* PX(7,i) = CR | |||||
* BR = AR - PX(8,i) | |||||
* PX(8,i) = AR | |||||
* CR = BR - PX(9,i) | |||||
* PX(9,i) = BR | |||||
* AR = CR - PX(10,i) | |||||
* PX(10,i)= CR | |||||
* BR = AR - PX(11,i) | |||||
* PX(11,i)= AR | |||||
* CR = BR - PX(12,i) | |||||
* PX(12,i)= BR | |||||
* PX(14,i)= CR - PX(13,i) | |||||
* PX(13,i)= CR | |||||
* 10 CONTINUE | |||||
*/ | |||||
LoopData& loop_data = getLoopData(); | |||||
loopInit(DIFF_PREDICT); | |||||
Real_ptr* px = loop_data.array_2D_Nx25_Real[0]; | |||||
Real_ptr* cx = loop_data.array_2D_Nx25_Real[1]; | |||||
for (auto _ : state) { | |||||
for (Index_type i=0 ; i< state.range(0) ; i++ ) { | |||||
Real_type ar, br, cr; | |||||
ar = cx[i][ 4]; | |||||
br = ar - px[i][ 4]; | |||||
px[i][ 4] = ar; | |||||
cr = br - px[i][ 5]; | |||||
px[i][ 5] = br; | |||||
ar = cr - px[i][ 6]; | |||||
px[i][ 6] = cr; | |||||
br = ar - px[i][ 7]; | |||||
px[i][ 7] = ar; | |||||
cr = br - px[i][ 8]; | |||||
px[i][ 8] = br; | |||||
ar = cr - px[i][ 9]; | |||||
px[i][ 9] = cr; | |||||
br = ar - px[i][10]; | |||||
px[i][10] = ar; | |||||
cr = br - px[i][11]; | |||||
px[i][11] = br; | |||||
px[i][13] = cr - px[i][12]; | |||||
px[i][12] = cr; | |||||
} | |||||
} | |||||
} | |||||
BENCHMARK(BM_DIFF_PREDICT_RAW)->Arg(171)->Arg(5001)-> | |||||
Arg(44217)->Unit(benchmark::kMicrosecond); | |||||
static void BM_FIRST_SUM_RAW(benchmark::State& state) { | |||||
/* | |||||
******************************************************************* | |||||
* Kernel 11 -- first sum | |||||
******************************************************************* | |||||
* DO 11 L = 1,Loop | |||||
* X(1)= Y(1) | |||||
* DO 11 k = 2,n | |||||
* 11 X(k)= X(k-1) + Y(k) | |||||
*/ | |||||
LoopData& loop_data = getLoopData(); | |||||
loopInit(FIRST_SUM); | |||||
Real_ptr x = loop_data.array_1D_Real[0]; | |||||
Real_ptr y = loop_data.array_1D_Real[1]; | |||||
for (auto _ :state) { | |||||
x[0] = y[0]; | |||||
for (Index_type k=1 ; k< state.range(0) ; k++ ) { | |||||
x[k] = x[k-1] + y[k]; | |||||
} | |||||
} | |||||
} | |||||
BENCHMARK(BM_FIRST_SUM_RAW)->Arg(171)->Arg(5001)-> | |||||
Arg(44217)->Unit(benchmark::kMicrosecond); | |||||
static void BM_FIRST_DIFF_RAW(benchmark::State& state) { | |||||
/* | |||||
******************************************************************* | |||||
* Kernel 12 -- first difference | |||||
******************************************************************* | |||||
* DO 12 L = 1,Loop | |||||
* DO 12 k = 1,n | |||||
* 12 X(k)= Y(k+1) - Y(k) | |||||
*/ | |||||
LoopData& loop_data = getLoopData(); | |||||
loopInit(FIRST_DIFF); | |||||
Real_ptr x = loop_data.array_1D_Real[0]; | |||||
Real_ptr y = loop_data.array_1D_Real[1]; | |||||
for (auto _ : state) { | |||||
for (Index_type k=0 ; k< state.range(0) ; k++ ) { | |||||
x[k] = y[k+1] - y[k]; | |||||
} | |||||
} | |||||
} | |||||
BENCHMARK(BM_FIRST_DIFF_RAW)->Arg(171)->Arg(5001)-> | |||||
Arg(44217)->Unit(benchmark::kMicrosecond); | |||||
static void BM_PIC_2D_RAW(benchmark::State& state) { | |||||
/* | |||||
******************************************************************* | |||||
* Kernel 13 -- 2-D PIC (Particle In Cell) | |||||
******************************************************************* | |||||
* DO 13 L= 1,Loop | |||||
* DO 13 ip= 1,n | |||||
* i1= P(1,ip) | |||||
* j1= P(2,ip) | |||||
* i1= 1 + MOD2N(i1,64) | |||||
* j1= 1 + MOD2N(j1,64) | |||||
* P(3,ip)= P(3,ip) + B(i1,j1) | |||||
* P(4,ip)= P(4,ip) + C(i1,j1) | |||||
* P(1,ip)= P(1,ip) + P(3,ip) | |||||
* P(2,ip)= P(2,ip) + P(4,ip) | |||||
* i2= P(1,ip) | |||||
* j2= P(2,ip) | |||||
* i2= MOD2N(i2,64) | |||||
* j2= MOD2N(j2,64) | |||||
* P(1,ip)= P(1,ip) + Y(i2+32) | |||||
* P(2,ip)= P(2,ip) + Z(j2+32) | |||||
* i2= i2 + E(i2+32) | |||||
* j2= j2 + F(j2+32) | |||||
* H(i2,j2)= H(i2,j2) + 1.0 | |||||
* 13 CONTINUE | |||||
*/ | |||||
LoopData& loop_data = getLoopData(); | |||||
loopInit(PIC_2D); | |||||
Real_ptr* p = loop_data.array_2D_Nx25_Real[0]; | |||||
Real_ptr* b = loop_data.array_2D_Nx25_Real[1]; | |||||
Real_ptr* c = loop_data.array_2D_Nx25_Real[2]; | |||||
Real_ptr y = loop_data.array_1D_Real[0]; | |||||
Real_ptr z = loop_data.array_1D_Real[1]; | |||||
Index_type* e = loop_data.array_1D_Indx[0]; | |||||
Index_type* f = loop_data.array_1D_Indx[1]; | |||||
Real_ptr* h = loop_data.array_2D_64x64_Real[0]; | |||||
for (auto _ : state) { | |||||
for (Index_type ip=0 ; ip< state.range(0) ; ip++ ) { | |||||
Index_type i1, j1, i2, j2; | |||||
i1 = (Index_type) p[ip][0]; | |||||
j1 = (Index_type) p[ip][1]; | |||||
i1 &= 64-1; | |||||
j1 &= 64-1; | |||||
p[ip][2] += b[j1][i1]; | |||||
p[ip][3] += c[j1][i1]; | |||||
p[ip][0] += p[ip][2]; | |||||
p[ip][1] += p[ip][3]; | |||||
i2 = (Index_type) p[ip][0]; | |||||
j2 = (Index_type) p[ip][1]; | |||||
i2 = ( i2 & 64-1 ) ; | |||||
j2 = ( j2 & 64-1 ) ; | |||||
p[ip][0] += y[i2+32]; | |||||
p[ip][1] += z[j2+32]; | |||||
i2 += e[i2+32]; | |||||
j2 += f[j2+32]; | |||||
h[j2][i2] += 1.0; | |||||
} | |||||
} | |||||
} | |||||
BENCHMARK(BM_PIC_2D_RAW)->Arg(171)->Arg(5001)-> | |||||
Arg(44217)->Unit(benchmark::kMicrosecond); | |||||
static void BM_PIC_1D_RAW(benchmark::State& state) { | |||||
/* | |||||
******************************************************************* | |||||
* Kernel 14 -- 1-D PIC (Particle In Cell) | |||||
******************************************************************* | |||||
* DO 14 L= 1,Loop | |||||
* DO 141 k= 1,n | |||||
* VX(k)= 0.0 | |||||
* XX(k)= 0.0 | |||||
* IX(k)= INT( GRD(k)) | |||||
* XI(k)= REAL( IX(k)) | |||||
* EX1(k)= EX ( IX(k)) | |||||
* DEX1(k)= DEX ( IX(k)) | |||||
*41 CONTINUE | |||||
* DO 142 k= 1,n | |||||
* VX(k)= VX(k) + EX1(k) + (XX(k) - XI(k))*DEX1(k) | |||||
* XX(k)= XX(k) + VX(k) + FLX | |||||
* IR(k)= XX(k) | |||||
* RX(k)= XX(k) - IR(k) | |||||
* IR(k)= MOD2N( IR(k),2048) + 1 | |||||
* XX(k)= RX(k) + IR(k) | |||||
*42 CONTINUE | |||||
* DO 14 k= 1,n | |||||
* RH(IR(k) )= RH(IR(k) ) + 1.0 - RX(k) | |||||
* RH(IR(k)+1)= RH(IR(k)+1) + RX(k) | |||||
*14 CONTINUE | |||||
*/ | |||||
LoopData& loop_data = getLoopData(); | |||||
loopInit(PIC_1D); | |||||
Real_ptr vx = loop_data.array_1D_Real[0]; | |||||
Real_ptr xx = loop_data.array_1D_Real[1]; | |||||
Real_ptr xi = loop_data.array_1D_Real[2]; | |||||
Real_ptr ex = loop_data.array_1D_Real[3]; | |||||
Real_ptr ex1 = loop_data.array_1D_Real[4]; | |||||
Real_ptr dex = loop_data.array_1D_Real[5]; | |||||
Real_ptr dex1 = loop_data.array_1D_Real[6]; | |||||
Real_ptr rh = loop_data.array_1D_Real[7]; | |||||
Real_ptr rx = loop_data.array_1D_Real[8]; | |||||
const Real_type flx = loop_data.scalar_Real[0]; | |||||
Index_type* ix = loop_data.array_1D_Indx[2]; | |||||
Index_type* ir = loop_data.array_1D_Indx[3]; | |||||
Index_type* grd = loop_data.array_1D_Indx[4]; | |||||
for (auto _ : state) { | |||||
for (Index_type k=0 ; k< state.range(0) ; k++ ) { | |||||
vx[k] = 0.0; | |||||
xx[k] = 0.0; | |||||
ix[k] = (Index_type) grd[k]; | |||||
xi[k] = (Real_type) ix[k]; | |||||
ex1[k] = ex[ ix[k] - 1 ]; | |||||
dex1[k] = dex[ ix[k] - 1 ]; | |||||
} | |||||
for (Index_type k=0 ; k< state.range(0) ; k++ ) { | |||||
vx[k] = vx[k] + ex1[k] + ( xx[k] - xi[k] )*dex1[k]; | |||||
xx[k] = xx[k] + vx[k] + flx; | |||||
ir[k] = (Index_type) xx[k]; | |||||
rx[k] = xx[k] - ir[k]; | |||||
ir[k] = ( ir[k] & (2048-1) ) + 1; | |||||
xx[k] = rx[k] + ir[k]; | |||||
} | |||||
for (Index_type k=0 ; k< state.range(0) ; k++ ) { | |||||
rh[ ir[k]-1 ] += 1.0 - rx[k]; | |||||
rh[ ir[k] ] += rx[k]; | |||||
} | |||||
} | |||||
} | |||||
BENCHMARK(BM_PIC_1D_RAW)->Arg(171)->Arg(5001)-> | |||||
Arg(44217)->Unit(benchmark::kMicrosecond); | |||||
static void BM_HYDRO_2D_RAW(benchmark::State& state) { | |||||
/* | |||||
******************************************************************* | |||||
* Kernel 18 - 2-D explicit hydrodynamics fragment | |||||
******************************************************************* | |||||
* DO 75 L= 1,Loop | |||||
* T= 0.0037 | |||||
* S= 0.0041 | |||||
* KN= 6 | |||||
* JN= n | |||||
* DO 70 k= 2,KN | |||||
* DO 70 j= 2,JN | |||||
* ZA(j,k)= (ZP(j-1,k+1)+ZQ(j-1,k+1)-ZP(j-1,k)-ZQ(j-1,k)) | |||||
* . *(ZR(j,k)+ZR(j-1,k))/(ZM(j-1,k)+ZM(j-1,k+1)) | |||||
* ZB(j,k)= (ZP(j-1,k)+ZQ(j-1,k)-ZP(j,k)-ZQ(j,k)) | |||||
* . *(ZR(j,k)+ZR(j,k-1))/(ZM(j,k)+ZM(j-1,k)) | |||||
* 70 CONTINUE | |||||
* DO 72 k= 2,KN | |||||
* DO 72 j= 2,JN | |||||
* ZU(j,k)= ZU(j,k)+S*(ZA(j,k)*(ZZ(j,k)-ZZ(j+1,k)) | |||||
* . -ZA(j-1,k) *(ZZ(j,k)-ZZ(j-1,k)) | |||||
* . -ZB(j,k) *(ZZ(j,k)-ZZ(j,k-1)) | |||||
* . +ZB(j,k+1) *(ZZ(j,k)-ZZ(j,k+1))) | |||||
* ZV(j,k)= ZV(j,k)+S*(ZA(j,k)*(ZR(j,k)-ZR(j+1,k)) | |||||
* . -ZA(j-1,k) *(ZR(j,k)-ZR(j-1,k)) | |||||
* . -ZB(j,k) *(ZR(j,k)-ZR(j,k-1)) | |||||
* . +ZB(j,k+1) *(ZR(j,k)-ZR(j,k+1))) | |||||
* 72 CONTINUE | |||||
* DO 75 k= 2,KN | |||||
* DO 75 j= 2,JN | |||||
* ZR(j,k)= ZR(j,k)+T*ZU(j,k) | |||||
* ZZ(j,k)= ZZ(j,k)+T*ZV(j,k) | |||||
* 75 CONTINUE | |||||
*/ | |||||
LoopData& loop_data = getLoopData(); | |||||
loopInit(HYDRO_2D); | |||||
Real_ptr* za = loop_data.array_2D_7xN_Real[0]; | |||||
Real_ptr* zb = loop_data.array_2D_7xN_Real[1]; | |||||
Real_ptr* zm = loop_data.array_2D_7xN_Real[2]; | |||||
Real_ptr* zp = loop_data.array_2D_7xN_Real[3]; | |||||
Real_ptr* zq = loop_data.array_2D_7xN_Real[4]; | |||||
Real_ptr* zr = loop_data.array_2D_7xN_Real[5]; | |||||
Real_ptr* zu = loop_data.array_2D_7xN_Real[6]; | |||||
Real_ptr* zv = loop_data.array_2D_7xN_Real[7]; | |||||
Real_ptr* zz = loop_data.array_2D_7xN_Real[8]; | |||||
Real_ptr* zrout = loop_data.array_2D_7xN_Real[9]; | |||||
Real_ptr* zzout = loop_data.array_2D_7xN_Real[10]; | |||||
const Real_type t = 0.0037; | |||||
const Real_type s = 0.0041; | |||||
Index_type kn = 6; | |||||
Index_type jn = state.range(0); | |||||
Index_type k; | |||||
for (auto _ : state) { | |||||
for ( k=1 ; k<kn ; k++ ) { | |||||
for (Index_type j=1 ; j<jn ; j++ ) { | |||||
za[k][j] = ( zp[k+1][j-1] +zq[k+1][j-1] -zp[k][j-1] -zq[k][j-1] )* | |||||
( zr[k][j] +zr[k][j-1] ) / ( zm[k][j-1] +zm[k+1][j-1]); | |||||
zb[k][j] = ( zp[k][j-1] +zq[k][j-1] -zp[k][j] -zq[k][j] ) * | |||||
( zr[k][j] +zr[k-1][j] ) / ( zm[k][j] +zm[k][j-1]); | |||||
} | |||||
} | |||||
for ( k=1 ; k<kn ; k++ ) { | |||||
for (Index_type j=1 ; j<jn ; j++ ) { | |||||
zu[k][j] += s*( za[k][j] *( zz[k][j] - zz[k][j+1] ) - | |||||
za[k][j-1] *( zz[k][j] - zz[k][j-1] ) - | |||||
zb[k][j] *( zz[k][j] - zz[k-1][j] ) + | |||||
zb[k+1][j] *( zz[k][j] - zz[k+1][j] ) ); | |||||
zv[k][j] += s*( za[k][j] *( zr[k][j] - zr[k][j+1] ) - | |||||
za[k][j-1] *( zr[k][j] - zr[k][j-1] ) - | |||||
zb[k][j] *( zr[k][j] - zr[k-1][j] ) + | |||||
zb[k+1][j] *( zr[k][j] - zr[k+1][j] ) ); | |||||
} | |||||
} | |||||
for ( k=1 ; k<kn ; k++ ) { | |||||
for (Index_type j=1 ; j<jn ; j++ ) { | |||||
zrout[k][j] = zr[k][j] + t*zu[k][j]; | |||||
zzout[k][j] = zz[k][j] + t*zv[k][j]; | |||||
} | |||||
} | |||||
} | |||||
} | |||||
BENCHMARK(BM_HYDRO_2D_RAW)->Arg(171)->Arg(5001)-> | |||||
Arg(44217)->Unit(benchmark::kMicrosecond); | |||||
static void BM_GEN_LIN_RECUR_RAW(benchmark::State& state) { | |||||
/* | |||||
******************************************************************* | |||||
* Kernel 19 -- general linear recurrence equations | |||||
******************************************************************* | |||||
* KB5I= 0 | |||||
* DO 194 L= 1,Loop | |||||
* DO 191 k= 1,n | |||||
* B5(k+KB5I)= SA(k) +STB5*SB(k) | |||||
* STB5= B5(k+KB5I) -STB5 | |||||
*191 CONTINUE | |||||
*192 DO 193 i= 1,n | |||||
* k= n-i+1 | |||||
* B5(k+KB5I)= SA(k) +STB5*SB(k) | |||||
* STB5= B5(k+KB5I) -STB5 | |||||
*193 CONTINUE | |||||
*194 CONTINUE | |||||
*/ | |||||
LoopData& loop_data = getLoopData(); | |||||
loopInit(GEN_LIN_RECUR); | |||||
Real_ptr b5 = loop_data.array_1D_Real[0]; | |||||
Real_ptr sa = loop_data.array_1D_Real[1]; | |||||
Real_ptr sb = loop_data.array_1D_Real[2]; | |||||
Real_type stb5 = loop_data.scalar_Real[0]; | |||||
Index_type kb5i = 0; | |||||
for (auto _ : state) { | |||||
for ( Index_type k=0 ; k< state.range(0) ; k++ ) { | |||||
b5[k+kb5i] = sa[k] + stb5*sb[k]; | |||||
stb5 = b5[k+kb5i] - stb5; | |||||
} | |||||
for (Index_type i=1 ; i<= state.range(0) ; i++ ) { | |||||
Index_type k = state.range(0) - i ; | |||||
b5[k+kb5i] = sa[k] + stb5*sb[k]; | |||||
stb5 = b5[k+kb5i] - stb5; | |||||
} | |||||
} | |||||
} | |||||
BENCHMARK(BM_GEN_LIN_RECUR_RAW)->Arg(171)->Arg(5001)-> | |||||
Arg(44217)->Unit(benchmark::kMicrosecond); | |||||
static void BM_DISC_ORD_RAW(benchmark::State& state) { | |||||
/* | |||||
******************************************************************* | |||||
* Kernel 20 -- Discrete ordinates transport, cond recurrence on xx | |||||
******************************************************************* | |||||
* DO 20 L= 1,Loop | |||||
* DO 20 k= 1,n | |||||
* DI= Y(k)-G(k)/( XX(k)+DK) | |||||
* DN= 0.2 | |||||
* IF( DI.NE.0.0) DN= MAX( S,MIN( Z(k)/DI, T)) | |||||
* X(k)= ((W(k)+V(k)*DN)* XX(k)+U(k))/(VX(k)+V(k)*DN) | |||||
* XX(k+1)= (X(k)- XX(k))*DN+ XX(k) | |||||
* 20 CONTINUE | |||||
*/ | |||||
LoopData& loop_data = getLoopData(); | |||||
loopInit(DISC_ORD); | |||||
Real_ptr x = loop_data.array_1D_Real[0]; | |||||
Real_ptr y = loop_data.array_1D_Real[1]; | |||||
Real_ptr z = loop_data.array_1D_Real[2]; | |||||
Real_ptr u = loop_data.array_1D_Real[3]; | |||||
Real_ptr v = loop_data.array_1D_Real[4]; | |||||
Real_ptr w = loop_data.array_1D_Real[5]; | |||||
Real_ptr g = loop_data.array_1D_Real[6]; | |||||
Real_ptr xx = loop_data.array_1D_Real[7]; | |||||
Real_ptr vx = loop_data.array_1D_Real[9]; | |||||
const Real_type s = loop_data.scalar_Real[0]; | |||||
const Real_type t = loop_data.scalar_Real[1]; | |||||
const Real_type dk = loop_data.scalar_Real[2]; | |||||
for (auto _ : state) { | |||||
for (Index_type k=0 ; k< state.range(0) ; k++ ) { | |||||
Real_type di = y[k] - g[k] / ( xx[k] + dk ); | |||||
Real_type dn = 0.2; | |||||
if ( di ) { | |||||
dn = z[k]/di ; | |||||
if ( t < dn ) dn = t; | |||||
if ( s > dn ) dn = s; | |||||
} | |||||
x[k] = ( ( w[k] + v[k]*dn )* xx[k] + u[k] ) / ( vx[k] + v[k]*dn ); | |||||
xx[k+1] = ( x[k] - xx[k] )* dn + xx[k]; | |||||
} | |||||
} | |||||
} | |||||
BENCHMARK(BM_DISC_ORD_RAW)->Arg(171)->Arg(5001)-> | |||||
Arg(44217)->Unit(benchmark::kMicrosecond); | |||||
static void BM_MAT_X_MAT_RAW(benchmark::State& state) { | |||||
/* | |||||
******************************************************************* | |||||
* Kernel 21 -- matrix*matrix product | |||||
******************************************************************* | |||||
* DO 21 L= 1,Loop | |||||
* DO 21 k= 1,25 | |||||
* DO 21 i= 1,25 | |||||
* DO 21 j= 1,n | |||||
* PX(i,j)= PX(i,j) +VY(i,k) * CX(k,j) | |||||
* 21 CONTINUE | |||||
*/ | |||||
LoopData& loop_data = getLoopData(); | |||||
loopInit(MAT_X_MAT); | |||||
Real_ptr* px = loop_data.array_2D_Nx25_Real[0]; | |||||
Real_ptr* cx = loop_data.array_2D_Nx25_Real[1]; | |||||
Real_ptr* vy = loop_data.array_2D_64x64_Real[0]; | |||||
Index_type k, i; | |||||
for (auto _ : state) { | |||||
for ( k=0 ; k<25 ; k++ ) { | |||||
for ( i=0 ; i<25 ; i++ ) { | |||||
for (Index_type j=0 ; j< state.range(0) ; j++ ) { | |||||
px[j][i] += vy[k][i] * cx[j][k]; | |||||
} | |||||
} | |||||
} | |||||
} | |||||
} | |||||
BENCHMARK(BM_MAT_X_MAT_RAW)->Arg(171)->Arg(5001)-> | |||||
Arg(44217)->Unit(benchmark::kMicrosecond); | |||||
static void BM_PLANCKIAN_RAW(benchmark::State& state) { | |||||
/* | |||||
******************************************************************* | |||||
* Kernel 22 -- Planckian distribution | |||||
******************************************************************* | |||||
* EXPMAX= 20.0 | |||||
* U(n)= 0.99*EXPMAX*V(n) | |||||
* DO 22 L= 1,Loop | |||||
* DO 22 k= 1,n | |||||
* Y(k)= U(k)/V(k) | |||||
* W(k)= X(k)/( EXP( Y(k)) -1.0) | |||||
* 22 CONTINUE | |||||
*/ | |||||
LoopData& loop_data = getLoopData(); | |||||
loopInit(PLANCKIAN); | |||||
Real_ptr x = loop_data.array_1D_Real[0]; | |||||
Real_ptr y = loop_data.array_1D_Real[1]; | |||||
Real_ptr u = loop_data.array_1D_Real[2]; | |||||
Real_ptr v = loop_data.array_1D_Real[3]; | |||||
Real_ptr w = loop_data.array_1D_Real[4]; | |||||
Real_type expmax = 20.0; | |||||
u[state.range(0)-1] = 0.99*expmax*v[state.range(0)-1]; | |||||
for (auto _ : state) { | |||||
for (Index_type k=0 ; k< state.range(0) ; k++ ) { | |||||
y[k] = u[k] / v[k]; | |||||
w[k] = x[k] / ( exp( y[k] ) -1.0 ); | |||||
} | |||||
} | |||||
} | |||||
BENCHMARK(BM_PLANCKIAN_RAW)->Arg(171)->Arg(5001)-> | |||||
Arg(44217)->Unit(benchmark::kMicrosecond); | |||||
static void BM_IMP_HYDRO_2D_RAW(benchmark::State& state) { | |||||
/* | |||||
******************************************************************* | |||||
* Kernel 23 -- 2-D implicit hydrodynamics fragment | |||||
******************************************************************* | |||||
* DO 23 L= 1,Loop | |||||
* DO 23 j= 2,6 | |||||
* DO 23 k= 2,n | |||||
* QA= ZA(k,j+1)*ZR(k,j) +ZA(k,j-1)*ZB(k,j) + | |||||
* . ZA(k+1,j)*ZU(k,j) +ZA(k-1,j)*ZV(k,j) +ZZ(k,j) | |||||
* 23 ZA(k,j)= ZA(k,j) +.175*(QA -ZA(k,j)) | |||||
*/ | |||||
LoopData& loop_data = getLoopData(); | |||||
loopInit(IMP_HYDRO_2D); | |||||
Real_ptr* za = loop_data.array_2D_7xN_Real[0]; | |||||
Real_ptr* zb = loop_data.array_2D_7xN_Real[1]; | |||||
Real_ptr* zr = loop_data.array_2D_7xN_Real[2]; | |||||
Real_ptr* zu = loop_data.array_2D_7xN_Real[3]; | |||||
Real_ptr* zv = loop_data.array_2D_7xN_Real[4]; | |||||
Real_ptr* zz = loop_data.array_2D_7xN_Real[5]; | |||||
Index_type j; | |||||
for (auto _ : state) { | |||||
for ( j=1 ; j<6 ; j++ ) { | |||||
for ( Index_type k=1 ; k< state.range(0) ; k++ ) { | |||||
Real_type qa = za[j+1][k]*zr[j][k] + za[j-1][k]*zb[j][k] + | |||||
za[j][k+1]*zu[j][k] + za[j][k-1]*zv[j][k] + zz[j][k]; | |||||
za[j][k] += 0.175*( qa - za[j][k] ); | |||||
} | |||||
} | |||||
} | |||||
} | |||||
BENCHMARK(BM_IMP_HYDRO_2D_RAW)->Arg(171)->Arg(5001)-> | |||||
Arg(44217)->Unit(benchmark::kMicrosecond); | |||||
static void BM_FIND_FIRST_MIN_RAW(benchmark::State& state) { | |||||
/* | |||||
******************************************************************* | |||||
* Kernel 24 -- find location of first minimum in array | |||||
******************************************************************* | |||||
* X( n/2)= -1.0E+10 | |||||
* DO 24 L= 1,Loop | |||||
* m= 1 | |||||
* DO 24 k= 2,n | |||||
* IF( X(k).LT.X(m)) m= k | |||||
* 24 CONTINUE | |||||
*/ | |||||
LoopData& loop_data = getLoopData(); | |||||
loopInit(FIND_FIRST_MIN); | |||||
Real_ptr x = loop_data.array_1D_Real[0]; | |||||
Index_type m = 0; | |||||
Index_type val = 0; | |||||
for (auto _ : state) { | |||||
m = 0; | |||||
for (Index_type k=1 ; k< state.range(0) ; k++ ) { | |||||
if ( x[k] < x[m] ) benchmark::DoNotOptimize(m = k); | |||||
} | |||||
} | |||||
} | |||||
BENCHMARK(BM_FIND_FIRST_MIN_RAW)->Arg(171)->Arg(5001)-> | |||||
Arg(44217)->Unit(benchmark::kMicrosecond); |