Differential D38417 Diff 122106 MultiSource/Benchmarks/DOE-ProxyApps-C++/HACCKernels/GravityForceKernel.cpp
Changeset View
Changeset View
Standalone View
Standalone View
MultiSource/Benchmarks/DOE-ProxyApps-C++/HACCKernels/GravityForceKernel.cpp
- This file was added.
/* | |||||
* Copyright (C) 2017, UChicago Argonne, LLC | |||||
* All Rights Reserved | |||||
* | |||||
* Hardware/Hybrid Cosmology Code (HACC), Version 1.0 | |||||
* | |||||
* Salman Habib, Adrian Pope, Hal Finkel, Nicholas Frontiere, Katrin Heitmann, | |||||
* Vitali Morozov, Jeffrey Emberson, Thomas Uram, Esteban Rangel | |||||
* (Argonne National Laboratory) | |||||
* | |||||
* David Daniel, Patricia Fasel, Chung-Hsing Hsu, Zarija Lukic, James Ahrens | |||||
* (Los Alamos National Laboratory) | |||||
* | |||||
* George Zagaris | |||||
* (Kitware) | |||||
* | |||||
* OPEN SOURCE LICENSE | |||||
* | |||||
* Redistribution and use in source and binary forms, with or without | |||||
* modification, are permitted provided that the following conditions are met: | |||||
* | |||||
* 1. Redistributions of source code must retain the above copyright notice, | |||||
* this list of conditions and the following disclaimer. Software changes, | |||||
* modifications, or derivative works, should be noted with comments and | |||||
* the author and organization’s name. | |||||
* | |||||
* 2. Redistributions in binary form must reproduce the above copyright | |||||
* notice, this list of conditions and the following disclaimer in the | |||||
* documentation and/or other materials provided with the distribution. | |||||
* | |||||
* 3. Neither the names of UChicago Argonne, LLC or the Department of Energy | |||||
* nor the names of its contributors may be used to endorse or promote | |||||
* products derived from this software without specific prior written | |||||
* permission. | |||||
* | |||||
* 4. The software and the end-user documentation included with the | |||||
* redistribution, if any, must include the following acknowledgment: | |||||
* | |||||
* "This product includes software produced by UChicago Argonne, LLC under | |||||
* Contract No. DE-AC02-06CH11357 with the Department of Energy." | |||||
* | |||||
* ***************************************************************************** | |||||
* DISCLAIMER | |||||
* THE SOFTWARE IS SUPPLIED "AS IS" WITHOUT WARRANTY OF ANY KIND. NEITHER THE | |||||
* UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR | |||||
* UCHICAGO ARGONNE, LLC, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, | |||||
* EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE | |||||
* ACCURARY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, DATA, APPARATUS, | |||||
* PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE | |||||
* PRIVATELY OWNED RIGHTS. | |||||
* | |||||
* ***************************************************************************** | |||||
*/ | |||||
#include "HACCKernels.h" | |||||
#include <cmath> | |||||
extern const float PolyCoefficients4[] = { | |||||
0.263729f, -0.0686285f, 0.00882248f, -0.000592487f, 0.0000164622f | |||||
}; | |||||
extern const float PolyCoefficients5[] = { | |||||
0.269327f, -0.0750978f, 0.0114808f, -0.00109313f, 0.0000605491f, | |||||
-0.00000147177f | |||||
}; | |||||
extern const float PolyCoefficients6[] = { | |||||
0.271431f, -0.0783394f, 0.0133122f, -0.00159485f, 0.000132336f, | |||||
-0.00000663394f, 0.000000147305f | |||||
}; | |||||
// HACC's gravity short-range-force kernel represents the part of the 1/r^2 | |||||
// gravitational force that is not computed by the long-range grid solver. This | |||||
// kernel computes the acceleration of a target particle from all of the other | |||||
// particles in the provided interaction lists. It is assumed that the target | |||||
// particle has unit mass while the interaction-list can contain pseudo | |||||
// particles with larger mass values. Beyond a distance of MaxSep, the | |||||
// inter-particle force should be completely accounted for by the long-range | |||||
// grid solver (and thus we filter out such interactions here). Closer than | |||||
// MaxSep, we directly compute the inter-particle force, subtracting the | |||||
// long-range part of the force (as fit to a polynomial of the specified | |||||
// degree). A softening length, SofteningLen, is also used, as is standard in | |||||
// N-body codes. | |||||
template <int PolyOrder, const float (&PolyCoefficients)[PolyOrder+1]> | |||||
static void GravityForceKernel(int n, float *RESTRICT x, float *RESTRICT y, | |||||
float *RESTRICT z, float *RESTRICT mass, | |||||
float x0, float y0, float z0, | |||||
float MaxSepSqrd, float SofteningLenSqrd, | |||||
float &RESTRICT ax, float &RESTRICT ay, | |||||
float &RESTRICT az) { | |||||
float lax = 0.0f, lay = 0.0f, laz = 0.0f; | |||||
// As written below, the mass array is conditionally accessed (i.e. accessed | |||||
// only if the interaction is not filtered by the distance checks). This will | |||||
// tend to inhibit vectorization on architectures without masked vector loads. | |||||
// With OpenMP 4+, we can explicitly inform the compiler that vectorization is | |||||
// safe. | |||||
// | |||||
// For the test suite: Clang does not report a high-enough version of OpenMP | |||||
// to enable the pragma below. Moreover, vectorization is desirable regardless | |||||
hfinkel: I suggest wording this as follows:
// For the test suite: Clang does not report a high… | |||||
// of whether OpenMP is enabled (even if Clang's reported version were high | |||||
Not Done ReplyInline ActionsAs Brian and I discussed offline, I've suggested that we update this to read: #if _OPENMP >= 201307 So that we'll vectorize the loop. Looking at this again today, it seems like the problem blocking loop vectorization here is just that we don't self report, by default, a new enough OpenMP version. I suspect that bumping that version is blocked on unrelated things. hfinkel: As Brian and I discussed offline, I've suggested that we update this to read:
#if _OPENMP >=… | |||||
// enough), so we also use the Clang loop pragma to assume vectorization safety. | |||||
#if _OPENMP >= 201307 | |||||
#pragma omp simd reduction(+:lax,lay,laz) | |||||
#elif defined(clang) | |||||
#pragma clang loop vectorize(assume_safety) | |||||
#endif | |||||
for (int i = 0; i < n; ++i) { | |||||
float dx = x[i] - x0, dy = y[i] - y0, dz = z[i] - z0; | |||||
float r2 = dx * dx + dy * dy + dz * dz; | |||||
if (r2 >= MaxSepSqrd || r2 == 0.0f) | |||||
continue; | |||||
float r2s = r2 + SofteningLenSqrd; | |||||
float f = PolyCoefficients[PolyOrder]; | |||||
for (int p = 1; p <= PolyOrder; ++p) | |||||
f = PolyCoefficients[PolyOrder-p] + r2*f; | |||||
f = (1.0f / (r2s * std::sqrt(r2s)) - f) * mass[i]; | |||||
lax += f * dx; | |||||
lay += f * dy; | |||||
laz += f * dz; | |||||
} | |||||
ax += lax; | |||||
ay += lay; | |||||
az += laz; | |||||
} | |||||
void GravityForceKernel4(int n, float *RESTRICT x, float *RESTRICT y, | |||||
float *RESTRICT z, float *RESTRICT mass, | |||||
float x0, float y0, float z0, | |||||
float MaxSepSqrd, float SofteningLenSqrd, | |||||
float &RESTRICT ax, float &RESTRICT ay, | |||||
float &RESTRICT az) { | |||||
GravityForceKernel<4, PolyCoefficients4>(n, x, y, z, mass, x0, y0, z0, | |||||
MaxSepSqrd, SofteningLenSqrd, | |||||
ax, ay, az); | |||||
} | |||||
void GravityForceKernel5(int n, float *RESTRICT x, float *RESTRICT y, | |||||
float *RESTRICT z, float *RESTRICT mass, | |||||
float x0, float y0, float z0, | |||||
float MaxSepSqrd, float SofteningLenSqrd, | |||||
float &RESTRICT ax, float &RESTRICT ay, | |||||
float &RESTRICT az) { | |||||
GravityForceKernel<5, PolyCoefficients5>(n, x, y, z, mass, x0, y0, z0, | |||||
MaxSepSqrd, SofteningLenSqrd, | |||||
ax, ay, az); | |||||
} | |||||
void GravityForceKernel6(int n, float *RESTRICT x, float *RESTRICT y, | |||||
float *RESTRICT z, float *RESTRICT mass, | |||||
float x0, float y0, float z0, | |||||
float MaxSepSqrd, float SofteningLenSqrd, | |||||
float &RESTRICT ax, float &RESTRICT ay, | |||||
float &RESTRICT az) { | |||||
GravityForceKernel<6, PolyCoefficients6>(n, x, y, z, mass, x0, y0, z0, | |||||
MaxSepSqrd, SofteningLenSqrd, | |||||
ax, ay, az); | |||||
} | |||||
I suggest wording this as follows: