Changeset View
Changeset View
Standalone View
Standalone View
lib/CodeGen/PPCGCodeGeneration.cpp
Show All 10 Lines | |||||
// GPU mapping strategy. | // GPU mapping strategy. | ||||
// | // | ||||
//===----------------------------------------------------------------------===// | //===----------------------------------------------------------------------===// | ||||
#include "polly/CodeGen/PPCGCodeGeneration.h" | #include "polly/CodeGen/PPCGCodeGeneration.h" | ||||
#include "polly/CodeGen/CodeGeneration.h" | #include "polly/CodeGen/CodeGeneration.h" | ||||
#include "polly/CodeGen/IslAst.h" | #include "polly/CodeGen/IslAst.h" | ||||
#include "polly/CodeGen/IslNodeBuilder.h" | #include "polly/CodeGen/IslNodeBuilder.h" | ||||
#include "polly/CodeGen/PerfMonitor.h" | |||||
#include "polly/CodeGen/Utils.h" | #include "polly/CodeGen/Utils.h" | ||||
#include "polly/DependenceInfo.h" | #include "polly/DependenceInfo.h" | ||||
#include "polly/LinkAllPasses.h" | #include "polly/LinkAllPasses.h" | ||||
#include "polly/Options.h" | #include "polly/Options.h" | ||||
#include "polly/ScopDetection.h" | #include "polly/ScopDetection.h" | ||||
#include "polly/ScopInfo.h" | #include "polly/ScopInfo.h" | ||||
#include "polly/Support/SCEVValidator.h" | #include "polly/Support/SCEVValidator.h" | ||||
#include "llvm/ADT/PostOrderIterator.h" | #include "llvm/ADT/PostOrderIterator.h" | ||||
▲ Show 20 Lines • Show All 90 Lines • ▼ Show 20 Lines | CudaVersion("polly-acc-cuda-version", | ||||
cl::desc("The CUDA version to compile for"), cl::Hidden, | cl::desc("The CUDA version to compile for"), cl::Hidden, | ||||
cl::init("sm_30"), cl::ZeroOrMore, cl::cat(PollyCategory)); | cl::init("sm_30"), cl::ZeroOrMore, cl::cat(PollyCategory)); | ||||
/// Hidden command-line option `-polly-acc-mincompute`.
///
/// Holds an integer described as the minimal number of compute statements to
/// run on the GPU; the default is 10 * 512 * 512.
static cl::opt<int> MinCompute(
    "polly-acc-mincompute", cl::Hidden, cl::init(10 * 512 * 512),
    cl::desc("Minimal number of compute statements to run on GPU."));
extern bool polly::PerfMonitoring; | |||||
/// Return a unique name for a Scop, which is the scop region with the | /// Return a unique name for a Scop, which is the scop region with the | ||||
/// function name. | /// function name. | ||||
std::string getUniqueScopName(const Scop *S) { | std::string getUniqueScopName(const Scop *S) { | ||||
return "Scop Region: " + S->getNameStr() + | return "Scop Region: " + S->getNameStr() + | ||||
" | Function: " + std::string(S->getFunction().getName()); | " | Function: " + std::string(S->getFunction().getName()); | ||||
} | } | ||||
/// Used to store information PPCG wants for kills. This information is | /// Used to store information PPCG wants for kills. This information is | ||||
▲ Show 20 Lines • Show All 1,242 Lines • ▼ Show 20 Lines | isl_bool collectReferencesInGPUStmt(__isl_keep isl_ast_node *Node, void *User) { | ||||
addReferencesFromStmt(Stmt, User, false /* CreateScalarRefs */); | addReferencesFromStmt(Stmt, User, false /* CreateScalarRefs */); | ||||
return isl_bool_true; | return isl_bool_true; | ||||
} | } | ||||
/// Names of the math functions that are treated as being available in
/// NVIDIA's libdevice.
const std::set<std::string> CUDALibDeviceFunctions = {
    // Sign manipulation.
    "copysign", "copysignf", "copysignl",
    // Trigonometric.
    "cos", "cosf",
    // Exponential and logarithm.
    "exp", "expf", "expl", "log", "logf",
    // Square root.
    "sqrt", "sqrtf"};
/// Return the corresponding CUDA libdevice function name for @p F.
///
/// The libdevice name is the name of @p F prefixed with "__nv_".
///
/// Return "" if the name of @p F is not listed in CUDALibDeviceFunctions.
/// This function does not itself check which GPU target is being compiled
/// for; callers decide whether libdevice functions are acceptable.
std::string getCUDALibDeviceFuntion(Function *F) {
  // Convert explicitly: StringRef is not implicitly convertible to
  // std::string on all LLVM releases, and the std::set lookup needs a
  // std::string key.
  const std::string FnName = F->getName().str();
  if (CUDALibDeviceFunctions.count(FnName))
    return "__nv_" + FnName;

  return "";
}
/// Check if @p F is a function that we can code-generate in a GPU kernel.
///
/// @param F              The function to check; must not be null.
/// @param AllowLibDevice If true, functions for which
///                       getCUDALibDeviceFuntion() returns a non-empty name
///                       are accepted as well.
///
/// @returns True if @p F is accepted via libdevice, or if it is an intrinsic
///          whose name starts with "llvm.sqrt", "llvm.fabs", "llvm.copysign"
///          or "llvm.powi".
static bool isValidFunctionInKernel(llvm::Function *F, bool AllowLibDevice) {
  assert(F && "F is an invalid pointer");

  if (AllowLibDevice && !getCUDALibDeviceFuntion(F).empty())
    return true;

  if (!F->isIntrinsic())
    return false;

  // Match on the name prefix so that every type-suffixed variant of an
  // intrinsic (e.g. "llvm.sqrt.*") is covered.
  const StringRef IntrinsicName = F->getName();
  if (IntrinsicName.startswith("llvm.sqrt"))
    return true;
  if (IntrinsicName.startswith("llvm.fabs"))
    return true;
  if (IntrinsicName.startswith("llvm.copysign"))
    return true;
  return IntrinsicName.startswith("llvm.powi");
}
/// Do not take `Function` as a subtree value. | /// Do not take `Function` as a subtree value. | ||||
/// | /// | ||||
/// We try to take the reference of all subtree values and pass them along | /// We try to take the reference of all subtree values and pass them along | ||||
/// to the kernel from the host. Taking an address of any function and | /// to the kernel from the host. Taking an address of any function and | ||||
/// trying to pass along is nonsensical. Only allow `Value`s that are not | /// trying to pass along is nonsensical. Only allow `Value`s that are not | ||||
/// `Function`s. | /// `Function`s. | ||||
▲ Show 20 Lines • Show All 2,010 Lines • ▼ Show 20 Lines | if (!NodeBuilder.preloadInvariantLoads()) { | ||||
assert(ExitingBB); | assert(ExitingBB); | ||||
DT->changeImmediateDominator(MergeBlock, ExitingBB); | DT->changeImmediateDominator(MergeBlock, ExitingBB); | ||||
DT->eraseNode(ExitingBlock); | DT->eraseNode(ExitingBlock); | ||||
isl_ast_expr_free(Condition); | isl_ast_expr_free(Condition); | ||||
isl_ast_node_free(Root); | isl_ast_node_free(Root); | ||||
} else { | } else { | ||||
if (polly::PerfMonitoring) { | |||||
PerfMonitor P(*S, EnteringBB->getParent()->getParent()); | |||||
P.initialize(); | |||||
P.insertRegionStart(SplitBlock->getTerminator()); | |||||
// TODO: actually think if this is the correct exiting block to place | |||||
// the `end` performance marker. Invariant load hoisting changes | |||||
// the CFG in a way that I do not precisely understand, so I | |||||
// (Siddharth<siddu.druid@gmail.com>) should come back to this and | |||||
// think about which exiting block to use. | |||||
auto *ExitingBlock = StartBlock->getUniqueSuccessor(); | |||||
assert(ExitingBlock); | |||||
BasicBlock *MergeBlock = ExitingBlock->getUniqueSuccessor(); | |||||
P.insertRegionEnd(MergeBlock->getTerminator()); | |||||
} | |||||
NodeBuilder.addParameters(S->getContext().release()); | NodeBuilder.addParameters(S->getContext().release()); | ||||
Value *RTC = NodeBuilder.createRTC(Condition); | Value *RTC = NodeBuilder.createRTC(Condition); | ||||
Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC); | Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC); | ||||
Builder.SetInsertPoint(&*StartBlock->begin()); | Builder.SetInsertPoint(&*StartBlock->begin()); | ||||
NodeBuilder.create(Root); | NodeBuilder.create(Root); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 102 Lines • Show Last 20 Lines |