Index: include/polly/RegisterPasses.h
===================================================================
--- include/polly/RegisterPasses.h
+++ include/polly/RegisterPasses.h
@@ -23,6 +23,7 @@
 } // namespace llvm
 
 namespace polly {
+bool useManagedMemory();
 void initializePollyPasses(llvm::PassRegistry &Registry);
 void registerPollyPasses(llvm::legacy::PassManagerBase &PM);
 } // namespace polly
Index: lib/CodeGen/PPCGCodeGeneration.cpp
===================================================================
--- lib/CodeGen/PPCGCodeGeneration.cpp
+++ lib/CodeGen/PPCGCodeGeneration.cpp
@@ -21,6 +21,7 @@
 #include "polly/DependenceInfo.h"
 #include "polly/LinkAllPasses.h"
 #include "polly/Options.h"
+#include "polly/RegisterPasses.h"
 #include "polly/ScopDetection.h"
 #include "polly/ScopInfo.h"
 #include "polly/Support/SCEVValidator.h"
@@ -91,15 +92,6 @@
                                    cl::init(false), cl::ZeroOrMore,
                                    cl::cat(PollyCategory));
 
-bool polly::PollyManagedMemory;
-static cl::opt<bool, true>
-    XManagedMemory("polly-acc-codegen-managed-memory",
-                   cl::desc("Generate Host kernel code assuming"
-                            " that all memory has been"
-                            " declared as managed memory"),
-                   cl::location(PollyManagedMemory), cl::Hidden,
-                   cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
-
 static cl::opt<bool>
     FailOnVerifyModuleFailure("polly-acc-fail-on-verify-module-failure",
                               cl::desc("Fail and generate a backtrace if"
@@ -757,14 +749,14 @@
 
   GPUContext = createCallInitContext();
 
-  if (!PollyManagedMemory)
+  if (!useManagedMemory())
     allocateDeviceArrays();
   else
     prepareManagedDeviceArrays();
 }
 
 void GPUNodeBuilder::finalize() {
-  if (!PollyManagedMemory)
+  if (!useManagedMemory())
     freeDeviceArrays();
 
   createCallFreeContext(GPUContext);
@@ -772,7 +764,7 @@
 }
 
 void GPUNodeBuilder::allocateDeviceArrays() {
-  assert(!PollyManagedMemory &&
+  assert(!useManagedMemory() &&
          "Managed memory will directly send host pointers "
         "to the kernel. There is no need for device arrays");
   isl_ast_build *Build = isl_ast_build_from_context(S.getContext().release());
@@ -812,7 +804,7 @@
 }
 
 void GPUNodeBuilder::prepareManagedDeviceArrays() {
-  assert(PollyManagedMemory &&
+  assert(useManagedMemory() &&
          "Device array most only be prepared in managed-memory mode");
   for (int i = 0; i < Prog->n_array; ++i) {
     gpu_array_info *Array = &Prog->array[i];
@@ -859,7 +851,7 @@
 }
 
 void GPUNodeBuilder::freeDeviceArrays() {
-  assert(!PollyManagedMemory && "Managed memory does not use device arrays");
+  assert(!useManagedMemory() && "Managed memory does not use device arrays");
   for (auto &Array : DeviceAllocations)
     createCallFreeDeviceMemory(Array.second);
 }
@@ -944,7 +936,7 @@
 }
 
 void GPUNodeBuilder::createCallFreeDeviceMemory(Value *Array) {
-  assert(!PollyManagedMemory &&
+  assert(!useManagedMemory() &&
          "Managed memory does not allocate or free memory "
         "for device");
   const char *Name = "polly_freeDeviceMemory";
@@ -964,7 +956,7 @@
 }
 
 Value *GPUNodeBuilder::createCallAllocateMemoryForDevice(Value *Size) {
-  assert(!PollyManagedMemory &&
+  assert(!useManagedMemory() &&
         "Managed memory does not allocate or free memory "
        "for device");
   const char *Name = "polly_allocateMemoryForDevice";
@@ -986,7 +978,7 @@
 void GPUNodeBuilder::createCallCopyFromHostToDevice(Value *HostData,
                                                     Value *DeviceData,
                                                     Value *Size) {
-  assert(!PollyManagedMemory &&
+  assert(!useManagedMemory() &&
          "Managed memory does not transfer memory between "
         "device and host");
   const char *Name = "polly_copyFromHostToDevice";
@@ -1010,7 +1002,7 @@
 void GPUNodeBuilder::createCallCopyFromDeviceToHost(Value *DeviceData,
                                                     Value *HostData,
                                                     Value *Size) {
-  assert(!PollyManagedMemory &&
+  assert(!useManagedMemory() &&
          "Managed memory does not transfer memory between "
         "device and host");
   const char *Name = "polly_copyFromDeviceToHost";
@@ -1032,7 +1024,7 @@
 }
 
 void GPUNodeBuilder::createCallSynchronizeDevice() {
-  assert(PollyManagedMemory && "explicit synchronization is only necessary for "
+  assert(useManagedMemory() && "explicit synchronization is only necessary for "
                                "managed memory");
   const char *Name = "polly_synchronizeDevice";
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
@@ -1159,7 +1151,7 @@
 
 Value *GPUNodeBuilder::getManagedDeviceArray(gpu_array_info *Array,
                                              ScopArrayInfo *ArrayInfo) {
-  assert(PollyManagedMemory && "Only used when you wish to get a host "
+  assert(useManagedMemory() && "Only used when you wish to get a host "
                                "pointer for sending data to the kernel, "
                                "with managed memory");
   std::map<ScopArrayInfo *, Value *>::iterator it;
@@ -1171,7 +1163,7 @@
 
 void GPUNodeBuilder::createDataTransfer(__isl_take isl_ast_node *TransferStmt,
                                         enum DataDirection Direction) {
-  assert(!PollyManagedMemory && "Managed memory needs no data transfers");
+  assert(!useManagedMemory() && "Managed memory needs no data transfers");
   isl_ast_expr *Expr = isl_ast_node_user_get_expr(TransferStmt);
   isl_ast_expr *Arg = isl_ast_expr_get_op_arg(Expr, 0);
   isl_id *Id = isl_ast_expr_get_id(Arg);
@@ -1225,7 +1217,7 @@
   const char *Str = isl_id_get_name(Id);
   if (!strcmp(Str, "kernel")) {
     createKernel(UserStmt);
-    if (PollyManagedMemory)
+    if (useManagedMemory())
       createCallSynchronizeDevice();
     isl_ast_expr_free(Expr);
     return;
   }
@@ -1243,7 +1235,7 @@
     return;
   }
 
   if (isPrefix(Str, "to_device")) {
-    if (!PollyManagedMemory)
+    if (!useManagedMemory())
       createDataTransfer(UserStmt, HOST_TO_DEVICE);
     else
       isl_ast_node_free(UserStmt);
@@ -1253,7 +1245,7 @@
   }
 
   if (isPrefix(Str, "from_device")) {
-    if (!PollyManagedMemory) {
+    if (!useManagedMemory()) {
       createDataTransfer(UserStmt, DEVICE_TO_HOST);
     } else {
       isl_ast_node_free(UserStmt);
     }
@@ -1647,7 +1639,7 @@
     ArgSizes[Index] = SAI->getElemSizeInBytes();
 
     Value *DevArray = nullptr;
-    if (PollyManagedMemory) {
+    if (useManagedMemory()) {
       DevArray = getManagedDeviceArray(&Prog->array[i],
                                        const_cast<ScopArrayInfo *>(SAI));
     } else {
@@ -1669,7 +1661,7 @@
 
     if (gpu_array_is_read_only_scalar(&Prog->array[i])) {
       Value *ValPtr = nullptr;
-      if (PollyManagedMemory)
+      if (useManagedMemory())
         ValPtr = DevArray;
       else
         ValPtr = BlockGen.getOrCreateAlloca(SAI);
@@ -3240,7 +3232,7 @@
       LLVM_DEBUG(dbgs() << getUniqueScopName(S)
                         << " does not have permutable bands. Bailing out\n";);
     } else {
-      const bool CreateTransferToFromDevice = !PollyManagedMemory;
+      const bool CreateTransferToFromDevice = !useManagedMemory();
       Schedule = map_to_device(PPCGGen, Schedule, CreateTransferToFromDevice);
       PPCGGen->tree = generate_code(PPCGGen, isl_schedule_copy(Schedule));
     }
Index: lib/Support/RegisterPasses.cpp
===================================================================
--- lib/Support/RegisterPasses.cpp
+++ lib/Support/RegisterPasses.cpp
@@ -131,6 +131,15 @@
                                         "target SPIR 64-bit architecture")),
                   cl::init(GPUArch::NVPTX64), cl::ZeroOrMore,
                   cl::cat(PollyCategory));
+
+bool polly::PollyManagedMemory;
+static cl::opt<bool, true>
+    XManagedMemory("polly-acc-codegen-managed-memory",
+                   cl::desc("Generate Host kernel code assuming"
+                            " that all memory has been"
+                            " declared as managed memory"),
+                   cl::location(PollyManagedMemory), cl::Hidden,
+                   cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
 #endif
 
 VectorizerChoice polly::PollyVectorizerChoice;
@@ -242,6 +251,14 @@
                                cl::init(true), cl::cat(PollyCategory));
 
 namespace polly {
+
+/// Check whether the selected GPU runtime and architecture support CUDA
+/// managed memory.
+bool useManagedMemory() {
+  return PollyManagedMemory && GPURuntimeChoice == GPURuntime::CUDA &&
+         GPUArchChoice == GPUArch::NVPTX64;
+}
+
 void initializePollyPasses(PassRegistry &Registry) {
   initializeCodeGenerationPass(Registry);
@@ -353,8 +370,9 @@
   if (Target == TARGET_HYBRID) {
     PM.add(
         polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
-    PM.add(polly::createManagedMemoryRewritePassPass(GPUArchChoice,
-                                                     GPURuntimeChoice));
+    if (useManagedMemory())
+      PM.add(polly::createManagedMemoryRewritePassPass(GPUArchChoice,
+                                                       GPURuntimeChoice));
   }
 #endif
   if (Target == TARGET_CPU || Target == TARGET_HYBRID)
@@ -385,7 +403,8 @@
   else {
     PM.add(
         polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
-    PM.add(polly::createManagedMemoryRewritePassPass());
+    if (useManagedMemory())
+      PM.add(polly::createManagedMemoryRewritePassPass());
   }
 #endif
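
---

For reviewers, a minimal standalone sketch of the gating semantics this patch introduces. This is an illustration, not part of the patch: the enum values and the three option variables below are simplified stand-ins for Polly's real cl::opt-backed globals; only the body of useManagedMemory() mirrors the patched predicate.

    #include <cassert>

    // Simplified stand-ins for Polly's option enums (subset for illustration).
    enum class GPURuntime { CUDA, OpenCL };
    enum class GPUArch { NVPTX64, SPIR32, SPIR64 };

    // Stand-ins for the flag-backed globals:
    // -polly-acc-codegen-managed-memory, -polly-gpu-runtime, -polly-gpu-arch.
    static bool PollyManagedMemory = false;
    static GPURuntime GPURuntimeChoice = GPURuntime::CUDA;
    static GPUArch GPUArchChoice = GPUArch::NVPTX64;

    // Mirrors polly::useManagedMemory(): the managed-memory paths are taken
    // only when the flag is set AND the target is the CUDA runtime on NVPTX64.
    static bool useManagedMemory() {
      return PollyManagedMemory && GPURuntimeChoice == GPURuntime::CUDA &&
             GPUArchChoice == GPUArch::NVPTX64;
    }

    int main() {
      // Flag unset: managed memory is never used.
      assert(!useManagedMemory());

      // Flag set with CUDA/NVPTX64: managed-memory code paths are enabled.
      PollyManagedMemory = true;
      assert(useManagedMemory());

      // Flag set but targeting SPIR: the flag is ignored, matching the new
      // guard around createManagedMemoryRewritePassPass().
      GPUArchChoice = GPUArch::SPIR64;
      assert(!useManagedMemory());
      return 0;
    }

The effect of centralizing the check in RegisterPasses.cpp is that passing -polly-acc-codegen-managed-memory with a non-CUDA runtime or non-NVPTX64 target now falls back to explicit host/device transfers instead of emitting managed-memory runtime calls the target cannot honor.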