Index: include/polly/CodeGen/PPCGCodeGeneration.h
===================================================================
--- include/polly/CodeGen/PPCGCodeGeneration.h
+++ include/polly/CodeGen/PPCGCodeGeneration.h
@@ -25,4 +25,8 @@
 extern bool PollyManagedMemory;
 }
 
+/// Check whether we use the right GPU architecture and runtime to generate
+/// CUDA managed memory.
+bool useManagedMemory(GPUArch Arch, GPURuntime Runtime);
+
 #endif // POLLY_PPCGCODEGENERATION_H
Index: lib/CodeGen/PPCGCodeGeneration.cpp
===================================================================
--- lib/CodeGen/PPCGCodeGeneration.cpp
+++ lib/CodeGen/PPCGCodeGeneration.cpp
@@ -90,7 +90,6 @@
                    cl::desc("Use private memory"), cl::Hidden, cl::init(false),
                    cl::ZeroOrMore, cl::cat(PollyCategory));
-
 bool polly::PollyManagedMemory;
 static cl::opt<bool, true>
     XManagedMemory("polly-acc-codegen-managed-memory",
@@ -132,6 +131,13 @@
          " | Function: " + std::string(S->getFunction().getName());
 }
 
+/// Check whether the GPU architecture and runtime are correct to use
+/// CUDA managed memory.
+bool useManagedMemory(GPUArch Arch, GPURuntime Runtime) {
+  return (PollyManagedMemory && (Runtime == GPURuntime::CUDA) &&
+          (Arch == GPUArch::NVPTX64));
+}
+
 /// Used to store information PPCG wants for kills. This information is
 /// used by live range reordering.
 ///
@@ -757,14 +763,14 @@
 
   GPUContext = createCallInitContext();
 
-  if (!PollyManagedMemory)
+  if (!useManagedMemory(Arch, Runtime))
     allocateDeviceArrays();
   else
     prepareManagedDeviceArrays();
 }
 
 void GPUNodeBuilder::finalize() {
-  if (!PollyManagedMemory)
+  if (!useManagedMemory(Arch, Runtime))
     freeDeviceArrays();
   createCallFreeContext(GPUContext);
 }
@@ -772,7 +778,7 @@
 void GPUNodeBuilder::allocateDeviceArrays() {
-  assert(!PollyManagedMemory &&
+  assert(!useManagedMemory(Arch, Runtime) &&
          "Managed memory will directly send host pointers "
         "to the kernel. There is no need for device arrays");
   isl_ast_build *Build = isl_ast_build_from_context(S.getContext().release());
@@ -812,7 +818,7 @@
 void GPUNodeBuilder::prepareManagedDeviceArrays() {
-  assert(PollyManagedMemory &&
+  assert(useManagedMemory(Arch, Runtime) &&
          "Device array most only be prepared in managed-memory mode");
   for (int i = 0; i < Prog->n_array; ++i) {
     gpu_array_info *Array = &Prog->array[i];
@@ -859,7 +865,8 @@
 void GPUNodeBuilder::freeDeviceArrays() {
-  assert(!PollyManagedMemory && "Managed memory does not use device arrays");
+  assert(!useManagedMemory(Arch, Runtime) &&
+         "Managed memory does not use device arrays");
   for (auto &Array : DeviceAllocations)
     createCallFreeDeviceMemory(Array.second);
 }
@@ -944,7 +951,7 @@
 void GPUNodeBuilder::createCallFreeDeviceMemory(Value *Array) {
-  assert(!PollyManagedMemory &&
+  assert(!useManagedMemory(Arch, Runtime) &&
          "Managed memory does not allocate or free memory "
         "for device");
   const char *Name = "polly_freeDeviceMemory";
@@ -964,7 +971,7 @@
 Value *GPUNodeBuilder::createCallAllocateMemoryForDevice(Value *Size) {
-  assert(!PollyManagedMemory &&
+  assert(!useManagedMemory(Arch, Runtime) &&
          "Managed memory does not allocate or free memory "
         "for device");
   const char *Name = "polly_allocateMemoryForDevice";
@@ -986,7 +993,7 @@
 void GPUNodeBuilder::createCallCopyFromHostToDevice(Value *HostData,
                                                     Value *DeviceData,
                                                     Value *Size) {
-  assert(!PollyManagedMemory &&
+  assert(!useManagedMemory(Arch, Runtime) &&
          "Managed memory does not transfer memory between "
         "device and host");
   const char *Name = "polly_copyFromHostToDevice";
@@ -1010,7 +1017,7 @@
 void GPUNodeBuilder::createCallCopyFromDeviceToHost(Value *DeviceData,
                                                     Value *HostData,
                                                     Value *Size) {
-  assert(!PollyManagedMemory &&
+  assert(!useManagedMemory(Arch, Runtime) &&
          "Managed memory does not transfer memory between "
         "device and host");
   const char *Name = "polly_copyFromDeviceToHost";
@@ -1032,8 +1039,9 @@
 void GPUNodeBuilder::createCallSynchronizeDevice() {
-  assert(PollyManagedMemory && "explicit synchronization is only necessary for "
-                               "managed memory");
+  assert(useManagedMemory(Arch, Runtime) &&
+         "explicit synchronization is only necessary for "
+         "managed memory");
   const char *Name = "polly_synchronizeDevice";
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
   Function *F = M->getFunction(Name);
@@ -1159,9 +1167,10 @@
 
 Value *GPUNodeBuilder::getManagedDeviceArray(gpu_array_info *Array,
                                              ScopArrayInfo *ArrayInfo) {
-  assert(PollyManagedMemory && "Only used when you wish to get a host "
-                               "pointer for sending data to the kernel, "
-                               "with managed memory");
+  assert(useManagedMemory(Arch, Runtime) &&
+         "Only used when you wish to get a host "
+         "pointer for sending data to the kernel, "
+         "with managed memory");
   std::map<ScopArrayInfo *, Value *>::iterator it;
   it = DeviceAllocations.find(ArrayInfo);
   assert(it != DeviceAllocations.end() &&
@@ -1171,7 +1180,8 @@
 void GPUNodeBuilder::createDataTransfer(__isl_take isl_ast_node *TransferStmt,
                                         enum DataDirection Direction) {
-  assert(!PollyManagedMemory && "Managed memory needs no data transfers");
+  assert(!useManagedMemory(Arch, Runtime) &&
+         "Managed memory needs no data transfers");
   isl_ast_expr *Expr = isl_ast_node_user_get_expr(TransferStmt);
   isl_ast_expr *Arg = isl_ast_expr_get_op_arg(Expr, 0);
   isl_id *Id = isl_ast_expr_get_id(Arg);
@@ -1225,7 +1235,7 @@
   const char *Str = isl_id_get_name(Id);
   if (!strcmp(Str, "kernel")) {
     createKernel(UserStmt);
-    if (PollyManagedMemory)
+    if (useManagedMemory(Arch, Runtime))
       createCallSynchronizeDevice();
     isl_ast_expr_free(Expr);
     return;
@@ -1243,7 +1253,7 @@
     return;
   }
   if (isPrefix(Str, "to_device")) {
-    if (!PollyManagedMemory)
+    if (!useManagedMemory(Arch, Runtime))
       createDataTransfer(UserStmt, HOST_TO_DEVICE);
     else
       isl_ast_node_free(UserStmt);
@@ -1253,7 +1263,7 @@
   }
 
   if (isPrefix(Str, "from_device")) {
-    if (!PollyManagedMemory) {
+    if (!useManagedMemory(Arch, Runtime)) {
       createDataTransfer(UserStmt, DEVICE_TO_HOST);
     } else {
       isl_ast_node_free(UserStmt);
@@ -1647,7 +1657,7 @@
     ArgSizes[Index] = SAI->getElemSizeInBytes();
 
     Value *DevArray = nullptr;
-    if (PollyManagedMemory) {
+    if (useManagedMemory(Arch, Runtime)) {
       DevArray = getManagedDeviceArray(&Prog->array[i],
                                        const_cast<ScopArrayInfo *>(SAI));
     } else {
@@ -1669,7 +1679,7 @@
 
     if (gpu_array_is_read_only_scalar(&Prog->array[i])) {
       Value *ValPtr = nullptr;
-      if (PollyManagedMemory)
+      if (useManagedMemory(Arch, Runtime))
         ValPtr = DevArray;
       else
         ValPtr = BlockGen.getOrCreateAlloca(SAI);
@@ -3240,7 +3250,8 @@
       LLVM_DEBUG(dbgs() << getUniqueScopName(S)
                         << " does not have permutable bands. Bailing out\n";);
     } else {
-      const bool CreateTransferToFromDevice = !PollyManagedMemory;
+      const bool CreateTransferToFromDevice =
+          (!useManagedMemory(Architecture, Runtime));
       Schedule = map_to_device(PPCGGen, Schedule, CreateTransferToFromDevice);
       PPCGGen->tree = generate_code(PPCGGen, isl_schedule_copy(Schedule));
     }
Index: lib/Support/RegisterPasses.cpp
===================================================================
--- lib/Support/RegisterPasses.cpp
+++ lib/Support/RegisterPasses.cpp
@@ -242,6 +242,7 @@
     cl::init(true), cl::cat(PollyCategory));
 
 namespace polly {
+
 void initializePollyPasses(PassRegistry &Registry) {
   initializeCodeGenerationPass(Registry);
@@ -353,8 +354,9 @@
   if (Target == TARGET_HYBRID) {
     PM.add(
         polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
-    PM.add(polly::createManagedMemoryRewritePassPass(GPUArchChoice,
-                                                     GPURuntimeChoice));
+    if (useManagedMemory(GPUArchChoice, GPURuntimeChoice))
+      PM.add(polly::createManagedMemoryRewritePassPass(GPUArchChoice,
+                                                       GPURuntimeChoice));
   }
 #endif
   if (Target == TARGET_CPU || Target == TARGET_HYBRID)
@@ -385,7 +387,8 @@
   else {
     PM.add(
         polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
-    PM.add(polly::createManagedMemoryRewritePassPass());
+    if (useManagedMemory(GPUArchChoice, GPURuntimeChoice))
+      PM.add(polly::createManagedMemoryRewritePassPass());
   }
 #endif
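
For reference, a minimal standalone sketch of the behavior the new useManagedMemory predicate encodes. The enum definitions are local stand-ins mirroring GPUArch and GPURuntime from PPCGCodeGeneration.h, and PollyManagedMemory is modeled as a plain bool instead of the cl::opt-backed flag, so the snippet compiles on its own:

#include <cassert>

// Local stand-ins for the enums declared in PPCGCodeGeneration.h.
enum class GPUArch { NVPTX64, SPIR32, SPIR64 };
enum class GPURuntime { CUDA, OpenCL };

// Stand-in for the flag set by -polly-acc-codegen-managed-memory.
static bool PollyManagedMemory = true;

// Managed memory is a CUDA feature, so the predicate only holds when the
// flag is set AND the target is the CUDA runtime on NVPTX64; every other
// arch/runtime pair falls back to explicit device allocations and copies.
bool useManagedMemory(GPUArch Arch, GPURuntime Runtime) {
  return PollyManagedMemory && Runtime == GPURuntime::CUDA &&
         Arch == GPUArch::NVPTX64;
}

int main() {
  assert(useManagedMemory(GPUArch::NVPTX64, GPURuntime::CUDA));
  // Even with the managed-memory flag set, an OpenCL/SPIR target must not
  // take the managed path; this is the case the RegisterPasses.cpp change
  // relies on when it skips ManagedMemoryRewritePass.
  assert(!useManagedMemory(GPUArch::SPIR64, GPURuntime::OpenCL));
  return 0;
}

This is also why the guards in GPUNodeBuilder switch from checking the raw PollyManagedMemory flag to the predicate: the flag alone says what the user asked for, while the predicate says whether the selected architecture and runtime can actually honor it.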