Index: include/polly/CodeGen/PPCGCodeGeneration.h
===================================================================
--- include/polly/CodeGen/PPCGCodeGeneration.h
+++ include/polly/CodeGen/PPCGCodeGeneration.h
@@ -23,6 +23,10 @@
 
 namespace polly {
 extern bool PollyManagedMemory;
-}
+
+/// Check whether the given GPU architecture and runtime support the use of
+/// CUDA managed memory.
+bool useManagedMemory(GPUArch Arch, GPURuntime Runtime);
+} // namespace polly
 
 #endif // POLLY_PPCGCODEGENERATION_H
Index: lib/CodeGen/PPCGCodeGeneration.cpp
===================================================================
--- lib/CodeGen/PPCGCodeGeneration.cpp
+++ lib/CodeGen/PPCGCodeGeneration.cpp
@@ -132,6 +132,13 @@
          " | Function: " + std::string(S->getFunction().getName());
 }
 
+/// Check whether the GPU architecture and runtime are the right combination
+/// for using CUDA managed memory.
+bool polly::useManagedMemory(GPUArch Arch, GPURuntime Runtime) {
+  return (PollyManagedMemory && (Runtime == GPURuntime::CUDA) &&
+          (Arch == GPUArch::NVPTX64));
+}
+
 /// Used to store information PPCG wants for kills. This information is
 /// used by live range reordering.
 ///
@@ -757,14 +764,14 @@
 
   GPUContext = createCallInitContext();
 
-  if (!PollyManagedMemory)
+  if (!useManagedMemory(Arch, Runtime))
     allocateDeviceArrays();
   else
     prepareManagedDeviceArrays();
 }
 
 void GPUNodeBuilder::finalize() {
-  if (!PollyManagedMemory)
+  if (!useManagedMemory(Arch, Runtime))
     freeDeviceArrays();
 
   createCallFreeContext(GPUContext);
@@ -772,7 +779,7 @@
 }
 
 void GPUNodeBuilder::allocateDeviceArrays() {
-  assert(!PollyManagedMemory &&
+  assert(!useManagedMemory(Arch, Runtime) &&
          "Managed memory will directly send host pointers "
         "to the kernel. There is no need for device arrays");
   isl_ast_build *Build = isl_ast_build_from_context(S.getContext().release());
@@ -812,7 +819,7 @@
 }
 
 void GPUNodeBuilder::prepareManagedDeviceArrays() {
-  assert(PollyManagedMemory &&
+  assert(useManagedMemory(Arch, Runtime) &&
          "Device array most only be prepared in managed-memory mode");
   for (int i = 0; i < Prog->n_array; ++i) {
     gpu_array_info *Array = &Prog->array[i];
@@ -859,7 +866,8 @@
 }
 
 void GPUNodeBuilder::freeDeviceArrays() {
-  assert(!PollyManagedMemory && "Managed memory does not use device arrays");
+  assert(!useManagedMemory(Arch, Runtime) &&
+         "Managed memory does not use device arrays");
   for (auto &Array : DeviceAllocations)
     createCallFreeDeviceMemory(Array.second);
 }
@@ -944,7 +952,7 @@
 }
 
 void GPUNodeBuilder::createCallFreeDeviceMemory(Value *Array) {
-  assert(!PollyManagedMemory &&
+  assert(!useManagedMemory(Arch, Runtime) &&
          "Managed memory does not allocate or free memory "
          "for device");
   const char *Name = "polly_freeDeviceMemory";
@@ -964,7 +972,7 @@
 }
 
 Value *GPUNodeBuilder::createCallAllocateMemoryForDevice(Value *Size) {
-  assert(!PollyManagedMemory &&
+  assert(!useManagedMemory(Arch, Runtime) &&
          "Managed memory does not allocate or free memory "
          "for device");
   const char *Name = "polly_allocateMemoryForDevice";
@@ -986,7 +994,7 @@
 void GPUNodeBuilder::createCallCopyFromHostToDevice(Value *HostData,
                                                     Value *DeviceData,
                                                     Value *Size) {
-  assert(!PollyManagedMemory &&
+  assert(!useManagedMemory(Arch, Runtime) &&
          "Managed memory does not transfer memory between "
          "device and host");
   const char *Name = "polly_copyFromHostToDevice";
@@ -1010,7 +1018,7 @@
 void GPUNodeBuilder::createCallCopyFromDeviceToHost(Value *DeviceData,
                                                     Value *HostData,
                                                     Value *Size) {
-  assert(!PollyManagedMemory &&
+  assert(!useManagedMemory(Arch, Runtime) &&
          "Managed memory does not transfer memory between "
          "device and host");
   const char *Name = "polly_copyFromDeviceToHost";
@@ -1032,8 +1040,9 @@
 }
 
 void GPUNodeBuilder::createCallSynchronizeDevice() {
-  assert(PollyManagedMemory && "explicit synchronization is only necessary for "
-                               "managed memory");
+  assert(useManagedMemory(Arch, Runtime) &&
+         "explicit synchronization is only necessary for "
+         "managed memory");
   const char *Name = "polly_synchronizeDevice";
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
   Function *F = M->getFunction(Name);
@@ -1159,9 +1168,10 @@
 
 Value *GPUNodeBuilder::getManagedDeviceArray(gpu_array_info *Array,
                                              ScopArrayInfo *ArrayInfo) {
-  assert(PollyManagedMemory && "Only used when you wish to get a host "
-                               "pointer for sending data to the kernel, "
-                               "with managed memory");
+  assert(useManagedMemory(Arch, Runtime) &&
+         "Only used when you wish to get a host "
+         "pointer for sending data to the kernel, "
+         "with managed memory");
   std::map<ScopArrayInfo *, Value *>::iterator it;
   it = DeviceAllocations.find(ArrayInfo);
   assert(it != DeviceAllocations.end() &&
@@ -1171,7 +1181,8 @@
 
 void GPUNodeBuilder::createDataTransfer(__isl_take isl_ast_node *TransferStmt,
                                         enum DataDirection Direction) {
-  assert(!PollyManagedMemory && "Managed memory needs no data transfers");
+  assert(!useManagedMemory(Arch, Runtime) &&
+         "Managed memory needs no data transfers");
   isl_ast_expr *Expr = isl_ast_node_user_get_expr(TransferStmt);
   isl_ast_expr *Arg = isl_ast_expr_get_op_arg(Expr, 0);
   isl_id *Id = isl_ast_expr_get_id(Arg);
@@ -1225,7 +1236,7 @@
   const char *Str = isl_id_get_name(Id);
   if (!strcmp(Str, "kernel")) {
     createKernel(UserStmt);
-    if (PollyManagedMemory)
+    if (useManagedMemory(Arch, Runtime))
       createCallSynchronizeDevice();
     isl_ast_expr_free(Expr);
     return;
   }
@@ -1243,7 +1254,7 @@
     return;
   }
   if (isPrefix(Str, "to_device")) {
-    if (!PollyManagedMemory)
+    if (!useManagedMemory(Arch, Runtime))
       createDataTransfer(UserStmt, HOST_TO_DEVICE);
     else
       isl_ast_node_free(UserStmt);
@@ -1253,7 +1264,7 @@
   }
 
   if (isPrefix(Str, "from_device")) {
-    if (!PollyManagedMemory) {
+    if (!useManagedMemory(Arch, Runtime)) {
       createDataTransfer(UserStmt, DEVICE_TO_HOST);
     } else {
       isl_ast_node_free(UserStmt);
@@ -1647,7 +1658,7 @@
     ArgSizes[Index] = SAI->getElemSizeInBytes();
 
     Value *DevArray = nullptr;
-    if (PollyManagedMemory) {
+    if (useManagedMemory(Arch, Runtime)) {
      DevArray = getManagedDeviceArray(&Prog->array[i],
                                        const_cast<ScopArrayInfo *>(SAI));
     } else {
@@ -1669,7 +1680,7 @@
 
     if (gpu_array_is_read_only_scalar(&Prog->array[i])) {
       Value *ValPtr = nullptr;
-      if (PollyManagedMemory)
+      if (useManagedMemory(Arch, Runtime))
         ValPtr = DevArray;
       else
         ValPtr = BlockGen.getOrCreateAlloca(SAI);
@@ -3240,7 +3251,8 @@
     LLVM_DEBUG(dbgs() << getUniqueScopName(S)
                       << " does not have permutable bands. Bailing out\n";);
   } else {
-    const bool CreateTransferToFromDevice = !PollyManagedMemory;
+    const bool CreateTransferToFromDevice =
+        (!useManagedMemory(Architecture, Runtime));
     Schedule = map_to_device(PPCGGen, Schedule, CreateTransferToFromDevice);
     PPCGGen->tree = generate_code(PPCGGen, isl_schedule_copy(Schedule));
   }
Index: lib/Support/RegisterPasses.cpp
===================================================================
--- lib/Support/RegisterPasses.cpp
+++ lib/Support/RegisterPasses.cpp
@@ -353,8 +353,9 @@
   if (Target == TARGET_HYBRID) {
     PM.add(
         polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
-    PM.add(polly::createManagedMemoryRewritePassPass(GPUArchChoice,
-                                                     GPURuntimeChoice));
+    if (useManagedMemory(GPUArchChoice, GPURuntimeChoice))
+      PM.add(polly::createManagedMemoryRewritePassPass(GPUArchChoice,
+                                                       GPURuntimeChoice));
   }
 #endif
   if (Target == TARGET_CPU || Target == TARGET_HYBRID)
@@ -385,7 +386,8 @@
   else {
     PM.add(
         polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
-    PM.add(polly::createManagedMemoryRewritePassPass());
+    if (useManagedMemory(GPUArchChoice, GPURuntimeChoice))
+      PM.add(polly::createManagedMemoryRewritePassPass());
   }
 #endif
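
Note (not part of the patch): the sketch below illustrates the truth table of the new useManagedMemory predicate in isolation. The enum and flag definitions are simplified stand-ins that mirror the names used in the patch (the real declarations live in Polly's headers); all scaffolding here is hypothetical and exists only so the example compiles standalone.

// Standalone sketch; stand-ins for Polly's GPURuntime/GPUArch enums and
// the PollyManagedMemory flag, reduced to the values the patch tests.
#include <cassert>

enum GPURuntime { CUDA, OpenCL };
enum GPUArch { NVPTX64, SPIR32, SPIR64 };

// Stand-in for the global toggled by Polly's managed-memory option.
static bool PollyManagedMemory = true;

// Mirrors the predicate added by the patch: managed memory is usable only
// when the flag is set AND we target the CUDA runtime on NVPTX64.
static bool useManagedMemory(GPUArch Arch, GPURuntime Runtime) {
  return PollyManagedMemory && Runtime == GPURuntime::CUDA &&
         Arch == GPUArch::NVPTX64;
}

int main() {
  // Only the CUDA/NVPTX64 combination qualifies.
  assert(useManagedMemory(GPUArch::NVPTX64, GPURuntime::CUDA));
  assert(!useManagedMemory(GPUArch::SPIR64, GPURuntime::OpenCL));
  assert(!useManagedMemory(GPUArch::NVPTX64, GPURuntime::OpenCL));

  // With the flag off, every combination falls back to explicit
  // host/device copies, and the rewrite pass is not scheduled.
  PollyManagedMemory = false;
  assert(!useManagedMemory(GPUArch::NVPTX64, GPURuntime::CUDA));
  return 0;
}

The value of centralizing the check is visible in the RegisterPasses.cpp hunks: GPUNodeBuilder and the pass pipeline now consult one definition of "managed memory is usable" instead of testing PollyManagedMemory alone, so createManagedMemoryRewritePassPass is only scheduled when the CUDA/NVPTX64 path can actually honor it.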