Index: include/polly/CodeGen/IslNodeBuilder.h =================================================================== --- include/polly/CodeGen/IslNodeBuilder.h +++ include/polly/CodeGen/IslNodeBuilder.h @@ -23,6 +23,9 @@ #include "llvm/ADT/SmallVector.h" #include "isl/ctx.h" #include "isl/union_map.h" + +#include "isl-noexceptions.h" + #include #include @@ -41,6 +44,7 @@ SetVector &Values; SetVector &SCEVs; BlockGenerator &BlockGen; + isl::space *ParamSpace; }; /// Extract the out-of-scop values and SCEVs referenced from a ScopStmt. @@ -50,6 +54,10 @@ /// statements we force the generation of alloca memory locations and list /// these locations in the set of out-of-scop values as well. /// +/// We also collect an isl::space that includes all parameter dimensions +/// used in the statement's memory accesses, in case the ParamSpace pointer +/// is non-null. +/// /// @param Stmt The statement for which to extract the information. /// @param UserPtr A void pointer that can be casted to a /// SubtreeReferences structure. 
Index: lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- lib/CodeGen/IslNodeBuilder.cpp +++ lib/CodeGen/IslNodeBuilder.cpp @@ -229,6 +229,12 @@ } for (auto &Access : *Stmt) { + if (References.ParamSpace) { + isl::space ParamSpace = Access->getLatestAccessRelation().get_space(); + (*References.ParamSpace) = + References.ParamSpace->align_params(ParamSpace); + } + if (Access->isLatestArrayKind()) { auto *BasePtr = Access->getScopArrayInfo()->getBasePtr(); if (Instruction *OpInst = dyn_cast(BasePtr)) @@ -297,7 +303,7 @@ SetVector SCEVs; struct SubtreeReferences References = { - LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator()}; + LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator(), nullptr}; for (const auto &I : IDToValue) Values.insert(I.second); Index: lib/CodeGen/PPCGCodeGeneration.cpp =================================================================== --- lib/CodeGen/PPCGCodeGeneration.cpp +++ lib/CodeGen/PPCGCodeGeneration.cpp @@ -436,7 +436,8 @@ /// in the scop, nor do they immediately surroung the Scop. /// See [Code generation of induction variables of loops outside /// Scops] - std::tuple, SetVector, SetVector> + std::tuple, SetVector, SetVector, + isl::space> getReferencesInKernel(ppcg_kernel *Kernel); /// Compute the sizes of the execution grid for a given kernel. 
@@ -1434,13 +1435,16 @@ return SubtreeFunctions; } -std::tuple, SetVector, SetVector> +std::tuple, SetVector, SetVector, + isl::space> GPUNodeBuilder::getReferencesInKernel(ppcg_kernel *Kernel) { SetVector SubtreeValues; SetVector SCEVs; SetVector Loops; + isl::space ParamSpace = isl::space(S.getIslCtx(), 0, 0).params(); SubtreeReferences References = { - LI, SE, S, ValueMap, SubtreeValues, SCEVs, getBlockGenerator()}; + LI, SE, S, ValueMap, SubtreeValues, SCEVs, getBlockGenerator(), + &ParamSpace}; for (const auto &I : IDToValue) SubtreeValues.insert(I.second); @@ -1507,7 +1511,8 @@ else ReplacedValues.insert(It->second); } - return std::make_tuple(ReplacedValues, ValidSubtreeFunctions, Loops); + return std::make_tuple(ReplacedValues, ValidSubtreeFunctions, Loops, + ParamSpace); } void GPUNodeBuilder::clearDominators(Function *F) { @@ -1751,9 +1756,16 @@ SetVector SubtreeValues; SetVector SubtreeFunctions; SetVector Loops; - std::tie(SubtreeValues, SubtreeFunctions, Loops) = + isl::space ParamSpace; + std::tie(SubtreeValues, SubtreeFunctions, Loops, ParamSpace) = getReferencesInKernel(Kernel); + // Add parameters that appear only in the access function to the kernel + // space. This is important to make sure that all isl_ids are passed as + // parameters to the kernel, even though some parameters may be omitted + // from the context to improve compile time.
+ Kernel->space = isl_space_align_params(Kernel->space, ParamSpace.release()); + assert(Kernel->tree && "Device AST of kernel node is empty"); Instruction &HostInsertPoint = *Builder.GetInsertPoint(); Index: test/GPGPU/memory-only-referenced-from-access.ll =================================================================== --- /dev/null +++ test/GPGPU/memory-only-referenced-from-access.ll @@ -0,0 +1,44 @@ +; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \ +; RUN: -polly-invariant-load-hoisting -polly-ignore-aliasing \ +; RUN: -polly-process-unprofitable -polly-ignore-parameter-bounds \ +; RUN: -polly-acc-fail-on-verify-module-failure \ +; RUN: -polly-acc-codegen-managed-memory \ +; RUN: -disable-output < %s | \ +; RUN: FileCheck %s + +; REQUIRES: pollyacc + +; Verify that we correctly generate a kernel even if certain invariant-load +; hoisted parameters appear only in memory accesses, not in domain elements. + +; CHECK: @FUNC_quux_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_tmp4, i32 %tmp3, i32 %tmp, i32 %tmp31, i32 %tmp2) + +target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct.hoge = type { i8*, i64, i64, [1 x %struct.widget] } +%struct.widget = type { i64, i64, i64 } + +@global = external unnamed_addr global %struct.hoge, align 32 + +define void @quux(i32* noalias %arg, i32* noalias %arg1) { +bb: + %tmp = load i32, i32* %arg, align 4 + %tmp2 = sext i32 %tmp to i64 + %tmp3 = load i32, i32* %arg1, align 4 + %tmp4 = load [0 x double]*, [0 x double]** bitcast (%struct.hoge* @global to [0 x double]**), align 32 + br label %bb5 + +bb5: ; preds = %bb5, %bb + %tmp6 = phi i32 [ %tmp11, %bb5 ], [ 0, %bb ] + %tmp7 = sext i32 %tmp6 to i64 + %tmp8 = sub nsw i64 %tmp7, %tmp2 + %tmp9 = getelementptr [0 x double], [0 x double]* %tmp4, i64 0, i64 %tmp8 + store double
undef, double* %tmp9, align 8 + %tmp10 = icmp eq i32 %tmp6, %tmp3 + %tmp11 = add i32 %tmp6, 1 + br i1 %tmp10, label %bb12, label %bb5 + +bb12: ; preds = %bb5 + ret void +}