Index: include/polly/CodeGen/IslNodeBuilder.h =================================================================== --- include/polly/CodeGen/IslNodeBuilder.h +++ include/polly/CodeGen/IslNodeBuilder.h @@ -23,6 +23,9 @@ #include "llvm/ADT/SmallVector.h" #include "isl/ctx.h" #include "isl/union_map.h" + +#include "isl-noexceptions.h" + #include #include @@ -41,6 +44,7 @@ SetVector &Values; SetVector &SCEVs; BlockGenerator &BlockGen; + isl::space ParamSpace; }; /// Extract the out-of-scop values and SCEVs referenced from a ScopStmt. @@ -50,6 +54,9 @@ /// statements we force the generation of alloca memory locations and list /// these locations in the set of out-of-scop values as well. /// +/// We also collect an isl::space that includes all parameter dimensions +/// used in the statement's memory accesses. +/// /// @param Stmt The statement for which to extract the information. /// @param UserPtr A void pointer that can be casted to a /// SubtreeReferences structure. Index: lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- lib/CodeGen/IslNodeBuilder.cpp +++ lib/CodeGen/IslNodeBuilder.cpp @@ -229,6 +229,9 @@ } for (auto &Access : *Stmt) { + isl::space ParamSpace = Access->getLatestAccessRelation().get_space(); + References.ParamSpace = References.ParamSpace.align_params(ParamSpace); + if (Access->isLatestArrayKind()) { auto *BasePtr = Access->getScopArrayInfo()->getBasePtr(); if (Instruction *OpInst = dyn_cast(BasePtr)) @@ -297,7 +300,7 @@ SetVector SCEVs; struct SubtreeReferences References = { - LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator()}; + LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator(), nullptr}; for (const auto &I : IDToValue) Values.insert(I.second); Index: lib/CodeGen/PPCGCodeGeneration.cpp =================================================================== --- lib/CodeGen/PPCGCodeGeneration.cpp +++ lib/CodeGen/PPCGCodeGeneration.cpp @@ -436,7 +436,8 @@ /// in the scop, nor do they immediately surroung the Scop. /// See [Code generation of induction variables of loops outside /// Scops] - std::tuple, SetVector, SetVector> + std::tuple, SetVector, SetVector, + isl::space> getReferencesInKernel(ppcg_kernel *Kernel); /// Compute the sizes of the execution grid for a given kernel. @@ -1434,13 +1435,20 @@ return SubtreeFunctions; } -std::tuple, SetVector, SetVector> +std::tuple, SetVector, SetVector, + isl::space> GPUNodeBuilder::getReferencesInKernel(ppcg_kernel *Kernel) { SetVector SubtreeValues; SetVector SCEVs; SetVector Loops; - SubtreeReferences References = { - LI, SE, S, ValueMap, SubtreeValues, SCEVs, getBlockGenerator()}; + SubtreeReferences References = {LI, + SE, + S, + ValueMap, + SubtreeValues, + SCEVs, + getBlockGenerator(), + isl::space(S.getIslCtx(), 0, 0).params()}; for (const auto &I : IDToValue) SubtreeValues.insert(I.second); @@ -1507,7 +1515,8 @@ else ReplacedValues.insert(It->second); } - return std::make_tuple(ReplacedValues, ValidSubtreeFunctions, Loops); + return std::make_tuple(ReplacedValues, ValidSubtreeFunctions, Loops, + References.ParamSpace); } void GPUNodeBuilder::clearDominators(Function *F) { @@ -1751,9 +1760,16 @@ SetVector SubtreeValues; SetVector SubtreeFunctions; SetVector Loops; - std::tie(SubtreeValues, SubtreeFunctions, Loops) = + isl::space ParamSpace; + std::tie(SubtreeValues, SubtreeFunctions, Loops, ParamSpace) = getReferencesInKernel(Kernel); + // Add parameters that appear only in the access function to the kernel + // space. This is important to make sure that all isl_ids are passed as + // parameters to the kernel, even though we may not have all parameters + // in the context to improve compile time. + Kernel->space = isl_space_align_params(Kernel->space, ParamSpace.release()); + assert(Kernel->tree && "Device AST of kernel node is empty"); Instruction &HostInsertPoint = *Builder.GetInsertPoint(); Index: test/GPGPU/memory-only-referenced-from-access.ll =================================================================== --- /dev/null +++ test/GPGPU/memory-only-referenced-from-access.ll @@ -0,0 +1,86 @@ +; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \ +; RUN: -polly-invariant-load-hoisting -polly-ignore-aliasing \ +; RUN: -polly-process-unprofitable -polly-ignore-parameter-bounds \ +; RUN: -polly-acc-fail-on-verify-module-failure \ +; RUN: -polly-acc-codegen-managed-memory \ +; RUN: -disable-output < %s | \ +; RUN: FileCheck %s + +; REQUIRES: pollyacc + +; Verify that we correctly generate a kernel even if certain invariant load +; hoisted parameters appear only in memory accesses, but not domain elements. + +; CHECK: @FUNC_quux_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_tmp4, i32 %tmp3, i32 %tmp, i32 %tmp31, i32 %tmp2) + +target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct.hoge = type { i8*, i64, i64, [1 x %struct.widget] } +%struct.widget = type { i64, i64, i64 } + +@global = external unnamed_addr global %struct.hoge, align 32 + +; Function Attrs: nounwind uwtable +define void @quux(i32* noalias %arg, i32* noalias %arg1) #0 { +bb: + %tmp = load i32, i32* %arg, align 4 + %tmp2 = sext i32 %tmp to i64 + %tmp3 = load i32, i32* %arg1, align 4 + %tmp4 = load [0 x double]*, [0 x double]** bitcast (%struct.hoge* @global to [0 x double]**), align 32 + br label %bb5 + +bb5: ; preds = %bb5, %bb + %tmp6 = phi i32 [ %tmp11, %bb5 ], [ 0, %bb ] + %tmp7 = sext i32 %tmp6 to i64 + %tmp8 = sub nsw i64 %tmp7, %tmp2 + %tmp9 = getelementptr [0 x double], [0 x double]* %tmp4, i64 0, i64 %tmp8 + store double undef, double* %tmp9, align 8, !alias.scope !0, !noalias !3 + %tmp10 = icmp eq i32 %tmp6, %tmp3 + %tmp11 = add i32 %tmp6, 1 + br i1 %tmp10, label %bb12, label %bb5 + +bb12: ; preds = %bb5 + ret void +} + +attributes #0 = { nounwind uwtable } + +!0 = !{!1} +!1 = distinct !{!1, !2, !"__radiation_rg_MOD_coe_so: %pa1f"} +!2 = distinct !{!2, !"__radiation_rg_MOD_coe_so"} +!3 = !{!4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37} +!4 = distinct !{!4, !2, !"__radiation_rg_MOD_coe_so: %pduh2oc"} +!5 = distinct !{!5, !2, !"__radiation_rg_MOD_coe_so: %pduh2of"} +!6 = distinct !{!6, !2, !"__radiation_rg_MOD_coe_so: %pduco2"} +!7 = distinct !{!7, !2, !"__radiation_rg_MOD_coe_so: %pduo3"} +!8 = distinct !{!8, !2, !"__radiation_rg_MOD_coe_so: %palogp"} +!9 = distinct !{!9, !2, !"__radiation_rg_MOD_coe_so: %palogt"} +!10 = distinct !{!10, !2, !"__radiation_rg_MOD_coe_so: %podsc"} +!11 = distinct !{!11, !2, !"__radiation_rg_MOD_coe_so: %podsf"} +!12 = distinct !{!12, !2, !"__radiation_rg_MOD_coe_so: %podac"} +!13 = distinct !{!13, !2, !"__radiation_rg_MOD_coe_so: %podaf"} +!14 = distinct !{!14, !2, !"__radiation_rg_MOD_coe_so: %pbsfc"} +!15 = distinct !{!15, !2, !"__radiation_rg_MOD_coe_so: %pbsff"} +!16 = distinct !{!16, !2, !"__radiation_rg_MOD_coe_so: %pusfc"} +!17 = distinct !{!17, !2, !"__radiation_rg_MOD_coe_so: %pusff"} +!18 = distinct !{!18, !2, !"__radiation_rg_MOD_coe_so: %pqsmu0"} +!19 = distinct !{!19, !2, !"__radiation_rg_MOD_coe_so: %ki3"} +!20 = distinct !{!20, !2, !"__radiation_rg_MOD_coe_so: %kspec"} +!21 = distinct !{!21, !2, !"__radiation_rg_MOD_coe_so: %kh2o"} +!22 = distinct !{!22, !2, !"__radiation_rg_MOD_coe_so: %kco2"} +!23 = distinct !{!23, !2, !"__radiation_rg_MOD_coe_so: %ko3"} +!24 = distinct !{!24, !2, !"__radiation_rg_MOD_coe_so: %ki1sd"} +!25 = distinct !{!25, !2, !"__radiation_rg_MOD_coe_so: %ki1ed"} +!26 = distinct !{!26, !2, !"__radiation_rg_MOD_coe_so: %ki3sd"} +!27 = distinct !{!27, !2, !"__radiation_rg_MOD_coe_so: %ki1sc"} +!28 = distinct !{!28, !2, !"__radiation_rg_MOD_coe_so: %ki1ec"} +!29 = distinct !{!29, !2, !"__radiation_rg_MOD_coe_so: %pa1c"} +!30 = distinct !{!30, !2, !"__radiation_rg_MOD_coe_so: %pa2c"} +!31 = distinct !{!31, !2, !"__radiation_rg_MOD_coe_so: %pa2f"} +!32 = distinct !{!32, !2, !"__radiation_rg_MOD_coe_so: %pa3c"} +!33 = distinct !{!33, !2, !"__radiation_rg_MOD_coe_so: %pa3f"} +!34 = distinct !{!34, !2, !"__radiation_rg_MOD_coe_so: %pa4c"} +!35 = distinct !{!35, !2, !"__radiation_rg_MOD_coe_so: %pa4f"} +!36 = distinct !{!36, !2, !"__radiation_rg_MOD_coe_so: %pa5c"} +!37 = distinct !{!37, !2, !"__radiation_rg_MOD_coe_so: %pa5f"}