Index: polly/trunk/include/polly/ScopInfo.h =================================================================== --- polly/trunk/include/polly/ScopInfo.h +++ polly/trunk/include/polly/ScopInfo.h @@ -2161,7 +2161,7 @@ /// @param S The SCEV to normalize. /// /// @return The representing SCEV for invariant loads or @p S if none. - const SCEV *getRepresentingInvariantLoadSCEV(const SCEV *S); + const SCEV *getRepresentingInvariantLoadSCEV(const SCEV *S) const; /// Create a new SCoP statement for @p BB. /// @@ -2425,7 +2425,7 @@ /// @param Parameter A SCEV that was recognized as a Parameter. /// /// @return The corresponding isl_id or NULL otherwise. - __isl_give isl_id *getIdForParam(const SCEV *Parameter); + __isl_give isl_id *getIdForParam(const SCEV *Parameter) const; /// Get the maximum region of this static control part. /// @@ -2512,8 +2512,22 @@ /// /// @return The constraint on parameter of this Scop. __isl_give isl_set *getContext() const; + + /// Return space of isl context parameters. + /// + /// Returns the set of context parameters that are currently constrained. In + /// case the full set of parameters is needed, see @getFullParamSpace. __isl_give isl_space *getParamSpace() const; + /// Return the full space of parameters. + /// + /// getParamSpace will only return the parameters of the context that are + /// actually constrained, whereas getFullParamSpace will return all + // parameters. This is useful in cases, where we need to ensure all + // parameters are available, as certain isl functions will abort if this is + // not the case. + isl::space getFullParamSpace() const; + /// Get the assumed context for this Scop. /// /// @return The assumed context of this Scop. Index: polly/trunk/lib/Analysis/ScopInfo.cpp =================================================================== --- polly/trunk/lib/Analysis/ScopInfo.cpp +++ polly/trunk/lib/Analysis/ScopInfo.cpp @@ -346,8 +346,7 @@ std::string param_name = getName(); param_name += "_fortranarr_size"; - // TODO: see if we need to add `this` as the id user pointer - isl::id IdPwAff = isl::id::alloc(S.getIslCtx(), param_name.c_str(), nullptr); + isl::id IdPwAff = isl::id::alloc(S.getIslCtx(), param_name.c_str(), this); Space = Space.set_dim_id(isl::dim::param, 0, IdPwAff); isl::pw_aff PwAff = @@ -2062,14 +2061,15 @@ /// Remap parameter values but keep AddRecs valid wrt. invariant loads. struct SCEVSensitiveParameterRewriter : public SCEVRewriteVisitor { - ValueToValueMap &VMap; + const ValueToValueMap &VMap; public: - SCEVSensitiveParameterRewriter(ValueToValueMap &VMap, ScalarEvolution &SE) + SCEVSensitiveParameterRewriter(const ValueToValueMap &VMap, + ScalarEvolution &SE) : SCEVRewriteVisitor(SE), VMap(VMap) {} static const SCEV *rewrite(const SCEV *E, ScalarEvolution &SE, - ValueToValueMap &VMap) { + const ValueToValueMap &VMap) { SCEVSensitiveParameterRewriter SSPR(VMap, SE); return SSPR.visit(E); } @@ -2091,16 +2091,17 @@ /// Check whether we should remap a SCEV expression. struct SCEVFindInsideScop : public SCEVTraversal { - ValueToValueMap &VMap; + const ValueToValueMap &VMap; bool FoundInside = false; - Scop *S; + const Scop *S; public: - SCEVFindInsideScop(ValueToValueMap &VMap, ScalarEvolution &SE, Scop *S) + SCEVFindInsideScop(const ValueToValueMap &VMap, ScalarEvolution &SE, + const Scop *S) : SCEVTraversal(*this), VMap(VMap), S(S) {} static bool hasVariant(const SCEV *E, ScalarEvolution &SE, - ValueToValueMap &VMap, Scop *S) { + const ValueToValueMap &VMap, const Scop *S) { SCEVFindInsideScop SFIS(VMap, SE, S); SFIS.visitAll(E); return SFIS.FoundInside; @@ -2119,7 +2120,7 @@ }; } // namespace -const SCEV *Scop::getRepresentingInvariantLoadSCEV(const SCEV *E) { +const SCEV *Scop::getRepresentingInvariantLoadSCEV(const SCEV *E) const { // Check whether it makes sense to rewrite the SCEV. (ScalarEvolution // doesn't like addition between an AddRec and an expression that // doesn't have a dominance relationship with it.) @@ -2208,7 +2209,7 @@ } } -__isl_give isl_id *Scop::getIdForParam(const SCEV *Parameter) { +__isl_give isl_id *Scop::getIdForParam(const SCEV *Parameter) const { // Normalize the SCEV to get the representing element for an invariant load. Parameter = getRepresentingInvariantLoadSCEV(Parameter); return isl_id_copy(ParameterIds.lookup(Parameter)); @@ -2382,41 +2383,38 @@ } } -// We use the outermost dimension to generate GPU transfers for Fortran arrays -// even when the array bounds are not known statically. To do so, we need the -// outermost dimension information. We add this into the context so that the -// outermost dimension is available during codegen. -// We currently do not care about dimensions other than the outermost -// dimension since it doesn't affect transfers. -static isl_set *addFortranArrayOutermostDimParams(__isl_give isl_set *Context, - Scop::array_range Arrays) { - - std::vector OutermostSizeIds; +static std::vector getFortranArrayIds(Scop::array_range Arrays) { + std::vector OutermostSizeIds; for (auto Array : Arrays) { // To check if an array is a Fortran array, we check if it has a isl_pw_aff // for its outermost dimension. Fortran arrays will have this since the // outermost dimension size can be picked up from their runtime description. // TODO: actually need to check if it has a FAD, but for now this works. if (Array->getNumberOfDimensions() > 0) { - isl_pw_aff *PwAff = Array->getDimensionSizePw(0).release(); + isl::pw_aff PwAff = Array->getDimensionSizePw(0); if (!PwAff) continue; - isl_id *Id = isl_pw_aff_get_dim_id(PwAff, isl_dim_param, 0); - isl_pw_aff_free(PwAff); - assert(Id && "Invalid Id for PwAff expression in Fortran array"); + isl::id Id = + isl::manage(isl_pw_aff_get_dim_id(PwAff.get(), isl_dim_param, 0)); + assert(!Id.is_null() && + "Invalid Id for PwAff expression in Fortran array"); + Id.dump(); OutermostSizeIds.push_back(Id); } } + return OutermostSizeIds; +} - const int NumTrueParams = isl_set_dim(Context, isl_dim_param); - Context = isl_set_add_dims(Context, isl_dim_param, OutermostSizeIds.size()); - - for (size_t i = 0; i < OutermostSizeIds.size(); i++) { - Context = isl_set_set_dim_id(Context, isl_dim_param, NumTrueParams + i, - OutermostSizeIds[i]); - Context = - isl_set_lower_bound_si(Context, isl_dim_param, NumTrueParams + i, 0); +// The FORTRAN array size parameters are known to be non-negative. +static isl_set *boundFortranArrayParams(__isl_give isl_set *Context, + Scop::array_range Arrays) { + std::vector OutermostSizeIds; + OutermostSizeIds = getFortranArrayIds(Arrays); + + for (isl::id Id : OutermostSizeIds) { + int dim = isl_set_find_dim_by_id(Context, isl_dim_param, Id.get()); + Context = isl_set_lower_bound_si(Context, isl_dim_param, dim, 0); } return Context; @@ -2427,20 +2425,13 @@ return; // Add all parameters into a common model. - isl_space *Space = isl_space_params_alloc(getIslCtx(), ParameterIds.size()); - - unsigned PDim = 0; - for (const auto *Parameter : Parameters) { - isl_id *id = getIdForParam(Parameter); - Space = isl_space_set_dim_id(Space, isl_dim_param, PDim++, id); - } + isl::space Space = getFullParamSpace(); // Align the parameters of all data structures to the model. - Context = isl_set_align_params(Context, Space); + Context = isl_set_align_params(Context, Space.copy()); - // Add the outermost dimension of the Fortran arrays into the Context. - // See the description of the function for more information. - Context = addFortranArrayOutermostDimParams(Context, arrays()); + // Bound the size of the fortran array dimensions. + Context = boundFortranArrayParams(Context, arrays()); // As all parameters are known add bounds to them. addParameterBounds(); @@ -4312,6 +4303,25 @@ return isl_set_get_space(Context); } +isl::space Scop::getFullParamSpace() const { + std::vector FortranIDs; + FortranIDs = getFortranArrayIds(arrays()); + + isl::space Space = isl::space::params_alloc( + getIslCtx(), ParameterIds.size() + FortranIDs.size()); + + unsigned PDim = 0; + for (const SCEV *Parameter : Parameters) { + isl::id Id = isl::manage(getIdForParam(Parameter)); + Space = Space.set_dim_id(isl::dim::param, PDim++, Id); + } + + for (isl::id Id : FortranIDs) + Space = Space.set_dim_id(isl::dim::param, PDim++, Id); + + return Space; +} + __isl_give isl_set *Scop::getAssumedContext() const { assert(AssumedContext && "Assumed context not yet built"); return isl_set_copy(AssumedContext); Index: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp =================================================================== --- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp +++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp @@ -3088,6 +3088,9 @@ int has_permutable = has_any_permutable_node(Schedule); + Schedule = + isl_schedule_align_params(Schedule, S->getFullParamSpace().release()); + if (!has_permutable || has_permutable < 0) { Schedule = isl_schedule_free(Schedule); } else { Index: polly/trunk/test/GPGPU/align-params-in-schedule.ll =================================================================== --- polly/trunk/test/GPGPU/align-params-in-schedule.ll +++ polly/trunk/test/GPGPU/align-params-in-schedule.ll @@ -0,0 +1,51 @@ +; RUN: opt -S -polly-process-unprofitable -polly-codegen-ppcg \ +; RUN: -polly-invariant-load-hoisting -polly-ignore-parameter-bounds < %s | \ +; RUN: FileCheck %s + +; CHECK: polly_launchKernel + +; Verify that this program compiles. At some point, this compilation crashed +; due to insufficient parameters being available. + +source_filename = "bugpoint-output-4d01492.bc" +target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct.barney = type { i8*, i64, i64, [2 x %struct.widget] } +%struct.widget = type { i64, i64, i64 } + +@global = external unnamed_addr global %struct.barney, align 32 + +; Function Attrs: nounwind uwtable +define void @wobble(i32* noalias %arg) #0 { +bb: + %tmp = load i32, i32* %arg, align 4 + br label %bb1 + +bb1: ; preds = %bb13, %bb + %tmp2 = phi i32 [ %tmp15, %bb13 ], [ 1, %bb ] + br label %bb3 + +bb3: ; preds = %bb3, %bb1 + %tmp4 = load i32*, i32** bitcast (%struct.barney* @global to i32**), align 32 + %tmp5 = sext i32 %tmp2 to i64 + %tmp6 = load i64, i64* getelementptr inbounds (%struct.barney, %struct.barney* @global, i64 0, i32 3, i64 1, i32 0), align 8 + %tmp7 = mul i64 %tmp6, %tmp5 + %tmp8 = add i64 %tmp7, 0 + %tmp9 = load i64, i64* getelementptr inbounds (%struct.barney, %struct.barney* @global, i64 0, i32 1), align 8 + %tmp10 = add i64 %tmp8, %tmp9 + %tmp11 = getelementptr i32, i32* %tmp4, i64 %tmp10 + store i32 undef, i32* %tmp11, align 4 + %tmp12 = icmp eq i32 0, 0 + br i1 %tmp12, label %bb13, label %bb3 + +bb13: ; preds = %bb3 + %tmp14 = icmp eq i32 %tmp2, %tmp + %tmp15 = add i32 %tmp2, 1 + br i1 %tmp14, label %bb16, label %bb1 + +bb16: ; preds = %bb13 + ret void +} + +attributes #0 = { nounwind uwtable }