Index: polly/trunk/include/polly/Support/ScopHelper.h
===================================================================
--- polly/trunk/include/polly/Support/ScopHelper.h
+++ polly/trunk/include/polly/Support/ScopHelper.h
@@ -391,10 +391,12 @@
 /// @param LI The loop info.
 /// @param SE The scalar evolution analysis.
 /// @param DT The dominator tree of the function.
+/// @param KnownInvariantLoads The set of loads already known to be invariant.
 ///
 /// @return True if @p LInst can be hoisted in @p R.
 bool isHoistableLoad(llvm::LoadInst *LInst, llvm::Region &R, llvm::LoopInfo &LI,
-                     llvm::ScalarEvolution &SE, const llvm::DominatorTree &DT);
+                     llvm::ScalarEvolution &SE, const llvm::DominatorTree &DT,
+                     const InvariantLoadsSetTy &KnownInvariantLoads);
 
 /// Return true iff @p V is an intrinsic that we ignore during code
 /// generation.
Index: polly/trunk/lib/Analysis/ScopDetection.cpp
===================================================================
--- polly/trunk/lib/Analysis/ScopDetection.cpp
+++ polly/trunk/lib/Analysis/ScopDetection.cpp
@@ -483,8 +483,7 @@
     // very function (and its children).
     if (Context.RequiredILS.count(Load))
       continue;
-
-    if (!isHoistableLoad(Load, CurRegion, LI, SE, DT))
+    if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
       return false;
 
     for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
@@ -938,7 +937,7 @@
       auto *V = dyn_cast<Value>(Unknown->getValue());
       if (auto *Load = dyn_cast<LoadInst>(V)) {
         if (Context.CurRegion.contains(Load) &&
-            isHoistableLoad(Load, CurRegion, LI, SE, DT))
+            isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
           Context.RequiredILS.insert(Load);
         continue;
       }
@@ -1148,18 +1147,38 @@
     // inside the scop. Hence, we can only create a run-time check if we are
     // sure the base pointer is not an instruction defined inside the scop.
     // However, we can ignore loads that will be hoisted.
-    for (const auto &Ptr : AS) {
-      Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
-      if (Inst && Context.CurRegion.contains(Inst)) {
-        auto *Load = dyn_cast<LoadInst>(Inst);
-        if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT)) {
-          Context.RequiredILS.insert(Load);
-          continue;
+
+    InvariantLoadsSetTy VariantLS, InvariantLS;
+    // In order to detect loads that depend on other invariant loads as
+    // invariant, we use a fixed-point iteration here, i.e., we iterate
+    // over the alias set an arbitrary number of times until it is safe
+    // to assume that all invariant loads have been detected.
+    while (1) {
+      const unsigned int VariantSize = VariantLS.size(),
+                         InvariantSize = InvariantLS.size();
+
+      for (const auto &Ptr : AS) {
+        Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
+        if (Inst && Context.CurRegion.contains(Inst)) {
+          auto *Load = dyn_cast<LoadInst>(Inst);
+          if (Load && InvariantLS.count(Load))
+            continue;
+          if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT,
+                                      InvariantLS)) {
+            if (VariantLS.count(Load))
+              VariantLS.remove(Load);
+            Context.RequiredILS.insert(Load);
+            InvariantLS.insert(Load);
+          } else {
+            CanBuildRunTimeCheck = false;
+            VariantLS.insert(Load);
+          }
         }
+      }
-      CanBuildRunTimeCheck = false;
+      if (InvariantSize == InvariantLS.size() &&
+          VariantSize == VariantLS.size())
        break;
-    }
   }
 
   if (CanBuildRunTimeCheck)
Index: polly/trunk/lib/Support/ScopHelper.cpp
===================================================================
--- polly/trunk/lib/Support/ScopHelper.cpp
+++ polly/trunk/lib/Support/ScopHelper.cpp
@@ -17,6 +17,7 @@
 #include "polly/Support/SCEVValidator.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionInfoImpl.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -453,10 +454,40 @@
   return nullptr;
 }
 
+static bool hasVariantIndex(GetElementPtrInst *Gep, Loop *L, Region &R,
+                            ScalarEvolution &SE) {
+  for (const Use &Val : llvm::drop_begin(Gep->operands(), 1)) {
+    const SCEV *PtrSCEV = SE.getSCEVAtScope(Val, L);
+    Loop *OuterLoop = R.outermostLoopInRegion(L);
+    if (!SE.isLoopInvariant(PtrSCEV, OuterLoop))
+      return true;
+  }
+  return false;
+}
+
 bool polly::isHoistableLoad(LoadInst *LInst, Region &R, LoopInfo &LI,
-                            ScalarEvolution &SE, const DominatorTree &DT) {
+                            ScalarEvolution &SE, const DominatorTree &DT,
+                            const InvariantLoadsSetTy &KnownInvariantLoads) {
   Loop *L = LI.getLoopFor(LInst->getParent());
   auto *Ptr = LInst->getPointerOperand();
+
+  // A LoadInst is hoistable if the address it is loading from is also
+  // invariant; in this case: another invariant load (whether that address
+  // is also not written to has to be checked separately).
+  // TODO: This only checks for a LoadInst->GetElementPtrInst->LoadInst
+  // pattern generated by the Chapel frontend, but generally this applies
+  // for any chain of instructions that does not also depend on any
+  // induction variable.
+  if (auto *GepInst = dyn_cast<GetElementPtrInst>(Ptr)) {
+    if (!hasVariantIndex(GepInst, L, R, SE)) {
+      if (auto *DecidingLoad =
+              dyn_cast<LoadInst>(GepInst->getPointerOperand())) {
+        if (KnownInvariantLoads.count(DecidingLoad))
+          return true;
+      }
+    }
+  }
+
   const SCEV *PtrSCEV = SE.getSCEVAtScope(Ptr, L);
   while (L && R.contains(L)) {
     if (!SE.isLoopInvariant(PtrSCEV, L))
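The ScopDetection.cpp change above classifies the loads of an alias set by fixed-point iteration: one pass over the set can only prove a load invariant after the load that feeds its address has already been proven invariant, so the scan is repeated until neither the invariant nor the variant set grows. What follows is a minimal standalone C++ sketch of that scheme, not part of the patch; the Load type and the isHoistableGiven predicate are hypothetical stand-ins for llvm::LoadInst and Polly's isHoistableLoad.

#include <cstddef>
#include <set>
#include <vector>

// Hypothetical stand-in for llvm::LoadInst: AddressSource is the load (if
// any) that produces the address this load reads from.
struct Load {
  const Load *AddressSource = nullptr;
};

// Stand-in for isHoistableLoad(): a load is hoistable if its address does
// not come from another load, or comes from a load already known invariant.
static bool isHoistableGiven(const Load *L,
                             const std::set<const Load *> &Known) {
  return !L->AddressSource || Known.count(L->AddressSource);
}

// Fixed-point classification: repeat the scan until neither set changes, so
// a load whose address depends on a load proven invariant only in a later
// pass is still promoted eventually.
static void classifyLoads(const std::vector<const Load *> &Candidates,
                          std::set<const Load *> &Invariant,
                          std::set<const Load *> &Variant) {
  while (true) {
    const std::size_t InvariantSize = Invariant.size();
    const std::size_t VariantSize = Variant.size();

    for (const Load *L : Candidates) {
      if (Invariant.count(L))
        continue; // already proven in an earlier pass
      if (isHoistableGiven(L, Invariant)) {
        Variant.erase(L); // may have been misclassified in an earlier pass
        Invariant.insert(L);
      } else {
        Variant.insert(L);
      }
    }

    // Fixed point: no load changed its classification during this pass.
    if (InvariantSize == Invariant.size() && VariantSize == Variant.size())
      break;
  }
}

If a dependent load is visited before the load it depends on, the first pass misclassifies it as variant; the next pass corrects that, which is why the patch both re-scans the alias set and removes entries from VariantLS once they become provably invariant.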
Index: polly/trunk/test/ScopDetect/collective_invariant_loads.ll
===================================================================
--- polly/trunk/test/ScopDetect/collective_invariant_loads.ll
+++ polly/trunk/test/ScopDetect/collective_invariant_loads.ll
@@ -0,0 +1,56 @@
+; RUN: opt %loadPolly -polly-scops -polly-invariant-load-hoisting -analyze < %s | FileCheck %s
+
+;CHECK: Function: test_init_chpl
+;CHECK-NEXT: Region: %bb1---%bb16
+;CHECK-NEXT: Max Loop Depth: 2
+;CHECK-NEXT: Invariant Accesses: {
+;CHECK-NEXT:   ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+;CHECK-NEXT:     [tmp5] -> { Stmt_bb2[i0, i1] -> MemRef_arg[1] };
+;CHECK-NEXT:   Execution Context: [tmp5] -> { : }
+;CHECK-NEXT:   ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+;CHECK-NEXT:     [tmp5] -> { Stmt_bb2[i0, i1] -> MemRef_tmp3[9] };
+;CHECK-NEXT:   Execution Context: [tmp5] -> { : }
+;CHECK-NEXT:   ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+;CHECK-NEXT:     [tmp5] -> { Stmt_bb2[i0, i1] -> MemRef_tmp3[2] };
+;CHECK-NEXT:   Execution Context: [tmp5] -> { : }
+;CHECK-NEXT: }
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%array_ty = type { i64, %array_ptr*, i8 }
+%array_ptr = type { [2 x i64], [2 x i64], [2 x i64], i64, i64, double*, double*, i8 }
+
+; Function Attrs: noinline
+define weak dso_local void @test_init_chpl(%array_ty* nonnull %arg) {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb14, %bb
+  %.0 = phi i64 [ 0, %bb ], [ %tmp15, %bb14 ]
+  br label %bb2
+
+bb2:                                              ; preds = %bb2, %bb1
+  %.01 = phi i64 [ 0, %bb1 ], [ %tmp13, %bb2 ]
+  %tmp = getelementptr inbounds %array_ty, %array_ty* %arg, i64 0, i32 1
+  %tmp3 = load %array_ptr*, %array_ptr** %tmp, align 8
+  %tmp4 = getelementptr inbounds %array_ptr, %array_ptr* %tmp3, i64 0, i32 1, i64 0
+  %tmp5 = load i64, i64* %tmp4, align 8
+  %tmp6 = mul nsw i64 %tmp5, %.0
+  %tmp7 = add nsw i64 %tmp6, %.01
+  %tmp8 = getelementptr inbounds %array_ptr, %array_ptr* %tmp3, i64 0, i32 6
+  %tmp9 = load double*, double** %tmp8, align 8
+  %tmp10 = getelementptr inbounds double, double* %tmp9, i64 %tmp7
+  store double 13.0, double* %tmp10, align 8
+  %tmp13 = add nuw nsw i64 %.01, 1
+  %exitcond = icmp ne i64 %tmp13, 1000
+  br i1 %exitcond, label %bb2, label %bb14
+
+bb14:                                             ; preds = %bb2
+  %tmp15 = add nuw nsw i64 %.0, 1
+  %exitcond8 = icmp ne i64 %tmp15, 1000
+  br i1 %exitcond8, label %bb1, label %bb16
+
+bb16:                                             ; preds = %bb14
+  ret void
+}
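For reference, the IR in the test corresponds roughly to the Chapel-frontend pattern sketched below in C++ (a reconstruction for illustration only; the struct layouts mirror %array_ty and %array_ptr, but the field and parameter names are invented). The descriptor load %tmp3 is invariant because its address is a GEP of the region-invariant %arg; before this patch, %tmp5 and %tmp9 could not be hoisted because their addresses are GEPs of an instruction defined inside the region, whereas with the new KnownInvariantLoads check they are recognized once %tmp3 is known invariant.

// Layout analogue of %array_ptr (field indices match the GEPs in the test).
struct ArrayPtr {
  long Off[2];        // field 0
  long Size[2];       // field 1: %tmp5 loads Size[0], the stride
  long Stride[2];     // field 2
  long F3, F4;        // fields 3-4
  double *Data;       // field 5
  double *ShiftData;  // field 6: %tmp9 loads this base pointer
  char F7;            // field 7
};

// Layout analogue of %array_ty.
struct ArrayTy {
  long F0;
  ArrayPtr *Ptr; // field 1: %tmp3 loads this descriptor pointer
  char F2;
};

void test_init_chpl(ArrayTy *Arg) {
  for (long I = 0; I < 1000; ++I)
    for (long J = 0; J < 1000; ++J) {
      ArrayPtr *P = Arg->Ptr;       // %tmp3: invariant load
      long Stride = P->Size[0];     // %tmp5: loaded through the invariant %tmp3
      double *Base = P->ShiftData;  // %tmp9: likewise loaded through %tmp3
      Base[I * Stride + J] = 13.0;  // the store that remains in the scop
    }
}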