Index: include/polly/Support/ScopHelper.h
===================================================================
--- include/polly/Support/ScopHelper.h
+++ include/polly/Support/ScopHelper.h
@@ -391,10 +391,12 @@
 /// @param LI The loop info.
 /// @param SE The scalar evolution analysis.
 /// @param DT The dominator tree of the function.
+/// @param KnownInvariantLoads The loads that are already known to be invariant.
 ///
 /// @return True if @p LInst can be hoisted in @p R.
 bool isHoistableLoad(llvm::LoadInst *LInst, llvm::Region &R, llvm::LoopInfo &LI,
-                     llvm::ScalarEvolution &SE, const llvm::DominatorTree &DT);
+                     llvm::ScalarEvolution &SE, const llvm::DominatorTree &DT,
+                     const InvariantLoadsSetTy &KnownInvariantLoads);
 
 /// Return true iff @p V is an intrinsic that we ignore during code
 /// generation.
Index: lib/Analysis/ScopDetection.cpp
===================================================================
--- lib/Analysis/ScopDetection.cpp
+++ lib/Analysis/ScopDetection.cpp
@@ -484,7 +484,7 @@
     if (Context.RequiredILS.count(Load))
       continue;
 
-    if (!isHoistableLoad(Load, CurRegion, LI, SE, DT))
+    if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
       return false;
 
     for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
@@ -938,7 +938,7 @@
       auto *V = dyn_cast<Value>(Unknown->getValue());
       if (auto *Load = dyn_cast<LoadInst>(V)) {
         if (Context.CurRegion.contains(Load) &&
-            isHoistableLoad(Load, CurRegion, LI, SE, DT))
+            isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
           Context.RequiredILS.insert(Load);
         continue;
       }
@@ -1143,23 +1143,52 @@
     if (!AS.isMustAlias()) {
       if (PollyUseRuntimeAliasChecks) {
         bool CanBuildRunTimeCheck = true;
+
+        InvariantLoadsSetTy VariantLS, InvariantLS;
         // The run-time alias check places code that involves the base pointer at
         // the beginning of the SCoP. This breaks if the base pointer is defined
         // inside the scop. Hence, we can only create a run-time check if we are
         // sure the base pointer is not an instruction defined inside the scop.
         // However, we can ignore loads that will be hoisted.
-        for (const auto &Ptr : AS) {
-          Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
-          if (Inst && Context.CurRegion.contains(Inst)) {
-            auto *Load = dyn_cast<LoadInst>(Inst);
-            if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT)) {
-              Context.RequiredILS.insert(Load);
-              continue;
-            }
-
-            CanBuildRunTimeCheck = false;
-            break;
-          }
-        }
+        //
+        // A load may only become hoistable after the load it depends on is
+        // known to be invariant, so iterate over the alias set until neither
+        // set changes any more (fixed point).
+        while (CanBuildRunTimeCheck) {
+          const size_t VariantSize = VariantLS.size();
+          const size_t InvariantSize = InvariantLS.size();
+
+          for (const auto &Ptr : AS) {
+            Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
+            if (!Inst || !Context.CurRegion.contains(Inst))
+              continue;
+
+            auto *Load = dyn_cast<LoadInst>(Inst);
+            if (!Load) {
+              // A non-load instruction defined in the scop is never hoisted.
+              CanBuildRunTimeCheck = false;
+              break;
+            }
+            if (InvariantLS.count(Load))
+              continue;
+
+            if (isHoistableLoad(Load, Context.CurRegion, LI, SE, DT,
+                                InvariantLS)) {
+              VariantLS.remove(Load);
+              Context.RequiredILS.insert(Load);
+              InvariantLS.insert(Load);
+            } else {
+              VariantLS.insert(Load);
+            }
+          }
+
+          if (InvariantSize == InvariantLS.size() &&
+              VariantSize == VariantLS.size())
+            break;
+        }
+
+        // Loads that are still variant at the fixed point block the check.
+        if (!VariantLS.empty())
+          CanBuildRunTimeCheck = false;
 
         if (CanBuildRunTimeCheck)
Index: lib/Support/ScopHelper.cpp
===================================================================
--- lib/Support/ScopHelper.cpp
+++ lib/Support/ScopHelper.cpp
@@ -442,9 +442,29 @@
 }
 
 bool polly::isHoistableLoad(LoadInst *LInst, Region &R, LoopInfo &LI,
-                            ScalarEvolution &SE, const DominatorTree &DT) {
+                            ScalarEvolution &SE, const DominatorTree &DT,
+                            const InvariantLoadsSetTy &KnownInvariantLoads) {
   Loop *L = LI.getLoopFor(LInst->getParent());
   auto *Ptr = LInst->getPointerOperand();
+
+  // A load whose address is a GEP based on an already known invariant load is
+  // hoistable as well, provided every GEP index is invariant in all loops of
+  // @p R that surround the load. This returns before any of the checks below.
+  if (auto *GepInst = dyn_cast<GetElementPtrInst>(Ptr)) {
+    auto *DecidingLoad = dyn_cast<LoadInst>(GepInst->getPointerOperand());
+    if (DecidingLoad && KnownInvariantLoads.count(DecidingLoad)) {
+      bool HasVariantIndex = false;
+      for (Value *Idx : GepInst->indices()) {
+        const SCEV *IdxSCEV = SE.getSCEVAtScope(Idx, L);
+        for (Loop *IdxL = L; IdxL && R.contains(IdxL);
+             IdxL = IdxL->getParentLoop())
+          HasVariantIndex |= !SE.isLoopInvariant(IdxSCEV, IdxL);
+      }
+      if (!HasVariantIndex)
+        return true;
+    }
+  }
+
   const SCEV *PtrSCEV = SE.getSCEVAtScope(Ptr, L);
   while (L && R.contains(L)) {
     if (!SE.isLoopInvariant(PtrSCEV, L))
Index: test/ScopDetect/collective_invariant_loads.ll
===================================================================
--- /dev/null
+++ test/ScopDetect/collective_invariant_loads.ll
@@ -0,0 +1,111 @@
+; REQUIRES: asserts
+; RUN: opt %loadPolly %s -polly-invariant-load-hoisting -polly-scops -pass-remarks=polly-detect -pass-remarks-missed=polly-detect -pass-remarks-analysis=polly-detect -debug-only=polly-detect 2>&1| FileCheck %s
+
+;CHECK: Checking region: %2 => %18
+;CHECK-NEXT: OK
+
+
+
+
+; ModuleID = 'affine_2d_proper.ll'
+source_filename = "root"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%range_int64_t_bounded_F = type { i64, i64 }
+%_domain_DefaultRectangularDom_2_int64_t_F = type { i64, %chpl_DefaultRectangularDom_2_int64_t_F_object*, i8 }
+%chpl_DefaultRectangularDom_2_int64_t_F_object = type { %chpl_BaseRectangularDom_2_int64_t_F_object, %chpl_DefaultDist_object*, [2 x %range_int64_t_bounded_F] }
+%chpl_BaseRectangularDom_2_int64_t_F_object = type { %chpl_BaseDom_object }
+%chpl_BaseDom_object = type { %chpl_object_object, %list_BaseArr_chpl, i64, %atomicbool, i8, i64 }
+%chpl_object_object = type { i32, i32 }
+%list_BaseArr_chpl = type { %chpl_listNode_BaseArr_chpl_object*, %chpl_listNode_BaseArr_chpl_object*, i64 }
+%chpl_listNode_BaseArr_chpl_object = type { %chpl_object_object, %chpl_BaseArr_object*, %chpl_listNode_BaseArr_chpl_object* }
+%chpl_BaseArr_object = type { %chpl_object_object, i64, i8 }
+%atomicbool = type { i8 }
+%chpl_DefaultDist_object = type { %chpl_BaseDist_object }
+%chpl_BaseDist_object = type { %chpl_object_object, %list_BaseDom_chpl, %atomicbool, i8, i64 }
+%list_BaseDom_chpl = type { %chpl_listNode_BaseDom_chpl_object*, %chpl_listNode_BaseDom_chpl_object*, i64 }
+%chpl_listNode_BaseDom_chpl_object = type { %chpl_object_object, %chpl_BaseDom_object*, %chpl_listNode_BaseDom_chpl_object* }
+%_distribution_DefaultDist = type { i64, %chpl_DefaultDist_object*, i8 }
+%_array_DefaultRectangularArr_2_int64_t_F__real64_int64_t = type { i64, %chpl_DefaultRectangularArr_2_int64_t_F__real64_int64_t_object*, i8 }
+%chpl_DefaultRectangularArr_2_int64_t_F__real64_int64_t_object = type { %chpl_BaseRectangularArr_2_int64_t_F__real64_object, %chpl_DefaultRectangularDom_2_int64_t_F_object*, [2 x i64], [2 x i64], [2 x i64], i64, i64, double*, double*, i8, %range_int64_t_bounded_F }
+%chpl_BaseRectangularArr_2_int64_t_F__real64_object = type { %chpl_BaseArrOverRectangularDom_2_int64_t_F_object }
+%chpl_BaseArrOverRectangularDom_2_int64_t_F_object = type { %chpl_BaseArr_object }
+
+; Function Attrs: noinline
+define weak dso_local void @test_init_chpl(%_array_DefaultRectangularArr_2_int64_t_F__real64_int64_t* nonnull) #0 {
+  br label %.split
+
+.split:                                           ; preds = %1
+  br label %2
+
+;