Index: llvm/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LICM.cpp
+++ llvm/lib/Transforms/Scalar/LICM.cpp
@@ -67,6 +67,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/PatternMatch.h"
@@ -137,6 +138,17 @@
              "number of accesses allowed to be present in a loop in order to "
              "enable memory promotion."));
 
+// Experimental option which allows the LICM pass to promote conditional
+// accesses of loop-invariant locations.
+// The load instructions will be hoisted into the preheader, while the store
+// instructions will be sunk into the exit blocks, where they will be executed
+// conditionally, depending on whether or not control flow actually reached
+// them.
+cl::opt<bool> SetLicmConditionalAccessPromotion(
+    "licm-conditional-access-promotion", cl::Hidden, cl::init(false),
+    cl::desc("Enable promotion of conditional accesses of loop-invariant "
+             "locations"));
+
 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
                                   const LoopSafetyInfo *SafetyInfo,
@@ -1779,6 +1791,12 @@
   AAMDNodes AATags;
   ICFLoopSafetyInfo &SafetyInfo;
   bool CanInsertStoresInExitBlocks;
+  bool PromoteConditionalAccesses;
+  // This flag is used to make sure that every sunk, conditional store
+  // instruction is executed conditionally within the exit blocks. In the
+  // preheader, it is initialized to 0. In every basic block containing a
+  // conditional store it is raised.
+  SSAUpdater *FlagSSAUpdater;
 
   // We're about to add a use of V in a loop exit block.  Insert an LCSSA phi
   // (if legal) if doing so would add an out-of-loop use to an instruction
@@ -1805,13 +1823,16 @@
                SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
                MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl,
                Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
-               ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
+               ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks,
+               bool PromoteConditionalAccesses, SSAUpdater *FlagSSAUpdater)
       : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
         LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
         PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
         Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
         SafetyInfo(SafetyInfo),
-        CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks) {}
+        CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks),
+        PromoteConditionalAccesses(PromoteConditionalAccesses),
+        FlagSSAUpdater(FlagSSAUpdater) {}
 
   bool isInstInList(Instruction *I,
                     const SmallVectorImpl<Instruction *> &) const override {
@@ -1834,26 +1855,42 @@
       LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
       Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
       Instruction *InsertPos = LoopInsertPts[i];
-      StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
-      if (UnorderedAtomic)
-        NewSI->setOrdering(AtomicOrdering::Unordered);
-      NewSI->setAlignment(Alignment);
-      NewSI->setDebugLoc(DL);
-      if (AATags)
-        NewSI->setAAMetadata(AATags);
-
-      MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
-      MemoryAccess *NewMemAcc;
-      if (!MSSAInsertPoint) {
-        NewMemAcc = MSSAU.createMemoryAccessInBB(
-            NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
+      if (!PromoteConditionalAccesses) {
+        StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
+        if (UnorderedAtomic)
+          NewSI->setOrdering(AtomicOrdering::Unordered);
+        NewSI->setAlignment(Alignment);
+        NewSI->setDebugLoc(DL);
+        if (AATags)
+          NewSI->setAAMetadata(AATags);
+
+        MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
+        MemoryAccess *NewMemAcc;
+        if (!MSSAInsertPoint) {
+          NewMemAcc = MSSAU.createMemoryAccessInBB(
+              NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
+        } else {
+          NewMemAcc =
+              MSSAU.createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
+        }
+        MSSAInsertPts[i] = NewMemAcc;
+        MSSAU.insertDef(cast<MemoryDef>(NewMemAcc), true);
+        // FIXME: true for safety, false may still be correct.
       } else {
-        NewMemAcc =
-            MSSAU.createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
+        Value *FlagValue = FlagSSAUpdater->GetValueInMiddleOfBlock(ExitBlock);
+        IRBuilder<> Builder(InsertPos);
+        Type *LiveInValueType = LiveInValue->getType();
+        Type *DataType = VectorType::get(LiveInValueType, 1, false);
+        Value *V = UndefValue::get(DataType);
+        V = Builder.CreateInsertElement(V, LiveInValue, uint64_t(0));
+        Value *P =
+            Builder.CreatePointerCast(Ptr, PointerType::getUnqual(DataType));
+        Type *MaskType =
+            VectorType::get(Type::getInt1Ty(ExitBlock->getContext()), 1, false);
+        Value *M = UndefValue::get(MaskType);
+        M = Builder.CreateInsertElement(M, FlagValue, uint64_t(0));
+        Builder.CreateMaskedStore(V, P, Alignment, M);
       }
-      MSSAInsertPts[i] = NewMemAcc;
-      MSSAU.insertDef(cast<MemoryDef>(NewMemAcc), true);
-      // FIXME: true for safety, false may still be correct.
     }
   }
 
@@ -1971,6 +2008,9 @@
   bool SawNotAtomic = false;
   AAMDNodes AATags;
 
+  bool SawConditionalLIStore = false;
+  StringRef PointerOperandName;
+
   const DataLayout &MDL = Preheader->getModule()->getDataLayout();
 
   bool IsKnownThreadLocalObject = false;
@@ -2046,6 +2086,12 @@
             DereferenceableInPH = true;
             SafeToInsertStore = true;
             Alignment = std::max(Alignment, InstAlignment);
+          } else if (SetLicmConditionalAccessPromotion &&
+                     (!SawConditionalLIStore || (InstAlignment > Alignment))) {
+            SawConditionalLIStore = true;
+            if (PointerOperandName.empty())
+              PointerOperandName = Store->getPointerOperand()->getName();
+            Alignment = std::max(Alignment, InstAlignment);
           }
         }
@@ -2125,6 +2171,30 @@
     // If we cannot hoist the load either, give up.
     return false;
 
+  const bool PromoteConditionalAccesses =
+      SetLicmConditionalAccessPromotion && SawConditionalLIStore;
+  SmallVector<PHINode *, 16> FlagPHIs;
+  SSAUpdater FlagSSAUpdater(&FlagPHIs);
+  SSAUpdater *FlagSSAUpdaterPtr = nullptr;
+  if (!SafeToInsertStore && PromoteConditionalAccesses) {
+    // There are only conditional store instructions to the location within the
+    // loop.
+    SafeToInsertStore = true;
+    Type *Int1Ty = Type::getInt1Ty(Preheader->getParent()->getContext());
+    FlagSSAUpdater.Initialize(Int1Ty, PointerOperandName.str() + ".flag");
+    // Initialize the flag with 0 in the preheader.
+    FlagSSAUpdater.AddAvailableValue(Preheader,
+                                     ConstantInt::get(Int1Ty,
+                                                      /* Value */ 0));
+    for (auto *UI : LoopUses)
+      if (StoreInst *ConditionalLIStore = dyn_cast<StoreInst>(UI))
+        // Raise the flag if a conditional store happened.
+        FlagSSAUpdater.AddAvailableValue(ConditionalLIStore->getParent(),
+                                         ConstantInt::get(Int1Ty,
+                                                          /* Value */ 1));
+    FlagSSAUpdaterPtr = &FlagSSAUpdater;
+  }
+
   // Lets do the promotion!
   if (SafeToInsertStore)
     LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
@@ -2152,8 +2222,8 @@
   LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
                         InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL,
                         Alignment, SawUnorderedAtomic, AATags, *SafetyInfo,
-                        SafeToInsertStore);
-
+                        SafeToInsertStore, PromoteConditionalAccesses,
+                        FlagSSAUpdaterPtr);
   // Set up the preheader to have a definition of the value.  It is the live-out
   // value from the preheader that uses in the loop will use.
   LoadInst *PreheaderLoad = new LoadInst(
Index: llvm/test/Transforms/LICM/conditional-access-promotion.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LICM/conditional-access-promotion.ll
@@ -0,0 +1,75 @@
+;; C reproducer:
+;; int u;
+;;
+;; void f(int a[restrict], int n)
+;; {
+;;   for (int i = 0; i < n; ++i)
+;;     if (a[i])
+;;       ++u;
+;; }
+
+; RUN: opt -licm -licm-conditional-access-promotion -S < %s | FileCheck %s
+
+@u = dso_local local_unnamed_addr global i32 0, align 4
+
+; CHECK-LABEL: @f
+define dso_local void @f(i32* noalias nocapture readonly %a, i32 %n) {
+
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   [[U_PROMOTED:%.*]] = load i32, i32* @u, align 4
+; CHECK-NEXT:   br label [[FOR_COND:%.*]]
+entry:
+  br label %for.cond
+
+; CHECK: for.cond:
+; CHECK-NEXT:   [[U_FLAG4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[U_FLAG:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:   [[INC3:%.*]] = phi i32 [ [[U_PROMOTED]], [[ENTRY]] ], [ [[INC2:%.*]], [[FOR_INC]] ]
+; CHECK-NEXT:   [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC1:%.*]], [[FOR_INC]] ]
+; CHECK-NEXT:   [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N:%.*]]
+; CHECK-NEXT:   br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+for.cond:                                         ; preds = %for.inc, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc1, %for.inc ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+; CHECK: for.body:
+; CHECK-NEXT:   [[IDXPROM:%.*]] = zext i32 [[I_0]] to i64
+; CHECK-NEXT:   [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:   %0 = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:   [[TOBOOL_NOT:%.*]] = icmp eq i32 %0, 0
+; CHECK-NEXT:   br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]]
+for.body:                                         ; preds = %for.cond
+  %idxprom = zext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %tobool.not = icmp eq i32 %0, 0
+  br i1 %tobool.not, label %for.inc, label %if.then
+
+; CHECK: if.then:
+; CHECK-NEXT:   [[INC:%.*]] = add nsw i32 [[INC3]], 1
+; CHECK-NEXT:   br label [[FOR_INC]]
+if.then:                                          ; preds = %for.body
+  %1 = load i32, i32* @u, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* @u, align 4
+  br label %for.inc
+
+; CHECK: for.inc:
+; CHECK-NEXT:   [[U_FLAG]] = phi i1 [ true, [[IF_THEN]] ], [ [[U_FLAG4]], [[FOR_BODY]] ]
+; CHECK-NEXT:   [[INC2]] = phi i32 [ [[INC]], [[IF_THEN]] ], [ [[INC3]], [[FOR_BODY]] ]
+; CHECK-NEXT:   [[INC1]] = add nuw nsw i32 [[I_0]], 1
+; CHECK-NEXT:   br label [[FOR_COND]]
+for.inc:                                          ; preds = %for.body, %if.then
+  %inc1 = add nuw nsw i32 %i.0, 1
+  br label %for.cond
+
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT:   [[U_FLAG4_LCSSA:%.*]] = phi i1 [ [[U_FLAG4]], [[FOR_COND]] ]
+; CHECK-NEXT:   [[INC3_LCSSA:%.*]] = phi i32 [ [[INC3]], [[FOR_COND]] ]
+; CHECK-NEXT:   %1 = insertelement <1 x i32> undef, i32 [[INC3_LCSSA]], i64 0
+; CHECK-NEXT:   %2 = insertelement <1 x i1> undef, i1 [[U_FLAG4_LCSSA]], i64 0
+; CHECK-NEXT:   call void @llvm.masked.store.v1i32.p0v1i32(<1 x i32> %1, <1 x i32>* bitcast (i32* @u to <1 x i32>*), i32 4, <1 x i1> %2)
+; CHECK-NEXT:   ret void
+for.cond.cleanup:                                 ; preds = %for.cond
+  ret void
+}
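
Note for review (not part of the patch): the intended effect of the new option on the C reproducer above can be sketched at the source level roughly as follows. The function name f_promoted and the local names are hypothetical, introduced only for illustration; the actual pass keeps the guard as an i1 flag maintained by the SSAUpdater and emits a single-element llvm.masked.store in the loop exit block, as the CHECK lines in the test verify.

extern int u;

/* Illustrative hand-written equivalent of what
 * -licm-conditional-access-promotion aims to produce for f(). */
void f_promoted(int a[restrict], int n)
{
  int u_promoted = u;   /* load hoisted into the preheader */
  _Bool u_flag = 0;     /* flag initialized to 0 in the preheader */
  for (int i = 0; i < n; ++i)
    if (a[i]) {
      ++u_promoted;     /* update the promoted value instead of storing to u */
      u_flag = 1;       /* raised in the block that held the conditional store */
    }
  if (u_flag)           /* lowered to a masked store in the exit block */
    u = u_promoted;
}

Guarding the write-back with the flag ensures that a loop which never executes the conditional store (or never runs at all) does not introduce a store to u that the original program would not have performed.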