Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -138,6 +138,17 @@ "number of accesses allowed to be present in a loop in order to " "enable memory promotion.")); +// Experimental option which allows the LICM pass to promote conditional +// accesses of loop-invariant locations. +// The load instructions will be hoisted into the preheader, while the store +// instructions will be sunk into the exit blocks, where they will be executed +// conditionally, depending on whether or not the control flow actually got to +// them. +cl::opt<bool> SetLicmConditionalAccessPromotion( + "licm-conditional-access-promotion", cl::Hidden, cl::init(false), + cl::desc("Enable promotion of conditional accesses of loop-invariant " + "locations")); + static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, @@ -1841,12 +1852,19 @@ PredIteratorCache &PredCache; MemorySSAUpdater *MSSAU; LoopInfo &LI; + DominatorTree &DT; DebugLoc DL; Align Alignment; bool UnorderedAtomic; AAMDNodes AATags; ICFLoopSafetyInfo &SafetyInfo; bool CanInsertStoresInExitBlocks; + bool PromoteConditionalAccesses; + // This flag will be used to make sure that every sunk conditional store + // instruction is executed conditionally within the exit blocks. In the + // preheader, it is initialized to 0. In every basic block containing a + // conditional store it is raised. + SSAUpdater *FlagSSAUpdater; // We're about to add a use of V in a loop exit block. 
Insert an LCSSA phi // (if legal) if doing so would add an out-of-loop use to an instruction @@ -1871,15 +1889,19 @@ SmallVectorImpl<BasicBlock *> &LEB, SmallVectorImpl<Instruction *> &LIP, SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC, - MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl, - Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags, - ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks) + MemorySSAUpdater *MSSAU, LoopInfo &li, DominatorTree &dt, + DebugLoc dl, Align Alignment, bool UnorderedAtomic, + const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo, + bool CanInsertStoresInExitBlocks, + bool PromoteConditionalAccesses, SSAUpdater *FlagSSAUpdater) : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), - PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)), + PredCache(PIC), MSSAU(MSSAU), LI(li), DT(dt), DL(std::move(dl)), Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags), SafetyInfo(SafetyInfo), - CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks) {} + CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks), + PromoteConditionalAccesses(PromoteConditionalAccesses), + FlagSSAUpdater(FlagSSAUpdater) {} bool isInstInList(Instruction *I, const SmallVectorImpl<Instruction *> &) const override { @@ -1919,9 +1941,53 @@ NewMemAcc = MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint); } - MSSAInsertPts[i] = NewMemAcc; + // If we promote a conditional access, the insert position will be reset + // to the first instruction in the exit block, after all the PHIs and + // LandingPad instructions. This means that the next store being sunk will + // be inserted at the beginning of the exit basic block. + if (!PromoteConditionalAccesses) + MSSAInsertPts[i] = NewMemAcc; + else + MSSAInsertPts[i] = nullptr; MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true); // FIXME: true for safety, false may still be correct. 
+ if (PromoteConditionalAccesses) { + Value *FlagValue = FlagSSAUpdater->GetValueInMiddleOfBlock(ExitBlock); + const auto &Name = SomePtr->getName(); + // Split the exit basic block in half. The split point is the new store + // instruction just inserted. + BasicBlock *ThenBB = SplitBlock(ExitBlock, NewSI, &DT, &LI, MSSAU, + Name + ".flag.then.bb"); + // The new basic block starts with the new store instruction. We need to + // isolate the new store instruction in a separate "then" basic block. + // The split point is the InsertPos, which is the successor instruction + // of the new store instruction. + BasicBlock *ElseBB = SplitBlock(ThenBB, InsertPos, &DT, &LI, MSSAU, + Name + ".flag.else.bb"); + // The split exit block now unconditionally branches to the "then" basic + // block. We need to do that conditionally, depending on the flag's + // value. + Instruction *OldTerminator = ExitBlock->getTerminator(); + ICmpInst *NewICmpInst = new ICmpInst( + OldTerminator, CmpInst::Predicate::ICMP_EQ, FlagValue, + ConstantInt::get( + Type::getInt1Ty(ExitBlock->getParent()->getContext()), + /* Value */ 1), + "tobool." + Name + ".flag"); + BranchInst *NewTerminator = + BranchInst::Create(ThenBB, ElseBB, NewICmpInst); + ReplaceInstWithInst(OldTerminator, NewTerminator); + // Update the dominator tree with the added edge to the "else" basic + // block. + DT.insertEdge(ExitBlock, ElseBB); + // Update the MemorySSA with the added edge to the "else" basic block. 
+ SmallVector<CFGUpdate> Update; + Update.push_back({cfg::UpdateKind::Insert, ExitBlock, ElseBB}); + MSSAU->applyInsertUpdates(Update, DT); + if (VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + LoopInsertPts[i] = &*ExitBlock->getFirstInsertionPt(); + } } } @@ -2039,6 +2105,9 @@ bool SawNotAtomic = false; AAMDNodes AATags; + bool SawConditionalLIStore = false; + StringRef PointerOperandName; + const DataLayout &MDL = Preheader->getModule()->getDataLayout(); bool IsKnownThreadLocalObject = false; @@ -2114,6 +2183,12 @@ DereferenceableInPH = true; SafeToInsertStore = true; Alignment = std::max(Alignment, InstAlignment); + } else if (SetLicmConditionalAccessPromotion && + (!SawConditionalLIStore || (InstAlignment > Alignment))) { + SawConditionalLIStore = true; + if (PointerOperandName.empty()) + PointerOperandName = Store->getPointerOperand()->getName(); + Alignment = std::max(Alignment, InstAlignment); + } } @@ -2193,6 +2268,30 @@ // If we cannot hoist the load either, give up. return false; + const bool PromoteConditionalAccesses = + SetLicmConditionalAccessPromotion && SawConditionalLIStore; + SmallVector<PHINode *, 16> FlagPHIs; + SSAUpdater FlagSSAUpdater(&FlagPHIs); + SSAUpdater *FlagSSAUpdaterPtr = nullptr; + if (!SafeToInsertStore && PromoteConditionalAccesses) { + // There are only conditional store instructions to the location within the + // loop. + SafeToInsertStore = true; + Type *Int1Ty = Type::getInt1Ty(Preheader->getParent()->getContext()); + FlagSSAUpdater.Initialize(Int1Ty, PointerOperandName.str() + ".flag"); + // Initialize the flag with 0 in the preheader. + FlagSSAUpdater.AddAvailableValue(Preheader, + ConstantInt::get(Int1Ty, + /* Value */ 0)); + for (auto *UI : LoopUses) + if (StoreInst *ConditionalLIStore = dyn_cast<StoreInst>(UI)) + // Raise the flag if a conditional store happened. + FlagSSAUpdater.AddAvailableValue(ConditionalLIStore->getParent(), + ConstantInt::get(Int1Ty, + /* Value */ 1)); + FlagSSAUpdaterPtr = &FlagSSAUpdater; + } + // Let's do the promotion! 
if (SafeToInsertStore) LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr @@ -2218,10 +2317,10 @@ SmallVector<PHINode *, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, - InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL, + InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, *DT, DL, Alignment, SawUnorderedAtomic, AATags, *SafetyInfo, - SafeToInsertStore); - + SafeToInsertStore, PromoteConditionalAccesses, + FlagSSAUpdaterPtr); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. LoadInst *PreheaderLoad = new LoadInst( Index: llvm/test/Transforms/LICM/conditional-access-promotion.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/conditional-access-promotion.ll @@ -0,0 +1,80 @@ +;; C reproducer: +;; int u; +;; +;; void f(int a[restrict], int n) +;; { +;; for (int i = 0; i < n; ++i) +;; if (a[i]) +;; ++u; +;; } + +; RUN: opt -licm -licm-conditional-access-promotion -S < %s | FileCheck %s + +@u = dso_local local_unnamed_addr global i32 0, align 4 + +; CHECK-LABEL: @f +define dso_local void @f(i32* noalias nocapture readonly %a, i32 %n) { + +; CHECK-NEXT: entry: +; CHECK-NEXT: [[U_PROMOTED:%.*]] = load i32, i32* @u, align 4 +; CHECK-NEXT: br label [[FOR_COND:%.*]] +entry: + br label %for.cond + +; CHECK: for.cond: +; CHECK-NEXT: [[U_FLAG4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[U_FLAG:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[INC3:%.*]] = phi i32 [ [[U_PROMOTED:%.*]], [[ENTRY:%.*]] ], [ [[INC2:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC1:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0:%.*]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP:%.*]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc1, %for.inc ] + 
%cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.cond.cleanup + +; CHECK: for.body: +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_0:%.*]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[IDXPROM:%.*]] +; CHECK-NEXT: %0 = load i32, i32* [[ARRAYIDX:%.*]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 %0, 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT:%.*]], label [[FOR_INC:%.*]], label [[IF_THEN:%.*]] +for.body: ; preds = %for.cond + %idxprom = zext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %tobool.not = icmp eq i32 %0, 0 + br i1 %tobool.not, label %for.inc, label %if.then + +; CHECK: if.then: +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[INC3:%.*]], 1 +; CHECK-NEXT: br label [[FOR_INC:%.*]] +if.then: ; preds = %for.body + %1 = load i32, i32* @u, align 4 + %inc = add nsw i32 %1, 1 + store i32 %inc, i32* @u, align 4 + br label %for.inc + +; CHECK: for.inc: +; CHECK-NEXT: [[U_FLAG:%.*]] = phi i1 [ true, [[IF_THEN:%.*]] ], [ [[U_FLAG4:%.*]], [[FOR_COND:%.*]] ] +; CHECK-NEXT: [[INC2:%.*]] = phi i32 [ [[INC:%.*]], [[IF_THEN:%.*]] ], [ [[INC3:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[INC1:%.*]] = add nuw nsw i32 [[I_0:%.*]], 1 +; CHECK-NEXT: br label [[FOR_COND:%.*]] +for.inc: ; preds = %for.body, %if.then + %inc1 = add nuw nsw i32 %i.0, 1 + br label %for.cond + +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[U_FLAG4_LCSSA:%.*]] = phi i1 [ [[U_FLAG4:%.*]], [[FOR_COND:%.*]] ] +; CHECK-NEXT: [[INC3_LCSSA:%.*]] = phi i32 [ [[INC3:%.*]], [[FOR_COND:%.*]] ] +; CHECK-NEXT: [[TOBOOL_U_FLAG:%.*]] = icmp eq i1 [[U_FLAG4_LCSSA:%.*]], true +; CHECK-NEXT: br i1 [[TOBOOL_U_FLAG:%.*]], label [[U_FLAG_THEN_BB:%.*]], label [[U_FLAG_ELSE_BB:%.*]] +for.cond.cleanup: ; preds = %for.cond + ret void + +; CHECK: u.flag.then.bb: +; CHECK-NEXT: store i32 [[INC3_LCSSA:%.*]], i32* @u, align 4 +; CHECK-NEXT: br label [[U_FLAG_ELSE_BB:%.*]] + +; CHECK: 
u.flag.else.bb: +; CHECK-NEXT: ret void +}