Index: llvm/trunk/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ llvm/trunk/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -19,6 +19,8 @@ // thinks it safe to do so. This optimization helps with eg. hiding load // latencies, triggering if-conversion, and reducing static code size. // +// NOTE: This code no longer performs load hoisting, it is subsumed by GVNHoist. +// //===----------------------------------------------------------------------===// // // @@ -118,16 +120,6 @@ void removeInstruction(Instruction *Inst); BasicBlock *getDiamondTail(BasicBlock *BB); bool isDiamondHead(BasicBlock *BB); - // Routines for hoisting loads - bool isLoadHoistBarrierInRange(const Instruction &Start, - const Instruction &End, LoadInst *LI, - bool SafeToLoadUnconditionally); - LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI); - void hoistInstruction(BasicBlock *BB, Instruction *HoistCand, - Instruction *ElseInst); - bool isSafeToHoist(Instruction *I) const; - bool hoistLoad(BasicBlock *BB, LoadInst *HoistCand, LoadInst *ElseInst); - bool mergeLoads(BasicBlock *BB); // Routines for sinking stores StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI); PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1); @@ -188,169 +180,6 @@ return true; } -/// -/// \brief True when instruction is a hoist barrier for a load -/// -/// Whenever an instruction could possibly modify the value -/// being loaded or protect against the load from happening -/// it is considered a hoist barrier. -/// -bool MergedLoadStoreMotion::isLoadHoistBarrierInRange( - const Instruction &Start, const Instruction &End, LoadInst *LI, - bool SafeToLoadUnconditionally) { - if (!SafeToLoadUnconditionally) - for (const Instruction &Inst : - make_range(Start.getIterator(), End.getIterator())) - if (!isGuaranteedToTransferExecutionToSuccessor(&Inst)) - return true; - MemoryLocation Loc = MemoryLocation::get(LI); - return AA->canInstructionRangeModRef(Start, End, Loc, MRI_Mod); -} - -/// -/// \brief Decide if a load can be hoisted -/// -/// When there is a load in \p BB to the same address as \p LI -/// and it can be hoisted from \p BB, return that load. -/// Otherwise return Null. -/// -LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1, - LoadInst *Load0) { - BasicBlock *BB0 = Load0->getParent(); - BasicBlock *Head = BB0->getSinglePredecessor(); - bool SafeToLoadUnconditionally = isSafeToLoadUnconditionally( - Load0->getPointerOperand(), Load0->getAlignment(), - Load0->getModule()->getDataLayout(), - /*ScanFrom=*/Head->getTerminator()); - for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); BBI != BBE; - ++BBI) { - Instruction *Inst = &*BBI; - - // Only merge and hoist loads when their result in used only in BB - auto *Load1 = dyn_cast(Inst); - if (!Load1 || Inst->isUsedOutsideOfBlock(BB1)) - continue; - - MemoryLocation Loc0 = MemoryLocation::get(Load0); - MemoryLocation Loc1 = MemoryLocation::get(Load1); - if (Load0->isSameOperationAs(Load1) && AA->isMustAlias(Loc0, Loc1) && - !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1, - SafeToLoadUnconditionally) && - !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0, - SafeToLoadUnconditionally)) { - return Load1; - } - } - return nullptr; -} - -/// -/// \brief Merge two equivalent instructions \p HoistCand and \p ElseInst into -/// \p BB -/// -/// BB is the head of a diamond -/// -void MergedLoadStoreMotion::hoistInstruction(BasicBlock *BB, - Instruction *HoistCand, - Instruction *ElseInst) { - DEBUG(dbgs() << " Hoist Instruction into BB \n"; BB->dump(); - dbgs() << "Instruction Left\n"; HoistCand->dump(); dbgs() << "\n"; - dbgs() << "Instruction Right\n"; ElseInst->dump(); dbgs() << "\n"); - // Hoist the instruction. - assert(HoistCand->getParent() != BB); - - // Intersect optional metadata. - HoistCand->andIRFlags(ElseInst); - HoistCand->dropUnknownNonDebugMetadata(); - - // Prepend point for instruction insert - Instruction *HoistPt = BB->getTerminator(); - - // Merged instruction - Instruction *HoistedInst = HoistCand->clone(); - - // Hoist instruction. - HoistedInst->insertBefore(HoistPt); - - HoistCand->replaceAllUsesWith(HoistedInst); - removeInstruction(HoistCand); - // Replace the else block instruction. - ElseInst->replaceAllUsesWith(HoistedInst); - removeInstruction(ElseInst); -} - -/// -/// \brief Return true if no operand of \p I is defined in I's parent block -/// -bool MergedLoadStoreMotion::isSafeToHoist(Instruction *I) const { - BasicBlock *Parent = I->getParent(); - for (Use &U : I->operands()) - if (auto *Instr = dyn_cast(&U)) - if (Instr->getParent() == Parent) - return false; - return true; -} - -/// -/// \brief Merge two equivalent loads and GEPs and hoist into diamond head -/// -bool MergedLoadStoreMotion::hoistLoad(BasicBlock *BB, LoadInst *L0, - LoadInst *L1) { - // Only one definition? - auto *A0 = dyn_cast(L0->getPointerOperand()); - auto *A1 = dyn_cast(L1->getPointerOperand()); - if (A0 && A1 && A0->isIdenticalTo(A1) && isSafeToHoist(A0) && - A0->hasOneUse() && (A0->getParent() == L0->getParent()) && - A1->hasOneUse() && (A1->getParent() == L1->getParent()) && - isa(A0)) { - DEBUG(dbgs() << "Hoist Instruction into BB \n"; BB->dump(); - dbgs() << "Instruction Left\n"; L0->dump(); dbgs() << "\n"; - dbgs() << "Instruction Right\n"; L1->dump(); dbgs() << "\n"); - hoistInstruction(BB, A0, A1); - hoistInstruction(BB, L0, L1); - return true; - } - return false; -} - -/// -/// \brief Try to hoist two loads to same address into diamond header -/// -/// Starting from a diamond head block, iterate over the instructions in one -/// successor block and try to match a load in the second successor. -/// -bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) { - bool MergedLoads = false; - assert(isDiamondHead(BB)); - BranchInst *BI = cast(BB->getTerminator()); - BasicBlock *Succ0 = BI->getSuccessor(0); - BasicBlock *Succ1 = BI->getSuccessor(1); - // #Instructions in Succ1 for Compile Time Control - int Size1 = Succ1->size(); - int NLoads = 0; - for (BasicBlock::iterator BBI = Succ0->begin(), BBE = Succ0->end(); - BBI != BBE;) { - Instruction *I = &*BBI; - ++BBI; - - // Don't move non-simple (atomic, volatile) loads. - auto *L0 = dyn_cast(I); - if (!L0 || !L0->isSimple() || L0->isUsedOutsideOfBlock(Succ0)) - continue; - - ++NLoads; - if (NLoads * Size1 >= MagicCompileTimeControl) - break; - if (LoadInst *L1 = canHoistFromBlock(Succ1, L0)) { - bool Res = hoistLoad(BB, L0, L1); - MergedLoads |= Res; - // Don't attempt to hoist above loads that had not been hoisted. - if (!Res) - break; - } - } - return MergedLoads; -} /// /// \brief True when instruction is a sink barrier for a store @@ -534,7 +363,6 @@ // Hoist equivalent loads and sink stores // outside diamonds when possible if (isDiamondHead(BB)) { - Changed |= mergeLoads(BB); Changed |= mergeStores(getDiamondTail(BB)); } } Index: llvm/trunk/test/Transforms/GVNHoist/ld_hoist1.ll =================================================================== --- llvm/trunk/test/Transforms/GVNHoist/ld_hoist1.ll +++ llvm/trunk/test/Transforms/GVNHoist/ld_hoist1.ll @@ -0,0 +1,64 @@ +; Test load hoist +; RUN: opt -gvn-hoist -S < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc_linux" + +; Function Attrs: nounwind uwtable +define float* @foo(i32* noalias nocapture readonly %in, float* noalias %out, i32 %size, i32* nocapture readonly %trigger) { +entry: + %cmp11 = icmp eq i32 %size, 0 + br i1 %cmp11, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + %0 = add i32 %size, -1 + br label %for.body + +; CHECK-LABEL: for.body +; CHECK: load +; CHECK: %2 = getelementptr inbounds i32, i32* %in, i64 %indvars.iv +; CHECK: %3 = load i32, i32* %2, align 4 + +for.body: ; preds = %for.body.lr.ph, %for.inc + %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.inc ] + %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv + %1 = load i32, i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %1, 0 + br i1 %cmp1, label %if.then, label %if.else + +; CHECK-LABEL: if.then +if.then: ; preds = %for.body +; This load should be hoisted + %arrayidx3 = getelementptr inbounds i32, i32* %in, i64 %indvars.iv + %2 = load i32, i32* %arrayidx3, align 4 + %conv = sitofp i32 %2 to float + %add = fadd float %conv, 5.000000e-01 + %arrayidx5 = getelementptr inbounds float, float* %out, i64 %indvars.iv + store float %add, float* %arrayidx5, align 4 + br label %for.inc + +if.else: ; preds = %for.body + %arrayidx7 = getelementptr inbounds float, float* %out, i64 %indvars.iv + %3 = load float, float* %arrayidx7, align 4 + %div = fdiv float %3, 3.000000e+00 + store float %div, float* %arrayidx7, align 4 +; This load should be hoisted in spite of store + %arrayidx9 = getelementptr inbounds i32, i32* %in, i64 %indvars.iv + %4 = load i32, i32* %arrayidx9, align 4 + %conv10 = sitofp i32 %4 to float + %add13 = fadd float %div, %conv10 + store float %add13, float* %arrayidx7, align 4 + br label %for.inc + +for.inc: ; preds = %if.then, %if.else + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %0 + br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.inc + br label %for.end + +for.end: ; preds = %entry, %for.cond.for.end_crit_edge + ret float* %out +} + Index: llvm/trunk/test/Transforms/GVNHoist/ld_hoist_st_sink.ll =================================================================== --- llvm/trunk/test/Transforms/GVNHoist/ld_hoist_st_sink.ll +++ llvm/trunk/test/Transforms/GVNHoist/ld_hoist_st_sink.ll @@ -0,0 +1,84 @@ +; Tests to make sure that loads and stores in a diamond get merged +; Loads are hoisted into the header. Stores sunks into the footer. +; RUN: opt -gvn-hoist -S < %s | FileCheck %s +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +%struct.node = type { i64, %struct.node*, %struct.node*, %struct.node*, i64, %struct.arc*, i64, i64, i64 } +%struct.arc = type { i64, i64, i64 } + +define i64 @foo(%struct.node* nocapture readonly %r) nounwind { +entry: + %node.0.in16 = getelementptr inbounds %struct.node, %struct.node* %r, i64 0, i32 2 + %node.017 = load %struct.node*, %struct.node** %node.0.in16, align 8 + %tobool18 = icmp eq %struct.node* %node.017, null + br i1 %tobool18, label %while.end, label %while.body.preheader + +; CHECK-LABEL: while.body.preheader +while.body.preheader: ; preds = %entry +; CHECK: load + br label %while.body + +while.body: ; preds = %while.body.preheader, %if.end + %node.020 = phi %struct.node* [ %node.0, %if.end ], [ %node.017, %while.body.preheader ] + %sum.019 = phi i64 [ %inc, %if.end ], [ 0, %while.body.preheader ] + %orientation = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 4 + %0 = load i64, i64* %orientation, align 8 + %cmp = icmp eq i64 %0, 1 + br i1 %cmp, label %if.then, label %if.else +; CHECK: if.then +if.then: ; preds = %while.body + %a = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 5 +; CHECK-NOT: load %struct.arc + %1 = load %struct.arc*, %struct.arc** %a, align 8 + %cost = getelementptr inbounds %struct.arc, %struct.arc* %1, i64 0, i32 0 +; CHECK-NOT: load i64, i64* + %2 = load i64, i64* %cost, align 8 + %pred = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 1 +; CHECK-NOT: load %struct.node*, %struct.node** + %3 = load %struct.node*, %struct.node** %pred, align 8 + %p = getelementptr inbounds %struct.node, %struct.node* %3, i64 0, i32 6 +; CHECK-NOT: load i64, i64* + %4 = load i64, i64* %p, align 8 + %add = add nsw i64 %4, %2 + %p1 = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 6 +; FIXME: store i64 + store i64 %add, i64* %p1, align 8 + br label %if.end + +; CHECK: if.else +if.else: ; preds = %while.body + %pred2 = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 1 +; CHECK-NOT: load %struct.node*, %struct.node** + %5 = load %struct.node*, %struct.node** %pred2, align 8 + %p3 = getelementptr inbounds %struct.node, %struct.node* %5, i64 0, i32 6 +; CHECK-NOT: load i64, i64* + %6 = load i64, i64* %p3, align 8 + %a4 = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 5 +; CHECK-NOT: load %struct.arc*, %struct.arc** + %7 = load %struct.arc*, %struct.arc** %a4, align 8 + %cost5 = getelementptr inbounds %struct.arc, %struct.arc* %7, i64 0, i32 0 +; CHECK-NOT: load i64, i64* + %8 = load i64, i64* %cost5, align 8 + %sub = sub nsw i64 %6, %8 + %p6 = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 6 +; FIXME: store i64 + store i64 %sub, i64* %p6, align 8 + br label %if.end + +; CHECK: if.end +if.end: ; preds = %if.else, %if.then +; FIXME: store + %inc = add nsw i64 %sum.019, 1 + %node.0.in = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 2 + %node.0 = load %struct.node*, %struct.node** %node.0.in, align 8 + %tobool = icmp eq %struct.node* %node.0, null + br i1 %tobool, label %while.end.loopexit, label %while.body + +while.end.loopexit: ; preds = %if.end + %inc.lcssa = phi i64 [ %inc, %if.end ] + br label %while.end + +while.end: ; preds = %while.end.loopexit, %entry + %sum.0.lcssa = phi i64 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ] + ret i64 %sum.0.lcssa +} Index: llvm/trunk/test/Transforms/InstMerge/ld_hoist1.ll =================================================================== --- llvm/trunk/test/Transforms/InstMerge/ld_hoist1.ll +++ llvm/trunk/test/Transforms/InstMerge/ld_hoist1.ll @@ -1,64 +0,0 @@ -; Test load hoist -; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-pc_linux" - -; Function Attrs: nounwind uwtable -define float* @foo(i32* noalias nocapture readonly %in, float* noalias %out, i32 %size, i32* nocapture readonly %trigger) { -entry: - %cmp11 = icmp eq i32 %size, 0 - br i1 %cmp11, label %for.end, label %for.body.lr.ph - -for.body.lr.ph: ; preds = %entry - %0 = add i32 %size, -1 - br label %for.body - -; CHECK-LABEL: for.body -; CHECK: load -; CHECK: %2 = getelementptr inbounds i32, i32* %in, i64 %indvars.iv -; CHECK: %3 = load i32, i32* %2, align 4 - -for.body: ; preds = %for.body.lr.ph, %for.inc - %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.inc ] - %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv - %1 = load i32, i32* %arrayidx, align 4 - %cmp1 = icmp sgt i32 %1, 0 - br i1 %cmp1, label %if.then, label %if.else - -; CHECK-LABEL: if.then -if.then: ; preds = %for.body -; This load should be hoisted - %arrayidx3 = getelementptr inbounds i32, i32* %in, i64 %indvars.iv - %2 = load i32, i32* %arrayidx3, align 4 - %conv = sitofp i32 %2 to float - %add = fadd float %conv, 5.000000e-01 - %arrayidx5 = getelementptr inbounds float, float* %out, i64 %indvars.iv - store float %add, float* %arrayidx5, align 4 - br label %for.inc - -if.else: ; preds = %for.body - %arrayidx7 = getelementptr inbounds float, float* %out, i64 %indvars.iv - %3 = load float, float* %arrayidx7, align 4 - %div = fdiv float %3, 3.000000e+00 - store float %div, float* %arrayidx7, align 4 -; This load should be hoisted in spite of store - %arrayidx9 = getelementptr inbounds i32, i32* %in, i64 %indvars.iv - %4 = load i32, i32* %arrayidx9, align 4 - %conv10 = sitofp i32 %4 to float - %add13 = fadd float %div, %conv10 - store float %add13, float* %arrayidx7, align 4 - br label %for.inc - -for.inc: ; preds = %if.then, %if.else - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %lftr.wideiv = trunc i64 %indvars.iv to i32 - %exitcond = icmp ne i32 %lftr.wideiv, %0 - br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge - -for.cond.for.end_crit_edge: ; preds = %for.inc - br label %for.end - -for.end: ; preds = %entry, %for.cond.for.end_crit_edge - ret float* %out -} - Index: llvm/trunk/test/Transforms/InstMerge/ld_hoist_st_sink.ll =================================================================== --- llvm/trunk/test/Transforms/InstMerge/ld_hoist_st_sink.ll +++ llvm/trunk/test/Transforms/InstMerge/ld_hoist_st_sink.ll @@ -1,84 +0,0 @@ -; Tests to make sure that loads and stores in a diamond get merged -; Loads are hoisted into the header. Stores sunks into the footer. -; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s -target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" - -%struct.node = type { i64, %struct.node*, %struct.node*, %struct.node*, i64, %struct.arc*, i64, i64, i64 } -%struct.arc = type { i64, i64, i64 } - -define i64 @foo(%struct.node* nocapture readonly %r) nounwind { -entry: - %node.0.in16 = getelementptr inbounds %struct.node, %struct.node* %r, i64 0, i32 2 - %node.017 = load %struct.node*, %struct.node** %node.0.in16, align 8 - %tobool18 = icmp eq %struct.node* %node.017, null - br i1 %tobool18, label %while.end, label %while.body.preheader - -; CHECK-LABEL: while.body.preheader -while.body.preheader: ; preds = %entry -; CHECK: load - br label %while.body - -while.body: ; preds = %while.body.preheader, %if.end - %node.020 = phi %struct.node* [ %node.0, %if.end ], [ %node.017, %while.body.preheader ] - %sum.019 = phi i64 [ %inc, %if.end ], [ 0, %while.body.preheader ] - %orientation = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 4 - %0 = load i64, i64* %orientation, align 8 - %cmp = icmp eq i64 %0, 1 - br i1 %cmp, label %if.then, label %if.else -; CHECK: if.then -if.then: ; preds = %while.body - %a = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 5 -; CHECK-NOT: load %struct.arc - %1 = load %struct.arc*, %struct.arc** %a, align 8 - %cost = getelementptr inbounds %struct.arc, %struct.arc* %1, i64 0, i32 0 -; CHECK-NOT: load i64, i64* - %2 = load i64, i64* %cost, align 8 - %pred = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 1 -; CHECK-NOT: load %struct.node*, %struct.node** - %3 = load %struct.node*, %struct.node** %pred, align 8 - %p = getelementptr inbounds %struct.node, %struct.node* %3, i64 0, i32 6 -; CHECK-NOT: load i64, i64* - %4 = load i64, i64* %p, align 8 - %add = add nsw i64 %4, %2 - %p1 = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 6 -; CHECK-NOT: store i64 - store i64 %add, i64* %p1, align 8 - br label %if.end - -; CHECK: if.else -if.else: ; preds = %while.body - %pred2 = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 1 -; CHECK-NOT: load %struct.node*, %struct.node** - %5 = load %struct.node*, %struct.node** %pred2, align 8 - %p3 = getelementptr inbounds %struct.node, %struct.node* %5, i64 0, i32 6 -; CHECK-NOT: load i64, i64* - %6 = load i64, i64* %p3, align 8 - %a4 = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 5 -; CHECK-NOT: load %struct.arc*, %struct.arc** - %7 = load %struct.arc*, %struct.arc** %a4, align 8 - %cost5 = getelementptr inbounds %struct.arc, %struct.arc* %7, i64 0, i32 0 -; CHECK-NOT: load i64, i64* - %8 = load i64, i64* %cost5, align 8 - %sub = sub nsw i64 %6, %8 - %p6 = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 6 -; CHECK-NOT: store i64 - store i64 %sub, i64* %p6, align 8 - br label %if.end - -; CHECK: if.end -if.end: ; preds = %if.else, %if.then -; CHECK: store - %inc = add nsw i64 %sum.019, 1 - %node.0.in = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 2 - %node.0 = load %struct.node*, %struct.node** %node.0.in, align 8 - %tobool = icmp eq %struct.node* %node.0, null - br i1 %tobool, label %while.end.loopexit, label %while.body - -while.end.loopexit: ; preds = %if.end - %inc.lcssa = phi i64 [ %inc, %if.end ] - br label %while.end - -while.end: ; preds = %while.end.loopexit, %entry - %sum.0.lcssa = phi i64 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ] - ret i64 %sum.0.lcssa -}