Index: lib/Transforms/Scalar/MergedLoadStoreMotion.cpp =================================================================== --- lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -131,7 +131,9 @@ BasicBlock *getDiamondTail(BasicBlock *BB); bool isDiamondHead(BasicBlock *BB); // Routines for hoisting loads - bool isLoadHoistBarrier(Instruction *Inst); + bool isLoadHoistBarrierInRange(const Instruction& Start, + const Instruction& End, + LoadInst* LI); LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI); void hoistInstruction(BasicBlock *BB, Instruction *HoistCand, Instruction *ElseInst); @@ -232,27 +234,12 @@ /// being loaded or protect against the load from happening /// it is considered a hoist barrier. /// -bool MergedLoadStoreMotion::isLoadHoistBarrier(Instruction *Inst) { - // FIXME: A call with no side effects should not be a barrier. - // Aren't all such calls covered by mayHaveSideEffects() below? - // Then this check can be removed. - if (isa(Inst)) - return true; - if (isa(Inst)) - return true; - // FIXME: Conservatively let a store instruction block the load. - // Use alias analysis instead. - if (isa(Inst)) - return true; - // Note: mayHaveSideEffects covers all instructions that could - // trigger a change to state. Eg. in-flight stores have to be executed - // before ordered loads or fences, calls could invoke functions that store - // data to memory etc. - if (Inst->mayHaveSideEffects()) { - return true; - } - DEBUG(dbgs() << "No Hoist Barrier\n"); - return false; + +bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction& Start, + const Instruction& End, + LoadInst* LI) { + AliasAnalysis::Location Loc = AA->getLocation(LI); + return AA->canInstructionRangeModify(Start, End, Loc); } /// @@ -262,33 +249,29 @@ /// and it can be hoisted from \p BB, return that load. /// Otherwise return Null. /// -LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB, - LoadInst *LI) { - LoadInst *I = nullptr; - assert(isa(LI)); - if (LI->isUsedOutsideOfBlock(LI->getParent())) - return nullptr; +LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1, + LoadInst *Load0) { - for (BasicBlock::iterator BBI = BB->begin(), BBE = BB->end(); BBI != BBE; + for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); BBI != BBE; ++BBI) { Instruction *Inst = BBI; // Only merge and hoist loads when their result in used only in BB - if (isLoadHoistBarrier(Inst)) - break; - if (!isa(Inst)) + if (!isa(Inst) || Inst->isUsedOutsideOfBlock(BB1)) continue; - if (Inst->isUsedOutsideOfBlock(Inst->getParent())) - continue; - AliasAnalysis::Location LocLI = AA->getLocation(LI); - AliasAnalysis::Location LocInst = AA->getLocation((LoadInst *)Inst); - if (AA->isMustAlias(LocLI, LocInst) && LI->getType() == Inst->getType()) { - I = (LoadInst *)Inst; - break; + LoadInst *Load1 = dyn_cast(Inst); + BasicBlock *BB0 = Load0->getParent(); + + AliasAnalysis::Location Loc0 = AA->getLocation(Load0); + AliasAnalysis::Location Loc1 = AA->getLocation(Load1); + if (AA->isMustAlias(Loc0, Loc1) && Load0->isSameOperationAs(Load1) && + !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1) && + !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0)) { + return Load1; } } - return I; + return nullptr; } /// @@ -385,17 +368,12 @@ Instruction *I = BBI; ++BBI; - if (isLoadHoistBarrier(I)) - break; // Only move non-simple (atomic, volatile) loads. - if (!isa(I)) + LoadInst *L0 = dyn_cast(I); + if (!L0 || !L0->isSimple() || L0->isUsedOutsideOfBlock(Succ0)) continue; - LoadInst *L0 = (LoadInst *)I; - if (!L0->isSimple()) - continue; - ++NLoads; if (NLoads * Size1 >= MagicCompileTimeControl) break; Index: test/Transforms/InstMerge/ld_hoist1.ll =================================================================== --- test/Transforms/InstMerge/ld_hoist1.ll +++ test/Transforms/InstMerge/ld_hoist1.ll @@ -0,0 +1,64 @@ +; Test load hoist +; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc_linux" + +; Function Attrs: nounwind uwtable +define float* @foo(i32* noalias nocapture readonly %in, float* noalias %out, i32 %size, i32* nocapture readonly %trigger) { +entry: + %cmp11 = icmp eq i32 %size, 0 + br i1 %cmp11, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + %0 = add i32 %size, -1 + br label %for.body + +; CHECK-LABEL: for.body +; CHECK: load +; CHECK: %2 = getelementptr inbounds i32* %in, i64 %indvars.iv +; CHECK: %3 = load i32* %2, align 4 + +for.body: ; preds = %for.body.lr.ph, %for.inc + %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.inc ] + %arrayidx = getelementptr inbounds i32* %trigger, i64 %indvars.iv + %1 = load i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %1, 0 + br i1 %cmp1, label %if.then, label %if.else + +; CHECK-LABEL: if.then +if.then: ; preds = %for.body +; This load should be hoisted + %arrayidx3 = getelementptr inbounds i32* %in, i64 %indvars.iv + %2 = load i32* %arrayidx3, align 4 + %conv = sitofp i32 %2 to float + %add = fadd float %conv, 5.000000e-01 + %arrayidx5 = getelementptr inbounds float* %out, i64 %indvars.iv + store float %add, float* %arrayidx5, align 4 + br label %for.inc + +if.else: ; preds = %for.body + %arrayidx7 = getelementptr inbounds float* %out, i64 %indvars.iv + %3 = load float* %arrayidx7, align 4 + %div = fdiv float %3, 3.000000e+00 + store float %div, float* %arrayidx7, align 4 +; This load should be hoisted in spite of store + %arrayidx9 = getelementptr inbounds i32* %in, i64 %indvars.iv + %4 = load i32* %arrayidx9, align 4 + %conv10 = sitofp i32 %4 to float + %add13 = fadd float %div, %conv10 + store float %add13, float* %arrayidx7, align 4 + br label %for.inc + +for.inc: ; preds = %if.then, %if.else + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %0 + br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.inc + br label %for.end + +for.end: ; preds = %entry, %for.cond.for.end_crit_edge + ret float* %out +} +