Index: include/llvm/Analysis/AliasAnalysis.h =================================================================== --- include/llvm/Analysis/AliasAnalysis.h +++ include/llvm/Analysis/AliasAnalysis.h @@ -502,7 +502,7 @@ /// /// canBasicBlockModify - Return true if it is possible for execution of the - /// specified basic block to modify the value pointed to by Ptr. + /// specified basic block to modify the location Loc. bool canBasicBlockModify(const BasicBlock &BB, const Location &Loc); /// canBasicBlockModify - A convenience wrapper. @@ -510,17 +510,20 @@ return canBasicBlockModify(BB, Location(P, Size)); } - /// canInstructionRangeModify - Return true if it is possible for the - /// execution of the specified instructions to modify the value pointed to by - /// Ptr. The instructions to consider are all of the instructions in the - /// range of [I1,I2] INCLUSIVE. I1 and I2 must be in the same basic block. - bool canInstructionRangeModify(const Instruction &I1, const Instruction &I2, - const Location &Loc); - - /// canInstructionRangeModify - A convenience wrapper. - bool canInstructionRangeModify(const Instruction &I1, const Instruction &I2, - const Value *Ptr, uint64_t Size) { - return canInstructionRangeModify(I1, I2, Location(Ptr, Size)); + /// canInstructionRangeModRef - Return true if it is possible for the + /// execution of the specified instructions to mod\ref (according to the + /// mode) the location Loc. The instructions to consider are all + /// of the instructions in the range of [I1,I2] INCLUSIVE. + /// I1 and I2 must be in the same basic block. + bool canInstructionRangeModRef(const Instruction &I1, + const Instruction &I2, const Location &Loc, + const ModRefResult Mode); + + /// canInstructionRangeModRef - A convenience wrapper. + bool canInstructionRangeModRef(const Instruction &I1, + const Instruction &I2, const Value *Ptr, + uint64_t Size, const ModRefResult Mode) { + return canInstructionRangeModRef(I1, I2, Location(Ptr, Size), Mode); } //===--------------------------------------------------------------------===// Index: lib/Analysis/AliasAnalysis.cpp =================================================================== --- lib/Analysis/AliasAnalysis.cpp +++ lib/Analysis/AliasAnalysis.cpp @@ -483,21 +483,22 @@ } /// canBasicBlockModify - Return true if it is possible for execution of the -/// specified basic block to modify the value pointed to by Ptr. +/// specified basic block to modify the location Loc. /// bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB, const Location &Loc) { - return canInstructionRangeModify(BB.front(), BB.back(), Loc); + return canInstructionRangeModRef(BB.front(), BB.back(), Loc, Mod); } -/// canInstructionRangeModify - Return true if it is possible for the execution -/// of the specified instructions to modify the value pointed to by Ptr. The -/// instructions to consider are all of the instructions in the range of [I1,I2] -/// INCLUSIVE. I1 and I2 must be in the same basic block. -/// -bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1, +/// canInstructionRangeModRef - Return true if it is possible for the +/// execution of the specified instructions to mod\ref (according to the +/// mode) the location Loc. The instructions to consider are all +/// of the instructions in the range of [I1,I2] INCLUSIVE. +/// I1 and I2 must be in the same basic block. +bool AliasAnalysis::canInstructionRangeModRef(const Instruction &I1, const Instruction &I2, - const Location &Loc) { + const Location &Loc, + const ModRefResult Mode) { assert(I1.getParent() == I2.getParent() && "Instructions not in same basic block!"); BasicBlock::const_iterator I = &I1; @@ -505,7 +506,7 @@ ++E; // Convert from inclusive to exclusive range. for (; I != E; ++I) // Check every instruction in range - if (getModRefInfo(I, Loc) & Mod) + if (getModRefInfo(I, Loc) & Mode) return true; return false; } Index: lib/Transforms/IPO/ArgumentPromotion.cpp =================================================================== --- lib/Transforms/IPO/ArgumentPromotion.cpp +++ lib/Transforms/IPO/ArgumentPromotion.cpp @@ -554,7 +554,8 @@ BasicBlock *BB = Load->getParent(); AliasAnalysis::Location Loc = AA.getLocation(Load); - if (AA.canInstructionRangeModify(BB->front(), *Load, Loc)) + if (AA.canInstructionRangeModRef(BB->front(), *Load, Loc, + AliasAnalysis::Mod)) return false; // Pointer is invalidated! // Now check every path from the entry block to the load for transparency. Index: lib/Transforms/Scalar/MergedLoadStoreMotion.cpp =================================================================== --- lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -143,7 +143,9 @@ // Routines for sinking stores StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI); PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1); - bool isStoreSinkBarrier(Instruction *Inst); + bool isStoreSinkBarrierInRange(const Instruction& Start, + const Instruction& End, + AliasAnalysis::Location Loc); bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst); bool mergeStores(BasicBlock *BB); // The mergeLoad/Store algorithms could have Size0 * Size1 complexity, @@ -239,7 +241,7 @@ const Instruction& End, LoadInst* LI) { AliasAnalysis::Location Loc = AA->getLocation(LI); - return AA->canInstructionRangeModify(Start, End, Loc); + return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::Mod); } /// @@ -389,26 +391,19 @@ } /// -/// \brief True when instruction is sink barrier for a store -/// -bool MergedLoadStoreMotion::isStoreSinkBarrier(Instruction *Inst) { - // FIXME: Conservatively let a load instruction block the store. - // Use alias analysis instead. - if (isa(Inst)) - return true; - if (isa(Inst)) - return true; - if (isa(Inst) && !isa(Inst)) - return true; - // Note: mayHaveSideEffects covers all instructions that could - // trigger a change to state. Eg. in-flight stores have to be executed - // before ordered loads or fences, calls could invoke functions that store - // data to memory etc. - if (!isa(Inst) && Inst->mayHaveSideEffects()) { - return true; - } - DEBUG(dbgs() << "No Sink Barrier\n"); - return false; +/// \brief True when instruction is a sink barrier for a store +/// located in Loc +/// +/// Whenever an instruction could possibly read or modify the +/// value being stored or protect against the store from +/// happening it is considered a sink barrier. +/// + +bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction& Start, + const Instruction& End, + AliasAnalysis::Location + Loc) { + return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::Ref); } /// @@ -416,27 +411,28 @@ /// /// \return The store in \p when it is safe to sink. Otherwise return Null. /// -StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB, - StoreInst *SI) { - StoreInst *I = 0; - DEBUG(dbgs() << "can Sink? : "; SI->dump(); dbgs() << "\n"); - for (BasicBlock::reverse_iterator RBI = BB->rbegin(), RBE = BB->rend(); +StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1, + StoreInst *Store0) { + DEBUG(dbgs() << "can Sink? : "; Store0->dump(); dbgs() << "\n"); + for (BasicBlock::reverse_iterator RBI = BB1->rbegin(), RBE = BB1->rend(); RBI != RBE; ++RBI) { Instruction *Inst = &*RBI; - // Only move loads if they are used in the block. - if (isStoreSinkBarrier(Inst)) - break; - if (isa(Inst)) { - AliasAnalysis::Location LocSI = AA->getLocation(SI); - AliasAnalysis::Location LocInst = AA->getLocation((StoreInst *)Inst); - if (AA->isMustAlias(LocSI, LocInst)) { - I = (StoreInst *)Inst; - break; - } + if (!isa(Inst)) + continue; + + StoreInst *Store1 = cast(Inst); + BasicBlock *BB0 = Store0->getParent(); + + AliasAnalysis::Location Loc0 = AA->getLocation(Store0); + AliasAnalysis::Location Loc1 = AA->getLocation(Store1); + if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) && + !isStoreSinkBarrierInRange(*Store1, BB1->back(), Loc1) && + !isStoreSinkBarrierInRange(*Store0, BB0->back(), Loc0)) { + return Store1; } } - return I; + return nullptr; } /// @@ -548,8 +544,7 @@ Instruction *I = &*RBI; ++RBI; - if (isStoreSinkBarrier(I)) - break; + // Sink move non-simple (atomic, volatile) stores if (!isa(I)) continue; Index: test/Transforms/InstMerge/st_sink_barrier_call.ll =================================================================== --- test/Transforms/InstMerge/st_sink_barrier_call.ll +++ test/Transforms/InstMerge/st_sink_barrier_call.ll @@ -0,0 +1,43 @@ +; Test to make sure that a function call that needs to be a barrier to sinking stores is indeed a barrier. +; Stores sunks into the footer. +; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +%struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 } + +declare i32 @foo(i32 %x) + +; Function Attrs: nounwind uwtable +define void @sink_store(%struct.node* nocapture %r, i32 %index) { +entry: + %node.0.in16 = getelementptr inbounds %struct.node* %r, i64 0, i32 2 + %node.017 = load %struct.node** %node.0.in16, align 8 + %index.addr = alloca i32, align 4 + store i32 %index, i32* %index.addr, align 4 + %0 = load i32* %index.addr, align 4 + %cmp = icmp slt i32 %0, 0 + br i1 %cmp, label %if.then, label %if.else + +; CHECK: if.then +if.then: ; preds = %entry + %1 = load i32* %index.addr, align 4 + %p1 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6 + ; CHECK: store i32 + store i32 %1, i32* %p1, align 4 + br label %if.end + +; CHECK: if.else +if.else: ; preds = %entry + %2 = load i32* %index.addr, align 4 + %add = add nsw i32 %2, 1 + %p3 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6 + ; CHECK: store i32 + store i32 %add, i32* %p3, align 4 + call i32 @foo(i32 5) ;barrier + br label %if.end + +; CHECK: if.end +if.end: ; preds = %if.else, %if.then +; CHECK-NOT: store + ret void +} Index: test/Transforms/InstMerge/st_sink_no_barrier_call.ll =================================================================== --- test/Transforms/InstMerge/st_sink_no_barrier_call.ll +++ test/Transforms/InstMerge/st_sink_no_barrier_call.ll @@ -0,0 +1,45 @@ +; Test to make sure that stores in a diamond get merged with a non barrier function call after the store instruction +; Stores sunks into the footer. +; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +%struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 } + +declare i32 @foo(i32 %x) #0 + +; Function Attrs: nounwind uwtable +define void @sink_store(%struct.node* nocapture %r, i32 %index) { +entry: + %node.0.in16 = getelementptr inbounds %struct.node* %r, i64 0, i32 2 + %node.017 = load %struct.node** %node.0.in16, align 8 + %index.addr = alloca i32, align 4 + store i32 %index, i32* %index.addr, align 4 + %0 = load i32* %index.addr, align 4 + %cmp = icmp slt i32 %0, 0 + br i1 %cmp, label %if.then, label %if.else + +; CHECK: if.then +if.then: ; preds = %entry + %1 = load i32* %index.addr, align 4 + %p1 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6 + ; CHECK-NOT: store i32 + store i32 %1, i32* %p1, align 4 + br label %if.end + +; CHECK: if.else +if.else: ; preds = %entry + %2 = load i32* %index.addr, align 4 + %add = add nsw i32 %2, 1 + %p3 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6 + ; CHECK-NOT: store i32 + store i32 %add, i32* %p3, align 4 + call i32 @foo(i32 5) ;not a barrier + br label %if.end + +; CHECK: if.end +if.end: ; preds = %if.else, %if.then +; CHECK: store + ret void +} + +attributes #0 = { readnone } Index: test/Transforms/InstMerge/st_sink_no_barrier_load.ll =================================================================== --- test/Transforms/InstMerge/st_sink_no_barrier_load.ll +++ test/Transforms/InstMerge/st_sink_no_barrier_load.ll @@ -0,0 +1,43 @@ +; Test to make sure that stores in a diamond get merged with a non barrier load after the store instruction +; Stores sunks into the footer. +; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +%struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 } + +; Function Attrs: nounwind uwtable +define void @sink_store(%struct.node* nocapture %r, i32 %index) { +entry: + %node.0.in16 = getelementptr inbounds %struct.node* %r, i64 0, i32 2 + %node.017 = load %struct.node** %node.0.in16, align 8 + %index.addr = alloca i32, align 4 + store i32 %index, i32* %index.addr, align 4 + %0 = load i32* %index.addr, align 4 + %cmp = icmp slt i32 %0, 0 + br i1 %cmp, label %if.then, label %if.else + +; CHECK: if.then +if.then: ; preds = %entry + %1 = load i32* %index.addr, align 4 + %p1 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6 + ; CHECK-NOT: store i32 + store i32 %1, i32* %p1, align 4 + %p2 = getelementptr inbounds %struct.node* %node.017, i32 5, i32 6 + ; CHECK: load i32* + %not_barrier = load i32 * %p2, align 4 + br label %if.end + +; CHECK: if.else +if.else: ; preds = %entry + %2 = load i32* %index.addr, align 4 + %add = add nsw i32 %2, 1 + %p3 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6 + ; CHECK-NOT: store i32 + store i32 %add, i32* %p3, align 4 + br label %if.end + +; CHECK: if.end +if.end: ; preds = %if.else, %if.then +; CHECK: store + ret void +} Index: test/Transforms/InstMerge/st_sink_no_barrier_store.ll =================================================================== --- test/Transforms/InstMerge/st_sink_no_barrier_store.ll +++ test/Transforms/InstMerge/st_sink_no_barrier_store.ll @@ -0,0 +1,42 @@ +; Test to make sure that stores in a diamond get merged with a non barrier store after the store instruction to be sunk +; Stores sunks into the footer. +; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +%struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 } + +; Function Attrs: nounwind uwtable +define void @sink_store(%struct.node* nocapture %r, i32 %index) { +entry: + %node.0.in16 = getelementptr inbounds %struct.node* %r, i64 0, i32 2 + %node.017 = load %struct.node** %node.0.in16, align 8 + %index.addr = alloca i32, align 4 + store i32 %index, i32* %index.addr, align 4 + %0 = load i32* %index.addr, align 4 + %cmp = icmp slt i32 %0, 0 + br i1 %cmp, label %if.then, label %if.else + +; CHECK: if.then +if.then: ; preds = %entry + %1 = load i32* %index.addr, align 4 + %p1 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6 + ; CHECK-NOT: store i32 + store i32 %1, i32* %p1, align 4 + br label %if.end + +; CHECK: if.else +if.else: ; preds = %entry + %2 = load i32* %index.addr, align 4 + %add = add nsw i32 %2, 1 + %p2 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6 + store i32 %add, i32* %p2, align 4 + %p3 = getelementptr inbounds %struct.node* %node.017, i32 5, i32 6 + ; CHECK: store i32 + store i32 %add, i32* %p3, align 4 ; This is not a barrier + br label %if.end + +; CHECK: if.end +if.end: ; preds = %if.else, %if.then +; CHECK: store + ret void +} Index: test/Transforms/InstMerge/st_sink_two_stores.ll =================================================================== --- test/Transforms/InstMerge/st_sink_two_stores.ll +++ test/Transforms/InstMerge/st_sink_two_stores.ll @@ -0,0 +1,47 @@ +; Test to make sure that stores in a diamond get merged +; Stores sunks into the footer. +; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +%struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 } + +; Function Attrs: nounwind uwtable +define void @sink_store(%struct.node* nocapture %r, i32 %index) { +entry: + %node.0.in16 = getelementptr inbounds %struct.node* %r, i64 0, i32 2 + %node.017 = load %struct.node** %node.0.in16, align 8 + %index.addr = alloca i32, align 4 + store i32 %index, i32* %index.addr, align 4 + %0 = load i32* %index.addr, align 4 + %cmp = icmp slt i32 %0, 0 + br i1 %cmp, label %if.then, label %if.else + +; CHECK: if.then +if.then: ; preds = %entry + %1 = load i32* %index.addr, align 4 + %p1 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6 + ; CHECK-NOT: store i32 + store i32 %1, i32* %p1, align 4 + %p2 = getelementptr inbounds %struct.node* %node.017, i32 4, i32 6 + ; CHECK-NOT: store i32 + store i32 %1, i32* %p2, align 4 + br label %if.end + +; CHECK: if.else +if.else: ; preds = %entry + %2 = load i32* %index.addr, align 4 + %add = add nsw i32 %2, 1 + %p3 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6 + ; CHECK-NOT: store i32 + store i32 %add, i32* %p3, align 4 + %p4 = getelementptr inbounds %struct.node* %node.017, i32 4, i32 6 + ; CHECK-NOT: store i32 + store i32 %2, i32* %p4, align 4 + br label %if.end + +; CHECK: if.end +if.end: ; preds = %if.else, %if.then +; CHECK: store +; CHECK: store + ret void +} Index: test/Transforms/InstMerge/st_sink_with_barrier.ll =================================================================== --- test/Transforms/InstMerge/st_sink_with_barrier.ll +++ test/Transforms/InstMerge/st_sink_with_barrier.ll @@ -0,0 +1,42 @@ +; Test to make sure that load from the same address as a store and appears after the store prevents the store from being sunk +; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +%struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 } + +; Function Attrs: nounwind uwtable +define void @sink_store(%struct.node* nocapture %r, i32 %index) { +entry: + %node.0.in16 = getelementptr inbounds %struct.node* %r, i64 0, i32 2 + %node.017 = load %struct.node** %node.0.in16, align 8 + %index.addr = alloca i32, align 4 + store i32 %index, i32* %index.addr, align 4 + %0 = load i32* %index.addr, align 4 + %cmp = icmp slt i32 %0, 0 + br i1 %cmp, label %if.then, label %if.else + +; CHECK: if.then +if.then: ; preds = %entry + %1 = load i32* %index.addr, align 4 + %p1 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6 + ; CHECK: store i32 + store i32 %1, i32* %p1, align 4 + %p2 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6 + ; CHECK: load i32* + %barrier = load i32 * %p2, align 4 + br label %if.end + +; CHECK: if.else +if.else: ; preds = %entry + %2 = load i32* %index.addr, align 4 + %add = add nsw i32 %2, 1 + %p3 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6 + ; CHECK: store i32 + store i32 %add, i32* %p3, align 4 + br label %if.end + +; CHECK: if.end +if.end: ; preds = %if.else, %if.then +; CHECK-NOT: store + ret void +}