diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -25,6 +25,7 @@
 class LoadInst;
 class Loop;
 class MDNode;
+class MemoryLocation;
 class ScalarEvolution;
 class TargetLibraryInfo;
 
@@ -164,7 +165,7 @@
 ///
 /// \returns The found value, or nullptr if no value is found.
 Value *FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, bool AtLeastAtomic,
-                                 BasicBlock *ScanBB,
+                                 Optional<MemoryLocation> Loc, BasicBlock *ScanBB,
                                  BasicBlock::iterator &ScanFrom,
                                  unsigned MaxInstsToScan, AAResults *AA,
                                  bool *IsLoadCSE, unsigned *NumScanedInst);
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@@ -437,9 +438,10 @@
   if (!Load->isUnordered())
     return nullptr;
 
-  return FindAvailablePtrLoadStore(
-      Load->getPointerOperand(), Load->getType(), Load->isAtomic(), ScanBB,
-      ScanFrom, MaxInstsToScan, AA, IsLoad, NumScanedInst);
+  MemoryLocation Loc = MemoryLocation::get(Load);
+  return FindAvailablePtrLoadStore(Load->getPointerOperand(), Load->getType(),
+                                   Load->isAtomic(), Loc, ScanBB, ScanFrom,
+                                   MaxInstsToScan, AA, IsLoad, NumScanedInst);
 }
 
 // Check if the load and the store have the same base, constant offsets and
@@ -511,12 +513,11 @@
   return nullptr;
 }
 
-Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy,
-                                       bool AtLeastAtomic, BasicBlock *ScanBB,
-                                       BasicBlock::iterator &ScanFrom,
-                                       unsigned MaxInstsToScan,
-                                       AAResults *AA, bool *IsLoadCSE,
-                                       unsigned *NumScanedInst) {
+Value *llvm::FindAvailablePtrLoadStore(
+    Value *Ptr, Type *AccessTy, bool AtLeastAtomic,
+    Optional<MemoryLocation> Loc, BasicBlock *ScanBB,
+    BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, AAResults *AA,
+    bool *IsLoadCSE, unsigned *NumScanedInst) {
   if (MaxInstsToScan == 0)
     MaxInstsToScan = ~0U;
 
@@ -548,6 +549,8 @@
 
     // Try to get the store size for the type.
     auto AccessSize = LocationSize::precise(DL.getTypeStoreSize(AccessTy));
+    if (!Loc)
+      Loc = MemoryLocation(StrippedPtr, AccessSize);
 
     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
       Value *StorePtr = SI->getPointerOperand()->stripPointerCasts();
@@ -572,7 +575,7 @@
       } else {
         // If we have alias analysis and it says the store won't modify the
         // loaded value, ignore the store.
-        if (!isModSet(AA->getModRefInfo(SI, StrippedPtr, AccessSize)))
+        if (!isModSet(AA->getModRefInfo(SI, *Loc)))
           continue;
       }
 
@@ -585,7 +588,7 @@
     if (Inst->mayWriteToMemory()) {
       // If alias analysis claims that it really won't modify the load,
      // ignore it.
-      if (AA && !isModSet(AA->getModRefInfo(Inst, StrippedPtr, AccessSize)))
+      if (AA && !isModSet(AA->getModRefInfo(Inst, *Loc)))
        continue;
 
      // May modify the pointer, bail out.
@@ -635,9 +638,9 @@
   // If we found an available value, ensure that the instructions in between
   // did not modify the memory location.
   if (Available) {
-    auto AccessSize = LocationSize::precise(DL.getTypeStoreSize(AccessTy));
+    MemoryLocation Loc = MemoryLocation::get(Load);
     for (Instruction *Inst : MustNotAliasInsts)
-      if (isModSet(AA.getModRefInfo(Inst, StrippedPtr, AccessSize)))
+      if (isModSet(AA.getModRefInfo(Inst, Loc)))
         return nullptr;
   }
 
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1390,7 +1390,7 @@
     // for available load/store to the pointer in predecessors.
     Value *Ptr = LoadedPtr->DoPHITranslation(LoadBB, PredBB);
     PredAvailable = FindAvailablePtrLoadStore(
-        Ptr, LoadI->getType(), LoadI->isAtomic(), PredBB, BBIt,
+        Ptr, LoadI->getType(), LoadI->isAtomic(), None, PredBB, BBIt,
         DefMaxInstsToScan, AA, &IsLoadCSE, &NumScanedInst);
 
     // If PredBB has a single predecessor, continue scanning through the
@@ -1402,7 +1402,7 @@
       if (SinglePredBB) {
         BBIt = SinglePredBB->end();
         PredAvailable = FindAvailablePtrLoadStore(
-            Ptr, LoadI->getType(), LoadI->isAtomic(), SinglePredBB, BBIt,
+            Ptr, LoadI->getType(), LoadI->isAtomic(), None, SinglePredBB, BBIt,
             (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE,
             &NumScanedInst);
       }
diff --git a/llvm/test/Transforms/LoopRotate/load-hoist.ll b/llvm/test/Transforms/LoopRotate/load-hoist.ll
--- a/llvm/test/Transforms/LoopRotate/load-hoist.ll
+++ b/llvm/test/Transforms/LoopRotate/load-hoist.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S -loop-rotate < %s -verify-loop-info -verify-dom-info | FileCheck %s
-; RUN: opt -S -loop-rotate < %s -verify-loop-info -verify-dom-info -enable-mssa-loop-dependency=true -verify-memoryssa | FileCheck %s
+; RUN: opt -S -tbaa -evaluate-aa-metadata -loop-rotate < %s -verify-loop-info -verify-dom-info | FileCheck %s
+; RUN: opt -S -tbaa -evaluate-aa-metadata -loop-rotate < %s -verify-loop-info -verify-dom-info -enable-mssa-loop-dependency=true -verify-memoryssa | FileCheck %s
 
 ; Invariant loads are hoisted if no aliasing occurs
 ; CHECK-LABEL: @load(
@@ -38,6 +38,43 @@
   ret i32 %ld
 }
 
+; Invariant loads are hoisted if TBAA determines no aliasing occurs
+; CHECK-LABEL: @tbaa_load(
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT:   [[LD:%.*]] = load i32, i32* %src, align 4, !tbaa !0
+; CHECK-NEXT:   [[CMP2:%.*]] = icmp slt i32 [[LD]], 100
+; CHECK-NEXT:   br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT:   [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:   [[INC]] = add nsw i32 [[IV]], 1
+; CHECK-NEXT:   store float 0.000000e+00, float* %dst, align 4, !tbaa !6
+; CHECK-NEXT:   [[CMP1:%.*]] = icmp slt i32 [[INC]], 100
+; CHECK-NEXT:   [[CMP:%.*]] = or i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT:   br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_END]]
+; CHECK-NEXT:   [[LD_LCSSA:%.*]] = phi i32 [ [[LD]], %[[FOR_BODY]] ]
+; CHECK-NEXT:   ret i32 [[LD_LCSSA]]
+define i32 @tbaa_load(i32* %src, float* %dst) nounwind ssp {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %ld = load i32, i32* %src, align 4, !tbaa !5
+  %cmp1 = icmp slt i32 %i.0, 100
+  %cmp2 = icmp slt i32 %ld, 100
+  %cmp = or i1 %cmp1, %cmp2
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %inc = add nsw i32 %i.0, 1
+  store float 0.0, float* %dst, align 4, !tbaa !6
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret i32 %ld
+}
+
 ; Atomic loads are not hoisted
 ; CHECK-LABEL: @atomic_load(
 ; CHECK-NEXT: [[ENTRY:.*]]:
@@ -154,3 +191,12 @@
 for.end:                                          ; preds = %for.cond
   ret i32 %ld
 }
+
+!0 = !{!"i32 pointer", !1, i64 0, !4, i64 8}
+!1 = !{!"any pointer", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C++ TBAA"}
+!4 = !{!"int", !1, i64 0}
+!5 = !{!0, !1, i64 0}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"float", !2, i64 0}
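
For reference, a minimal caller-side sketch of the patched interface, not part of the patch itself; the helper name findAvailableValueForLoad, its parameters, and the scan limit of 6 are illustrative assumptions only.

// Sketch: scan backwards through ScanBB for a value that makes Load redundant.
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static Value *findAvailableValueForLoad(LoadInst *Load, Value *Ptr,
                                        BasicBlock *ScanBB, AAResults *AA,
                                        unsigned MaxInstsToScan = 6) {
  // Only forward the load's full MemoryLocation (size plus AA metadata such
  // as !tbaa) when scanning with its own pointer; for a PHI-translated
  // pointer (as in JumpThreading) the metadata may not apply, so pass None
  // and let FindAvailablePtrLoadStore build a bare pointer-plus-size
  // location internally.
  Optional<MemoryLocation> Loc;
  if (Ptr == Load->getPointerOperand())
    Loc = MemoryLocation::get(Load);

  bool IsLoadCSE = false;
  unsigned NumScanned = 0;
  BasicBlock::iterator ScanFrom = ScanBB->end();
  return FindAvailablePtrLoadStore(Ptr, Load->getType(), Load->isAtomic(), Loc,
                                   ScanBB, ScanFrom, MaxInstsToScan, AA,
                                   &IsLoadCSE, &NumScanned);
}

Threading an Optional<MemoryLocation> through, instead of always rebuilding a location from the stripped pointer, is what lets alias queries see the load's TBAA tags, which the new @tbaa_load test relies on to hoist the invariant i32 load past the float store.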