diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -25,6 +25,7 @@
 class LoadInst;
 class Loop;
 class MDNode;
+class MemoryLocation;
 class ScalarEvolution;
 class TargetLibraryInfo;
@@ -147,7 +148,7 @@
 /// this function, if ScanFrom points at the beginning of the block, it's safe
 /// to continue scanning the predecessors.
 ///
-/// \param Ptr The pointer we want the load and store to originate from.
+/// \param Loc The location we want the load and store to originate from.
 /// \param AccessTy The access type of the pointer.
 /// \param AtLeastAtomic Are we looking for at-least an atomic load/store ? In
 /// case it is false, we can return an atomic or non-atomic load or store. In
@@ -163,8 +164,8 @@
 /// location in memory, as opposed to the value operand of a store.
 ///
 /// \returns The found value, or nullptr if no value is found.
-Value *FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, bool AtLeastAtomic,
-                                 BasicBlock *ScanBB,
+Value *findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy,
+                                 bool AtLeastAtomic, BasicBlock *ScanBB,
                                  BasicBlock::iterator &ScanFrom,
                                  unsigned MaxInstsToScan, AAResults *AA,
                                  bool *IsLoadCSE, unsigned *NumScanedInst);
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@@ -437,21 +438,24 @@
   if (!Load->isUnordered())
     return nullptr;
 
-  return FindAvailablePtrLoadStore(
-      Load->getPointerOperand(), Load->getType(), Load->isAtomic(), ScanBB,
-      ScanFrom, MaxInstsToScan, AA, IsLoad, NumScanedInst);
+  MemoryLocation Loc = MemoryLocation::get(Load);
+  return findAvailablePtrLoadStore(Loc, Load->getType(), Load->isAtomic(),
+                                   ScanBB, ScanFrom, MaxInstsToScan, AA, IsLoad,
+                                   NumScanedInst);
 }
 
 // Check if the load and the store have the same base, constant offsets and
 // non-overlapping access ranges.
-static bool AreNonOverlapSameBaseLoadAndStore(
-    Value *LoadPtr, Type *LoadTy, Value *StorePtr, Type *StoreTy,
-    const DataLayout &DL) {
+static bool areNonOverlapSameBaseLoadAndStore(const Value *LoadPtr,
+                                              Type *LoadTy,
+                                              const Value *StorePtr,
+                                              Type *StoreTy,
+                                              const DataLayout &DL) {
   APInt LoadOffset(DL.getTypeSizeInBits(LoadPtr->getType()), 0);
   APInt StoreOffset(DL.getTypeSizeInBits(StorePtr->getType()), 0);
-  Value *LoadBase = LoadPtr->stripAndAccumulateConstantOffsets(
+  const Value *LoadBase = LoadPtr->stripAndAccumulateConstantOffsets(
       DL, LoadOffset, /* AllowNonInbounds */ false);
-  Value *StoreBase = StorePtr->stripAndAccumulateConstantOffsets(
+  const Value *StoreBase = StorePtr->stripAndAccumulateConstantOffsets(
       DL, StoreOffset, /* AllowNonInbounds */ false);
   if (LoadBase != StoreBase)
     return false;
@@ -464,7 +468,7 @@
   return LoadRange.intersectWith(StoreRange).isEmptySet();
 }
 
-static Value *getAvailableLoadStore(Instruction *Inst, Value *Ptr,
+static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr,
                                     Type *AccessTy, bool AtLeastAtomic,
                                     const DataLayout &DL, bool *IsLoadCSE) {
   // If this is a load of Ptr, the loaded value is available.
@@ -511,17 +515,15 @@
   return nullptr;
 }
 
-Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy,
-                                       bool AtLeastAtomic, BasicBlock *ScanBB,
-                                       BasicBlock::iterator &ScanFrom,
-                                       unsigned MaxInstsToScan,
-                                       AAResults *AA, bool *IsLoadCSE,
-                                       unsigned *NumScanedInst) {
+Value *llvm::findAvailablePtrLoadStore(
+    const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic,
+    BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan,
+    AAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst) {
   if (MaxInstsToScan == 0)
     MaxInstsToScan = ~0U;
 
   const DataLayout &DL = ScanBB->getModule()->getDataLayout();
-  Value *StrippedPtr = Ptr->stripPointerCasts();
+  const Value *StrippedPtr = Loc.Ptr->stripPointerCasts();
 
   while (ScanFrom != ScanBB->begin()) {
     // We must ignore debug info directives when counting (otherwise they
@@ -547,8 +549,6 @@
       return Available;
 
     // Try to get the store size for the type.
-    auto AccessSize = LocationSize::precise(DL.getTypeStoreSize(AccessTy));
-
     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
       Value *StorePtr = SI->getPointerOperand()->stripPointerCasts();
 
@@ -565,14 +565,14 @@
         // base, constant offsets and non-overlapping access ranges, ignore the
         // store. This is a simple form of alias analysis that is used by the
         // inliner. FIXME: use BasicAA if possible.
-        if (AreNonOverlapSameBaseLoadAndStore(
-                Ptr, AccessTy, SI->getPointerOperand(),
+        if (areNonOverlapSameBaseLoadAndStore(
+                Loc.Ptr, AccessTy, SI->getPointerOperand(),
                 SI->getValueOperand()->getType(), DL))
           continue;
       } else {
         // If we have alias analysis and it says the store won't modify the
         // loaded value, ignore the store.
-        if (!isModSet(AA->getModRefInfo(SI, StrippedPtr, AccessSize)))
+        if (!isModSet(AA->getModRefInfo(SI, Loc)))
           continue;
       }
 
@@ -585,7 +585,7 @@
     if (Inst->mayWriteToMemory()) {
       // If alias analysis claims that it really won't modify the load,
       // ignore it.
-      if (AA && !isModSet(AA->getModRefInfo(Inst, StrippedPtr, AccessSize)))
+      if (AA && !isModSet(AA->getModRefInfo(Inst, Loc)))
         continue;
 
       // May modify the pointer, bail out.
@@ -635,9 +635,9 @@
   // If we found an available value, ensure that the instructions in between
   // did not modify the memory location.
   if (Available) {
-    auto AccessSize = LocationSize::precise(DL.getTypeStoreSize(AccessTy));
+    MemoryLocation Loc = MemoryLocation::get(Load);
     for (Instruction *Inst : MustNotAliasInsts)
-      if (isModSet(AA.getModRefInfo(Inst, StrippedPtr, AccessSize)))
+      if (isModSet(AA.getModRefInfo(Inst, Loc)))
         return nullptr;
   }
 
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Analysis/LazyValueInfo.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -1388,10 +1389,16 @@
            "Attempting to CSE volatile or atomic loads");
     // If this is a load on a phi pointer, phi-translate it and search
     // for available load/store to the pointer in predecessors.
-    Value *Ptr = LoadedPtr->DoPHITranslation(LoadBB, PredBB);
-    PredAvailable = FindAvailablePtrLoadStore(
-        Ptr, LoadI->getType(), LoadI->isAtomic(), PredBB, BBIt,
-        DefMaxInstsToScan, AA, &IsLoadCSE, &NumScanedInst);
+    Type *AccessTy = LoadI->getType();
+    AAMDNodes AATags;
+    LoadI->getAAMetadata(AATags);
+    const auto &DL = LoadI->getModule()->getDataLayout();
+    MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),
+                       LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
+                       AATags);
+    PredAvailable = findAvailablePtrLoadStore(Loc, AccessTy, LoadI->isAtomic(),
+                                              PredBB, BBIt, DefMaxInstsToScan,
+                                              AA, &IsLoadCSE, &NumScanedInst);
 
     // If PredBB has a single predecessor, continue scanning through the
     // single predecessor.
@@ -1401,8 +1408,8 @@
         SinglePredBB = SinglePredBB->getSinglePredecessor();
         if (SinglePredBB) {
           BBIt = SinglePredBB->end();
-          PredAvailable = FindAvailablePtrLoadStore(
-              Ptr, LoadI->getType(), LoadI->isAtomic(), SinglePredBB, BBIt,
+          PredAvailable = findAvailablePtrLoadStore(
+              Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt,
               (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE,
               &NumScanedInst);
         }
diff --git a/llvm/test/Transforms/InstCombine/load-no-aliasing.ll b/llvm/test/Transforms/InstCombine/load-no-aliasing.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/load-no-aliasing.ll
@@ -0,0 +1,22 @@
+; RUN: opt -tbaa -evaluate-aa-metadata -instcombine -S < %s | FileCheck %s
+
+; Check that load-to-load forwarding works with a non-aliasing store in between.
+; CHECK-LABEL: @test_load_store_load_combine(
+; CHECK-NEXT:    %a = load i32, i32* %0, align 4, !tbaa !0
+; CHECK-NEXT:    %f = sitofp i32 %a to float
+; CHECK-NEXT:    store float %f, float* %1, align 4, !tbaa !4
+; CHECK-NEXT:    ret i32 %a
+define i32 @test_load_store_load_combine(i32*, float*) {
+  %a = load i32, i32* %0, align 4, !tbaa !0
+  %f = sitofp i32 %a to float
+  store float %f, float* %1, align 4, !tbaa !4
+  %b = load i32, i32* %0, align 4, !tbaa !0
+  ret i32 %b
+}
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"int", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"float", !2, i64 0}
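
Usage note for reviewers: the sketch below shows how a caller might drive the new findAvailablePtrLoadStore() overload, building a MemoryLocation that carries the load's pointer, precise store size, and AA metadata, the same way the JumpThreading hunk above does. The helper name scanForAvailableValue and the scan budget are illustrative only (not part of this patch), and the sketch assumes the getAAMetadata(AAMDNodes &) out-parameter form used elsewhere in this change.

// Illustrative sketch only -- not part of this patch. scanForAvailableValue is
// a hypothetical caller of the new findAvailablePtrLoadStore() overload.
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static Value *scanForAvailableValue(LoadInst *LoadI, BasicBlock *ScanBB,
                                    BasicBlock::iterator &ScanFrom,
                                    AAResults *AA) {
  Type *AccessTy = LoadI->getType();
  const DataLayout &DL = LoadI->getModule()->getDataLayout();

  // Build a MemoryLocation from the load: pointer, precise store size, and the
  // load's AA metadata, so AA->getModRefInfo() can use TBAA to skip stores
  // that provably do not alias the loaded location.
  AAMDNodes AATags;
  LoadI->getAAMetadata(AATags);
  MemoryLocation Loc(LoadI->getPointerOperand(),
                     LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
                     AATags);

  bool IsLoadCSE = false;
  unsigned NumScanned = 0;
  // Scan backwards from ScanFrom within ScanBB; the budget of 6 is an
  // arbitrary choice for this sketch.
  return findAvailablePtrLoadStore(Loc, AccessTy, LoadI->isAtomic(), ScanBB,
                                   ScanFrom, /*MaxInstsToScan=*/6, AA,
                                   &IsLoadCSE, &NumScanned);
}

Passing the full MemoryLocation (rather than a bare pointer plus access size) is what lets AA see the TBAA tags, which is how the InstCombine test above can forward %a to %b across the intervening float store.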