diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h --- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -19,6 +19,7 @@ #include "llvm/ADT/PointerSumType.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/PHITransAddr.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PredIteratorCache.h" #include "llvm/IR/ValueHandle.h" @@ -31,7 +32,6 @@ class AssumptionCache; class BatchAAResults; class DominatorTree; -class PHITransAddr; /// A memory dependence query can return one of three different answers. class MemDepResult { @@ -230,10 +230,11 @@ /// (potentially phi translated) address that was live in the block. class NonLocalDepResult { NonLocalDepEntry Entry; - Value *Address; + SelectAddr Address; public: - NonLocalDepResult(BasicBlock *BB, MemDepResult Result, Value *Address) + NonLocalDepResult(BasicBlock *BB, MemDepResult Result, + const SelectAddr &Address) : Entry(BB, Result), Address(Address) {} // BB is the sort key, it can't be changed. @@ -254,7 +255,7 @@ /// a cached result and that address was deleted. /// /// The address is always null for a non-local 'call' dependence. 
- Value *getAddress() const { return Address; } + SelectAddr getAddress() const { return Address; } }; /// Provides a lazy, caching interface for making common memory aliasing diff --git a/llvm/include/llvm/Analysis/PHITransAddr.h b/llvm/include/llvm/Analysis/PHITransAddr.h --- a/llvm/include/llvm/Analysis/PHITransAddr.h +++ b/llvm/include/llvm/Analysis/PHITransAddr.h @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" namespace llvm { class AssumptionCache; @@ -22,6 +23,36 @@ class DataLayout; class TargetLibraryInfo; +// SelectAddr - holds either one plain address (V1, with V2 == nullptr) or the +// pair of addresses for the "true" (V1) and "false" (V2) sides of a select +// dependency; a plain address that is a SelectInst can be split on demand. +class SelectAddr { +public: + using SelectAddrs = std::pair<Value *, Value *>; + + SelectAddr(Value *Addr) : V1(Addr), V2(nullptr) {} + SelectAddr(Value *TrueAddr, Value *FalseAddr) : V1(TrueAddr), V2(FalseAddr) { + assert(TrueAddr && FalseAddr && "TrueAddr and FalseAddr must be present"); + } + Value *getAddr() const { + assert(!V2 && "getAddr() called on a true/false address pair"); + return V1; + } + SelectAddrs getSelectAddrs() const { + assert(V1 && "select address must not be null"); + // An explicit true/false pair was stored: return it unchanged. + if (V2) + return {V1, V2}; + // Otherwise V1 must be a SelectInst; return its true and false operands. + assert(isa<SelectInst>(V1) && "plain address must be a select"); + auto *SI = cast<SelectInst>(V1); + return {SI->getTrueValue(), SI->getFalseValue()}; + } + +private: + Value *V1, *V2; +}; + /// PHITransAddr - An address value which tracks and handles phi translation. /// As we walk "up" the CFG through predecessors, we need to ensure that the /// address we're tracking is kept up to date. For example, if we're analyzing @@ -57,6 +88,15 @@ Value *getAddr() const { return Addr; } + /// getSelectCondition - return the condition of the first select found among + /// the instruction inputs of this address (nullptr if there is none). 
+ Value *getSelectCondition() const { + for (auto *I : InstInputs) + if (auto *SI = dyn_cast<SelectInst>(I)) + return SI->getCondition(); + return nullptr; + } + /// needsPHITranslationFromBlock - Return true if moving from the specified /// BasicBlock to its predecessors requires PHI translation. bool needsPHITranslationFromBlock(BasicBlock *BB) const { @@ -78,6 +118,12 @@ Value *translateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate); + /// translateValue - PHI translate the current address from \p CurBB to \p + /// PredBB twice: a PHI input that is a select on \p Cond is replaced by its + /// true value in the first result and by its false value in the second. + SelectAddr::SelectAddrs translateValue(BasicBlock *CurBB, BasicBlock *PredBB, + const DominatorTree *DT, Value *Cond); + /// translateWithInsertion - PHI translate this value into the specified /// predecessor block, inserting a computation of the value if it is /// unavailable. @@ -97,8 +143,13 @@ bool verify() const; private: + /// translateSubExpr - recursively translate value \p V from \p CurBB to \p + /// PredBB, and if the value depends on selects with condition \p Cond, also + /// translate it through these selects, taking the side chosen by \p CondVal. + /// Return nullptr on failure. 
Value *translateSubExpr(Value *V, BasicBlock *CurBB, BasicBlock *PredBB, - const DominatorTree *DT); + const DominatorTree *DT, Value *Cond = nullptr, + bool CondVal = false); /// insertTranslatedSubExpr - Insert a computation of the PHI translated /// version of 'V' for the edge PredBB->CurBB into the end of the PredBB diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h --- a/llvm/include/llvm/Transforms/Scalar/GVN.h +++ b/llvm/include/llvm/Transforms/Scalar/GVN.h @@ -52,6 +52,7 @@ class NonLocalDepResult; class OptimizationRemarkEmitter; class PHINode; +class SelectAddr; class TargetLibraryInfo; class Value; /// A private "module" namespace for types and utilities used by GVN. These @@ -320,7 +321,8 @@ /// Given a local dependency (Def or Clobber) determine if a value is /// available for the load. std::optional - AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo, Value *Address); + AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo, + const SelectAddr &Addr); /// Given a list of non-local dependencies, determine if a value is /// available for the load in each specified block. If it is, add it to diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -23,7 +23,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" @@ -1016,6 +1015,34 @@ } } +/// Try to translate \p Pointer from \p CurBB to \p PredBB with select condition +/// \p Cond, and if succeed, find non-clobbered select dependency for this \p +/// Loc in \p PredBB; return NonLocalDepResult in this case. 
+static std::optional<NonLocalDepResult> +tryToTranslateSelect(const PHITransAddr &Pointer, const MemoryLocation &Loc, + BasicBlock *CurBB, BasicBlock *PredBB, Value *Cond, + DominatorTree &DT, AAResults &AA) { + auto [TrueAddr, FalseAddr] = + PHITransAddr(Pointer).translateValue(CurBB, PredBB, &DT, Cond); + if (!TrueAddr || !FalseAddr) + return std::nullopt; + BatchAAResults BatchAA(AA); + MemoryLocation TrueLoc = Loc.getWithNewPtr(TrueAddr); + MemoryLocation FalseLoc = Loc.getWithNewPtr(FalseAddr); + for (auto &I : reverse(*PredBB)) { + // Give up if I may write either location (Mod and ModRef both clobber). + if (isModSet(BatchAA.getModRefInfo(&I, TrueLoc)) || + isModSet(BatchAA.getModRefInfo(&I, FalseLoc))) + return std::nullopt; + // A select on Cond reached without a clobber is the dependency we want. + if (auto *SI = dyn_cast<SelectInst>(&I)) + if (SI->getCondition() == Cond) + return NonLocalDepResult(PredBB, MemDepResult::getDef(SI), + SelectAddr(TrueAddr, FalseAddr)); + } + return std::nullopt; +} + /// Perform a dependency query based on pointer/pointeesize starting at the end /// of StartBB. /// @@ -1346,8 +1373,17 @@ // predecessor, then we have to assume that the pointer is clobbered in // that predecessor. We can still do PRE of the load, which would insert // a computation of the pointer in this predecessor. - if (!PredPtrVal) + if (!PredPtrVal) { + // If we have translation failure, but there is a select input in + // address, try to translate both sides of it. + if (Value *Cond = PredPointer.getSelectCondition()) + if (auto Res = + tryToTranslateSelect(Pointer, Loc, BB, Pred, Cond, DT, AA)) { + Result.push_back(*Res); + continue; + } CanTranslate = false; + } // FIXME: it is entirely possible that PHI translating will end up with // the same value. 
Consider PHI translating something like: diff --git a/llvm/lib/Analysis/PHITransAddr.cpp b/llvm/lib/Analysis/PHITransAddr.cpp --- a/llvm/lib/Analysis/PHITransAddr.cpp +++ b/llvm/lib/Analysis/PHITransAddr.cpp @@ -16,7 +16,6 @@ #include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/Instructions.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -126,9 +125,14 @@ RemoveInstInputs(OpInst, InstInputs); } +/// translateSubExpr - recursively translate value \p V from \p CurBB to \p +/// PredBB, and if value depends from selects with \p Cond condition, also +/// translate it through these selects with \p CondVal predicate. Return nullptr +/// on failure. Value *PHITransAddr::translateSubExpr(Value *V, BasicBlock *CurBB, BasicBlock *PredBB, - const DominatorTree *DT) { + const DominatorTree *DT, Value *Cond, + bool CondVal) { // If this is a non-instruction value, it can't require PHI translation. Instruction *Inst = dyn_cast(V); if (!Inst) return V; @@ -151,8 +155,13 @@ InstInputs.erase(find(InstInputs, Inst)); // If this is a PHI, go ahead and translate it. - if (PHINode *PN = dyn_cast(Inst)) - return addAsInput(PN->getIncomingValueForBlock(PredBB)); + if (PHINode *PN = dyn_cast(Inst)) { + auto *V = PN->getIncomingValueForBlock(PredBB); + if (auto *SI = dyn_cast(V)) + if (SI->getCondition() == Cond) + return addAsInput(CondVal ? SI->getTrueValue() : SI->getFalseValue()); + return addAsInput(V); + } // If this is a non-phi value, and it is analyzable, we can incorporate it // into the expression by making all instruction operands be inputs. @@ -170,7 +179,8 @@ // operands need to be phi translated, and if so, reconstruct it. 
if (CastInst *Cast = dyn_cast(Inst)) { - Value *PHIIn = translateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT); + Value *PHIIn = + translateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT, Cond, CondVal); if (!PHIIn) return nullptr; if (PHIIn == Cast->getOperand(0)) return Cast; @@ -199,7 +209,7 @@ SmallVector GEPOps; bool AnyChanged = false; for (Value *Op : GEP->operands()) { - Value *GEPOp = translateSubExpr(Op, CurBB, PredBB, DT); + Value *GEPOp = translateSubExpr(Op, CurBB, PredBB, DT, Cond, CondVal); if (!GEPOp) return nullptr; AnyChanged |= GEPOp != Op; @@ -243,7 +253,8 @@ bool isNSW = cast(Inst)->hasNoSignedWrap(); bool isNUW = cast(Inst)->hasNoUnsignedWrap(); - Value *LHS = translateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT); + Value *LHS = + translateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT, Cond, CondVal); if (!LHS) return nullptr; // If the PHI translated LHS is an add of a constant, fold the immediates. @@ -313,6 +324,20 @@ return Addr; } +/// translateValue - PHI translate the current address from \p CurBB to \p +/// PredBB twice, on two scratch copies of this object: once with \p Cond taken +/// as true and once as false. Returns the {true, false} address pair. +SelectAddr::SelectAddrs PHITransAddr::translateValue(BasicBlock *CurBB, + BasicBlock *PredBB, + const DominatorTree *DT, + Value *Cond) { + Value *TrueAddr = + PHITransAddr(*this).translateSubExpr(Addr, CurBB, PredBB, DT, Cond, true); + Value *FalseAddr = PHITransAddr(*this).translateSubExpr(Addr, CurBB, PredBB, + DT, Cond, false); + return {TrueAddr, FalseAddr}; +} + /// PHITranslateWithInsertion - PHI translate this value into the specified /// predecessor block, inserting a computation of the value if it is /// unavailable. 
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -1134,7 +1134,7 @@ std::optional GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo, - Value *Address) { + const SelectAddr &Addr) { assert(Load->isUnordered() && "rules below are incorrect for ordered access"); assert(DepInfo.isLocal() && "expected a local dependence"); @@ -1142,6 +1142,7 @@ const DataLayout &DL = Load->getModule()->getDataLayout(); if (DepInfo.isClobber()) { + Value *Address = Addr.getAddr(); // If the dependence is to a store that writes to a superset of the bits // read by the load, we can extract the bits we need for the load from the // stored value. @@ -1250,16 +1251,20 @@ // between load values. There must be no instructions between the found // loads and DepInst that may clobber the loads. if (auto *Sel = dyn_cast(DepInst)) { - assert(Sel->getType() == Load->getPointerOperandType()); + auto [TrueAddr, FalseAddr] = Addr.getSelectAddrs(); + assert(TrueAddr && TrueAddr->getType() == Load->getPointerOperandType() && + "Invalid address of true side of select dependency"); + assert(FalseAddr && FalseAddr->getType() == Load->getPointerOperandType() && + "Invalid address of false side of select dependency"); auto Loc = MemoryLocation::get(Load); Value *V1 = - findDominatingValue(Loc.getWithNewPtr(Sel->getTrueValue()), - Load->getType(), DepInst, getAliasAnalysis()); + findDominatingValue(Loc.getWithNewPtr(TrueAddr), Load->getType(), + DepInst, getAliasAnalysis()); if (!V1) return std::nullopt; Value *V2 = - findDominatingValue(Loc.getWithNewPtr(Sel->getFalseValue()), - Load->getType(), DepInst, getAliasAnalysis()); + findDominatingValue(Loc.getWithNewPtr(FalseAddr), Load->getType(), + DepInst, getAliasAnalysis()); if (!V2) return std::nullopt; return AvailableValue::getSelect(Sel, V1, V2); diff --git a/llvm/test/Transforms/GVN/PRE/pre-load-through-select.ll 
b/llvm/test/Transforms/GVN/PRE/pre-load-through-select.ll --- a/llvm/test/Transforms/GVN/PRE/pre-load-through-select.ll +++ b/llvm/test/Transforms/GVN/PRE/pre-load-through-select.ll @@ -835,24 +835,31 @@ ; CHECK-LABEL: @test_phi_select_index_non_local( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I:%.*]], [[N:%.*]] -; CHECK-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[IF_END:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[ENTRY_IF_END_CRIT_EDGE:%.*]] +; CHECK: entry.if.end_crit_edge: +; CHECK-NEXT: [[IDXPROM5_PHI_TRANS_INSERT:%.*]] = sext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAYIDX6_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM5_PHI_TRANS_INSERT]] +; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[ARRAYIDX6_PHI_TRANS_INSERT]], align 4 +; CHECK-NEXT: br label [[IF_END:%.*]] ; CHECK: land.lhs.true: ; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[ADD]] to i64 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM1]] ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[CMP3]], i32 [[TMP1]], i32 [[TMP0]] ; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP3]], i32 [[ADD]], i32 [[I]] +; CHECK-NEXT: [[DOTPRE1:%.*]] = sext i32 [[SPEC_SELECT]] to i64 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[I_ADDR_0:%.*]] = phi i32 [ [[I]], [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[LAND_LHS_TRUE]] ] -; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[I_ADDR_0]] to i64 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = 
getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM5]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[IDXPROM5_PRE_PHI:%.*]] = phi i64 [ [[IDXPROM5_PHI_TRANS_INSERT]], [[ENTRY_IF_END_CRIT_EDGE]] ], [ [[DOTPRE1]], [[LAND_LHS_TRUE]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ [[DOTPRE]], [[ENTRY_IF_END_CRIT_EDGE]] ], [ [[TMP2]], [[LAND_LHS_TRUE]] ] +; CHECK-NEXT: [[I_ADDR_0:%.*]] = phi i32 [ [[I]], [[ENTRY_IF_END_CRIT_EDGE]] ], [ [[SPEC_SELECT]], [[LAND_LHS_TRUE]] ] +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM5_PRE_PHI]] +; CHECK-NEXT: ret i32 [[TMP3]] ; entry: %cmp = icmp slt i32 %i, %N @@ -884,17 +891,19 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], 1 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: +; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[A:%.*]], align 4 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[TMP2:%.*]], [[FOR_BODY]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[FOR_BODY]] ], [ 1, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[RES]] to i64 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM1]] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP0]], [[TMP1]] +; 
CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP2]] = select i1 [[CMP1]], i32 [[TMP1]], i32 [[TMP0]] ; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], i32 [[IDX]], i32 [[RES]] ; CHECK-NEXT: [[IDX_NEXT]] = add nsw i32 [[IDX]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IDX_NEXT]], [[N]]