diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h --- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -79,6 +79,10 @@ /// the calls are the same. Def, + /// This marker indicates that the query has dependency from select + /// instruction. + Select, + /// This marker indicates that the query has no known dependency in the /// specified block. /// @@ -106,6 +110,7 @@ DepType, PointerSumTypeMember<Invalid, Instruction *>, PointerSumTypeMember<Clobber, Instruction *>, PointerSumTypeMember<Def, Instruction *>, + PointerSumTypeMember<Select, Instruction *>, PointerSumTypeMember<Other, PointerEmbeddedInt<OtherType, 3>>>; ValueTy Value; @@ -124,6 +129,9 @@ assert(Inst && "Clobber requires inst"); return MemDepResult(ValueTy::create<Clobber>(Inst)); } + static MemDepResult getSelect(Instruction *Inst) { + return MemDepResult(ValueTy::create<Select>(Inst)); + } static MemDepResult getNonLocal() { return MemDepResult(ValueTy::create<Other>(NonLocal)); } @@ -142,6 +150,13 @@ /// definition dependency. bool isDef() const { return Value.is<Def>(); } + /// Tests if this MemDepResult represents a query that is an instruction + /// select dependency. + bool isSelect() const { return Value.is<Select>(); } + + /// Tests if this MemDepResult represents a local query (Clobber/Def/Select). + bool isLocal() const { return isClobber() || isDef() || isSelect(); } + /// Tests if this MemDepResult represents a query that is transparent to the /// start of the block, but where a non-local hasn't been done. bool isNonLocal() const { @@ -170,6 +185,8 @@ return Value.cast<Clobber>(); case Def: return Value.cast<Def>(); + case Select: + return Value.cast<Select>(); case Other: return nullptr; } diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -592,6 +592,11 @@ return MemDepResult::getDef(Inst); } + // If we found a select instruction for MemLoc pointer, return it as + // Select dependency. + if (isa<SelectInst>(Inst) && MemLoc.Ptr == Inst) + return MemDepResult::getSelect(Inst); + if (isInvariantLoad) continue; @@ -962,7 +967,7 @@ // If the block has a dependency (i.e. it isn't completely transparent to // the value), remember the reverse association because we just added it // to Cache! - if (!Dep.isDef() && !Dep.isClobber()) + if (!Dep.isLocal()) return Dep; // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -1120,31 +1120,26 @@ /// 1. The pointer select (\p Address) must be defined in \p DepBB. /// 2. Both value operands of the pointer select must be loaded in the same /// basic block, before the pointer select. -/// 3. There must be no instructions between the found loads and \p End that may +/// 3. There must be no instructions between the found loads and \p Sel that may /// clobber the loads. static std::optional<AvailableValue> -tryToConvertLoadOfPtrSelect(BasicBlock *DepBB, BasicBlock::iterator End, - Value *Address, Type *LoadTy, DominatorTree &DT, +tryToConvertLoadOfPtrSelect(SelectInst *Sel, Type *LoadTy, DominatorTree &DT, AAResults *AA) { - - auto *Sel = dyn_cast_or_null<SelectInst>(Address); - if (!Sel || DepBB != Sel->getParent()) - return std::nullopt; - LoadInst *L1 = findDominatingLoad(Sel->getOperand(1), LoadTy, Sel, DT); LoadInst *L2 = findDominatingLoad(Sel->getOperand(2), LoadTy, Sel, DT); if (!L1 || !L2) return std::nullopt; // Ensure there are no accesses that may modify the locations referenced by - // either L1 or L2 between L1, L2 and the specified End iterator. + // either L1 or L2 between L1, L2 and the specified Sel instruction. Instruction *EarlierLoad = L1->comesBefore(L2) ? L1 : L2; MemoryLocation L1Loc = MemoryLocation::get(L1); MemoryLocation L2Loc = MemoryLocation::get(L2); - if (any_of(make_range(EarlierLoad->getIterator(), End), [&](Instruction &I) { - return isModSet(AA->getModRefInfo(&I, L1Loc)) || - isModSet(AA->getModRefInfo(&I, L2Loc)); - })) + if (any_of(make_range(EarlierLoad->getIterator(), Sel->getIterator()), + [&](Instruction &I) { + return isModSet(AA->getModRefInfo(&I, L1Loc)) || + isModSet(AA->getModRefInfo(&I, L2Loc)); + })) return std::nullopt; return AvailableValue::getSelect(Sel); @@ -1153,20 +1148,21 @@ std::optional<AvailableValue> GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo, Value *Address) { - if (!DepInfo.isDef() && !DepInfo.isClobber()) { - assert(isa<SelectInst>(Address)); - return tryToConvertLoadOfPtrSelect(Load->getParent(), Load->getIterator(), - Address, Load->getType(), - getDominatorTree(), getAliasAnalysis()); - } - - assert((DepInfo.isDef() || DepInfo.isClobber()) && - "expected a local dependence"); assert(Load->isUnordered() && "rules below are incorrect for ordered access"); - - const DataLayout &DL = Load->getModule()->getDataLayout(); + assert(DepInfo.isLocal() && "expected a local dependence"); Instruction *DepInst = DepInfo.getInst(); + if (DepInfo.isSelect()) { + // Check if load with Addr dependent from select can be converted to select + // between load values. There must be no instructions between the found + // loads and DepInst that may clobber the loads. + assert(isa<SelectInst>(DepInst) && "Dependency must be select instruction"); + auto *Sel = cast<SelectInst>(DepInst); + return tryToConvertLoadOfPtrSelect(Sel, Load->getType(), getDominatorTree(), + getAliasAnalysis()); + } + + const DataLayout &DL = Load->getModule()->getDataLayout(); if (DepInfo.isClobber()) { // If the dependence is to a store that writes to a superset of the bits // read by the load, we can extract the bits we need for the load from the @@ -1298,24 +1294,15 @@ continue; } - // The address being loaded in this non-local block may not be the same as - // the pointer operand of the load if PHI translation occurs. Make sure - // to consider the right address. - Value *Address = Dep.getAddress(); - - if (!DepInfo.isDef() && !DepInfo.isClobber()) { - if (auto R = tryToConvertLoadOfPtrSelect( - DepBB, DepBB->end(), Address, Load->getType(), getDominatorTree(), - getAliasAnalysis())) { - ValuesPerBlock.push_back( - AvailableValueInBlock::get(DepBB, std::move(*R))); - continue; - } + if (!DepInfo.isLocal()) { UnavailableBlocks.push_back(DepBB); continue; } - if (auto AV = AnalyzeLoadAvailability(Load, DepInfo, Address)) { + // The address being loaded in this non-local block may not be the same as + // the pointer operand of the load if PHI translation occurs. Make sure + // to consider the right address. + if (auto AV = AnalyzeLoadAvailability(Load, DepInfo, Dep.getAddress())) { // subtlety: because we know this was a non-local dependency, we know // it's safe to materialize anywhere between the instruction within // DepInfo and the end of it's block. @@ -2043,9 +2030,8 @@ if (Dep.isNonLocal()) return processNonLocalLoad(L); - Value *Address = L->getPointerOperand(); // Only handle the local case below - if (!Dep.isDef() && !Dep.isClobber() && !isa<SelectInst>(Address)) { + if (!Dep.isLocal()) { // This might be a NonFuncLocal or an Unknown LLVM_DEBUG( // fast print dep, using operator<< on instruction is too slow. @@ -2054,7 +2040,7 @@ return false; } - auto AV = AnalyzeLoadAvailability(L, Dep, Address); + auto AV = AnalyzeLoadAvailability(L, Dep, L->getPointerOperand()); if (!AV) return false; diff --git a/llvm/test/Transforms/GVN/PRE/pre-load-through-select.ll b/llvm/test/Transforms/GVN/PRE/pre-load-through-select.ll --- a/llvm/test/Transforms/GVN/PRE/pre-load-through-select.ll +++ b/llvm/test/Transforms/GVN/PRE/pre-load-through-select.ll @@ -236,13 +236,15 @@ ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[A:%.*]], align 4 ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[B:%.*]], align 4 ; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]] +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]] ; CHECK-NEXT: [[MIN_SELECT:%.*]] = select i1 [[CMP_I_I_I]], ptr [[A]], ptr [[B]] ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: else: +; CHECK-NEXT: [[RES_2_PRE:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: +; CHECK-NEXT: [[RES_2:%.*]] = phi i32 [ [[SELECT]], [[THEN]] ], [ [[RES_2_PRE]], [[ELSE]] ] ; CHECK-NEXT: [[P:%.*]] = phi ptr [ [[MIN_SELECT]], [[THEN]] ], [ [[A]], [[ELSE]] ] -; CHECK-NEXT: [[RES_2:%.*]] = load i32, ptr [[P]], align 4 ; CHECK-NEXT: ret i32 [[RES_2]] ; entry: