diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h --- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -77,6 +77,11 @@ /// calls or memory use intrinsics with identical callees and no /// intervening clobbers. No validation is done that the operands to /// the calls are the same. + /// 4. For loads and stores, this could be a select instruction that + /// defines pointer to this memory location. In this case, users can + /// find non-clobbered Defs for both select values that are reaching + // the desired memory location (there is still a guarantee that there + // are no clobbers between analyzed memory location and select). Def, /// This marker indicates that the query has no known dependency in the @@ -142,6 +147,9 @@ /// definition dependency. bool isDef() const { return Value.is(); } + /// Tests if this MemDepResult represents a valid local query (Clobber/Def). + bool isLocal() const { return isClobber() || isDef(); } + /// Tests if this MemDepResult represents a query that is transparent to the /// start of the block, but where a non-local hasn't been done. bool isNonLocal() const { diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -592,6 +592,11 @@ return MemDepResult::getDef(Inst); } + // If we found a select instruction for MemLoc pointer, return it as Def + // dependency. + if (isa(Inst) && MemLoc.Ptr == Inst) + return MemDepResult::getDef(Inst); + if (isInvariantLoad) continue; @@ -962,7 +967,7 @@ // If the block has a dependency (i.e. it isn't completely transparent to // the value), remember the reverse association because we just added it // to Cache! - if (!Dep.isDef() && !Dep.isClobber()) + if (!Dep.isLocal()) return Dep; // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -1120,31 +1120,26 @@ /// 1. The pointer select (\p Address) must be defined in \p DepBB. /// 2. Both value operands of the pointer select must be loaded in the same /// basic block, before the pointer select. -/// 3. There must be no instructions between the found loads and \p End that may +/// 3. There must be no instructions between the found loads and \p Sel that may /// clobber the loads. static std::optional -tryToConvertLoadOfPtrSelect(BasicBlock *DepBB, BasicBlock::iterator End, - Value *Address, Type *LoadTy, DominatorTree &DT, +tryToConvertLoadOfPtrSelect(SelectInst *Sel, Type *LoadTy, DominatorTree &DT, AAResults *AA) { - - auto *Sel = dyn_cast_or_null(Address); - if (!Sel || DepBB != Sel->getParent()) - return std::nullopt; - LoadInst *L1 = findDominatingLoad(Sel->getOperand(1), LoadTy, Sel, DT); LoadInst *L2 = findDominatingLoad(Sel->getOperand(2), LoadTy, Sel, DT); if (!L1 || !L2) return std::nullopt; // Ensure there are no accesses that may modify the locations referenced by - // either L1 or L2 between L1, L2 and the specified End iterator. + // either L1 or L2 between L1, L2 and the specified Sel instruction. Instruction *EarlierLoad = L1->comesBefore(L2) ? L1 : L2; MemoryLocation L1Loc = MemoryLocation::get(L1); MemoryLocation L2Loc = MemoryLocation::get(L2); - if (any_of(make_range(EarlierLoad->getIterator(), End), [&](Instruction &I) { - return isModSet(AA->getModRefInfo(&I, L1Loc)) || - isModSet(AA->getModRefInfo(&I, L2Loc)); - })) + if (any_of(make_range(EarlierLoad->getIterator(), Sel->getIterator()), + [&](Instruction &I) { + return isModSet(AA->getModRefInfo(&I, L1Loc)) || + isModSet(AA->getModRefInfo(&I, L2Loc)); + })) return std::nullopt; return AvailableValue::getSelect(Sel); @@ -1153,20 +1148,12 @@ std::optional GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo, Value *Address) { - if (!DepInfo.isDef() && !DepInfo.isClobber()) { - assert(isa(Address)); - return tryToConvertLoadOfPtrSelect(Load->getParent(), Load->getIterator(), - Address, Load->getType(), - getDominatorTree(), getAliasAnalysis()); - } - - assert((DepInfo.isDef() || DepInfo.isClobber()) && - "expected a local dependence"); assert(Load->isUnordered() && "rules below are incorrect for ordered access"); - - const DataLayout &DL = Load->getModule()->getDataLayout(); + assert(DepInfo.isLocal() && "expected a local dependence"); Instruction *DepInst = DepInfo.getInst(); + + const DataLayout &DL = Load->getModule()->getDataLayout(); if (DepInfo.isClobber()) { // If the dependence is to a store that writes to a superset of the bits // read by the load, we can extract the bits we need for the load from the @@ -1272,6 +1259,13 @@ return AvailableValue::getLoad(LD); } + // Check if load with Addr dependent from select can be converted to select + // between load values. There must be no instructions between the found + // loads and DepInst that may clobber the loads. + if (auto *Sel = dyn_cast(DepInst)) + return tryToConvertLoadOfPtrSelect(Sel, Load->getType(), getDominatorTree(), + getAliasAnalysis()); + // Unknown def - must be conservative LLVM_DEBUG( // fast print dep, using operator<< on instruction is too slow. @@ -1298,24 +1292,15 @@ continue; } - // The address being loaded in this non-local block may not be the same as - // the pointer operand of the load if PHI translation occurs. Make sure - // to consider the right address. - Value *Address = Dep.getAddress(); - - if (!DepInfo.isDef() && !DepInfo.isClobber()) { - if (auto R = tryToConvertLoadOfPtrSelect( - DepBB, DepBB->end(), Address, Load->getType(), getDominatorTree(), - getAliasAnalysis())) { - ValuesPerBlock.push_back( - AvailableValueInBlock::get(DepBB, std::move(*R))); - continue; - } + if (!DepInfo.isLocal()) { UnavailableBlocks.push_back(DepBB); continue; } - if (auto AV = AnalyzeLoadAvailability(Load, DepInfo, Address)) { + // The address being loaded in this non-local block may not be the same as + // the pointer operand of the load if PHI translation occurs. Make sure + // to consider the right address. + if (auto AV = AnalyzeLoadAvailability(Load, DepInfo, Dep.getAddress())) { // subtlety: because we know this was a non-local dependency, we know // it's safe to materialize anywhere between the instruction within // DepInfo and the end of it's block. @@ -2043,9 +2028,8 @@ if (Dep.isNonLocal()) return processNonLocalLoad(L); - Value *Address = L->getPointerOperand(); // Only handle the local case below - if (!Dep.isDef() && !Dep.isClobber() && !isa(Address)) { + if (!Dep.isLocal()) { // This might be a NonFuncLocal or an Unknown LLVM_DEBUG( // fast print dep, using operator<< on instruction is too slow. @@ -2054,7 +2038,7 @@ return false; } - auto AV = AnalyzeLoadAvailability(L, Dep, Address); + auto AV = AnalyzeLoadAvailability(L, Dep, L->getPointerOperand()); if (!AV) return false; diff --git a/llvm/test/Transforms/GVN/PRE/pre-load-through-select.ll b/llvm/test/Transforms/GVN/PRE/pre-load-through-select.ll --- a/llvm/test/Transforms/GVN/PRE/pre-load-through-select.ll +++ b/llvm/test/Transforms/GVN/PRE/pre-load-through-select.ll @@ -236,13 +236,15 @@ ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[A:%.*]], align 4 ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[B:%.*]], align 4 ; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]] +; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]] ; CHECK-NEXT: [[MIN_SELECT:%.*]] = select i1 [[CMP_I_I_I]], ptr [[A]], ptr [[B]] ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: else: +; CHECK-NEXT: [[RES_2_PRE:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: +; CHECK-NEXT: [[RES_2:%.*]] = phi i32 [ [[TMP0]], [[THEN]] ], [ [[RES_2_PRE]], [[ELSE]] ] ; CHECK-NEXT: [[P:%.*]] = phi ptr [ [[MIN_SELECT]], [[THEN]] ], [ [[A]], [[ELSE]] ] -; CHECK-NEXT: [[RES_2:%.*]] = load i32, ptr [[P]], align 4 ; CHECK-NEXT: ret i32 [[RES_2]] ; entry: diff --git a/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll b/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll --- a/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll +++ b/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll @@ -311,13 +311,13 @@ ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[PTR_IV]], align 4 ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[MIN_PTR]], align 4 ; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]] +; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]] ; CHECK-NEXT: [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], ptr [[PTR_IV]], ptr [[MIN_PTR]] ; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i32, ptr [[PTR_IV]], i64 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END:%.*]] ; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: [[RES:%.*]] = load i32, ptr [[MIN_SELECT]], align 4 -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: ret i32 [[TMP0]] ; entry: br label %loop @@ -350,13 +350,13 @@ ; CHECK-NEXT: call void @may_throw() ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[MIN_PTR]], align 4 ; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]] +; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]] ; CHECK-NEXT: [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], ptr [[PTR_IV]], ptr [[MIN_PTR]] ; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i32, ptr [[PTR_IV]], i64 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END:%.*]] ; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: [[RES:%.*]] = load i32, ptr [[MIN_SELECT]], align 4 -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: ret i32 [[TMP0]] ; entry: br label %loop @@ -514,13 +514,13 @@ ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[PTR_IV]], align 4 ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[MIN_PTR]], align 4 ; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]] +; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]] ; CHECK-NEXT: [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], ptr [[PTR_IV]], ptr [[MIN_PTR]] ; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i32, ptr [[PTR_IV]], i64 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END:%.*]] ; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: [[RES:%.*]] = load i32, ptr [[MIN_SELECT]], align 4 -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: ret i32 [[TMP0]] ; entry: %start.ptr = getelementptr inbounds i32, ptr %ptr, i64 1