Index: llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -162,7 +162,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -355,7 +354,6 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); - AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.setPreservesCFG(); @@ -374,14 +372,23 @@ class SeparateConstOffsetFromGEP { public: SeparateConstOffsetFromGEP( - DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI, - TargetLibraryInfo *TLI, + DominatorTree *DT, LoopInfo *LI, TargetLibraryInfo *TLI, function_ref GetTTI, bool LowerGEP) - : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {} + : DT(DT), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {} bool run(Function &F); private: + /// Track the operands of an add or sub. + using ExprKey = std::pair; + + /// Create a pair for use as a map key for a commutable operation. + static ExprKey createNormalizedCommutablePair(Value *A, Value *B) { + if (A < B) + return {A, B}; + return {B, A}; + } + /// Tries to split the given GEP into a variadic base and a constant offset, /// and returns true if the splitting succeeds. bool splitGEP(GetElementPtrInst *GEP); @@ -446,8 +453,8 @@ /// Find the closest dominator of that is equivalent to . Instruction *findClosestMatchingDominator( - const SCEV *Key, Instruction *Dominatee, - DenseMap> &DominatingExprs); + ExprKey Key, Instruction *Dominatee, + DenseMap> &DominatingExprs); /// Verify F is free of dead code. void verifyNoDeadCode(Function &F); @@ -463,7 +470,6 @@ const DataLayout *DL = nullptr; DominatorTree *DT = nullptr; - ScalarEvolution *SE; LoopInfo *LI; TargetLibraryInfo *TLI; // Retrieved lazily since not always used. @@ -473,8 +479,8 @@ /// multiple GEPs with a single index. bool LowerGEP; - DenseMap> DominatingAdds; - DenseMap> DominatingSubs; + DenseMap> DominatingAdds; + DenseMap> DominatingSubs; }; } // end anonymous namespace @@ -1158,13 +1164,12 @@ if (skipFunction(F)) return false; auto *DT = &getAnalysis().getDomTree(); - auto *SE = &getAnalysis().getSE(); auto *LI = &getAnalysis().getLoopInfo(); auto *TLI = &getAnalysis().getTLI(F); auto GetTTI = [this](Function &F) -> TargetTransformInfo & { return this->getAnalysis().getTTI(F); }; - SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP); + SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP); return Impl.run(F); } @@ -1194,8 +1199,8 @@ } Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator( - const SCEV *Key, Instruction *Dominatee, - DenseMap> &DominatingExprs) { + ExprKey Key, Instruction *Dominatee, + DenseMap> &DominatingExprs) { auto Pos = DominatingExprs.find(Key); if (Pos == DominatingExprs.end()) return nullptr; @@ -1215,7 +1220,7 @@ } bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) { - if (!SE->isSCEVable(I->getType())) + if (!I->getType()->isIntOrIntVectorTy()) return false; // Dom: LHS+RHS @@ -1225,8 +1230,7 @@ Value *LHS = nullptr, *RHS = nullptr; if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) { if (LHS->getType() == RHS->getType()) { - const SCEV *Key = - SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS)); + ExprKey Key = createNormalizedCommutablePair(LHS, RHS); if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingAdds)) { Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I); NewSExt->takeName(I); @@ -1237,9 +1241,8 @@ } } else if (match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) { if (LHS->getType() == RHS->getType()) { - const SCEV *Key = SE->getAddExpr( - SE->getUnknown(LHS), SE->getNegativeSCEV(SE->getUnknown(RHS))); - if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingSubs)) { + if (auto *Dom = + findClosestMatchingDominator({LHS, RHS}, I, DominatingSubs)) { Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I); NewSExt->takeName(I); I->replaceAllUsesWith(NewSExt); @@ -1252,16 +1255,12 @@ // Add I to DominatingExprs if it's an add/sub that can't sign overflow. if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS)))) { if (programUndefinedIfPoison(I)) { - const SCEV *Key = - SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS)); + ExprKey Key = createNormalizedCommutablePair(LHS, RHS); DominatingAdds[Key].push_back(I); } } else if (match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) { - if (programUndefinedIfPoison(I)) { - const SCEV *Key = SE->getAddExpr( - SE->getUnknown(LHS), SE->getNegativeSCEV(SE->getUnknown(RHS))); - DominatingSubs[Key].push_back(I); - } + if (programUndefinedIfPoison(I)) + DominatingSubs[{LHS, RHS}].push_back(I); } return false; } @@ -1394,13 +1393,12 @@ PreservedAnalyses SeparateConstOffsetFromGEPPass::run(Function &F, FunctionAnalysisManager &AM) { auto *DT = &AM.getResult(F); - auto *SE = &AM.getResult(F); auto *LI = &AM.getResult(F); auto *TLI = &AM.getResult(F); auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & { return AM.getResult(F); }; - SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP); + SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP); if (!Impl.run(F)) return PreservedAnalyses::all(); PreservedAnalyses PA; Index: llvm/test/CodeGen/AMDGPU/llc-pipeline.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -468,7 +468,6 @@ ; GCN-O1-OPTS-NEXT: Dominator Tree Construction ; GCN-O1-OPTS-NEXT: SROA ; GCN-O1-OPTS-NEXT: Natural Loop Information -; GCN-O1-OPTS-NEXT: Scalar Evolution Analysis ; GCN-O1-OPTS-NEXT: Split GEPs to a variadic base and a constant offset for better CSE ; GCN-O1-OPTS-NEXT: Scalar Evolution Analysis ; GCN-O1-OPTS-NEXT: Straight line strength reduction @@ -767,7 +766,6 @@ ; GCN-O2-NEXT: Dominator Tree Construction ; GCN-O2-NEXT: SROA ; GCN-O2-NEXT: Natural Loop Information -; GCN-O2-NEXT: Scalar Evolution Analysis ; GCN-O2-NEXT: Split GEPs to a variadic base and a constant offset for better CSE ; GCN-O2-NEXT: Scalar Evolution Analysis ; GCN-O2-NEXT: Straight line strength reduction @@ -1076,7 +1074,6 @@ ; GCN-O3-NEXT: Dominator Tree Construction ; GCN-O3-NEXT: SROA ; GCN-O3-NEXT: Natural Loop Information -; GCN-O3-NEXT: Scalar Evolution Analysis ; GCN-O3-NEXT: Split GEPs to a variadic base and a constant offset for better CSE ; GCN-O3-NEXT: Scalar Evolution Analysis ; GCN-O3-NEXT: Straight line strength reduction Index: llvm/test/CodeGen/PowerPC/O3-pipeline.ll =================================================================== --- llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -27,7 +27,6 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Split GEPs to a variadic base and a constant offset for better CSE ; CHECK-NEXT: Early CSE ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) Index: llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reunite-exts.ll =================================================================== --- llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reunite-exts.ll +++ llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reunite-exts.ll @@ -270,9 +270,7 @@ ; CHECK-SAME: (<2 x i32> [[ARG0:%.*]], <2 x i32> [[ARG1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ADD_NSW:%.*]] = add nsw <2 x i32> [[ARG0]], [[ARG1]] -; CHECK-NEXT: [[ARG0_SEXT:%.*]] = sext <2 x i32> [[ARG0]] to <2 x i64> -; CHECK-NEXT: [[ARG1_SEXT:%.*]] = sext <2 x i32> [[ARG1]] to <2 x i64> -; CHECK-NEXT: [[ADD_SEXT:%.*]] = add <2 x i64> [[ARG0_SEXT]], [[ARG1_SEXT]] +; CHECK-NEXT: [[ADD_SEXT:%.*]] = sext <2 x i32> [[ADD_NSW]] to <2 x i64> ; CHECK-NEXT: call void @use.v2i32(<2 x i32> [[ADD_NSW]]) ; CHECK-NEXT: ret <2 x i64> [[ADD_SEXT]] ; @@ -291,9 +289,7 @@ ; CHECK-SAME: (<2 x i32> [[ARG0:%.*]], <2 x i32> [[ARG1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SUB_NSW:%.*]] = sub nsw <2 x i32> [[ARG0]], [[ARG1]] -; CHECK-NEXT: [[ARG0_SEXT:%.*]] = sext <2 x i32> [[ARG0]] to <2 x i64> -; CHECK-NEXT: [[ARG1_SEXT:%.*]] = sext <2 x i32> [[ARG1]] to <2 x i64> -; CHECK-NEXT: [[SUB_SEXT:%.*]] = sub <2 x i64> [[ARG0_SEXT]], [[ARG1_SEXT]] +; CHECK-NEXT: [[SUB_SEXT:%.*]] = sext <2 x i32> [[SUB_NSW]] to <2 x i64> ; CHECK-NEXT: call void @use.v2i32(<2 x i32> [[SUB_NSW]]) ; CHECK-NEXT: ret <2 x i64> [[SUB_SEXT]] ; @@ -313,9 +309,7 @@ ; CHECK-SAME: (<2 x i32> [[ARG0:%.*]], <2 x i32> [[ARG1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ADD_NSW:%.*]] = add nsw <2 x i32> [[ARG0]], [[ARG1]] -; CHECK-NEXT: [[ARG0_SEXT:%.*]] = sext <2 x i32> [[ARG0]] to <2 x i64> -; CHECK-NEXT: [[ARG1_SEXT:%.*]] = sext <2 x i32> [[ARG1]] to <2 x i64> -; CHECK-NEXT: [[ADD_SEXT:%.*]] = add <2 x i64> [[ARG1_SEXT]], [[ARG0_SEXT]] +; CHECK-NEXT: [[ADD_SEXT:%.*]] = sext <2 x i32> [[ADD_NSW]] to <2 x i64> ; CHECK-NEXT: call void @use.v2i32(<2 x i32> [[ADD_NSW]]) ; CHECK-NEXT: ret <2 x i64> [[ADD_SEXT]] ;