diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -431,8 +431,12 @@
   bool reuniteExts(Instruction *I);
 
   /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
-  Instruction *findClosestMatchingDominator(const SCEV *Key,
-                                            Instruction *Dominatee);
+  /// Candidates are looked up in \p DominatingExprs, so adds and subs can be
+  /// matched against separate maps.
+  Instruction *findClosestMatchingDominator(
+      const SCEV *Key, Instruction *Dominatee,
+      DenseMap<const SCEV *, SmallVector<Instruction *, 2>> &DominatingExprs);
+
   /// Verify F is free of dead code.
   void verifyNoDeadCode(Function &F);
 
@@ -456,7 +460,11 @@
   /// multiple GEPs with a single index.
   bool LowerGEP;
 
-  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingExprs;
+  /// No-signed-wrap adds and subs visited so far by reuniteExts, keyed by a
+  /// SCEV built from their operands. Kept apart so that an add never matches a
+  /// sub over the same operands.
+  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingAdds;
+  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingSubs;
 };
 
 } // end anonymous namespace
@@ -1141,7 +1149,8 @@
 }
 
 Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
-    const SCEV *Key, Instruction *Dominatee) {
+    const SCEV *Key, Instruction *Dominatee,
+    DenseMap<const SCEV *, SmallVector<Instruction *, 2>> &DominatingExprs) {
   auto Pos = DominatingExprs.find(Key);
   if (Pos == DominatingExprs.end())
     return nullptr;
@@ -1169,12 +1178,25 @@
   // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
   // TODO: handle zext
   Value *LHS = nullptr, *RHS = nullptr;
-  if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS)))) ||
-      match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
+  if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
     if (LHS->getType() == RHS->getType()) {
       const SCEV *Key =
           SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
-      if (auto *Dom = findClosestMatchingDominator(Key, I)) {
+      if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingAdds)) {
+        Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
+        NewSExt->takeName(I);
+        I->replaceAllUsesWith(NewSExt);
+        RecursivelyDeleteTriviallyDeadInstructions(I);
+        return true;
+      }
+    }
+  } else if (match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
+    if (LHS->getType() == RHS->getType()) {
+      // Subtraction is not commutative: key on LHS - RHS so that
+      // sext(b) - sext(a) is never rewritten to sext(a - b).
+      const SCEV *Key =
+          SE->getMinusSCEV(SE->getUnknown(LHS), SE->getUnknown(RHS));
+      if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingSubs)) {
         Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
         NewSExt->takeName(I);
         I->replaceAllUsesWith(NewSExt);
@@ -1185,12 +1207,18 @@
   }
 
-  // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
-  if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) ||
-      match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
+  // Add I to DominatingAdds/DominatingSubs if it's an add/sub that can't sign
+  // overflow.
+  if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS)))) {
+    if (programUndefinedIfFullPoison(I)) {
+      const SCEV *Key =
+          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
+      DominatingAdds[Key].push_back(I);
+    }
+  } else if (match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
     if (programUndefinedIfFullPoison(I)) {
       const SCEV *Key =
-          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
-      DominatingExprs[Key].push_back(I);
+          SE->getMinusSCEV(SE->getUnknown(LHS), SE->getUnknown(RHS));
+      DominatingSubs[Key].push_back(I);
     }
   }
   return false;
@@ -1198,7 +1226,8 @@
 
 bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
   bool Changed = false;
-  DominatingExprs.clear();
+  DominatingAdds.clear();
+  DominatingSubs.clear();
   for (const auto Node : depth_first(DT)) {
     BasicBlock *BB = Node->getBlock();
     for (auto I = BB->begin(); I != BB->end(); ) {
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/test-add-sub-separation.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/test-add-sub-separation.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/test-add-sub-separation.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -separate-const-offset-from-gep < %s | FileCheck %s
+
+define void @matchingExtensions(i32* %ap, i32* %bp, i64* %result) {
+; CHECK-LABEL: @matchingExtensions(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[AP:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = load i32, i32* [[BP:%.*]]
+; CHECK-NEXT:    [[EB:%.*]] = sext i32 [[B]] to i64
+; CHECK-NEXT:    [[SUBAB:%.*]] = sub nsw i32 [[A]], [[B]]
+; CHECK-NEXT:    [[EA:%.*]] = sext i32 [[A]] to i64
+; CHECK-NEXT:    [[ADDEAEB:%.*]] = add nsw i64 [[EA]], [[EB]]
+; CHECK-NEXT:    [[EXTSUB:%.*]] = sext i32 [[SUBAB]] to i64
+; CHECK-NEXT:    [[IDX:%.*]] = getelementptr i32, i32* [[AP]], i64 [[EXTSUB]]
+; CHECK-NEXT:    store i64 [[ADDEAEB]], i64* [[RESULT:%.*]]
+; CHECK-NEXT:    store i32 [[SUBAB]], i32* [[IDX]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = load i32, i32* %ap
+  %b = load i32, i32* %bp
+  %eb = sext i32 %b to i64
+  %subab = sub nsw i32 %a, %b
+  %ea = sext i32 %a to i64
+  %addeaeb = add nsw i64 %ea, %eb
+  %extsub = sext i32 %subab to i64
+  %idx = getelementptr i32, i32* %ap, i64 %extsub
+  store i64 %addeaeb, i64* %result
+  store i32 %subab, i32* %idx
+  ret void
+}
+
+; sext(b) - sext(a) must not be replaced by sext(a - b).
+define void @reversedSubOperands(i32* %ap, i32* %bp, i64* %result) {
+; CHECK-LABEL: @reversedSubOperands(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[AP:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = load i32, i32* [[BP:%.*]]
+; CHECK-NEXT:    [[EA:%.*]] = sext i32 [[A]] to i64
+; CHECK-NEXT:    [[EB:%.*]] = sext i32 [[B]] to i64
+; CHECK-NEXT:    [[SUBAB:%.*]] = sub nsw i32 [[A]], [[B]]
+; CHECK-NEXT:    [[SUBEBEA:%.*]] = sub nsw i64 [[EB]], [[EA]]
+; CHECK-NEXT:    [[EXTSUB:%.*]] = sext i32 [[SUBAB]] to i64
+; CHECK-NEXT:    [[IDX:%.*]] = getelementptr i32, i32* [[AP]], i64 [[EXTSUB]]
+; CHECK-NEXT:    store i64 [[SUBEBEA]], i64* [[RESULT:%.*]]
+; CHECK-NEXT:    store i32 [[SUBAB]], i32* [[IDX]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = load i32, i32* %ap
+  %b = load i32, i32* %bp
+  %ea = sext i32 %a to i64
+  %eb = sext i32 %b to i64
+  %subab = sub nsw i32 %a, %b
+  %subebea = sub nsw i64 %eb, %ea
+  %extsub = sext i32 %subab to i64
+  %idx = getelementptr i32, i32* %ap, i64 %extsub
+  store i64 %subebea, i64* %result
+  store i32 %subab, i32* %idx
+  ret void
+}