Index: lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
===================================================================
--- lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -430,8 +430,10 @@
   bool reuniteExts(Instruction *I);
 
   /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
-  Instruction *findClosestMatchingDominator(const SCEV *Key,
-                                            Instruction *Dominatee);
+  Instruction *findClosestMatchingDominator(
+      const SCEV *Key, Instruction *Dominatee,
+      DenseMap<const SCEV *, SmallVector<Instruction *, 2>> &DominatingExprs);
+
   /// Verify F is free of dead code.
   void verifyNoDeadCode(Function &F);
 
@@ -455,7 +457,8 @@
   /// multiple GEPs with a single index.
   bool LowerGEP;
 
-  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingExprs;
+  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingAdds;
+  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingSubs;
 };
 
 } // end anonymous namespace
@@ -1140,7 +1143,8 @@
 }
 
 Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
-    const SCEV *Key, Instruction *Dominatee) {
+    const SCEV *Key, Instruction *Dominatee,
+    DenseMap<const SCEV *, SmallVector<Instruction *, 2>> &DominatingExprs) {
   auto Pos = DominatingExprs.find(Key);
   if (Pos == DominatingExprs.end())
     return nullptr;
@@ -1168,12 +1172,11 @@
   // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
   // TODO: handle zext
   Value *LHS = nullptr, *RHS = nullptr;
-  if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS)))) ||
-      match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
+  if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
     if (LHS->getType() == RHS->getType()) {
       const SCEV *Key =
           SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
-      if (auto *Dom = findClosestMatchingDominator(Key, I)) {
+      if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingAdds)) {
         Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
         NewSExt->takeName(I);
         I->replaceAllUsesWith(NewSExt);
@@ -1181,23 +1184,41 @@
         return true;
       }
     }
+  } else if (match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
+    if (LHS->getType() == RHS->getType()) {
+      const SCEV *Key =
+          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
+      if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingSubs)) {
+        Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
+        NewSExt->takeName(I);
+        I->replaceAllUsesWith(NewSExt);
+        RecursivelyDeleteTriviallyDeadInstructions(I);
+        return true;
+      }
+    }
   }
 
   // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
-  if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) ||
-      match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
+  if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS)))) {
     if (programUndefinedIfFullPoison(I)) {
       const SCEV *Key =
           SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
-      DominatingExprs[Key].push_back(I);
+      DominatingAdds[Key].push_back(I);
     }
+  } else if (match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
+    if (programUndefinedIfFullPoison(I)) {
+      const SCEV *Key =
+          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
+      DominatingSubs[Key].push_back(I);
+    }
   }
   return false;
 }
 
 bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
   bool Changed = false;
-  DominatingExprs.clear();
+  DominatingAdds.clear();
+  DominatingSubs.clear();
   for (const auto Node : depth_first(DT)) {
     BasicBlock *BB = Node->getBlock();
     for (auto I = BB->begin(); I != BB->end(); ) {
Index: test/Transforms/SeparateConstOffsetFromGEP/PowerPC/test-nomatch-add-and-sub.ll
===================================================================
--- test/Transforms/SeparateConstOffsetFromGEP/PowerPC/test-nomatch-add-and-sub.ll
+++ test/Transforms/SeparateConstOffsetFromGEP/PowerPC/test-nomatch-add-and-sub.ll
@@ -0,0 +1,181 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=powerpc64le-gnu-linux < %s | FileCheck %s
+
+; Though the error in this function only involves a handful of instructions,
+; the bug needs the ~75 other instructions in order to actually occur, due to
+; other llc analysis passes and the intermittent nature of the issue. This
+; test case is considerably reduced from the originally generated code.
+
+; Function Attrs: noinline nounwind
+define void @tri_solve_reduced(i32 signext %size,
+; CHECK-LABEL: tri_solve_reduced:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi 9, 3, -2
+; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 2, -32(1) # 8-byte Folded Spill
+; CHECK-NEXT:    li 8, 0
+; CHECK-NEXT:    srawi 10, 9, 31
+; CHECK-NEXT:    and 10, 10, 9
+; CHECK-NEXT:    subf 9, 10, 9
+; CHECK-NEXT:    addi 10, 3, -1
+; CHECK-NEXT:    clrldi 9, 9, 32
+; CHECK-NEXT:    extsw 11, 10
+; CHECK-NEXT:    addi 12, 9, 1
+; CHECK-NEXT:    sldi 9, 7, 4
+; CHECK-NEXT:    mtctr 12
+; CHECK-NEXT:    b .LBB0_2
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB0_1: # %loopend
+;
+; CHECK:         addi 8, 8, 1
+; CHECK-NEXT:    stfdx 0, 0, 12
+; CHECK-NEXT:    bdz .LBB0_8
+; CHECK-NEXT:  .LBB0_2: # %outer
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB0_7 Depth 2
+; CHECK-NEXT:    mr 0, 10
+; CHECK-NEXT:    subf 10, 7, 10
+; CHECK-NEXT:    mr 30, 11
+; CHECK-NEXT:    slwi 12, 10, 1
+; CHECK-NEXT:    cmpw 30, 3
+; CHECK-NEXT:    extsw 11, 12
+; CHECK-NEXT:    mr 12, 6
+; CHECK-NEXT:    sldi 11, 11, 3
+; CHECK-NEXT:    lfdux 0, 12, 11
+; CHECK-NEXT:    addi 11, 30, -1
+; CHECK-NEXT:    bge 0, .LBB0_1
+; CHECK-NEXT:  # %bb.3: # %outercheck
+;
+; CHECK:         andi. 29, 8, 1
+; CHECK-NEXT:    extsw 0, 0
+; CHECK-NEXT:    bc 12, 1, .LBB0_5
+; CHECK-NEXT:  # %bb.4: # %innerhead
+;
+; CHECK:         mullw 29, 11, 5
+; CHECK-NEXT:    add 0, 0, 7
+; CHECK-NEXT:    add 29, 30, 29
+; CHECK-NEXT:    addi 30, 30, 1
+; CHECK-NEXT:    slwi 29, 29, 1
+; CHECK-NEXT:    extsw 29, 29
+; CHECK-NEXT:    sldi 29, 29, 3
+; CHECK-NEXT:    lfdx 1, 4, 29
+; CHECK-NEXT:    xssubdp 0, 0, 1
+; CHECK-NEXT:  .LBB0_5: # %innercheck
+;
+; CHECK:         cmplwi 8, 0
+; CHECK-NEXT:    beq 0, .LBB0_1
+; CHECK-NEXT:  # %bb.6: # %innerbody.preheader
+;
+; CHECK:         sldi 0, 0, 4
+; CHECK-NEXT:    subf 2, 30, 3
+; CHECK-NEXT:    add 0, 4, 0
+; CHECK-NEXT:    .p2align 5
+; CHECK-NEXT:  .LBB0_7: # %innerbody
+; CHECK-NEXT:    # Parent Loop BB0_2 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    lfdx 1, 0, 0
+; CHECK-NEXT:    addi 2, 2, -2
+; CHECK-NEXT:    add 0, 0, 9
+; CHECK-NEXT:    cmplwi 2, 0
+; CHECK-NEXT:    xssubdp 0, 1, 0
+; CHECK-NEXT:    bne 0, .LBB0_7
+; CHECK-NEXT:    b .LBB0_1
+; CHECK-NEXT:  .LBB0_8: # %fin
+; CHECK-NEXT:    ld 2, -32(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
+; CHECK-NEXT:    blr
+  double* nocapture readonly %matrix, i32 signext %lda,
+  double* nocapture %vector, i32 signext %stridelen) {
+entry:
+  %initdec = add i32 %size, -1
+  %bigdec = sext i32 %initdec to i64
+  ; Extended operand to matchingadd
+  %bigincX = sext i32 %stridelen to i64
+  br label %outer
+
+outer:                                            ; preds = %entry, %loopend
+  %revi = phi i32 [ 0, %entry ], [ %revinc, %loopend ]
+  %bigi = phi i64 [ %bigdec, %entry ], [ %endeci, %loopend ]
+  ; Short operand to matchingsub
+  %indexolotl = phi i32 [ %initdec, %entry ], [ %matchingsub, %loopend ]
+  %colindex = phi i32 [ %initdec, %entry ], [ %nexti, %loopend ]
+
+  ; This subtraction is matched against matchingadd during the sext-merging
+  ; (reuniteExts) phase of the SeparateConstOffsetFromGEP pass, which is not a
+  ; valid transform because an add and a subtract are different operations.
+
+  %matchingsub = sub nsw i32 %indexolotl, %stridelen
+  %nexti = add nsw i32 %colindex, -1
+  %smallXoffset = shl nsw i32 %matchingsub, 1
+  %bigXoffset = sext i32 %smallXoffset to i64
+  %arrayidx = getelementptr inbounds double, double* %vector, i64 %bigXoffset
+  %numvar4 = load double, double* %arrayidx, align 8
+  %checkout = icmp slt i32 %colindex, %size
+  br i1 %checkout, label %outercheck, label %loopend
+
+outercheck:                                       ; preds = %outer
+  ; Extended operand to matchingadd
+  %longxtolotl = sext i32 %indexolotl to i64 ; Inst of interest
+  %rowindex = mul nsw i32 %nexti, %lda
+  %qcheck = and i32 %revi, 1
+  %co2 = icmp eq i32 %qcheck, 0
+  br i1 %co2, label %innerhead, label %innercheck
+
+innerhead:                                        ; preds = %outercheck
+  %findex = add nsw i32 %colindex, %rowindex
+  %doubledex = shl nsw i32 %findex, 1
+  %bigindex = sext i32 %doubledex to i64
+  %finptr = getelementptr inbounds double, double* %matrix, i64 %bigindex
+  %loadedA = load double, double* %finptr, align 8
+  %headreal = fsub double %numvar4, %loadedA
+
+  ; The match to matchingsub: at compile time on powerpc with -O2, the add
+  ; below is turned into "sext i32 %matchingsub to i64" because its
+  ; sign-extended operands match the un-extended operands of the earlier
+  ; matchingsub.
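+  ;
+  ; A minimal sketch of the miscompile (the names below are illustrative only,
+  ; not values from this test): given a dominating
+  ;     %d = sub nsw i32 %a, %b
+  ; the unfixed pass would rewrite a dominated
+  ;     %x = sext i32 %a to i64
+  ;     %y = sext i32 %b to i64
+  ;     %s = add i64 %x, %y
+  ; into "%s = sext i32 %d to i64", silently turning a + b into a - b. With
+  ; this patch, sext(a) + sext(b) only reunites with a dominating nsw add,
+  ; and sext(a) - sext(b) only with a dominating nsw sub.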
+
+  %matchingadd = add nsw i64 %longxtolotl, %bigincX ; Inst of interest
+  %biginext = add nsw i64 %bigi, 1                  ; Possible: j++
+  %incol = add nsw i32 %colindex, 1
+  br label %innercheck
+
+innercheck:                                       ; preds = %innerhead, %outercheck
+  %innerreal = phi double [ %headreal, %innerhead ], [ undef, %outercheck ]
+  %incxticheck = phi i64 [ %matchingadd, %innerhead ], [ %longxtolotl, %outercheck ]
+  %icinext = phi i64 [ %biginext, %innerhead ], [ %bigi, %outercheck ]
+  %ichkcol = phi i32 [ %incol, %innerhead ], [ %colindex, %outercheck ]
+  %icreal = phi double [ %headreal, %innerhead ], [ %numvar4, %outercheck ]
+  %icrevicheck = icmp eq i32 %revi, 0
+  br i1 %icrevicheck, label %loopend, label %innerbody
+
+innerbody:                                        ; preds = %innercheck, %innerbody
+  %innerloopxtolotl = phi i64 [ %bodyinctolotl, %innerbody ], [ %incxticheck, %innercheck ]
+  %bodyi = phi i64 [ %bodyinci, %innerbody ], [ %icinext, %innercheck ]
+  %bodyj = phi i32 [ %dinextj, %innerbody ], [ %ichkcol, %innercheck ]
+  %bodyreal = phi double [ %newreal, %innerbody ], [ %icreal, %innercheck ]
+  %nextj = add nsw i32 %bodyj, 1
+  %doublotl = shl nsw i64 %innerloopxtolotl, 1
+  %bodyptr = getelementptr inbounds double, double* %matrix, i64 %doublotl
+  %bodyAload = load double, double* %bodyptr, align 8
+  %newreal = fsub double %bodyAload, %bodyreal
+  %bodyinctolotl = add i64 %innerloopxtolotl, %bigincX
+  %bodyinci = add nsw i64 %bodyi, 2
+  %dinextj = add nsw i32 %bodyj, 2
+  %smallinci = trunc i64 %bodyinci to i32
+  %bodycmp = icmp eq i32 %smallinci, %size
+  br i1 %bodycmp, label %loopend, label %innerbody
+
+loopend:                                          ; preds = %innercheck, %innerbody, %outer
+  %endreal = phi double [ %numvar4, %outer ], [ %innerreal, %innercheck ], [ %newreal, %innerbody ]
+  store double %endreal, double* %arrayidx, align 8
+  %endcmp = icmp sgt i32 %nexti, 0
+  %endeci = add nsw i64 %bigi, -1
+  %revinc = add i32 %revi, 1
+  br i1 %endcmp, label %outer, label %fin
+
+fin:
+  ret void
+}