Index: lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- lib/Transforms/InstCombine/InstructionCombining.cpp +++ lib/Transforms/InstCombine/InstructionCombining.cpp @@ -75,6 +75,10 @@ STATISTIC(NumFactor , "Number of factorizations"); STATISTIC(NumReassoc , "Number of reassociations"); +static cl::opt<bool> + AggressiveGepMerging("aggr-gep-merging", cl::init(false), cl::Hidden, + cl::desc("Enable GEP merging for most cases.")); + Value *InstCombiner::EmitGEPOffset(User *GEP) { return llvm::EmitGEPOffset(Builder, DL, GEP); } @@ -1432,13 +1436,22 @@ if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src)) return nullptr; - // Note that if our source is a gep chain itself then we wait for that - // chain to be resolved before we perform this transformation. This - // avoids us creating a TON of code in some cases. - if (GEPOperator *SrcGEP = - dyn_cast<GEPOperator>(Src->getOperand(0))) - if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP)) - return nullptr; // Wait until our source is folded to completion. + if (AggressiveGepMerging) { + // Note that if our source is a gep chain itself then we wait for that + // chain to be resolved before we perform this transformation. This + // avoids us creating a TON of code in some cases. + if (GEPOperator *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0))) + if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP)) + return nullptr; // Wait until our source is folded to completion. + } else { + // Don't combine the malformed cycle gep instructions like the following: + // %gep2 = getelementptr i8, i8* %gep, i32 1 + // %gep = getelementptr i8, i8* %gep2, i32 1 + GetElementPtrInst *SrcGEP = + dyn_cast<GetElementPtrInst>(Src->getOperand(0)); + if (SrcGEP == &GEP) + return nullptr; + } SmallVector<Value*, 8> Indices; @@ -1467,10 +1480,16 @@ // normalized. if (SO1->getType() != GO1->getType()) return nullptr; - // Only do the combine when GO1 and SO1 are both constants. 
Only in - // this case, we are sure the cost after the merge is never more than - // that before the merge. - if (!isa<Constant>(GO1) || !isa<Constant>(SO1)) + // If !AggressiveGepMerging, do gep(gep(...)) combine only when + // 1. GO1 and SO1 are both constants or + // 2. Src has only one use and Src and GEP are in the same BB. + // In these cases we are sure the cost of the combined result will be + // equal or less than before. + if (!AggressiveGepMerging && + (!isa<Constant>(GO1) || !isa<Constant>(SO1)) && + (!Src->hasOneUse() || + (isa<Instruction>(Src) && + cast<Instruction>(Src)->getParent() != GEP.getParent()))) return nullptr; Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum"); } Index: test/Transforms/InstCombine/gep-merge1.ll =================================================================== --- test/Transforms/InstCombine/gep-merge1.ll +++ test/Transforms/InstCombine/gep-merge1.ll @@ -0,0 +1,76 @@ +; PR23580 +; RUN: opt < %s -O2 -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.anon = type { [0 x %class.C] } +%class.C = type { i8 } +%struct.B = type { i16 } +%class.G = type <{ %struct.F, [2 x i32], i8, [7 x i8] }> +%struct.F = type { i8, i8, i8, i16, i32* } + +@a = global i32 0, align 4 +@d = internal global %struct.anon zeroinitializer, align 1 +declare %struct.B* @_ZN1C5m_fn1Ev(%class.C*) + +; Check geps inside for.body are merged so loop vectorizer can recognize loads +; inside for.body to be inter-iterations consecutive, and generate %wide.loads. 
+; +; CHECK-LABEL: @fn2( +; CHECK: %wide.load{{[0-9]*}} = +; CHECK: %wide.load{{[0-9]*}} = +; CHECK: %wide.load{{[0-9]*}} = + +define void @fn2(%class.G* nocapture readonly %this, i1 zeroext %arg) align 2 { +entry: + br label %for.cond + +for.cond: + %tmp1 = load i32, i32* @a, align 4 + %idxprom = sext i32 %tmp1 to i64 + %arrayidx3 = getelementptr inbounds [0 x %class.C], [0 x %class.C]* getelementptr inbounds (%struct.anon, %struct.anon* @d, i32 0, i32 0), i32 0, i64 %idxprom + %call = call %struct.B* @_ZN1C5m_fn1Ev(%class.C* %arrayidx3) + %tmp4 = load i32, i32* @a, align 4 + %idx.ext = sext i32 %tmp4 to i64 + %add.ptr = getelementptr inbounds %struct.B, %struct.B* %call, i64 %idx.ext + %tobool = trunc i32 %tmp4 to i1 + br i1 %tobool, label %for.cond13, label %if.else30 + +for.cond13: ; preds = %for.body, %if.then + %k.0 = phi i32 [ 1, %for.cond ], [ %add, %for.body ] + %cmp14 = icmp slt i32 %k.0, %tmp4 + br i1 %cmp14, label %for.body, label %if.else30, !llvm.loop !0 + +for.body: ; preds = %for.cond13 + %idxprom15 = sext i32 %k.0 to i64 + %arrayidx16 = getelementptr inbounds %struct.B, %struct.B* %add.ptr, i64 %idxprom15 + %ival = getelementptr inbounds %struct.B, %struct.B* %arrayidx16, i32 0, i32 0 + %tmp9 = load i16, i16* %ival, align 2 + %conv17 = sext i16 %tmp9 to i32 + %add = add nsw i32 %k.0, 1 + %idxprom18 = sext i32 %add to i64 + %arrayidx19 = getelementptr inbounds %struct.B, %struct.B* %add.ptr, i64 %idxprom18 + %ival20 = getelementptr inbounds %struct.B, %struct.B* %arrayidx19, i32 0, i32 0 + %tmp10 = load i16, i16* %ival20, align 2 + %conv21 = sext i16 %tmp10 to i32 + %add22 = add nsw i32 %conv17, %conv21 + %mul = mul nsw i32 %tmp1, %add22 + %add23 = add nsw i32 %tmp4, %mul + %shr = ashr i32 %add23, %tmp4 + %arrayidx25 = getelementptr inbounds %struct.B, %struct.B* %call, i64 %idxprom15 + %ival26 = getelementptr inbounds %struct.B, %struct.B* %arrayidx25, i32 0, i32 0 + %tmp11 = load i16, i16* %ival26, align 2 + %conv27 = sext i16 %tmp11 to i32 + %sub 
= sub nsw i32 %conv27, %shr + %conv28 = trunc i32 %sub to i16 + store i16 %conv28, i16* %ival26, align 2 + br label %for.cond13 + +if.else30: + br label %for.cond +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.vectorize.width", i32 4} Index: test/Transforms/InstCombine/gep-merge2.ll =================================================================== --- test/Transforms/InstCombine/gep-merge2.ll +++ test/Transforms/InstCombine/gep-merge2.ll @@ -0,0 +1,48 @@ +; This test makes sure that gep(gep ...) merge doesn't come into effect. +; RUN: opt < %s -instcombine -S | FileCheck %s + +; Make sure there are no geps being merged. +; CHECK-LABEL: @fn3( +; CHECK: getelementptr +; CHECK: getelementptr +; CHECK: getelementptr + +@_ZN2cv1aE = global i8* zeroinitializer, align 8 +declare i32 @fn1() #2 +declare i32 @fn2() #2 + +; Function Attrs: uwtable +define linkonce_odr i32 @fn3() { +entry: + %call = call i32 @fn1() + %call1 = call i32 @fn2() + %0 = load i8*, i8** @_ZN2cv1aE, align 8 + %idx.ext2 = sext i32 %call1 to i64 + %add.ptr3 = getelementptr inbounds i8, i8* %0, i64 %idx.ext2 + br label %for.cond5 + +for.cond5: + %total1 = phi i32 [ 0, %entry ], [ %total2, %for.body7 ] + %x.1 = phi i32 [ 0, %entry ], [ %inc, %for.body7 ] + %cmp6 = icmp slt i32 %x.1, %call + br i1 %cmp6, label %for.body7, label %for.cond34 + +for.body7: ; preds = %for.cond5 + %mul = mul nsw i32 %x.1, 2 + %idxprom = sext i32 %mul to i64 + %arrayidx = getelementptr inbounds i8, i8* %add.ptr3, i64 %idxprom + %1 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %1 to i32 + %sub = sub nsw i32 %mul, 1 + %idxprom10 = sext i32 %sub to i64 + %arrayidx11 = getelementptr inbounds i8, i8* %add.ptr3, i64 %idxprom10 + %2 = load i8, i8* %arrayidx11, align 1 + %conv2 = zext i8 %2 to i32 + %add1 = add nsw i32 %conv, %conv2 + %total2 = add nsw i32 %total1, %add1 + %inc = add nsw i32 %x.1, 1 + br label %for.cond5 + +for.cond34: + ret i32 %total1 +} Index: test/Transforms/InstCombine/getelementptr.ll 
=================================================================== --- test/Transforms/InstCombine/getelementptr.ll +++ test/Transforms/InstCombine/getelementptr.ll @@ -104,8 +104,8 @@ %B = getelementptr i32, i32* %A, i64 %D ret i32* %B ; CHECK-LABEL: @test7( -; CHECK: %A = getelementptr i32, i32* %I, i64 %C -; CHECK: %B = getelementptr i32, i32* %A, i64 %D +; CHECK: %A.sum = add i64 %C, %D +; CHECK: getelementptr i32, i32* %I, i64 %A.sum } define i8* @test8([10 x i32]* %X) {