diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -754,6 +754,13 @@
   ///
   /// If the multiplication is known not to overflow then NoSignedWrap is set.
   Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap);
+
+  /// Simplify a chain of GEP instructions in which some of the GEPs have
+  /// constant indices.
+  ///
+  /// For example:
+  /// GEP i8 (GEP (GEP i64 X, C1), I1), C2 -> GEP i8 (GEP X, I1), C3, where
+  /// C3 = C1 * 8 + C2, for constants C1 and C2.
+  Value *SimplifyGEPChain(GetElementPtrInst &GEP);
 };
 
 class Negator final {
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2251,6 +2251,231 @@
   return nullptr;
 }
 
+// Clone a GEP instruction, giving it a new pointer operand and an updated
+// index list: the constant Diff is either added to the last index of the
+// original GEP or appended as a new trailing index.
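+//
+// For example (illustrative IR), with Diff == 2:
+//   add to last index: GEP i32 P, 3        becomes  GEP i32 NewPointer, 5
+//   append an index:   GEP [4 x i32] P, 1  becomes  GEP [4 x i32] NewPointer, 1, 2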
+static Value *CreateInBoundsGEPUpdateIndex(InstCombiner::BuilderTy &Builder,
+                                           GetElementPtrInst *GEP,
+                                           Value *NewPointer, APInt Diff,
+                                           bool AppendIndex) {
+  SmallVector<Value *, 4> NewIndices(GEP->indices());
+  if (AppendIndex) {
+    NewIndices.push_back(ConstantInt::get(Builder.getContext(), Diff));
+  } else {
+    APInt Idx = cast<ConstantInt>(NewIndices.back())->getValue();
+    // Make sure NewIdx is big enough to hold the sum of Idx and Diff.
+    unsigned BitWidth = std::max(Idx.getBitWidth(), Diff.getBitWidth());
+    Idx = Idx.sextOrSelf(BitWidth);
+    Diff = Diff.sextOrSelf(BitWidth);
+    bool Overflow = false;
+    APInt Res = Idx.sadd_ov(Diff, Overflow);
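+    // E.g., if Idx and Diff are both 64 bits wide and their sum does not fit,
+    // widen both to 65 bits so the exact sum can be represented.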
+    if (Overflow) {
+      BitWidth++;
+      Idx = Idx.sext(BitWidth);
+      Diff = Diff.sext(BitWidth);
+      Res = Idx + Diff;
+    }
+    Constant *NewIdx = ConstantInt::get(Builder.getContext(), Res);
+    NewIndices.back() = NewIdx;
+  }
+  return Builder.CreateInBoundsGEP(GEP->getSourceElementType(), NewPointer,
+                                   NewIndices);
+}
+
+// Recursively rebuild the GEP (and bitcast) chain rooted at V, substituting
+// NewV for the matched inner value, and return the value that replaces the
+// outer GEP's pointer operand.
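+//
+// For example (illustrative), with Match == X and NewV == Y:
+//   bitcast (GEP i64 X, C) to i8*  becomes  bitcast (GEP i64 Y, C) to i8*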
+static Value *RewriteGEPChain(InstCombiner::BuilderTy &Builder, Value *V,
+                              Value *Match, Value *NewV) {
+  if (V == Match)
+    return NewV;
+
+  if (BitCastInst *BC = dyn_cast<BitCastInst>(V)) {
+    return Builder.CreateBitCast(
+        RewriteGEPChain(Builder, BC->getOperand(0), Match, NewV),
+        BC->getDestTy());
+  } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
+    return Builder.CreateInBoundsGEP(
+        GEP->getSourceElementType(),
+        RewriteGEPChain(Builder, GEP->getPointerOperand(), Match, NewV),
+        SmallVector<Value *, 4>(GEP->indices()));
+  } else {
+    llvm_unreachable("Unsupported type");
+  }
+}
+
+/// If the outermost (most dominated) and the innermost (dominating) GEP
+/// instructions in a chain of GEPs (and bitcasts) both have constant indices,
+/// the intermediate values have no other uses, and no intermediate index
+/// depends on the inner GEP, then the two GEPs can be merged with
+/// recalculated constant indices, provided the constant offset of one GEP is
+/// divisible by the scalar element size of the other.
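+///
+/// For example (cf. the @basic test in this patch):
+///   %1 = getelementptr inbounds i32, ptr %p, i64 1
+///   %2 = getelementptr inbounds i32, ptr %1, i64 %a
+///   %3 = getelementptr inbounds i32, ptr %2, i64 2
+/// becomes
+///   %1 = getelementptr inbounds i32, ptr %p, i64 3
+///   %2 = getelementptr inbounds i32, ptr %1, i64 %a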
+Value *InstCombinerImpl::SimplifyGEPChain(GetElementPtrInst &OuterGEP) {
+  auto IsGEPNonZeroConstantIndex = [&](GetElementPtrInst *GEP) {
+    // Check that every index is a constant int and that not all indices are
+    // zero: an all-zero GEP is equivalent to a free bitcast, so we should not
+    // rewrite it into pointer arithmetic.
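+    //
+    // E.g., "GEP [4 x i32] P, 0, 0" computes the same address as P, so it is
+    // not a candidate for merging.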
+    bool NonZero = false;
+    for (Value *Idx : GEP->indices()) {
+      ConstantInt *CI;
+      if (!match(Idx, m_ConstantInt(CI)))
+        return false;
+      if (!CI->isZero())
+        NonZero = true;
+    }
+
+    // Check that the result element type has a fixed, non-zero size.
+    return NonZero && !isa<ScalableVectorType>(GEP->getResultElementType()) &&
+        DL.getTypeAllocSize(GEP->getResultElementType()) != 0;
+  };
+
+  // Check whether the type indexed by all but the last GEP index is an array,
+  // so that we can perform pointer arithmetic by adjusting the last index.
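+  //
+  // E.g., for "GEP [20 x i8] P, 1, 1" the type indexed by all but the last
+  // index is [20 x i8], so bumping the last index steps through the array
+  // (see @array1 in the tests).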
+  auto IsPointerToArray = [](GetElementPtrInst *GEP) {
+    // If a GEP has only 1 index, it behaves like pointing to an array.
+    if (GEP->getNumIndices() == 1)
+      return true;
+    return isa<ArrayType>(GetElementPtrInst::getIndexedType(
+        GEP->getSourceElementType(),
+        SmallVector<Value *, 4>(llvm::drop_end(GEP->indices()))));
+  };
+
+  // Bail out unless OuterGEP is inbounds and has non-zero constant indices.
+  if (!OuterGEP.isInBounds() || OuterGEP.getNumIndices() == 0 ||
+      !IsGEPNonZeroConstantIndex(&OuterGEP))
+    return nullptr;
+
+  Value *V = OuterGEP.getOperand(0);
+  Value *NewV = nullptr;
+  bool AppendIndex = false;
+  APInt Diff;
+
+  unsigned Depth = 0;
+  while (Depth++ < MaxAnalysisRecursionDepth) {
+    // Except for the inner GEP, all intermediate values should have exactly
+    // one use, so that rewriting the chain does not increase the instruction
+    // count. This also implies that no intermediate GEP's indices depend on
+    // another intermediate GEP, which would make the transformation invalid.
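+    // The inner GEP itself may keep other uses, since it is not rewritten in
+    // place (e.g. @constIndexGEPMultipleUses keeps it alive for a ptrtoint);
+    // only the values between it and OuterGEP must be single-use.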
+    bool HasOneUse = V->hasOneUse();
+
+    if (BitCastInst *BC = dyn_cast<BitCastInst>(V)) {
+      // A bitcast does not change the address; keep searching through it.
+      if (!HasOneUse)
+        return nullptr;
+      V = BC->getOperand(0);
+    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
+      // Can't guarantee pointer arithmetic is valid in these cases.
+      if (!GEP->isInBounds() ||
+          OuterGEP.getAddressSpace() != GEP->getAddressSpace())
+        return nullptr;
+
+      const unsigned BitWidth =
+          DL.getIndexSizeInBits(OuterGEP.getAddressSpace());
+      APInt OuterOffset(BitWidth, 0, true);
+      APInt InnerOffset(BitWidth, 0, true);
+
+      // Found inner GEP candidate with constant indices.
+      if (GEP->getNumIndices() > 0 && IsGEPNonZeroConstantIndex(GEP) &&
+          OuterGEP.accumulateConstantOffset(DL, OuterOffset) &&
+          GEP->accumulateConstantOffset(DL, InnerOffset)) {
+
+        Builder.SetInsertPoint(&OuterGEP);
+
+        // If two offsets cancel each other, we can eliminate both GEP
+        // instructions.
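+        // E.g., an inner "GEP i32 P, 1" (offset 4) and an outer
+        // "GEP i8 ..., -4" cancel out, as in the @zeroSum test.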
+        if (InnerOffset + OuterOffset == 0) {
+          NewV = GEP->getPointerOperand();
+          if (GEP->getPointerOperandType() != GEP->getType())
+            NewV = Builder.CreateBitCast(NewV, GEP->getType());
+          break;
+        }
+
+        APInt InnerSize(BitWidth,
+                        DL.getTypeAllocSize(GEP->getResultElementType()));
+
+        // If the outer GEP's offset is divisible by the inner GEP's element
+        // size, and the last index of the inner GEP is not a struct index,
+        // merge into the inner GEP by adding the quotient to its last index.
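+        // E.g., an outer "GEP i32 ..., 2" (offset 8) folds into an inner
+        // "GEP i32 P, 1" as "GEP i32 P, 3", as in the @basic test.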
+        if (OuterOffset.srem(InnerSize) == 0 && IsPointerToArray(GEP)) {
+          NewV = CreateInBoundsGEPUpdateIndex(
+              Builder, GEP, GEP->getPointerOperand(),
+              OuterOffset.sdiv(InnerSize), false);
+          break;
+        }
+
+        // If the inner GEP's result points to an array, and the outer GEP's
+        // offset is divisible by the size of the array's element type, merge
+        // into the inner GEP by appending an index. Note that while we could
+        // index through every nested array or struct in an attempt to find a
+        // size by which the offset is divisible, that would be costly, so we
+        // only search down one level.
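+        // E.g., if the inner GEP yields a pointer to an array of i16 and the
+        // outer offset is 8 bytes, append the index 8 / 2 == 4, as in the
+        // @appendIndex test.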
+        if (GEP->getResultElementType()->isArrayTy()) {
+          InnerSize = DL.getTypeAllocSize(
+              GEP->getResultElementType()->getArrayElementType());
+          if (OuterOffset.srem(InnerSize) == 0) {
+            NewV = CreateInBoundsGEPUpdateIndex(
+                Builder, GEP, GEP->getPointerOperand(),
+                OuterOffset.sdiv(InnerSize), true);
+            break;
+          }
+        }
+
+        // Check the other way around: if the inner GEP's offset is divisible
+        // by the outer GEP's element size, eliminate the inner GEP and fold
+        // its offset into the outer GEP.
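+        // E.g., an inner "GEP i64 P, 3" (offset 24) folds into an outer
+        // "GEP %struct.C ..., 1" as "GEP %struct.C P, 3" when the offset is
+        // a multiple of the struct size, as in the @offsetDivisible test.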
+        APInt OuterSize(BitWidth,
+                        DL.getTypeAllocSize(OuterGEP.getResultElementType()));
+        if (InnerOffset.srem(OuterSize) == 0 && IsPointerToArray(&OuterGEP)) {
+          Diff = InnerOffset.sdiv(OuterSize);
+          NewV = GEP->getPointerOperand();
+          if (GEP->getPointerOperandType() != GEP->getType())
+            NewV = Builder.CreateBitCast(NewV, GEP->getType());
+          break;
+        }
+
+        // Similarly, handle the case where the outer GEP's result points to
+        // an array.
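+        // E.g., an inner "GEP i64 P, 1" (offset 8) becomes an appended index
+        // 8 / 2 == 4 on an outer GEP whose result is an array of i16, as in
+        // the @appendIndexReverse test.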
+        if (OuterGEP.getResultElementType()->isArrayTy()) {
+          OuterSize = DL.getTypeAllocSize(
+              OuterGEP.getResultElementType()->getArrayElementType());
+          if (InnerOffset.srem(OuterSize) == 0) {
+            AppendIndex = true;
+            Diff = InnerOffset.sdiv(OuterSize);
+            NewV = GEP->getPointerOperand();
+            if (GEP->getPointerOperandType() != GEP->getType())
+              NewV = Builder.CreateBitCast(NewV, GEP->getType());
+            break;
+          }
+        }
+
+        // Otherwise this GEP is not mergeable; treat it as a regular GEP.
+      }
+
+      // Regular GEP, keep searching into it.
+      if (!HasOneUse)
+        return nullptr;
+      V = GEP->getPointerOperand();
+    } else {
+      return nullptr;
+    }
+  }
+
+  if (NewV) {
+    NewV = RewriteGEPChain(Builder, OuterGEP.getPointerOperand(), V, NewV);
+
+    // Handle merging the inner GEP into the outer GEP.
+    if (Diff != 0)
+      return CreateInBoundsGEPUpdateIndex(Builder, &OuterGEP, NewV, Diff,
+                                          AppendIndex);
+    // Handle the outer GEP being eliminated (merged into the inner GEP, or
+    // cancelled out).
+    if (OuterGEP.getType() != NewV->getType())
+      return Builder.CreateBitCast(NewV, OuterGEP.getType());
+    return NewV;
+  }
+
+  return nullptr;
+}
+
 Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   Value *PtrOp = GEP.getOperand(0);
   SmallVector<Value *, 8> Indices(GEP.indices());
@@ -2693,6 +2918,9 @@
     }
   }
 
+  if (Value *V = SimplifyGEPChain(GEP))
+    return replaceInstUsesWith(GEP, V);
+
   if (Instruction *R = foldSelectGEP(GEP, Builder))
     return R;
 
diff --git a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
--- a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
+++ b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
@@ -11,10 +11,9 @@
 ; result = (p + 3) + a
 define i32* @basic(i32* %p, i64 %a) {
 ; CHECK-LABEL: @basic(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 3
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 2
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds i32, i32* %p, i64 1
   %2 = getelementptr inbounds i32, i32* %1, i64 %a
@@ -25,9 +24,8 @@
 ; result = (struct.C*) p + 3
 define %struct.C* @offsetDivisible(i64* %p) {
 ; CHECK-LABEL: @offsetDivisible(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 3
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 1
-; CHECK-NEXT:    ret ptr [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 3
+; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i64, i64* %p, i64 3
   %2 = getelementptr inbounds %struct.C, %struct.C* %1, i64 1
@@ -37,10 +35,9 @@
 ; result = (i8*) ((i16*) p + 7) + a
 define ptr @opaque(ptr %p, i64 %a) {
 ; CHECK-LABEL: @opaque(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 -1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 7
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds i16, ptr %p, i64 -1
   %2 = getelementptr inbounds i8, ptr %1, i64 %a
@@ -51,10 +48,9 @@
 ; result = (i32*) (p - 9) + a
 define i32* @bitcast(i8* %p, i64 %a) {
 ; CHECK-LABEL: @bitcast(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 -1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 -9
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -2
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds i8, i8* %p, i64 -1
   %2 = bitcast i8* %1 to i32*
@@ -67,9 +63,8 @@
 ; result = (i8*) p + 10
 define i8* @bitcastReverse(i64* %p, i64 %a) {
 ; CHECK-LABEL: @bitcastReverse(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 2
-; CHECK-NEXT:    ret ptr [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 10
+; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i64, i64* %p, i64 1
   %2 = bitcast i64* %1 to i8*
@@ -80,13 +75,11 @@
 ; result = (i16*) ((i64*) ((i8*) p + a) + (a * b)) + 29
 define i16* @nested(i32* %p, i64 %a, i64 %b) {
 ; CHECK-LABEL: @nested(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 3
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[A]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 4
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i64 7
-; CHECK-NEXT:    ret ptr [[TMP6]]
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 [[A]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 29
+; CHECK-NEXT:    ret ptr [[TMP4]]
 ;
   %1 = getelementptr inbounds i32, i32* %p, i64 3
   %2 = getelementptr inbounds i8, i8* %1, i64 %a
@@ -103,10 +96,8 @@
 ; result = (i8*) p + a
 define i8* @zeroSum(i32 *%p, i64 %a) {
 ; CHECK-LABEL: @zeroSum(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 -4
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i32, i32* %p, i64 1
   %2 = bitcast i32* %1 to i8*
@@ -119,10 +110,9 @@
 ; result = (p + 254) + a
 define i32* @indexOverflow(i32* %p, i64 %a) {
 ; CHECK-LABEL: @indexOverflow(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 127
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 254
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 127
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds i32, i32* %p, i8 127
   %2 = getelementptr inbounds i32, i32* %1, i64 %a
@@ -138,8 +128,8 @@
 ; CHECK-LABEL: @constIndexGEPMultipleUses(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 2
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 3
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP6]]
@@ -157,10 +147,9 @@
 ; result = (i32*) ((p[1] + 17) + a)
 define i32* @array1([20 x i8]* %p, i64 %a) {
 ; CHECK-LABEL: @array1(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [20 x i8], ptr [[P:%.*]], i64 1, i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [20 x i8], ptr [[P:%.*]], i64 1, i64 17
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds [20 x i8], [20 x i8]* %p, i64 1, i64 1
   %2 = getelementptr inbounds i8, i8* %1, i64 %a
@@ -171,10 +160,9 @@
 ; result = (i8*) ((i32*) p + a) + 14
 define i8* @array2([7 x i32]* %p, i64 %a) {
 ; CHECK-LABEL: @array2(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [7 x i32], ptr [[P:%.*]], i64 0, i64 3
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 2
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 14
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds [7 x i32], [7 x i32]* %p, i64 0, i64 3
   %2 = getelementptr inbounds i32, i32* %1, i64 %a
@@ -185,10 +173,9 @@
 ; result = (([3 x i8]*) (p + a))[1] + 17
 define i8* @array3(i64* %p, i64 %a) {
 ; CHECK-LABEL: @array3(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 2
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [3 x i8], ptr [[TMP2]], i64 1, i64 1
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [3 x i8], ptr [[TMP1]], i64 1, i64 17
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds i64, i64* %p, i64 2
   %2 = getelementptr inbounds i64, i64* %1, i64 %a
@@ -199,10 +186,9 @@
 ; result = ((i8*) p + a) - 4
 define i8* @struct(%struct.A* %p, i64 %a) {
 ; CHECK-LABEL: @struct(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], ptr [[P:%.*]], i64 0, i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 -128
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 -4
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds %struct.A, %struct.A* %p, i64 0, i32 1
   %2 = getelementptr inbounds i8, i8* %1, i64 %a
@@ -213,10 +199,9 @@
 ; result = (i32*) ((p - 4) + a)
 define i32* @structReverse(i8* %p, i64 %a) {
 ; CHECK-LABEL: @structReverse(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 -128
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 -4
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], ptr [[TMP2]], i64 0, i32 1
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds i8, i8* %p, i64 -128
   %2 = getelementptr inbounds i8, i8* %1, i64 %a
@@ -227,10 +212,9 @@
 ; result = ((i8*) &p[0].member2.member0 + 7) + a
 define i8* @structStruct(%struct.B* %p, i64 %a) {
 ; CHECK-LABEL: @structStruct(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 0, i32 2, i32 0, i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 0, i32 2, i32 0, i64 7
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], ptr [[TMP2]], i64 0, i32 0, i64 4
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds %struct.B, %struct.B* %p, i64 0, i32 2, i32 0, i64 3
   %2 = getelementptr inbounds i8, i8* %1, i64 %a
@@ -244,10 +228,9 @@
 ; result = (i8*) ((i16*) &p[0].member1 + 4) + a
 define i64* @appendIndex(%struct.B* %p, i64 %a) {
 ; CHECK-LABEL: @appendIndex(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 0, i32 1, i64 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 1
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds %struct.B, %struct.B* %p, i64 0, i32 1
   %2 = getelementptr inbounds i8, i8* %1, i64 %a
@@ -258,10 +241,9 @@
 ; result = (i16*) &((struct.B*) (p + a))[0].member1 + 4
 define ptr @appendIndexReverse(i64* %p, i64 %a) {
 ; CHECK-LABEL: @appendIndexReverse(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[TMP2]], i64 0, i32 1
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[TMP1]], i64 0, i32 1, i64 4
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds i64, i64* %p, i64 1
   %2 = getelementptr inbounds i64, i64* %1, i64 %a
@@ -274,12 +256,11 @@
 ; result = (struct.C*) ((<5 x i32>*) ((p + 4) + a) + 1) + b
 define ptr @skipMiddleGEP(%struct.C* %p, i64 %a, i64 %b) {
 ; CHECK-LABEL: @skipMiddleGEP(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C]], ptr [[TMP1]], i64 [[A:%.*]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds <5 x i32>, ptr [[TMP2]], i64 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_C]], ptr [[TMP3]], i64 [[B:%.*]]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_C]], ptr [[TMP4]], i64 3
-; CHECK-NEXT:    ret ptr [[TMP5]]
+; CHECK-NEXT:    ret ptr [[TMP4]]
 ;
   %1 = getelementptr inbounds %struct.C, %struct.C* %p, i64 1
   %2 = getelementptr inbounds %struct.C, %struct.C* %1, i64 %a
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
@@ -44,9 +44,8 @@
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[TMP3]] to <4 x double>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 8, !alias.scope !0
 ; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -4
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 -3
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>*
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -7
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast double* [[TMP5]] to <4 x double>*
 ; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 8, !alias.scope !0
 ; CHECK-NEXT:    [[REVERSE7:%.*]] = shufflevector <4 x double> [[WIDE_LOAD6]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP8:%.*]] = fcmp une <4 x double> [[REVERSE]], zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
@@ -762,11 +762,9 @@
 ; CHECK-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
 ; CHECK-NEXT:    [[STRIDED_VEC6:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[VEC_IND]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[NEXT_GEP]], i64 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC5]], [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[STRIDED_VEC6]], [[VEC_IND]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 -2
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <12 x i32>*
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[NEXT_GEP]] to <12 x i32>*
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>