Index: lib/Transforms/IPO/MergeFunctions.cpp
===================================================================
--- lib/Transforms/IPO/MergeFunctions.cpp
+++ lib/Transforms/IPO/MergeFunctions.cpp
@@ -617,8 +617,8 @@
 /// See method declaration comments for more details.
 int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
-  PointerType *PTyL = dyn_cast<PointerType>(TyL);
-  PointerType *PTyR = dyn_cast<PointerType>(TyR);
+  PointerType *PTyL = dyn_cast<PointerType>(TyL->getScalarType());
+  PointerType *PTyR = dyn_cast<PointerType>(TyR->getScalarType());
 
   if (DL) {
     if (PTyL && PTyL->getAddressSpace() == 0)
       TyL = DL->getIntPtrType(TyL);
@@ -845,7 +845,7 @@
 // Determine whether two GEP operations perform the same underlying arithmetic.
 // Read method declaration comments for more details.
 int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
-                                const GEPOperator *GEPR) { 
+                                const GEPOperator *GEPR) {
   unsigned int ASL = GEPL->getPointerAddressSpace();
   unsigned int ASR = GEPR->getPointerAddressSpace();
 
@@ -863,16 +863,133 @@
     return cmpAPInts(OffsetL, OffsetR);
   }
 
-  if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(),
-                           (uint64_t)GEPR->getPointerOperand()->getType()))
-    return Res;
-
   if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
     return Res;
 
+  if (GEPL->getPointerOperand()->getType() ==
+      GEPR->getPointerOperand()->getType()) {
+    // If the types are identical, and all the operands are identical, then we
+    // don't need to look at offsets, but can just return that these are equal.
+    for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i)
+      if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
+        return Res;
+    return 0;
+  }
+
+  if (!DL) {
+    // If the GEP types aren't identical, and we lack DataLayout, then we
+    // can't work out the sizes of GEP elements.
+    return cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(),
+                      (uint64_t)GEPR->getPointerOperand()->getType());
+  }
+
+  // We have DataLayout available, and can look at the actual offsets which
+  // would be generated for dynamic indices. For example,
+  //   float a[]; int b[];
+  // a[i] and b[i] will both generate the same offsets from their bases.
+
+  // Note that the pointer operand may be a vector of pointers. Take the
+  // scalar element which holds a pointer.
+  Type *EltTyL = GEPL->getPointerOperandType()->getScalarType();
+  Type *EltTyR = GEPR->getPointerOperandType()->getScalarType();
 
   for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) {
-    if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
+    Value *OpL = GEPL->getOperand(i);
+    Value *OpR = GEPR->getOperand(i);
+
+    // The bases only need to be identical operands; we don't care about
+    // their offsets.
+    if (!i) {
+      if (int Res = cmpValues(OpL, OpR))
+        return Res;
+      continue;
+    }
+
+    StructType *StTyL = dyn_cast<StructType>(EltTyL);
+    StructType *StTyR = dyn_cast<StructType>(EltTyR);
+    // TODO: Handle the case where only one side is a struct, so long as both
+    // sides have a constant at this index, and the field offsets are a match.
+    if (int Res = cmpNumbers(!!StTyL, !!StTyR))
+      return Res;
+
+    if (StTyL) {
+      unsigned IntL = cast<Constant>(OpL)->getUniqueInteger().getZExtValue();
+      unsigned IntR = cast<Constant>(OpR)->getUniqueInteger().getZExtValue();
+      if (int Res = cmpNumbers(IntL, IntR))
+        return Res;
+
+      // Make sure that both left and right get the same offset here.
+      // We don't actually care what fields we skip over, so long as we get
+      // the same offset.
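+      // A worked example of the offsets compared here (illustrative types,
+      // not from this patch; assumes the usual x86-64 struct layout):
+      //   struct A { int a; float f; };  // field 1 at byte offset 4
+      //   struct B { float f; int b; };  // field 1 at byte offset 4
+      // Indexing field 1 of A and field 1 of B gives equal offsets, so the
+      // GEPs compare equal at this index even though the struct types differ.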
+      uint64_t OffsetL = DL->getStructLayout(StTyL)->getElementOffset(IntL);
+      uint64_t OffsetR = DL->getStructLayout(StTyR)->getElementOffset(IntR);
+      if (int Res = cmpNumbers(OffsetL, OffsetR))
+        return Res;
+      EltTyL = StTyL->getElementType(IntL);
+      EltTyR = StTyR->getElementType(IntR);
+      continue;
+    }
+
+    EltTyL = cast<SequentialType>(EltTyL)->getElementType();
+    EltTyR = cast<SequentialType>(EltTyR)->getElementType();
+
+    // If this is a constant subscript, handle it quickly.
+    const Constant *ConstL = dyn_cast<Constant>(OpL);
+    const Constant *ConstR = dyn_cast<Constant>(OpR);
+
+    if (int Res = cmpNumbers(!!ConstL, !!ConstR))
       return Res;
+
+    if (ConstL) {
+      bool ZeroL = ConstL->isNullValue();
+      bool ZeroR = ConstR->isNullValue();
+      if (int Res = cmpNumbers(ZeroL, ZeroR))
+        return Res;
+      if (ZeroL) continue;
+
+      // Constant non-zero indices must be at the same offset. For example,
+      //   char c[4]; short s[4];
+      // c[2] and s[1] have the same offset.
+
+      // If we have scalars, we can just get the integer index; otherwise,
+      // for a vector we look for a splatted value.
+      const Constant *ConstIntL = nullptr;
+      const Constant *ConstIntR = nullptr;
+      if (ConstL->getType()->isIntegerTy()) {
+        ConstIntL = dyn_cast<ConstantInt>(ConstL);
+        ConstIntR = dyn_cast<ConstantInt>(ConstR);
+      } else {
+        assert(ConstL->getType()->isVectorTy() && "Expected vector type");
+        ConstIntL = dyn_cast_or_null<ConstantInt>(ConstL->getSplatValue());
+        ConstIntR = dyn_cast_or_null<ConstantInt>(ConstR->getSplatValue());
+
+        // If either of the constants isn't a splat, then we need to give up.
+        if (int Res = cmpNumbers(!!ConstIntL, !!ConstIntR))
+          return Res;
+        if (!ConstIntL || !ConstIntR) {
+          // It's possible for both values to not be splats, in which case we
+          // need a reliable (ordered) way to compare them.
+          // We know at this point that the GEPs aren't of equal types, so we
+          // can compare those.
+          return cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(),
+                            (uint64_t)GEPR->getPointerOperand()->getType());
+        }
+      }
+
+      unsigned IntL = cast<ConstantInt>(ConstIntL)->getZExtValue();
+      unsigned IntR = cast<ConstantInt>(ConstIntR)->getZExtValue();
+      if (int Res = cmpNumbers(DL->getTypeAllocSize(EltTyL) * IntL,
+                               DL->getTypeAllocSize(EltTyR) * IntR))
+        return Res;
+    } else {
+      // Dynamic indices. These must be the same index value and have the
+      // same underlying element size.
+      if (int Res = cmpValues(OpL, OpR))
+        return Res;
+
+      if (int Res = cmpNumbers(DL->getTypeAllocSize(EltTyL),
+                               DL->getTypeAllocSize(EltTyR)))
+        return Res;
+    }
   }
 
   return 0;
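The arithmetic the comparator relies on can be sanity-checked outside of LLVM. A minimal standalone sketch (not part of the patch; the host C types and sizes are illustrative stand-ins for getTypeAllocSize results on the x86-64 target the tests assume):

  #include <cassert>
  #include <cstdint>

  int main() {
    // char c[4]; short s[4]; -- c[2] and s[1] are both 2 bytes past the
    // base, so cmpNumbers(1 * 2, 2 * 1) reports equal byte offsets.
    std::uint64_t OffC = sizeof(char) * 2;   // element size 1, index 2
    std::uint64_t OffS = sizeof(short) * 1;  // element size 2, index 1
    assert(OffC == OffS);

    // A dynamic index i only matches when the element sizes themselves
    // match: float a[]; int b[]; -- a[i] and b[i] both scale i by 4 bytes.
    assert(sizeof(float) == sizeof(int));
    return 0;
  }

The two test files below exercise exactly these cases: gep.ll covers pairs that must merge, gep2.ll covers pairs that must not.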
Index: test/Transforms/MergeFunc/gep.ll
===================================================================
--- /dev/null
+++ test/Transforms/MergeFunc/gep.ll
@@ -0,0 +1,79 @@
+; RUN: opt -S -mergefunc -o - %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+%vector_float = type { i64, [8 x float] }
+%vector_int = type { { i32, i32 }, [8 x i32] }
+%vector_char4 = type { { i32, i32 }, [8 x <4 x i8>] }
+%vector_short2 = type { { i32, i32 }, [8 x <2 x i16>] }
+
+declare void @keepalive(i8*)
+
+; The i'th element of the float and int vectors should be at the same offset.
+
+; CHECK-DAG: call void @{{vector_float|vector_int}}
+
+define linkonce_odr void @vector_float(%vector_float* %this, i64 %i) unnamed_addr {
+entry:
+  %offset_i = getelementptr %vector_float* %this, i64 0, i32 1, i64 %i
+  %bc_offset_i = bitcast float* %offset_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
+
+define linkonce_odr void @vector_int(%vector_int* %this, i64 %i) unnamed_addr {
+entry:
+  %offset_i = getelementptr %vector_int* %this, i64 0, i32 1, i64 %i
+  %bc_offset_i = bitcast i32* %offset_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
+
+; The character at index 2 should be at the same offset as the short at index 1.
+
+; CHECK-DAG: call void @{{vector_char4_index2|vector_short2_index1}}
+
+define linkonce_odr void @vector_char4_index2(%vector_char4* %this, i64 %i) unnamed_addr {
+entry:
+  %offset_i = getelementptr %vector_char4* %this, i64 0, i32 1, i64 %i, i64 2
+  %bc_offset_i = bitcast i8* %offset_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
+
+define linkonce_odr void @vector_short2_index1(%vector_short2* %this, i64 %i) unnamed_addr {
+entry:
+  %offset_i = getelementptr %vector_short2* %this, i64 0, i32 1, i64 %i, i64 1
+  %bc_offset_i = bitcast i16* %offset_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
+
+; Using vector GEPs with splatted constant indices instead of scalar indices
+; shouldn't change whether we can merge.
+
+; CHECK-DAG: call void @{{vector_char4_vec|vector_short2_vec}}
+
+%vector_char4_ptr = type <4 x %vector_char4*>
+%vector_short2_ptr = type <4 x %vector_short2*>
+
+define linkonce_odr void @vector_char4_vec(%vector_char4_ptr %this, <4 x i64> %i) unnamed_addr {
+entry:
+  %bc_ptr = bitcast %vector_char4_ptr %this to %vector_char4_ptr
+  %offset_i = getelementptr %vector_char4_ptr %bc_ptr, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i64> %i, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  %ext_i = extractelement <4 x i8*> %offset_i, i32 0
+  %bc_offset_i = bitcast i8* %ext_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
+
+define linkonce_odr void @vector_short2_vec(%vector_char4_ptr %this, <4 x i64> %i) unnamed_addr {
+entry:
+  %bc_ptr = bitcast %vector_char4_ptr %this to %vector_short2_ptr
+  %offset_i = getelementptr %vector_short2_ptr %bc_ptr, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i64> %i, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %ext_i = extractelement <4 x i16*> %offset_i, i32 0
+  %bc_offset_i = bitcast i16* %ext_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
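For contrast, gep2.ll below covers pairs that must stay distinct. A one-line sketch of why its first pair cannot merge (standalone C++, illustrative host types again): the char at index 1 is 1 byte past the element base while the short at index 1 is 2 bytes past it, so the alloc-size products the comparator checks differ.

  static_assert(sizeof(char) * 1 != sizeof(short) * 1,
                "char index 1 is at byte 1; short index 1 is at byte 2");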
Index: test/Transforms/MergeFunc/gep2.ll
===================================================================
--- /dev/null
+++ test/Transforms/MergeFunc/gep2.ll
@@ -0,0 +1,95 @@
+; RUN: opt -S -mergefunc -o - %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+%vector_char4 = type { { i32, i32 }, [8 x <4 x i8>] }
+%vector_short2 = type { { i32, i32 }, [8 x <2 x i16>] }
+
+declare void @keepalive(i8*)
+
+; The character at index 1 should not be at the same offset as the short at index 1.
+
+define linkonce_odr void @vector_char4_index1(%vector_char4* %this, i64 %i) unnamed_addr {
+entry:
+; CHECK-LABEL: @vector_char4_index1
+; CHECK: getelementptr
+; CHECK: bitcast
+; CHECK: call void @keepalive
+; CHECK: ret void
+  %offset_i = getelementptr %vector_char4* %this, i64 0, i32 1, i64 %i, i64 1
+  %bc_offset_i = bitcast i8* %offset_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
+
+define linkonce_odr void @vector_short2_index1(%vector_short2* %this, i64 %i) unnamed_addr {
+entry:
+; CHECK-LABEL: @vector_short2_index1
+; CHECK: getelementptr
+; CHECK: bitcast
+; CHECK: call void @keepalive
+; CHECK: ret void
+  %offset_i = getelementptr %vector_short2* %this, i64 0, i32 1, i64 %i, i64 1
+  %bc_offset_i = bitcast i16* %offset_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
+
+; Comparing vector constant indices is only supported when the constants are
+; splats for now, so the non-splat cases below must not merge.
+
+%vector_char4_ptr = type <4 x %vector_char4*>
+%vector_short2_ptr = type <4 x %vector_short2*>
+
+define linkonce_odr void @vector_char4_vec(%vector_char4_ptr %this, <4 x i64> %i) unnamed_addr {
+entry:
+; CHECK-LABEL: @vector_char4_vec
+; CHECK: bitcast
+; CHECK: getelementptr
+; CHECK: extractelement
+; CHECK: bitcast
+; CHECK: call void @keepalive
+; CHECK: ret void
+  %bc_ptr = bitcast %vector_char4_ptr %this to %vector_char4_ptr
+  %offset_i = getelementptr %vector_char4_ptr %bc_ptr, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i64> %i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %ext_i = extractelement <4 x i8*> %offset_i, i32 0
+  %bc_offset_i = bitcast i8* %ext_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
+
+; This is the same as the above test, but with the fields of the non-splat
+; constant in a different order. It ensures we still don't merge here.
+
+define linkonce_odr void @vector_char4_vec2(%vector_char4_ptr %this, <4 x i64> %i) unnamed_addr {
+entry:
+; CHECK-LABEL: @vector_char4_vec2
+; CHECK: bitcast
+; CHECK: getelementptr
+; CHECK: extractelement
+; CHECK: bitcast
+; CHECK: call void @keepalive
+; CHECK: ret void
+  %bc_ptr = bitcast %vector_char4_ptr %this to %vector_char4_ptr
+  %offset_i = getelementptr %vector_char4_ptr %bc_ptr, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i64> %i, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %ext_i = extractelement <4 x i8*> %offset_i, i32 0
+  %bc_offset_i = bitcast i8* %ext_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
+
+define linkonce_odr void @vector_short2_vec(%vector_char4_ptr %this, <4 x i64> %i) unnamed_addr {
+entry:
+; CHECK-LABEL: @vector_short2_vec
+; CHECK: bitcast
+; CHECK: getelementptr
+; CHECK: extractelement
+; CHECK: bitcast
+; CHECK: call void @keepalive
+; CHECK: ret void
+  %bc_ptr = bitcast %vector_char4_ptr %this to %vector_short2_ptr
+  %offset_i = getelementptr %vector_short2_ptr %bc_ptr, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i64> %i, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+  %ext_i = extractelement <4 x i16*> %offset_i, i32 0
+  %bc_offset_i = bitcast i16* %ext_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
\ No newline at end of file
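To exercise just these tests locally, the usual lit invocation should work (paths assume an in-tree LLVM checkout and built tools; adjust to your setup):

  llvm-lit test/Transforms/MergeFunc/gep.ll test/Transforms/MergeFunc/gep2.ll

which runs the RUN line in each file, i.e. opt -S -mergefunc -o - %s | FileCheck %s.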