Index: lib/Transforms/IPO/ConstantMerge.cpp
===================================================================
--- lib/Transforms/IPO/ConstantMerge.cpp
+++ lib/Transforms/IPO/ConstantMerge.cpp
@@ -19,6 +19,7 @@
 
 #include "llvm/Transforms/IPO/ConstantMerge.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -41,6 +42,8 @@
 #define DEBUG_TYPE "constmerge"
 
 STATISTIC(NumIdenticalMerged, "Number of identical global constants merged");
+STATISTIC(NumCommonInitialSequenceMerged,
+          "Number of global constants with common initial sequence merged");
 
 /// Find values that are marked as llvm.used.
 static void FindUsedValues(GlobalVariable *LLVMUsed,
@@ -103,11 +106,31 @@
   return CanMerge::Yes;
 }
 
-static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New) {
-  Constant *NewConstant = New;
+/// How an initializer being replaced relates to the one replacing it.
+enum class CommonSequence { Same, Initial };
+/// Describes \p S for debug output. File-local; returns on every path so that
+/// no compiler can diagnose control reaching the end of a non-void function.
+static const char *toString(CommonSequence S) {
+  if (S == CommonSequence::Same)
+    return "same";
+  return "common initial sequence";
+}
+
+static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New,
+                    CommonSequence Common) {
+  Constant *NewConstant;
+  switch (Common) {
+  case CommonSequence::Same:
+    NewConstant = New;
+    break;
+  case CommonSequence::Initial:
+    NewConstant =
+        ConstantExpr::getCast(Instruction::BitCast, New, Old->getType());
+    break;
+  }
 
-  LLVM_DEBUG(dbgs() << "Replacing global: @" << Old->getName() << " -> @"
-                    << New->getName() << "\n");
+  LLVM_DEBUG(dbgs() << "Replacing " << toString(Common) << " global: @"
+                    << Old->getName() << " -> @" << New->getName() << "\n");
 
   // Bump the alignment if necessary.
   if (Old->getAlignment() || New->getAlignment())
@@ -122,6 +145,60 @@
   Old->eraseFromParent();
 }
 
+/// Maps each of \p Initializers to the largest other one it is a prefix of, in
+/// \p CommonInitialSequenceRewrite. Re-sorts \p Initializers as a side effect.
+template <typename Container>
+static void findCommonInitialSequence(
+    MapVector<Constant *, Constant *> &CommonInitialSequenceRewrite,
+    Container &Initializers) {
+  using Contained = typename Container::value_type;
+  // Sorting by descending element count lets each initializer be matched
+  // against the largest candidates first, i.e. the longest possible match.
+  std::stable_sort(
+      Initializers.begin(), Initializers.end(), [](Contained L, Contained R) {
+        return L->getType()->getNumElements() > R->getType()->getNumElements();
+      });
+
+  auto different = [](Contained L, Contained R, unsigned Size) {
+    // Packed and non-packed structs place equal elements at different offsets.
+    auto *LTy = dyn_cast<StructType>(L->getType());
+    auto *RTy = dyn_cast<StructType>(R->getType());
+    if (LTy && RTy && LTy->isPacked() != RTy->isPacked())
+      return true;
+    for (unsigned Idx = 0; Idx != Size; ++Idx)
+      if (L->getAggregateElement(Idx) != R->getAggregateElement(Idx))
+        return true;
+    return false;
+  };
+
+  auto registerBiggerMatchFor = [&](Contained Smaller,
+                                    unsigned SmallerNumElements) {
+    for (Contained Bigger : Initializers) {
+      if (SmallerNumElements >= Bigger->getType()->getNumElements())
+        return; // Sorted largest first: nothing bigger is left.
+      if (different(Smaller, Bigger, SmallerNumElements))
+        continue;
+      LLVM_DEBUG(dbgs() << "Common initial sequence: " << *Smaller << " -> "
+                        << *Bigger << "\n");
+      CommonInitialSequenceRewrite[Smaller] = Bigger;
+      return;
+    }
+  };
+
+  for (auto Smaller = Initializers.rbegin(), LastSmaller = Initializers.rend();
+       Smaller != LastSmaller; ++Smaller)
+    if (!CommonInitialSequenceRewrite.count(*Smaller))
+      registerBiggerMatchFor(*Smaller, (*Smaller)->getType()->getNumElements());
+}
+
+/// Runs findCommonInitialSequence() on each mapped value of \p Con.
+template <typename Container>
+static void findCommonInitialSequenceForEach(
+    MapVector<Constant *, Constant *> &Rewrites, Container &Con) {
+  for (auto &Bucket : Con)
+    findCommonInitialSequence(Rewrites, Bucket.second);
+}
+
 static bool mergeConstants(Module &M) {
   // Find all the globals that are marked "used".  These cannot be merged.
   SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
@@ -134,6 +211,59 @@
   SmallVector<std::pair<GlobalVariable *, GlobalVariable *>, 32>
       SameContentReplacements;
 
+  // Keep track of constant array-like / sequenced initializers because they're
+  // candidates for common sequence deduplication. We restrict ourselves in the
+  // following way:
+  //
+  //   - Only try to deduplicate the ones with the same element types instead
+  //     of guessing at byte-level layout. This reduces how many things we
+  //     compare.
+  //   - Don't look for constant scalars which are duplicated in non-scalars.
+  //   - Don't try to find tail sequences which match (this would benefit null
+  //     terminated strings).
+  //   - Don't try to find internal sequences which match.
+  //   - Greedily match shorter sequences with the longest one we can find,
+  //     even though it might be better to match with a shorter one.
+  //   - Don't treat undef as a wildcard which can match any value.
+  //
+  // Lifting these restrictions might be profitable.
+  DenseMap<Type *, SmallVector<ConstantDataArray *, 4>> CDAs;
+  DenseMap<Type *, SmallVector<ConstantDataVector *, 4>> CDVs;
+  DenseMap<Type *, SmallVector<ConstantArray *, 4>> CAs;
+  DenseMap<Type *, SmallVector<ConstantVector *, 4>> CVs;
+  SmallVector<ConstantStruct *, 4> CSs;
+  MapVector<Constant *, Constant *> CommonInitialSequenceRewrite;
+
+  auto watchConstant = [&](Constant *WatchMe) {
+    if (auto *I = dyn_cast<ConstantDataArray>(WatchMe))
+      CDAs[I->getType()->getArrayElementType()].push_back(I);
+    else if (auto *I = dyn_cast<ConstantDataVector>(WatchMe))
+      CDVs[I->getType()->getElementType()].push_back(I);
+    else if (auto *I = dyn_cast<ConstantArray>(WatchMe))
+      CAs[I->getType()->getArrayElementType()].push_back(I);
+    else if (auto *I = dyn_cast<ConstantVector>(WatchMe))
+      CVs[I->getType()->getElementType()].push_back(I);
+    else if (auto *I = dyn_cast<ConstantStruct>(WatchMe))
+      CSs.push_back(I);
+  };
+
+  auto findAllCommonInitialSequences = [&]() {
+    findCommonInitialSequenceForEach(CommonInitialSequenceRewrite, CDAs);
+    findCommonInitialSequenceForEach(CommonInitialSequenceRewrite, CDVs);
+    findCommonInitialSequenceForEach(CommonInitialSequenceRewrite, CAs);
+    findCommonInitialSequenceForEach(CommonInitialSequenceRewrite, CVs);
+    findCommonInitialSequence(CommonInitialSequenceRewrite, CSs);
+  };
+
+  auto clearAllCommonInitialSequences = [&]() {
+    CDAs.clear();
+    CDVs.clear();
+    CAs.clear();
+    CVs.clear();
+    CSs.clear();
+    CommonInitialSequenceRewrite.clear();
+  };
+
   size_t ChangesMade = 0;
   size_t OldChangesMade = 0;
 
@@ -186,9 +316,14 @@
         Slot = GV;
         LLVM_DEBUG(dbgs() << "Cmap[" << *Init << "] = " << GV->getName()
                           << (FirstConstantFound ? "\n" : " (updated)\n"));
+        if (FirstConstantFound)
+          watchConstant(Init);
       }
     }
 
+    // Find initializers which have a common initial sequence.
+    findAllCommonInitialSequences();
+
     // Identify all globals that can be merged together, filling in the
     // SameContentReplacements vector. We cannot do the replacement in this pass
     // because doing so may cause initializers of other globals to be rewritten,
@@ -228,23 +363,54 @@
       SameContentReplacements.push_back(std::make_pair(GV, Slot));
     }
 
-    // Now that we have figured out which replacements must be made, do them all
-    // now.  This avoid invalidating the pointers in CMap, which are unneeded
-    // now.
-    for (unsigned i = 0, e = SameContentReplacements.size(); i != e; ++i) {
-      GlobalVariable *Old = SameContentReplacements[i].first;
-      GlobalVariable *New = SameContentReplacements[i].second;
-      replace(M, Old, New);
+    // Now that we have figured out which replacements must be made, do them
+    // all. Do the replacements where initializers are exactly the same,
+    // updating the pointers in CMap for common sequence replacement.
+    for (auto &&R : SameContentReplacements) {
+      GlobalVariable *Old = R.first;
+      GlobalVariable *New = R.second;
+      Constant *OldInitializer = Old->getInitializer();
+      replace(M, Old, New, CommonSequence::Same);
+      LLVM_DEBUG(dbgs() << "Cmap[" << *OldInitializer
+                        << "] = " << New->getName() << " (same updated)\n");
+      CMap[OldInitializer] = New;
       ++ChangesMade;
       ++NumIdenticalMerged;
     }
 
+    // We might have dropped some constant in a previous iteration or through
+    // an exact match. No need to consider them again.
+    CommonInitialSequenceRewrite.remove_if(
+        [&](std::pair<Constant *, Constant *> &P) {
+          return CMap.find(P.first) == CMap.end() ||
+                 CMap.find(P.second) == CMap.end();
+        });
+
+    // Replacements where initializers have a common initial sequence.
+    for (auto &&I : CommonInitialSequenceRewrite) {
+      GlobalVariable *Old = CMap.find(I.first)->second;
+      GlobalVariable *New = CMap.find(I.second)->second;
+
+      if (makeMergeable(Old, New) == CanMerge::No)
+        continue;
+
+      Constant *OldInitializer = Old->getInitializer();
+      replace(M, Old, New, CommonSequence::Initial);
+      LLVM_DEBUG(dbgs() << "Cmap[" << *OldInitializer
+                        << "] = " << New->getName()
+                        << " (common initial sequence updated)\n");
+      CMap[OldInitializer] = New;
+      ++NumCommonInitialSequenceMerged;
+      ++ChangesMade;
+    }
+
     if (ChangesMade == OldChangesMade)
       break;
     OldChangesMade = ChangesMade;
 
     SameContentReplacements.clear();
     CMap.clear();
+    clearAllCommonInitialSequences();
   }
 
   return ChangesMade;
Index: test/Transforms/ConstantMerge/initial-match.ll
===================================================================
--- /dev/null
+++ test/Transforms/ConstantMerge/initial-match.ll
@@ -0,0 +1,150 @@
+; RUN: opt -constmerge -S < %s | FileCheck %s
+
+; Test that constants which share a common initial sequence are merged.
+
+%foo = type { i16, i8 }
+%baz = type { i16, i8, i8* }
+%bat = type { i16, i8, i8*, i16 }
+
+; Make sure these are entirely gone:
+
+; CHECK-NOT: bar_1
+; CHECK-NOT: bar_2
+; CHECK-NOT: bar_3
+; CHECK-NOT: bar_4
+; CHECK-NOT: bar_5
+; CHECK-NOT: bar_6
+; CHECK-NOT: bar_7
+; CHECK-NOT: bar_8_drop
+; CHECK-NOT: hello_drop
+; CHECK-NOT: hello_drop_me_too
+; CHECK-NOT: arr_2
+; CHECK-NOT: struct_1
+; CHECK-NOT: struct_2
+; CHECK-NOT: vec_2
+; CHECK-NOT: vec_ptr_1
+
+; Make sure only these are present:
+
+; CHECK:      @unused = constant
+; CHECK-NEXT: @different = private unnamed_addr constant
+; CHECK-NEXT: @bar_8_keep = private unnamed_addr constant
+; CHECK-NEXT: @helloer = private unnamed_addr constant
+; CHECK-NEXT: @helloworld = private unnamed_addr constant
+
+; This one is tricky because it contains references to other global constants which should get replaced.
+; CHECK-NEXT: @arr_3 = private unnamed_addr constant [3 x i8*] [{{.*}} @helloworld, {{.*}} @helloer, {{.*}} @helloworld, {{.*}}], align 32
+
+; CHECK-NEXT: @struct_3 = private unnamed_addr constant %bat { i16 42, i8 69, i8* getelementptr inbounds ([12 x i8], [12 x i8]* @helloworld, i64 0, i64 0), i16 1337 }
+; CHECK-NEXT: @vec_3 = private unnamed_addr constant <3 x i32> <i32 69, i32 1337, i32 42>
+; CHECK-NEXT: @vec_ptr_2 = private unnamed_addr constant <2 x <2 x i32>*> <<2 x i32>* bitcast (<3 x i32>* @vec_3 to <2 x i32>*), <2 x i32>* bitcast (<3 x i32>* @vec_3 to <2 x i32>*)>
+
+; CHECK-NEXT: @last = constant i32 1337
+
+@unused = constant i32 42
+
+@bar_1 = private unnamed_addr constant [1 x %foo] [%foo { i16 257, i8 1 }], align 2
+@bar_2 = private unnamed_addr constant [2 x %foo] [%foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }], align 2
+@different = private unnamed_addr constant [2 x %foo] [%foo { i16 257, i8 1 }, %foo { i16 257, i8 42 }], align 2
+@bar_2_electric_boogaloo = private unnamed_addr constant [2 x %foo] [%foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }], align 2
+@bar_3 = private unnamed_addr constant [3 x %foo] [%foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }], align 2
+@bar_4 = private unnamed_addr constant [4 x %foo] [%foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }], align 16
+@bar_5 = private unnamed_addr constant [5 x %foo] [%foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }], align 16
+@bar_6 = private unnamed_addr constant [6 x %foo] [%foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }], align 16
+@bar_7 = private unnamed_addr constant [7 x %foo] [%foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }], align 16
+@bar_8_drop = private unnamed_addr constant [8 x %foo] [%foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }], align 16
+@bar_8_keep = private unnamed_addr constant [8 x %foo] [%foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }, %foo { i16 257, i8 1 }], align 16
+
+@hello_drop = private unnamed_addr constant [5 x i8] c"hello", align 1
+@hello_drop_me_too = private unnamed_addr constant [5 x i8] c"hello", align 1
+@helloer = private unnamed_addr constant [7 x i8] c"helloer", align 1
+@helloworld = private unnamed_addr constant [12 x i8] c"hello world!", align 1
+
+@arr_2 = private unnamed_addr constant [2 x i8*] [i8* getelementptr inbounds ([5 x i8], [5 x i8]* @hello_drop, i64 0, i64 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @helloer, i64 0, i64 0)], align 32
+@arr_3 = private unnamed_addr constant [3 x i8*] [i8* getelementptr inbounds ([5 x i8], [5 x i8]* @hello_drop, i64 0, i64 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @helloer, i64 0, i64 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @hello_drop_me_too, i64 0, i64 0)], align 16
+
+@struct_1 = private unnamed_addr constant %foo { i16 42, i8 69 }
+@struct_2 = private unnamed_addr constant %baz { i16 42, i8 69, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @hello_drop_me_too, i64 0, i64 0) }
+@struct_3 = private unnamed_addr constant %bat { i16 42, i8 69, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @hello_drop, i64 0, i64 0), i16 1337 }
+
+@vec_2 = private unnamed_addr constant <2 x i32> <i32 69, i32 1337>
+@vec_3 = private unnamed_addr constant <3 x i32> <i32 69, i32 1337, i32 42>
+
+@vec_ptr_1 = private unnamed_addr constant <1 x <2 x i32>*> <<2 x i32>* @vec_2>
+@vec_ptr_2 = private unnamed_addr constant <2 x <2 x i32>*> <<2 x i32>* @vec_2, <2 x i32>* @vec_2>
+
+; FIXME: test that two zeroinitializer things get common initial sequence deduplication. Ditto undef.
+
+@last = constant i32 1337
+
+declare void @usefoo(%foo*)
+declare void @usebaz(%baz*)
+declare void @usebat(%bat*)
+declare void @usefoo1([1 x %foo]*)
+declare void @usefoo2([2 x %foo]*)
+declare void @usefoo3([3 x %foo]*)
+declare void @usefoo4([4 x %foo]*)
+declare void @usefoo5([5 x %foo]*)
+declare void @usefoo6([6 x %foo]*)
+declare void @usefoo7([7 x %foo]*)
+declare void @usefoo8([8 x %foo]*)
+declare void @usei8(i8*)
+declare void @usei8_5([5 x i8]*)
+declare void @usei8_7([7 x i8]*)
+declare void @usei8_12([12 x i8]*)
+declare void @usearr2([2 x i8*]*)
+declare void @usearr3([3 x i8*]*)
+declare void @usevec2(<2 x i32>*)
+declare void @usevec3(<3 x i32>*)
+declare void @usevecptr1(<1 x <2 x i32>*>*)
+declare void @usevecptr2(<2 x <2 x i32>*>*)
+
+define i32 @main() { ; CHECK-LABEL: @main()
+entry:               ; CHECK-LABEL: entry
+  tail call void @usefoo(%foo* getelementptr inbounds ([1 x %foo], [1 x %foo]* @bar_1, i64 0, i64 0)) ; CHECK-NEXT: tail call void @usefoo({{.*}}@bar_8_keep, i64 0, i64 0))
+  tail call void @usefoo(%foo* getelementptr inbounds ([2 x %foo], [2 x %foo]* @bar_2, i64 0, i64 0)) ; CHECK-NEXT: tail call void @usefoo({{.*}}@bar_8_keep, i64 0, i64 0))
+  tail call void @usefoo(%foo* getelementptr inbounds ([2 x %foo], [2 x %foo]* @different, i64 0, i64 0)) ; CHECK-NEXT: tail call void @usefoo({{.*}}@different, i64 0, i64 0))
+  tail call void @usefoo(%foo* getelementptr inbounds ([2 x %foo], [2 x %foo]* @bar_2_electric_boogaloo, i64 0, i64 0)) ; CHECK-NEXT: tail call void @usefoo({{.*}}@bar_8_keep, i64 0, i64 0))
+  tail call void @usefoo(%foo* getelementptr inbounds ([3 x %foo], [3 x %foo]* @bar_3, i64 0, i64 0)) ; CHECK-NEXT: tail call void @usefoo({{.*}}@bar_8_keep, i64 0, i64 0))
+  tail call void @usefoo(%foo* getelementptr inbounds ([4 x %foo], [4 x %foo]* @bar_4, i64 0, i64 0)) ; CHECK-NEXT: tail call void @usefoo({{.*}}@bar_8_keep, i64 0, i64 0))
+  tail call void @usefoo(%foo* getelementptr inbounds ([5 x %foo], [5 x %foo]* @bar_5, i64 0, i64 0)) ; CHECK-NEXT: tail call void @usefoo({{.*}}@bar_8_keep, i64 0, i64 0))
+  tail call void @usefoo(%foo* getelementptr inbounds ([6 x %foo], [6 x %foo]* @bar_6, i64 0, i64 0)) ; CHECK-NEXT: tail call void @usefoo({{.*}}@bar_8_keep, i64 0, i64 0))
+  tail call void @usefoo(%foo* getelementptr inbounds ([7 x %foo], [7 x %foo]* @bar_7, i64 0, i64 0)) ; CHECK-NEXT: tail call void @usefoo({{.*}}@bar_8_keep, i64 0, i64 0))
+  tail call void @usefoo(%foo* getelementptr inbounds ([8 x %foo], [8 x %foo]* @bar_8_drop, i64 0, i64 0)) ; CHECK-NEXT: tail call void @usefoo({{.*}}@bar_8_keep, i64 0, i64 0))
+  tail call void @usefoo(%foo* getelementptr inbounds ([8 x %foo], [8 x %foo]* @bar_8_keep, i64 0, i64 0)) ; CHECK-NEXT: tail call void @usefoo({{.*}}@bar_8_keep, i64 0, i64 0))
+  ; The same %foo arrays again, this time passed as pointers to the whole array.
+  tail call void @usefoo1([1 x %foo]* @bar_1) ; CHECK-NEXT: tail call void @usefoo1({{.*}} bitcast {{.*}}@bar_8_keep
+  tail call void @usefoo2([2 x %foo]* @bar_2) ; CHECK-NEXT: tail call void @usefoo2({{.*}} bitcast {{.*}}@bar_8_keep
+  tail call void @usefoo2([2 x %foo]* @different) ; CHECK-NEXT: tail call void @usefoo2([2 x %foo]* @different)
+  tail call void @usefoo2([2 x %foo]* @bar_2_electric_boogaloo) ; CHECK-NEXT: tail call void @usefoo2({{.*}} bitcast {{.*}}@bar_8_keep
+  tail call void @usefoo3([3 x %foo]* @bar_3) ; CHECK-NEXT: tail call void @usefoo3({{.*}} bitcast {{.*}}@bar_8_keep
+  tail call void @usefoo4([4 x %foo]* @bar_4) ; CHECK-NEXT: tail call void @usefoo4({{.*}} bitcast {{.*}}@bar_8_keep
+  tail call void @usefoo5([5 x %foo]* @bar_5) ; CHECK-NEXT: tail call void @usefoo5({{.*}} bitcast {{.*}}@bar_8_keep
+  tail call void @usefoo6([6 x %foo]* @bar_6) ; CHECK-NEXT: tail call void @usefoo6({{.*}} bitcast {{.*}}@bar_8_keep
+  tail call void @usefoo7([7 x %foo]* @bar_7) ; CHECK-NEXT: tail call void @usefoo7({{.*}} bitcast {{.*}}@bar_8_keep
+  tail call void @usefoo8([8 x %foo]* @bar_8_drop) ; CHECK-NEXT: tail call void @usefoo8([8 x %foo]* @bar_8_keep)
+  tail call void @usefoo8([8 x %foo]* @bar_8_keep) ; CHECK-NEXT: tail call void @usefoo8([8 x %foo]* @bar_8_keep)
+  ; i8 strings, passed as a pointer to their first element.
+  tail call void @usei8(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @hello_drop, i64 0, i64 0)) ; CHECK-NEXT: tail call void @usei8({{.*}}@helloworld, i64 0, i64 0))
+  tail call void @usei8(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @helloer, i64 0, i64 0)) ; CHECK-NEXT: tail call void @usei8({{.*}}@helloer, i64 0, i64 0))
+  tail call void @usei8(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @helloworld, i64 0, i64 0)) ; CHECK-NEXT: tail call void @usei8({{.*}}@helloworld, i64 0, i64 0))
+  ; The same i8 strings, passed as pointers to the whole array.
+  tail call void @usei8_5([5 x i8]* @hello_drop) ; CHECK-NEXT: tail call void @usei8_5({{.*}} bitcast {{.*}}@helloworld
+  tail call void @usei8_7([7 x i8]* @helloer) ; CHECK-NEXT: tail call void @usei8_7([7 x i8]* @helloer)
+  tail call void @usei8_12([12 x i8]* @helloworld) ; CHECK-NEXT: tail call void @usei8_12([12 x i8]* @helloworld)
+  ; Arrays whose elements are pointers into the strings above.
+  tail call void @usearr2([2 x i8*]* @arr_2) ; CHECK-NEXT: tail call void @usearr2([2 x i8*]* bitcast ([3 x i8*]* @arr_3 to [2 x i8*]*))
+  tail call void @usearr3([3 x i8*]* @arr_3) ; CHECK-NEXT: tail call void @usearr3([3 x i8*]* @arr_3)
+  ; Structs: the fields of %foo and %baz are the leading fields of %bat.
+  tail call void @usefoo(%foo* @struct_1) ; CHECK-NEXT: tail call void @usefoo(%foo* bitcast (%bat* @struct_3 to %foo*))
+  tail call void @usebaz(%baz* @struct_2) ; CHECK-NEXT: tail call void @usebaz(%baz* bitcast (%bat* @struct_3 to %baz*))
+  tail call void @usebat(%bat* @struct_3) ; CHECK-NEXT: tail call void @usebat(%bat* @struct_3)
+  ; Vectors of i32.
+  tail call void @usevec2(<2 x i32>* @vec_2) ; CHECK-NEXT: tail call void @usevec2(<2 x i32>* bitcast (<3 x i32>* @vec_3 to <2 x i32>*))
+  tail call void @usevec3(<3 x i32>* @vec_3) ; CHECK-NEXT: tail call void @usevec3(<3 x i32>* @vec_3)
+  ; Vectors whose elements are pointers to @vec_2.
+  tail call void @usevecptr1(<1 x <2 x i32>*>* @vec_ptr_1) ; CHECK-NEXT: tail call void @usevecptr1(<1 x <2 x i32>*>* bitcast (<2 x <2 x i32>*>* @vec_ptr_2 to <1 x <2 x i32>*>*))
+  tail call void @usevecptr2(<2 x <2 x i32>*>* @vec_ptr_2) ; CHECK-NEXT: tail call void @usevecptr2(<2 x <2 x i32>*>* @vec_ptr_2)
+
+  ret i32 0
+}