Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -942,6 +942,18 @@
       return nullptr;
     }
 
+    ScheduleData *getScheduleData(Value *V, Value *Key) {
+      if (V == Key)
+        return getScheduleData(V);
+      auto I = ExtraScheduleDataMap.find(V);
+      if (I != ExtraScheduleDataMap.end()) {
+        ScheduleData *SD = I->second[Key];
+        if (SD && SD->SchedulingRegionID == SchedulingRegionID)
+          return SD;
+      }
+      return nullptr;
+    }
+
     bool isInSchedulingRegion(ScheduleData *SD) {
       return SD->SchedulingRegionID == SchedulingRegionID;
     }
@@ -955,19 +967,29 @@
       ScheduleData *BundleMember = SD;
       while (BundleMember) {
+        if (BundleMember->Inst != BundleMember->OpValue) {
+          BundleMember = BundleMember->NextInBundle;
+          continue;
+        }
         // Handle the def-use chain dependencies.
         for (Use &U : BundleMember->Inst->operands()) {
-          ScheduleData *OpDef = getScheduleData(U.get());
-          if (OpDef && OpDef->hasValidDependencies() &&
-              OpDef->incrementUnscheduledDeps(-1) == 0) {
-            // There are no more unscheduled dependencies after decrementing,
-            // so we can put the dependent instruction into the ready list.
-            ScheduleData *DepBundle = OpDef->FirstInBundle;
-            assert(!DepBundle->IsScheduled &&
-                   "already scheduled bundle gets ready");
-            ReadyList.insert(DepBundle);
-            DEBUG(dbgs() << "SLP: gets ready (def): " << *DepBundle << "\n");
-          }
+          auto *I = dyn_cast<Instruction>(U.get());
+          if (!I)
+            continue;
+          doForAllOpcodes(I, [&ReadyList](ScheduleData *OpDef) {
+            if (OpDef && OpDef->hasValidDependencies() &&
+                OpDef->incrementUnscheduledDeps(-1) == 0) {
+              // There are no more unscheduled dependencies after
+              // decrementing, so we can put the dependent instruction
+              // into the ready list.
+              ScheduleData *DepBundle = OpDef->FirstInBundle;
+              assert(!DepBundle->IsScheduled &&
+                     "already scheduled bundle gets ready");
+              ReadyList.insert(DepBundle);
+              DEBUG(dbgs()
+                    << "SLP: gets ready (def): " << *DepBundle << "\n");
+            }
+          });
         }
         // Handle the memory dependencies.
         for (ScheduleData *MemoryDepSD : BundleMember->MemoryDependencies) {
@@ -978,22 +1000,35 @@
             assert(!DepBundle->IsScheduled &&
                    "already scheduled bundle gets ready");
             ReadyList.insert(DepBundle);
-            DEBUG(dbgs() << "SLP: gets ready (mem): " << *DepBundle << "\n");
+            DEBUG(dbgs() << "SLP: gets ready (mem): " << *DepBundle
+                         << "\n");
           }
         }
         BundleMember = BundleMember->NextInBundle;
       }
     }
 
+    void doForAllOpcodes(Value *V,
+                         function_ref<void(ScheduleData *SD)> Action) {
+      if (ScheduleData *SD = getScheduleData(V))
+        Action(SD);
+      auto I = ExtraScheduleDataMap.find(V);
+      if (I != ExtraScheduleDataMap.end())
+        for (auto &P : I->second)
+          if (P.second->SchedulingRegionID == SchedulingRegionID)
+            Action(P.second);
+    }
+
     /// Put all instructions into the ReadyList which are ready for scheduling.
     template <typename ReadyListType>
     void initialFillReadyList(ReadyListType &ReadyList) {
       for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
-        ScheduleData *SD = getScheduleData(I);
-        if (SD->isSchedulingEntity() && SD->isReady()) {
-          ReadyList.insert(SD);
-          DEBUG(dbgs() << "SLP: initially in ready list: " << *I << "\n");
-        }
+        doForAllOpcodes(I, [&ReadyList, I](ScheduleData *SD) {
+          if (SD->isSchedulingEntity() && SD->isReady()) {
+            ReadyList.insert(SD);
+            DEBUG(dbgs() << "SLP: initially in ready list: " << *I << "\n");
+          }
+        });
       }
     }
 
@@ -1005,9 +1040,12 @@
     /// Un-bundles a group of instructions.
     void cancelScheduling(ArrayRef<Value *> VL, Value *OpValue);
 
+    /// Allocates schedule data chunk.
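+    /// The chunked allocation is shared with initScheduleData(), so the
+    /// extra ScheduleData created for values in ExtraScheduleDataMap lives
+    /// in the same storage as the regular per-instruction entries.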
+    ScheduleData *allocateScheduleDataChunks();
+
     /// Extends the scheduling region so that V is inside the region.
     /// \returns true if the region size is within the limit.
-    bool extendSchedulingRegion(Value *V);
+    bool extendSchedulingRegion(Value *V, Value *OpValue);
 
     /// Initialize the ScheduleData structures for new instructions in the
     /// scheduling region.
@@ -1040,6 +1078,10 @@
     /// ScheduleData structures are recycled.
     DenseMap<Value *, ScheduleData *> ScheduleDataMap;
 
+    /// Attaches ScheduleData to Instruction with the leading key.
+    DenseMap<Value *, SmallDenseMap<Value *, ScheduleData *>>
+        ExtraScheduleDataMap;
+
     struct ReadyList : SmallVector<ScheduleData *, 8> {
       void insert(ScheduleData *SD) { push_back(SD); }
     };
@@ -3279,7 +3321,7 @@
   // Make sure that the scheduling region contains all
   // instructions of the bundle.
   for (Value *V : VL) {
-    if (!extendSchedulingRegion(V))
+    if (!extendSchedulingRegion(V, OpValue))
       return false;
   }
 
@@ -3316,8 +3358,9 @@
     // It is seldom that this needs to be done a second time after adding the
     // initial bundle to the region.
    for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
-      ScheduleData *SD = getScheduleData(I);
-      SD->clearDependencies();
+      doForAllOpcodes(I, [](ScheduleData *SD) {
+        SD->clearDependencies();
+      });
    }
     ReSchedule = true;
   }
@@ -3378,17 +3421,43 @@
   }
 }
 
-bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
-  if (getScheduleData(V))
+BoUpSLP::ScheduleData *BoUpSLP::BlockScheduling::allocateScheduleDataChunks() {
+  // Allocate a new ScheduleData for the instruction.
+  if (ChunkPos >= ChunkSize) {
+    ScheduleDataChunks.push_back(llvm::make_unique<ScheduleData[]>(ChunkSize));
+    ChunkPos = 0;
+  }
+  return &(ScheduleDataChunks.back()[ChunkPos++]);
+}
+
+bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
+                                                      Value *OpValue) {
+  if (getScheduleData(V, isOneOf(OpValue, V)))
     return true;
   Instruction *I = dyn_cast<Instruction>(V);
   assert(I && "bundle member must be an instruction");
   assert(!isa<PHINode>(I) && "phi nodes don't need to be scheduled");
+  auto &&CheckSheduleForI = [this, OpValue](Instruction *I) -> bool {
+    ScheduleData *ISD = getScheduleData(I);
+    if (!ISD)
+      return false;
+    assert(isInSchedulingRegion(ISD) &&
+           "new ScheduleData already in scheduling region");
+    ScheduleData *SD = allocateScheduleDataChunks();
+    SD->Inst = I;
+    SD->init(SchedulingRegionID, OpValue);
+    ExtraScheduleDataMap[I][OpValue] = SD;
+    return true;
+  };
+  if (CheckSheduleForI(I))
+    return true;
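+  // V has no ScheduleData yet, so the scheduling region has to grow to
+  // cover I. Each of the cases below (empty region, extending the region
+  // upwards, extending it downwards) also creates the extra ScheduleData
+  // keyed by OpValue once I is inside the region.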
   if (!ScheduleStart) {
     // It's the first instruction in the new region.
     initScheduleData(I, I->getNextNode(), nullptr, nullptr);
     ScheduleStart = I;
     ScheduleEnd = I->getNextNode();
+    if (isOneOf(OpValue, I) != I)
+      CheckSheduleForI(I);
     assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
     DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n");
     return true;
   }
@@ -3410,6 +3479,8 @@
       if (&*UpIter == I) {
         initScheduleData(I, ScheduleStart, nullptr, FirstLoadStoreInRegion);
         ScheduleStart = I;
+        if (isOneOf(OpValue, I) != I)
+          CheckSheduleForI(I);
         DEBUG(dbgs() << "SLP: extend schedule region start to " << *I << "\n");
         return true;
       }
@@ -3420,6 +3491,8 @@
         initScheduleData(ScheduleEnd, I->getNextNode(), LastLoadStoreInRegion,
                          nullptr);
         ScheduleEnd = I->getNextNode();
+        if (isOneOf(OpValue, I) != I)
+          CheckSheduleForI(I);
         assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
         DEBUG(dbgs() << "SLP: extend schedule region end to " << *I << "\n");
         return true;
@@ -3446,7 +3519,7 @@
           llvm::make_unique<ScheduleData[]>(ChunkSize));
       ChunkPos = 0;
     }
-    SD = &(ScheduleDataChunks.back()[ChunkPos++]);
+    SD = allocateScheduleDataChunks();
    ScheduleDataMap[I] = SD;
     SD->Inst = I;
   }
@@ -3494,23 +3567,35 @@
       BundleMember->resetUnscheduledDeps();
 
       // Handle def-use chain dependencies.
-      for (User *U : BundleMember->Inst->users()) {
-        if (isa<Instruction>(U)) {
-          ScheduleData *UseSD = getScheduleData(U);
-          if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
+      if (BundleMember->OpValue != BundleMember->Inst) {
+        ScheduleData *UseSD = getScheduleData(BundleMember->Inst);
+        if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
+          BundleMember->Dependencies++;
+          ScheduleData *DestBundle = UseSD->FirstInBundle;
+          if (!DestBundle->IsScheduled)
+            BundleMember->incrementUnscheduledDeps(1);
+          if (!DestBundle->hasValidDependencies())
+            WorkList.push_back(DestBundle);
+        }
+      } else {
+        for (User *U : BundleMember->Inst->users()) {
+          if (isa<Instruction>(U)) {
+            ScheduleData *UseSD = getScheduleData(U);
+            if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
+              BundleMember->Dependencies++;
+              ScheduleData *DestBundle = UseSD->FirstInBundle;
+              if (!DestBundle->IsScheduled)
+                BundleMember->incrementUnscheduledDeps(1);
+              if (!DestBundle->hasValidDependencies())
+                WorkList.push_back(DestBundle);
+            }
+          } else {
+            // I'm not sure if this can ever happen. But we need to be safe.
+            // This lets the instruction/bundle never be scheduled and
+            // eventually disable vectorization.
             BundleMember->Dependencies++;
-            ScheduleData *DestBundle = UseSD->FirstInBundle;
-            if (!DestBundle->IsScheduled)
-              BundleMember->incrementUnscheduledDeps(1);
-            if (!DestBundle->hasValidDependencies())
-              WorkList.push_back(DestBundle);
+            BundleMember->incrementUnscheduledDeps(1);
           }
-        } else {
-          // I'm not sure if this can ever happen. But we need to be safe.
-          // This lets the instruction/bundle never be scheduled and
-          // eventually disable vectorization.
-          BundleMember->Dependencies++;
-          BundleMember->incrementUnscheduledDeps(1);
         }
       }
 
@@ -3587,10 +3672,11 @@
   assert(ScheduleStart &&
          "tried to reset schedule on block which has not been scheduled");
   for (Instruction *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
-    ScheduleData *SD = getScheduleData(I);
-    assert(isInSchedulingRegion(SD));
-    SD->IsScheduled = false;
-    SD->resetUnscheduledDeps();
+    doForAllOpcodes(I, [this](ScheduleData *SD) {
+      assert(isInSchedulingRegion(SD));
+      SD->IsScheduled = false;
+      SD->resetUnscheduledDeps();
+    });
   }
   ReadyInsts.clear();
 }
@@ -3620,15 +3706,16 @@
   int NumToSchedule = 0;
   for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd;
        I = I->getNextNode()) {
-    ScheduleData *SD = BS->getScheduleData(I);
-    assert(
-        SD->isPartOfBundle() == (getTreeEntry(SD->Inst) != nullptr) &&
-        "scheduler and vectorizer have different opinion on what is a bundle");
-    SD->FirstInBundle->SchedulingPriority = Idx++;
-    if (SD->isSchedulingEntity()) {
-      BS->calculateDependencies(SD, false, this);
-      NumToSchedule++;
-    }
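+    // Visit every ScheduleData attached to I, including the extra entries
+    // created for values scheduled under a different operation value, so
+    // each bundle in the region gets a priority and valid dependencies.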
+    BS->doForAllOpcodes(I, [this, &Idx, &NumToSchedule, BS](ScheduleData *SD) {
+      assert(SD->isPartOfBundle() ==
+                 (getTreeEntry(SD->Inst) != nullptr) &&
+             "scheduler and vectorizer bundle mismatch");
+      SD->FirstInBundle->SchedulingPriority = Idx++;
+      if (SD->isSchedulingEntity()) {
+        BS->calculateDependencies(SD, false, this);
+        NumToSchedule++;
+      }
+    });
   }
 
   BS->initialFillReadyList(ReadyInsts);
Index: test/Transforms/SLPVectorizer/X86/scheduling1.ll
===================================================================
--- /dev/null
+++ test/Transforms/SLPVectorizer/X86/scheduling1.ll
@@ -0,0 +1,1159 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -slp-vectorizer -slp-vectorizer -mcpu=bdver1 < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+define void @foo(i32 %n) local_unnamed_addr #0 {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARR_F1:%.*]] = alloca [17 x float], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast [17 x float]* [[ARR_F1]] to i8*
+; CHECK-NEXT:    [[ARR_F2:%.*]] = alloca [16 x float], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [16 x float]* [[ARR_F2]] to i8*
+; CHECK-NEXT:    [[ARR_F3:%.*]] = alloca [16 x float], align 16
+; CHECK-NEXT:    [[ARR_F4:%.*]] = alloca [16 x float], align 16
+; CHECK-NEXT:    [[ARR_I1:%.*]] = alloca [16 x i32], align 16
+; CHECK-NEXT:    [[ARR_I2:%.*]] = alloca [16 x i32], align 16
+; CHECK-NEXT:    [[ARR_I3:%.*]] = alloca [16 x i32], align 16
+; CHECK-NEXT:    [[VALUE1:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[ARRARR_F33:%.*]] = alloca [16 x [16 x i8]], align 16
+; CHECK-NEXT:    [[ARRARR_F34:%.*]] = alloca [16 x [16 x i8]], align 16
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 68, i8* nonnull [[TMP0]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [16 x float]* [[ARR_F3]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast [16 x float]* [[ARR_F4]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull [[TMP3]])
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast [16 x i32]* [[ARR_I1]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull [[TMP4]])
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast [16 x i32]* [[ARR_I2]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull [[TMP5]])
+; CHECK-NEXT:
[[TMP6:%.*]] = bitcast [16 x i32]* [[ARR_I3]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull [[TMP6]]) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[VALUE1]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[TMP7]]) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F33]], i64 0, i64 0, i64 0 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 256, i8* nonnull [[TMP8]]) +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F34]], i64 0, i64 0, i64 0 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 256, i8* nonnull [[TMP9]]) +; CHECK-NEXT: store i32 0, i32* [[VALUE1]], align 4, !tbaa !0 +; CHECK-NEXT: call void @foo_i(i32* nonnull [[VALUE1]]) +; CHECK-NEXT: store i32 0, i32* [[VALUE1]], align 4, !tbaa !0 +; CHECK-NEXT: br label [[FOR_BODY7:%.*]] +; CHECK: for.body7: +; CHECK-NEXT: [[TMP10:%.*]] = phi i8 [ undef, [[ENTRY:%.*]] ], [ [[DOTPRE227:%.*]], [[FOR_BODY7_FOR_BODY7_CRIT_EDGE:%.*]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi i8 [ undef, [[ENTRY]] ], [ [[DOTPRE:%.*]], [[FOR_BODY7_FOR_BODY7_CRIT_EDGE]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY7_FOR_BODY7_CRIT_EDGE]] ] +; CHECK-NEXT: [[ADD23_LCSSA219:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[BIN_EXTRA:%.*]], [[FOR_BODY7_FOR_BODY7_CRIT_EDGE]] ] +; CHECK-NEXT: [[ARRAYIDX15_1:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F33]], i64 0, i64 [[INDVARS_IV]], i64 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX15_1]], align 1, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX20_1:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F34]], i64 0, i64 [[INDVARS_IV]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX20_1]], align 1, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX15_2:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F33]], i64 0, i64 [[INDVARS_IV]], i64 2 +; CHECK-NEXT: [[TMP14:%.*]] = load i8, i8* [[ARRAYIDX15_2]], align 2, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX20_2:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F34]], i64 0, i64 [[INDVARS_IV]], i64 2 +; CHECK-NEXT: [[TMP15:%.*]] = load i8, i8* [[ARRAYIDX20_2]], align 2, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX15_3:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F33]], i64 0, i64 [[INDVARS_IV]], i64 3 +; CHECK-NEXT: [[TMP16:%.*]] = load i8, i8* [[ARRAYIDX15_3]], align 1, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX20_3:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F34]], i64 0, i64 [[INDVARS_IV]], i64 3 +; CHECK-NEXT: [[TMP17:%.*]] = load i8, i8* [[ARRAYIDX20_3]], align 1, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX15_4:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F33]], i64 0, i64 [[INDVARS_IV]], i64 4 +; CHECK-NEXT: [[TMP18:%.*]] = load i8, i8* [[ARRAYIDX15_4]], align 4, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX20_4:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F34]], i64 0, i64 [[INDVARS_IV]], i64 4 +; CHECK-NEXT: [[TMP19:%.*]] = load i8, i8* [[ARRAYIDX20_4]], align 4, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX15_5:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F33]], i64 0, i64 [[INDVARS_IV]], i64 5 +; CHECK-NEXT: [[TMP20:%.*]] = load i8, i8* [[ARRAYIDX15_5]], align 1, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX20_5:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* 
[[ARRARR_F34]], i64 0, i64 [[INDVARS_IV]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = load i8, i8* [[ARRAYIDX20_5]], align 1, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX15_6:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F33]], i64 0, i64 [[INDVARS_IV]], i64 6 +; CHECK-NEXT: [[TMP22:%.*]] = load i8, i8* [[ARRAYIDX15_6]], align 2, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX20_6:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F34]], i64 0, i64 [[INDVARS_IV]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = load i8, i8* [[ARRAYIDX20_6]], align 2, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX15_7:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F33]], i64 0, i64 [[INDVARS_IV]], i64 7 +; CHECK-NEXT: [[TMP24:%.*]] = load i8, i8* [[ARRAYIDX15_7]], align 1, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX20_7:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F34]], i64 0, i64 [[INDVARS_IV]], i64 7 +; CHECK-NEXT: [[TMP25:%.*]] = load i8, i8* [[ARRAYIDX20_7]], align 1, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX15_8:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F33]], i64 0, i64 [[INDVARS_IV]], i64 8 +; CHECK-NEXT: [[TMP26:%.*]] = load i8, i8* [[ARRAYIDX15_8]], align 8, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX20_8:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F34]], i64 0, i64 [[INDVARS_IV]], i64 8 +; CHECK-NEXT: [[TMP27:%.*]] = load i8, i8* [[ARRAYIDX20_8]], align 8, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX15_9:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F33]], i64 0, i64 [[INDVARS_IV]], i64 9 +; CHECK-NEXT: [[TMP28:%.*]] = load i8, i8* [[ARRAYIDX15_9]], align 1, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX20_9:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F34]], i64 0, i64 [[INDVARS_IV]], i64 9 +; CHECK-NEXT: [[TMP29:%.*]] = load i8, i8* [[ARRAYIDX20_9]], align 1, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX15_10:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F33]], i64 0, i64 [[INDVARS_IV]], i64 10 +; CHECK-NEXT: [[TMP30:%.*]] = load i8, i8* [[ARRAYIDX15_10]], align 2, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX20_10:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F34]], i64 0, i64 [[INDVARS_IV]], i64 10 +; CHECK-NEXT: [[TMP31:%.*]] = load i8, i8* [[ARRAYIDX20_10]], align 2, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX15_11:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F33]], i64 0, i64 [[INDVARS_IV]], i64 11 +; CHECK-NEXT: [[TMP32:%.*]] = load i8, i8* [[ARRAYIDX15_11]], align 1, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX20_11:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F34]], i64 0, i64 [[INDVARS_IV]], i64 11 +; CHECK-NEXT: [[TMP33:%.*]] = load i8, i8* [[ARRAYIDX20_11]], align 1, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX15_12:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F33]], i64 0, i64 [[INDVARS_IV]], i64 12 +; CHECK-NEXT: [[TMP34:%.*]] = load i8, i8* [[ARRAYIDX15_12]], align 4, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX20_12:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F34]], i64 0, i64 [[INDVARS_IV]], i64 12 +; CHECK-NEXT: [[TMP35:%.*]] = load i8, i8* [[ARRAYIDX20_12]], align 4, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX15_13:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F33]], i64 0, i64 [[INDVARS_IV]], i64 13 +; CHECK-NEXT: [[TMP36:%.*]] = load i8, i8* [[ARRAYIDX15_13]], align 1, !tbaa !4 +; CHECK-NEXT: 
[[ARRAYIDX20_13:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F34]], i64 0, i64 [[INDVARS_IV]], i64 13 +; CHECK-NEXT: [[TMP37:%.*]] = load i8, i8* [[ARRAYIDX20_13]], align 1, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX15_14:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F33]], i64 0, i64 [[INDVARS_IV]], i64 14 +; CHECK-NEXT: [[TMP38:%.*]] = load i8, i8* [[ARRAYIDX15_14]], align 2, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX20_14:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F34]], i64 0, i64 [[INDVARS_IV]], i64 14 +; CHECK-NEXT: [[TMP39:%.*]] = load i8, i8* [[ARRAYIDX20_14]], align 2, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX15_15:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F33]], i64 0, i64 [[INDVARS_IV]], i64 15 +; CHECK-NEXT: [[TMP40:%.*]] = load i8, i8* [[ARRAYIDX15_15]], align 1, !tbaa !4 +; CHECK-NEXT: [[TMP41:%.*]] = insertelement <16 x i8> undef, i8 [[TMP40]], i32 0 +; CHECK-NEXT: [[TMP42:%.*]] = insertelement <16 x i8> [[TMP41]], i8 [[TMP38]], i32 1 +; CHECK-NEXT: [[TMP43:%.*]] = insertelement <16 x i8> [[TMP42]], i8 [[TMP36]], i32 2 +; CHECK-NEXT: [[TMP44:%.*]] = insertelement <16 x i8> [[TMP43]], i8 [[TMP34]], i32 3 +; CHECK-NEXT: [[TMP45:%.*]] = insertelement <16 x i8> [[TMP44]], i8 [[TMP32]], i32 4 +; CHECK-NEXT: [[TMP46:%.*]] = insertelement <16 x i8> [[TMP45]], i8 [[TMP30]], i32 5 +; CHECK-NEXT: [[TMP47:%.*]] = insertelement <16 x i8> [[TMP46]], i8 [[TMP28]], i32 6 +; CHECK-NEXT: [[TMP48:%.*]] = insertelement <16 x i8> [[TMP47]], i8 [[TMP26]], i32 7 +; CHECK-NEXT: [[TMP49:%.*]] = insertelement <16 x i8> [[TMP48]], i8 [[TMP24]], i32 8 +; CHECK-NEXT: [[TMP50:%.*]] = insertelement <16 x i8> [[TMP49]], i8 [[TMP22]], i32 9 +; CHECK-NEXT: [[TMP51:%.*]] = insertelement <16 x i8> [[TMP50]], i8 [[TMP20]], i32 10 +; CHECK-NEXT: [[TMP52:%.*]] = insertelement <16 x i8> [[TMP51]], i8 [[TMP18]], i32 11 +; CHECK-NEXT: [[TMP53:%.*]] = insertelement <16 x i8> [[TMP52]], i8 [[TMP16]], i32 12 +; CHECK-NEXT: [[TMP54:%.*]] = insertelement <16 x i8> [[TMP53]], i8 [[TMP14]], i32 13 +; CHECK-NEXT: [[TMP55:%.*]] = insertelement <16 x i8> [[TMP54]], i8 [[TMP12]], i32 14 +; CHECK-NEXT: [[TMP56:%.*]] = insertelement <16 x i8> [[TMP55]], i8 [[TMP11]], i32 15 +; CHECK-NEXT: [[TMP57:%.*]] = sext <16 x i8> [[TMP56]] to <16 x i32> +; CHECK-NEXT: [[ARRAYIDX20_15:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F34]], i64 0, i64 [[INDVARS_IV]], i64 15 +; CHECK-NEXT: [[TMP58:%.*]] = load i8, i8* [[ARRAYIDX20_15]], align 1, !tbaa !4 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <16 x i8> undef, i8 [[TMP58]], i32 0 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <16 x i8> [[TMP59]], i8 [[TMP39]], i32 1 +; CHECK-NEXT: [[TMP61:%.*]] = insertelement <16 x i8> [[TMP60]], i8 [[TMP37]], i32 2 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <16 x i8> [[TMP61]], i8 [[TMP35]], i32 3 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <16 x i8> [[TMP62]], i8 [[TMP33]], i32 4 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <16 x i8> [[TMP63]], i8 [[TMP31]], i32 5 +; CHECK-NEXT: [[TMP65:%.*]] = insertelement <16 x i8> [[TMP64]], i8 [[TMP29]], i32 6 +; CHECK-NEXT: [[TMP66:%.*]] = insertelement <16 x i8> [[TMP65]], i8 [[TMP27]], i32 7 +; CHECK-NEXT: [[TMP67:%.*]] = insertelement <16 x i8> [[TMP66]], i8 [[TMP25]], i32 8 +; CHECK-NEXT: [[TMP68:%.*]] = insertelement <16 x i8> [[TMP67]], i8 [[TMP23]], i32 9 +; CHECK-NEXT: [[TMP69:%.*]] = insertelement <16 x i8> [[TMP68]], i8 [[TMP21]], i32 10 +; CHECK-NEXT: [[TMP70:%.*]] = 
insertelement <16 x i8> [[TMP69]], i8 [[TMP19]], i32 11 +; CHECK-NEXT: [[TMP71:%.*]] = insertelement <16 x i8> [[TMP70]], i8 [[TMP17]], i32 12 +; CHECK-NEXT: [[TMP72:%.*]] = insertelement <16 x i8> [[TMP71]], i8 [[TMP15]], i32 13 +; CHECK-NEXT: [[TMP73:%.*]] = insertelement <16 x i8> [[TMP72]], i8 [[TMP13]], i32 14 +; CHECK-NEXT: [[TMP74:%.*]] = insertelement <16 x i8> [[TMP73]], i8 [[TMP10]], i32 15 +; CHECK-NEXT: [[TMP75:%.*]] = sext <16 x i8> [[TMP74]] to <16 x i32> +; CHECK-NEXT: [[TMP76:%.*]] = sub nsw <16 x i32> [[TMP57]], [[TMP75]] +; CHECK-NEXT: [[ADD23:%.*]] = add nsw i32 undef, [[ADD23_LCSSA219]] +; CHECK-NEXT: [[ADD23_1:%.*]] = add nsw i32 undef, [[ADD23]] +; CHECK-NEXT: [[ADD23_2:%.*]] = add nsw i32 undef, [[ADD23_1]] +; CHECK-NEXT: [[ADD23_3:%.*]] = add nsw i32 undef, [[ADD23_2]] +; CHECK-NEXT: [[ADD23_4:%.*]] = add nsw i32 undef, [[ADD23_3]] +; CHECK-NEXT: [[ADD23_5:%.*]] = add nsw i32 undef, [[ADD23_4]] +; CHECK-NEXT: [[ADD23_6:%.*]] = add nsw i32 undef, [[ADD23_5]] +; CHECK-NEXT: [[ADD23_7:%.*]] = add nsw i32 undef, [[ADD23_6]] +; CHECK-NEXT: [[ADD23_8:%.*]] = add nsw i32 undef, [[ADD23_7]] +; CHECK-NEXT: [[ADD23_9:%.*]] = add nsw i32 undef, [[ADD23_8]] +; CHECK-NEXT: [[ADD23_10:%.*]] = add nsw i32 undef, [[ADD23_9]] +; CHECK-NEXT: [[ADD23_11:%.*]] = add nsw i32 undef, [[ADD23_10]] +; CHECK-NEXT: [[ADD23_12:%.*]] = add nsw i32 undef, [[ADD23_11]] +; CHECK-NEXT: [[ADD23_13:%.*]] = add nsw i32 undef, [[ADD23_12]] +; CHECK-NEXT: [[ADD23_14:%.*]] = add nsw i32 undef, [[ADD23_13]] +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP76]], <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: [[BIN_RDX:%.*]] = add nsw <16 x i32> [[TMP76]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[BIN_RDX]], <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: [[BIN_RDX2:%.*]] = add nsw <16 x i32> [[BIN_RDX]], [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <16 x i32> [[BIN_RDX2]], <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: [[BIN_RDX4:%.*]] = add nsw <16 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] +; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x i32> [[BIN_RDX4]], <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: [[BIN_RDX6:%.*]] = add nsw <16 x i32> [[BIN_RDX4]], [[RDX_SHUF5]] +; CHECK-NEXT: [[TMP77:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0 +; CHECK-NEXT: [[BIN_EXTRA]] = add nsw i32 [[TMP77]], [[ADD23_LCSSA219]] +; CHECK-NEXT: [[ADD23_15:%.*]] = add nsw i32 undef, [[ADD23_14]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 16 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END29:%.*]], label [[FOR_BODY7_FOR_BODY7_CRIT_EDGE]] +; CHECK: for.body7.for.body7_crit_edge: +; CHECK-NEXT: [[ARRAYIDX15_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F33]], i64 0, i64 [[INDVARS_IV_NEXT]], i64 0 +; CHECK-NEXT: [[DOTPRE]] = load i8, i8* [[ARRAYIDX15_PHI_TRANS_INSERT]], align 16, !tbaa !4 +; CHECK-NEXT: [[ARRAYIDX20_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* [[ARRARR_F34]], i64 0, i64 [[INDVARS_IV_NEXT]], i64 0 +; CHECK-NEXT: [[DOTPRE227]] = load i8, i8* [[ARRAYIDX20_PHI_TRANS_INSERT]], align 16, !tbaa !4 +; CHECK-NEXT: br label [[FOR_BODY7]] +; CHECK: for.end29: +; CHECK-NEXT: store i32 [[BIN_EXTRA]], i32* [[VALUE1]], align 4, !tbaa !0 +; CHECK-NEXT: call void @foo_i(i32* nonnull [[VALUE1]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull [[TMP0]], i8* nonnull [[TMP1]], i64
64, i32 16, i1 false) +; CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [17 x float], [17 x float]* [[ARR_F1]], i64 0, i64 0 +; CHECK-NEXT: call void @foo_f(float* nonnull [[ARRAYDECAY]]) +; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F2]], i64 0, i64 0 +; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F3]], i64 0, i64 0 +; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F4]], i64 0, i64 0 +; CHECK-NEXT: [[ARRAYIDX46_1:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F2]], i64 0, i64 1 +; CHECK-NEXT: [[ARRAYIDX48_1:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F3]], i64 0, i64 1 +; CHECK-NEXT: [[ARRAYIDX51_1:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F4]], i64 0, i64 1 +; CHECK-NEXT: [[ARRAYIDX54_1:%.*]] = getelementptr inbounds [17 x float], [17 x float]* [[ARR_F1]], i64 0, i64 1 +; CHECK-NEXT: [[ARRAYIDX46_2:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F2]], i64 0, i64 2 +; CHECK-NEXT: [[ARRAYIDX48_2:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F3]], i64 0, i64 2 +; CHECK-NEXT: [[ARRAYIDX51_2:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F4]], i64 0, i64 2 +; CHECK-NEXT: [[ARRAYIDX54_2:%.*]] = getelementptr inbounds [17 x float], [17 x float]* [[ARR_F1]], i64 0, i64 2 +; CHECK-NEXT: [[ARRAYIDX46_3:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F2]], i64 0, i64 3 +; CHECK-NEXT: [[ARRAYIDX48_3:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F3]], i64 0, i64 3 +; CHECK-NEXT: [[ARRAYIDX51_3:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F4]], i64 0, i64 3 +; CHECK-NEXT: [[ARRAYIDX54_3:%.*]] = getelementptr inbounds [17 x float], [17 x float]* [[ARR_F1]], i64 0, i64 3 +; CHECK-NEXT: [[ARRAYIDX46_4:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F2]], i64 0, i64 4 +; CHECK-NEXT: [[ARRAYIDX48_4:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F3]], i64 0, i64 4 +; CHECK-NEXT: [[ARRAYIDX51_4:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F4]], i64 0, i64 4 +; CHECK-NEXT: [[ARRAYIDX54_4:%.*]] = getelementptr inbounds [17 x float], [17 x float]* [[ARR_F1]], i64 0, i64 4 +; CHECK-NEXT: [[ARRAYIDX46_5:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F2]], i64 0, i64 5 +; CHECK-NEXT: [[ARRAYIDX48_5:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F3]], i64 0, i64 5 +; CHECK-NEXT: [[ARRAYIDX51_5:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F4]], i64 0, i64 5 +; CHECK-NEXT: [[ARRAYIDX54_5:%.*]] = getelementptr inbounds [17 x float], [17 x float]* [[ARR_F1]], i64 0, i64 5 +; CHECK-NEXT: [[ARRAYIDX46_6:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F2]], i64 0, i64 6 +; CHECK-NEXT: [[ARRAYIDX48_6:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F3]], i64 0, i64 6 +; CHECK-NEXT: [[ARRAYIDX51_6:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F4]], i64 0, i64 6 +; CHECK-NEXT: [[ARRAYIDX54_6:%.*]] = getelementptr inbounds [17 x float], [17 x float]* [[ARR_F1]], i64 0, i64 6 +; CHECK-NEXT: [[ARRAYIDX46_7:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F2]], i64 0, i64 7 +; CHECK-NEXT: [[TMP78:%.*]] = bitcast float* [[ARRAYIDX46]] to <8 x float>* +; CHECK-NEXT: [[TMP79:%.*]] = load <8 x float>, <8 x float>* [[TMP78]], align 16, !tbaa !5 +; 
CHECK-NEXT: [[ARRAYIDX48_7:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F3]], i64 0, i64 7 +; CHECK-NEXT: [[TMP80:%.*]] = bitcast float* [[ARRAYIDX48]] to <8 x float>* +; CHECK-NEXT: [[TMP81:%.*]] = load <8 x float>, <8 x float>* [[TMP80]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP82:%.*]] = fadd <8 x float> [[TMP79]], [[TMP81]] +; CHECK-NEXT: [[ARRAYIDX51_7:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F4]], i64 0, i64 7 +; CHECK-NEXT: [[TMP83:%.*]] = bitcast float* [[ARRAYIDX51]] to <8 x float>* +; CHECK-NEXT: [[TMP84:%.*]] = load <8 x float>, <8 x float>* [[TMP83]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP85:%.*]] = fadd <8 x float> [[TMP82]], [[TMP84]] +; CHECK-NEXT: [[ARRAYIDX54_7:%.*]] = getelementptr inbounds [17 x float], [17 x float]* [[ARR_F1]], i64 0, i64 7 +; CHECK-NEXT: [[TMP86:%.*]] = bitcast float* [[ARRAYDECAY]] to <8 x float>* +; CHECK-NEXT: store <8 x float> [[TMP85]], <8 x float>* [[TMP86]], align 16, !tbaa !5 +; CHECK-NEXT: [[ARRAYIDX46_8:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F2]], i64 0, i64 8 +; CHECK-NEXT: [[ARRAYIDX48_8:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F3]], i64 0, i64 8 +; CHECK-NEXT: [[ARRAYIDX51_8:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F4]], i64 0, i64 8 +; CHECK-NEXT: [[ARRAYIDX54_8:%.*]] = getelementptr inbounds [17 x float], [17 x float]* [[ARR_F1]], i64 0, i64 8 +; CHECK-NEXT: [[ARRAYIDX46_9:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F2]], i64 0, i64 9 +; CHECK-NEXT: [[ARRAYIDX48_9:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F3]], i64 0, i64 9 +; CHECK-NEXT: [[ARRAYIDX51_9:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F4]], i64 0, i64 9 +; CHECK-NEXT: [[ARRAYIDX54_9:%.*]] = getelementptr inbounds [17 x float], [17 x float]* [[ARR_F1]], i64 0, i64 9 +; CHECK-NEXT: [[ARRAYIDX46_10:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F2]], i64 0, i64 10 +; CHECK-NEXT: [[ARRAYIDX48_10:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F3]], i64 0, i64 10 +; CHECK-NEXT: [[ARRAYIDX51_10:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F4]], i64 0, i64 10 +; CHECK-NEXT: [[ARRAYIDX54_10:%.*]] = getelementptr inbounds [17 x float], [17 x float]* [[ARR_F1]], i64 0, i64 10 +; CHECK-NEXT: [[ARRAYIDX46_11:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F2]], i64 0, i64 11 +; CHECK-NEXT: [[ARRAYIDX48_11:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F3]], i64 0, i64 11 +; CHECK-NEXT: [[ARRAYIDX51_11:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F4]], i64 0, i64 11 +; CHECK-NEXT: [[ARRAYIDX54_11:%.*]] = getelementptr inbounds [17 x float], [17 x float]* [[ARR_F1]], i64 0, i64 11 +; CHECK-NEXT: [[ARRAYIDX46_12:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F2]], i64 0, i64 12 +; CHECK-NEXT: [[ARRAYIDX48_12:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F3]], i64 0, i64 12 +; CHECK-NEXT: [[ARRAYIDX51_12:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F4]], i64 0, i64 12 +; CHECK-NEXT: [[ARRAYIDX54_12:%.*]] = getelementptr inbounds [17 x float], [17 x float]* [[ARR_F1]], i64 0, i64 12 +; CHECK-NEXT: [[ARRAYIDX46_13:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F2]], i64 0, i64 13 +; CHECK-NEXT: [[ARRAYIDX48_13:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F3]], i64 0, i64 13 +; CHECK-NEXT: [[ARRAYIDX51_13:%.*]] = 
getelementptr inbounds [16 x float], [16 x float]* [[ARR_F4]], i64 0, i64 13 +; CHECK-NEXT: [[ARRAYIDX54_13:%.*]] = getelementptr inbounds [17 x float], [17 x float]* [[ARR_F1]], i64 0, i64 13 +; CHECK-NEXT: [[ARRAYIDX46_14:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F2]], i64 0, i64 14 +; CHECK-NEXT: [[ARRAYIDX48_14:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F3]], i64 0, i64 14 +; CHECK-NEXT: [[ARRAYIDX51_14:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F4]], i64 0, i64 14 +; CHECK-NEXT: [[ARRAYIDX54_14:%.*]] = getelementptr inbounds [17 x float], [17 x float]* [[ARR_F1]], i64 0, i64 14 +; CHECK-NEXT: [[ARRAYIDX46_15:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F2]], i64 0, i64 15 +; CHECK-NEXT: [[TMP87:%.*]] = bitcast float* [[ARRAYIDX46_8]] to <8 x float>* +; CHECK-NEXT: [[TMP88:%.*]] = load <8 x float>, <8 x float>* [[TMP87]], align 16, !tbaa !5 +; CHECK-NEXT: [[ARRAYIDX48_15:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F3]], i64 0, i64 15 +; CHECK-NEXT: [[TMP89:%.*]] = bitcast float* [[ARRAYIDX48_8]] to <8 x float>* +; CHECK-NEXT: [[TMP90:%.*]] = load <8 x float>, <8 x float>* [[TMP89]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP91:%.*]] = fadd <8 x float> [[TMP88]], [[TMP90]] +; CHECK-NEXT: [[ARRAYIDX51_15:%.*]] = getelementptr inbounds [16 x float], [16 x float]* [[ARR_F4]], i64 0, i64 15 +; CHECK-NEXT: [[TMP92:%.*]] = bitcast float* [[ARRAYIDX51_8]] to <8 x float>* +; CHECK-NEXT: [[TMP93:%.*]] = load <8 x float>, <8 x float>* [[TMP92]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP94:%.*]] = fadd <8 x float> [[TMP91]], [[TMP93]] +; CHECK-NEXT: [[ARRAYIDX54_15:%.*]] = getelementptr inbounds [17 x float], [17 x float]* [[ARR_F1]], i64 0, i64 15 +; CHECK-NEXT: [[TMP95:%.*]] = bitcast float* [[ARRAYIDX54_8]] to <8 x float>* +; CHECK-NEXT: store <8 x float> [[TMP94]], <8 x float>* [[TMP95]], align 16, !tbaa !5 +; CHECK-NEXT: call void @foo_f(float* nonnull [[ARRAYDECAY]]) +; CHECK-NEXT: [[TMP96:%.*]] = load float, float* [[ARRAYIDX46_1]], align 4, !tbaa !5 +; CHECK-NEXT: [[TMP97:%.*]] = load float, float* [[ARRAYIDX48_1]], align 4, !tbaa !5 +; CHECK-NEXT: [[TMP98:%.*]] = load float, float* [[ARRAYIDX46]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP99:%.*]] = load float, float* [[ARRAYIDX48]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP100:%.*]] = load float, float* [[ARRAYIDX46_3]], align 4, !tbaa !5 +; CHECK-NEXT: [[TMP101:%.*]] = load float, float* [[ARRAYIDX48_3]], align 4, !tbaa !5 +; CHECK-NEXT: [[TMP102:%.*]] = load float, float* [[ARRAYIDX46_2]], align 8, !tbaa !5 +; CHECK-NEXT: [[TMP103:%.*]] = load float, float* [[ARRAYIDX48_2]], align 8, !tbaa !5 +; CHECK-NEXT: [[TMP104:%.*]] = load float, float* [[ARRAYIDX46_5]], align 4, !tbaa !5 +; CHECK-NEXT: [[TMP105:%.*]] = load float, float* [[ARRAYIDX48_5]], align 4, !tbaa !5 +; CHECK-NEXT: [[TMP106:%.*]] = load float, float* [[ARRAYIDX46_4]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP107:%.*]] = load float, float* [[ARRAYIDX48_4]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP108:%.*]] = load float, float* [[ARRAYIDX46_7]], align 4, !tbaa !5 +; CHECK-NEXT: [[TMP109:%.*]] = load float, float* [[ARRAYIDX48_7]], align 4, !tbaa !5 +; CHECK-NEXT: [[TMP110:%.*]] = load float, float* [[ARRAYIDX46_6]], align 8, !tbaa !5 +; CHECK-NEXT: [[TMP111:%.*]] = load float, float* [[ARRAYIDX48_6]], align 8, !tbaa !5 +; CHECK-NEXT: [[TMP112:%.*]] = load float, float* [[ARRAYIDX46_9]], align 4, !tbaa !5 +; CHECK-NEXT: [[TMP113:%.*]] = load float, float* [[ARRAYIDX48_9]], align 4, 
!tbaa !5 +; CHECK-NEXT: [[TMP114:%.*]] = load float, float* [[ARRAYIDX46_8]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP115:%.*]] = load float, float* [[ARRAYIDX48_8]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP116:%.*]] = load float, float* [[ARRAYIDX46_11]], align 4, !tbaa !5 +; CHECK-NEXT: [[TMP117:%.*]] = load float, float* [[ARRAYIDX48_11]], align 4, !tbaa !5 +; CHECK-NEXT: [[TMP118:%.*]] = load float, float* [[ARRAYIDX46_10]], align 8, !tbaa !5 +; CHECK-NEXT: [[TMP119:%.*]] = load float, float* [[ARRAYIDX48_10]], align 8, !tbaa !5 +; CHECK-NEXT: [[TMP120:%.*]] = load float, float* [[ARRAYIDX46_13]], align 4, !tbaa !5 +; CHECK-NEXT: [[TMP121:%.*]] = load float, float* [[ARRAYIDX48_13]], align 4, !tbaa !5 +; CHECK-NEXT: [[TMP122:%.*]] = load float, float* [[ARRAYIDX46_12]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP123:%.*]] = load float, float* [[ARRAYIDX48_12]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP124:%.*]] = load float, float* [[ARRAYIDX46_15]], align 4, !tbaa !5 +; CHECK-NEXT: [[TMP125:%.*]] = load float, float* [[ARRAYIDX48_15]], align 4, !tbaa !5 +; CHECK-NEXT: [[TMP126:%.*]] = insertelement <8 x float> undef, float [[TMP96]], i32 0 +; CHECK-NEXT: [[TMP127:%.*]] = insertelement <8 x float> [[TMP126]], float [[TMP100]], i32 1 +; CHECK-NEXT: [[TMP128:%.*]] = insertelement <8 x float> [[TMP127]], float [[TMP104]], i32 2 +; CHECK-NEXT: [[TMP129:%.*]] = insertelement <8 x float> [[TMP128]], float [[TMP108]], i32 3 +; CHECK-NEXT: [[TMP130:%.*]] = insertelement <8 x float> [[TMP129]], float [[TMP112]], i32 4 +; CHECK-NEXT: [[TMP131:%.*]] = insertelement <8 x float> [[TMP130]], float [[TMP116]], i32 5 +; CHECK-NEXT: [[TMP132:%.*]] = insertelement <8 x float> [[TMP131]], float [[TMP120]], i32 6 +; CHECK-NEXT: [[TMP133:%.*]] = insertelement <8 x float> [[TMP132]], float [[TMP124]], i32 7 +; CHECK-NEXT: [[TMP134:%.*]] = insertelement <8 x float> undef, float [[TMP97]], i32 0 +; CHECK-NEXT: [[TMP135:%.*]] = insertelement <8 x float> [[TMP134]], float [[TMP101]], i32 1 +; CHECK-NEXT: [[TMP136:%.*]] = insertelement <8 x float> [[TMP135]], float [[TMP105]], i32 2 +; CHECK-NEXT: [[TMP137:%.*]] = insertelement <8 x float> [[TMP136]], float [[TMP109]], i32 3 +; CHECK-NEXT: [[TMP138:%.*]] = insertelement <8 x float> [[TMP137]], float [[TMP113]], i32 4 +; CHECK-NEXT: [[TMP139:%.*]] = insertelement <8 x float> [[TMP138]], float [[TMP117]], i32 5 +; CHECK-NEXT: [[TMP140:%.*]] = insertelement <8 x float> [[TMP139]], float [[TMP121]], i32 6 +; CHECK-NEXT: [[TMP141:%.*]] = insertelement <8 x float> [[TMP140]], float [[TMP125]], i32 7 +; CHECK-NEXT: [[TMP142:%.*]] = fmul <8 x float> [[TMP133]], [[TMP141]] +; CHECK-NEXT: [[TMP143:%.*]] = load float, float* [[ARRAYIDX46_14]], align 8, !tbaa !5 +; CHECK-NEXT: [[TMP144:%.*]] = load float, float* [[ARRAYIDX48_14]], align 8, !tbaa !5 +; CHECK-NEXT: [[TMP145:%.*]] = insertelement <8 x float> undef, float [[TMP98]], i32 0 +; CHECK-NEXT: [[TMP146:%.*]] = insertelement <8 x float> [[TMP145]], float [[TMP102]], i32 1 +; CHECK-NEXT: [[TMP147:%.*]] = insertelement <8 x float> [[TMP146]], float [[TMP106]], i32 2 +; CHECK-NEXT: [[TMP148:%.*]] = insertelement <8 x float> [[TMP147]], float [[TMP110]], i32 3 +; CHECK-NEXT: [[TMP149:%.*]] = insertelement <8 x float> [[TMP148]], float [[TMP114]], i32 4 +; CHECK-NEXT: [[TMP150:%.*]] = insertelement <8 x float> [[TMP149]], float [[TMP118]], i32 5 +; CHECK-NEXT: [[TMP151:%.*]] = insertelement <8 x float> [[TMP150]], float [[TMP122]], i32 6 +; CHECK-NEXT: [[TMP152:%.*]] = insertelement <8 x float> [[TMP151]], float [[TMP143]], i32 7 
+; CHECK-NEXT: [[TMP153:%.*]] = insertelement <8 x float> undef, float [[TMP99]], i32 0 +; CHECK-NEXT: [[TMP154:%.*]] = insertelement <8 x float> [[TMP153]], float [[TMP103]], i32 1 +; CHECK-NEXT: [[TMP155:%.*]] = insertelement <8 x float> [[TMP154]], float [[TMP107]], i32 2 +; CHECK-NEXT: [[TMP156:%.*]] = insertelement <8 x float> [[TMP155]], float [[TMP111]], i32 3 +; CHECK-NEXT: [[TMP157:%.*]] = insertelement <8 x float> [[TMP156]], float [[TMP115]], i32 4 +; CHECK-NEXT: [[TMP158:%.*]] = insertelement <8 x float> [[TMP157]], float [[TMP119]], i32 5 +; CHECK-NEXT: [[TMP159:%.*]] = insertelement <8 x float> [[TMP158]], float [[TMP123]], i32 6 +; CHECK-NEXT: [[TMP160:%.*]] = insertelement <8 x float> [[TMP159]], float [[TMP144]], i32 7 +; CHECK-NEXT: [[TMP161:%.*]] = fmul <8 x float> [[TMP152]], [[TMP160]] +; CHECK-NEXT: [[TMP162:%.*]] = fsub <8 x float> [[TMP142]], [[TMP161]] +; CHECK-NEXT: [[TMP163:%.*]] = bitcast float* [[ARRAYDECAY]] to <8 x float>* +; CHECK-NEXT: store <8 x float> [[TMP162]], <8 x float>* [[TMP163]], align 16, !tbaa !5 +; CHECK-NEXT: call void @foo_f(float* nonnull [[ARRAYDECAY]]) +; CHECK-NEXT: [[TMP164:%.*]] = bitcast float* [[ARRAYIDX46]] to <8 x float>* +; CHECK-NEXT: [[TMP165:%.*]] = load <8 x float>, <8 x float>* [[TMP164]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP166:%.*]] = bitcast float* [[ARRAYIDX48]] to <8 x float>* +; CHECK-NEXT: [[TMP167:%.*]] = load <8 x float>, <8 x float>* [[TMP166]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP168:%.*]] = fadd <8 x float> [[TMP165]], [[TMP167]] +; CHECK-NEXT: [[ARRAYIDX123:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I2]], i64 0, i64 0 +; CHECK-NEXT: [[ARRAYIDX125:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I3]], i64 0, i64 0 +; CHECK-NEXT: [[ARRAYIDX128:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I1]], i64 0, i64 0 +; CHECK-NEXT: [[ARRAYIDX123_1:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I2]], i64 0, i64 1 +; CHECK-NEXT: [[ARRAYIDX125_1:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I3]], i64 0, i64 1 +; CHECK-NEXT: [[ARRAYIDX128_1:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I1]], i64 0, i64 1 +; CHECK-NEXT: [[ARRAYIDX123_2:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I2]], i64 0, i64 2 +; CHECK-NEXT: [[ARRAYIDX125_2:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I3]], i64 0, i64 2 +; CHECK-NEXT: [[ARRAYIDX128_2:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I1]], i64 0, i64 2 +; CHECK-NEXT: [[ARRAYIDX123_3:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I2]], i64 0, i64 3 +; CHECK-NEXT: [[ARRAYIDX125_3:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I3]], i64 0, i64 3 +; CHECK-NEXT: [[ARRAYIDX128_3:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I1]], i64 0, i64 3 +; CHECK-NEXT: [[ARRAYIDX123_4:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I2]], i64 0, i64 4 +; CHECK-NEXT: [[ARRAYIDX125_4:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I3]], i64 0, i64 4 +; CHECK-NEXT: [[ARRAYIDX128_4:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I1]], i64 0, i64 4 +; CHECK-NEXT: [[ARRAYIDX123_5:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I2]], i64 0, i64 5 +; CHECK-NEXT: [[ARRAYIDX125_5:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I3]], i64 0, i64 5 +; CHECK-NEXT: [[ARRAYIDX128_5:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I1]], i64 0, i64 5 +; CHECK-NEXT: 
[[ARRAYIDX123_6:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I2]], i64 0, i64 6 +; CHECK-NEXT: [[ARRAYIDX125_6:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I3]], i64 0, i64 6 +; CHECK-NEXT: [[ARRAYIDX128_6:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I1]], i64 0, i64 6 +; CHECK-NEXT: [[TMP169:%.*]] = bitcast float* [[ARRAYDECAY]] to <8 x float>* +; CHECK-NEXT: store <8 x float> [[TMP168]], <8 x float>* [[TMP169]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP170:%.*]] = bitcast float* [[ARRAYIDX51]] to <8 x float>* +; CHECK-NEXT: store <8 x float> [[TMP168]], <8 x float>* [[TMP170]], align 16, !tbaa !5 +; CHECK-NEXT: [[ARRAYIDX123_7:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I2]], i64 0, i64 7 +; CHECK-NEXT: [[TMP171:%.*]] = bitcast i32* [[ARRAYIDX123]] to <8 x i32>* +; CHECK-NEXT: [[TMP172:%.*]] = load <8 x i32>, <8 x i32>* [[TMP171]], align 16, !tbaa !0 +; CHECK-NEXT: [[ARRAYIDX125_7:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I3]], i64 0, i64 7 +; CHECK-NEXT: [[TMP173:%.*]] = bitcast i32* [[ARRAYIDX125]] to <8 x i32>* +; CHECK-NEXT: [[TMP174:%.*]] = load <8 x i32>, <8 x i32>* [[TMP173]], align 16, !tbaa !0 +; CHECK-NEXT: [[TMP175:%.*]] = add nsw <8 x i32> [[TMP174]], [[TMP172]] +; CHECK-NEXT: [[ARRAYIDX128_7:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I1]], i64 0, i64 7 +; CHECK-NEXT: [[TMP176:%.*]] = bitcast i32* [[ARRAYIDX128]] to <8 x i32>* +; CHECK-NEXT: store <8 x i32> [[TMP175]], <8 x i32>* [[TMP176]], align 16, !tbaa !0 +; CHECK-NEXT: [[TMP177:%.*]] = bitcast float* [[ARRAYIDX46_8]] to <8 x float>* +; CHECK-NEXT: [[TMP178:%.*]] = load <8 x float>, <8 x float>* [[TMP177]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP179:%.*]] = bitcast float* [[ARRAYIDX48_8]] to <8 x float>* +; CHECK-NEXT: [[TMP180:%.*]] = load <8 x float>, <8 x float>* [[TMP179]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP181:%.*]] = fadd <8 x float> [[TMP178]], [[TMP180]] +; CHECK-NEXT: [[ARRAYIDX123_8:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I2]], i64 0, i64 8 +; CHECK-NEXT: [[ARRAYIDX125_8:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I3]], i64 0, i64 8 +; CHECK-NEXT: [[ARRAYIDX128_8:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I1]], i64 0, i64 8 +; CHECK-NEXT: [[ARRAYIDX123_9:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I2]], i64 0, i64 9 +; CHECK-NEXT: [[ARRAYIDX125_9:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I3]], i64 0, i64 9 +; CHECK-NEXT: [[ARRAYIDX128_9:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I1]], i64 0, i64 9 +; CHECK-NEXT: [[ARRAYIDX123_10:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I2]], i64 0, i64 10 +; CHECK-NEXT: [[ARRAYIDX125_10:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I3]], i64 0, i64 10 +; CHECK-NEXT: [[ARRAYIDX128_10:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I1]], i64 0, i64 10 +; CHECK-NEXT: [[ARRAYIDX123_11:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I2]], i64 0, i64 11 +; CHECK-NEXT: [[ARRAYIDX125_11:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I3]], i64 0, i64 11 +; CHECK-NEXT: [[ARRAYIDX128_11:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I1]], i64 0, i64 11 +; CHECK-NEXT: [[ARRAYIDX123_12:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I2]], i64 0, i64 12 +; CHECK-NEXT: [[ARRAYIDX125_12:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I3]], i64 0, i64 12 +; CHECK-NEXT: 
[[ARRAYIDX128_12:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I1]], i64 0, i64 12 +; CHECK-NEXT: [[ARRAYIDX123_13:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I2]], i64 0, i64 13 +; CHECK-NEXT: [[ARRAYIDX125_13:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I3]], i64 0, i64 13 +; CHECK-NEXT: [[ARRAYIDX128_13:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I1]], i64 0, i64 13 +; CHECK-NEXT: [[ARRAYIDX123_14:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I2]], i64 0, i64 14 +; CHECK-NEXT: [[ARRAYIDX125_14:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I3]], i64 0, i64 14 +; CHECK-NEXT: [[ARRAYIDX128_14:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I1]], i64 0, i64 14 +; CHECK-NEXT: [[TMP182:%.*]] = bitcast float* [[ARRAYIDX54_8]] to <8 x float>* +; CHECK-NEXT: store <8 x float> [[TMP181]], <8 x float>* [[TMP182]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP183:%.*]] = bitcast float* [[ARRAYIDX51_8]] to <8 x float>* +; CHECK-NEXT: store <8 x float> [[TMP181]], <8 x float>* [[TMP183]], align 16, !tbaa !5 +; CHECK-NEXT: [[ARRAYIDX123_15:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I2]], i64 0, i64 15 +; CHECK-NEXT: [[TMP184:%.*]] = bitcast i32* [[ARRAYIDX123_8]] to <8 x i32>* +; CHECK-NEXT: [[TMP185:%.*]] = load <8 x i32>, <8 x i32>* [[TMP184]], align 16, !tbaa !0 +; CHECK-NEXT: [[ARRAYIDX125_15:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I3]], i64 0, i64 15 +; CHECK-NEXT: [[TMP186:%.*]] = bitcast i32* [[ARRAYIDX125_8]] to <8 x i32>* +; CHECK-NEXT: [[TMP187:%.*]] = load <8 x i32>, <8 x i32>* [[TMP186]], align 16, !tbaa !0 +; CHECK-NEXT: [[TMP188:%.*]] = add nsw <8 x i32> [[TMP187]], [[TMP185]] +; CHECK-NEXT: [[ARRAYIDX128_15:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ARR_I1]], i64 0, i64 15 +; CHECK-NEXT: [[TMP189:%.*]] = bitcast i32* [[ARRAYIDX128_8]] to <8 x i32>* +; CHECK-NEXT: store <8 x i32> [[TMP188]], <8 x i32>* [[TMP189]], align 16, !tbaa !0 +; CHECK-NEXT: call void @foo_i(i32* nonnull [[ARRAYIDX128]]) +; CHECK-NEXT: call void @foo_f(float* nonnull [[ARRAYDECAY]]) +; CHECK-NEXT: call void @foo_f(float* nonnull [[ARRAYIDX51]]) +; CHECK-NEXT: [[TMP190:%.*]] = bitcast float* [[ARRAYIDX46]] to <8 x float>* +; CHECK-NEXT: [[TMP191:%.*]] = load <8 x float>, <8 x float>* [[TMP190]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP192:%.*]] = bitcast float* [[ARRAYIDX48]] to <8 x float>* +; CHECK-NEXT: [[TMP193:%.*]] = load <8 x float>, <8 x float>* [[TMP192]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP194:%.*]] = fadd <8 x float> [[TMP191]], [[TMP193]] +; CHECK-NEXT: [[TMP195:%.*]] = bitcast float* [[ARRAYDECAY]] to <8 x float>* +; CHECK-NEXT: store <8 x float> [[TMP194]], <8 x float>* [[TMP195]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP196:%.*]] = bitcast float* [[ARRAYIDX46_8]] to <4 x float>* +; CHECK-NEXT: [[TMP197:%.*]] = load <4 x float>, <4 x float>* [[TMP196]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP198:%.*]] = bitcast float* [[ARRAYIDX48_8]] to <4 x float>* +; CHECK-NEXT: [[TMP199:%.*]] = load <4 x float>, <4 x float>* [[TMP198]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP200:%.*]] = fadd <4 x float> [[TMP197]], [[TMP199]] +; CHECK-NEXT: [[TMP201:%.*]] = bitcast float* [[ARRAYIDX54_8]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP200]], <4 x float>* [[TMP201]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP202:%.*]] = load float, float* [[ARRAYIDX46_12]], align 16, !tbaa !5 +; CHECK-NEXT: [[TMP203:%.*]] = load float, float* [[ARRAYIDX48_12]], align 16, 
!tbaa !5
+; CHECK-NEXT:    [[ADD143_12:%.*]] = fadd float [[TMP202]], [[TMP203]]
+; CHECK-NEXT:    store float [[ADD143_12]], float* [[ARRAYIDX54_12]], align 16, !tbaa !5
+; CHECK-NEXT:    [[TMP204:%.*]] = load float, float* [[ARRAYIDX46_13]], align 4, !tbaa !5
+; CHECK-NEXT:    [[TMP205:%.*]] = load float, float* [[ARRAYIDX48_13]], align 4, !tbaa !5
+; CHECK-NEXT:    [[ADD143_13:%.*]] = fadd float [[TMP204]], [[TMP205]]
+; CHECK-NEXT:    store float [[ADD143_13]], float* [[ARRAYIDX54_13]], align 4, !tbaa !5
+; CHECK-NEXT:    [[TMP206:%.*]] = extractelement <8 x float> [[TMP181]], i32 6
+; CHECK-NEXT:    store float [[TMP206]], float* [[ARRAYIDX54_14]], align 8, !tbaa !5
+; CHECK-NEXT:    [[TMP207:%.*]] = extractelement <8 x float> [[TMP181]], i32 7
+; CHECK-NEXT:    store float [[TMP207]], float* [[ARRAYIDX54_15]], align 4, !tbaa !5
+; CHECK-NEXT:    [[ARRAYIDX153_15:%.*]] = getelementptr inbounds [17 x float], [17 x float]* [[ARR_F1]], i64 0, i64 16
+; CHECK-NEXT:    store float [[TMP207]], float* [[ARRAYIDX153_15]], align 16, !tbaa !5
+; CHECK-NEXT:    call void @foo_f(float* nonnull [[ARRAYDECAY]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 256, i8* nonnull [[TMP9]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 256, i8* nonnull [[TMP8]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[TMP7]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull [[TMP6]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull [[TMP5]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull [[TMP4]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull [[TMP3]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull [[TMP2]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull [[TMP1]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 68, i8* nonnull [[TMP0]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arr_f1 = alloca [17 x float], align 16
+  %0 = bitcast [17 x float]* %arr_f1 to i8*
+  %arr_f2 = alloca [16 x float], align 16
+  %1 = bitcast [16 x float]* %arr_f2 to i8*
+  %arr_f3 = alloca [16 x float], align 16
+  %arr_f4 = alloca [16 x float], align 16
+  %arr_i1 = alloca [16 x i32], align 16
+  %arr_i2 = alloca [16 x i32], align 16
+  %arr_i3 = alloca [16 x i32], align 16
+  %value1 = alloca i32, align 4
+  %arrarr_f33 = alloca [16 x [16 x i8]], align 16
+  %arrarr_f34 = alloca [16 x [16 x i8]], align 16
+  call void @llvm.lifetime.start.p0i8(i64 68, i8* nonnull %0) #3
+  call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %1) #3
+  %2 = bitcast [16 x float]* %arr_f3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %2) #3
+  %3 = bitcast [16 x float]* %arr_f4 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %3) #3
+  %4 = bitcast [16 x i32]* %arr_i1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %4) #3
+  %5 = bitcast [16 x i32]* %arr_i2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %5) #3
+  %6 = bitcast [16 x i32]* %arr_i3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %6) #3
+  %7 = bitcast i32* %value1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %7) #3
+  %8 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f33, i64 0, i64 0, i64 0
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* nonnull %8) #3
+  %9 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f34, i64 0, i64 0, i64 0
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* nonnull %9) #3
+  store i32 0, i32* %value1, align 4, !tbaa !2
+  call void @foo_i(i32* nonnull %value1) #3
+  store i32 0, i32* %value1, align 4, !tbaa !2
+  br label %for.body7
+
+for.body7:                                        ; preds = %for.body7.for.body7_crit_edge, %entry
+  %10 = phi i8 [ undef, %entry ], [ %.pre227, %for.body7.for.body7_crit_edge ]
+  %11 = phi i8 [ undef, %entry ], [ %.pre, %for.body7.for.body7_crit_edge ]
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body7.for.body7_crit_edge ]
+  %add23.lcssa219 = phi i32 [ 0, %entry ], [ %add23.15, %for.body7.for.body7_crit_edge ]
+  %conv16 = sext i8 %11 to i32
+  %conv21 = sext i8 %10 to i32
+  %sub22 = sub nsw i32 %conv16, %conv21
+  %add23 = add nsw i32 %sub22, %add23.lcssa219
+  %arrayidx15.1 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f33, i64 0, i64 %indvars.iv, i64 1
+  %12 = load i8, i8* %arrayidx15.1, align 1, !tbaa !6
+  %conv16.1 = sext i8 %12 to i32
+  %arrayidx20.1 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f34, i64 0, i64 %indvars.iv, i64 1
+  %13 = load i8, i8* %arrayidx20.1, align 1, !tbaa !6
+  %conv21.1 = sext i8 %13 to i32
+  %sub22.1 = sub nsw i32 %conv16.1, %conv21.1
+  %add23.1 = add nsw i32 %sub22.1, %add23
+  %arrayidx15.2 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f33, i64 0, i64 %indvars.iv, i64 2
+  %14 = load i8, i8* %arrayidx15.2, align 2, !tbaa !6
+  %conv16.2 = sext i8 %14 to i32
+  %arrayidx20.2 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f34, i64 0, i64 %indvars.iv, i64 2
+  %15 = load i8, i8* %arrayidx20.2, align 2, !tbaa !6
+  %conv21.2 = sext i8 %15 to i32
+  %sub22.2 = sub nsw i32 %conv16.2, %conv21.2
+  %add23.2 = add nsw i32 %sub22.2, %add23.1
+  %arrayidx15.3 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f33, i64 0, i64 %indvars.iv, i64 3
+  %16 = load i8, i8* %arrayidx15.3, align 1, !tbaa !6
+  %conv16.3 = sext i8 %16 to i32
+  %arrayidx20.3 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f34, i64 0, i64 %indvars.iv, i64 3
+  %17 = load i8, i8* %arrayidx20.3, align 1, !tbaa !6
+  %conv21.3 = sext i8 %17 to i32
+  %sub22.3 = sub nsw i32 %conv16.3, %conv21.3
+  %add23.3 = add nsw i32 %sub22.3, %add23.2
+  %arrayidx15.4 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f33, i64 0, i64 %indvars.iv, i64 4
+  %18 = load i8, i8* %arrayidx15.4, align 4, !tbaa !6
+  %conv16.4 = sext i8 %18 to i32
+  %arrayidx20.4 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f34, i64 0, i64 %indvars.iv, i64 4
+  %19 = load i8, i8* %arrayidx20.4, align 4, !tbaa !6
+  %conv21.4 = sext i8 %19 to i32
+  %sub22.4 = sub nsw i32 %conv16.4, %conv21.4
+  %add23.4 = add nsw i32 %sub22.4, %add23.3
+  %arrayidx15.5 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f33, i64 0, i64 %indvars.iv, i64 5
+  %20 = load i8, i8* %arrayidx15.5, align 1, !tbaa !6
+  %conv16.5 = sext i8 %20 to i32
+  %arrayidx20.5 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f34, i64 0, i64 %indvars.iv, i64 5
+  %21 = load i8, i8* %arrayidx20.5, align 1, !tbaa !6
+  %conv21.5 = sext i8 %21 to i32
+  %sub22.5 = sub nsw i32 %conv16.5, %conv21.5
+  %add23.5 = add nsw i32 %sub22.5, %add23.4
+  %arrayidx15.6 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f33, i64 0, i64 %indvars.iv, i64 6
+  %22 = load i8, i8* %arrayidx15.6, align 2, !tbaa !6
+  %conv16.6 = sext i8 %22 to i32
+  %arrayidx20.6 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f34, i64 0, i64 %indvars.iv, i64 6
+  %23 = load i8, i8* %arrayidx20.6, align 2, !tbaa !6
+  %conv21.6 = sext i8 %23 to i32
+  %sub22.6 = sub nsw i32 %conv16.6, %conv21.6
+  %add23.6 = add nsw i32 %sub22.6, %add23.5
+  %arrayidx15.7 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f33, i64 0, i64 %indvars.iv, i64 7
+  %24 = load i8, i8* %arrayidx15.7, align 1, !tbaa !6
+  %conv16.7 = sext i8 %24 to i32
+  %arrayidx20.7 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f34, i64 0, i64 %indvars.iv, i64 7
+  %25 = load i8, i8* %arrayidx20.7, align 1, !tbaa !6
+  %conv21.7 = sext i8 %25 to i32
+  %sub22.7 = sub nsw i32 %conv16.7, %conv21.7
+  %add23.7 = add nsw i32 %sub22.7, %add23.6
+  %arrayidx15.8 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f33, i64 0, i64 %indvars.iv, i64 8
+  %26 = load i8, i8* %arrayidx15.8, align 8, !tbaa !6
+  %conv16.8 = sext i8 %26 to i32
+  %arrayidx20.8 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f34, i64 0, i64 %indvars.iv, i64 8
+  %27 = load i8, i8* %arrayidx20.8, align 8, !tbaa !6
+  %conv21.8 = sext i8 %27 to i32
+  %sub22.8 = sub nsw i32 %conv16.8, %conv21.8
+  %add23.8 = add nsw i32 %sub22.8, %add23.7
+  %arrayidx15.9 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f33, i64 0, i64 %indvars.iv, i64 9
+  %28 = load i8, i8* %arrayidx15.9, align 1, !tbaa !6
+  %conv16.9 = sext i8 %28 to i32
+  %arrayidx20.9 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f34, i64 0, i64 %indvars.iv, i64 9
+  %29 = load i8, i8* %arrayidx20.9, align 1, !tbaa !6
+  %conv21.9 = sext i8 %29 to i32
+  %sub22.9 = sub nsw i32 %conv16.9, %conv21.9
+  %add23.9 = add nsw i32 %sub22.9, %add23.8
+  %arrayidx15.10 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f33, i64 0, i64 %indvars.iv, i64 10
+  %30 = load i8, i8* %arrayidx15.10, align 2, !tbaa !6
+  %conv16.10 = sext i8 %30 to i32
+  %arrayidx20.10 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f34, i64 0, i64 %indvars.iv, i64 10
+  %31 = load i8, i8* %arrayidx20.10, align 2, !tbaa !6
+  %conv21.10 = sext i8 %31 to i32
+  %sub22.10 = sub nsw i32 %conv16.10, %conv21.10
+  %add23.10 = add nsw i32 %sub22.10, %add23.9
+  %arrayidx15.11 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f33, i64 0, i64 %indvars.iv, i64 11
+  %32 = load i8, i8* %arrayidx15.11, align 1, !tbaa !6
+  %conv16.11 = sext i8 %32 to i32
+  %arrayidx20.11 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f34, i64 0, i64 %indvars.iv, i64 11
+  %33 = load i8, i8* %arrayidx20.11, align 1, !tbaa !6
+  %conv21.11 = sext i8 %33 to i32
+  %sub22.11 = sub nsw i32 %conv16.11, %conv21.11
+  %add23.11 = add nsw i32 %sub22.11, %add23.10
+  %arrayidx15.12 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f33, i64 0, i64 %indvars.iv, i64 12
+  %34 = load i8, i8* %arrayidx15.12, align 4, !tbaa !6
+  %conv16.12 = sext i8 %34 to i32
+  %arrayidx20.12 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f34, i64 0, i64 %indvars.iv, i64 12
+  %35 = load i8, i8* %arrayidx20.12, align 4, !tbaa !6
+  %conv21.12 = sext i8 %35 to i32
+  %sub22.12 = sub nsw i32 %conv16.12, %conv21.12
+  %add23.12 = add nsw i32 %sub22.12, %add23.11
+  %arrayidx15.13 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f33, i64 0, i64 %indvars.iv, i64 13
+  %36 = load i8, i8* %arrayidx15.13, align 1, !tbaa !6
+  %conv16.13 = sext i8 %36 to i32
+  %arrayidx20.13 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f34, i64 0, i64 %indvars.iv, i64 13
+  %37 = load i8, i8* %arrayidx20.13, align 1, !tbaa !6
+  %conv21.13 = sext i8 %37 to i32
+  %sub22.13 = sub nsw i32 %conv16.13, %conv21.13
+  %add23.13 = add nsw i32 %sub22.13, %add23.12
+  %arrayidx15.14 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f33, i64 0, i64 %indvars.iv, i64 14
+  %38 = load i8, i8* %arrayidx15.14, align 2, !tbaa !6
+  %conv16.14 = sext i8 %38 to i32
+  %arrayidx20.14 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f34, i64 0, i64 %indvars.iv, i64 14
+  %39 = load i8, i8* %arrayidx20.14, align 2, !tbaa !6
+  %conv21.14 = sext i8 %39 to i32
+  %sub22.14 = sub nsw i32 %conv16.14, %conv21.14
+  %add23.14 = add nsw i32 %sub22.14, %add23.13
+  %arrayidx15.15 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f33, i64 0, i64 %indvars.iv, i64 15
+  %40 = load i8, i8* %arrayidx15.15, align 1, !tbaa !6
+  %conv16.15 = sext i8 %40 to i32
+  %arrayidx20.15 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f34, i64 0, i64 %indvars.iv, i64 15
+  %41 = load i8, i8* %arrayidx20.15, align 1, !tbaa !6
+  %conv21.15 = sext i8 %41 to i32
+  %sub22.15 = sub nsw i32 %conv16.15, %conv21.15
+  %add23.15 = add nsw i32 %sub22.15, %add23.14
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 16
+  br i1 %exitcond, label %for.end29, label %for.body7.for.body7_crit_edge
+
+for.body7.for.body7_crit_edge:                    ; preds = %for.body7
+  %arrayidx15.phi.trans.insert = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f33, i64 0, i64 %indvars.iv.next, i64 0
+  %.pre = load i8, i8* %arrayidx15.phi.trans.insert, align 16, !tbaa !6
+  %arrayidx20.phi.trans.insert = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %arrarr_f34, i64 0, i64 %indvars.iv.next, i64 0
+  %.pre227 = load i8, i8* %arrayidx20.phi.trans.insert, align 16, !tbaa !6
+  br label %for.body7
+
+for.end29:                                        ; preds = %for.body7
+  store i32 %add23.15, i32* %value1, align 4, !tbaa !2
+  call void @foo_i(i32* nonnull %value1) #3
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %0, i8* nonnull %1, i64 64, i32 16, i1 false)
+  %arraydecay = getelementptr inbounds [17 x float], [17 x float]* %arr_f1, i64 0, i64 0
+  call void @foo_f(float* nonnull %arraydecay) #3
+  %arrayidx46 = getelementptr inbounds [16 x float], [16 x float]* %arr_f2, i64 0, i64 0
+  %42 = load float, float* %arrayidx46, align 16, !tbaa !7
+  %arrayidx48 = getelementptr inbounds [16 x float], [16 x float]* %arr_f3, i64 0, i64 0
+  %43 = load float, float* %arrayidx48, align 16, !tbaa !7
+  %add49 = fadd float %42, %43
+  %arrayidx51 = getelementptr inbounds [16 x float], [16 x float]* %arr_f4, i64 0, i64 0
+  %44 = load float, float* %arrayidx51, align 16, !tbaa !7
+  %add52 = fadd float %add49, %44
+  store float %add52, float* %arraydecay, align 16, !tbaa !7
+  %arrayidx46.1 = getelementptr inbounds [16 x float], [16 x float]* %arr_f2, i64 0, i64 1
+  %45 = load float, float* %arrayidx46.1, align 4, !tbaa !7
+  %arrayidx48.1 = getelementptr inbounds [16 x float], [16 x float]* %arr_f3, i64 0, i64 1
+  %46 = load float, float* %arrayidx48.1, align 4, !tbaa !7
+  %add49.1 = fadd float %45, %46
+  %arrayidx51.1 = getelementptr inbounds [16 x float], [16 x float]* %arr_f4, i64 0, i64 1
+  %47 = load float, float* %arrayidx51.1, align 4, !tbaa !7
+  %add52.1 = fadd float %add49.1, %47
+  %arrayidx54.1 = getelementptr inbounds [17 x float], [17 x float]* %arr_f1, i64 0, i64 1
+  store float %add52.1, float* %arrayidx54.1, align 4, !tbaa !7
+  %arrayidx46.2 = getelementptr inbounds [16 x float], [16 x float]* %arr_f2, i64 0, i64 2
+  %48 = load float, float* %arrayidx46.2, align 8, !tbaa !7
+  %arrayidx48.2 = getelementptr inbounds [16 x float], [16 x float]* %arr_f3, i64 0, i64 2
+  %49 = load float, float* %arrayidx48.2, align 8, !tbaa !7
+  %add49.2 = fadd float %48, %49
+  %arrayidx51.2 = getelementptr inbounds [16 x float], [16 x float]* %arr_f4, i64 0, i64 2
+  %50 = load float, float* %arrayidx51.2, align 8, !tbaa !7
+  %add52.2 = fadd float %add49.2, %50
+  %arrayidx54.2 = getelementptr inbounds [17 x float], [17 x float]* %arr_f1, i64 0, i64 2
+  store float %add52.2, float* %arrayidx54.2, align 8, !tbaa !7
+  %arrayidx46.3 = getelementptr inbounds [16 x float], [16 x float]* %arr_f2, i64 0, i64 3
+  %51 = load float, float* %arrayidx46.3, align 4, !tbaa !7
+  %arrayidx48.3 = getelementptr inbounds [16 x float], [16 x float]* %arr_f3, i64 0, i64 3
+  %52 = load float, float* %arrayidx48.3, align 4, !tbaa !7
+  %add49.3 = fadd float %51, %52
+  %arrayidx51.3 = getelementptr inbounds [16 x float], [16 x float]* %arr_f4, i64 0, i64 3
+  %53 = load float, float* %arrayidx51.3, align 4, !tbaa !7
+  %add52.3 = fadd float %add49.3, %53
+  %arrayidx54.3 = getelementptr inbounds [17 x float], [17 x float]* %arr_f1, i64 0, i64 3
+  store float %add52.3, float* %arrayidx54.3, align 4, !tbaa !7
+  %arrayidx46.4 = getelementptr inbounds [16 x float], [16 x float]* %arr_f2, i64 0, i64 4
+  %54 = load float, float* %arrayidx46.4, align 16, !tbaa !7
+  %arrayidx48.4 = getelementptr inbounds [16 x float], [16 x float]* %arr_f3, i64 0, i64 4
+  %55 = load float, float* %arrayidx48.4, align 16, !tbaa !7
+  %add49.4 = fadd float %54, %55
+  %arrayidx51.4 = getelementptr inbounds [16 x float], [16 x float]* %arr_f4, i64 0, i64 4
+  %56 = load float, float* %arrayidx51.4, align 16, !tbaa !7
+  %add52.4 = fadd float %add49.4, %56
+  %arrayidx54.4 = getelementptr inbounds [17 x float], [17 x float]* %arr_f1, i64 0, i64 4
+  store float %add52.4, float* %arrayidx54.4, align 16, !tbaa !7
+  %arrayidx46.5 = getelementptr inbounds [16 x float], [16 x float]* %arr_f2, i64 0, i64 5
+  %57 = load float, float* %arrayidx46.5, align 4, !tbaa !7
+  %arrayidx48.5 = getelementptr inbounds [16 x float], [16 x float]* %arr_f3, i64 0, i64 5
+  %58 = load float, float* %arrayidx48.5, align 4, !tbaa !7
+  %add49.5 = fadd float %57, %58
+  %arrayidx51.5 = getelementptr inbounds [16 x float], [16 x float]* %arr_f4, i64 0, i64 5
+  %59 = load float, float* %arrayidx51.5, align 4, !tbaa !7
+  %add52.5 = fadd float %add49.5, %59
+  %arrayidx54.5 = getelementptr inbounds [17 x float], [17 x float]* %arr_f1, i64 0, i64 5
+  store float %add52.5, float* %arrayidx54.5, align 4, !tbaa !7
+  %arrayidx46.6 = getelementptr inbounds [16 x float], [16 x float]* %arr_f2, i64 0, i64 6
+  %60 = load float, float* %arrayidx46.6, align 8, !tbaa !7
+  %arrayidx48.6 = getelementptr inbounds [16 x float], [16 x float]* %arr_f3, i64 0, i64 6
+  %61 = load float, float* %arrayidx48.6, align 8, !tbaa !7
+  %add49.6 = fadd float %60, %61
+  %arrayidx51.6 = getelementptr inbounds [16 x float], [16 x float]* %arr_f4, i64 0, i64 6
+  %62 = load float, float* %arrayidx51.6, align 8, !tbaa !7
+  %add52.6 = fadd float %add49.6, %62
+  %arrayidx54.6 = getelementptr inbounds [17 x float], [17 x float]* %arr_f1, i64 0, i64 6
+  store float %add52.6, float* %arrayidx54.6, align 8, !tbaa !7
+  %arrayidx46.7 = getelementptr inbounds [16 x float], [16 x float]* %arr_f2, i64 0, i64 7
+  %63 = load float, float* %arrayidx46.7, align 4, !tbaa !7
+  %arrayidx48.7 = getelementptr inbounds [16 x float], [16 x float]* %arr_f3, i64 0, i64 7
+  %64 = load float, float* %arrayidx48.7, align 4, !tbaa !7
+  %add49.7 = fadd float %63, %64
+  %arrayidx51.7 = getelementptr inbounds [16 x float], [16 x float]* %arr_f4, i64 0, i64 7
+  %65 = load float, float* %arrayidx51.7, align 4, !tbaa !7
+  %add52.7 = fadd float %add49.7, %65
+  %arrayidx54.7 = getelementptr inbounds [17 x float], [17 x float]* %arr_f1, i64 0, i64 7
+  store float %add52.7, float* %arrayidx54.7, align 4, !tbaa !7
+  %arrayidx46.8 = getelementptr inbounds [16 x float], [16 x float]* %arr_f2, i64 0, i64 8
+  %66 = load float, float* %arrayidx46.8, align 16, !tbaa !7
+  %arrayidx48.8 = getelementptr inbounds [16 x float], [16 x float]* %arr_f3, i64 0, i64 8
+  %67 = load float, float* %arrayidx48.8, align 16, !tbaa !7
+  %add49.8 = fadd float %66, %67
+  %arrayidx51.8 = getelementptr inbounds [16 x float], [16 x float]* %arr_f4, i64 0, i64 8
+  %68 = load float, float* %arrayidx51.8, align 16, !tbaa !7
+  %add52.8 = fadd float %add49.8, %68
+  %arrayidx54.8 = getelementptr inbounds [17 x float], [17 x float]* %arr_f1, i64 0, i64 8
+  store float %add52.8, float* %arrayidx54.8, align 16, !tbaa !7
+  %arrayidx46.9 = getelementptr inbounds [16 x float], [16 x float]* %arr_f2, i64 0, i64 9
+  %69 = load float, float* %arrayidx46.9, align 4, !tbaa !7
+  %arrayidx48.9 = getelementptr inbounds [16 x float], [16 x float]* %arr_f3, i64 0, i64 9
+  %70 = load float, float* %arrayidx48.9, align 4, !tbaa !7
+  %add49.9 = fadd float %69, %70
+  %arrayidx51.9 = getelementptr inbounds [16 x float], [16 x float]* %arr_f4, i64 0, i64 9
+  %71 = load float, float* %arrayidx51.9, align 4, !tbaa !7
+  %add52.9 = fadd float %add49.9, %71
+  %arrayidx54.9 = getelementptr inbounds [17 x float], [17 x float]* %arr_f1, i64 0, i64 9
+  store float %add52.9, float* %arrayidx54.9, align 4, !tbaa !7
+  %arrayidx46.10 = getelementptr inbounds [16 x float], [16 x float]* %arr_f2, i64 0, i64 10
+  %72 = load float, float* %arrayidx46.10, align 8, !tbaa !7
+  %arrayidx48.10 = getelementptr inbounds [16 x float], [16 x float]* %arr_f3, i64 0, i64 10
+  %73 = load float, float* %arrayidx48.10, align 8, !tbaa !7
+  %add49.10 = fadd float %72, %73
+  %arrayidx51.10 = getelementptr inbounds [16 x float], [16 x float]* %arr_f4, i64 0, i64 10
+  %74 = load float, float* %arrayidx51.10, align 8, !tbaa !7
+  %add52.10 = fadd float %add49.10, %74
+  %arrayidx54.10 = getelementptr inbounds [17 x float], [17 x float]* %arr_f1, i64 0, i64 10
+  store float %add52.10, float* %arrayidx54.10, align 8, !tbaa !7
+  %arrayidx46.11 = getelementptr inbounds [16 x float], [16 x float]* %arr_f2, i64 0, i64 11
+  %75 = load float, float* %arrayidx46.11, align 4, !tbaa !7
+  %arrayidx48.11 = getelementptr inbounds [16 x float], [16 x float]* %arr_f3, i64 0, i64 11
+  %76 = load float, float* %arrayidx48.11, align 4, !tbaa !7
+  %add49.11 = fadd float %75, %76
+  %arrayidx51.11 = getelementptr inbounds [16 x float], [16 x float]* %arr_f4, i64 0, i64 11
+  %77 = load float, float* %arrayidx51.11, align 4, !tbaa !7
+  %add52.11 = fadd float %add49.11, %77
+  %arrayidx54.11 = getelementptr inbounds [17 x float], [17 x float]* %arr_f1, i64 0, i64 11
+  store float %add52.11, float* %arrayidx54.11, align 4, !tbaa !7
+  %arrayidx46.12 = getelementptr inbounds [16 x float], [16 x float]* %arr_f2, i64 0, i64 12
+  %78 = load float, float* %arrayidx46.12, align 16, !tbaa !7
+  %arrayidx48.12 = getelementptr inbounds [16 x float], [16 x float]* %arr_f3, i64 0, i64 12
+  %79 = load float, float* %arrayidx48.12, align 16, !tbaa !7
+  %add49.12 = fadd float %78, %79
+  %arrayidx51.12 = getelementptr inbounds [16 x float], [16 x float]* %arr_f4, i64 0, i64 12
+  %80 = load float, float* %arrayidx51.12, align 16, !tbaa !7
+  %add52.12 = fadd float %add49.12, %80
+  %arrayidx54.12 = getelementptr inbounds [17 x float], [17 x float]* %arr_f1, i64 0, i64 12
+  store float %add52.12, float* %arrayidx54.12, align 16, !tbaa !7
+  %arrayidx46.13 = getelementptr inbounds [16 x float], [16 x float]* %arr_f2, i64 0, i64 13
+  %81 = load float, float* %arrayidx46.13, align 4, !tbaa !7
+  %arrayidx48.13 = getelementptr inbounds [16 x float], [16 x float]* %arr_f3, i64 0, i64 13
+  %82 = load float, float* %arrayidx48.13, align 4, !tbaa !7
+  %add49.13 = fadd float %81, %82
+  %arrayidx51.13 = getelementptr inbounds [16 x float], [16 x float]* %arr_f4, i64 0, i64 13
+  %83 = load float, float* %arrayidx51.13, align 4, !tbaa !7
+  %add52.13 = fadd float %add49.13, %83
+  %arrayidx54.13 = getelementptr inbounds [17 x float], [17 x float]* %arr_f1, i64 0, i64 13
+  store float %add52.13, float* %arrayidx54.13, align 4, !tbaa !7
+  %arrayidx46.14 = getelementptr inbounds [16 x float], [16 x float]* %arr_f2, i64 0, i64 14
+  %84 = load float, float* %arrayidx46.14, align 8, !tbaa !7
+  %arrayidx48.14 = getelementptr inbounds [16 x float], [16 x float]* %arr_f3, i64 0, i64 14
+  %85 = load float, float* %arrayidx48.14, align 8, !tbaa !7
+  %add49.14 = fadd float %84, %85
+  %arrayidx51.14 = getelementptr inbounds [16 x float], [16 x float]* %arr_f4, i64 0, i64 14
+  %86 = load float, float* %arrayidx51.14, align 8, !tbaa !7
+  %add52.14 = fadd float %add49.14, %86
+  %arrayidx54.14 = getelementptr inbounds [17 x float], [17 x float]* %arr_f1, i64 0, i64 14
+  store float %add52.14, float* %arrayidx54.14, align 8, !tbaa !7
+  %arrayidx46.15 = getelementptr inbounds [16 x float], [16 x float]* %arr_f2, i64 0, i64 15
+  %87 = load float, float* %arrayidx46.15, align 4, !tbaa !7
+  %arrayidx48.15 = getelementptr inbounds [16 x float], [16 x float]* %arr_f3, i64 0, i64 15
+  %88 = load float, float* %arrayidx48.15, align 4, !tbaa !7
+  %add49.15 = fadd float %87, %88
+  %arrayidx51.15 = getelementptr inbounds [16 x float], [16 x float]* %arr_f4, i64 0, i64 15
+  %89 = load float, float* %arrayidx51.15, align 4, !tbaa !7
+  %add52.15 = fadd float %add49.15, %89
+  %arrayidx54.15 = getelementptr inbounds [17 x float], [17 x float]* %arr_f1, i64 0, i64 15
+  store float %add52.15, float* %arrayidx54.15, align 4, !tbaa !7
+  call void @foo_f(float* nonnull %arraydecay) #3
+  %90 = load float, float* %arrayidx46.1, align 4, !tbaa !7
+  %91 = load float, float* %arrayidx48.1, align 4, !tbaa !7
+  %mul70 = fmul float %90, %91
+  %92 = load float, float* %arrayidx46, align 16, !tbaa !7
+  %93 = load float, float* %arrayidx48, align 16, !tbaa !7
+  %mul77 = fmul float %92, %93
+  %sub78 = fsub float %mul70, %mul77
+  store float %sub78, float* %arraydecay, align 16, !tbaa !7
+  %94 = load float, float* %arrayidx46.3, align 4, !tbaa !7
+  %95 = load float, float* %arrayidx48.3, align 4, !tbaa !7
+  %mul70.1 = fmul float %94, %95
+  %96 = load float, float* %arrayidx46.2, align 8, !tbaa !7
+  %97 = load float, float* %arrayidx48.2, align 8, !tbaa !7
+  %mul77.1 = fmul float %96, %97
+  %sub78.1 = fsub float %mul70.1, %mul77.1
+  store float %sub78.1, float* %arrayidx54.1, align 4, !tbaa !7
+  %98 = load float, float* %arrayidx46.5, align 4, !tbaa !7
+  %99 = load float, float* %arrayidx48.5, align 4, !tbaa !7
+  %mul70.2 = fmul float %98, %99
+  %100 = load float, float* %arrayidx46.4, align 16, !tbaa !7
+  %101 = load float, float* %arrayidx48.4, align 16, !tbaa !7
+  %mul77.2 = fmul float %100, %101
+  %sub78.2 = fsub float %mul70.2, %mul77.2
+  store float %sub78.2, float* %arrayidx54.2, align 8, !tbaa !7
+  %102 = load float, float* %arrayidx46.7, align 4, !tbaa !7
+  %103 = load float, float* %arrayidx48.7, align 4, !tbaa !7
+  %mul70.3 = fmul float %102, %103
+  %104 = load float, float* %arrayidx46.6, align 8, !tbaa !7
+  %105 = load float, float* %arrayidx48.6, align 8, !tbaa !7
+  %mul77.3 = fmul float %104, %105
+  %sub78.3 = fsub float %mul70.3, %mul77.3
+  store float %sub78.3, float* %arrayidx54.3, align 4, !tbaa !7
+  %106 = load float, float* %arrayidx46.9, align 4, !tbaa !7
+  %107 = load float, float* %arrayidx48.9, align 4, !tbaa !7
+  %mul70.4 = fmul float %106, %107
+  %108 = load float, float* %arrayidx46.8, align 16, !tbaa !7
+  %109 = load float, float* %arrayidx48.8, align 16, !tbaa !7
+  %mul77.4 = fmul float %108, %109
+  %sub78.4 = fsub float %mul70.4, %mul77.4
+  store float %sub78.4, float* %arrayidx54.4, align 16, !tbaa !7
+  %110 = load float, float* %arrayidx46.11, align 4, !tbaa !7
+  %111 = load float, float* %arrayidx48.11, align 4, !tbaa !7
+  %mul70.5 = fmul float %110, %111
+  %112 = load float, float* %arrayidx46.10, align 8, !tbaa !7
+  %113 = load float, float* %arrayidx48.10, align 8, !tbaa !7
+  %mul77.5 = fmul float %112, %113
+  %sub78.5 = fsub float %mul70.5, %mul77.5
+  store float %sub78.5, float* %arrayidx54.5, align 4, !tbaa !7
+  %114 = load float, float* %arrayidx46.13, align 4, !tbaa !7
+  %115 = load float, float* %arrayidx48.13, align 4, !tbaa !7
+  %mul70.6 = fmul float %114, %115
+  %116 = load float, float* %arrayidx46.12, align 16, !tbaa !7
+  %117 = load float, float* %arrayidx48.12, align 16, !tbaa !7
+  %mul77.6 = fmul float %116, %117
+  %sub78.6 = fsub float %mul70.6, %mul77.6
+  store float %sub78.6, float* %arrayidx54.6, align 8, !tbaa !7
+  %118 = load float, float* %arrayidx46.15, align 4, !tbaa !7
+  %119 = load float, float* %arrayidx48.15, align 4, !tbaa !7
+  %mul70.7 = fmul float %118, %119
+  %120 = load float, float* %arrayidx46.14, align 8, !tbaa !7
+  %121 = load float, float* %arrayidx48.14, align 8, !tbaa !7
+  %mul77.7 = fmul float %120, %121
+  %sub78.7 = fsub float %mul70.7, %mul77.7
+  store float %sub78.7, float* %arrayidx54.7, align 4, !tbaa !7
+  call void @foo_f(float* nonnull %arraydecay) #3
+  %122 = load float, float* %arrayidx46, align 16, !tbaa !7
+  %123 = load float, float* %arrayidx48, align 16, !tbaa !7
+  %add112 = fadd float %122, %123
+  store float %add112, float* %arraydecay, align 16, !tbaa !7
+  store float %add112, float* %arrayidx51, align 16, !tbaa !7
+  %arrayidx123 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i2, i64 0, i64 0
+  %124 = load i32, i32* %arrayidx123, align 16, !tbaa !2
+  %arrayidx125 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i3, i64 0, i64 0
+  %125 = load i32, i32* %arrayidx125, align 16, !tbaa !2
+  %add126 = add nsw i32 %125, %124
+  %arrayidx128 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i1, i64 0, i64 0
+  store i32 %add126, i32* %arrayidx128, align 16, !tbaa !2
+  %126 = load float, float* %arrayidx46.1, align 4, !tbaa !7
+  %127 = load float, float* %arrayidx48.1, align 4, !tbaa !7
+  %add112.1 = fadd float %126, %127
+  store float %add112.1, float* %arrayidx54.1, align 4, !tbaa !7
+  store float %add112.1, float* %arrayidx51.1, align 4, !tbaa !7
+  %arrayidx123.1 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i2, i64 0, i64 1
+  %128 = load i32, i32* %arrayidx123.1, align 4, !tbaa !2
+  %arrayidx125.1 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i3, i64 0, i64 1
+  %129 = load i32, i32* %arrayidx125.1, align 4, !tbaa !2
+  %add126.1 = add nsw i32 %129, %128
+  %arrayidx128.1 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i1, i64 0, i64 1
+  store i32 %add126.1, i32* %arrayidx128.1, align 4, !tbaa !2
+  %130 = load float, float* %arrayidx46.2, align 8, !tbaa !7
+  %131 = load float, float* %arrayidx48.2, align 8, !tbaa !7
+  %add112.2 = fadd float %130, %131
+  store float %add112.2, float* %arrayidx54.2, align 8, !tbaa !7
+  store float %add112.2, float* %arrayidx51.2, align 8, !tbaa !7
+  %arrayidx123.2 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i2, i64 0, i64 2
+  %132 = load i32, i32* %arrayidx123.2, align 8, !tbaa !2
+  %arrayidx125.2 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i3, i64 0, i64 2
+  %133 = load i32, i32* %arrayidx125.2, align 8, !tbaa !2
+  %add126.2 = add nsw i32 %133, %132
+  %arrayidx128.2 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i1, i64 0, i64 2
+  store i32 %add126.2, i32* %arrayidx128.2, align 8, !tbaa !2
+  %134 = load float, float* %arrayidx46.3, align 4, !tbaa !7
+  %135 = load float, float* %arrayidx48.3, align 4, !tbaa !7
+  %add112.3 = fadd float %134, %135
+  store float %add112.3, float* %arrayidx54.3, align 4, !tbaa !7
+  store float %add112.3, float* %arrayidx51.3, align 4, !tbaa !7
+  %arrayidx123.3 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i2, i64 0, i64 3
+  %136 = load i32, i32* %arrayidx123.3, align 4, !tbaa !2
+  %arrayidx125.3 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i3, i64 0, i64 3
+  %137 = load i32, i32* %arrayidx125.3, align 4, !tbaa !2
+  %add126.3 = add nsw i32 %137, %136
+  %arrayidx128.3 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i1, i64 0, i64 3
+  store i32 %add126.3, i32* %arrayidx128.3, align 4, !tbaa !2
+  %138 = load float, float* %arrayidx46.4, align 16, !tbaa !7
+  %139 = load float, float* %arrayidx48.4, align 16, !tbaa !7
+  %add112.4 = fadd float %138, %139
+  store float %add112.4, float* %arrayidx54.4, align 16, !tbaa !7
+  store float %add112.4, float* %arrayidx51.4, align 16, !tbaa !7
+  %arrayidx123.4 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i2, i64 0, i64 4
+  %140 = load i32, i32* %arrayidx123.4, align 16, !tbaa !2
+  %arrayidx125.4 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i3, i64 0, i64 4
+  %141 = load i32, i32* %arrayidx125.4, align 16, !tbaa !2
+  %add126.4 = add nsw i32 %141, %140
+  %arrayidx128.4 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i1, i64 0, i64 4
+  store i32 %add126.4, i32* %arrayidx128.4, align 16, !tbaa !2
+  %142 = load float, float* %arrayidx46.5, align 4, !tbaa !7
+  %143 = load float, float* %arrayidx48.5, align 4, !tbaa !7
+  %add112.5 = fadd float %142, %143
+  store float %add112.5, float* %arrayidx54.5, align 4, !tbaa !7
+  store float %add112.5, float* %arrayidx51.5, align 4, !tbaa !7
+  %arrayidx123.5 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i2, i64 0, i64 5
+  %144 = load i32, i32* %arrayidx123.5, align 4, !tbaa !2
+  %arrayidx125.5 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i3, i64 0, i64 5
+  %145 = load i32, i32* %arrayidx125.5, align 4, !tbaa !2
+  %add126.5 = add nsw i32 %145, %144
+  %arrayidx128.5 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i1, i64 0, i64 5
+  store i32 %add126.5, i32* %arrayidx128.5, align 4, !tbaa !2
+  %146 = load float, float* %arrayidx46.6, align 8, !tbaa !7
+  %147 = load float, float* %arrayidx48.6, align 8, !tbaa !7
+  %add112.6 = fadd float %146, %147
+  store float %add112.6, float* %arrayidx54.6, align 8, !tbaa !7
+  store float %add112.6, float* %arrayidx51.6, align 8, !tbaa !7
+  %arrayidx123.6 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i2, i64 0, i64 6
+  %148 = load i32, i32* %arrayidx123.6, align 8, !tbaa !2
+  %arrayidx125.6 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i3, i64 0, i64 6
+  %149 = load i32, i32* %arrayidx125.6, align 8, !tbaa !2
+  %add126.6 = add nsw i32 %149, %148
+  %arrayidx128.6 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i1, i64 0, i64 6
+  store i32 %add126.6, i32* %arrayidx128.6, align 8, !tbaa !2
+  %150 = load float, float* %arrayidx46.7, align 4, !tbaa !7
+  %151 = load float, float* %arrayidx48.7, align 4, !tbaa !7
+  %add112.7 = fadd float %150, %151
+  store float %add112.7, float* %arrayidx54.7, align 4, !tbaa !7
+  store float %add112.7, float* %arrayidx51.7, align 4, !tbaa !7
+  %arrayidx123.7 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i2, i64 0, i64 7
+  %152 = load i32, i32* %arrayidx123.7, align 4, !tbaa !2
+  %arrayidx125.7 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i3, i64 0, i64 7
+  %153 = load i32, i32* %arrayidx125.7, align 4, !tbaa !2
+  %add126.7 = add nsw i32 %153, %152
+  %arrayidx128.7 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i1, i64 0, i64 7
+  store i32 %add126.7, i32* %arrayidx128.7, align 4, !tbaa !2
+  %154 = load float, float* %arrayidx46.8, align 16, !tbaa !7
+  %155 = load float, float* %arrayidx48.8, align 16, !tbaa !7
+  %add112.8 = fadd float %154, %155
+  store float %add112.8, float* %arrayidx54.8, align 16, !tbaa !7
+  store float %add112.8, float* %arrayidx51.8, align 16, !tbaa !7
+  %arrayidx123.8 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i2, i64 0, i64 8
+  %156 = load i32, i32* %arrayidx123.8, align 16, !tbaa !2
+  %arrayidx125.8 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i3, i64 0, i64 8
+  %157 = load i32, i32* %arrayidx125.8, align 16, !tbaa !2
+  %add126.8 = add nsw i32 %157, %156
+  %arrayidx128.8 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i1, i64 0, i64 8
+  store i32 %add126.8, i32* %arrayidx128.8, align 16, !tbaa !2
+  %158 = load float, float* %arrayidx46.9, align 4, !tbaa !7
+  %159 = load float, float* %arrayidx48.9, align 4, !tbaa !7
+  %add112.9 = fadd float %158, %159
+  store float %add112.9, float* %arrayidx54.9, align 4, !tbaa !7
+  store float %add112.9, float* %arrayidx51.9, align 4, !tbaa !7
+  %arrayidx123.9 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i2, i64 0, i64 9
+  %160 = load i32, i32* %arrayidx123.9, align 4, !tbaa !2
+  %arrayidx125.9 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i3, i64 0, i64 9
+  %161 = load i32, i32* %arrayidx125.9, align 4, !tbaa !2
+  %add126.9 = add nsw i32 %161, %160
+  %arrayidx128.9 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i1, i64 0, i64 9
+  store i32 %add126.9, i32* %arrayidx128.9, align 4, !tbaa !2
+  %162 = load float, float* %arrayidx46.10, align 8, !tbaa !7
+  %163 = load float, float* %arrayidx48.10, align 8, !tbaa !7
+  %add112.10 = fadd float %162, %163
+  store float %add112.10, float* %arrayidx54.10, align 8, !tbaa !7
+  store float %add112.10, float* %arrayidx51.10, align 8, !tbaa !7
+  %arrayidx123.10 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i2, i64 0, i64 10
+  %164 = load i32, i32* %arrayidx123.10, align 8, !tbaa !2
+  %arrayidx125.10 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i3, i64 0, i64 10
+  %165 = load i32, i32* %arrayidx125.10, align 8, !tbaa !2
+  %add126.10 = add nsw i32 %165, %164
+  %arrayidx128.10 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i1, i64 0, i64 10
+  store i32 %add126.10, i32* %arrayidx128.10, align 8, !tbaa !2
+  %166 = load float, float* %arrayidx46.11, align 4, !tbaa !7
+  %167 = load float, float* %arrayidx48.11, align 4, !tbaa !7
+  %add112.11 = fadd float %166, %167
+  store float %add112.11, float* %arrayidx54.11, align 4, !tbaa !7
+  store float %add112.11, float* %arrayidx51.11, align 4, !tbaa !7
+  %arrayidx123.11 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i2, i64 0, i64 11
+  %168 = load i32, i32* %arrayidx123.11, align 4, !tbaa !2
+  %arrayidx125.11 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i3, i64 0, i64 11
+  %169 = load i32, i32* %arrayidx125.11, align 4, !tbaa !2
+  %add126.11 = add nsw i32 %169, %168
+  %arrayidx128.11 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i1, i64 0, i64 11
+  store i32 %add126.11, i32* %arrayidx128.11, align 4, !tbaa !2
+  %170 = load float, float* %arrayidx46.12, align 16, !tbaa !7
+  %171 = load float, float* %arrayidx48.12, align 16, !tbaa !7
+  %add112.12 = fadd float %170, %171
+  store float %add112.12, float* %arrayidx54.12, align 16, !tbaa !7
+  store float %add112.12, float* %arrayidx51.12, align 16, !tbaa !7
+  %arrayidx123.12 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i2, i64 0, i64 12
+  %172 = load i32, i32* %arrayidx123.12, align 16, !tbaa !2
+  %arrayidx125.12 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i3, i64 0, i64 12
+  %173 = load i32, i32* %arrayidx125.12, align 16, !tbaa !2
+  %add126.12 = add nsw i32 %173, %172
+  %arrayidx128.12 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i1, i64 0, i64 12
+  store i32 %add126.12, i32* %arrayidx128.12, align 16, !tbaa !2
+  %174 = load float, float* %arrayidx46.13, align 4, !tbaa !7
+  %175 = load float, float* %arrayidx48.13, align 4, !tbaa !7
+  %add112.13 = fadd float %174, %175
+  store float %add112.13, float* %arrayidx54.13, align 4, !tbaa !7
+  store float %add112.13, float* %arrayidx51.13, align 4, !tbaa !7
+  %arrayidx123.13 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i2, i64 0, i64 13
+  %176 = load i32, i32* %arrayidx123.13, align 4, !tbaa !2
+  %arrayidx125.13 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i3, i64 0, i64 13
+  %177 = load i32, i32* %arrayidx125.13, align 4, !tbaa !2
+  %add126.13 = add nsw i32 %177, %176
+  %arrayidx128.13 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i1, i64 0, i64 13
+  store i32 %add126.13, i32* %arrayidx128.13, align 4, !tbaa !2
+  %178 = load float, float* %arrayidx46.14, align 8, !tbaa !7
+  %179 = load float, float* %arrayidx48.14, align 8, !tbaa !7
+  %add112.14 = fadd float %178, %179
+  store float %add112.14, float* %arrayidx54.14, align 8, !tbaa !7
+  store float %add112.14, float* %arrayidx51.14, align 8, !tbaa !7
+  %arrayidx123.14 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i2, i64 0, i64 14
+  %180 = load i32, i32* %arrayidx123.14, align 8, !tbaa !2
+  %arrayidx125.14 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i3, i64 0, i64 14
+  %181 = load i32, i32* %arrayidx125.14, align 8, !tbaa !2
+  %add126.14 = add nsw i32 %181, %180
+  %arrayidx128.14 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i1, i64 0, i64 14
+  store i32 %add126.14, i32* %arrayidx128.14, align 8, !tbaa !2
+  %182 = load float, float* %arrayidx46.15, align 4, !tbaa !7
+  %183 = load float, float* %arrayidx48.15, align 4, !tbaa !7
+  %add112.15 = fadd float %182, %183
+  store float %add112.15, float* %arrayidx54.15, align 4, !tbaa !7
+  store float %add112.15, float* %arrayidx51.15, align 4, !tbaa !7
+  %arrayidx123.15 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i2, i64 0, i64 15
+  %184 = load i32, i32* %arrayidx123.15, align 4, !tbaa !2
+  %arrayidx125.15 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i3, i64 0, i64 15
+  %185 = load i32, i32* %arrayidx125.15, align 4, !tbaa !2
+  %add126.15 = add nsw i32 %185, %184
+  %arrayidx128.15 = getelementptr inbounds [16 x i32], [16 x i32]* %arr_i1, i64 0, i64 15
+  store i32 %add126.15, i32* %arrayidx128.15, align 4, !tbaa !2
+  call void @foo_i(i32* nonnull %arrayidx128) #3
+  call void @foo_f(float* nonnull %arraydecay) #3
+  call void @foo_f(float* nonnull %arrayidx51) #3
+  %186 = load float, float* %arrayidx46, align 16, !tbaa !7
+  %187 = load float, float* %arrayidx48, align 16, !tbaa !7
+  %add143 = fadd float %186, %187
+  store float %add143, float* %arraydecay, align 16, !tbaa !7
+  %188 = load float, float* %arrayidx46.1, align 4, !tbaa !7
+  %189 = load float, float* %arrayidx48.1, align 4, !tbaa !7
+  %add143.1 = fadd float %188, %189
+  store float %add143.1, float* %arrayidx54.1, align 4, !tbaa !7
+  %190 = load float, float* %arrayidx46.2, align 8, !tbaa !7
+  %191 = load float, float* %arrayidx48.2, align 8, !tbaa !7
+  %add143.2 = fadd float %190, %191
+  store float %add143.2, float* %arrayidx54.2, align 8, !tbaa !7
+  %192 = load float, float* %arrayidx46.3, align 4, !tbaa !7
+  %193 = load float, float* %arrayidx48.3, align 4, !tbaa !7
+  %add143.3 = fadd float %192, %193
+  store float %add143.3, float* %arrayidx54.3, align 4, !tbaa !7
+  %194 = load float, float* %arrayidx46.4, align 16, !tbaa !7
+  %195 = load float, float* %arrayidx48.4, align 16, !tbaa !7
+  %add143.4 = fadd float %194, %195
+  store float %add143.4, float* %arrayidx54.4, align 16, !tbaa !7
+  %196 = load float, float* %arrayidx46.5, align 4, !tbaa !7
+  %197 = load float, float* %arrayidx48.5, align 4, !tbaa !7
+  %add143.5 = fadd float %196, %197
+  store float %add143.5, float* %arrayidx54.5, align 4, !tbaa !7
+  %198 = load float, float* %arrayidx46.6, align 8, !tbaa !7
+  %199 = load float, float* %arrayidx48.6, align 8, !tbaa !7
+  %add143.6 = fadd float %198, %199
+  store float %add143.6, float* %arrayidx54.6, align 8, !tbaa !7
+  %200 = load float, float* %arrayidx46.7, align 4, !tbaa !7
+  %201 = load float, float* %arrayidx48.7, align 4, !tbaa !7
+  %add143.7 = fadd float %200, %201
+  store float %add143.7, float* %arrayidx54.7, align 4, !tbaa !7
+  %202 = load float, float* %arrayidx46.8, align 16, !tbaa !7
+  %203 = load float, float* %arrayidx48.8, align 16, !tbaa !7
+  %add143.8 = fadd float %202, %203
+  store float %add143.8, float* %arrayidx54.8, align 16, !tbaa !7
+  %204 = load float, float* %arrayidx46.9, align 4, !tbaa !7
+  %205 = load float, float* %arrayidx48.9, align 4, !tbaa !7
+  %add143.9 = fadd float %204, %205
+  store float %add143.9, float* %arrayidx54.9, align 4, !tbaa !7
+  %206 = load float, float* %arrayidx46.10, align 8, !tbaa !7
+  %207 = load float, float* %arrayidx48.10, align 8, !tbaa !7
+  %add143.10 = fadd float %206, %207
+  store float %add143.10, float* %arrayidx54.10, align 8, !tbaa !7
+  %208 = load float, float* %arrayidx46.11, align 4, !tbaa !7
+  %209 = load float, float* %arrayidx48.11, align 4, !tbaa !7
+  %add143.11 = fadd float %208, %209
+  store float %add143.11, float* %arrayidx54.11, align 4, !tbaa !7
+  %210 = load float, float* %arrayidx46.12, align 16, !tbaa !7
+  %211 = load float, float* %arrayidx48.12, align 16, !tbaa !7
+  %add143.12 = fadd float %210, %211
+  store float %add143.12, float* %arrayidx54.12, align 16, !tbaa !7
+  %212 = load float, float* %arrayidx46.13, align 4, !tbaa !7
+  %213 = load float, float* %arrayidx48.13, align 4, !tbaa !7
+  %add143.13 = fadd float %212, %213
+  store float %add143.13, float* %arrayidx54.13, align 4, !tbaa !7
+  store float %add112.14, float* %arrayidx54.14, align 8, !tbaa !7
+  store float %add112.15, float* %arrayidx54.15, align 4, !tbaa !7
+  %arrayidx153.15 = getelementptr inbounds [17 x float], [17 x float]* %arr_f1, i64 0, i64 16
+  store float %add112.15, float* %arrayidx153.15, align 16, !tbaa !7
+  call void @foo_f(float* nonnull %arraydecay) #3
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* nonnull %9) #3
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* nonnull %8) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %7) #3
+  call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %6) #3
+  call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %5) #3
+  call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %4) #3
+  call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %3) #3
+  call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 68, i8* nonnull %0) #3
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare void @foo_i(i32*) local_unnamed_addr #2
+
+declare void @foo_f(float*) local_unnamed_addr #2
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!4, !4, i64 0}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"float", !4, i64 0}