Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -291,6 +291,7 @@
 void initializeWinEHPreparePass(PassRegistry&);
 void initializePlaceBackedgeSafepointsImplPass(PassRegistry&);
 void initializePlaceSafepointsPass(PassRegistry&);
+void initializeAggregateMemAccessRemovalPass(PassRegistry&);
 }
 
 #endif
Index: include/llvm/Transforms/Scalar.h
===================================================================
--- include/llvm/Transforms/Scalar.h
+++ include/llvm/Transforms/Scalar.h
@@ -426,6 +426,12 @@
 //
 ModulePass *createPlaceSafepointsPass();
 
+//===----------------------------------------------------------------------===//
+//
+// AggregateMemAccessRemoval - Convert aggregate loads/stores into
+// scalar loads/stores.
+BasicBlockPass *createAggregateMemAccessRemovalPass();
+
 } // End llvm namespace
 
 #endif
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -184,6 +184,9 @@
   // Add LibraryInfo if we have some.
   if (LibraryInfo)
     MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
+
+  // Remove aggregate loads/stores.
+  MPM.add(createAggregateMemAccessRemovalPass());
 
   addInitialAliasAnalysisPasses(MPM);
 
Index: lib/Transforms/Scalar/AggregateMemAccessRemoval.cpp
===================================================================
--- /dev/null
+++ lib/Transforms/Scalar/AggregateMemAccessRemoval.cpp
@@ -0,0 +1,364 @@
+//===- AggregateMemAccessRemoval.cpp - Split Aggregate Memory Accesses ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This transformation rewrites loads and stores of aggregate type into
+/// scalar loads and stores, so that subsequent passes can optimize them
+/// properly.
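+///
+/// For example (illustrative, mirroring the included tests): a load of
+/// %B = type { %B__vtbl*, i32 } becomes a single i128 load followed by
+/// trunc, lshr and inttoptr instructions and a chain of insertvalue
+/// instructions that rebuild the %B value.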
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetFolder.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aggregate-removal"
+
+namespace {
+
+class AggregateMemAccessRemoval : public BasicBlockPass {
+  LLVMContext *C;
+  const DataLayout *DL;
+
+public:
+  AggregateMemAccessRemoval()
+      : BasicBlockPass(ID), C(nullptr), DL(nullptr) {}
+
+  using llvm::Pass::doInitialization;
+  bool doInitialization(Function &) override;
+  bool runOnBasicBlock(BasicBlock &BB) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  const char *getPassName() const override {
+    return "AggregateMemAccessRemoval";
+  }
+
+  static char ID;
+
+private:
+  SmallVector<Instruction *, 8> InstrsToErase;
+
+  void runOnLoad(LoadInst *LI);
+  void runOnStore(StoreInst *SI);
+
+  Value *addToStore(IRBuilder<> &Builder, Value *V, unsigned Size, Value *E,
+                    unsigned ElementOffset, unsigned ElementSize);
+  Value *getFromLoad(IRBuilder<> &Builder, Value *L, unsigned Size, Type *T,
+                     unsigned ElementOffset);
+};
+}
+
+bool AggregateMemAccessRemoval::doInitialization(Function &F) {
+  DEBUG(dbgs() << "AggregateMemAccessRemoval function: " << F.getName()
+               << "\n");
+  C = &F.getContext();
+  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+  if (!DLP) {
+    DEBUG(dbgs() << "\tSkipping AggregateMemAccessRemoval -- no target data!\n");
+    return false;
+  }
+  DL = &DLP->getDataLayout();
+  return true;
+}
+
+bool AggregateMemAccessRemoval::runOnBasicBlock(BasicBlock &BB) {
+  if (skipOptnoneFunction(BB) || !DL)
+    return false;
+
+  for (auto &I : BB) {
+    LoadInst *LI = dyn_cast<LoadInst>(&I);
+    if (LI && LI->isUnordered()) {
+      runOnLoad(LI);
+      continue;
+    }
+
+    StoreInst *SI = dyn_cast<StoreInst>(&I);
+    if (SI && SI->isUnordered()) {
+      runOnStore(SI);
+      continue;
+    }
+  }
+
+  if (InstrsToErase.empty())
+    return false;
+
+  for (auto &I : InstrsToErase)
+    I->eraseFromParent();
+
+  InstrsToErase.clear();
+  return true;
+}
+
+void AggregateMemAccessRemoval::runOnLoad(LoadInst *LI) {
+  Type *T = LI->getType();
+  if (!T->isAggregateType())
+    return;
+
+  DEBUG(dbgs() << "\tload : " << *LI << "\n");
+
+  IRBuilder<> Builder(LI);
+  if (StructType *ST = dyn_cast<StructType>(T)) {
+    // Opaque structs have no layout to work with.
+    if (ST->isOpaque())
+      return;
+
+    InstrsToErase.push_back(LI);
+
+    // If the struct has only one element, unpack it.
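+    // (e.g. %A = type { %A__vtbl* } is rebuilt by loading its single field
+    // through a GEP and recursing in case that field is itself an aggregate,
+    // rather than going through the integer path below.)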
+    if (ST->getNumElements() == 1) {
+      LoadInst *NewLI = Builder.CreateLoad(
+          Builder.CreateStructGEP(LI->getPointerOperand(), 0));
+
+      Value *V = UndefValue::get(T);
+      V = Builder.CreateInsertValue(V, NewLI, 0);
+      LI->replaceAllUsesWith(V);
+
+      runOnLoad(NewLI);
+      return;
+    }
+
+    const StructLayout *SL = DL->getStructLayout(ST);
+    uint64_t Size = DL->getTypeStoreSize(ST);
+    unsigned Align = LI->getAlignment();
+
+    // Load the whole struct as one large integer and extract the elements.
+    Value *Addr = LI->getPointerOperand();
+    unsigned AddressSpace = LI->getPointerAddressSpace();
+    IntegerType *PT = Type::getIntNTy(*C, Size * 8);
+    Addr = Builder.CreatePointerCast(Addr, PointerType::get(PT, AddressSpace));
+
+    LoadInst *ELI = Builder.CreateLoad(Addr);
+    ELI->setAlignment(Align);
+
+    Value *V = UndefValue::get(T);
+    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+      Value *E = getFromLoad(Builder, ELI, Size, ST->getElementType(i),
+                             SL->getElementOffset(i));
+      V = Builder.CreateInsertValue(V, E, i);
+    }
+
+    LI->replaceAllUsesWith(V);
+  } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
+    InstrsToErase.push_back(LI);
+
+    // Load each array element individually and recurse on aggregate elements.
+    Value *V = UndefValue::get(T);
+    Value *Addr = LI->getPointerOperand();
+    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+      LoadInst *NewLI = Builder.CreateLoad(Builder.CreateStructGEP(Addr, i));
+      V = Builder.CreateInsertValue(V, NewLI, i);
+      runOnLoad(NewLI);
+    }
+
+    LI->replaceAllUsesWith(V);
+  }
+}
+
+Value *AggregateMemAccessRemoval::getFromLoad(IRBuilder<> &Builder, Value *L,
+                                              unsigned Size, Type *T,
+                                              unsigned ElementOffset) {
+  // If the type is an aggregate, recurse into its elements.
+  if (T->isAggregateType()) {
+    if (StructType *ST = dyn_cast<StructType>(T)) {
+      assert(!ST->isOpaque() && "Can't load opaque struct");
+
+      Value *E = UndefValue::get(T);
+      const StructLayout *SL = DL->getStructLayout(ST);
+      for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+        unsigned SEOffset = ElementOffset + SL->getElementOffset(i);
+        Value *SE =
+            getFromLoad(Builder, L, Size, ST->getElementType(i), SEOffset);
+        E = Builder.CreateInsertValue(E, SE, i);
+      }
+
+      return E;
+    } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
+      Value *E = UndefValue::get(T);
+      Type *ET = AT->getElementType();
+      unsigned SESize = DL->getTypeAllocSize(ET);
+      unsigned SEOffset = ElementOffset;
+      for (unsigned i = 0, e = AT->getNumElements(); i != e;
+           ++i, SEOffset += SESize) {
+        Value *SE = getFromLoad(Builder, L, Size, ET, SEOffset);
+        E = Builder.CreateInsertValue(E, SE, i);
+      }
+
+      return E;
+    }
+
+    llvm_unreachable("Invalid Aggregate Type");
+  }
+
+  unsigned ElementSize = DL->getTypeStoreSize(T);
+
+  assert(ElementSize <= Size && "Element does not fit into value.");
+  // Shift the element down to bit zero. The shift amount depends on
+  // endianness: e.g. on a little-endian target, an i32 at byte offset 8 of a
+  // 16 byte load is extracted with a lshr by 64 followed by a trunc to i32.
+  unsigned ShiftAmt;
+  if (DL->isLittleEndian())
+    ShiftAmt = ElementOffset * 8;
+  else
+    ShiftAmt = (Size - ElementSize - ElementOffset) * 8;
+  if (ShiftAmt)
+    L = Builder.CreateLShr(L, ShiftAmt);
+
+  if (ElementSize < Size)
+    L = Builder.CreateTrunc(L, Type::getIntNTy(*C, ElementSize * 8));
+
+  // Convert the integer back to the element's actual type.
+  if (T->getScalarType()->isPointerTy())
+    L = Builder.CreateIntToPtr(L, T);
+  if (!T->isIntegerTy())
+    L = Builder.CreateBitCast(L, T);
+
+  return L;
+}
+
+void AggregateMemAccessRemoval::runOnStore(StoreInst *SI) {
+  Value *V = SI->getValueOperand();
+  Type *T = V->getType();
+  if (!T->isAggregateType())
+    return;
+
+  DEBUG(dbgs() << "\tstore : " << *SI << "\n");
+
+  IRBuilder<> Builder(SI);
+  if (StructType *ST = dyn_cast<StructType>(T)) {
+    if (ST->isOpaque())
+      return;
+
+    InstrsToErase.push_back(SI);
+
+    // If the struct has only one element, unpack it.
+    if (ST->getNumElements() == 1) {
+      runOnStore(Builder.CreateStore(
+          Builder.CreateExtractValue(V, 0),
+          Builder.CreateStructGEP(SI->getPointerOperand(), 0)));
+      return;
+    }
+
+    const StructLayout *SL = DL->getStructLayout(ST);
+    uint64_t Size = DL->getTypeStoreSize(ST);
+    unsigned Align = SI->getAlignment();
+
+    // Pack all elements into one large integer and emit a single store.
+    IntegerType *PT = Type::getIntNTy(*C, Size * 8);
+    ConstantInt *Zero = ConstantInt::get(PT, 0);
+    Value *NV = Zero;
+
+    Value *Addr = SI->getPointerOperand();
+    unsigned AddressSpace = SI->getPointerAddressSpace();
+    Addr = Builder.CreatePointerCast(Addr, PointerType::get(PT, AddressSpace));
+
+    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+      unsigned ElementOffset = SL->getElementOffset(i);
+      Value *E = Builder.CreateExtractValue(V, i);
+      unsigned ElementSize = DL->getTypeStoreSize(E->getType());
+      NV = addToStore(Builder, NV, Size, E, ElementOffset, ElementSize);
+    }
+
+    Builder.CreateStore(NV, Addr)->setAlignment(Align);
+  } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
+    InstrsToErase.push_back(SI);
+
+    // Store each array element individually and recurse on aggregate elements.
+    Value *Addr = SI->getPointerOperand();
+    unsigned Align = SI->getAlignment();
+    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+      StoreInst *ESI = Builder.CreateStore(Builder.CreateExtractValue(V, i),
+                                           Builder.CreateStructGEP(Addr, i));
+      ESI->setAlignment(Align);
+      runOnStore(ESI);
+    }
+  }
+}
+
+Value *AggregateMemAccessRemoval::addToStore(IRBuilder<> &Builder, Value *V,
+                                             unsigned Size, Value *E,
+                                             unsigned ElementOffset,
+                                             unsigned ElementSize) {
+  DEBUG(dbgs() << "\t\tinserting : " << *E << " at offset " << ElementOffset
+               << " into " << *V << "\n");
+
+  Type *ET = E->getType();
+
+  // If the type is an aggregate, recurse into its elements.
+  if (ET->isAggregateType()) {
+    if (StructType *ST = dyn_cast<StructType>(ET)) {
+      assert(!ST->isOpaque() && "Can't store opaque struct");
+
+      const StructLayout *SL = DL->getStructLayout(ST);
+      for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+        unsigned SEOffset = ElementOffset + SL->getElementOffset(i);
+
+        assert(SEOffset < Size && "Element is past the end of value.");
+
+        Value *SE = Builder.CreateExtractValue(E, i);
+        unsigned SESize = DL->getTypeStoreSize(SE->getType());
+
+        V = addToStore(Builder, V, Size, SE, SEOffset, SESize);
+      }
+
+      return V;
+    } else if (ArrayType *AT = dyn_cast<ArrayType>(ET)) {
+      unsigned SESize = DL->getTypeAllocSize(AT->getElementType());
+      unsigned SEOffset = ElementOffset;
+
+      for (unsigned i = 0, e = AT->getNumElements(); i != e;
+           ++i, SEOffset += SESize) {
+        assert(SEOffset < Size && "Element is past the end of value.");
+
+        Value *SE = Builder.CreateExtractValue(E, i);
+        V = addToStore(Builder, V, Size, SE, SEOffset, SESize);
+      }
+
+      return V;
+    }
+
+    llvm_unreachable("Invalid Aggregate Type");
+  }
+
+  // Transform the element into an integer.
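+  // e.g. on a little-endian target, the i32 field at byte offset 8 of
+  // %B = type { %B__vtbl*, i32 } is zext'd to i128 and shifted left by 64
+  // bits before being OR'd into the value being stored.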
+  if (E->getType()->getScalarType()->isPointerTy())
+    E = Builder.CreatePtrToInt(E, DL->getIntPtrType(ET));
+  if (!E->getType()->isIntegerTy())
+    E = Builder.CreateBitCast(E, IntegerType::get(*C, ElementSize * 8));
+
+  assert(ElementSize <= Size && "Element does not fit into value.");
+  if (ElementSize < Size)
+    E = Builder.CreateZExt(E, Type::getIntNTy(*C, Size * 8));
+
+  // Shift the element to its position within the value, then merge it in.
+  unsigned ShiftAmt;
+  if (DL->isLittleEndian())
+    ShiftAmt = ElementOffset * 8;
+  else
+    ShiftAmt = (Size - ElementSize - ElementOffset) * 8;
+  if (ShiftAmt)
+    E = Builder.CreateShl(E, ShiftAmt);
+
+  return Builder.CreateOr(V, E);
+}
+
+void AggregateMemAccessRemoval::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+}
+
+char AggregateMemAccessRemoval::ID = 0;
+
+BasicBlockPass *llvm::createAggregateMemAccessRemovalPass() {
+  return new AggregateMemAccessRemoval();
+}
+
+INITIALIZE_PASS(AggregateMemAccessRemoval, "aggregate-removal",
+                "Transform aggregate loads and stores into scalar accesses.",
+                false, false)
Index: lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- lib/Transforms/Scalar/CMakeLists.txt
+++ lib/Transforms/Scalar/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_llvm_library(LLVMScalarOpts
   ADCE.cpp
+  AggregateMemAccessRemoval.cpp
   AlignmentFromAssumptions.cpp
   ConstantHoisting.cpp
   ConstantProp.cpp
Index: lib/Transforms/Scalar/Scalar.cpp
===================================================================
--- lib/Transforms/Scalar/Scalar.cpp
+++ lib/Transforms/Scalar/Scalar.cpp
@@ -73,6 +73,7 @@
   initializeLoadCombinePass(Registry);
   initializePlaceBackedgeSafepointsImplPass(Registry);
   initializePlaceSafepointsPass(Registry);
+  initializeAggregateMemAccessRemovalPass(Registry);
 }
 
 void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
Index: test/Transforms/AggregateMemAccessRemoval/load.ll
===================================================================
--- /dev/null
+++ test/Transforms/AggregateMemAccessRemoval/load.ll
@@ -0,0 +1,175 @@
+; RUN: opt -aggregate-removal -S < %s | FileCheck %s
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+%A__vtbl = type { i8*, i32 (%A*)* }
+%B__vtbl = type { i8*, i32 (%B*)* }
+%C__vtbl = type { i8*, i32 (%C*)* }
+%D__vtbl = type { i8*, i32 (%D*)* }
+%E__vtbl = type { i8*, i32 (%E*)* }
+%A = type { %A__vtbl* }
+%B = type { %B__vtbl*, i32 }
+%C = type { %C__vtbl*, i32, { i8 }, { i8 } }
+%D = type { %D__vtbl*, i32, { i8 }, { i8 }, { i8, i8, i8 } }
+%E = type { %E__vtbl*, i32, { i8 }, { i8 }, { i8, i8, i8 }, [2 x i32] }
+
+declare i8* @allocmemory(i64)
+
+define %A @structA() {
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to %A*
+; CHECK: load %A__vtbl**
+; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
+  %2 = load %A* %1, align 8
+  ret %A %2
+}
+
+define %B @structB() {
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to %B*
+; CHECK: load i128* {{.*}}, align 8
+; CHECK: trunc i128 {{.*}} to i64
+; CHECK: inttoptr i64 {{.*}} to %B__vtbl*
+; CHECK: insertvalue %B undef, %B__vtbl* {{.*}}, 0
+; CHECK: lshr i128 {{.*}}, 64
+; CHECK: trunc i128 {{.*}} to i32
+; CHECK: insertvalue %B {{.*}}, i32 {{.*}}, 1
+  %2 = load %B* %1, align 8
+  ret %B %2
+}
+
+define %C @structC() {
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to %C*
+; CHECK: load i128* {{.*}}, align 8
+; CHECK: trunc i128 {{.*}} to i64
+; CHECK: inttoptr i64 {{.*}} to %C__vtbl*
+; CHECK: insertvalue %C undef, %C__vtbl* {{.*}}, 0
+; CHECK: lshr i128 {{.*}}, 64
+; CHECK: trunc i128 {{.*}} to i32
+; CHECK: insertvalue %C {{.*}}, i32 {{.*}}, 1
+; CHECK: lshr i128 {{.*}}, 96
+; CHECK: trunc i128 {{.*}} to i8
+; CHECK: insertvalue { i8 } undef, i8 {{.*}}, 0
+; CHECK: insertvalue %C {{.*}}, { i8 } {{.*}}, 2
+; CHECK: lshr i128 {{.*}}, 104
+; CHECK: trunc i128 {{.*}} to i8
+; CHECK: insertvalue { i8 } undef, i8 {{.*}}, 0
+; CHECK: insertvalue %C {{.*}}, { i8 } {{.*}}, 3
+  %2 = load %C* %1, align 8
+  ret %C %2
+}
+
+define %D @structD() {
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to %D*
+; CHECK: load i192* {{.*}}, align 8
+; CHECK: trunc i192 {{.*}} to i64
+; CHECK: inttoptr i64 {{.*}} to %D__vtbl*
+; CHECK: insertvalue %D undef, %D__vtbl* {{.*}}, 0
+; CHECK: lshr i192 {{.*}}, 64
+; CHECK: trunc i192 {{.*}} to i32
+; CHECK: insertvalue %D {{.*}}, i32 {{.*}}, 1
+; CHECK: lshr i192 {{.*}}, 96
+; CHECK: trunc i192 {{.*}} to i8
+; CHECK: insertvalue { i8 } undef, i8 {{.*}}, 0
+; CHECK: insertvalue %D {{.*}}, { i8 } {{.*}}, 2
+; CHECK: lshr i192 {{.*}}, 104
+; CHECK: trunc i192 {{.*}} to i8
+; CHECK: insertvalue { i8 } undef, i8 {{.*}}, 0
+; CHECK: insertvalue %D {{.*}}, { i8 } {{.*}}, 3
+; CHECK: lshr i192 {{.*}}, 112
+; CHECK: trunc i192 {{.*}} to i8
+; CHECK: insertvalue { i8, i8, i8 } undef, i8 {{.*}}, 0
+; CHECK: lshr i192 {{.*}}, 120
+; CHECK: trunc i192 {{.*}} to i8
+; CHECK: insertvalue { i8, i8, i8 } {{.*}}, i8 {{.*}}, 1
+; CHECK: lshr i192 {{.*}}, 128
+; CHECK: trunc i192 {{.*}} to i8
+; CHECK: insertvalue { i8, i8, i8 } {{.*}}, i8 {{.*}}, 2
+; CHECK: insertvalue %D {{.*}}, { i8, i8, i8 } {{.*}}, 4
+  %2 = load %D* %1, align 8
+  ret %D %2
+}
+
+define %E @structE() {
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to %E*
+; CHECK: load i256* {{.*}}, align 8
+; CHECK: trunc i256 {{.*}} to i64
+; CHECK: inttoptr i64 {{.*}} to %E__vtbl*
+; CHECK: insertvalue %E undef, %E__vtbl* {{.*}}, 0
+; CHECK: lshr i256 {{.*}}, 64
+; CHECK: trunc i256 {{.*}} to i32
+; CHECK: insertvalue %E {{.*}}, i32 {{.*}}, 1
+; CHECK: lshr i256 {{.*}}, 96
+; CHECK: trunc i256 {{.*}} to i8
+; CHECK: insertvalue { i8 } undef, i8 {{.*}}, 0
+; CHECK: insertvalue %E {{.*}}, { i8 } {{.*}}, 2
+; CHECK: lshr i256 {{.*}}, 104
+; CHECK: trunc i256 {{.*}} to i8
+; CHECK: insertvalue { i8 } undef, i8 {{.*}}, 0
+; CHECK: insertvalue %E {{.*}}, { i8 } {{.*}}, 3
+; CHECK: lshr i256 {{.*}}, 112
+; CHECK: trunc i256 {{.*}} to i8
+; CHECK: insertvalue { i8, i8, i8 } undef, i8 {{.*}}, 0
+; CHECK: lshr i256 {{.*}}, 120
+; CHECK: trunc i256 {{.*}} to i8
+; CHECK: insertvalue { i8, i8, i8 } {{.*}}, i8 {{.*}}, 1
+; CHECK: lshr i256 {{.*}}, 128
+; CHECK: trunc i256 {{.*}} to i8
+; CHECK: insertvalue { i8, i8, i8 } {{.*}}, i8 {{.*}}, 2
+; CHECK: insertvalue %E {{.*}}, { i8, i8, i8 } {{.*}}, 4
+; CHECK: lshr i256 {{.*}}, 160
+; CHECK: trunc i256 {{.*}} to i32
+; CHECK: insertvalue [2 x i32] undef, i32 {{.*}}, 0
+; CHECK: lshr i256 {{.*}}, 192
+; CHECK: trunc i256 {{.*}} to i32
+; CHECK: insertvalue [2 x i32] {{.*}}, i32 {{.*}}, 1
+; CHECK: insertvalue %E {{.*}}, [2 x i32] {{.*}}, 5
+  %2 = load %E* %1, align 8
+  ret %E %2
+}
+
+define void @arrays() {
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to [1 x i64]*
+; CHECK: load i64*
+; CHECK: insertvalue [1 x i64] undef, i64 {{.*}}, 0
+  %2 = load [1 x i64]* %1, align 8
+  %3 = bitcast i8* %0 to [2 x i64]*
+; CHECK: load i64*
+; CHECK: insertvalue [2 x i64] undef, i64 {{.*}}, 0
+; CHECK: load i64*
+; CHECK: insertvalue [2 x i64] {{.*}}, i64 {{.*}}, 1
+  %4 = load [2 x i64]* %3, align 8
+  %5 = bitcast i8* %0 to [2 x %A]*
+; CHECK: load %A__vtbl**
+; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
+; CHECK: insertvalue [2 x %A] undef, %A {{.*}}, 0
+; CHECK: load %A__vtbl**
+; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
+; CHECK: insertvalue [2 x %A] {{.*}}, %A {{.*}}, 1
+  %6 = load [2 x %A]* %5, align 8
+  %7 = bitcast i8* %0 to [2 x %B]*
+; CHECK: inttoptr i64 {{.*}} to %B__vtbl*
+; CHECK: insertvalue %B undef, %B__vtbl* {{.*}}, 0
+; CHECK: trunc i128 {{.*}} to i32
+; CHECK: insertvalue %B {{.*}}, i32 {{.*}}, 1
+; CHECK: insertvalue [2 x %B] undef, %B {{.*}}, 0
+; CHECK: inttoptr i64 {{.*}} to %B__vtbl*
+; CHECK: insertvalue %B undef, %B__vtbl* {{.*}}, 0
+; CHECK: trunc i128 {{.*}} to i32
+; CHECK: insertvalue %B {{.*}}, i32 {{.*}}, 1
+; CHECK: insertvalue [2 x %B] {{.*}}, %B {{.*}}, 1
+  %8 = load [2 x %B]* %7, align 8
+  ret void
+}
Index: test/Transforms/AggregateMemAccessRemoval/store.ll
===================================================================
--- /dev/null
+++ test/Transforms/AggregateMemAccessRemoval/store.ll
@@ -0,0 +1,72 @@
+; RUN: opt -aggregate-removal -S < %s | FileCheck %s
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+%A__vtbl = type { i8*, i32 (%A*)* }
+%B__vtbl = type { i8*, i32 (%B*)* }
+%C__vtbl = type { i8*, i32 (%C*)* }
+%D__vtbl = type { i8*, i32 (%D*)* }
+%E__vtbl = type { i8*, i32 (%E*)* }
+%A = type { %A__vtbl* }
+%B = type { %B__vtbl*, i32 }
+%C = type { %C__vtbl*, i32, { i8 }, { i8 } }
+%D = type { %D__vtbl*, i32, { i8 }, { i8 }, { i8, i8, i8 } }
+%E = type { %E__vtbl*, i32, { i8 }, { i8 }, { i8, i8, i8 }, [2 x i32] }
+
+@A__vtblZ = constant %A__vtbl { i8* null, i32 (%A*)* @A.foo }
+@B__vtblZ = constant %B__vtbl { i8* null, i32 (%B*)* @B.foo }
+@C__vtblZ = constant %C__vtbl { i8* null, i32 (%C*)* @C.foo }
+@D__vtblZ = constant %D__vtbl { i8* null, i32 (%D*)* @D.foo }
+@E__vtblZ = constant %E__vtbl { i8* null, i32 (%E*)* @E.foo }
+
+declare i32 @A.foo(%A* nocapture %this)
+declare i32 @B.foo(%B* nocapture %this)
+declare i32 @C.foo(%C* nocapture %this)
+declare i32 @D.foo(%D* nocapture %this)
+declare i32 @E.foo(%E* nocapture %this)
+
+declare i8* @allocmemory(i64)
+
+define void @structs() {
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to %A*
+; CHECK: store %A__vtbl* @A__vtblZ
+  store %A { %A__vtbl* @A__vtblZ }, %A* %1, align 8
+  %2 = bitcast i8* %0 to %B*
+; CHECK: store i128 or (i128 zext (i64 ptrtoint (%B__vtbl* @B__vtblZ to i64) to i128), i128 774763251095801167872), i128* {{.*}}, align 8
+  store %B { %B__vtbl* @B__vtblZ, i32 42 }, %B* %2, align 8
+  %3 = bitcast i8* %0 to %C*
+; CHECK: store i128 or (i128 zext (i64 ptrtoint (%C__vtbl* @C__vtblZ to i64) to i128), i128 466891561697334504689850300497920), i128* {{.*}}, align 8
+  store %C { %C__vtbl* @C__vtblZ, i32 42, { i8 } { i8 5 }, { i8 } { i8 23 } }, %C* %3, align 8
+  %4 = bitcast i8* %0 to %D*
+; CHECK: store i192 or (i192 zext (i64 ptrtoint (%D__vtbl* @D__vtblZ to i64) to i192), i192 1023511215942805454298064656762495041536), i192* {{.*}}, align 8
+  store %D { %D__vtbl* @D__vtblZ, i32 42, { i8 } { i8 5 }, { i8 } { i8 23 }, { i8, i8, i8 } { i8 1, i8 2, i8 3 } }, %D* %4, align 8
+  %5 = bitcast i8* %0 to %E*
+; CHECK: store i256 or (i256 zext (i64 ptrtoint (%E__vtbl* @E__vtblZ to i64) to i256), i256 31385508682779410369526070004795876865674973957706397777920), i256* {{.*}}, align 8
+  store %E { %E__vtbl* @E__vtblZ, i32 42, { i8 } { i8 5 }, { i8 } { i8 23 }, { i8, i8, i8 } { i8 1, i8 2, i8 3 }, [2 x i32] [i32 4, i32 5] }, %E* %5, align 8
+  ret void
+}
+
+define void @arrays() {
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to [1 x i64]*
+; CHECK: store i64 42
+  store [1 x i64] [ i64 42 ], [1 x i64]* %1, align 8
+  %2 = bitcast i8* %0 to [2 x i64]*
+; CHECK: store i64 666
+; CHECK: store i64 42
+  store [2 x i64] [ i64 666, i64 42 ], [2 x i64]* %2, align 8
+  %3 = bitcast i8* %0 to [2 x %A]*
+; CHECK: store %A__vtbl* @A__vtblZ
+; CHECK: store %A__vtbl* @A__vtblZ
+  store [2 x %A] [%A { %A__vtbl* @A__vtblZ }, %A { %A__vtbl* @A__vtblZ }], [2 x %A]* %3, align 8
+  %4 = bitcast i8* %0 to [2 x %B]*
+; CHECK: store i128 or (i128 zext (i64 ptrtoint (%B__vtbl* @B__vtblZ to i64) to i128), i128 1826227663297245609984), i128* {{.*}}, align 8
+; CHECK: store i128 or (i128 zext (i64 ptrtoint (%B__vtbl* @B__vtblZ to i64) to i128), i128 461168601842738790400), i128* {{.*}}, align 8
+  store [2 x %B] [%B { %B__vtbl* @B__vtblZ, i32 99 }, %B { %B__vtbl* @B__vtblZ, i32 25 }], [2 x %B]* %4, align 8
+  ret void
+}