Index: lib/CodeGen/AtomicExpandPass.cpp
===================================================================
--- lib/CodeGen/AtomicExpandPass.cpp
+++ lib/CodeGen/AtomicExpandPass.cpp
@@ -268,8 +268,7 @@
   IRBuilder<> Builder(LI);
   AtomicOrdering Order = LI->getOrdering();
   Value *Addr = LI->getPointerOperand();
-  Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
-  Constant *DummyVal = Constant::getNullValue(Ty);
+  Constant *DummyVal = Constant::getNullValue(LI->getType());
 
   Value *Pair = Builder.CreateAtomicCmpXchg(
       Addr, DummyVal, DummyVal, Order,
Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -3432,7 +3432,7 @@
 /// Add the ultimately found memory instructions to MemoryUses.
 static bool FindAllMemoryUses(
     Instruction *I,
-    SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
+    SmallVectorImpl<Instruction *> &MemoryUses,
     SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) {
   // If we already considered this instruction, we're done.
   if (!ConsideredInsts.insert(I).second)
@@ -3447,14 +3447,14 @@
     Instruction *UserI = cast<Instruction>(U.getUser());
 
     if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
-      MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
+      MemoryUses.push_back(LI);
      continue;
     }
 
     if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
       unsigned opNo = U.getOperandNo();
       if (opNo == 0) return true; // Storing addr, not into addr.
-      MemoryUses.push_back(std::make_pair(SI, opNo));
+      MemoryUses.push_back(SI);
       continue;
     }
 
@@ -3554,7 +3554,7 @@
   // check to see if their addressing modes will include this instruction.  If
   // so, we can fold it into all uses, so it doesn't matter if it has multiple
   // uses.
-  SmallVector<std::pair<Instruction *, unsigned>, 16> MemoryUses;
+  SmallVector<Instruction *, 16> MemoryUses;
   SmallPtrSet<Instruction *, 16> ConsideredInsts;
   if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
     return false;  // Has a non-memory, non-foldable use!
@@ -3565,17 +3565,21 @@
   // *actually* fold the instruction.
   SmallVector<Instruction *, 16> MatchedAddrModeInsts;
   for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
-    Instruction *User = MemoryUses[i].first;
-    unsigned OpNo = MemoryUses[i].second;
-
-    // Get the access type of this use.  If the use isn't a pointer, we don't
-    // know what it accesses.
-    Value *Address = User->getOperand(OpNo);
-    PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
-    if (!AddrTy)
-      return false;
-    Type *AddressAccessTy = AddrTy->getElementType();
-    unsigned AS = AddrTy->getAddressSpace();
+    Value *Address;
+    Type *AddressAccessTy;
+    unsigned AS;
+
+    Instruction *User = MemoryUses[i];
+    if (auto *LI = dyn_cast<LoadInst>(User)) {
+      Address = LI->getPointerOperand();
+      AddressAccessTy = LI->getType();
+      AS = LI->getPointerAddressSpace();
+    } else {
+      auto *SI = cast<StoreInst>(User);
+      Address = SI->getPointerOperand();
+      AddressAccessTy = SI->getValueOperand()->getType();
+      AS = SI->getPointerAddressSpace();
+    }
 
     // Do a match against the root of this address, ignoring profitability. This
    // will tell us if the addressing mode for the memory operation will
Index: lib/Target/AMDGPU/SITypeRewriter.cpp
===================================================================
--- lib/Target/AMDGPU/SITypeRewriter.cpp
+++ lib/Target/AMDGPU/SITypeRewriter.cpp
@@ -74,9 +74,8 @@
 void SITypeRewriter::visitLoadInst(LoadInst &I) {
   Value *Ptr = I.getPointerOperand();
   Type *PtrTy = Ptr->getType();
-  Type *ElemTy = PtrTy->getPointerElementType();
   IRBuilder<> Builder(&I);
-  if (ElemTy == v16i8) {
+  if (I.getType() == v16i8) {
     Value *BitCast = Builder.CreateBitCast(Ptr,
         PointerType::get(v4i32, PtrTy->getPointerAddressSpace()));
     LoadInst *Load = Builder.CreateLoad(BitCast);
Index: lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
===================================================================
--- lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -183,14 +183,17 @@
     for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
          J != JE; ++J) {
       Value *PtrValue;
+      Type *ValTy = nullptr;
       Instruction *MemI;
 
       if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) {
         MemI = LMemI;
         PtrValue = LMemI->getPointerOperand();
+        ValTy = LMemI->getType();
       } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) {
         MemI = SMemI;
         PtrValue = SMemI->getPointerOperand();
+        ValTy = SMemI->getValueOperand()->getType();
       } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(J)) {
         if (IMemI->getIntrinsicID() == Intrinsic::prefetch) {
           MemI = IMemI;
@@ -203,8 +206,7 @@
         continue;
 
       // There are no update forms for Altivec vector load/stores.
-      if (ST && ST->hasAltivec() &&
-          PtrValue->getType()->getPointerElementType()->isVectorTy())
+      if (ST && ST->hasAltivec() && ValTy && ValTy->isVectorTy())
         continue;
 
       if (L->isLoopInvariant(PtrValue))
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -19543,9 +19543,9 @@
 // FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b.
 TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
-  auto PTy = cast<PointerType>(LI->getPointerOperand()->getType());
-  return needsCmpXchgNb(PTy->getElementType()) ? AtomicExpansionKind::CmpXChg
-                                               : AtomicExpansionKind::None;
+  return needsCmpXchgNb(LI->getType())
+             ? AtomicExpansionKind::CmpXChg
+             : AtomicExpansionKind::None;
 }
 
 TargetLowering::AtomicExpansionKind
Index: lib/Transforms/IPO/GlobalOpt.cpp
===================================================================
--- lib/Transforms/IPO/GlobalOpt.cpp
+++ lib/Transforms/IPO/GlobalOpt.cpp
@@ -2188,10 +2188,10 @@
 /// another pointer type, we punt.  We basically just support direct accesses to
 /// globals and GEP's of globals.  This should be kept up to date with
 /// CommitValueTo.
-static bool isSimpleEnoughPointerToCommit(Constant *C) {
+static bool isSimpleEnoughPointerToCommit(Constant *C, Type *ValTy) {
   // Conservatively, avoid aggregate types. This is because we don't
   // want to worry about them partially overlapping other stores.
-  if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
+  if (!ValTy->isSingleValueType())
     return false;
 
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
@@ -2428,19 +2428,20 @@
        DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
        return false;  // no volatile/atomic accesses.
      }
-      Constant *Ptr = getVal(SI->getOperand(1));
+      Constant *Ptr = getVal(SI->getPointerOperand());
       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
         DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
         Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
         DEBUG(dbgs() << "; To: " << *Ptr << "\n");
       }
-      if (!isSimpleEnoughPointerToCommit(Ptr)) {
+      Type *ValTy = SI->getValueOperand()->getType();
+      if (!isSimpleEnoughPointerToCommit(Ptr, ValTy)) {
         // If this is too complex for us to commit, reject it.
         DEBUG(dbgs() << "Pointer is too complex for us to evaluate store.");
         return false;
       }
 
-      Constant *Val = getVal(SI->getOperand(0));
+      Constant *Val = getVal(SI->getValueOperand());
 
       // If this might be too difficult for the backend to handle (e.g. the addr
       // of one global variable divided by another) then we can't commit it.
Index: lib/Transforms/Instrumentation/ThreadSanitizer.cpp
===================================================================
--- lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -94,7 +94,7 @@
                                       SmallVectorImpl<Instruction *> &All,
                                       const DataLayout &DL);
   bool addrPointsToConstantData(Value *Addr);
-  int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
+  int getMemoryAccessFuncIndex(Type *OrigTy, const DataLayout &DL);
 
   Type *IntptrTy;
   IntegerType *OrdTy;
@@ -410,7 +410,10 @@
   Value *Addr = IsWrite
       ? cast<StoreInst>(I)->getPointerOperand()
       : cast<LoadInst>(I)->getPointerOperand();
-  int Idx = getMemoryAccessFuncIndex(Addr, DL);
+  Type *OrigTy = IsWrite
+      ? cast<StoreInst>(I)->getValueOperand()->getType()
+      : cast<LoadInst>(I)->getType();
+  int Idx = getMemoryAccessFuncIndex(OrigTy, DL);
   if (Idx < 0)
     return false;
   if (IsWrite && isVtableAccess(I)) {
@@ -440,7 +443,6 @@
   const unsigned Alignment = IsWrite
       ? cast<StoreInst>(I)->getAlignment()
       : cast<LoadInst>(I)->getAlignment();
-  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
   const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
   Value *OnAccessFunc = nullptr;
   if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0)
@@ -508,7 +510,7 @@
   IRBuilder<> IRB(I);
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
     Value *Addr = LI->getPointerOperand();
-    int Idx = getMemoryAccessFuncIndex(Addr, DL);
+    int Idx = getMemoryAccessFuncIndex(LI->getType(), DL);
     if (Idx < 0)
       return false;
     const unsigned ByteSize = 1U << Idx;
@@ -522,7 +524,8 @@
 
   } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
     Value *Addr = SI->getPointerOperand();
-    int Idx = getMemoryAccessFuncIndex(Addr, DL);
+    Value *Val = SI->getValueOperand();
+    int Idx = getMemoryAccessFuncIndex(Val->getType(), DL);
     if (Idx < 0)
       return false;
     const unsigned ByteSize = 1U << Idx;
@@ -530,13 +533,14 @@
     Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
     Type *PtrTy = Ty->getPointerTo();
     Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
-                     IRB.CreateIntCast(SI->getValueOperand(), Ty, false),
+                     IRB.CreateIntCast(Val, Ty, false),
                      createOrdering(&IRB, SI->getOrdering())};
     CallInst *C = CallInst::Create(TsanAtomicStore[Idx], Args);
     ReplaceInstWithInst(I, C);
   } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) {
     Value *Addr = RMWI->getPointerOperand();
-    int Idx = getMemoryAccessFuncIndex(Addr, DL);
+    Value *Val = RMWI->getValOperand();
+    int Idx = getMemoryAccessFuncIndex(Val->getType(), DL);
     if (Idx < 0)
       return false;
     Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx];
@@ -547,13 +551,14 @@
     Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
     Type *PtrTy = Ty->getPointerTo();
     Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
-                     IRB.CreateIntCast(RMWI->getValOperand(), Ty, false),
+                     IRB.CreateIntCast(Val, Ty, false),
                      createOrdering(&IRB, RMWI->getOrdering())};
     CallInst *C = CallInst::Create(F, Args);
     ReplaceInstWithInst(I, C);
   } else if (AtomicCmpXchgInst *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
     Value *Addr = CASI->getPointerOperand();
-    int Idx = getMemoryAccessFuncIndex(Addr, DL);
+    Value *CmpVal = CASI->getCompareOperand();
+    int Idx = getMemoryAccessFuncIndex(CmpVal->getType(), DL);
     if (Idx < 0)
       return false;
     const unsigned ByteSize = 1U << Idx;
@@ -561,12 +566,12 @@
     Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
     Type *PtrTy = Ty->getPointerTo();
     Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
-                     IRB.CreateIntCast(CASI->getCompareOperand(), Ty, false),
+                     IRB.CreateIntCast(CmpVal, Ty, false),
                      IRB.CreateIntCast(CASI->getNewValOperand(), Ty, false),
                      createOrdering(&IRB, CASI->getSuccessOrdering()),
                      createOrdering(&IRB, CASI->getFailureOrdering())};
     CallInst *C = IRB.CreateCall(TsanAtomicCAS[Idx], Args);
-    Value *Success = IRB.CreateICmpEQ(C, CASI->getCompareOperand());
+    Value *Success = IRB.CreateICmpEQ(C, CmpVal);
 
     Value *Res = IRB.CreateInsertValue(UndefValue::get(CASI->getType()), C, 0);
     Res = IRB.CreateInsertValue(Res, Success, 1);
@@ -583,10 +588,8 @@
   return true;
 }
 
-int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr,
+int ThreadSanitizer::getMemoryAccessFuncIndex(Type *OrigTy,
                                               const DataLayout &DL) {
-  Type *OrigPtrTy = Addr->getType();
-  Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
   assert(OrigTy->isSized());
   uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
   if (TypeSize != 8 && TypeSize != 16 &&
Index: lib/Transforms/Scalar/MemCpyOptimizer.cpp
===================================================================
--- lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -146,6 +146,9 @@
   /// range.
   Value *StartPtr;
 
+  /// DestTy - The type used for writing to the start of the range.
+  Type *DestTy;
+
   /// Alignment - The known alignment of the first store.
   unsigned Alignment;
 
@@ -221,18 +224,24 @@
   }
 
   void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
-    int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
+    Type *DestTy = SI->getValueOperand()->getType();
+    int64_t StoreSize = DL.getTypeStoreSize(DestTy);
 
     addRange(OffsetFromFirst, StoreSize,
-             SI->getPointerOperand(), SI->getAlignment(), SI);
+             SI->getPointerOperand(), DestTy,
+             SI->getAlignment(), SI);
   }
 
   void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
+    Type *DestTy = Type::getInt8Ty(MSI->getContext());
     int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
-    addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI);
+
+    addRange(OffsetFromFirst, Size, MSI->getDest(),
+             DestTy, MSI->getAlignment(), MSI);
   }
 
-  void addRange(int64_t Start, int64_t Size, Value *Ptr,
+  void addRange(int64_t Start, int64_t Size,
+                Value *Ptr, Type *DestTy,
                 unsigned Alignment, Instruction *Inst);
 
 };
@@ -243,7 +252,8 @@
 /// Add a new store to the MemsetRanges data structure.  This adds a
 /// new range for the specified store at the specified offset, merging into
 /// existing ranges as appropriate.
-void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
+void MemsetRanges::addRange(int64_t Start, int64_t Size,
+                            Value *Ptr, Type *DestTy,
                             unsigned Alignment, Instruction *Inst) {
   int64_t End = Start+Size;
 
@@ -258,6 +268,7 @@
     R.Start = Start;
     R.End = End;
     R.StartPtr = Ptr;
+    R.DestTy = DestTy;
     R.Alignment = Alignment;
     R.TheStores.push_back(Inst);
     return;
@@ -280,6 +291,7 @@
   if (Start < I->Start) {
     I->Start = Start;
     I->StartPtr = Ptr;
+    I->DestTy = DestTy;
     I->Alignment = Alignment;
   }
 
@@ -453,11 +465,8 @@
 
     // Determine alignment
     unsigned Alignment = Range.Alignment;
-    if (Alignment == 0) {
-      Type *EltType =
-        cast<PointerType>(StartPtr->getType())->getElementType();
-      Alignment = DL.getABITypeAlignment(EltType);
-    }
+    if (Alignment == 0)
+      Alignment = DL.getABITypeAlignment(Range.DestTy);
 
     AMemSet =
       Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
Index: lib/Transforms/Scalar/SROA.cpp
===================================================================
--- lib/Transforms/Scalar/SROA.cpp
+++ lib/Transforms/Scalar/SROA.cpp
@@ -1750,13 +1750,13 @@
     if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
         II->getIntrinsicID() != Intrinsic::lifetime_end)
       return false;
-  } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
-    // Disable vector promotion when there are loads or stores of an FCA.
-    return false;
   } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
     if (LI->isVolatile())
       return false;
     Type *LTy = LI->getType();
+    // Disable vector promotion when there are loads or stores of an FCA.
+    if (LTy->isStructTy())
+      return false;
     if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
       assert(LTy->isIntegerTy());
       LTy = SplitIntTy;
@@ -1767,6 +1767,9 @@
     if (SI->isVolatile())
       return false;
     Type *STy = SI->getValueOperand()->getType();
+    // Disable vector promotion when there are loads or stores of an FCA.
+    if (STy->isStructTy())
+      return false;
     if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
       assert(STy->isIntegerTy());
       STy = SplitIntTy;
Index: lib/Transforms/Scalar/Scalarizer.cpp
===================================================================
--- lib/Transforms/Scalar/Scalarizer.cpp
+++ lib/Transforms/Scalar/Scalarizer.cpp
@@ -49,7 +49,7 @@
   // insert them before BBI in BB.  If Cache is nonnull, use it to cache
   // the results.
   Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
-            ValueVector *cachePtr = nullptr);
+            Type *VecTy, ValueVector *cachePtr = nullptr);
 
   // Return component I, creating a new Value for it if necessary.
   Value *operator[](unsigned I);
@@ -161,7 +161,7 @@
   }
 
 private:
-  Scatterer scatter(Instruction *, Value *);
+  Scatterer scatter(Instruction *, Value *, Type *VecTy = nullptr);
   void gather(Instruction *, const ValueVector &);
   bool canTransferMetadata(unsigned Kind);
   void transferMetadata(Instruction *, const ValueVector &);
@@ -183,13 +183,10 @@
                 "Scalarize vector operations", false, false)
 
 Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
-                     ValueVector *cachePtr)
+                     Type *VecTy, ValueVector *cachePtr)
   : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
-  Type *Ty = V->getType();
-  PtrTy = dyn_cast<PointerType>(Ty);
-  if (PtrTy)
-    Ty = PtrTy->getElementType();
-  Size = Ty->getVectorNumElements();
+  PtrTy = dyn_cast<PointerType>(V->getType());
+  Size = VecTy->getVectorNumElements();
   if (!CachePtr)
     Tmp.resize(Size, nullptr);
   else if (CachePtr->empty())
@@ -268,24 +265,26 @@
 
 // Return a scattered form of V that can be accessed by Point.  V must be a
 // vector or a pointer to a vector.
-Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
+Scatterer Scalarizer::scatter(Instruction *Point, Value *V, Type *VecTy) {
+  if (!VecTy)
+    VecTy = V->getType();
   if (Argument *VArg = dyn_cast<Argument>(V)) {
     // Put the scattered form of arguments in the entry block,
     // so that it can be used everywhere.
     Function *F = VArg->getParent();
     BasicBlock *BB = &F->getEntryBlock();
-    return Scatterer(BB, BB->begin(), V, &Scattered[V]);
+    return Scatterer(BB, BB->begin(), V, VecTy, &Scattered[V]);
   }
   if (Instruction *VOp = dyn_cast<Instruction>(V)) {
     // Put the scattered form of an instruction directly after the
     // instruction.
     BasicBlock *BB = VOp->getParent();
     return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
-                     V, &Scattered[V]);
+                     V, VecTy, &Scattered[V]);
   }
   // In the fallback case, just put the scattered before Point and
   // keep the result local to Point.
-  return Scatterer(Point->getParent(), Point->getIterator(), V);
+  return Scatterer(Point->getParent(), Point->getIterator(), V, VecTy);
 }
 
 // Replace Op with the gathered form of the components in CV.  Defer the
@@ -601,7 +600,7 @@
   unsigned NumElems = Layout.VecTy->getNumElements();
   IRBuilder<> Builder(&LI);
-  Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
+  Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), LI.getType());
   ValueVector Res;
   Res.resize(NumElems);
 
@@ -620,13 +619,14 @@
 
   VectorLayout Layout;
   Value *FullValue = SI.getValueOperand();
-  if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout,
+  Type *ValTy = FullValue->getType();
+  if (!getVectorLayout(ValTy, SI.getAlignment(), Layout,
                        SI.getModule()->getDataLayout()))
     return false;
 
   unsigned NumElems = Layout.VecTy->getNumElements();
   IRBuilder<> Builder(&SI);
-  Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
+  Scatterer Ptr = scatter(&SI, SI.getPointerOperand(), ValTy);
   Scatterer Val = scatter(&SI, FullValue);
 
   ValueVector Stores;
Index: lib/Transforms/Vectorize/BBVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/BBVectorize.cpp
+++ lib/Transforms/Vectorize/BBVectorize.cpp
@@ -610,7 +610,9 @@
     // after I; if OffsetInElmts == -1 then I accesses the memory
    // directly after J.
    bool getPairPtrInfo(Instruction *I, Instruction *J,
-                       Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment,
+                       Value *&IPtr, Value *&JPtr,
+                       Type *&ITy, Type *&JTy,
+                       unsigned &IAlignment, unsigned &JAlignment,
                        unsigned &IAddressSpace, unsigned &JAddressSpace,
                        int64_t &OffsetInElmts, bool ComputeOffset = true) {
      OffsetInElmts = 0;
      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        LoadInst *LJ = cast<LoadInst>(J);
        IPtr = LI->getPointerOperand();
        JPtr = LJ->getPointerOperand();
+       ITy = LI->getType();
+       JTy = LJ->getType();
        IAlignment = LI->getAlignment();
        JAlignment = LJ->getAlignment();
        IAddressSpace = LI->getPointerAddressSpace();
@@ -626,6 +630,8 @@
        StoreInst *SI = cast<StoreInst>(I), *SJ = cast<StoreInst>(J);
        IPtr = SI->getPointerOperand();
        JPtr = SJ->getPointerOperand();
+       ITy = SI->getValueOperand()->getType();
+       JTy = SJ->getValueOperand()->getType();
        IAlignment = SI->getAlignment();
        JAlignment = SJ->getAlignment();
        IAddressSpace = SI->getPointerAddressSpace();
@@ -647,12 +653,10 @@
        ConstantInt *IntOff = ConstOffSCEV->getValue();
        int64_t Offset = IntOff->getSExtValue();
        const DataLayout &DL = I->getModule()->getDataLayout();
-       Type *VTy = IPtr->getType()->getPointerElementType();
-       int64_t VTyTSS = (int64_t)DL.getTypeStoreSize(VTy);
+       int64_t VTyTSS = (int64_t)DL.getTypeStoreSize(ITy);
 
-       Type *VTy2 = JPtr->getType()->getPointerElementType();
-       if (VTy != VTy2 && Offset < 0) {
-         int64_t VTy2TSS = (int64_t)DL.getTypeStoreSize(VTy2);
+       if (ITy != JTy && Offset < 0) {
+         int64_t VTy2TSS = (int64_t)DL.getTypeStoreSize(JTy);
          OffsetInElmts = Offset/VTy2TSS;
          return (std::abs(Offset) % VTy2TSS) == 0;
        }
@@ -981,19 +985,16 @@
 
     if (IsSimpleLoadStore) {
       Value *IPtr, *JPtr;
+      Type *aTypeI, *aTypeJ;
       unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
       int64_t OffsetInElmts = 0;
-      if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
+      if (getPairPtrInfo(I, J, IPtr, JPtr, aTypeI, aTypeJ, IAlignment, JAlignment,
                          IAddressSpace, JAddressSpace, OffsetInElmts) &&
           std::abs(OffsetInElmts) == 1) {
         FixedOrder = (int) OffsetInElmts;
         unsigned BottomAlignment = IAlignment;
         if (OffsetInElmts < 0) BottomAlignment = JAlignment;
 
-        Type *aTypeI = isa<StoreInst>(I) ?
-          cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
-        Type *aTypeJ = isa<StoreInst>(J) ?
-          cast<StoreInst>(J)->getValueOperand()->getType() : J->getType();
         Type *VType = getVecTypeForPair(aTypeI, aTypeJ);
 
         if (Config.AlignedOnly) {
@@ -2302,20 +2303,19 @@
   Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context,
                      Instruction *I, Instruction *J, unsigned o) {
     Value *IPtr, *JPtr;
+    Type *ArgTypeI, *ArgTypeJ;
     unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
     int64_t OffsetInElmts;
 
     // Note: the analysis might fail here, that is why the pair order has
     // been precomputed (OffsetInElmts must be unused here).
-    (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
+    (void) getPairPtrInfo(I, J, IPtr, JPtr, ArgTypeI, ArgTypeJ,
+                          IAlignment, JAlignment,
                           IAddressSpace, JAddressSpace,
                           OffsetInElmts, false);
 
     // The pointer value is taken to be the one with the lowest offset.
     Value *VPtr = IPtr;
-
-    Type *ArgTypeI = IPtr->getType()->getPointerElementType();
-    Type *ArgTypeJ = JPtr->getType()->getPointerElementType();
     Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
 
     Type *VArgPtrType = PointerType::get(VArgType,
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1236,7 +1236,7 @@
   /// 0 - Stride is unknown or non-consecutive.
  /// 1 - Address is consecutive.
  /// -1 - Address is consecutive, and decreasing.
-  int isConsecutivePtr(Value *Ptr);
+  int isConsecutivePtr(Value *Ptr, Type *DataType = nullptr);
 
   /// Returns true if the value V is uniform within the loop.
   bool isUniform(Value *V);
@@ -1275,12 +1275,12 @@
   /// Returns true if the target machine supports masked store operation
   /// for the given \p DataType and kind of access to \p Ptr.
   bool isLegalMaskedStore(Type *DataType, Value *Ptr) {
-    return isConsecutivePtr(Ptr) && TTI->isLegalMaskedStore(DataType);
+    return isConsecutivePtr(Ptr, DataType) && TTI->isLegalMaskedStore(DataType);
   }
   /// Returns true if the target machine supports masked load operation
   /// for the given \p DataType and kind of access to \p Ptr.
   bool isLegalMaskedLoad(Type *DataType, Value *Ptr) {
-    return isConsecutivePtr(Ptr) && TTI->isLegalMaskedLoad(DataType);
+    return isConsecutivePtr(Ptr, DataType) && TTI->isLegalMaskedLoad(DataType);
   }
   /// Returns true if vector representation of the instruction \p I
   /// requires mask.
@@ -1984,11 +1984,11 @@
   return Builder.CreateAdd(Val, Step, "induction");
 }
 
-int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
+int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr, Type *DataType) {
   assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
   auto *SE = PSE.getSE();
   // Make sure that the pointer does not point to structs.
-  if (Ptr->getType()->getPointerElementType()->isAggregateType())
+  if (DataType && DataType->isAggregateType())
     return 0;
 
   // If this value is a pointer induction variable we know it is consecutive.
@@ -2010,8 +2010,7 @@
 
   if (Phi && Inductions.count(Phi)) {
     // Make sure that the pointer does not point to structs.
-    PointerType *GepPtrType = cast<PointerType>(GpPtr->getType());
-    if (GepPtrType->getElementType()->isAggregateType())
+    if (Gep->getSourceElementType()->isAggregateType())
       return 0;
 
     // Make sure that all of the index operands are loop invariant.
@@ -2381,7 +2380,7 @@
 
   // If the pointer is loop invariant or if it is non-consecutive,
   // scalarize the load.
-  int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+  int ConsecutiveStride = Legal->isConsecutivePtr(Ptr, ScalarDataTy);
   bool Reverse = ConsecutiveStride < 0;
   bool UniformLoad = LI && Legal->isUniform(Ptr);
   if (!ConsecutiveStride || UniformLoad)
@@ -4582,6 +4581,7 @@
     Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
     int Stride = isStridedPtr(PSE, Ptr, TheLoop, Strides);
+    Type *AccessTy = LI ? LI->getType() : SI->getValueOperand()->getType();
 
     // The factor of the corresponding interleave group.
     unsigned Factor = std::abs(Stride);
@@ -4591,13 +4591,12 @@
       continue;
 
     const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
-    PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
-    unsigned Size = DL.getTypeAllocSize(PtrTy->getElementType());
+    unsigned Size = DL.getTypeAllocSize(AccessTy);
 
     // An alignment of 0 means target ABI alignment.
     unsigned Align = LI ? LI->getAlignment() : SI->getAlignment();
     if (!Align)
-      Align = DL.getABITypeAlignment(PtrTy->getElementType());
+      Align = DL.getABITypeAlignment(AccessTy);
 
     StrideAccesses[I] = StrideDescriptor(Stride, Scev, Size, Align);
   }
@@ -5480,7 +5479,7 @@
   }
 
   // Scalarized loads/stores.
-  int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+  int ConsecutiveStride = Legal->isConsecutivePtr(Ptr, ValTy);
   bool Reverse = ConsecutiveStride < 0;
   const DataLayout &DL = I->getModule()->getDataLayout();
   unsigned ScalarAllocatedSize = DL.getTypeAllocSize(ValTy);
@@ -5623,12 +5622,14 @@
 
 bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
   // Check for a store.
-  if (StoreInst *ST = dyn_cast<StoreInst>(Inst))
-    return Legal->isConsecutivePtr(ST->getPointerOperand()) != 0;
+  if (StoreInst *ST = dyn_cast<StoreInst>(Inst)) {
+    Type *Ty = ST->getValueOperand()->getType();
+    return Legal->isConsecutivePtr(ST->getPointerOperand(), Ty) != 0;
+  }
 
   // Check for a load.
   if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
-    return Legal->isConsecutivePtr(LI->getPointerOperand()) != 0;
+    return Legal->isConsecutivePtr(LI->getPointerOperand(), LI->getType()) != 0;
 
   return false;
 }
Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -442,6 +442,10 @@
   /// \returns NULL if this is not a valid Load/Store instruction.
   static Value *getPointerOperand(Value *I);
 
+  /// \brief Take the accessed type from the Load/Store instruction.
+  /// \returns NULL if this is not a valid Load/Store instruction.
+  static Type *getAccessType(Value *I);
+
   /// \brief Take the address space operand from the Load/Store instruction.
   /// \returns -1 if this is not a valid Load/Store instruction.
   static unsigned getAddressSpaceOperand(Value *I);
@@ -1845,6 +1849,14 @@
   return nullptr;
 }
 
+Type *BoUpSLP::getAccessType(Value *I) {
+  if (LoadInst *LI = dyn_cast<LoadInst>(I))
+    return LI->getType();
+  if (StoreInst *SI = dyn_cast<StoreInst>(I))
+    return SI->getValueOperand()->getType();
+  return nullptr;
+}
+
 unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
   if (LoadInst *L = dyn_cast<LoadInst>(I))
     return L->getPointerAddressSpace();
@@ -1864,11 +1876,13 @@
     return false;
 
   // Make sure that A and B are different pointers of the same type.
-  if (PtrA == PtrB || PtrA->getType() != PtrB->getType())
+  Type *TyA = getAccessType(A);
+  Type *TyB = getAccessType(B);
+  if (PtrA == PtrB || TyA != TyB)
     return false;
 
   unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
-  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
+  Type *Ty = TyA;
   APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
 
   APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
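
Every hunk above follows the same pattern: instead of asking the pointer operand's type for its pointee type, the accessed type is taken from the memory instruction itself (the load's result type or the store's value operand type). A minimal sketch of that pattern against the LLVM C++ API; the helper name getLoadStoreAccessedType is illustrative only and is not introduced by this patch.

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Illustrative helper (not part of this patch): derive the accessed type
// directly from the memory instruction rather than from the pointer type.
static Type *getLoadStoreAccessedType(Instruction *I) {
  if (auto *LI = dyn_cast<LoadInst>(I))
    return LI->getType();                    // type produced by the load
  if (auto *SI = dyn_cast<StoreInst>(I))
    return SI->getValueOperand()->getType(); // type of the stored value
  return nullptr;                            // not a simple load/store
}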