Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -845,8 +845,9 @@
   // Legalize the type.
   std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(SrcVTy);
+  auto VT = TLI->getValueType(SrcVTy);
   unsigned Cost = 0;
-  if (LT.second != TLI->getValueType(SrcVTy).getSimpleVT() &&
+  if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
       LT.second.getVectorNumElements() == NumElem)
     // Promotion requires expand/truncate for data and a shuffle for mask.
     Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, 0) +
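Note on the hunk above: the new isSimple() guard matters because EVT::getSimpleVT()
asserts when the type has no one-to-one machine value type (MVT) equivalent. Below is
a minimal standalone sketch of that guard pattern, compiled against the LLVM headers;
matchesLegalizedType and the concrete types are illustrative only and not part of the
patch:

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Illustrative helper (not in the patch): compare a legalized MVT against an
// EVT only when the EVT actually has a machine-level equivalent, since
// EVT::getSimpleVT() asserts on extended types.
static bool matchesLegalizedType(MVT Legalized, EVT VT) {
  return VT.isSimple() && VT.getSimpleVT() == Legalized;
}

int main() {
  LLVMContext Ctx;
  EVT V4I32 = EVT::getVectorVT(Ctx, MVT::i32, 4); // simple: maps to MVT::v4i32
  EVT I17   = EVT::getIntegerVT(Ctx, 17);         // extended: no MVT for i17

  outs() << V4I32.getEVTString() << " simple? "
         << (V4I32.isSimple() ? "yes" : "no") << "\n"; // yes
  outs() << I17.getEVTString() << " simple? "
         << (I17.isSimple() ? "yes" : "no") << "\n";   // no

  // Calling I17.getSimpleVT() directly would assert; the guard skips it.
  outs() << (matchesLegalizedType(MVT::v4i32, V4I32) ? "match" : "no match")
         << "\n";
  return 0;
}

Without the guard, the old code called getSimpleVT() on whatever EVT the source type
mapped to, which appears to be the assertion failure this hunk is defending against.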
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -937,7 +937,7 @@
-  /// \return The size (in bits) of the widest type in the code that
+  /// \return The size (in bits) of the narrowest type in the code that
   /// needs to be vectorized. We ignore values that remain scalar such as
   /// 64 bit loop indices.
-  unsigned getWidestType();
+  unsigned getNarrowestType();
 
   /// \return The most profitable unroll factor.
   /// If UserUF is non-zero then this method finds the best unroll-factor
@@ -4471,15 +4471,15 @@
   unsigned TC = SE->getSmallConstantTripCount(TheLoop);
   DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
 
-  unsigned WidestType = getWidestType();
+  unsigned NarrowestType = getNarrowestType();
   unsigned WidestRegister = TTI.getRegisterBitWidth(true);
   unsigned MaxSafeDepDist = -1U;
   if (Legal->getMaxSafeDepDistBytes() != -1U)
     MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8;
   WidestRegister = ((WidestRegister < MaxSafeDepDist) ?
                     WidestRegister : MaxSafeDepDist);
-  unsigned MaxVectorSize = WidestRegister / WidestType;
-  DEBUG(dbgs() << "LV: The Widest type: " << WidestType << " bits.\n");
+  unsigned MaxVectorSize = WidestRegister / NarrowestType;
+  DEBUG(dbgs() << "LV: The Narrowest type: " << NarrowestType << " bits.\n");
   DEBUG(dbgs() << "LV: The Widest register is: " << WidestRegister
                << " bits.\n");
 
@@ -4568,8 +4568,8 @@
   return Factor;
 }
 
-unsigned LoopVectorizationCostModel::getWidestType() {
-  unsigned MaxWidth = 8;
+unsigned LoopVectorizationCostModel::getNarrowestType() {
+  unsigned MinWidth = 1024;
   const DataLayout &DL = TheFunction->getParent()->getDataLayout();
 
   // For each block.
@@ -4604,12 +4604,12 @@
       if (T->isPointerTy() && !isConsecutiveLoadOrStore(it))
         continue;
 
-      MaxWidth = std::max(MaxWidth,
-                          (unsigned)DL.getTypeSizeInBits(T->getScalarType()));
+      MinWidth = std::min(MinWidth,
+                          (unsigned)DL.getTypeSizeInBits(T->getScalarType()));
     }
   }
 
-  return MaxWidth;
+  return MinWidth;
 }
 
 unsigned
Index: test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
===================================================================
--- test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
+++ test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
@@ -9,7 +9,7 @@
 
 ; If we need to scalarize the fptoui and then use inserts to build up the
 ; vector again, then there is certainly no value in going 256-bit wide.
-; CHECK-NOT: vpinsrd
+; CHECK: vpinsrd
 
 define void @convert() {
 entry:
Index: test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
===================================================================
--- test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
+++ test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
@@ -11,13 +11,13 @@
 @q = global [2048 x i16] zeroinitializer, align 16
 @r = global [2048 x i16] zeroinitializer, align 16
 
-; Tests for widest type
+; Tests for narrowest type
 ; Ensure that we count the pointer store in the first test case. We have a
-; consecutive vector of pointers store, therefore we should count it towards the
-; widest vector count.
+; consecutive vector of pointers store, therefore we should count it when
+; computing the narrowest type.
 ;
 ; CHECK: test_consecutive_store
-; CHECK: The Widest type: 64 bits
+; CHECK: The Narrowest type: 64 bits
 define void @test_consecutive_store(%0**, %0**, %0** nocapture) nounwind ssp uwtable align 2 {
   %4 = load %0*, %0** %2, align 8
   %5 = icmp eq %0** %0, %1
@@ -43,7 +43,7 @@
 ; However, if the store of a set of pointers is not to consecutive memory we do
-; NOT count the store towards the widest vector type.
+; NOT count the store towards the narrowest type.
 ; In the test case below we add i16 types to store it in an array of pointer,
-; therefore the widest type should be i16.
+; therefore the narrowest type should be i16.
 ; int* p[2048][8];
 ; short q[2048];
 ; for (int y = 0; y < 8; ++y)
@@ -51,7 +51,7 @@
 ;   for (int i = 0; i < 2048; ++i) {
 ;     p[i][y] = (int*) (1 + q[i]);
 ;   }
 ; CHECK: test_nonconsecutive_store
-; CHECK: The Widest type: 16 bits
+; CHECK: The Narrowest type: 16 bits
 define void @test_nonconsecutive_store() nounwind ssp uwtable {
   br label %1
@@ -93,7 +93,7 @@
 
 ;; Now we check the same rules for loads. We should take consecutive loads of
 ;; pointer types into account.
 ; CHECK: test_consecutive_ptr_load
-; CHECK: The Widest type: 64 bits
+; CHECK: The Narrowest type: 8 bits
 define i8 @test_consecutive_ptr_load() nounwind readonly ssp uwtable {
   br label %1
@@ -117,7 +117,7 @@
 
 ;; However, we should not take unconsecutive loads of pointers into account.
 ; CHECK: test_nonconsecutive_ptr_load
-; CHECK: The Widest type: 16 bits
+; CHECK: The Narrowest type: 16 bits
 define void @test_nonconsecutive_ptr_load() nounwind ssp uwtable {
   br label %1
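Note on the LoopVectorize change: the patch inverts the register-width heuristic, so
MaxVectorSize = WidestRegister / NarrowestType now picks as many lanes as the
narrowest element allows, and any wider elements in the same loop get split across
several registers. A minimal plain-C++ sketch of the arithmetic follows; it is not
the LoopVectorize API, the 1024 sentinel mirrors the patch, and the numbers are only
an example:

#include <algorithm>
#include <cstdio>
#include <vector>

// Sketch of the policy the patch switches to: derive the maximum
// vectorization factor from the narrowest scalar type in the loop.
static unsigned narrowestTypeBits(const std::vector<unsigned> &TypeBits) {
  unsigned MinWidth = 1024; // same sentinel the patch uses for "no types seen"
  for (unsigned Bits : TypeBits)
    MinWidth = std::min(MinWidth, Bits);
  return MinWidth;
}

int main() {
  // A loop body mixing i8 loads with 64-bit pointer stores, as in
  // test_consecutive_ptr_load above.
  std::vector<unsigned> TypeBits = {8, 64};
  unsigned WidestRegister = 256; // e.g. 256-bit AVX2 ymm registers

  unsigned MaxVF = WidestRegister / narrowestTypeBits(TypeBits);
  // Widest-type policy would give 256 / 64 = 4 lanes; narrowest-type policy
  // gives 256 / 8 = 32 lanes, at the cost of splitting the 64-bit values
  // across multiple registers.
  std::printf("max vectorization factor: %u\n", MaxVF);
  return 0;
}

For the mixed i8/pointer loop in test_consecutive_ptr_load this is exactly why the
expected debug line changes from 64 bits (widest) to 8 bits (narrowest).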