Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -575,9 +575,11 @@
   /// Phi, Ret, Br.
   int getCFInstrCost(unsigned Opcode) const;
 
-  /// \returns The expected cost of compare and select instructions.
+  /// \returns The expected cost of compare and select instructions. If there
+  /// is an existing instruction that holds Opcode, it may be passed in the
+  /// 'I' parameter.
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-                         Type *CondTy = nullptr) const;
+                 Type *CondTy = nullptr, const Instruction *I = nullptr) const;
 
   /// \return The expected cost of vector Insert and Extract.
   /// Use -1 to indicate that there is no information on the index value.
@@ -809,7 +811,7 @@
                                        VectorType *VecTy, unsigned Index) = 0;
   virtual int getCFInstrCost(unsigned Opcode) = 0;
   virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-                                 Type *CondTy) = 0;
+                                Type *CondTy, const Instruction *I) = 0;
   virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
                                  unsigned Index) = 0;
   virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
@@ -1055,8 +1057,9 @@
   int getCFInstrCost(unsigned Opcode) override {
     return Impl.getCFInstrCost(Opcode);
   }
-  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) override {
-    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy);
+  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         const Instruction *I) override {
+    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
   }
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
     return Impl.getVectorInstrCost(Opcode, Val, Index);
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -336,7 +336,8 @@
 
   unsigned getCFInstrCost(unsigned Opcode) { return 1; }
 
-  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
+  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                              const Instruction *I) {
     return 1;
   }
 
Index: include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- include/llvm/CodeGen/BasicTTIImpl.h
+++ include/llvm/CodeGen/BasicTTIImpl.h
@@ -319,6 +319,23 @@
     return Cost;
   }
 
+  /// Estimate the overhead of scalarizing an instruction producing \p VecTy:
+  /// the inserts for the result plus the extracts for its operands.
+  unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
+    assert (VecTy->isVectorTy());
+
+    // Building the vector result requires inserting each scalar element.
+    const unsigned InsertCost = getScalarizationOverhead(VecTy, true, false);
+
+    // Without any information on the arguments, the extract cost for a
+    // single operand is used as a heuristic.
+    if (Args.empty())
+      return InsertCost + getScalarizationOverhead(VecTy, false, true);
+
+    const unsigned NumElts = VecTy->getVectorNumElements();
+    return InsertCost + getOperandsScalarizationOverhead(Args, NumElts);
+  }
+
   unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
 
   unsigned getArithmeticInstrCost(
@@ -361,15 +378,7 @@
                           ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
       // Return the cost of multiple scalar invocation plus the cost of
       // inserting and extracting the values.
-      unsigned TotCost = getScalarizationOverhead(Ty, true, false) + Num * Cost;
-      if (!Args.empty())
-        TotCost += getOperandsScalarizationOverhead(Args, Num);
-      else
-        // When no information on arguments is provided, we add the cost
-        // associated with one argument as a heuristic.
-        TotCost += getScalarizationOverhead(Ty, false, true);
-
-      return TotCost;
+      return getScalarizationOverhead(Ty, Args) + Num * Cost;
     }
 
     // We don't know anything about this scalar instruction.
@@ -512,7 +521,8 @@
     return 0;
   }
 
-  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
+  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                              const Instruction *I) {
     const TargetLoweringBase *TLI = getTLI();
     int ISD = TLI->InstructionOpcodeToISD(Opcode);
     assert(ISD && "Invalid opcode");
@@ -540,7 +550,7 @@
       if (CondTy)
         CondTy = CondTy->getScalarType();
       unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
-          Opcode, ValTy->getScalarType(), CondTy);
+          Opcode, ValTy->getScalarType(), CondTy, I);
 
       // Return the cost of multiple scalar invocation plus the cost of
       // inserting and extracting the values.
Index: lib/Analysis/CostModel.cpp
===================================================================
--- lib/Analysis/CostModel.cpp
+++ lib/Analysis/CostModel.cpp
@@ -447,12 +447,12 @@
   case Instruction::Select: {
     const SelectInst *SI = cast<SelectInst>(I);
     Type *CondTy = SI->getCondition()->getType();
-    return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy);
+    return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
   }
   case Instruction::ICmp:
   case Instruction::FCmp: {
     Type *ValTy = I->getOperand(0)->getType();
-    return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy);
+    return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
   }
   case Instruction::Store: {
     const StoreInst *SI = cast<StoreInst>(I);
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -329,8 +329,8 @@
 }
 
 int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-                                            Type *CondTy) const {
-  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy);
+                                 Type *CondTy, const Instruction *I) const {
+  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
Index: lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -103,7 +103,8 @@
 
   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
 
-  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         const Instruction *I = nullptr);
 
   int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                       unsigned AddressSpace);
Index: lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -436,7 +436,7 @@
 }
 
 int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-                                       Type *CondTy) {
+                                       Type *CondTy, const Instruction *I) {
 
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   // We don't lower some vector selects well that are wider than the register
@@ -463,7 +463,7 @@
         return Entry->Cost;
     }
   }
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
 }
 
 int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
Index: lib/Target/ARM/ARMTargetTransformInfo.h
===================================================================
--- lib/Target/ARM/ARMTargetTransformInfo.h
+++ lib/Target/ARM/ARMTargetTransformInfo.h
@@ -96,7 +96,8 @@
 
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
 
-  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         const Instruction *I = nullptr);
 
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
 
Index: lib/Target/ARM/ARMTargetTransformInfo.cpp
===================================================================
--- lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -310,7 +310,8 @@
   return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
 }
 
-int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
+int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                                   const Instruction *I) {
 
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   // On NEON a a vector select gets lowered to vbsl.
@@ -335,7 +336,7 @@
     return LT.first;
   }
 
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
 }
 
 int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
Index: lib/Target/PowerPC/PPCTargetTransformInfo.h
===================================================================
--- lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -75,7 +75,8 @@
       ArrayRef<const Value *> Args = ArrayRef<const Value *>());
   int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
-  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
   int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                       unsigned AddressSpace);
Index: lib/Target/PowerPC/PPCTargetTransformInfo.cpp
===================================================================
--- lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -308,8 +308,9 @@
   return BaseT::getCastInstrCost(Opcode, Dst, Src);
 }
 
-int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                                   const Instruction *I) {
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
 }
 
 int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
Index: lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.cpp
+++ lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -347,9 +347,13 @@
     // There should be no need to check for float types other than v2f64
     // since <2 x f32> isn't a legal type.
     setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
+    setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
     setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
     setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
+    setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
     setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
   }
 
   // Handle floating-point types.
Index: lib/Target/SystemZ/SystemZTargetTransformInfo.h
===================================================================
--- lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -27,6 +27,8 @@
   const SystemZSubtarget *getST() const { return ST; }
   const SystemZTargetLowering *getTLI() const { return TLI; }
 
+  unsigned const LIBCALL_COST = 30;
+
 public:
   explicit SystemZTTIImpl(const SystemZTargetMachine *TM, const Function &F)
       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
@@ -53,6 +55,22 @@
   unsigned getNumberOfRegisters(bool Vector);
   unsigned getRegisterBitWidth(bool Vector);
 
+  bool isFPVectorizationPotentiallyUnsafe() { return false; }
+
+  int getArithmeticInstrCost(
+      unsigned Opcode, Type *Ty,
+      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+  unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
+  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         const Instruction *I = nullptr);
+  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+                      unsigned AddressSpace);
   /// @}
 };
 
Index: lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
===================================================================
--- lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -259,11 +259,8 @@
         }
       }
       if (isa<StoreInst>(&I)) {
-        NumStores++;
         Type *MemAccessTy = I.getOperand(0)->getType();
-        if((MemAccessTy->isIntegerTy() || MemAccessTy->isFloatingPointTy()) &&
-           (getDataLayout().getTypeSizeInBits(MemAccessTy) == 128))
-          NumStores++;  // 128 bit fp/int stores get split.
+        NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, 0, 0);
       }
     }
 
@@ -313,3 +310,464 @@
   return 0;
 }
 
+int SystemZTTIImpl::getArithmeticInstrCost(
+    unsigned Opcode, Type *Ty,
+    TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
+    TTI::OperandValueProperties Opd1PropInfo,
+    TTI::OperandValueProperties Opd2PropInfo,
+    ArrayRef<const Value *> Args) {
+  // Return the cost of arithmetic instruction Opcode operating on type Ty.
+
+  // TODO: return a good value for BB-VECTORIZER that includes the
+  // immediate loads, which we do not want to count for the loop
+  // vectorizer, since they are hopefully hoisted out of the loop. This
+  // would require a new parameter 'InLoop', but not sure if constant
+  // args are common enough to motivate this.
+
+  const unsigned ElemBits = Ty->getScalarSizeInBits();
+  if (Ty->isVectorTy()) {
+    assert (ST->hasVector() && "getArithmeticInstrCost() called with vector type.");
+    const unsigned NumElts = Ty->getVectorNumElements();
+    const unsigned NumRegs = getNumberOfParts(Ty);
+
+    switch (Opcode) {
+    case Instruction::Shl:
+    case Instruction::LShr:
+    case Instruction::AShr:
+      // These vector operations are custom handled, but are still supported
+      // with one instruction per vector, regardless of element size.
+      return NumRegs;
+    case Instruction::FAdd:
+    case Instruction::FSub:
+    case Instruction::FMul:
+    case Instruction::FDiv:
+      // These FP operations are supported with a single vector instruction
+      // for double (base implementation assumes float generally costs 2).
+      // For FP128, the scalar cost is 1, and there is no overhead since the
+      // values are already in scalar registers.
+      if (ElemBits == 64 || ElemBits == 128)
+        return NumRegs;
+      if (ElemBits == 32) {
+        // Return the cost of multiple scalar invocation plus the cost of
+        // inserting and extracting the values.
+        unsigned PerElt = getArithmeticInstrCost(Opcode, Ty->getScalarType());
+        unsigned Total = NumElts * PerElt + getScalarizationOverhead(Ty, Args);
+        // FIXME: VF 2 for these FP operations are currently just as
+        // expensive as for VF 4.
+        return (NumElts == 2 ? Total * 2 : Total);
+      }
+      break;
+    case Instruction::FRem: {
+      // There is no native support for FRem.
+      unsigned Total =
+          (NumElts * LIBCALL_COST) + getScalarizationOverhead(Ty, Args);
+      // FIXME: VF 2 for float is currently just as expensive as for VF 4.
+      return ((NumElts == 2 && ElemBits == 32) ? Total * 2 : Total);
+    }
+    default:
+      break;
+    }
+  }
+  else {  // Scalar:
+    switch (Opcode) {
+    case Instruction::FAdd:
+    case Instruction::FSub:
+    case Instruction::FMul:
+    case Instruction::FDiv:
+      // These FP operations are supported with a dedicated instruction for
+      // float, double and fp128 (base implementation assumes float generally
+      // costs 2).
+      return 1;
+    case Instruction::FRem:
+      // There is no native support for FRem.
+      return LIBCALL_COST;
+    case Instruction::LShr:
+    case Instruction::AShr:
+      return (ElemBits >= 32 ? 1 : 2 /*ext*/);
+    case Instruction::Or:
+      // Or requires one instruction, although it has custom handling for i64.
+      return 1;
+    case Instruction::SDiv:
+    case Instruction::SRem:
+      // An extra extension for narrow types is needed.
+      return (ElemBits < 32 ? 4 /*sext of ops*/ : 2);
+    case Instruction::UDiv:
+    case Instruction::URem:
+      return (ElemBits < 32 ? 4 /*zext of both ops*/ : 3);
+    default:
+      break;
+    }
+  }
+
+  // Fallback to the default implementation.
+  return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+                                       Opd1PropInfo, Opd2PropInfo, Args);
+}
+
+
+// Return the cost of a vector shuffle of kind Kind on the vector type Tp.
+// Index is the start offset used for subvector extraction.
+int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+                                   Type *SubTp) {
+  assert (Tp->isVectorTy());
+  assert (ST->hasVector() && "getShuffleCost() called.");
+  unsigned NumVectors = getNumberOfParts(Tp);
+
+  // FP128 values are always in scalar registers, so there is no work
+  // involved with a shuffle, except for broadcast. In that case the register
+  // moves are done with a single instruction per element.
+  if (Tp->getScalarType()->isFP128Ty())
+    return (Kind == TargetTransformInfo::SK_Broadcast ? NumVectors - 1 : 0);
+
+  // Every shuffle kind is costed below, so the generic implementation (and
+  // with it SubTp) is never consulted.
+  switch (Kind) {
+  case TargetTransformInfo::SK_ExtractSubvector:
+    // ExtractSubvector Index indicates start offset. Extracting a subvector
+    // from the first index is a noop.
+    return (Index == 0 ? 0 : NumVectors);
+
+  case TargetTransformInfo::SK_Broadcast:
+    // Loop vectorizer calls here to figure out the extra cost of
+    // broadcasting a loaded value to all elements of a vector. Since vlrep
+    // loads and replicates with a single instruction, adjust the returned
+    // value.
+    return NumVectors - 1;
+
+  default:
+    // SystemZ supports single instruction permutation / replication.
+    return NumVectors;
+  }
+}
+
+// Compute the absolute difference between the log2 of the scalar (element)
+// bit widths of Ty0 and Ty1, regardless of which of the two is wider.
+static unsigned getElSizeLog2Diff(Type *Ty0, Type *Ty1) {
+  const unsigned Bits0 = Ty0->getScalarSizeInBits();
+  const unsigned Bits1 = Ty1->getScalarSizeInBits();
+  const bool FirstIsWider = Bits0 >= Bits1;
+  const unsigned WideBits = FirstIsWider ? Bits0 : Bits1;
+  const unsigned NarrowBits = FirstIsWider ? Bits1 : Bits0;
+  return Log2_32(WideBits) - Log2_32(NarrowBits);
+}
+
+// Estimate how many instructions it takes to truncate the vector type SrcTy
+// to the narrower vector type DstTy with the same number of elements.
+unsigned SystemZTTIImpl::
+getVectorTruncCost(Type *SrcTy, Type *DstTy) {
+  assert (SrcTy->isVectorTy() && DstTy->isVectorTy());
+  assert (SrcTy->getPrimitiveSizeInBits() > DstTy->getPrimitiveSizeInBits() &&
+          "Packing must reduce size of vector type.");
+  assert (SrcTy->getVectorNumElements() == DstTy->getVectorNumElements() &&
+          "Packing should not change number of elements.");
+
+  unsigned PartsLeft = getNumberOfParts(SrcTy);
+
+  // Up to 2 vector registers can be truncated efficiently with pack or
+  // permute. The latter requires an immediate mask to be loaded, which
+  // typically gets hoisted out of a loop.  TODO: return a good value for
+  // BB-VECTORIZER that includes the immediate loads, which we do not want
+  // to count for the loop vectorizer.
+  if (PartsLeft <= 2)
+    return 1;
+
+  // Each halving of the element size halves the number of registers (never
+  // going below one), costing one instruction per resulting register.
+  unsigned NumInstrs = 0;
+  for (unsigned Step = getElSizeLog2Diff(SrcTy, DstTy); Step != 0; --Step) {
+    PartsLeft = (PartsLeft > 1 ? PartsLeft / 2 : PartsLeft);
+    NumInstrs += PartsLeft;
+  }
+
+  // Currently, a general mix of permutes and pack instructions is output by
+  // isel, which follow the cost computation above except for this case which
+  // is one instruction less:
+  const bool IsV8I64ToV8I8 = SrcTy->getVectorNumElements() == 8 &&
+                             SrcTy->getScalarSizeInBits() == 64 &&
+                             DstTy->getScalarSizeInBits() == 8;
+  return IsV8I64ToV8I8 ? NumInstrs - 1 : NumInstrs;
+}
+
+int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+  // Return the cost of the cast Opcode converting type Src to type Dst.
+  unsigned DstScalarBits = Dst->getScalarSizeInBits();
+  unsigned SrcScalarBits = Src->getScalarSizeInBits();
+
+  if (Src->isVectorTy()) {
+    assert (ST->hasVector() && "getCastInstrCost() called with vector type.");
+    assert (Dst->isVectorTy());
+    unsigned VF = Src->getVectorNumElements();
+    unsigned NumDstVectors = getNumberOfParts(Dst);
+    unsigned NumSrcVectors = getNumberOfParts(Src);
+
+    if (Opcode == Instruction::Trunc)
+      return getVectorTruncCost(Src, Dst);
+
+    if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
+      if (SrcScalarBits >= 8) {
+        // ZExt/SExt will be handled with one unpack per doubling of width.
+        unsigned NumUnpacks = getElSizeLog2Diff(Src, Dst);
+
+        // For types that span multiple vector registers, some additional
+        // instructions are used to set up the unpacking.
+        unsigned NumSrcVectorOps =
+          (NumUnpacks > 1 ? (NumDstVectors - NumSrcVectors)
+                          : (NumDstVectors / 2));
+
+        return (NumUnpacks * NumDstVectors) + NumSrcVectorOps;
+      }
+      else if (SrcScalarBits == 1) {
+        // FIXME: i1 isn't optimally treated.
+        // These values reflect the current handling of i1 for sext/zext.
+        if (Opcode == Instruction::SExt) {
+          static const CostTblEntry SextCostTable[] = {
+            { ISD::SIGN_EXTEND, MVT::v2i8, 3},
+            { ISD::SIGN_EXTEND, MVT::v2i16, 3},
+            { ISD::SIGN_EXTEND, MVT::v2i32, 3},
+            { ISD::SIGN_EXTEND, MVT::v2i64, 2},
+            { ISD::SIGN_EXTEND, MVT::v4i8, 3},
+            { ISD::SIGN_EXTEND, MVT::v4i16, 3},
+            { ISD::SIGN_EXTEND, MVT::v4i32, 2},
+            { ISD::SIGN_EXTEND, MVT::v4i64, 6},
+            { ISD::SIGN_EXTEND, MVT::v8i8, 3},
+            { ISD::SIGN_EXTEND, MVT::v8i16, 2},
+            { ISD::SIGN_EXTEND, MVT::v8i32, 6},
+            { ISD::SIGN_EXTEND, MVT::v8i64, 13},
+            { ISD::SIGN_EXTEND, MVT::v16i8, 2},
+            { ISD::SIGN_EXTEND, MVT::v16i16, 6},
+            { ISD::SIGN_EXTEND, MVT::v16i32, 12},
+            { ISD::SIGN_EXTEND, MVT::v16i64, 23},
+          };
+          MVT MTy = TLI->getValueType(DL, Dst).getSimpleVT();
+          if (const auto *Entry =
+              CostTableLookup(SextCostTable, ISD::SIGN_EXTEND, MTy))
+            return Entry->Cost;
+        }
+        else { // ZExt
+          static const CostTblEntry ZextCostTable[] = {
+            { ISD::ZERO_EXTEND, MVT::v2i8, 2},
+            { ISD::ZERO_EXTEND, MVT::v2i16, 2},
+            { ISD::ZERO_EXTEND, MVT::v2i32, 2},
+            { ISD::ZERO_EXTEND, MVT::v2i64, 1},
+            { ISD::ZERO_EXTEND, MVT::v4i8, 2},
+            { ISD::ZERO_EXTEND, MVT::v4i16, 2},
+            { ISD::ZERO_EXTEND, MVT::v4i32, 1},
+            { ISD::ZERO_EXTEND, MVT::v4i64, 4},
+            { ISD::ZERO_EXTEND, MVT::v8i8, 2},
+            { ISD::ZERO_EXTEND, MVT::v8i16, 1},
+            { ISD::ZERO_EXTEND, MVT::v8i32, 4},
+            { ISD::ZERO_EXTEND, MVT::v8i64, 12},
+            { ISD::ZERO_EXTEND, MVT::v16i8, 1},
+            { ISD::ZERO_EXTEND, MVT::v16i16, 4},
+            { ISD::ZERO_EXTEND, MVT::v16i32, 12},
+            { ISD::ZERO_EXTEND, MVT::v16i64, 32},
+          };
+          MVT MTy = TLI->getValueType(DL, Dst).getSimpleVT();
+          if (const auto *Entry =
+              CostTableLookup(ZextCostTable, ISD::ZERO_EXTEND, MTy))
+            return Entry->Cost;
+        }
+      }
+    }
+
+    if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP ||
+        Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) {
+      // TODO: Fix base implementation which could simplify things a bit here
+      // (seems to miss on differentiating on scalar/vector types).
+
+      // Only 64 bit vector conversions are natively supported.
+      if (SrcScalarBits == 64 && DstScalarBits == 64)
+        return NumDstVectors;
+
+      // Return the cost of multiple scalar invocation plus the cost of
+      // inserting and extracting the values. Base implementation does not
+      // realize float->int gets scalarized.
+      unsigned ScalarCost = getCastInstrCost(Opcode, Dst->getScalarType(),
+                                             Src->getScalarType());
+      unsigned TotCost = VF * ScalarCost;
+      bool NeedsInserts = true, NeedsExtracts = true;
+      // FP128 registers do not get inserted or extracted.
+      if (DstScalarBits == 128 &&
+          (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP))
+        NeedsInserts = false;
+      if (SrcScalarBits == 128 &&
+          (Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))
+        NeedsExtracts = false;
+
+      TotCost += getScalarizationOverhead(Dst, NeedsInserts, NeedsExtracts);
+
+      // FIXME: VF 2 for float<->i32 is currently just as expensive as for VF 4.
+      if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32)
+        TotCost *= 2;
+
+      return TotCost;
+    }
+
+    if (Opcode == Instruction::FPTrunc) {
+      if (SrcScalarBits == 128)  // fp128 -> double/float + inserts of elements.
+        return VF /*ldxbr/lexbr*/ + getScalarizationOverhead(Dst, true, false);
+      else // double -> float
+        return VF / 2 /*vledb*/ + std::max(1U, VF / 4 /*vperm*/);
+    }
+
+    if (Opcode == Instruction::FPExt) {
+      if (SrcScalarBits == 32 && DstScalarBits == 64) {
+        // float -> double is very rare and currently unoptimized. Instead of
+        // using vldeb, which can do two at a time, all conversions are
+        // scalarized.
+        return VF * 2;
+      }
+      // -> fp128.  VF * lxdb/lxeb + extraction of elements.
+      return VF + getScalarizationOverhead(Src, false, true);
+    }
+  }
+  else { // Scalar
+    assert (!Dst->isVectorTy());
+
+    if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP)
+      return (SrcScalarBits >= 32 ? 1 : 2 /*i8/i16 extend*/);
+
+    if (Opcode == Instruction::SExt && Src->isIntegerTy(1))
+      // nilf/risbgn + lcr/lcgr
+      return 2;
+  }
+
+  return BaseT::getCastInstrCost(Opcode, Dst, Src);
+}
+
+// Widen scalar type T to a vector of VF elements; vectors and VF <= 1 pass
+// through unchanged.
+static Type *ToVectorTy(Type *T, unsigned VF) {
+  return (T->isVectorTy() || VF <= 1) ? T : VectorType::get(T, VF);
+}
+
+// Return the cost of a compare (ICmp/FCmp) or select on type ValTy. If the
+// instruction itself is passed in I, its predicate and its user / condition
+// operand are inspected to refine the cost of vector compares and selects.
+int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+                                       Type *CondTy, const Instruction *I) {
+  // Hand over to common code if it's a compare for branch.
+  if (I != nullptr && I->hasOneUse() &&
+      isa<BranchInst>(I->use_begin()->getUser()))
+    return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, nullptr);
+
+  if (ValTy->isVectorTy()) {
+    assert (ST->hasVector() && "getCmpSelInstrCost() called with vector type.");
+    unsigned VF = ValTy->getVectorNumElements();
+
+    // Called with a compare instruction.
+    if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
+      Type *SelectedTy = nullptr;
+      unsigned PredicateExtraCost = 0;
+      if (I != nullptr) {
+        assert (isa<CmpInst>(I));
+        if (I->hasOneUse()) {  // FIXME: Need to handle several users?
+          if (SelectInst *SI = dyn_cast<SelectInst>(I->use_begin()->getUser()))
+            SelectedTy = ToVectorTy(SI->getType(), VF);
+        }
+
+        // Some predicates cost one or two extra instructions.
+        switch (cast<CmpInst>(I)->getPredicate()) {
+        case CmpInst::Predicate::ICMP_NE:
+        case CmpInst::Predicate::ICMP_UGE:
+        case CmpInst::Predicate::ICMP_ULE:
+        case CmpInst::Predicate::ICMP_SGE:
+        case CmpInst::Predicate::ICMP_SLE:
+          PredicateExtraCost = 1;
+          break;
+        case CmpInst::Predicate::FCMP_ONE:
+        case CmpInst::Predicate::FCMP_ORD:
+        case CmpInst::Predicate::FCMP_UEQ:
+        case CmpInst::Predicate::FCMP_UNO:
+          PredicateExtraCost = 2;
+          break;
+        default:
+          break;
+        }
+      }
+
+      // Float is handled with 2*vmr[lh]f + 2*vldeb + vfchdb for each pair of
+      // floats.  FIXME: <2 x float> generates same code as <4 x float>.
+      unsigned CmpCostPerVector = (ValTy->getScalarType()->isFloatTy() ? 10 : 1);
+      unsigned NumVecs_cmp = getNumberOfParts(ValTy);
+      unsigned NumVecs_sel = (SelectedTy != nullptr ?
+                              getNumberOfParts(SelectedTy) : 1);
+
+      // If the vector select is split, one compare will be done for each part.
+      unsigned Cost = (std::max(NumVecs_cmp, NumVecs_sel) *
+                       (CmpCostPerVector + PredicateExtraCost));
+
+      // In case the select gets split, and the compared element type is
+      // smaller than the selected one, extra instructions are needed to move
+      // the values into the operands for the compares.
+      if (SelectedTy != nullptr && NumVecs_sel > 1 && NumVecs_cmp < NumVecs_sel) {
+        Cost += NumVecs_sel;
+        unsigned Log2Diff = getElSizeLog2Diff(ValTy, SelectedTy);
+        if (NumVecs_sel >= 4 && Log2Diff > 1)
+          Cost += NumVecs_sel / 2;
+        if (NumVecs_sel >= 8 && Log2Diff > 2)
+          Cost += NumVecs_sel / 4;
+      }
+
+      return Cost;
+    }
+    else { // Called with a select instruction.
+      assert (Opcode == Instruction::Select);
+      unsigned NumVecs_sel = getNumberOfParts(ValTy);
+
+      // We can figure out the extra cost of packing / unpacking if the
+      // instruction was passed and the compare instruction is found.
+      unsigned PackCost = 0;
+      if (I != nullptr) {
+        assert (isa<SelectInst>(I));
+        Type *ComparedTy = nullptr;
+        if (CmpInst *CI = dyn_cast<CmpInst>(I->getOperand(0)))
+          ComparedTy = ToVectorTy(CI->getOperand(0)->getType(), VF);
+
+        if (ComparedTy != nullptr) {
+          unsigned SelScalarBits = ValTy->getScalarSizeInBits();
+          unsigned CmpScalarBits = ComparedTy->getScalarSizeInBits();
+          unsigned Log2Diff = getElSizeLog2Diff(ValTy, ComparedTy);
+          if (CmpScalarBits > SelScalarBits)
+            // The bitmask will be truncated.
+            PackCost = getVectorTruncCost(ComparedTy, ValTy);
+          else if (SelScalarBits > CmpScalarBits)
+            // Each vector select needs its part of the bitmask unpacked.
+            PackCost = Log2Diff * NumVecs_sel;
+        }
+      }
+
+      return NumVecs_sel /*vsel*/ + PackCost;
+    }
+  }
+  else { // Scalar
+    switch (Opcode) {
+    case Instruction::ICmp: {
+      unsigned Cost = 1;
+      if (ValTy->getScalarSizeInBits() <= 16)
+        Cost += 2; // extend both operands
+      return Cost;
+    }
+    case Instruction::Select:
+      if (ValTy->isFloatingPointTy())
+        return 4; // No load on condition for FP, so this costs a conditional jump.
+      return 1; // Load On Condition.
+    }
+  }
+
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, nullptr);
+}
+
+// Cost a memory operation on type Src as the number of parts it is split
+// into. Opcode, Alignment and AddressSpace do not influence the result.
+int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+                                    unsigned Alignment, unsigned AddressSpace) {
+  assert(!Src->isVoidTy() && "Invalid type");
+
+  const unsigned NumParts = getNumberOfParts(Src);
+
+  // 128 bit scalars are held in a pair of two 64 bit registers, so each
+  // part takes two operations.
+  return Src->getScalarSizeInBits() == 128 ? NumParts * 2 : NumParts;
+}
+
Index: lib/Target/X86/X86TargetTransformInfo.h
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.h
+++ lib/Target/X86/X86TargetTransformInfo.h
@@ -62,7 +62,8 @@
       ArrayRef<const Value *> Args = ArrayRef<const Value *>());
   int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
-  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
   int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                       unsigned AddressSpace);
Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1300,7 +1300,8 @@
   return BaseT::getCastInstrCost(Opcode, Dst, Src);
 }
 
-int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
+int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                                   const Instruction *I) {
   // Legalize the type.
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
 
@@ -1366,7 +1367,7 @@
     if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
       return LT.first * Entry->Cost;
 
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
 }
 
 int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7229,16 +7229,27 @@
     if (!ScalarCond)
       CondTy = VectorType::get(CondTy, VF);
 
-    return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);
+    return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy, I);
   }
   case Instruction::ICmp:
   case Instruction::FCmp: {
+    // If this is the loop-latch compare feeding the back branch, just add the
+    // scalar cost. TODO: should this check be done in the caller instead?
+    bool LikelyVectorized = true;
+    if (I->hasOneUse()) {
+      if (BranchInst *BI = dyn_cast<BranchInst>(I->use_begin()->getUser())) {
+        if (BI->getParent() == TheLoop->getLoopLatch())
+          LikelyVectorized = false;
+      }
+    }
     Type *ValTy = I->getOperand(0)->getType();
     Instruction *Op0AsInstruction = dyn_cast<Instruction>(I->getOperand(0));
     if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF))
       ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]);
-    VectorTy = ToVectorTy(ValTy, VF);
-    return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy);
+
+    if (LikelyVectorized)
+      VectorTy = ToVectorTy(ValTy, VF);
+    return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, I);
   }
   case Instruction::Store:
   case Instruction::Load: {
Index: test/Analysis/CostModel/SystemZ/cmpsel.ll
===================================================================
--- /dev/null
+++ test/Analysis/CostModel/SystemZ/cmpsel.ll
@@ -0,0 +1,1806 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
+;
+; Note: The cost estimate for a select of an fp type is somewhat arbitrary,
+; since it involves a conditional jump.
+; Note: Vector fp32 is not directly supported, so its estimates are not quite
+; exact (but they are large absolute values).
+
+define i8 @fun0(i8 %val1, i8 %val2,
+                i8 %val3, i8 %val4) {
+  %cmp = icmp eq i8 %val1, %val2
+  %sel = select i1 %cmp, i8 %val3, i8 %val4
+  ret i8 %sel
+; Scalar: i8 compare feeding an i8 select.
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %cmp = icmp eq i8 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i8 %val3, i8 %val4
+}
+
+define i16 @fun1(i8 %val1, i8 %val2,
+                i16 %val3, i16 %val4) {
+  %cmp = icmp eq i8 %val1, %val2
+  %sel = select i1 %cmp, i16 %val3, i16 %val4
+  ret i16 %sel
+; Scalar: i8 compare feeding an i16 select.
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %cmp = icmp eq i8 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i16 %val3, i16 %val4
+}
+
+define i32 @fun2(i8 %val1, i8 %val2,
+                i32 %val3, i32 %val4) {
+  %cmp = icmp eq i8 %val1, %val2
+  %sel = select i1 %cmp, i32 %val3, i32 %val4
+  ret i32 %sel
+; Scalar: i8 compare feeding an i32 select.
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %cmp = icmp eq i8 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i32 %val3, i32 %val4
+}
+
+define i64 @fun3(i8 %val1, i8 %val2,
+                i64 %val3, i64 %val4) {
+  %cmp = icmp eq i8 %val1, %val2
+  %sel = select i1 %cmp, i64 %val3, i64 %val4
+  ret i64 %sel
+; Scalar: i8 compare feeding an i64 select.
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %cmp = icmp eq i8 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i64 %val3, i64 %val4
+}
+
+define float @fun4(i8 %val1, i8 %val2,
+                float %val3, float %val4) {
+  %cmp = icmp eq i8 %val1, %val2
+  %sel = select i1 %cmp, float %val3, float %val4
+  ret float %sel
+; Scalar: i8 compare feeding a float (fp) select.
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %cmp = icmp eq i8 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select i1 %cmp, float %val3, float %val4
+}
+
+define double @fun5(i8 %val1, i8 %val2,
+                double %val3, double %val4) {
+  %cmp = icmp eq i8 %val1, %val2
+  %sel = select i1 %cmp, double %val3, double %val4
+  ret double %sel
+; Scalar: i8 compare feeding a double (fp) select.
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %cmp = icmp eq i8 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select i1 %cmp, double %val3, double %val4
+}
+
+define i8 @fun6(i16 %val1, i16 %val2,
+                i8 %val3, i8 %val4) {
+  %cmp = icmp eq i16 %val1, %val2
+  %sel = select i1 %cmp, i8 %val3, i8 %val4
+  ret i8 %sel
+; Scalar: i16 compare feeding an i8 select.
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %cmp = icmp eq i16 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i8 %val3, i8 %val4
+}
+
+define i16 @fun7(i16 %val1, i16 %val2,
+                i16 %val3, i16 %val4) {
+  %cmp = icmp eq i16 %val1, %val2
+  %sel = select i1 %cmp, i16 %val3, i16 %val4
+  ret i16 %sel
+; Scalar: i16 compare feeding an i16 select.
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %cmp = icmp eq i16 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i16 %val3, i16 %val4
+}
+
+define i32 @fun8(i16 %val1, i16 %val2,
+                i32 %val3, i32 %val4) {
+  %cmp = icmp eq i16 %val1, %val2
+  %sel = select i1 %cmp, i32 %val3, i32 %val4
+  ret i32 %sel
+; Scalar: i16 compare feeding an i32 select.
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %cmp = icmp eq i16 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i32 %val3, i32 %val4
+}
+
+define i64 @fun9(i16 %val1, i16 %val2,
+                i64 %val3, i64 %val4) {
+  %cmp = icmp eq i16 %val1, %val2
+  %sel = select i1 %cmp, i64 %val3, i64 %val4
+  ret i64 %sel
+; Scalar: i16 compare feeding an i64 select.
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %cmp = icmp eq i16 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i64 %val3, i64 %val4
+}
+
+define float @fun10(i16 %val1, i16 %val2,
+                float %val3, float %val4) {
+  %cmp = icmp eq i16 %val1, %val2
+  %sel = select i1 %cmp, float %val3, float %val4
+  ret float %sel
+; Scalar: i16 compare feeding a float (fp) select.
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %cmp = icmp eq i16 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select i1 %cmp, float %val3, float %val4
+}
+
+define double @fun11(i16 %val1, i16 %val2,
+                double %val3, double %val4) {
+  %cmp = icmp eq i16 %val1, %val2
+  %sel = select i1 %cmp, double %val3, double %val4
+  ret double %sel
+; Scalar: i16 compare feeding a double (fp) select.
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %cmp = icmp eq i16 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select i1 %cmp, double %val3, double %val4
+}
+
+define i8 @fun12(i32 %val1, i32 %val2,
+                i8 %val3, i8 %val4) {
+  %cmp = icmp eq i32 %val1, %val2
+  %sel = select i1 %cmp, i8 %val3, i8 %val4
+  ret i8 %sel
+; Scalar: i32 compare feeding an i8 select.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq i32 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i8 %val3, i8 %val4
+}
+
+define i16 @fun13(i32 %val1, i32 %val2,
+                i16 %val3, i16 %val4) {
+  %cmp = icmp eq i32 %val1, %val2
+  %sel = select i1 %cmp, i16 %val3, i16 %val4
+  ret i16 %sel
+; Scalar: i32 compare feeding an i16 select.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq i32 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i16 %val3, i16 %val4
+}
+
+define i32 @fun14(i32 %val1, i32 %val2,
+                i32 %val3, i32 %val4) {
+  %cmp = icmp eq i32 %val1, %val2
+  %sel = select i1 %cmp, i32 %val3, i32 %val4
+  ret i32 %sel
+; Scalar: i32 compare feeding an i32 select.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq i32 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i32 %val3, i32 %val4
+}
+
+define i64 @fun15(i32 %val1, i32 %val2,
+                i64 %val3, i64 %val4) {
+  %cmp = icmp eq i32 %val1, %val2
+  %sel = select i1 %cmp, i64 %val3, i64 %val4
+  ret i64 %sel
+; Scalar: i32 compare feeding an i64 select.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq i32 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i64 %val3, i64 %val4
+}
+
+define float @fun16(i32 %val1, i32 %val2,
+                float %val3, float %val4) {
+  %cmp = icmp eq i32 %val1, %val2
+  %sel = select i1 %cmp, float %val3, float %val4
+  ret float %sel
+; Scalar: i32 compare feeding a float (fp) select.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq i32 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select i1 %cmp, float %val3, float %val4
+}
+
+define double @fun17(i32 %val1, i32 %val2,
+                double %val3, double %val4) {
+  %cmp = icmp eq i32 %val1, %val2
+  %sel = select i1 %cmp, double %val3, double %val4
+  ret double %sel
+; Scalar: i32 compare feeding a double (fp) select.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq i32 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select i1 %cmp, double %val3, double %val4
+}
+
+define i8 @fun18(i64 %val1, i64 %val2,
+                i8 %val3, i8 %val4) {
+  %cmp = icmp eq i64 %val1, %val2
+  %sel = select i1 %cmp, i8 %val3, i8 %val4
+  ret i8 %sel
+; Scalar: i64 compare feeding an i8 select.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq i64 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i8 %val3, i8 %val4
+}
+
+define i16 @fun19(i64 %val1, i64 %val2,
+                i16 %val3, i16 %val4) {
+  %cmp = icmp eq i64 %val1, %val2
+  %sel = select i1 %cmp, i16 %val3, i16 %val4
+  ret i16 %sel
+; Scalar: i64 compare feeding an i16 select.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq i64 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i16 %val3, i16 %val4
+}
+
+define i32 @fun20(i64 %val1, i64 %val2,
+                i32 %val3, i32 %val4) {
+  %cmp = icmp eq i64 %val1, %val2
+  %sel = select i1 %cmp, i32 %val3, i32 %val4
+  ret i32 %sel
+; Scalar: i64 compare feeding an i32 select.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq i64 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i32 %val3, i32 %val4
+}
+
+define i64 @fun21(i64 %val1, i64 %val2,
+                i64 %val3, i64 %val4) {
+  %cmp = icmp eq i64 %val1, %val2
+  %sel = select i1 %cmp, i64 %val3, i64 %val4
+  ret i64 %sel
+; Scalar: i64 compare feeding an i64 select.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq i64 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i64 %val3, i64 %val4
+}
+
+define float @fun22(i64 %val1, i64 %val2,
+                float %val3, float %val4) {
+  %cmp = icmp eq i64 %val1, %val2
+  %sel = select i1 %cmp, float %val3, float %val4
+  ret float %sel
+; Scalar: i64 compare feeding a float (fp) select.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq i64 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select i1 %cmp, float %val3, float %val4
+}
+
+define double @fun23(i64 %val1, i64 %val2,
+                double %val3, double %val4) {
+  %cmp = icmp eq i64 %val1, %val2
+  %sel = select i1 %cmp, double %val3, double %val4
+  ret double %sel
+; Scalar: i64 compare feeding a double (fp) select.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq i64 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select i1 %cmp, double %val3, double %val4
+}
+
+define <2 x i8> @fun24(<2 x i8> %val1, <2 x i8> %val2,
+                <2 x i8> %val3, <2 x i8> %val4) {
+  %cmp = icmp eq <2 x i8> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4
+  ret <2 x i8> %sel
+; Vector: <2 x i8> compare feeding a <2 x i8> select (same element width).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4
+}
+
+define <2 x i16> @fun25(<2 x i8> %val1, <2 x i8> %val2,
+                <2 x i16> %val3, <2 x i16> %val4) {
+  %cmp = icmp eq <2 x i8> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4
+  ret <2 x i16> %sel
+; Vector: <2 x i8> compare feeding a <2 x i16> select (select elements are wider).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4
+}
+
+define <2 x i32> @fun26(<2 x i8> %val1, <2 x i8> %val2,
+                <2 x i32> %val3, <2 x i32> %val4) {
+  %cmp = icmp eq <2 x i8> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4
+  ret <2 x i32> %sel
+; Vector: <2 x i8> compare feeding a <2 x i32> select (select elements are wider).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4
+}
+
+define <2 x i64> @fun27(<2 x i8> %val1, <2 x i8> %val2,
+                <2 x i64> %val3, <2 x i64> %val4) {
+  %cmp = icmp eq <2 x i8> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+  ret <2 x i64> %sel
+; Vector: <2 x i8> compare feeding a <2 x i64> select (select elements are wider).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+}
+
+define <2 x float> @fun28(<2 x i8> %val1, <2 x i8> %val2,
+                <2 x float> %val3, <2 x float> %val4) {
+  %cmp = icmp eq <2 x i8> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4
+  ret <2 x float> %sel
+; Vector: <2 x i8> compare feeding a <2 x float> select (select elements are wider).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4
+}
+
+define <2 x double> @fun29(<2 x i8> %val1, <2 x i8> %val2,
+                <2 x double> %val3, <2 x double> %val4) {
+  %cmp = icmp eq <2 x i8> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+  ret <2 x double> %sel
+; Vector: <2 x i8> compare feeding a <2 x double> select (select elements are wider).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+}
+
+define <2 x i8> @fun30(<2 x i16> %val1, <2 x i16> %val2,
+                <2 x i8> %val3, <2 x i8> %val4) {
+  %cmp = icmp eq <2 x i16> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4
+  ret <2 x i8> %sel
+; Vector: <2 x i16> compare feeding a <2 x i8> select (select elements are narrower).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4
+}
+
+define <2 x i16> @fun31(<2 x i16> %val1, <2 x i16> %val2,
+                <2 x i16> %val3, <2 x i16> %val4) {
+  %cmp = icmp eq <2 x i16> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4
+  ret <2 x i16> %sel
+; Vector: <2 x i16> compare feeding a <2 x i16> select (same element width).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4
+}
+
+define <2 x i32> @fun32(<2 x i16> %val1, <2 x i16> %val2,
+                <2 x i32> %val3, <2 x i32> %val4) {
+  %cmp = icmp eq <2 x i16> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4
+  ret <2 x i32> %sel
+; Vector: <2 x i16> compare feeding a <2 x i32> select (select elements are wider).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4
+}
+
+define <2 x i64> @fun33(<2 x i16> %val1, <2 x i16> %val2,
+                <2 x i64> %val3, <2 x i64> %val4) {
+  %cmp = icmp eq <2 x i16> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+  ret <2 x i64> %sel
+; Vector: <2 x i16> compare feeding a <2 x i64> select (select elements are wider).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+}
+
+define <2 x float> @fun34(<2 x i16> %val1, <2 x i16> %val2,
+                <2 x float> %val3, <2 x float> %val4) {
+  %cmp = icmp eq <2 x i16> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4
+  ret <2 x float> %sel
+; Vector: <2 x i16> compare feeding a <2 x float> select (select elements are wider).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4
+}
+
+define <2 x double> @fun35(<2 x i16> %val1, <2 x i16> %val2,
+                <2 x double> %val3, <2 x double> %val4) {
+  %cmp = icmp eq <2 x i16> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+  ret <2 x double> %sel
+; Vector: <2 x i16> compare feeding a <2 x double> select (select elements are wider).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+}
+
+define <2 x i8> @fun36(<2 x i32> %val1, <2 x i32> %val2,
+                <2 x i8> %val3, <2 x i8> %val4) {
+  %cmp = icmp eq <2 x i32> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4
+  ret <2 x i8> %sel
+; Vector: <2 x i32> compare feeding a <2 x i8> select (select elements are narrower).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4
+}
+
+define <2 x i16> @fun37(<2 x i32> %val1, <2 x i32> %val2,
+                <2 x i16> %val3, <2 x i16> %val4) {
+  %cmp = icmp eq <2 x i32> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4
+  ret <2 x i16> %sel
+; Vector: <2 x i32> compare feeding a <2 x i16> select (select elements are narrower).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4
+}
+
+define <2 x i32> @fun38(<2 x i32> %val1, <2 x i32> %val2,
+                <2 x i32> %val3, <2 x i32> %val4) {
+  %cmp = icmp eq <2 x i32> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4
+  ret <2 x i32> %sel
+; Vector: <2 x i32> compare feeding a <2 x i32> select (same element width).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4
+}
+
+define <2 x i64> @fun39(<2 x i32> %val1, <2 x i32> %val2,
+                <2 x i64> %val3, <2 x i64> %val4) {
+  %cmp = icmp eq <2 x i32> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+  ret <2 x i64> %sel
+; Vector: <2 x i32> compare feeding a <2 x i64> select (select elements are wider).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+}
+
+define <2 x float> @fun40(<2 x i32> %val1, <2 x i32> %val2,
+                <2 x float> %val3, <2 x float> %val4) {
+  %cmp = icmp eq <2 x i32> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4
+  ret <2 x float> %sel
+; Vector: <2 x i32> compare feeding a <2 x float> select (same element width).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4
+}
+
+define <2 x double> @fun41(<2 x i32> %val1, <2 x i32> %val2,
+                <2 x double> %val3, <2 x double> %val4) {
+  %cmp = icmp eq <2 x i32> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+  ret <2 x double> %sel
+; Vector: <2 x i32> compare feeding a <2 x double> select (select elements are wider).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+}
+
+define <2 x i8> @fun42(<2 x i64> %val1, <2 x i64> %val2,
+                <2 x i8> %val3, <2 x i8> %val4) {
+  %cmp = icmp eq <2 x i64> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4
+  ret <2 x i8> %sel
+; Vector: <2 x i64> compare feeding a <2 x i8> select (select elements are narrower).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4
+}
+
+define <2 x i16> @fun43(<2 x i64> %val1, <2 x i64> %val2,
+                <2 x i16> %val3, <2 x i16> %val4) {
+  %cmp = icmp eq <2 x i64> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4
+  ret <2 x i16> %sel
+; Vector: <2 x i64> compare feeding a <2 x i16> select (select elements are narrower).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4
+}
+
+define <2 x i32> @fun44(<2 x i64> %val1, <2 x i64> %val2,
+                <2 x i32> %val3, <2 x i32> %val4) {
+  %cmp = icmp eq <2 x i64> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4
+  ret <2 x i32> %sel
+; Vector: <2 x i64> compare feeding a <2 x i32> select (select elements are narrower).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4
+}
+
+define <2 x i64> @fun45(<2 x i64> %val1, <2 x i64> %val2,
+                <2 x i64> %val3, <2 x i64> %val4) {
+  %cmp = icmp eq <2 x i64> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+  ret <2 x i64> %sel
+; Vector: <2 x i64> compare feeding a <2 x i64> select (same element width).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+}
+
+define <2 x float> @fun46(<2 x i64> %val1, <2 x i64> %val2,
+                <2 x float> %val3, <2 x float> %val4) {
+  %cmp = icmp eq <2 x i64> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4
+  ret <2 x float> %sel
+; Vector: <2 x i64> compare feeding a <2 x float> select (select elements are narrower).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4
+}
+
+define <2 x double> @fun47(<2 x i64> %val1, <2 x i64> %val2,
+                <2 x double> %val3, <2 x double> %val4) {
+  %cmp = icmp eq <2 x i64> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+  ret <2 x double> %sel
+; Vector: <2 x i64> compare feeding a <2 x double> select (same element width).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <2 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+}
+
+define <4 x i8> @fun48(<4 x i8> %val1, <4 x i8> %val2,
+                <4 x i8> %val3, <4 x i8> %val4) {
+  %cmp = icmp eq <4 x i8> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4
+  ret <4 x i8> %sel
+; Vector: <4 x i8> compare feeding a <4 x i8> select (same element width).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <4 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4
+}
+
+define <4 x i16> @fun49(<4 x i8> %val1, <4 x i8> %val2,
+                <4 x i16> %val3, <4 x i16> %val4) {
+  %cmp = icmp eq <4 x i8> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4
+  ret <4 x i16> %sel
+; Vector: <4 x i8> compare feeding a <4 x i16> select (select elements are wider).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <4 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4
+}
+
+define <4 x i32> @fun50(<4 x i8> %val1, <4 x i8> %val2,
+                <4 x i32> %val3, <4 x i32> %val4) {
+  %cmp = icmp eq <4 x i8> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+  ret <4 x i32> %sel
+; Vector: <4 x i8> compare feeding a <4 x i32> select (select elements are wider).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <4 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+}
+
+define <4 x i64> @fun51(<4 x i8> %val1, <4 x i8> %val2,
+                <4 x i64> %val3, <4 x i64> %val4) {
+  %cmp = icmp eq <4 x i8> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4
+  ret <4 x i64> %sel
+; Vector: <4 x i8> compare feeding a <4 x i64> select (select elements are wider).
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <4 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4
+}
+
+define <4 x float> @fun52(<4 x i8> %val1, <4 x i8> %val2,
+                <4 x float> %val3, <4 x float> %val4) {
+  %cmp = icmp eq <4 x i8> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+  ret <4 x float> %sel
+; Vector: <4 x i8> compare feeding a <4 x float> select (select elements are wider).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <4 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+}
+
+define <4 x double> @fun53(<4 x i8> %val1, <4 x i8> %val2,
+                <4 x double> %val3, <4 x double> %val4) {
+  %cmp = icmp eq <4 x i8> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4
+  ret <4 x double> %sel
+; Vector: <4 x i8> compare feeding a <4 x double> select (select elements are wider).
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <4 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4
+}
+
+define <4 x i8> @fun54(<4 x i16> %val1, <4 x i16> %val2,
+                <4 x i8> %val3, <4 x i8> %val4) {
+  %cmp = icmp eq <4 x i16> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4
+  ret <4 x i8> %sel
+; Vector: <4 x i16> compare feeding a <4 x i8> select (select elements are narrower).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <4 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4
+}
+
+define <4 x i16> @fun55(<4 x i16> %val1, <4 x i16> %val2,
+                <4 x i16> %val3, <4 x i16> %val4) {
+  %cmp = icmp eq <4 x i16> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4
+  ret <4 x i16> %sel
+; Vector: <4 x i16> compare feeding a <4 x i16> select (same element width).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <4 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4
+}
+
+define <4 x i32> @fun56(<4 x i16> %val1, <4 x i16> %val2,
+                <4 x i32> %val3, <4 x i32> %val4) {
+  %cmp = icmp eq <4 x i16> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+  ret <4 x i32> %sel
+; Vector: <4 x i16> compare feeding a <4 x i32> select (select elements are wider).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <4 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+}
+
+define <4 x i64> @fun57(<4 x i16> %val1, <4 x i16> %val2,
+                <4 x i64> %val3, <4 x i64> %val4) {
+  %cmp = icmp eq <4 x i16> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4
+  ret <4 x i64> %sel
+; Vector: <4 x i16> compare feeding a <4 x i64> select (select elements are wider).
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <4 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4
+}
+
+define <4 x float> @fun58(<4 x i16> %val1, <4 x i16> %val2,
+                <4 x float> %val3, <4 x float> %val4) {
+  %cmp = icmp eq <4 x i16> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+  ret <4 x float> %sel
+; Vector: <4 x i16> compare feeding a <4 x float> select (select elements are wider).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <4 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+}
+
+define <4 x double> @fun59(<4 x i16> %val1, <4 x i16> %val2,
+                <4 x double> %val3, <4 x double> %val4) {
+  %cmp = icmp eq <4 x i16> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4
+  ret <4 x double> %sel
+; Vector: <4 x i16> compare feeding a <4 x double> select (select elements are wider).
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <4 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4
+}
+
+define <4 x i8> @fun60(<4 x i32> %val1, <4 x i32> %val2,
+                <4 x i8> %val3, <4 x i8> %val4) {
+  %cmp = icmp eq <4 x i32> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4
+  ret <4 x i8> %sel
+; Vector: <4 x i32> compare feeding a <4 x i8> select (select elements are narrower).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <4 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4
+}
+
+define <4 x i16> @fun61(<4 x i32> %val1, <4 x i32> %val2,
+                <4 x i16> %val3, <4 x i16> %val4) {
+  %cmp = icmp eq <4 x i32> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4
+  ret <4 x i16> %sel
+; Vector: <4 x i32> compare feeding a <4 x i16> select (select elements are narrower).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <4 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4
+}
+
+define <4 x i32> @fun62(<4 x i32> %val1, <4 x i32> %val2,
+                <4 x i32> %val3, <4 x i32> %val4) {
+  %cmp = icmp eq <4 x i32> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+  ret <4 x i32> %sel
+; Vector: <4 x i32> compare feeding a <4 x i32> select (same element width).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <4 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+}
+
+define <4 x i64> @fun63(<4 x i32> %val1, <4 x i32> %val2,
+                <4 x i64> %val3, <4 x i64> %val4) {
+  %cmp = icmp eq <4 x i32> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4
+  ret <4 x i64> %sel
+; Cost-model expectations for an icmp eq of <4 x i32> operands whose <4 x i1> result selects between <4 x i64> values.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <4 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4
+}
+
+define <4 x float> @fun64(<4 x i32> %val1, <4 x i32> %val2,
+                <4 x float> %val3, <4 x float> %val4) {
+  %cmp = icmp eq <4 x i32> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+  ret <4 x float> %sel
+; Cost-model expectations for an icmp eq of <4 x i32> operands whose <4 x i1> result selects between <4 x float> values.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <4 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+}
+
+define <4 x double> @fun65(<4 x i32> %val1, <4 x i32> %val2,
+                <4 x double> %val3, <4 x double> %val4) {
+  %cmp = icmp eq <4 x i32> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4
+  ret <4 x double> %sel
+; Cost-model expectations for an icmp eq of <4 x i32> operands whose <4 x i1> result selects between <4 x double> values.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <4 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4
+}
+
+define <4 x i8> @fun66(<4 x i64> %val1, <4 x i64> %val2,
+                <4 x i8> %val3, <4 x i8> %val4) {
+  %cmp = icmp eq <4 x i64> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4
+  ret <4 x i8> %sel
+; Cost-model expectations for an icmp eq of <4 x i64> operands whose <4 x i1> result selects between <4 x i8> values.
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %cmp = icmp eq <4 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4
+}
+
+define <4 x i16> @fun67(<4 x i64> %val1, <4 x i64> %val2,
+                <4 x i16> %val3, <4 x i16> %val4) {
+  %cmp = icmp eq <4 x i64> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4
+  ret <4 x i16> %sel
+; Cost-model expectations for an icmp eq of <4 x i64> operands whose <4 x i1> result selects between <4 x i16> values.
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %cmp = icmp eq <4 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4
+}
+
+define <4 x i32> @fun68(<4 x i64> %val1, <4 x i64> %val2,
+                <4 x i32> %val3, <4 x i32> %val4) {
+  %cmp = icmp eq <4 x i64> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+  ret <4 x i32> %sel
+; Cost-model expectations for an icmp eq of <4 x i64> operands whose <4 x i1> result selects between <4 x i32> values.
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %cmp = icmp eq <4 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+}
+
+define <4 x i64> @fun69(<4 x i64> %val1, <4 x i64> %val2,
+                <4 x i64> %val3, <4 x i64> %val4) {
+  %cmp = icmp eq <4 x i64> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4
+  ret <4 x i64> %sel
+; Cost-model expectations for an icmp eq of <4 x i64> operands whose <4 x i1> result selects between <4 x i64> values.
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %cmp = icmp eq <4 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4
+}
+
+define <4 x float> @fun70(<4 x i64> %val1, <4 x i64> %val2,
+                <4 x float> %val3, <4 x float> %val4) {
+  %cmp = icmp eq <4 x i64> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+  ret <4 x float> %sel
+; Cost-model expectations for an icmp eq of <4 x i64> operands whose <4 x i1> result selects between <4 x float> values.
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %cmp = icmp eq <4 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+}
+
+define <4 x double> @fun71(<4 x i64> %val1, <4 x i64> %val2,
+                <4 x double> %val3, <4 x double> %val4) {
+  %cmp = icmp eq <4 x i64> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4
+  ret <4 x double> %sel
+; Cost-model expectations for an icmp eq of <4 x i64> operands whose <4 x i1> result selects between <4 x double> values.
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %cmp = icmp eq <4 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4
+}
+
+define <8 x i8> @fun72(<8 x i8> %val1, <8 x i8> %val2,
+                <8 x i8> %val3, <8 x i8> %val4) {
+  %cmp = icmp eq <8 x i8> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4
+  ret <8 x i8> %sel
+; Cost-model expectations for an icmp eq of <8 x i8> operands whose <8 x i1> result selects between <8 x i8> values.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <8 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4
+}
+
+define <8 x i16> @fun73(<8 x i8> %val1, <8 x i8> %val2,
+                <8 x i16> %val3, <8 x i16> %val4) {
+  %cmp = icmp eq <8 x i8> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+  ret <8 x i16> %sel
+; Cost-model expectations for an icmp eq of <8 x i8> operands whose <8 x i1> result selects between <8 x i16> values.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <8 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+}
+
+define <8 x i32> @fun74(<8 x i8> %val1, <8 x i8> %val2,
+                <8 x i32> %val3, <8 x i32> %val4) {
+  %cmp = icmp eq <8 x i8> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4
+  ret <8 x i32> %sel
+; Cost-model expectations for an icmp eq of <8 x i8> operands whose <8 x i1> result selects between <8 x i32> values.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <8 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4
+}
+
+define <8 x i64> @fun75(<8 x i8> %val1, <8 x i8> %val2,
+                <8 x i64> %val3, <8 x i64> %val4) {
+  %cmp = icmp eq <8 x i8> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4
+  ret <8 x i64> %sel
+; Cost-model expectations for an icmp eq of <8 x i8> operands whose <8 x i1> result selects between <8 x i64> values.
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %cmp = icmp eq <8 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4
+}
+
+define <8 x float> @fun76(<8 x i8> %val1, <8 x i8> %val2,
+                <8 x float> %val3, <8 x float> %val4) {
+  %cmp = icmp eq <8 x i8> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4
+  ret <8 x float> %sel
+; Cost-model expectations for an icmp eq of <8 x i8> operands whose <8 x i1> result selects between <8 x float> values.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <8 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4
+}
+
+define <8 x double> @fun77(<8 x i8> %val1, <8 x i8> %val2,
+                <8 x double> %val3, <8 x double> %val4) {
+  %cmp = icmp eq <8 x i8> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4
+  ret <8 x double> %sel
+; Cost-model expectations for an icmp eq of <8 x i8> operands whose <8 x i1> result selects between <8 x double> values.
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %cmp = icmp eq <8 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4
+}
+
+define <8 x i8> @fun78(<8 x i16> %val1, <8 x i16> %val2,
+                <8 x i8> %val3, <8 x i8> %val4) {
+  %cmp = icmp eq <8 x i16> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4
+  ret <8 x i8> %sel
+; Cost-model expectations for an icmp eq of <8 x i16> operands whose <8 x i1> result selects between <8 x i8> values.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <8 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4
+}
+
+define <8 x i16> @fun79(<8 x i16> %val1, <8 x i16> %val2,
+                <8 x i16> %val3, <8 x i16> %val4) {
+  %cmp = icmp eq <8 x i16> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+  ret <8 x i16> %sel
+; Cost-model expectations for an icmp eq of <8 x i16> operands whose <8 x i1> result selects between <8 x i16> values.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <8 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+}
+
+define <8 x i32> @fun80(<8 x i16> %val1, <8 x i16> %val2,
+                <8 x i32> %val3, <8 x i32> %val4) {
+  %cmp = icmp eq <8 x i16> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4
+  ret <8 x i32> %sel
+; Cost-model expectations for an icmp eq of <8 x i16> operands whose <8 x i1> result selects between <8 x i32> values.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <8 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4
+}
+
+define <8 x i64> @fun81(<8 x i16> %val1, <8 x i16> %val2,
+                <8 x i64> %val3, <8 x i64> %val4) {
+  %cmp = icmp eq <8 x i16> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4
+  ret <8 x i64> %sel
+; Cost-model expectations for an icmp eq of <8 x i16> operands whose <8 x i1> result selects between <8 x i64> values.
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %cmp = icmp eq <8 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4
+}
+
+define <8 x float> @fun82(<8 x i16> %val1, <8 x i16> %val2,
+                <8 x float> %val3, <8 x float> %val4) {
+  %cmp = icmp eq <8 x i16> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4
+  ret <8 x float> %sel
+; Cost-model expectations for an icmp eq of <8 x i16> operands whose <8 x i1> result selects between <8 x float> values.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <8 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4
+}
+
+define <8 x double> @fun83(<8 x i16> %val1, <8 x i16> %val2,
+                <8 x double> %val3, <8 x double> %val4) {
+  %cmp = icmp eq <8 x i16> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4
+  ret <8 x double> %sel
+; Cost-model expectations for an icmp eq of <8 x i16> operands whose <8 x i1> result selects between <8 x double> values.
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %cmp = icmp eq <8 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4
+}
+
+define <8 x i8> @fun84(<8 x i32> %val1, <8 x i32> %val2,
+                <8 x i8> %val3, <8 x i8> %val4) {
+  %cmp = icmp eq <8 x i32> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4
+  ret <8 x i8> %sel
+; Cost-model expectations for an icmp eq of <8 x i32> operands whose <8 x i1> result selects between <8 x i8> values.
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %cmp = icmp eq <8 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4
+}
+
+define <8 x i16> @fun85(<8 x i32> %val1, <8 x i32> %val2,
+                <8 x i16> %val3, <8 x i16> %val4) {
+  %cmp = icmp eq <8 x i32> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+  ret <8 x i16> %sel
+; Cost-model expectations for an icmp eq of <8 x i32> operands whose <8 x i1> result selects between <8 x i16> values.
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %cmp = icmp eq <8 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+}
+
+define <8 x i32> @fun86(<8 x i32> %val1, <8 x i32> %val2,
+                <8 x i32> %val3, <8 x i32> %val4) {
+  %cmp = icmp eq <8 x i32> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4
+  ret <8 x i32> %sel
+; Cost-model expectations for an icmp eq of <8 x i32> operands whose <8 x i1> result selects between <8 x i32> values.
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %cmp = icmp eq <8 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4
+}
+
+define <8 x i64> @fun87(<8 x i32> %val1, <8 x i32> %val2,
+                <8 x i64> %val3, <8 x i64> %val4) {
+  %cmp = icmp eq <8 x i32> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4
+  ret <8 x i64> %sel
+; Cost-model expectations for an icmp eq of <8 x i32> operands whose <8 x i1> result selects between <8 x i64> values.
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %cmp = icmp eq <8 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4
+}
+
+define <8 x float> @fun88(<8 x i32> %val1, <8 x i32> %val2,
+                <8 x float> %val3, <8 x float> %val4) {
+  %cmp = icmp eq <8 x i32> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4
+  ret <8 x float> %sel
+; Cost-model expectations for an icmp eq of <8 x i32> operands whose <8 x i1> result selects between <8 x float> values.
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %cmp = icmp eq <8 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4
+}
+
+define <8 x double> @fun89(<8 x i32> %val1, <8 x i32> %val2,
+                <8 x double> %val3, <8 x double> %val4) {
+  %cmp = icmp eq <8 x i32> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4
+  ret <8 x double> %sel
+; Cost-model expectations for an icmp eq of <8 x i32> operands whose <8 x i1> result selects between <8 x double> values.
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %cmp = icmp eq <8 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4
+}
+
+define <8 x i8> @fun90(<8 x i64> %val1, <8 x i64> %val2,
+                <8 x i8> %val3, <8 x i8> %val4) {
+  %cmp = icmp eq <8 x i64> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4
+  ret <8 x i8> %sel
+; Cost-model expectations for an icmp eq of <8 x i64> operands whose <8 x i1> result selects between <8 x i8> values.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <8 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4
+}
+
+define <8 x i16> @fun91(<8 x i64> %val1, <8 x i64> %val2,
+                <8 x i16> %val3, <8 x i16> %val4) {
+  %cmp = icmp eq <8 x i64> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+  ret <8 x i16> %sel
+; Cost-model expectations for an icmp eq of <8 x i64> operands whose <8 x i1> result selects between <8 x i16> values.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <8 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+}
+
+define <8 x i32> @fun92(<8 x i64> %val1, <8 x i64> %val2,
+                <8 x i32> %val3, <8 x i32> %val4) {
+  %cmp = icmp eq <8 x i64> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4
+  ret <8 x i32> %sel
+; Cost-model expectations for an icmp eq of <8 x i64> operands whose <8 x i1> result selects between <8 x i32> values.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <8 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4
+}
+
+define <8 x i64> @fun93(<8 x i64> %val1, <8 x i64> %val2,
+                <8 x i64> %val3, <8 x i64> %val4) {
+  %cmp = icmp eq <8 x i64> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4
+  ret <8 x i64> %sel
+; Cost-model expectations for an icmp eq of <8 x i64> operands whose <8 x i1> result selects between <8 x i64> values.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <8 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4
+}
+
+define <8 x float> @fun94(<8 x i64> %val1, <8 x i64> %val2,
+                <8 x float> %val3, <8 x float> %val4) {
+  %cmp = icmp eq <8 x i64> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4
+  ret <8 x float> %sel
+; Cost-model expectations for an icmp eq of <8 x i64> operands whose <8 x i1> result selects between <8 x float> values.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <8 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4
+}
+
+define <8 x double> @fun95(<8 x i64> %val1, <8 x i64> %val2,
+                <8 x double> %val3, <8 x double> %val4) {
+  %cmp = icmp eq <8 x i64> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4
+  ret <8 x double> %sel
+; Cost-model expectations for an icmp eq of <8 x i64> operands whose <8 x i1> result selects between <8 x double> values.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <8 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4
+}
+
+define <16 x i8> @fun96(<16 x i8> %val1, <16 x i8> %val2,
+                <16 x i8> %val3, <16 x i8> %val4) {
+  %cmp = icmp eq <16 x i8> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+  ret <16 x i8> %sel
+; Cost-model expectations for an icmp eq of <16 x i8> operands whose <16 x i1> result selects between <16 x i8> values.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = icmp eq <16 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+}
+
+define <16 x i16> @fun97(<16 x i8> %val1, <16 x i8> %val2,
+                <16 x i16> %val3, <16 x i16> %val4) {
+  %cmp = icmp eq <16 x i8> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4
+  ret <16 x i16> %sel
+; Cost-model expectations for an icmp eq of <16 x i8> operands whose <16 x i1> result selects between <16 x i16> values.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <16 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4
+}
+
+define <16 x i32> @fun98(<16 x i8> %val1, <16 x i8> %val2,
+                <16 x i32> %val3, <16 x i32> %val4) {
+  %cmp = icmp eq <16 x i8> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4
+  ret <16 x i32> %sel
+; Cost-model expectations for an icmp eq of <16 x i8> operands whose <16 x i1> result selects between <16 x i32> values.
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %cmp = icmp eq <16 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4
+}
+
+define <16 x i64> @fun99(<16 x i8> %val1, <16 x i8> %val2,
+                <16 x i64> %val3, <16 x i64> %val4) {
+  %cmp = icmp eq <16 x i8> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4
+  ret <16 x i64> %sel
+; Cost-model expectations for an icmp eq of <16 x i8> operands whose <16 x i1> result selects between <16 x i64> values.
+; CHECK: Cost Model: Found an estimated cost of 22 for instruction:   %cmp = icmp eq <16 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 32 for instruction:   %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4
+}
+
+define <16 x float> @fun100(<16 x i8> %val1, <16 x i8> %val2,
+                <16 x float> %val3, <16 x float> %val4) {
+  %cmp = icmp eq <16 x i8> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4
+  ret <16 x float> %sel
+; Cost-model expectations for an icmp eq of <16 x i8> operands whose <16 x i1> result selects between <16 x float> values.
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %cmp = icmp eq <16 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4
+}
+
+define <16 x double> @fun101(<16 x i8> %val1, <16 x i8> %val2,
+                <16 x double> %val3, <16 x double> %val4) {
+  %cmp = icmp eq <16 x i8> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4
+  ret <16 x double> %sel
+; Cost-model expectations for an icmp eq of <16 x i8> operands whose <16 x i1> result selects between <16 x double> values.
+; CHECK: Cost Model: Found an estimated cost of 22 for instruction:   %cmp = icmp eq <16 x i8> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 32 for instruction:   %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4
+}
+
+define <16 x i8> @fun102(<16 x i16> %val1, <16 x i16> %val2,
+                <16 x i8> %val3, <16 x i8> %val4) {
+  %cmp = icmp eq <16 x i16> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+  ret <16 x i8> %sel
+; Cost-model expectations for an icmp eq of <16 x i16> operands whose <16 x i1> result selects between <16 x i8> values.
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %cmp = icmp eq <16 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+}
+
+define <16 x i16> @fun103(<16 x i16> %val1, <16 x i16> %val2,
+                <16 x i16> %val3, <16 x i16> %val4) {
+  %cmp = icmp eq <16 x i16> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4
+  ret <16 x i16> %sel
+; Cost-model expectations for an icmp eq of <16 x i16> operands whose <16 x i1> result selects between <16 x i16> values.
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %cmp = icmp eq <16 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4
+}
+
+define <16 x i32> @fun104(<16 x i16> %val1, <16 x i16> %val2,
+                <16 x i32> %val3, <16 x i32> %val4) {
+  %cmp = icmp eq <16 x i16> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4
+  ret <16 x i32> %sel
+; Cost-model expectations for an icmp eq of <16 x i16> operands whose <16 x i1> result selects between <16 x i32> values.
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %cmp = icmp eq <16 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4
+}
+
+define <16 x i64> @fun105(<16 x i16> %val1, <16 x i16> %val2,
+                <16 x i64> %val3, <16 x i64> %val4) {
+  %cmp = icmp eq <16 x i16> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4
+  ret <16 x i64> %sel
+; Cost-model expectations for an icmp eq of <16 x i16> operands whose <16 x i1> result selects between <16 x i64> values.
+; CHECK: Cost Model: Found an estimated cost of 20 for instruction:   %cmp = icmp eq <16 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4
+}
+
+define <16 x float> @fun106(<16 x i16> %val1, <16 x i16> %val2,
+                <16 x float> %val3, <16 x float> %val4) {
+  %cmp = icmp eq <16 x i16> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4
+  ret <16 x float> %sel
+; Cost-model expectations for an icmp eq of <16 x i16> operands whose <16 x i1> result selects between <16 x float> values.
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %cmp = icmp eq <16 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4
+}
+
+define <16 x double> @fun107(<16 x i16> %val1, <16 x i16> %val2,
+                <16 x double> %val3, <16 x double> %val4) {
+  %cmp = icmp eq <16 x i16> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4
+  ret <16 x double> %sel
+; Cost-model expectations for an icmp eq of <16 x i16> operands whose <16 x i1> result selects between <16 x double> values.
+; CHECK: Cost Model: Found an estimated cost of 20 for instruction:   %cmp = icmp eq <16 x i16> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4
+}
+
+define <16 x i8> @fun108(<16 x i32> %val1, <16 x i32> %val2,
+                <16 x i8> %val3, <16 x i8> %val4) {
+  %cmp = icmp eq <16 x i32> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+  ret <16 x i8> %sel
+; Cost-model expectations for an icmp eq of <16 x i32> operands whose <16 x i1> result selects between <16 x i8> values.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <16 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+}
+
+define <16 x i16> @fun109(<16 x i32> %val1, <16 x i32> %val2,
+                <16 x i16> %val3, <16 x i16> %val4) {
+  %cmp = icmp eq <16 x i32> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4
+  ret <16 x i16> %sel
+; Cost-model expectations for an icmp eq of <16 x i32> operands whose <16 x i1> result selects between <16 x i16> values.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <16 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4
+}
+
+define <16 x i32> @fun110(<16 x i32> %val1, <16 x i32> %val2,
+                <16 x i32> %val3, <16 x i32> %val4) {
+  %cmp = icmp eq <16 x i32> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4
+  ret <16 x i32> %sel
+; Cost-model expectations for an icmp eq of <16 x i32> operands whose <16 x i1> result selects between <16 x i32> values.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <16 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4
+}
+
+define <16 x i64> @fun111(<16 x i32> %val1, <16 x i32> %val2,
+                <16 x i64> %val3, <16 x i64> %val4) {
+  %cmp = icmp eq <16 x i32> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4
+  ret <16 x i64> %sel
+; Cost-model expectations for an icmp eq of <16 x i32> operands whose <16 x i1> result selects between <16 x i64> values.
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %cmp = icmp eq <16 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4
+}
+
+define <16 x float> @fun112(<16 x i32> %val1, <16 x i32> %val2,
+                <16 x float> %val3, <16 x float> %val4) {
+  %cmp = icmp eq <16 x i32> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4
+  ret <16 x float> %sel
+; Cost-model expectations for an icmp eq of <16 x i32> operands whose <16 x i1> result selects between <16 x float> values.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = icmp eq <16 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4
+}
+
+define <16 x double> @fun113(<16 x i32> %val1, <16 x i32> %val2,
+                <16 x double> %val3, <16 x double> %val4) {
+  %cmp = icmp eq <16 x i32> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4
+  ret <16 x double> %sel
+; Cost-model expectations for an icmp eq of <16 x i32> operands whose <16 x i1> result selects between <16 x double> values.
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %cmp = icmp eq <16 x i32> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4
+}
+
+define <16 x i8> @fun114(<16 x i64> %val1, <16 x i64> %val2,
+                <16 x i8> %val3, <16 x i8> %val4) {
+  %cmp = icmp eq <16 x i64> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+  ret <16 x i8> %sel
+; Cost-model expectations for an icmp eq of <16 x i64> operands whose <16 x i1> result selects between <16 x i8> values.
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %cmp = icmp eq <16 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+}
+
+define <16 x i16> @fun115(<16 x i64> %val1, <16 x i64> %val2,
+                <16 x i16> %val3, <16 x i16> %val4) {
+  %cmp = icmp eq <16 x i64> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4
+  ret <16 x i16> %sel
+; Cost-model expectations for an icmp eq of <16 x i64> operands whose <16 x i1> result selects between <16 x i16> values.
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %cmp = icmp eq <16 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4
+}
+
+define <16 x i32> @fun116(<16 x i64> %val1, <16 x i64> %val2,
+                <16 x i32> %val3, <16 x i32> %val4) {
+  %cmp = icmp eq <16 x i64> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4
+  ret <16 x i32> %sel
+; Cost-model expectations for an icmp eq of <16 x i64> operands whose <16 x i1> result selects between <16 x i32> values.
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %cmp = icmp eq <16 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4
+}
+
+define <16 x i64> @fun117(<16 x i64> %val1, <16 x i64> %val2,
+                <16 x i64> %val3, <16 x i64> %val4) {
+  %cmp = icmp eq <16 x i64> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4
+  ret <16 x i64> %sel
+; Cost-model expectations for an icmp eq of <16 x i64> operands whose <16 x i1> result selects between <16 x i64> values.
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %cmp = icmp eq <16 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4
+}
+
+define <16 x float> @fun118(<16 x i64> %val1, <16 x i64> %val2,
+                <16 x float> %val3, <16 x float> %val4) {
+  %cmp = icmp eq <16 x i64> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4
+  ret <16 x float> %sel
+; Cost-model expectations for an icmp eq of <16 x i64> operands whose <16 x i1> result selects between <16 x float> values.
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %cmp = icmp eq <16 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4
+}
+
+define <16 x double> @fun119(<16 x i64> %val1, <16 x i64> %val2,
+                <16 x double> %val3, <16 x double> %val4) {
+  %cmp = icmp eq <16 x i64> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4
+  ret <16 x double> %sel
+; Cost-model expectations for an icmp eq of <16 x i64> operands whose <16 x i1> result selects between <16 x double> values.
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %cmp = icmp eq <16 x i64> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4
+}
+
+define i8 @fun120(float %val1, float %val2,
+                i8 %val3, i8 %val4) {
+  %cmp = fcmp ogt float %val1, %val2
+  %sel = select i1 %cmp, i8 %val3, i8 %val4
+  ret i8 %sel
+; Cost-model expectations for a scalar fcmp ogt of float operands whose i1 result selects between i8 values.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = fcmp ogt float %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i8 %val3, i8 %val4
+}
+
+define i16 @fun121(float %val1, float %val2,
+                i16 %val3, i16 %val4) {
+  %cmp = fcmp ogt float %val1, %val2
+  %sel = select i1 %cmp, i16 %val3, i16 %val4
+  ret i16 %sel
+; Cost-model expectations for a scalar fcmp ogt of float operands whose i1 result selects between i16 values.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = fcmp ogt float %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i16 %val3, i16 %val4
+}
+
+define i32 @fun122(float %val1, float %val2,
+                i32 %val3, i32 %val4) {
+  %cmp = fcmp ogt float %val1, %val2
+  %sel = select i1 %cmp, i32 %val3, i32 %val4
+  ret i32 %sel
+; Cost-model expectations for a scalar fcmp ogt of float operands whose i1 result selects between i32 values.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = fcmp ogt float %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i32 %val3, i32 %val4
+}
+
+define i64 @fun123(float %val1, float %val2,
+                i64 %val3, i64 %val4) {
+  %cmp = fcmp ogt float %val1, %val2
+  %sel = select i1 %cmp, i64 %val3, i64 %val4
+  ret i64 %sel
+; Cost-model expectations for a scalar fcmp ogt of float operands whose i1 result selects between i64 values.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = fcmp ogt float %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i64 %val3, i64 %val4
+}
+
+define float @fun124(float %val1, float %val2,
+                float %val3, float %val4) {
+  %cmp = fcmp ogt float %val1, %val2
+  %sel = select i1 %cmp, float %val3, float %val4
+  ret float %sel
+; Cost-model expectations for a scalar fcmp ogt of float operands whose i1 result selects between float values.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = fcmp ogt float %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select i1 %cmp, float %val3, float %val4
+}
+
+define double @fun125(float %val1, float %val2,
+                double %val3, double %val4) {
+  %cmp = fcmp ogt float %val1, %val2
+  %sel = select i1 %cmp, double %val3, double %val4
+  ret double %sel
+; Cost-model expectations for a scalar fcmp ogt of float operands whose i1 result selects between double values.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = fcmp ogt float %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select i1 %cmp, double %val3, double %val4
+}
+
+define i8 @fun126(double %val1, double %val2,
+                i8 %val3, i8 %val4) {
+  %cmp = fcmp ogt double %val1, %val2
+  %sel = select i1 %cmp, i8 %val3, i8 %val4
+  ret i8 %sel
+; Cost-model expectations for a scalar fcmp ogt of double operands whose i1 result selects between i8 values.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = fcmp ogt double %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i8 %val3, i8 %val4
+}
+
+define i16 @fun127(double %val1, double %val2,
+                i16 %val3, i16 %val4) {
+  %cmp = fcmp ogt double %val1, %val2
+  %sel = select i1 %cmp, i16 %val3, i16 %val4
+  ret i16 %sel
+; Cost-model expectations for a scalar fcmp ogt of double operands whose i1 result selects between i16 values.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = fcmp ogt double %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i16 %val3, i16 %val4
+}
+
+define i32 @fun128(double %val1, double %val2, ; fcmp ogt on double feeding a select of i32 values
+                i32 %val3, i32 %val4) {
+  %cmp = fcmp ogt double %val1, %val2
+  %sel = select i1 %cmp, i32 %val3, i32 %val4
+  ret i32 %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = fcmp ogt double %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i32 %val3, i32 %val4
+}
+
+define i64 @fun129(double %val1, double %val2, ; fcmp ogt on double feeding a select of i64 values
+                i64 %val3, i64 %val4) {
+  %cmp = fcmp ogt double %val1, %val2
+  %sel = select i1 %cmp, i64 %val3, i64 %val4
+  ret i64 %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = fcmp ogt double %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select i1 %cmp, i64 %val3, i64 %val4
+}
+
+define float @fun130(double %val1, double %val2, ; fcmp ogt on double feeding a select of float values
+                float %val3, float %val4) {
+  %cmp = fcmp ogt double %val1, %val2
+  %sel = select i1 %cmp, float %val3, float %val4
+  ret float %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = fcmp ogt double %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select i1 %cmp, float %val3, float %val4
+}
+
+define double @fun131(double %val1, double %val2, ; fcmp ogt on double feeding a select of double values
+                double %val3, double %val4) {
+  %cmp = fcmp ogt double %val1, %val2
+  %sel = select i1 %cmp, double %val3, double %val4
+  ret double %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = fcmp ogt double %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select i1 %cmp, double %val3, double %val4
+}
+
+define <2 x i8> @fun132(<2 x float> %val1, <2 x float> %val2, ; fcmp ogt on <2 x float> feeding a select of <2 x i8> values
+                <2 x i8> %val3, <2 x i8> %val4) {
+  %cmp = fcmp ogt <2 x float> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4
+  ret <2 x i8> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %cmp = fcmp ogt <2 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4
+}
+
+define <2 x i16> @fun133(<2 x float> %val1, <2 x float> %val2, ; fcmp ogt on <2 x float> feeding a select of <2 x i16> values
+                <2 x i16> %val3, <2 x i16> %val4) {
+  %cmp = fcmp ogt <2 x float> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4
+  ret <2 x i16> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %cmp = fcmp ogt <2 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4
+}
+
+define <2 x i32> @fun134(<2 x float> %val1, <2 x float> %val2, ; fcmp ogt on <2 x float> feeding a select of <2 x i32> values
+                <2 x i32> %val3, <2 x i32> %val4) {
+  %cmp = fcmp ogt <2 x float> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4
+  ret <2 x i32> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %cmp = fcmp ogt <2 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4
+}
+
+define <2 x i64> @fun135(<2 x float> %val1, <2 x float> %val2, ; fcmp ogt on <2 x float> feeding a select of <2 x i64> values
+                <2 x i64> %val3, <2 x i64> %val4) {
+  %cmp = fcmp ogt <2 x float> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+  ret <2 x i64> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %cmp = fcmp ogt <2 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+}
+
+define <2 x float> @fun136(<2 x float> %val1, <2 x float> %val2, ; fcmp ogt on <2 x float> feeding a select of <2 x float> values
+                <2 x float> %val3, <2 x float> %val4) {
+  %cmp = fcmp ogt <2 x float> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4
+  ret <2 x float> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %cmp = fcmp ogt <2 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4
+}
+
+define <2 x double> @fun137(<2 x float> %val1, <2 x float> %val2, ; fcmp ogt on <2 x float> feeding a select of <2 x double> values
+                <2 x double> %val3, <2 x double> %val4) {
+  %cmp = fcmp ogt <2 x float> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+  ret <2 x double> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %cmp = fcmp ogt <2 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+}
+
+define <2 x i8> @fun138(<2 x double> %val1, <2 x double> %val2, ; fcmp ogt on <2 x double> feeding a select of <2 x i8> values
+                <2 x i8> %val3, <2 x i8> %val4) {
+  %cmp = fcmp ogt <2 x double> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4
+  ret <2 x i8> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = fcmp ogt <2 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4
+}
+
+define <2 x i16> @fun139(<2 x double> %val1, <2 x double> %val2, ; fcmp ogt on <2 x double> feeding a select of <2 x i16> values
+                <2 x i16> %val3, <2 x i16> %val4) {
+  %cmp = fcmp ogt <2 x double> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4
+  ret <2 x i16> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = fcmp ogt <2 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4
+}
+
+define <2 x i32> @fun140(<2 x double> %val1, <2 x double> %val2, ; fcmp ogt on <2 x double> feeding a select of <2 x i32> values
+                <2 x i32> %val3, <2 x i32> %val4) {
+  %cmp = fcmp ogt <2 x double> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4
+  ret <2 x i32> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = fcmp ogt <2 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4
+}
+
+define <2 x i64> @fun141(<2 x double> %val1, <2 x double> %val2, ; fcmp ogt on <2 x double> feeding a select of <2 x i64> values
+                <2 x i64> %val3, <2 x i64> %val4) {
+  %cmp = fcmp ogt <2 x double> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+  ret <2 x i64> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = fcmp ogt <2 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+}
+
+define <2 x float> @fun142(<2 x double> %val1, <2 x double> %val2, ; fcmp ogt on <2 x double> feeding a select of <2 x float> values
+                <2 x float> %val3, <2 x float> %val4) {
+  %cmp = fcmp ogt <2 x double> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4
+  ret <2 x float> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = fcmp ogt <2 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4
+}
+
+define <2 x double> @fun143(<2 x double> %val1, <2 x double> %val2, ; fcmp ogt on <2 x double> feeding a select of <2 x double> values
+                <2 x double> %val3, <2 x double> %val4) {
+  %cmp = fcmp ogt <2 x double> %val1, %val2
+  %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+  ret <2 x double> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %cmp = fcmp ogt <2 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+}
+
+define <4 x i8> @fun144(<4 x float> %val1, <4 x float> %val2, ; fcmp ogt on <4 x float> feeding a select of <4 x i8> values
+                <4 x i8> %val3, <4 x i8> %val4) {
+  %cmp = fcmp ogt <4 x float> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4
+  ret <4 x i8> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %cmp = fcmp ogt <4 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4
+}
+
+define <4 x i16> @fun145(<4 x float> %val1, <4 x float> %val2, ; fcmp ogt on <4 x float> feeding a select of <4 x i16> values
+                <4 x i16> %val3, <4 x i16> %val4) {
+  %cmp = fcmp ogt <4 x float> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4
+  ret <4 x i16> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %cmp = fcmp ogt <4 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4
+}
+
+define <4 x i32> @fun146(<4 x float> %val1, <4 x float> %val2, ; fcmp ogt on <4 x float> feeding a select of <4 x i32> values
+                <4 x i32> %val3, <4 x i32> %val4) {
+  %cmp = fcmp ogt <4 x float> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+  ret <4 x i32> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %cmp = fcmp ogt <4 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+}
+
+define <4 x i64> @fun147(<4 x float> %val1, <4 x float> %val2, ; fcmp ogt on <4 x float> feeding a select of <4 x i64> values
+                <4 x i64> %val3, <4 x i64> %val4) {
+  %cmp = fcmp ogt <4 x float> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4
+  ret <4 x i64> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 22 for instruction:   %cmp = fcmp ogt <4 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4
+}
+
+define <4 x float> @fun148(<4 x float> %val1, <4 x float> %val2, ; fcmp ogt on <4 x float> feeding a select of <4 x float> values
+                <4 x float> %val3, <4 x float> %val4) {
+  %cmp = fcmp ogt <4 x float> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+  ret <4 x float> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %cmp = fcmp ogt <4 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+}
+
+define <4 x double> @fun149(<4 x float> %val1, <4 x float> %val2, ; fcmp ogt on <4 x float> feeding a select of <4 x double> values
+                <4 x double> %val3, <4 x double> %val4) {
+  %cmp = fcmp ogt <4 x float> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4
+  ret <4 x double> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 22 for instruction:   %cmp = fcmp ogt <4 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4
+}
+
+define <4 x i8> @fun150(<4 x double> %val1, <4 x double> %val2, ; fcmp ogt on <4 x double> feeding a select of <4 x i8> values
+                <4 x i8> %val3, <4 x i8> %val4) {
+  %cmp = fcmp ogt <4 x double> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4
+  ret <4 x i8> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %cmp = fcmp ogt <4 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4
+}
+
+define <4 x i16> @fun151(<4 x double> %val1, <4 x double> %val2, ; fcmp ogt on <4 x double> feeding a select of <4 x i16> values
+                <4 x i16> %val3, <4 x i16> %val4) {
+  %cmp = fcmp ogt <4 x double> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4
+  ret <4 x i16> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %cmp = fcmp ogt <4 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4
+}
+
+define <4 x i32> @fun152(<4 x double> %val1, <4 x double> %val2, ; fcmp ogt on <4 x double> feeding a select of <4 x i32> values
+                <4 x i32> %val3, <4 x i32> %val4) {
+  %cmp = fcmp ogt <4 x double> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+  ret <4 x i32> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %cmp = fcmp ogt <4 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+}
+
+define <4 x i64> @fun153(<4 x double> %val1, <4 x double> %val2, ; fcmp ogt on <4 x double> feeding a select of <4 x i64> values
+                <4 x i64> %val3, <4 x i64> %val4) {
+  %cmp = fcmp ogt <4 x double> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4
+  ret <4 x i64> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %cmp = fcmp ogt <4 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4
+}
+
+define <4 x float> @fun154(<4 x double> %val1, <4 x double> %val2, ; fcmp ogt on <4 x double> feeding a select of <4 x float> values
+                <4 x float> %val3, <4 x float> %val4) {
+  %cmp = fcmp ogt <4 x double> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+  ret <4 x float> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %cmp = fcmp ogt <4 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+}
+
+define <4 x double> @fun155(<4 x double> %val1, <4 x double> %val2, ; fcmp ogt on <4 x double> feeding a select of <4 x double> values
+                <4 x double> %val3, <4 x double> %val4) {
+  %cmp = fcmp ogt <4 x double> %val1, %val2
+  %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4
+  ret <4 x double> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %cmp = fcmp ogt <4 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4
+}
+
+define <8 x i8> @fun156(<8 x float> %val1, <8 x float> %val2, ; fcmp ogt on <8 x float> feeding a select of <8 x i8> values
+                <8 x i8> %val3, <8 x i8> %val4) {
+  %cmp = fcmp ogt <8 x float> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4
+  ret <8 x i8> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 20 for instruction:   %cmp = fcmp ogt <8 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4
+}
+
+define <8 x i16> @fun157(<8 x float> %val1, <8 x float> %val2, ; fcmp ogt on <8 x float> feeding a select of <8 x i16> values
+                <8 x i16> %val3, <8 x i16> %val4) {
+  %cmp = fcmp ogt <8 x float> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+  ret <8 x i16> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 20 for instruction:   %cmp = fcmp ogt <8 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+}
+
+define <8 x i32> @fun158(<8 x float> %val1, <8 x float> %val2, ; fcmp ogt on <8 x float> feeding a select of <8 x i32> values
+                <8 x i32> %val3, <8 x i32> %val4) {
+  %cmp = fcmp ogt <8 x float> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4
+  ret <8 x i32> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 20 for instruction:   %cmp = fcmp ogt <8 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4
+}
+
+define <8 x i64> @fun159(<8 x float> %val1, <8 x float> %val2, ; fcmp ogt on <8 x float> feeding a select of <8 x i64> values
+                <8 x i64> %val3, <8 x i64> %val4) {
+  %cmp = fcmp ogt <8 x float> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4
+  ret <8 x i64> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 44 for instruction:   %cmp = fcmp ogt <8 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4
+}
+
+define <8 x float> @fun160(<8 x float> %val1, <8 x float> %val2, ; fcmp ogt on <8 x float> feeding a select of <8 x float> values
+                <8 x float> %val3, <8 x float> %val4) {
+  %cmp = fcmp ogt <8 x float> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4
+  ret <8 x float> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 20 for instruction:   %cmp = fcmp ogt <8 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4
+}
+
+define <8 x double> @fun161(<8 x float> %val1, <8 x float> %val2, ; fcmp ogt on <8 x float> feeding a select of <8 x double> values
+                <8 x double> %val3, <8 x double> %val4) {
+  %cmp = fcmp ogt <8 x float> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4
+  ret <8 x double> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 44 for instruction:   %cmp = fcmp ogt <8 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4
+}
+
+define <8 x i8> @fun162(<8 x double> %val1, <8 x double> %val2, ; fcmp ogt on <8 x double> feeding a select of <8 x i8> values
+                <8 x i8> %val3, <8 x i8> %val4) {
+  %cmp = fcmp ogt <8 x double> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4
+  ret <8 x i8> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = fcmp ogt <8 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4
+}
+
+define <8 x i16> @fun163(<8 x double> %val1, <8 x double> %val2, ; fcmp ogt on <8 x double> feeding a select of <8 x i16> values
+                <8 x i16> %val3, <8 x i16> %val4) {
+  %cmp = fcmp ogt <8 x double> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+  ret <8 x i16> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = fcmp ogt <8 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+}
+
+define <8 x i32> @fun164(<8 x double> %val1, <8 x double> %val2, ; fcmp ogt on <8 x double> feeding a select of <8 x i32> values
+                <8 x i32> %val3, <8 x i32> %val4) {
+  %cmp = fcmp ogt <8 x double> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4
+  ret <8 x i32> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = fcmp ogt <8 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4
+}
+
+define <8 x i64> @fun165(<8 x double> %val1, <8 x double> %val2, ; fcmp ogt on <8 x double> feeding a select of <8 x i64> values
+                <8 x i64> %val3, <8 x i64> %val4) {
+  %cmp = fcmp ogt <8 x double> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4
+  ret <8 x i64> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = fcmp ogt <8 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4
+}
+
+define <8 x float> @fun166(<8 x double> %val1, <8 x double> %val2, ; fcmp ogt on <8 x double> feeding a select of <8 x float> values
+                <8 x float> %val3, <8 x float> %val4) {
+  %cmp = fcmp ogt <8 x double> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4
+  ret <8 x float> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = fcmp ogt <8 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4
+}
+
+define <8 x double> @fun167(<8 x double> %val1, <8 x double> %val2, ; fcmp ogt on <8 x double> feeding a select of <8 x double> values
+                <8 x double> %val3, <8 x double> %val4) {
+  %cmp = fcmp ogt <8 x double> %val1, %val2
+  %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4
+  ret <8 x double> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %cmp = fcmp ogt <8 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4
+}
+
+define <16 x i8> @fun168(<16 x float> %val1, <16 x float> %val2, ; fcmp ogt on <16 x float> feeding a select of <16 x i8> values
+                <16 x i8> %val3, <16 x i8> %val4) {
+  %cmp = fcmp ogt <16 x float> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+  ret <16 x i8> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 40 for instruction:   %cmp = fcmp ogt <16 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+}
+
+define <16 x i16> @fun169(<16 x float> %val1, <16 x float> %val2, ; fcmp ogt on <16 x float> feeding a select of <16 x i16> values
+                <16 x i16> %val3, <16 x i16> %val4) {
+  %cmp = fcmp ogt <16 x float> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4
+  ret <16 x i16> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 40 for instruction:   %cmp = fcmp ogt <16 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4
+}
+
+define <16 x i32> @fun170(<16 x float> %val1, <16 x float> %val2, ; fcmp ogt on <16 x float> feeding a select of <16 x i32> values
+                <16 x i32> %val3, <16 x i32> %val4) {
+  %cmp = fcmp ogt <16 x float> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4
+  ret <16 x i32> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 40 for instruction:   %cmp = fcmp ogt <16 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4
+}
+
+define <16 x i64> @fun171(<16 x float> %val1, <16 x float> %val2, ; fcmp ogt on <16 x float> feeding a select of <16 x i64> values
+                <16 x i64> %val3, <16 x i64> %val4) {
+  %cmp = fcmp ogt <16 x float> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4
+  ret <16 x i64> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 88 for instruction:   %cmp = fcmp ogt <16 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4
+}
+
+define <16 x float> @fun172(<16 x float> %val1, <16 x float> %val2, ; fcmp ogt on <16 x float> feeding a select of <16 x float> values
+                <16 x float> %val3, <16 x float> %val4) {
+  %cmp = fcmp ogt <16 x float> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4
+  ret <16 x float> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 40 for instruction:   %cmp = fcmp ogt <16 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4
+}
+
+define <16 x double> @fun173(<16 x float> %val1, <16 x float> %val2, ; fcmp ogt on <16 x float> feeding a select of <16 x double> values
+                <16 x double> %val3, <16 x double> %val4) {
+  %cmp = fcmp ogt <16 x float> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4
+  ret <16 x double> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 88 for instruction:   %cmp = fcmp ogt <16 x float> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4
+}
+
+define <16 x i8> @fun174(<16 x double> %val1, <16 x double> %val2, ; fcmp ogt on <16 x double> feeding a select of <16 x i8> values
+                <16 x i8> %val3, <16 x i8> %val4) {
+  %cmp = fcmp ogt <16 x double> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+  ret <16 x i8> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %cmp = fcmp ogt <16 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+}
+
+define <16 x i16> @fun175(<16 x double> %val1, <16 x double> %val2, ; fcmp ogt on <16 x double> feeding a select of <16 x i16> values
+                <16 x i16> %val3, <16 x i16> %val4) {
+  %cmp = fcmp ogt <16 x double> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4
+  ret <16 x i16> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %cmp = fcmp ogt <16 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4
+}
+
+define <16 x i32> @fun176(<16 x double> %val1, <16 x double> %val2, ; fcmp ogt on <16 x double> feeding a select of <16 x i32> values
+                <16 x i32> %val3, <16 x i32> %val4) {
+  %cmp = fcmp ogt <16 x double> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4
+  ret <16 x i32> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %cmp = fcmp ogt <16 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4
+}
+
+define <16 x i64> @fun177(<16 x double> %val1, <16 x double> %val2, ; fcmp ogt on <16 x double> feeding a select of <16 x i64> values
+                <16 x i64> %val3, <16 x i64> %val4) {
+  %cmp = fcmp ogt <16 x double> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4
+  ret <16 x i64> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %cmp = fcmp ogt <16 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4
+}
+
+define <16 x float> @fun178(<16 x double> %val1, <16 x double> %val2, ; fcmp ogt on <16 x double> feeding a select of <16 x float> values
+                <16 x float> %val3, <16 x float> %val4) {
+  %cmp = fcmp ogt <16 x double> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4
+  ret <16 x float> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %cmp = fcmp ogt <16 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4
+}
+
+define <16 x double> @fun179(<16 x double> %val1, <16 x double> %val2, ; fcmp ogt on <16 x double> feeding a select of <16 x double> values
+                <16 x double> %val3, <16 x double> %val4) {
+  %cmp = fcmp ogt <16 x double> %val1, %val2
+  %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4
+  ret <16 x double> %sel
+; Expected cost-model estimates for %cmp and %sel:
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %cmp = fcmp ogt <16 x double> %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4
+}
Index: test/Analysis/CostModel/SystemZ/fp-arith.ll
===================================================================
--- /dev/null
+++ test/Analysis/CostModel/SystemZ/fp-arith.ll
@@ -0,0 +1,119 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
+;
+; Note: The costs of the scalarized vector instructions do not include any
+; extracts, because the operands are undef.
+;
+; Note: FRem is implemented with a libcall, so it is not included here.
+
+define void @fadd() { ; fadd on float, double, fp128 and on 2- to 16-element float/double vectors, all with undef operands
+  %res0 = fadd float undef, undef
+  %res1 = fadd double undef, undef
+  %res2 = fadd fp128 undef, undef
+  %res3 = fadd <2 x float> undef, undef
+  %res4 = fadd <2 x double> undef, undef
+  %res5 = fadd <4 x float> undef, undef
+  %res6 = fadd <4 x double> undef, undef
+  %res7 = fadd <8 x float> undef, undef
+  %res8 = fadd <8 x double> undef, undef
+  %res9 = fadd <16 x float> undef, undef
+  %res10 = fadd <16 x double> undef, undef
+; Expected cost-model estimates, one per fadd above, in the same order:
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res0 = fadd float undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res1 = fadd double undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res2 = fadd fp128 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res3 = fadd <2 x float> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res4 = fadd <2 x double> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res5 = fadd <4 x float> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res6 = fadd <4 x double> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %res7 = fadd <8 x float> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res8 = fadd <8 x double> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 32 for instruction:   %res9 = fadd <16 x float> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res10 = fadd <16 x double> undef, undef
+
+  ret void;
+}
+
+define void @fsub() { ; fsub on float, double, fp128 and on 2- to 16-element float/double vectors, all with undef operands
+  %res0 = fsub float undef, undef
+  %res1 = fsub double undef, undef
+  %res2 = fsub fp128 undef, undef
+  %res3 = fsub <2 x float> undef, undef
+  %res4 = fsub <2 x double> undef, undef
+  %res5 = fsub <4 x float> undef, undef
+  %res6 = fsub <4 x double> undef, undef
+  %res7 = fsub <8 x float> undef, undef
+  %res8 = fsub <8 x double> undef, undef
+  %res9 = fsub <16 x float> undef, undef
+  %res10 = fsub <16 x double> undef, undef
+; Expected cost-model estimates, one per fsub above, in the same order:
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res0 = fsub float undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res1 = fsub double undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res2 = fsub fp128 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res3 = fsub <2 x float> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res4 = fsub <2 x double> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res5 = fsub <4 x float> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res6 = fsub <4 x double> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %res7 = fsub <8 x float> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res8 = fsub <8 x double> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 32 for instruction:   %res9 = fsub <16 x float> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res10 = fsub <16 x double> undef, undef
+
+  ret void;
+}
+
+define void @fmul() { ; fmul on float, double, fp128 and on 2- to 16-element float/double vectors, all with undef operands
+  %res0 = fmul float undef, undef
+  %res1 = fmul double undef, undef
+  %res2 = fmul fp128 undef, undef
+  %res3 = fmul <2 x float> undef, undef
+  %res4 = fmul <2 x double> undef, undef
+  %res5 = fmul <4 x float> undef, undef
+  %res6 = fmul <4 x double> undef, undef
+  %res7 = fmul <8 x float> undef, undef
+  %res8 = fmul <8 x double> undef, undef
+  %res9 = fmul <16 x float> undef, undef
+  %res10 = fmul <16 x double> undef, undef
+; Expected cost-model estimates, one per fmul above, in the same order:
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res0 = fmul float undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res1 = fmul double undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res2 = fmul fp128 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res3 = fmul <2 x float> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res4 = fmul <2 x double> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res5 = fmul <4 x float> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res6 = fmul <4 x double> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %res7 = fmul <8 x float> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res8 = fmul <8 x double> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 32 for instruction:   %res9 = fmul <16 x float> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res10 = fmul <16 x double> undef, undef
+
+  ret void;
+}
+
+define void @fdiv() { ; fdiv on float, double, fp128 and on 2- to 16-element float/double vectors, all with undef operands
+  %res0 = fdiv float undef, undef
+  %res1 = fdiv double undef, undef
+  %res2 = fdiv fp128 undef, undef
+  %res3 = fdiv <2 x float> undef, undef
+  %res4 = fdiv <2 x double> undef, undef
+  %res5 = fdiv <4 x float> undef, undef
+  %res6 = fdiv <4 x double> undef, undef
+  %res7 = fdiv <8 x float> undef, undef
+  %res8 = fdiv <8 x double> undef, undef
+  %res9 = fdiv <16 x float> undef, undef
+  %res10 = fdiv <16 x double> undef, undef
+; Expected cost-model estimates, one per fdiv above, in the same order:
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res0 = fdiv float undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res1 = fdiv double undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res2 = fdiv fp128 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res3 = fdiv <2 x float> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res4 = fdiv <2 x double> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res5 = fdiv <4 x float> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res6 = fdiv <4 x double> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %res7 = fdiv <8 x float> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res8 = fdiv <8 x double> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 32 for instruction:   %res9 = fdiv <16 x float> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res10 = fdiv <16 x double> undef, undef
+
+  ret void;
+}
+
Index: test/Analysis/CostModel/SystemZ/fp-cast.ll
===================================================================
--- /dev/null
+++ test/Analysis/CostModel/SystemZ/fp-cast.ll
@@ -0,0 +1,541 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
+;
+; Note: The costs of scalarized vector instructions do not include any
+; extracts, due to the undef operands.
+
+define void @fpext() {
+  %v0 = fpext double undef to fp128
+  %v1 = fpext float undef to fp128
+  %v2 = fpext float undef to double
+  %v3 = fpext <2 x double> undef to <2 x fp128>
+  %v4 = fpext <2 x float> undef to <2 x fp128>
+  %v5 = fpext <2 x float> undef to <2 x double>
+  %v6 = fpext <4 x double> undef to <4 x fp128>
+  %v7 = fpext <4 x float> undef to <4 x fp128>
+  %v8 = fpext <4 x float> undef to <4 x double>
+  %v9 = fpext <8 x double> undef to <8 x fp128>
+  %v10 = fpext <8 x float> undef to <8 x fp128>
+  %v11 = fpext <8 x float> undef to <8 x double>
+  %v12 = fpext <16 x float> undef to <16 x double>
+; Expected costs in instruction order: scalar extensions cost 1; each vector form costs 2 per element.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v0 = fpext double undef to fp128
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v1 = fpext float undef to fp128
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v2 = fpext float undef to double
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v3 = fpext <2 x double> undef to <2 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v4 = fpext <2 x float> undef to <2 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v5 = fpext <2 x float> undef to <2 x double>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v6 = fpext <4 x double> undef to <4 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v7 = fpext <4 x float> undef to <4 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v8 = fpext <4 x float> undef to <4 x double>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v9 = fpext <8 x double> undef to <8 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v10 = fpext <8 x float> undef to <8 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v11 = fpext <8 x float> undef to <8 x double>
+; CHECK: Cost Model: Found an estimated cost of 32 for instruction:   %v12 = fpext <16 x float> undef to <16 x double>
+
+  ret void;
+}
+
+define void @fptosi() {
+  %v0 = fptosi fp128 undef to i64
+  %v1 = fptosi fp128 undef to i32
+  %v2 = fptosi fp128 undef to i16
+  %v3 = fptosi fp128 undef to i8
+  %v4 = fptosi double undef to i64
+  %v5 = fptosi double undef to i32
+  %v6 = fptosi double undef to i16
+  %v7 = fptosi double undef to i8
+  %v8 = fptosi float undef to i64
+  %v9 = fptosi float undef to i32
+  %v10 = fptosi float undef to i16
+  %v11 = fptosi float undef to i8
+  %v12 = fptosi <2 x fp128> undef to <2 x i64>
+  %v13 = fptosi <2 x fp128> undef to <2 x i32>
+  %v14 = fptosi <2 x fp128> undef to <2 x i16>
+  %v15 = fptosi <2 x fp128> undef to <2 x i8>
+  %v16 = fptosi <2 x double> undef to <2 x i64>
+  %v17 = fptosi <2 x double> undef to <2 x i32>
+  %v18 = fptosi <2 x double> undef to <2 x i16>
+  %v19 = fptosi <2 x double> undef to <2 x i8>
+  %v20 = fptosi <2 x float> undef to <2 x i64>
+  %v21 = fptosi <2 x float> undef to <2 x i32>
+  %v22 = fptosi <2 x float> undef to <2 x i16>
+  %v23 = fptosi <2 x float> undef to <2 x i8>
+  %v24 = fptosi <4 x fp128> undef to <4 x i64>
+  %v25 = fptosi <4 x fp128> undef to <4 x i32>
+  %v26 = fptosi <4 x fp128> undef to <4 x i16>
+  %v27 = fptosi <4 x fp128> undef to <4 x i8>
+  %v28 = fptosi <4 x double> undef to <4 x i64>
+  %v29 = fptosi <4 x double> undef to <4 x i32>
+  %v30 = fptosi <4 x double> undef to <4 x i16>
+  %v31 = fptosi <4 x double> undef to <4 x i8>
+  %v32 = fptosi <4 x float> undef to <4 x i64>
+  %v33 = fptosi <4 x float> undef to <4 x i32>
+  %v34 = fptosi <4 x float> undef to <4 x i16>
+  %v35 = fptosi <4 x float> undef to <4 x i8>
+  %v36 = fptosi <8 x fp128> undef to <8 x i64>
+  %v37 = fptosi <8 x fp128> undef to <8 x i32>
+  %v38 = fptosi <8 x fp128> undef to <8 x i16>
+  %v39 = fptosi <8 x fp128> undef to <8 x i8>
+  %v40 = fptosi <8 x double> undef to <8 x i64>
+  %v41 = fptosi <8 x double> undef to <8 x i32>
+  %v42 = fptosi <8 x double> undef to <8 x i16>
+  %v43 = fptosi <8 x double> undef to <8 x i8>
+  %v44 = fptosi <8 x float> undef to <8 x i64>
+  %v45 = fptosi <8 x float> undef to <8 x i32>
+  %v46 = fptosi <8 x float> undef to <8 x i16>
+  %v47 = fptosi <8 x float> undef to <8 x i8>
+  %v48 = fptosi <16 x double> undef to <16 x i64>
+  %v49 = fptosi <16 x double> undef to <16 x i32>
+  %v50 = fptosi <16 x double> undef to <16 x i16>
+  %v51 = fptosi <16 x double> undef to <16 x i8>
+  %v52 = fptosi <16 x float> undef to <16 x i64>
+  %v53 = fptosi <16 x float> undef to <16 x i32>
+  %v54 = fptosi <16 x float> undef to <16 x i16>
+  %v55 = fptosi <16 x float> undef to <16 x i8>
+; Expected costs in instruction order: scalars cost 1; <N x double> to <N x i64> costs N/2.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v0 = fptosi fp128 undef to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v1 = fptosi fp128 undef to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v2 = fptosi fp128 undef to i16
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v3 = fptosi fp128 undef to i8
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v4 = fptosi double undef to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v5 = fptosi double undef to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v6 = fptosi double undef to i16
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v7 = fptosi double undef to i8
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v8 = fptosi float undef to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v9 = fptosi float undef to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v10 = fptosi float undef to i16
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v11 = fptosi float undef to i8
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v12 = fptosi <2 x fp128> undef to <2 x i64>
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v13 = fptosi <2 x fp128> undef to <2 x i32>
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v14 = fptosi <2 x fp128> undef to <2 x i16>
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v15 = fptosi <2 x fp128> undef to <2 x i8>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v16 = fptosi <2 x double> undef to <2 x i64>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v17 = fptosi <2 x double> undef to <2 x i32>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v18 = fptosi <2 x double> undef to <2 x i16>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v19 = fptosi <2 x double> undef to <2 x i8>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v20 = fptosi <2 x float> undef to <2 x i64>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v21 = fptosi <2 x float> undef to <2 x i32>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v22 = fptosi <2 x float> undef to <2 x i16>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v23 = fptosi <2 x float> undef to <2 x i8>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v24 = fptosi <4 x fp128> undef to <4 x i64>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v25 = fptosi <4 x fp128> undef to <4 x i32>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v26 = fptosi <4 x fp128> undef to <4 x i16>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v27 = fptosi <4 x fp128> undef to <4 x i8>
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v28 = fptosi <4 x double> undef to <4 x i64>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v29 = fptosi <4 x double> undef to <4 x i32>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v30 = fptosi <4 x double> undef to <4 x i16>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v31 = fptosi <4 x double> undef to <4 x i8>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v32 = fptosi <4 x float> undef to <4 x i64>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v33 = fptosi <4 x float> undef to <4 x i32>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v34 = fptosi <4 x float> undef to <4 x i16>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v35 = fptosi <4 x float> undef to <4 x i8>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v36 = fptosi <8 x fp128> undef to <8 x i64>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v37 = fptosi <8 x fp128> undef to <8 x i32>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v38 = fptosi <8 x fp128> undef to <8 x i16>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v39 = fptosi <8 x fp128> undef to <8 x i8>
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v40 = fptosi <8 x double> undef to <8 x i64>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v41 = fptosi <8 x double> undef to <8 x i32>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v42 = fptosi <8 x double> undef to <8 x i16>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v43 = fptosi <8 x double> undef to <8 x i8>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v44 = fptosi <8 x float> undef to <8 x i64>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v45 = fptosi <8 x float> undef to <8 x i32>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v46 = fptosi <8 x float> undef to <8 x i16>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v47 = fptosi <8 x float> undef to <8 x i8>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v48 = fptosi <16 x double> undef to <16 x i64>
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %v49 = fptosi <16 x double> undef to <16 x i32>
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %v50 = fptosi <16 x double> undef to <16 x i16>
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %v51 = fptosi <16 x double> undef to <16 x i8>
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %v52 = fptosi <16 x float> undef to <16 x i64>
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %v53 = fptosi <16 x float> undef to <16 x i32>
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %v54 = fptosi <16 x float> undef to <16 x i16>
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %v55 = fptosi <16 x float> undef to <16 x i8>
+
+  ret void;
+}
+
+
+define void @fptoui() {
+  %v0 = fptoui fp128 undef to i64
+  %v1 = fptoui fp128 undef to i32
+  %v2 = fptoui fp128 undef to i16
+  %v3 = fptoui fp128 undef to i8
+  %v4 = fptoui double undef to i64
+  %v5 = fptoui double undef to i32
+  %v6 = fptoui double undef to i16
+  %v7 = fptoui double undef to i8
+  %v8 = fptoui float undef to i64
+  %v9 = fptoui float undef to i32
+  %v10 = fptoui float undef to i16
+  %v11 = fptoui float undef to i8
+  %v12 = fptoui <2 x fp128> undef to <2 x i64>
+  %v13 = fptoui <2 x fp128> undef to <2 x i32>
+  %v14 = fptoui <2 x fp128> undef to <2 x i16>
+  %v15 = fptoui <2 x fp128> undef to <2 x i8>
+  %v16 = fptoui <2 x double> undef to <2 x i64>
+  %v17 = fptoui <2 x double> undef to <2 x i32>
+  %v18 = fptoui <2 x double> undef to <2 x i16>
+  %v19 = fptoui <2 x double> undef to <2 x i8>
+  %v20 = fptoui <2 x float> undef to <2 x i64>
+  %v21 = fptoui <2 x float> undef to <2 x i32>
+  %v22 = fptoui <2 x float> undef to <2 x i16>
+  %v23 = fptoui <2 x float> undef to <2 x i8>
+  %v24 = fptoui <4 x fp128> undef to <4 x i64>
+  %v25 = fptoui <4 x fp128> undef to <4 x i32>
+  %v26 = fptoui <4 x fp128> undef to <4 x i16>
+  %v27 = fptoui <4 x fp128> undef to <4 x i8>
+  %v28 = fptoui <4 x double> undef to <4 x i64>
+  %v29 = fptoui <4 x double> undef to <4 x i32>
+  %v30 = fptoui <4 x double> undef to <4 x i16>
+  %v31 = fptoui <4 x double> undef to <4 x i8>
+  %v32 = fptoui <4 x float> undef to <4 x i64>
+  %v33 = fptoui <4 x float> undef to <4 x i32>
+  %v34 = fptoui <4 x float> undef to <4 x i16>
+  %v35 = fptoui <4 x float> undef to <4 x i8>
+  %v36 = fptoui <8 x fp128> undef to <8 x i64>
+  %v37 = fptoui <8 x fp128> undef to <8 x i32>
+  %v38 = fptoui <8 x fp128> undef to <8 x i16>
+  %v39 = fptoui <8 x fp128> undef to <8 x i8>
+  %v40 = fptoui <8 x double> undef to <8 x i64>
+  %v41 = fptoui <8 x double> undef to <8 x i32>
+  %v42 = fptoui <8 x double> undef to <8 x i16>
+  %v43 = fptoui <8 x double> undef to <8 x i8>
+  %v44 = fptoui <8 x float> undef to <8 x i64>
+  %v45 = fptoui <8 x float> undef to <8 x i32>
+  %v46 = fptoui <8 x float> undef to <8 x i16>
+  %v47 = fptoui <8 x float> undef to <8 x i8>
+  %v48 = fptoui <16 x double> undef to <16 x i64>
+  %v49 = fptoui <16 x double> undef to <16 x i32>
+  %v50 = fptoui <16 x double> undef to <16 x i16>
+  %v51 = fptoui <16 x double> undef to <16 x i8>
+  %v52 = fptoui <16 x float> undef to <16 x i64>
+  %v53 = fptoui <16 x float> undef to <16 x i32>
+  %v54 = fptoui <16 x float> undef to <16 x i16>
+  %v55 = fptoui <16 x float> undef to <16 x i8>
+; Expected costs in instruction order: scalars cost 1; <N x double> to <N x i64> costs N/2.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v0 = fptoui fp128 undef to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v1 = fptoui fp128 undef to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v2 = fptoui fp128 undef to i16
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v3 = fptoui fp128 undef to i8
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v4 = fptoui double undef to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v5 = fptoui double undef to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v6 = fptoui double undef to i16
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v7 = fptoui double undef to i8
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v8 = fptoui float undef to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v9 = fptoui float undef to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v10 = fptoui float undef to i16
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v11 = fptoui float undef to i8
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v12 = fptoui <2 x fp128> undef to <2 x i64>
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v13 = fptoui <2 x fp128> undef to <2 x i32>
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v14 = fptoui <2 x fp128> undef to <2 x i16>
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v15 = fptoui <2 x fp128> undef to <2 x i8>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v16 = fptoui <2 x double> undef to <2 x i64>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v17 = fptoui <2 x double> undef to <2 x i32>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v18 = fptoui <2 x double> undef to <2 x i16>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v19 = fptoui <2 x double> undef to <2 x i8>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v20 = fptoui <2 x float> undef to <2 x i64>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v21 = fptoui <2 x float> undef to <2 x i32>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v22 = fptoui <2 x float> undef to <2 x i16>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v23 = fptoui <2 x float> undef to <2 x i8>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v24 = fptoui <4 x fp128> undef to <4 x i64>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v25 = fptoui <4 x fp128> undef to <4 x i32>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v26 = fptoui <4 x fp128> undef to <4 x i16>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v27 = fptoui <4 x fp128> undef to <4 x i8>
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v28 = fptoui <4 x double> undef to <4 x i64>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v29 = fptoui <4 x double> undef to <4 x i32>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v30 = fptoui <4 x double> undef to <4 x i16>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v31 = fptoui <4 x double> undef to <4 x i8>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v32 = fptoui <4 x float> undef to <4 x i64>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v33 = fptoui <4 x float> undef to <4 x i32>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v34 = fptoui <4 x float> undef to <4 x i16>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v35 = fptoui <4 x float> undef to <4 x i8>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v36 = fptoui <8 x fp128> undef to <8 x i64>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v37 = fptoui <8 x fp128> undef to <8 x i32>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v38 = fptoui <8 x fp128> undef to <8 x i16>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v39 = fptoui <8 x fp128> undef to <8 x i8>
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v40 = fptoui <8 x double> undef to <8 x i64>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v41 = fptoui <8 x double> undef to <8 x i32>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v42 = fptoui <8 x double> undef to <8 x i16>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v43 = fptoui <8 x double> undef to <8 x i8>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v44 = fptoui <8 x float> undef to <8 x i64>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v45 = fptoui <8 x float> undef to <8 x i32>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v46 = fptoui <8 x float> undef to <8 x i16>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v47 = fptoui <8 x float> undef to <8 x i8>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v48 = fptoui <16 x double> undef to <16 x i64>
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %v49 = fptoui <16 x double> undef to <16 x i32>
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %v50 = fptoui <16 x double> undef to <16 x i16>
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %v51 = fptoui <16 x double> undef to <16 x i8>
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %v52 = fptoui <16 x float> undef to <16 x i64>
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %v53 = fptoui <16 x float> undef to <16 x i32>
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %v54 = fptoui <16 x float> undef to <16 x i16>
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %v55 = fptoui <16 x float> undef to <16 x i8>
+
+  ret void;
+}
+
+define void @fptrunc() {
+  %v0 = fptrunc fp128 undef to double
+  %v1 = fptrunc fp128 undef to float
+  %v2 = fptrunc double undef to float
+  %v3 = fptrunc <2 x fp128> undef to <2 x double>
+  %v4 = fptrunc <2 x fp128> undef to <2 x float>
+  %v5 = fptrunc <2 x double> undef to <2 x float>
+  %v6 = fptrunc <4 x fp128> undef to <4 x double>
+  %v7 = fptrunc <4 x fp128> undef to <4 x float>
+  %v8 = fptrunc <4 x double> undef to <4 x float>
+  %v9 = fptrunc <8 x fp128> undef to <8 x double>
+  %v10 = fptrunc <8 x fp128> undef to <8 x float>
+  %v11 = fptrunc <8 x double> undef to <8 x float>
+  %v12 = fptrunc <16 x double> undef to <16 x float>
+; Expected costs in instruction order: scalars cost 1; fp128-source vectors cost 2 per element.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v0 = fptrunc fp128 undef to double
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v1 = fptrunc fp128 undef to float
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v2 = fptrunc double undef to float
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v3 = fptrunc <2 x fp128> undef to <2 x double>
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v4 = fptrunc <2 x fp128> undef to <2 x float>
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v5 = fptrunc <2 x double> undef to <2 x float>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v6 = fptrunc <4 x fp128> undef to <4 x double>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v7 = fptrunc <4 x fp128> undef to <4 x float>
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %v8 = fptrunc <4 x double> undef to <4 x float>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v9 = fptrunc <8 x fp128> undef to <8 x double>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v10 = fptrunc <8 x fp128> undef to <8 x float>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v11 = fptrunc <8 x double> undef to <8 x float>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v12 = fptrunc <16 x double> undef to <16 x float>
+
+  ret void;
+}
+
+define void @sitofp() {
+  %v0 = sitofp i64 undef to fp128
+  %v1 = sitofp i64 undef to double
+  %v2 = sitofp i64 undef to float
+  %v3 = sitofp i32 undef to fp128
+  %v4 = sitofp i32 undef to double
+  %v5 = sitofp i32 undef to float
+  %v6 = sitofp i16 undef to fp128
+  %v7 = sitofp i16 undef to double
+  %v8 = sitofp i16 undef to float
+  %v9 = sitofp i8 undef to fp128
+  %v10 = sitofp i8 undef to double
+  %v11 = sitofp i8 undef to float
+  %v12 = sitofp <2 x i64> undef to <2 x fp128>
+  %v13 = sitofp <2 x i64> undef to <2 x double>
+  %v14 = sitofp <2 x i64> undef to <2 x float>
+  %v15 = sitofp <2 x i32> undef to <2 x fp128>
+  %v16 = sitofp <2 x i32> undef to <2 x double>
+  %v17 = sitofp <2 x i32> undef to <2 x float>
+  %v18 = sitofp <2 x i16> undef to <2 x fp128>
+  %v19 = sitofp <2 x i16> undef to <2 x double>
+  %v20 = sitofp <2 x i16> undef to <2 x float>
+  %v21 = sitofp <2 x i8> undef to <2 x fp128>
+  %v22 = sitofp <2 x i8> undef to <2 x double>
+  %v23 = sitofp <2 x i8> undef to <2 x float>
+  %v24 = sitofp <4 x i64> undef to <4 x fp128>
+  %v25 = sitofp <4 x i64> undef to <4 x double>
+  %v26 = sitofp <4 x i64> undef to <4 x float>
+  %v27 = sitofp <4 x i32> undef to <4 x fp128>
+  %v28 = sitofp <4 x i32> undef to <4 x double>
+  %v29 = sitofp <4 x i32> undef to <4 x float>
+  %v30 = sitofp <4 x i16> undef to <4 x fp128>
+  %v31 = sitofp <4 x i16> undef to <4 x double>
+  %v32 = sitofp <4 x i16> undef to <4 x float>
+  %v33 = sitofp <4 x i8> undef to <4 x fp128>
+  %v34 = sitofp <4 x i8> undef to <4 x double>
+  %v35 = sitofp <4 x i8> undef to <4 x float>
+  %v36 = sitofp <8 x i64> undef to <8 x fp128>
+  %v37 = sitofp <8 x i64> undef to <8 x double>
+  %v38 = sitofp <8 x i64> undef to <8 x float>
+  %v39 = sitofp <8 x i32> undef to <8 x fp128>
+  %v40 = sitofp <8 x i32> undef to <8 x double>
+  %v41 = sitofp <8 x i32> undef to <8 x float>
+  %v42 = sitofp <8 x i16> undef to <8 x fp128>
+  %v43 = sitofp <8 x i16> undef to <8 x double>
+  %v44 = sitofp <8 x i16> undef to <8 x float>
+  %v45 = sitofp <8 x i8> undef to <8 x fp128>
+  %v46 = sitofp <8 x i8> undef to <8 x double>
+  %v47 = sitofp <8 x i8> undef to <8 x float>
+  %v48 = sitofp <16 x i64> undef to <16 x double>
+  %v49 = sitofp <16 x i64> undef to <16 x float>
+  %v50 = sitofp <16 x i32> undef to <16 x double>
+  %v51 = sitofp <16 x i32> undef to <16 x float>
+  %v52 = sitofp <16 x i16> undef to <16 x double>
+  %v53 = sitofp <16 x i16> undef to <16 x float>
+  %v54 = sitofp <16 x i8> undef to <16 x double>
+  %v55 = sitofp <16 x i8> undef to <16 x float>
+; Expected costs in instruction order: scalar i64/i32 sources cost 1, scalar i16/i8 sources cost 2.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v0 = sitofp i64 undef to fp128
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v1 = sitofp i64 undef to double
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v2 = sitofp i64 undef to float
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v3 = sitofp i32 undef to fp128
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v4 = sitofp i32 undef to double
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v5 = sitofp i32 undef to float
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v6 = sitofp i16 undef to fp128
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v7 = sitofp i16 undef to double
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v8 = sitofp i16 undef to float
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v9 = sitofp i8 undef to fp128
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v10 = sitofp i8 undef to double
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v11 = sitofp i8 undef to float
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v12 = sitofp <2 x i64> undef to <2 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v13 = sitofp <2 x i64> undef to <2 x double>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v14 = sitofp <2 x i64> undef to <2 x float>
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v15 = sitofp <2 x i32> undef to <2 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v16 = sitofp <2 x i32> undef to <2 x double>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v17 = sitofp <2 x i32> undef to <2 x float>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v18 = sitofp <2 x i16> undef to <2 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v19 = sitofp <2 x i16> undef to <2 x double>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v20 = sitofp <2 x i16> undef to <2 x float>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v21 = sitofp <2 x i8> undef to <2 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v22 = sitofp <2 x i8> undef to <2 x double>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v23 = sitofp <2 x i8> undef to <2 x float>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v24 = sitofp <4 x i64> undef to <4 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v25 = sitofp <4 x i64> undef to <4 x double>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v26 = sitofp <4 x i64> undef to <4 x float>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v27 = sitofp <4 x i32> undef to <4 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v28 = sitofp <4 x i32> undef to <4 x double>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v29 = sitofp <4 x i32> undef to <4 x float>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v30 = sitofp <4 x i16> undef to <4 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v31 = sitofp <4 x i16> undef to <4 x double>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v32 = sitofp <4 x i16> undef to <4 x float>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v33 = sitofp <4 x i8> undef to <4 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v34 = sitofp <4 x i8> undef to <4 x double>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v35 = sitofp <4 x i8> undef to <4 x float>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v36 = sitofp <8 x i64> undef to <8 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v37 = sitofp <8 x i64> undef to <8 x double>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v38 = sitofp <8 x i64> undef to <8 x float>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v39 = sitofp <8 x i32> undef to <8 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v40 = sitofp <8 x i32> undef to <8 x double>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v41 = sitofp <8 x i32> undef to <8 x float>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v42 = sitofp <8 x i16> undef to <8 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 32 for instruction:   %v43 = sitofp <8 x i16> undef to <8 x double>
+; CHECK: Cost Model: Found an estimated cost of 32 for instruction:   %v44 = sitofp <8 x i16> undef to <8 x float>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v45 = sitofp <8 x i8> undef to <8 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 32 for instruction:   %v46 = sitofp <8 x i8> undef to <8 x double>
+; CHECK: Cost Model: Found an estimated cost of 32 for instruction:   %v47 = sitofp <8 x i8> undef to <8 x float>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v48 = sitofp <16 x i64> undef to <16 x double>
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %v49 = sitofp <16 x i64> undef to <16 x float>
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %v50 = sitofp <16 x i32> undef to <16 x double>
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %v51 = sitofp <16 x i32> undef to <16 x float>
+; CHECK: Cost Model: Found an estimated cost of 64 for instruction:   %v52 = sitofp <16 x i16> undef to <16 x double>
+; CHECK: Cost Model: Found an estimated cost of 64 for instruction:   %v53 = sitofp <16 x i16> undef to <16 x float>
+; CHECK: Cost Model: Found an estimated cost of 64 for instruction:   %v54 = sitofp <16 x i8> undef to <16 x double>
+; CHECK: Cost Model: Found an estimated cost of 64 for instruction:   %v55 = sitofp <16 x i8> undef to <16 x float>
+
+  ret void;
+}
+
+define void @uitofp() {
+  %v0 = uitofp i64 undef to fp128
+  %v1 = uitofp i64 undef to double
+  %v2 = uitofp i64 undef to float
+  %v3 = uitofp i32 undef to fp128
+  %v4 = uitofp i32 undef to double
+  %v5 = uitofp i32 undef to float
+  %v6 = uitofp i16 undef to fp128
+  %v7 = uitofp i16 undef to double
+  %v8 = uitofp i16 undef to float
+  %v9 = uitofp i8 undef to fp128
+  %v10 = uitofp i8 undef to double
+  %v11 = uitofp i8 undef to float
+  %v12 = uitofp <2 x i64> undef to <2 x fp128>
+  %v13 = uitofp <2 x i64> undef to <2 x double>
+  %v14 = uitofp <2 x i64> undef to <2 x float>
+  %v15 = uitofp <2 x i32> undef to <2 x fp128>
+  %v16 = uitofp <2 x i32> undef to <2 x double>
+  %v17 = uitofp <2 x i32> undef to <2 x float>
+  %v18 = uitofp <2 x i16> undef to <2 x fp128>
+  %v19 = uitofp <2 x i16> undef to <2 x double>
+  %v20 = uitofp <2 x i16> undef to <2 x float>
+  %v21 = uitofp <2 x i8> undef to <2 x fp128>
+  %v22 = uitofp <2 x i8> undef to <2 x double>
+  %v23 = uitofp <2 x i8> undef to <2 x float>
+  %v24 = uitofp <4 x i64> undef to <4 x fp128>
+  %v25 = uitofp <4 x i64> undef to <4 x double>
+  %v26 = uitofp <4 x i64> undef to <4 x float>
+  %v27 = uitofp <4 x i32> undef to <4 x fp128>
+  %v28 = uitofp <4 x i32> undef to <4 x double>
+  %v29 = uitofp <4 x i32> undef to <4 x float>
+  %v30 = uitofp <4 x i16> undef to <4 x fp128>
+  %v31 = uitofp <4 x i16> undef to <4 x double>
+  %v32 = uitofp <4 x i16> undef to <4 x float>
+  %v33 = uitofp <4 x i8> undef to <4 x fp128>
+  %v34 = uitofp <4 x i8> undef to <4 x double>
+  %v35 = uitofp <4 x i8> undef to <4 x float>
+  %v36 = uitofp <8 x i64> undef to <8 x fp128>
+  %v37 = uitofp <8 x i64> undef to <8 x double>
+  %v38 = uitofp <8 x i64> undef to <8 x float>
+  %v39 = uitofp <8 x i32> undef to <8 x fp128>
+  %v40 = uitofp <8 x i32> undef to <8 x double>
+  %v41 = uitofp <8 x i32> undef to <8 x float>
+  %v42 = uitofp <8 x i16> undef to <8 x fp128>
+  %v43 = uitofp <8 x i16> undef to <8 x double>
+  %v44 = uitofp <8 x i16> undef to <8 x float>
+  %v45 = uitofp <8 x i8> undef to <8 x fp128>
+  %v46 = uitofp <8 x i8> undef to <8 x double>
+  %v47 = uitofp <8 x i8> undef to <8 x float>
+  %v48 = uitofp <16 x i64> undef to <16 x double>
+  %v49 = uitofp <16 x i64> undef to <16 x float>
+  %v50 = uitofp <16 x i32> undef to <16 x double>
+  %v51 = uitofp <16 x i32> undef to <16 x float>
+  %v52 = uitofp <16 x i16> undef to <16 x double>
+  %v53 = uitofp <16 x i16> undef to <16 x float>
+  %v54 = uitofp <16 x i8> undef to <16 x double>
+  %v55 = uitofp <16 x i8> undef to <16 x float>
+; Expected cost-model output for each uitofp above: i8..i64 sources (scalar and 2-, 4-, 8-, 16-element vectors) to fp128, double and float; the 16-element cases convert only to double and float.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v0 = uitofp i64 undef to fp128
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v1 = uitofp i64 undef to double
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v2 = uitofp i64 undef to float
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v3 = uitofp i32 undef to fp128
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v4 = uitofp i32 undef to double
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v5 = uitofp i32 undef to float
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v6 = uitofp i16 undef to fp128
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v7 = uitofp i16 undef to double
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v8 = uitofp i16 undef to float
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v9 = uitofp i8 undef to fp128
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v10 = uitofp i8 undef to double
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v11 = uitofp i8 undef to float
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v12 = uitofp <2 x i64> undef to <2 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v13 = uitofp <2 x i64> undef to <2 x double>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v14 = uitofp <2 x i64> undef to <2 x float>
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v15 = uitofp <2 x i32> undef to <2 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v16 = uitofp <2 x i32> undef to <2 x double>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v17 = uitofp <2 x i32> undef to <2 x float>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v18 = uitofp <2 x i16> undef to <2 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v19 = uitofp <2 x i16> undef to <2 x double>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v20 = uitofp <2 x i16> undef to <2 x float>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v21 = uitofp <2 x i8> undef to <2 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v22 = uitofp <2 x i8> undef to <2 x double>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v23 = uitofp <2 x i8> undef to <2 x float>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v24 = uitofp <4 x i64> undef to <4 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v25 = uitofp <4 x i64> undef to <4 x double>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v26 = uitofp <4 x i64> undef to <4 x float>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v27 = uitofp <4 x i32> undef to <4 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v28 = uitofp <4 x i32> undef to <4 x double>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v29 = uitofp <4 x i32> undef to <4 x float>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v30 = uitofp <4 x i16> undef to <4 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v31 = uitofp <4 x i16> undef to <4 x double>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v32 = uitofp <4 x i16> undef to <4 x float>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v33 = uitofp <4 x i8> undef to <4 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v34 = uitofp <4 x i8> undef to <4 x double>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v35 = uitofp <4 x i8> undef to <4 x float>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v36 = uitofp <8 x i64> undef to <8 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v37 = uitofp <8 x i64> undef to <8 x double>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v38 = uitofp <8 x i64> undef to <8 x float>
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %v39 = uitofp <8 x i32> undef to <8 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v40 = uitofp <8 x i32> undef to <8 x double>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v41 = uitofp <8 x i32> undef to <8 x float>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v42 = uitofp <8 x i16> undef to <8 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 32 for instruction:   %v43 = uitofp <8 x i16> undef to <8 x double>
+; CHECK: Cost Model: Found an estimated cost of 32 for instruction:   %v44 = uitofp <8 x i16> undef to <8 x float>
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %v45 = uitofp <8 x i8> undef to <8 x fp128>
+; CHECK: Cost Model: Found an estimated cost of 32 for instruction:   %v46 = uitofp <8 x i8> undef to <8 x double>
+; CHECK: Cost Model: Found an estimated cost of 32 for instruction:   %v47 = uitofp <8 x i8> undef to <8 x float>
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %v48 = uitofp <16 x i64> undef to <16 x double>
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %v49 = uitofp <16 x i64> undef to <16 x float>
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %v50 = uitofp <16 x i32> undef to <16 x double>
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %v51 = uitofp <16 x i32> undef to <16 x float>
+; CHECK: Cost Model: Found an estimated cost of 64 for instruction:   %v52 = uitofp <16 x i16> undef to <16 x double>
+; CHECK: Cost Model: Found an estimated cost of 64 for instruction:   %v53 = uitofp <16 x i16> undef to <16 x float>
+; CHECK: Cost Model: Found an estimated cost of 64 for instruction:   %v54 = uitofp <16 x i8> undef to <16 x double>
+; CHECK: Cost Model: Found an estimated cost of 64 for instruction:   %v55 = uitofp <16 x i8> undef to <16 x float>
+
+  ret void;
+}
Index: test/Analysis/CostModel/SystemZ/int-arith.ll
===================================================================
--- /dev/null
+++ test/Analysis/CostModel/SystemZ/int-arith.ll
@@ -0,0 +1,326 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
+;
+; Note: The costs of scalarized vector instructions do not include any
+; extracts, because the operands are undef.
+
+define void @add() {
+  %res0 = add i8 undef, undef
+  %res1 = add i16 undef, undef
+  %res2 = add i32 undef, undef
+  %res3 = add i64 undef, undef
+  %res4 = add <2 x i8> undef, undef
+  %res5 = add <2 x i16> undef, undef
+  %res6 = add <2 x i32> undef, undef
+  %res7 = add <2 x i64> undef, undef
+  %res8 = add <4 x i8> undef, undef
+  %res9 = add <4 x i16> undef, undef
+  %res10 = add <4 x i32> undef, undef
+  %res11 = add <4 x i64> undef, undef
+  %res12 = add <8 x i8> undef, undef
+  %res13 = add <8 x i16> undef, undef
+  %res14 = add <8 x i32> undef, undef
+  %res15 = add <8 x i64> undef, undef
+  %res16 = add <16 x i8> undef, undef
+  %res17 = add <16 x i16> undef, undef
+  %res18 = add <16 x i32> undef, undef
+  %res19 = add <16 x i64> undef, undef
+; Expected cost-model output for each add above: scalar i8/i16/i32/i64 and 2-, 4-, 8- and 16-element vectors of those types.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res0 = add i8 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res1 = add i16 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res2 = add i32 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res3 = add i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res4 = add <2 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res5 = add <2 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res6 = add <2 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res7 = add <2 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res8 = add <4 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res9 = add <4 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res10 = add <4 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res11 = add <4 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res12 = add <8 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res13 = add <8 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res14 = add <8 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res15 = add <8 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res16 = add <16 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res17 = add <16 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res18 = add <16 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res19 = add <16 x i64> undef, undef
+
+  ret void;
+}
+
+define void @sub() {
+  %res0 = sub i8 undef, undef
+  %res1 = sub i16 undef, undef
+  %res2 = sub i32 undef, undef
+  %res3 = sub i64 undef, undef
+  %res4 = sub <2 x i8> undef, undef
+  %res5 = sub <2 x i16> undef, undef
+  %res6 = sub <2 x i32> undef, undef
+  %res7 = sub <2 x i64> undef, undef
+  %res8 = sub <4 x i8> undef, undef
+  %res9 = sub <4 x i16> undef, undef
+  %res10 = sub <4 x i32> undef, undef
+  %res11 = sub <4 x i64> undef, undef
+  %res12 = sub <8 x i8> undef, undef
+  %res13 = sub <8 x i16> undef, undef
+  %res14 = sub <8 x i32> undef, undef
+  %res15 = sub <8 x i64> undef, undef
+  %res16 = sub <16 x i8> undef, undef
+  %res17 = sub <16 x i16> undef, undef
+  %res18 = sub <16 x i32> undef, undef
+  %res19 = sub <16 x i64> undef, undef
+; Expected cost-model output for each sub above: scalar i8/i16/i32/i64 and 2-, 4-, 8- and 16-element vectors of those types.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res0 = sub i8 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res1 = sub i16 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res2 = sub i32 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res3 = sub i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res4 = sub <2 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res5 = sub <2 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res6 = sub <2 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res7 = sub <2 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res8 = sub <4 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res9 = sub <4 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res10 = sub <4 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res11 = sub <4 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res12 = sub <8 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res13 = sub <8 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res14 = sub <8 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res15 = sub <8 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res16 = sub <16 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res17 = sub <16 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res18 = sub <16 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res19 = sub <16 x i64> undef, undef
+
+  ret void;
+}
+
+define void @mul() {
+  %res0 = mul i8 undef, undef
+  %res1 = mul i16 undef, undef
+  %res2 = mul i32 undef, undef
+  %res3 = mul i64 undef, undef
+  %res4 = mul <2 x i8> undef, undef
+  %res5 = mul <2 x i16> undef, undef
+  %res6 = mul <2 x i32> undef, undef
+  %res7 = mul <2 x i64> undef, undef
+  %res8 = mul <4 x i8> undef, undef
+  %res9 = mul <4 x i16> undef, undef
+  %res10 = mul <4 x i32> undef, undef
+  %res11 = mul <4 x i64> undef, undef
+  %res12 = mul <8 x i8> undef, undef
+  %res13 = mul <8 x i16> undef, undef
+  %res14 = mul <8 x i32> undef, undef
+  %res15 = mul <8 x i64> undef, undef
+  %res16 = mul <16 x i8> undef, undef
+  %res17 = mul <16 x i16> undef, undef
+  %res18 = mul <16 x i32> undef, undef
+  %res19 = mul <16 x i64> undef, undef
+; Expected cost-model output for each mul above: scalar i8/i16/i32/i64 and 2-, 4-, 8- and 16-element vectors of those types.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res0 = mul i8 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res1 = mul i16 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res2 = mul i32 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res3 = mul i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res4 = mul <2 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res5 = mul <2 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res6 = mul <2 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res7 = mul <2 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res8 = mul <4 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res9 = mul <4 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res10 = mul <4 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res11 = mul <4 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res12 = mul <8 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res13 = mul <8 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res14 = mul <8 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %res15 = mul <8 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res16 = mul <16 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res17 = mul <16 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res18 = mul <16 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 32 for instruction:   %res19 = mul <16 x i64> undef, undef
+
+  ret void;
+}
+
+define void @sdiv() {
+  %res0 = sdiv i8 undef, undef
+  %res1 = sdiv i16 undef, undef
+  %res2 = sdiv i32 undef, undef
+  %res3 = sdiv i64 undef, undef
+  %res4 = sdiv <2 x i8> undef, undef
+  %res5 = sdiv <2 x i16> undef, undef
+  %res6 = sdiv <2 x i32> undef, undef
+  %res7 = sdiv <2 x i64> undef, undef
+  %res8 = sdiv <4 x i8> undef, undef
+  %res9 = sdiv <4 x i16> undef, undef
+  %res10 = sdiv <4 x i32> undef, undef
+  %res11 = sdiv <4 x i64> undef, undef
+  %res12 = sdiv <8 x i8> undef, undef
+  %res13 = sdiv <8 x i16> undef, undef
+  %res14 = sdiv <8 x i32> undef, undef
+  %res15 = sdiv <8 x i64> undef, undef
+  %res16 = sdiv <16 x i8> undef, undef
+  %res17 = sdiv <16 x i16> undef, undef
+  %res18 = sdiv <16 x i32> undef, undef
+  %res19 = sdiv <16 x i64> undef, undef
+; Expected cost-model output for each sdiv above: scalar i8/i16/i32/i64 and 2-, 4-, 8- and 16-element vectors of those types.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res0 = sdiv i8 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res1 = sdiv i16 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res2 = sdiv i32 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res3 = sdiv i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %res4 = sdiv <2 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %res5 = sdiv <2 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %res6 = sdiv <2 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %res7 = sdiv <2 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 20 for instruction:   %res8 = sdiv <4 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 20 for instruction:   %res9 = sdiv <4 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %res10 = sdiv <4 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %res11 = sdiv <4 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 40 for instruction:   %res12 = sdiv <8 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 40 for instruction:   %res13 = sdiv <8 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %res14 = sdiv <8 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %res15 = sdiv <8 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 80 for instruction:   %res16 = sdiv <16 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 80 for instruction:   %res17 = sdiv <16 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %res18 = sdiv <16 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %res19 = sdiv <16 x i64> undef, undef
+
+  ret void;
+}
+
+define void @srem() {
+  %res0 = srem i8 undef, undef
+  %res1 = srem i16 undef, undef
+  %res2 = srem i32 undef, undef
+  %res3 = srem i64 undef, undef
+  %res4 = srem <2 x i8> undef, undef
+  %res5 = srem <2 x i16> undef, undef
+  %res6 = srem <2 x i32> undef, undef
+  %res7 = srem <2 x i64> undef, undef
+  %res8 = srem <4 x i8> undef, undef
+  %res9 = srem <4 x i16> undef, undef
+  %res10 = srem <4 x i32> undef, undef
+  %res11 = srem <4 x i64> undef, undef
+  %res12 = srem <8 x i8> undef, undef
+  %res13 = srem <8 x i16> undef, undef
+  %res14 = srem <8 x i32> undef, undef
+  %res15 = srem <8 x i64> undef, undef
+  %res16 = srem <16 x i8> undef, undef
+  %res17 = srem <16 x i16> undef, undef
+  %res18 = srem <16 x i32> undef, undef
+  %res19 = srem <16 x i64> undef, undef
+; Expected cost-model output for each srem above: scalar i8/i16/i32/i64 and 2-, 4-, 8- and 16-element vectors of those types.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res0 = srem i8 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res1 = srem i16 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res2 = srem i32 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res3 = srem i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %res4 = srem <2 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %res5 = srem <2 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %res6 = srem <2 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %res7 = srem <2 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 20 for instruction:   %res8 = srem <4 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 20 for instruction:   %res9 = srem <4 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %res10 = srem <4 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %res11 = srem <4 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 40 for instruction:   %res12 = srem <8 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 40 for instruction:   %res13 = srem <8 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %res14 = srem <8 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 24 for instruction:   %res15 = srem <8 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 80 for instruction:   %res16 = srem <16 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 80 for instruction:   %res17 = srem <16 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %res18 = srem <16 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 48 for instruction:   %res19 = srem <16 x i64> undef, undef
+
+  ret void;
+}
+
+define void @udiv() {
+  %res0 = udiv i8 undef, undef
+  %res1 = udiv i16 undef, undef
+  %res2 = udiv i32 undef, undef
+  %res3 = udiv i64 undef, undef
+  %res4 = udiv <2 x i8> undef, undef
+  %res5 = udiv <2 x i16> undef, undef
+  %res6 = udiv <2 x i32> undef, undef
+  %res7 = udiv <2 x i64> undef, undef
+  %res8 = udiv <4 x i8> undef, undef
+  %res9 = udiv <4 x i16> undef, undef
+  %res10 = udiv <4 x i32> undef, undef
+  %res11 = udiv <4 x i64> undef, undef
+  %res12 = udiv <8 x i8> undef, undef
+  %res13 = udiv <8 x i16> undef, undef
+  %res14 = udiv <8 x i32> undef, undef
+  %res15 = udiv <8 x i64> undef, undef
+  %res16 = udiv <16 x i8> undef, undef
+  %res17 = udiv <16 x i16> undef, undef
+  %res18 = udiv <16 x i32> undef, undef
+  %res19 = udiv <16 x i64> undef, undef
+; Expected cost-model output for each udiv above: scalar i8/i16/i32/i64 and 2-, 4-, 8- and 16-element vectors of those types.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res0 = udiv i8 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res1 = udiv i16 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %res2 = udiv i32 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %res3 = udiv i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %res4 = udiv <2 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %res5 = udiv <2 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res6 = udiv <2 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res7 = udiv <2 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 20 for instruction:   %res8 = udiv <4 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 20 for instruction:   %res9 = udiv <4 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %res10 = udiv <4 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %res11 = udiv <4 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 40 for instruction:   %res12 = udiv <8 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 40 for instruction:   %res13 = udiv <8 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 32 for instruction:   %res14 = udiv <8 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 32 for instruction:   %res15 = udiv <8 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 80 for instruction:   %res16 = udiv <16 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 80 for instruction:   %res17 = udiv <16 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 64 for instruction:   %res18 = udiv <16 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 64 for instruction:   %res19 = udiv <16 x i64> undef, undef
+
+  ret void;
+}
+
+define void @urem() {
+  %res0 = urem i8 undef, undef
+  %res1 = urem i16 undef, undef
+  %res2 = urem i32 undef, undef
+  %res3 = urem i64 undef, undef
+  %res4 = urem <2 x i8> undef, undef
+  %res5 = urem <2 x i16> undef, undef
+  %res6 = urem <2 x i32> undef, undef
+  %res7 = urem <2 x i64> undef, undef
+  %res8 = urem <4 x i8> undef, undef
+  %res9 = urem <4 x i16> undef, undef
+  %res10 = urem <4 x i32> undef, undef
+  %res11 = urem <4 x i64> undef, undef
+  %res12 = urem <8 x i8> undef, undef
+  %res13 = urem <8 x i16> undef, undef
+  %res14 = urem <8 x i32> undef, undef
+  %res15 = urem <8 x i64> undef, undef
+  %res16 = urem <16 x i8> undef, undef
+  %res17 = urem <16 x i16> undef, undef
+  %res18 = urem <16 x i32> undef, undef
+  %res19 = urem <16 x i64> undef, undef
+; Expected cost-model output for each urem above: scalar i8/i16/i32/i64 and 2-, 4-, 8- and 16-element vectors of those types.
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res0 = urem i8 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res1 = urem i16 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %res2 = urem i32 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %res3 = urem i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %res4 = urem <2 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %res5 = urem <2 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res6 = urem <2 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res7 = urem <2 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 20 for instruction:   %res8 = urem <4 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 20 for instruction:   %res9 = urem <4 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %res10 = urem <4 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 16 for instruction:   %res11 = urem <4 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 40 for instruction:   %res12 = urem <8 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 40 for instruction:   %res13 = urem <8 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 32 for instruction:   %res14 = urem <8 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 32 for instruction:   %res15 = urem <8 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 80 for instruction:   %res16 = urem <16 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 80 for instruction:   %res17 = urem <16 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 64 for instruction:   %res18 = urem <16 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 64 for instruction:   %res19 = urem <16 x i64> undef, undef
+
+  ret void;
+}
Index: test/Analysis/CostModel/SystemZ/int-cast.ll
===================================================================
--- /dev/null
+++ test/Analysis/CostModel/SystemZ/int-cast.ll
@@ -0,0 +1,199 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
+
+define void @sext() { ; Cost-model test: sign extension of scalar and 2/4/8/16-element vector integers.
+  %v0 = sext i8 undef to i16
+  %v1 = sext i8 undef to i32
+  %v2 = sext i8 undef to i64
+  %v3 = sext i16 undef to i32
+  %v4 = sext i16 undef to i64
+  %v5 = sext i32 undef to i64
+  %v6 = sext <2 x i8> undef to <2 x i16>
+  %v7 = sext <2 x i8> undef to <2 x i32>
+  %v8 = sext <2 x i8> undef to <2 x i64>
+  %v9 = sext <2 x i16> undef to <2 x i32>
+  %v10 = sext <2 x i16> undef to <2 x i64>
+  %v11 = sext <2 x i32> undef to <2 x i64>
+  %v12 = sext <4 x i8> undef to <4 x i16>
+  %v13 = sext <4 x i8> undef to <4 x i32>
+  %v14 = sext <4 x i8> undef to <4 x i64>
+  %v15 = sext <4 x i16> undef to <4 x i32>
+  %v16 = sext <4 x i16> undef to <4 x i64>
+  %v17 = sext <4 x i32> undef to <4 x i64>
+  %v18 = sext <8 x i8> undef to <8 x i16>
+  %v19 = sext <8 x i8> undef to <8 x i32>
+  %v20 = sext <8 x i8> undef to <8 x i64>
+  %v21 = sext <8 x i16> undef to <8 x i32>
+  %v22 = sext <8 x i16> undef to <8 x i64>
+  %v23 = sext <8 x i32> undef to <8 x i64>
+  %v24 = sext <16 x i8> undef to <16 x i16>
+  %v25 = sext <16 x i8> undef to <16 x i32>
+  %v26 = sext <16 x i8> undef to <16 x i64>
+  %v27 = sext <16 x i16> undef to <16 x i32>
+  %v28 = sext <16 x i16> undef to <16 x i64>
+  %v29 = sext <16 x i32> undef to <16 x i64>
+; Expected cost estimates for the z13 CPU selected on the opt command line, in instruction order (%v0..%v29).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v0 = sext i8 undef to i16
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v1 = sext i8 undef to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v2 = sext i8 undef to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v3 = sext i16 undef to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v4 = sext i16 undef to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v5 = sext i32 undef to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v6 = sext <2 x i8> undef to <2 x i16>
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v7 = sext <2 x i8> undef to <2 x i32>
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %v8 = sext <2 x i8> undef to <2 x i64>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v9 = sext <2 x i16> undef to <2 x i32>
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v10 = sext <2 x i16> undef to <2 x i64>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v11 = sext <2 x i32> undef to <2 x i64>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v12 = sext <4 x i8> undef to <4 x i16>
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v13 = sext <4 x i8> undef to <4 x i32>
+; CHECK: Cost Model: Found an estimated cost of 7 for instruction:   %v14 = sext <4 x i8> undef to <4 x i64>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v15 = sext <4 x i16> undef to <4 x i32>
+; CHECK: Cost Model: Found an estimated cost of 5 for instruction:   %v16 = sext <4 x i16> undef to <4 x i64>
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %v17 = sext <4 x i32> undef to <4 x i64>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v18 = sext <8 x i8> undef to <8 x i16>
+; CHECK: Cost Model: Found an estimated cost of 5 for instruction:   %v19 = sext <8 x i8> undef to <8 x i32>
+; CHECK: Cost Model: Found an estimated cost of 15 for instruction:   %v20 = sext <8 x i8> undef to <8 x i64>
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %v21 = sext <8 x i16> undef to <8 x i32>
+; CHECK: Cost Model: Found an estimated cost of 11 for instruction:   %v22 = sext <8 x i16> undef to <8 x i64>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v23 = sext <8 x i32> undef to <8 x i64>
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %v24 = sext <16 x i8> undef to <16 x i16>
+; CHECK: Cost Model: Found an estimated cost of 11 for instruction:   %v25 = sext <16 x i8> undef to <16 x i32>
+; CHECK: Cost Model: Found an estimated cost of 31 for instruction:   %v26 = sext <16 x i8> undef to <16 x i64>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v27 = sext <16 x i16> undef to <16 x i32>
+; CHECK: Cost Model: Found an estimated cost of 22 for instruction:   %v28 = sext <16 x i16> undef to <16 x i64>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v29 = sext <16 x i32> undef to <16 x i64>
+
+ ret void
+}
+
+define void @zext() { ; Cost-model test: zero extension of scalar and 2/4/8/16-element vector integers.
+  %v0 = zext i8 undef to i16
+  %v1 = zext i8 undef to i32
+  %v2 = zext i8 undef to i64
+  %v3 = zext i16 undef to i32
+  %v4 = zext i16 undef to i64
+  %v5 = zext i32 undef to i64
+  %v6 = zext <2 x i8> undef to <2 x i16>
+  %v7 = zext <2 x i8> undef to <2 x i32>
+  %v8 = zext <2 x i8> undef to <2 x i64>
+  %v9 = zext <2 x i16> undef to <2 x i32>
+  %v10 = zext <2 x i16> undef to <2 x i64>
+  %v11 = zext <2 x i32> undef to <2 x i64>
+  %v12 = zext <4 x i8> undef to <4 x i16>
+  %v13 = zext <4 x i8> undef to <4 x i32>
+  %v14 = zext <4 x i8> undef to <4 x i64>
+  %v15 = zext <4 x i16> undef to <4 x i32>
+  %v16 = zext <4 x i16> undef to <4 x i64>
+  %v17 = zext <4 x i32> undef to <4 x i64>
+  %v18 = zext <8 x i8> undef to <8 x i16>
+  %v19 = zext <8 x i8> undef to <8 x i32>
+  %v20 = zext <8 x i8> undef to <8 x i64>
+  %v21 = zext <8 x i16> undef to <8 x i32>
+  %v22 = zext <8 x i16> undef to <8 x i64>
+  %v23 = zext <8 x i32> undef to <8 x i64>
+  %v24 = zext <16 x i8> undef to <16 x i16>
+  %v25 = zext <16 x i8> undef to <16 x i32>
+  %v26 = zext <16 x i8> undef to <16 x i64>
+  %v27 = zext <16 x i16> undef to <16 x i32>
+  %v28 = zext <16 x i16> undef to <16 x i64>
+  %v29 = zext <16 x i32> undef to <16 x i64>
+; Expected cost estimates for the z13 CPU selected on the opt command line, in instruction order (%v0..%v29).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v0 = zext i8 undef to i16
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v1 = zext i8 undef to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v2 = zext i8 undef to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v3 = zext i16 undef to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v4 = zext i16 undef to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v5 = zext i32 undef to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v6 = zext <2 x i8> undef to <2 x i16>
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v7 = zext <2 x i8> undef to <2 x i32>
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %v8 = zext <2 x i8> undef to <2 x i64>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v9 = zext <2 x i16> undef to <2 x i32>
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v10 = zext <2 x i16> undef to <2 x i64>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v11 = zext <2 x i32> undef to <2 x i64>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v12 = zext <4 x i8> undef to <4 x i16>
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v13 = zext <4 x i8> undef to <4 x i32>
+; CHECK: Cost Model: Found an estimated cost of 7 for instruction:   %v14 = zext <4 x i8> undef to <4 x i64>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v15 = zext <4 x i16> undef to <4 x i32>
+; CHECK: Cost Model: Found an estimated cost of 5 for instruction:   %v16 = zext <4 x i16> undef to <4 x i64>
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %v17 = zext <4 x i32> undef to <4 x i64>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v18 = zext <8 x i8> undef to <8 x i16>
+; CHECK: Cost Model: Found an estimated cost of 5 for instruction:   %v19 = zext <8 x i8> undef to <8 x i32>
+; CHECK: Cost Model: Found an estimated cost of 15 for instruction:   %v20 = zext <8 x i8> undef to <8 x i64>
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %v21 = zext <8 x i16> undef to <8 x i32>
+; CHECK: Cost Model: Found an estimated cost of 11 for instruction:   %v22 = zext <8 x i16> undef to <8 x i64>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v23 = zext <8 x i32> undef to <8 x i64>
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %v24 = zext <16 x i8> undef to <16 x i16>
+; CHECK: Cost Model: Found an estimated cost of 11 for instruction:   %v25 = zext <16 x i8> undef to <16 x i32>
+; CHECK: Cost Model: Found an estimated cost of 31 for instruction:   %v26 = zext <16 x i8> undef to <16 x i64>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v27 = zext <16 x i16> undef to <16 x i32>
+; CHECK: Cost Model: Found an estimated cost of 22 for instruction:   %v28 = zext <16 x i16> undef to <16 x i64>
+; CHECK: Cost Model: Found an estimated cost of 12 for instruction:   %v29 = zext <16 x i32> undef to <16 x i64>
+
+ ret void
+}
+
+define void @trunc() { ; Cost-model test: truncation of scalar and 2/4/8/16-element vector integers.
+  %v0 = trunc i16 undef to i8
+  %v1 = trunc i32 undef to i16
+  %v2 = trunc i32 undef to i8
+  %v3 = trunc i64 undef to i32
+  %v4 = trunc i64 undef to i16
+  %v5 = trunc i64 undef to i8
+  %v6 = trunc <2 x i16> undef to <2 x i8>
+  %v7 = trunc <2 x i32> undef to <2 x i16>
+  %v8 = trunc <2 x i32> undef to <2 x i8>
+  %v9 = trunc <2 x i64> undef to <2 x i32>
+  %v10 = trunc <2 x i64> undef to <2 x i16>
+  %v11 = trunc <2 x i64> undef to <2 x i8>
+  %v12 = trunc <4 x i16> undef to <4 x i8>
+  %v13 = trunc <4 x i32> undef to <4 x i16>
+  %v14 = trunc <4 x i32> undef to <4 x i8>
+  %v15 = trunc <4 x i64> undef to <4 x i32>
+  %v16 = trunc <4 x i64> undef to <4 x i16>
+  %v17 = trunc <4 x i64> undef to <4 x i8>
+  %v18 = trunc <8 x i16> undef to <8 x i8>
+  %v19 = trunc <8 x i32> undef to <8 x i16>
+  %v20 = trunc <8 x i32> undef to <8 x i8>
+  %v21 = trunc <8 x i64> undef to <8 x i32>
+  %v22 = trunc <8 x i64> undef to <8 x i16>
+  %v23 = trunc <8 x i64> undef to <8 x i8>
+  %v24 = trunc <16 x i16> undef to <16 x i8>
+  %v25 = trunc <16 x i32> undef to <16 x i16>
+  %v26 = trunc <16 x i32> undef to <16 x i8>
+  %v27 = trunc <16 x i64> undef to <16 x i32>
+  %v28 = trunc <16 x i64> undef to <16 x i16>
+  %v29 = trunc <16 x i64> undef to <16 x i8>
+; Expected z13 cost estimates in instruction order; the scalar truncations (%v0..%v5) are expected to cost 0.
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %v0 = trunc i16 undef to i8
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %v1 = trunc i32 undef to i16
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %v2 = trunc i32 undef to i8
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %v3 = trunc i64 undef to i32
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %v4 = trunc i64 undef to i16
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %v5 = trunc i64 undef to i8
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v6 = trunc <2 x i16> undef to <2 x i8>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v7 = trunc <2 x i32> undef to <2 x i16>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v8 = trunc <2 x i32> undef to <2 x i8>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v9 = trunc <2 x i64> undef to <2 x i32>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v10 = trunc <2 x i64> undef to <2 x i16>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v11 = trunc <2 x i64> undef to <2 x i8>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v12 = trunc <4 x i16> undef to <4 x i8>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v13 = trunc <4 x i32> undef to <4 x i16>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v14 = trunc <4 x i32> undef to <4 x i8>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v15 = trunc <4 x i64> undef to <4 x i32>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v16 = trunc <4 x i64> undef to <4 x i16>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v17 = trunc <4 x i64> undef to <4 x i8>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v18 = trunc <8 x i16> undef to <8 x i8>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v19 = trunc <8 x i32> undef to <8 x i16>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v20 = trunc <8 x i32> undef to <8 x i8>
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v21 = trunc <8 x i64> undef to <8 x i32>
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %v22 = trunc <8 x i64> undef to <8 x i16>
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %v23 = trunc <8 x i64> undef to <8 x i8>
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %v24 = trunc <16 x i16> undef to <16 x i8>
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %v25 = trunc <16 x i32> undef to <16 x i16>
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction:   %v26 = trunc <16 x i32> undef to <16 x i8>
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %v27 = trunc <16 x i64> undef to <16 x i32>
+; CHECK: Cost Model: Found an estimated cost of 6 for instruction:   %v28 = trunc <16 x i64> undef to <16 x i16>
+; CHECK: Cost Model: Found an estimated cost of 7 for instruction:   %v29 = trunc <16 x i64> undef to <16 x i8>
+
+ ret void
+}
Index: test/Analysis/CostModel/SystemZ/load_store.ll
===================================================================
--- /dev/null
+++ test/Analysis/CostModel/SystemZ/load_store.ll
@@ -0,0 +1,137 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
+
+define void @store() { ; Cost-model test: stores of scalar and 2/4/8/16-element vector values through undef pointers.
+  store i8 undef, i8* undef
+  store i16 undef, i16* undef
+  store i32 undef, i32* undef
+  store i64 undef, i64* undef
+  store float undef, float* undef
+  store double undef, double* undef
+  store fp128 undef, fp128* undef
+  store <2 x i8> undef, <2 x i8>* undef
+  store <2 x i16> undef, <2 x i16>* undef
+  store <2 x i32> undef, <2 x i32>* undef
+  store <2 x i64> undef, <2 x i64>* undef
+  store <2 x float> undef, <2 x float>* undef
+  store <2 x double> undef, <2 x double>* undef
+  store <4 x i8> undef, <4 x i8>* undef
+  store <4 x i16> undef, <4 x i16>* undef
+  store <4 x i32> undef, <4 x i32>* undef
+  store <4 x i64> undef, <4 x i64>* undef
+  store <4 x float> undef, <4 x float>* undef
+  store <4 x double> undef, <4 x double>* undef
+  store <8 x i8> undef, <8 x i8>* undef
+  store <8 x i16> undef, <8 x i16>* undef
+  store <8 x i32> undef, <8 x i32>* undef
+  store <8 x i64> undef, <8 x i64>* undef
+  store <8 x float> undef, <8 x float>* undef
+  store <8 x double> undef, <8 x double>* undef
+  store <16 x i8> undef, <16 x i8>* undef
+  store <16 x i16> undef, <16 x i16>* undef
+  store <16 x i32> undef, <16 x i32>* undef
+  store <16 x i64> undef, <16 x i64>* undef
+  store <16 x float> undef, <16 x float>* undef
+  store <16 x double> undef, <16 x double>* undef
+; Expected cost estimates for the z13 CPU selected on the opt command line, in instruction order.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   store i8 undef, i8* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   store i16 undef, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   store i32 undef, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   store i64 undef, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   store float undef, float* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   store double undef, double* undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   store fp128 undef, fp128* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   store <2 x i8> undef, <2 x i8>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   store <2 x i16> undef, <2 x i16>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   store <2 x i32> undef, <2 x i32>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   store <2 x i64> undef, <2 x i64>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   store <2 x float> undef, <2 x float>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   store <2 x double> undef, <2 x double>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   store <4 x i8> undef, <4 x i8>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   store <4 x i16> undef, <4 x i16>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   store <4 x i32> undef, <4 x i32>* undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   store <4 x i64> undef, <4 x i64>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   store <4 x float> undef, <4 x float>* undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   store <4 x double> undef, <4 x double>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   store <8 x i8> undef, <8 x i8>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   store <8 x i16> undef, <8 x i16>* undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   store <8 x i32> undef, <8 x i32>* undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   store <8 x i64> undef, <8 x i64>* undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   store <8 x float> undef, <8 x float>* undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   store <8 x double> undef, <8 x double>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   store <16 x i8> undef, <16 x i8>* undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   store <16 x i16> undef, <16 x i16>* undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   store <16 x i32> undef, <16 x i32>* undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   store <16 x i64> undef, <16 x i64>* undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   store <16 x float> undef, <16 x float>* undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   store <16 x double> undef, <16 x double>* undef
+
+  ret void;
+}
+
+define void @load() { ; Cost-model test: loads of scalar and 2/4/8/16-element vector values from undef pointers.
+  load i8, i8* undef
+  load i16, i16* undef
+  load i32, i32* undef
+  load i64, i64* undef
+  load float, float* undef
+  load double, double* undef
+  load fp128, fp128* undef
+  load <2 x i8>, <2 x i8>* undef
+  load <2 x i16>, <2 x i16>* undef
+  load <2 x i32>, <2 x i32>* undef
+  load <2 x i64>, <2 x i64>* undef
+  load <2 x float>, <2 x float>* undef
+  load <2 x double>, <2 x double>* undef
+  load <4 x i8>, <4 x i8>* undef
+  load <4 x i16>, <4 x i16>* undef
+  load <4 x i32>, <4 x i32>* undef
+  load <4 x i64>, <4 x i64>* undef
+  load <4 x float>, <4 x float>* undef
+  load <4 x double>, <4 x double>* undef
+  load <8 x i8>, <8 x i8>* undef
+  load <8 x i16>, <8 x i16>* undef
+  load <8 x i32>, <8 x i32>* undef
+  load <8 x i64>, <8 x i64>* undef
+  load <8 x float>, <8 x float>* undef
+  load <8 x double>, <8 x double>* undef
+  load <16 x i8>, <16 x i8>* undef
+  load <16 x i16>, <16 x i16>* undef
+  load <16 x i32>, <16 x i32>* undef
+  load <16 x i64>, <16 x i64>* undef
+  load <16 x float>, <16 x float>* undef
+  load <16 x double>, <16 x double>* undef
+; Expected z13 cost estimates in instruction order; the unnamed load results are matched as %1..%31.
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %1 = load i8, i8* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %2 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %3 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %4 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %5 = load float, float* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %6 = load double, double* undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %7 = load fp128, fp128* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %8 = load <2 x i8>, <2 x i8>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %9 = load <2 x i16>, <2 x i16>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %10 = load <2 x i32>, <2 x i32>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %11 = load <2 x i64>, <2 x i64>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %12 = load <2 x float>, <2 x float>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %13 = load <2 x double>, <2 x double>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %14 = load <4 x i8>, <4 x i8>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %15 = load <4 x i16>, <4 x i16>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %16 = load <4 x i32>, <4 x i32>* undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %17 = load <4 x i64>, <4 x i64>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %18 = load <4 x float>, <4 x float>* undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %19 = load <4 x double>, <4 x double>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %20 = load <8 x i8>, <8 x i8>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %21 = load <8 x i16>, <8 x i16>* undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %22 = load <8 x i32>, <8 x i32>* undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %23 = load <8 x i64>, <8 x i64>* undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %24 = load <8 x float>, <8 x float>* undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %25 = load <8 x double>, <8 x double>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %26 = load <16 x i8>, <16 x i8>* undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %27 = load <16 x i16>, <16 x i16>* undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %28 = load <16 x i32>, <16 x i32>* undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %29 = load <16 x i64>, <16 x i64>* undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %30 = load <16 x float>, <16 x float>* undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %31 = load <16 x double>, <16 x double>* undef
+
+  ret void;
+}
Index: test/Analysis/CostModel/SystemZ/logical.ll
===================================================================
--- /dev/null
+++ test/Analysis/CostModel/SystemZ/logical.ll
@@ -0,0 +1,277 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
+
+define void @and() { ; Cost-model test: bitwise AND on scalar and 2/4/8/16-element vector integers.
+  %res0 = and i8 undef, undef
+  %res1 = and i16 undef, undef
+  %res2 = and i32 undef, undef
+  %res3 = and i64 undef, undef
+  %res4 = and <2 x i8> undef, undef
+  %res5 = and <2 x i16> undef, undef
+  %res6 = and <2 x i32> undef, undef
+  %res7 = and <2 x i64> undef, undef
+  %res8 = and <4 x i8> undef, undef
+  %res9 = and <4 x i16> undef, undef
+  %res10 = and <4 x i32> undef, undef
+  %res11 = and <4 x i64> undef, undef
+  %res12 = and <8 x i8> undef, undef
+  %res13 = and <8 x i16> undef, undef
+  %res14 = and <8 x i32> undef, undef
+  %res15 = and <8 x i64> undef, undef
+  %res16 = and <16 x i8> undef, undef
+  %res17 = and <16 x i16> undef, undef
+  %res18 = and <16 x i32> undef, undef
+  %res19 = and <16 x i64> undef, undef
+; Expected cost estimates for the z13 CPU selected on the opt command line, in instruction order (%res0..%res19).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res0 = and i8 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res1 = and i16 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res2 = and i32 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res3 = and i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res4 = and <2 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res5 = and <2 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res6 = and <2 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res7 = and <2 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res8 = and <4 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res9 = and <4 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res10 = and <4 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res11 = and <4 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res12 = and <8 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res13 = and <8 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res14 = and <8 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res15 = and <8 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res16 = and <16 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res17 = and <16 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res18 = and <16 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res19 = and <16 x i64> undef, undef
+
+  ret void;
+}
+
+define void @ashr() { ; Cost-model test: arithmetic shift right on scalar and 2/4/8/16-element vector integers.
+  %res0 = ashr i8 undef, undef
+  %res1 = ashr i16 undef, undef
+  %res2 = ashr i32 undef, undef
+  %res3 = ashr i64 undef, undef
+  %res4 = ashr <2 x i8> undef, undef
+  %res5 = ashr <2 x i16> undef, undef
+  %res6 = ashr <2 x i32> undef, undef
+  %res7 = ashr <2 x i64> undef, undef
+  %res8 = ashr <4 x i8> undef, undef
+  %res9 = ashr <4 x i16> undef, undef
+  %res10 = ashr <4 x i32> undef, undef
+  %res11 = ashr <4 x i64> undef, undef
+  %res12 = ashr <8 x i8> undef, undef
+  %res13 = ashr <8 x i16> undef, undef
+  %res14 = ashr <8 x i32> undef, undef
+  %res15 = ashr <8 x i64> undef, undef
+  %res16 = ashr <16 x i8> undef, undef
+  %res17 = ashr <16 x i16> undef, undef
+  %res18 = ashr <16 x i32> undef, undef
+  %res19 = ashr <16 x i64> undef, undef
+; Expected z13 cost estimates in instruction order; scalar i8/i16 shifts are expected to cost 2, scalar i32/i64 shifts 1.
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res0 = ashr i8 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res1 = ashr i16 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res2 = ashr i32 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res3 = ashr i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res4 = ashr <2 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res5 = ashr <2 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res6 = ashr <2 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res7 = ashr <2 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res8 = ashr <4 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res9 = ashr <4 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res10 = ashr <4 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res11 = ashr <4 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res12 = ashr <8 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res13 = ashr <8 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res14 = ashr <8 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res15 = ashr <8 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res16 = ashr <16 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res17 = ashr <16 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res18 = ashr <16 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res19 = ashr <16 x i64> undef, undef
+
+  ret void;
+}
+
+define void @lshr() { ; Cost-model test: logical shift right on scalar and 2/4/8/16-element vector integers.
+  %res0 = lshr i8 undef, undef
+  %res1 = lshr i16 undef, undef
+  %res2 = lshr i32 undef, undef
+  %res3 = lshr i64 undef, undef
+  %res4 = lshr <2 x i8> undef, undef
+  %res5 = lshr <2 x i16> undef, undef
+  %res6 = lshr <2 x i32> undef, undef
+  %res7 = lshr <2 x i64> undef, undef
+  %res8 = lshr <4 x i8> undef, undef
+  %res9 = lshr <4 x i16> undef, undef
+  %res10 = lshr <4 x i32> undef, undef
+  %res11 = lshr <4 x i64> undef, undef
+  %res12 = lshr <8 x i8> undef, undef
+  %res13 = lshr <8 x i16> undef, undef
+  %res14 = lshr <8 x i32> undef, undef
+  %res15 = lshr <8 x i64> undef, undef
+  %res16 = lshr <16 x i8> undef, undef
+  %res17 = lshr <16 x i16> undef, undef
+  %res18 = lshr <16 x i32> undef, undef
+  %res19 = lshr <16 x i64> undef, undef
+; Expected z13 cost estimates in instruction order; scalar i8/i16 shifts are expected to cost 2, scalar i32/i64 shifts 1.
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res0 = lshr i8 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res1 = lshr i16 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res2 = lshr i32 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res3 = lshr i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res4 = lshr <2 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res5 = lshr <2 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res6 = lshr <2 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res7 = lshr <2 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res8 = lshr <4 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res9 = lshr <4 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res10 = lshr <4 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res11 = lshr <4 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res12 = lshr <8 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res13 = lshr <8 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res14 = lshr <8 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res15 = lshr <8 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res16 = lshr <16 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res17 = lshr <16 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res18 = lshr <16 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res19 = lshr <16 x i64> undef, undef
+
+  ret void;
+}
+
+define void @or() {
+  %res0 = or i8 undef, undef
+  %res1 = or i16 undef, undef
+  %res2 = or i32 undef, undef
+  %res3 = or i64 undef, undef
+  %res4 = or <2 x i8> undef, undef
+  %res5 = or <2 x i16> undef, undef
+  %res6 = or <2 x i32> undef, undef
+  %res7 = or <2 x i64> undef, undef
+  %res8 = or <4 x i8> undef, undef
+  %res9 = or <4 x i16> undef, undef
+  %res10 = or <4 x i32> undef, undef
+  %res11 = or <4 x i64> undef, undef
+  %res12 = or <8 x i8> undef, undef
+  %res13 = or <8 x i16> undef, undef
+  %res14 = or <8 x i32> undef, undef
+  %res15 = or <8 x i64> undef, undef
+  %res16 = or <16 x i8> undef, undef
+  %res17 = or <16 x i16> undef, undef
+  %res18 = or <16 x i32> undef, undef
+  %res19 = or <16 x i64> undef, undef
+; Expected 'or' costs: 1 for scalars and for vectors of up to 128 bits in total; 2, 4 and 8 for 256-, 512- and 1024-bit vectors (presumably one op per 128-bit register - confirm against the RUN line's target).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res0 = or i8 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res1 = or i16 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res2 = or i32 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res3 = or i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res4 = or <2 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res5 = or <2 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res6 = or <2 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res7 = or <2 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res8 = or <4 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res9 = or <4 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res10 = or <4 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res11 = or <4 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res12 = or <8 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res13 = or <8 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res14 = or <8 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res15 = or <8 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res16 = or <16 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res17 = or <16 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res18 = or <16 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res19 = or <16 x i64> undef, undef
+
+  ret void;
+}
+
+define void @shl() {
+  %res0 = shl i8 undef, undef
+  %res1 = shl i16 undef, undef
+  %res2 = shl i32 undef, undef
+  %res3 = shl i64 undef, undef
+  %res4 = shl <2 x i8> undef, undef
+  %res5 = shl <2 x i16> undef, undef
+  %res6 = shl <2 x i32> undef, undef
+  %res7 = shl <2 x i64> undef, undef
+  %res8 = shl <4 x i8> undef, undef
+  %res9 = shl <4 x i16> undef, undef
+  %res10 = shl <4 x i32> undef, undef
+  %res11 = shl <4 x i64> undef, undef
+  %res12 = shl <8 x i8> undef, undef
+  %res13 = shl <8 x i16> undef, undef
+  %res14 = shl <8 x i32> undef, undef
+  %res15 = shl <8 x i64> undef, undef
+  %res16 = shl <16 x i8> undef, undef
+  %res17 = shl <16 x i16> undef, undef
+  %res18 = shl <16 x i32> undef, undef
+  %res19 = shl <16 x i64> undef, undef
+; Expected 'shl' costs: 1 for scalars and for vectors of up to 128 bits in total; 2, 4 and 8 for 256-, 512- and 1024-bit vectors (presumably one op per 128-bit register - confirm against the RUN line's target).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res0 = shl i8 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res1 = shl i16 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res2 = shl i32 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res3 = shl i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res4 = shl <2 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res5 = shl <2 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res6 = shl <2 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res7 = shl <2 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res8 = shl <4 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res9 = shl <4 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res10 = shl <4 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res11 = shl <4 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res12 = shl <8 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res13 = shl <8 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res14 = shl <8 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res15 = shl <8 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res16 = shl <16 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res17 = shl <16 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res18 = shl <16 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res19 = shl <16 x i64> undef, undef
+
+  ret void;
+}
+
+define void @xor() {
+  %res0 = xor i8 undef, undef
+  %res1 = xor i16 undef, undef
+  %res2 = xor i32 undef, undef
+  %res3 = xor i64 undef, undef
+  %res4 = xor <2 x i8> undef, undef
+  %res5 = xor <2 x i16> undef, undef
+  %res6 = xor <2 x i32> undef, undef
+  %res7 = xor <2 x i64> undef, undef
+  %res8 = xor <4 x i8> undef, undef
+  %res9 = xor <4 x i16> undef, undef
+  %res10 = xor <4 x i32> undef, undef
+  %res11 = xor <4 x i64> undef, undef
+  %res12 = xor <8 x i8> undef, undef
+  %res13 = xor <8 x i16> undef, undef
+  %res14 = xor <8 x i32> undef, undef
+  %res15 = xor <8 x i64> undef, undef
+  %res16 = xor <16 x i8> undef, undef
+  %res17 = xor <16 x i16> undef, undef
+  %res18 = xor <16 x i32> undef, undef
+  %res19 = xor <16 x i64> undef, undef
+; Expected 'xor' costs: 1 for scalars and for vectors of up to 128 bits in total; 2, 4 and 8 for 256-, 512- and 1024-bit vectors (presumably one op per 128-bit register - confirm against the RUN line's target).
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res0 = xor i8 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res1 = xor i16 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res2 = xor i32 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res3 = xor i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res4 = xor <2 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res5 = xor <2 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res6 = xor <2 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res7 = xor <2 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res8 = xor <4 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res9 = xor <4 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res10 = xor <4 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res11 = xor <4 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res12 = xor <8 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res13 = xor <8 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res14 = xor <8 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res15 = xor <8 x i64> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %res16 = xor <16 x i8> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %res17 = xor <16 x i16> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %res18 = xor <16 x i32> undef, undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction:   %res19 = xor <16 x i64> undef, undef
+
+  ret void;
+}