Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -520,7 +520,8 @@
       unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
       OperandValueKind Opd2Info = OK_AnyValue,
       OperandValueProperties Opd1PropInfo = OP_None,
-      OperandValueProperties Opd2PropInfo = OP_None) const;
+      OperandValueProperties Opd2PropInfo = OP_None,
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;
 
   /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
   /// The index and subtype parameters are used by the subvector insertion and
@@ -760,7 +761,8 @@
   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
                          OperandValueKind Opd2Info,
                          OperandValueProperties Opd1PropInfo,
-                         OperandValueProperties Opd2PropInfo) = 0;
+                         OperandValueProperties Opd2PropInfo,
+                         ArrayRef<const Value *> Args) = 0;
   virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                              Type *SubTp) = 0;
   virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0;
@@ -980,9 +982,10 @@
   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
                          OperandValueKind Opd2Info,
                          OperandValueProperties Opd1PropInfo,
-                         OperandValueProperties Opd2PropInfo) override {
+                         OperandValueProperties Opd2PropInfo,
+                         ArrayRef<const Value *> Args) override {
     return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
-                                       Opd1PropInfo, Opd2PropInfo);
+                                       Opd1PropInfo, Opd2PropInfo, Args);
   }
   int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                      Type *SubTp) override {
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -305,7 +305,8 @@
                                   TTI::OperandValueKind Opd1Info,
                                   TTI::OperandValueKind Opd2Info,
                                   TTI::OperandValueProperties Opd1PropInfo,
-                                  TTI::OperandValueProperties Opd2PropInfo) {
+                                  TTI::OperandValueProperties Opd2PropInfo,
+                                  ArrayRef<const Value *> Args) {
     return 1;
   }
 
@@ -422,6 +423,65 @@
                                 VectorType *VecTy) const {
     return VF;
   }
+protected:
+  // Returns the number of bits needed to hold the magnitude of Val, not
+  // counting the sign bit (per element for vectors); isSigned is an out-param.
+  unsigned minRequiredElementSize(const Value* Val, bool &isSigned) {
+    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
+      const Constant* VectorValue = cast<Constant>(Val);
+
+      // For a constant vector the result is the maximum, over all elements,
+      // of the minimum size required by each individual element.
+      VectorType *VT = cast<VectorType>(Val->getType());
+
+      // Start out unsigned; any negative element below flips this to signed.
+      isSigned = false;
+
+      // Upper bound for the answer: the full width of one element, i.e. the
+      // total vector width divided by the number of elements.
+      unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements();
+
+      unsigned MinRequiredSize = 0;
+      for(unsigned i = 0; i < VT->getNumElements();
+        ++i) {
+        if (ConstantInt* IntElement =
+              dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
+          bool signedElement = IntElement->getValue().isNegative();
+          // Minimal signed width of the element with the sign bit dropped.
+          unsigned ElementMinRequiredSize =
+            IntElement->getValue().getMinSignedBits() - 1;
+          // A single negative element makes the whole vector signed.
+          isSigned |= signedElement;
+          // Track the largest per-element requirement seen so far.
+          if (MinRequiredSize < ElementMinRequiredSize)
+            MinRequiredSize = ElementMinRequiredSize;
+        }
+        else {
+          // Not a ConstantInt element: fall back to the full element width.
+          return MaxRequiredSize; // isSigned reflects only earlier elements.
+        }
+      }
+      return MinRequiredSize;
+    }
+
+    if (const ConstantInt* CI = dyn_cast<ConstantInt>(Val)) { // Scalar constant.
+      isSigned = CI->getValue().isNegative();
+      return CI->getValue().getMinSignedBits() - 1;
+    }
+
+    if (const CastInst* Cast = dyn_cast<SExtInst>(Val)) {
+      isSigned = true;
+      return Cast->getSrcTy()->getScalarSizeInBits() - 1; // Minus the sign bit.
+    }
+
+    if (const CastInst* Cast = dyn_cast<ZExtInst>(Val)) {
+      isSigned = false;
+      return Cast->getSrcTy()->getScalarSizeInBits(); // All source bits count.
+    }
+
+    isSigned = false; // Any other value: report unsigned, full scalar width.
+    return Val->getType()->getScalarSizeInBits();
+  }  
 };
 
 /// \brief CRTP base class for use as a mix-in that aids implementing
Index: include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- include/llvm/CodeGen/BasicTTIImpl.h
+++ include/llvm/CodeGen/BasicTTIImpl.h
@@ -308,7 +308,8 @@
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
-      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None) {
+      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
     // Check if any of the operands are vector operands.
     const TargetLoweringBase *TLI = getTLI();
     int ISD = TLI->InstructionOpcodeToISD(Opcode);
Index: include/llvm/IR/User.h
===================================================================
--- include/llvm/IR/User.h
+++ include/llvm/IR/User.h
@@ -238,6 +238,25 @@
     return make_range(value_op_begin(), value_op_end());
   }
 
+  struct const_value_op_iterator // Const operand iterator yielding Values.
+      : iterator_adaptor_base<const_value_op_iterator, const_op_iterator,
+                              std::random_access_iterator_tag, const Value *,
+                              ptrdiff_t, const Value *, const Value *> {
+    explicit const_value_op_iterator(const Use *U = nullptr) : // Wraps a Use *.
+      iterator_adaptor_base(U) {}
+    const Value *operator*() const { return *I; } // Use -> const Value *.
+    const Value *operator->() const { return operator*(); }
+  };
+  const_value_op_iterator value_op_begin() const { // Const overload.
+    return const_value_op_iterator(op_begin());
+  }
+  const_value_op_iterator value_op_end() const { // Const overload.
+    return const_value_op_iterator(op_end());
+  }
+  iterator_range<const_value_op_iterator> operand_values() const { // As range.
+    return make_range(value_op_begin(), value_op_end());
+  }
+
   /// \brief Drop all references to operands.
   ///
   /// This function is in charge of "letting go" of all objects that this User
Index: lib/Analysis/CostModel.cpp
===================================================================
--- lib/Analysis/CostModel.cpp
+++ lib/Analysis/CostModel.cpp
@@ -438,6 +438,14 @@
       getOperandInfo(I->getOperand(0));
     TargetTransformInfo::OperandValueKind Op2VK =
       getOperandInfo(I->getOperand(1));
+    if (I->getOpcode() == Instruction::Mul) {
+      SmallVector<const Value*, 2> Operands(I->value_op_begin(), 
+                                            I->value_op_end());
+      return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
+                                         Op2VK, TargetTransformInfo::OP_None, 
+                                         TargetTransformInfo::OP_None, 
+                                         Operands);
+    }
     return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
                                        Op2VK);
   }
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -277,9 +277,10 @@
 int TargetTransformInfo::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
     OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
-    OperandValueProperties Opd2PropInfo) const {
+    OperandValueProperties Opd2PropInfo,
+    ArrayRef<const Value *> Args) const {
   int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
-                                             Opd1PropInfo, Opd2PropInfo);
+                                             Opd1PropInfo, Opd2PropInfo, Args);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
Index: lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -102,7 +102,8 @@
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
-      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>());
 
   int getAddressComputationCost(Type *Ty, bool IsComplex);
 
Index: lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -374,7 +374,7 @@
 int AArch64TTIImpl::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
     TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
-    TTI::OperandValueProperties Opd2PropInfo) {
+    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
   // Legalize the type.
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
 
Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -83,7 +83,8 @@
     TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
     TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
     TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
-    TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+    TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+    ArrayRef<const Value *> Args = ArrayRef<const Value *>());
 
   unsigned getCFInstrCost(unsigned Opcode);
 
Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -110,7 +110,7 @@
 int AMDGPUTTIImpl::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
     TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
-    TTI::OperandValueProperties Opd2PropInfo) {
+    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args ) {
 
   EVT OrigTy = TLI->getValueType(DL, Ty);
   if (!OrigTy.isSimple()) {
Index: lib/Target/ARM/ARMTargetTransformInfo.h
===================================================================
--- lib/Target/ARM/ARMTargetTransformInfo.h
+++ lib/Target/ARM/ARMTargetTransformInfo.h
@@ -113,7 +113,8 @@
       TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
-      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>());
 
   int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                       unsigned AddressSpace);
Index: lib/Target/ARM/ARMTargetTransformInfo.cpp
===================================================================
--- lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -430,7 +430,8 @@
 int ARMTTIImpl::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
     TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
-    TTI::OperandValueProperties Opd2PropInfo) {
+    TTI::OperandValueProperties Opd2PropInfo,
+    ArrayRef<const Value *> Args) {
 
   int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
Index: lib/Target/Lanai/LanaiTargetTransformInfo.h
===================================================================
--- lib/Target/Lanai/LanaiTargetTransformInfo.h
+++ lib/Target/Lanai/LanaiTargetTransformInfo.h
@@ -54,7 +54,8 @@
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
-      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None) {
+      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
     int ISD = TLI->InstructionOpcodeToISD(Opcode);
 
     switch (ISD) {
Index: lib/Target/NVPTX/NVPTXTargetTransformInfo.h
===================================================================
--- lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -54,7 +54,8 @@
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
-      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>());
 
   void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
 };
Index: lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
===================================================================
--- lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -115,7 +115,7 @@
 int NVPTXTTIImpl::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
     TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
-    TTI::OperandValueProperties Opd2PropInfo) {
+    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
   // Legalize the type.
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
 
Index: lib/Target/PowerPC/PPCTargetTransformInfo.h
===================================================================
--- lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -71,7 +71,8 @@
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
-      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>());
   int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
Index: lib/Target/PowerPC/PPCTargetTransformInfo.cpp
===================================================================
--- lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -281,7 +281,7 @@
 int PPCTTIImpl::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
     TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
-    TTI::OperandValueProperties Opd2PropInfo) {
+    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
 
   // Fallback to the default implementation.
Index: lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
===================================================================
--- lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -61,7 +61,8 @@
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
-      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>());
   unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
 
   /// @}
Index: lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
===================================================================
--- lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -46,7 +46,7 @@
 unsigned WebAssemblyTTIImpl::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
     TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
-    TTI::OperandValueProperties Opd2PropInfo) {
+    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
 
   unsigned Cost = BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
       Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
Index: lib/Target/X86/X86TargetTransformInfo.h
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.h
+++ lib/Target/X86/X86TargetTransformInfo.h
@@ -60,7 +60,8 @@
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
-      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>());
   int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -111,15 +111,58 @@
 }
 
 int X86TTIImpl::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
-    TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
-    TTI::OperandValueProperties Opd2PropInfo) {
+    unsigned Opcode, Type *Ty,  
+    TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
+    TTI::OperandValueProperties Opd1PropInfo,
+    TTI::OperandValueProperties Opd2PropInfo,
+    ArrayRef<const Value *> Args) {
   // Legalize the type.
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
 
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
 
+  static const CostTblEntry SLMCostTable[] = {
+    { ISD::MUL,  MVT::v4i32, 11 }, // pmulld
+    { ISD::MUL,  MVT::v8i16, 2  }, // pmullw
+    { ISD::MUL,  MVT::v16i8, 14 }, // extend/pmullw/trunc sequence.
+    { ISD::FMUL, MVT::f64,   2  }, // mulsd
+    { ISD::FMUL, MVT::v2f64, 4  }, // mulpd
+    { ISD::FMUL, MVT::v4f32, 2  }, // mulps
+    { ISD::FDIV, MVT::f32,   17 }, // divss
+    { ISD::FDIV, MVT::v4f32, 39 }, // divps
+    { ISD::FDIV, MVT::f64,   32 }, // divsd
+    { ISD::FDIV, MVT::v2f64, 69 }, // divpd
+    { ISD::FADD, MVT::v2f64, 2  }, // addpd
+    { ISD::FSUB, MVT::v2f64, 2  }, // subpd
+  };
+
+  if (ST->isSLM()) {
+    if (Args.size() == 2 && ISD == ISD::MUL && LT.second == MVT::v4i32) {
+      // Check if the operands can be shrunk into a smaller datatype.
+      bool Op1Signed = false;
+      unsigned Op1MinSize = BaseT::minRequiredElementSize(Args[0], Op1Signed);
+      bool Op2Signed = false;
+      unsigned Op2MinSize = BaseT::minRequiredElementSize(Args[1], Op2Signed);
+
+      bool signedMode = Op1Signed | Op2Signed;
+      unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
+
+      if (OpMinSize <= 7)
+        return LT.first * 3; // pmullw/sext
+      if (!signedMode && OpMinSize <= 8)
+        return LT.first * 3; // pmullw/zext
+      if (OpMinSize <= 15)
+        return LT.first * 5; // pmullw/pmulhw/pshuf
+      if (!signedMode && OpMinSize <= 16)
+        return LT.first * 5; // pmullw/pmulhw/pshuf
+    }
+    if (const auto *Entry = CostTableLookup(SLMCostTable, ISD,
+                                            LT.second)) {
+      return LT.first * Entry->Cost;
+    }
+  }
+
   if (ISD == ISD::SDIV &&
       Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
       Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
@@ -574,6 +617,16 @@
     { ISD::MUL,     MVT::v4i64,    8 },
     { ISD::MUL,     MVT::v8i64,    8 }
   };
+  static const CostTblEntry CustomLoweredSLM[] = {
+    // v2i64/v4i64 mul is custom lowered as a series of long
+    // multiplies (3), shifts (3) and adds (2).
+    // On SLM the muldq variant has a throughput of 2.
+    { ISD::MUL,     MVT::v2i64,    11 },
+  };
+  if (ST->isSLM()) 
+    if (const auto *Entry = CostTableLookup(CustomLoweredSLM, ISD, 
+                                                         LT.second))
+      return LT.first * Entry->Cost;                                                          
   if (const auto *Entry = CostTableLookup(CustomLowered, ISD, LT.second))
     return LT.first * Entry->Cost;
 
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -80,6 +80,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/IR/Verifier.h"
@@ -6968,7 +6969,11 @@
     } else if (Legal->isUniform(Op2)) {
       Op2VK = TargetTransformInfo::OK_UniformValue;
     }
-
+    if (I->getOpcode() == Instruction::Mul) {
+      SmallVector<const Value *, 4> Operands(I->operand_values()); 
+      return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK,
+                                        Op2VK, Op1VP, Op2VP, Operands);
+    }
     return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK,
                                       Op1VP, Op2VP);
   }
Index: test/Analysis/CostModel/X86/slm-arith-costs.ll
===================================================================
--- test/Analysis/CostModel/X86/slm-arith-costs.ll
+++ test/Analysis/CostModel/X86/slm-arith-costs.ll
@@ -0,0 +1,323 @@
+; RUN: opt < %s -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefix=SLM
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; 8bit mul
+define i8 @slm-costs_8_scalar_mul(i8 %a, i8 %b)  {
+entry:
+; SLM:  cost of 1 {{.*}} mul nsw i8
+  %res = mul nsw i8 %a, %b
+  ret i8 %res
+}
+
+define <2 x i8> @slm-costs_8_v2_mul(<2 x i8> %a, <2 x i8> %b)  {
+entry:
+; SLM:  cost of 11 {{.*}} mul nsw <2 x i8>
+  %res = mul nsw <2 x i8> %a, %b
+  ret <2 x i8> %res
+}
+
+define <4 x i8> @slm-costs_8_v4_mul(<4 x i8> %a, <4 x i8> %b)  {
+entry:
+; SLM:  cost of 3 {{.*}} mul nsw <4 x i8>
+  %res = mul nsw <4 x i8> %a, %b
+  ret <4 x i8> %res
+}
+
+define <4 x i32> @slm-costs_8_v4_zext_mul(<4 x i8> %a)  {
+entry:
+; SLM:  cost of 3 {{.*}} mul nsw <4 x i32>
+  %zext = zext <4 x i8> %a to <4 x i32> 
+  %res = mul nsw <4 x i32> %zext, <i32 255, i32 255, i32 255, i32 255>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @slm-costs_8_v4_zext_mul_fail(<4 x i8> %a)  {
+entry:
+; SLM:  cost of 5 {{.*}} mul nsw <4 x i32>
+  %zext = zext <4 x i8> %a to <4 x i32>
+  %res = mul nsw <4 x i32> %zext, <i32 255, i32 255, i32 -1, i32 255>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @slm-costs_8_v4_zext_mul_fail_2(<4 x i8> %a)  {
+entry:
+; SLM:  cost of 5 {{.*}} mul nsw <4 x i32>
+  %zext = zext <4 x i8> %a to <4 x i32>
+  %res = mul nsw <4 x i32> %zext, <i32 255, i32 256, i32 255, i32 255>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @slm-costs_8_v4_sext_mul(<4 x i8> %a)  {
+entry:
+; SLM:  cost of 3 {{.*}} mul nsw <4 x i32>
+  %sext = sext <4 x i8> %a to <4 x i32>
+  %res = mul nsw <4 x i32> %sext, <i32 127, i32 -128, i32 127, i32 -128>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @slm-costs_8_v4_sext_mul_fail(<4 x i8> %a)  {
+entry:
+; SLM:  cost of 5 {{.*}} mul nsw <4 x i32>
+  %sext = sext <4 x i8> %a to <4 x i32>
+  %res = mul nsw <4 x i32> %sext, <i32 127, i32 -128, i32 128, i32 -128>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @slm-costs_8_v4_sext_mul_fail_2(<4 x i8> %a)  {
+entry:
+; SLM:  cost of 5 {{.*}} mul nsw <4 x i32>
+  %sext = sext <4 x i8> %a to <4 x i32>
+  %res = mul nsw <4 x i32> %sext, <i32 127, i32 -129, i32 127, i32 -128>
+  ret <4 x i32> %res
+}
+
+define <8 x i8> @slm-costs_8_v8_mul(<8 x i8> %a, <8 x i8> %b)  {
+entry:
+; SLM:  cost of 2 {{.*}} mul nsw <8 x i8>
+  %res = mul nsw <8 x i8> %a, %b
+  ret <8 x i8> %res
+}
+
+define <16 x i8> @slm-costs_8_v16_mul(<16 x i8> %a, <16 x i8> %b)  {
+entry:
+; SLM:  cost of 14 {{.*}} mul nsw <16 x i8>
+  %res = mul nsw <16 x i8> %a, %b
+  ret <16 x i8> %res
+}
+
+; 16bit mul
+define i16 @slm-costs_16_scalar_mul(i16 %a, i16 %b)  {
+entry:
+; SLM:  cost of 1 {{.*}} mul nsw i16
+  %res = mul nsw i16 %a, %b
+  ret i16 %res
+}
+
+define <2 x i16> @slm-costs_16_v2_mul(<2 x i16> %a, <2 x i16> %b)  {
+entry:
+; SLM:  cost of 11 {{.*}} mul nsw <2 x i16>
+  %res = mul nsw <2 x i16> %a, %b
+  ret <2 x i16> %res
+}
+
+define <4 x i16> @slm-costs_16_v4_mul(<4 x i16> %a, <4 x i16> %b)  {
+entry:
+; SLM:  cost of 5 {{.*}} mul nsw <4 x i16>
+  %res = mul nsw <4 x i16> %a, %b
+  ret <4 x i16> %res
+}
+
+define <4 x i32> @slm-costs_16_v4_zext_mul(<4 x i16> %a)  {
+entry:
+; SLM:  cost of 5 {{.*}} mul nsw <4 x i32>
+  %zext = zext <4 x i16> %a to <4 x i32>
+  %res = mul nsw <4 x i32> %zext, <i32 65535, i32 65535, i32 65535, i32 65535>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @slm-costs_16_v4_zext_mul_fail(<4 x i16> %a)  {
+entry:
+; SLM:  cost of 11 {{.*}} mul nsw <4 x i32>
+  %zext = zext <4 x i16> %a to <4 x i32>
+  %res = mul nsw <4 x i32> %zext, <i32 -1, i32 65535, i32 65535, i32 65535>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @slm-costs_16_v4_zext_mul_fail_2(<4 x i16> %a)  {
+entry:
+; SLM:  cost of 11 {{.*}} mul nsw <4 x i32>
+  %zext = zext <4 x i16> %a to <4 x i32>
+  %res = mul nsw <4 x i32> %zext, <i32 65536, i32 65535, i32 65535, i32 65535>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @slm-costs_16_v4_sext_mul(<4 x i16> %a)  {
+entry:
+; SLM:  cost of 5 {{.*}} mul nsw <4 x i32>
+  %sext = sext <4 x i16> %a to <4 x i32>
+  %res = mul nsw <4 x i32> %sext, <i32 32767, i32 -32768, i32 32767, i32 -32768>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @slm-costs_16_v4_sext_mul_fail(<4 x i16> %a)  {
+entry:
+; SLM:  cost of 11 {{.*}} mul nsw <4 x i32>
+  %sext = sext <4 x i16> %a to <4 x i32>
+  %res = mul nsw <4 x i32> %sext, <i32 32767, i32 -32768, i32 32768, i32 -32768>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @slm-costs_16_v4_sext_mul_fail_2(<4 x i16> %a)  {
+entry:
+; SLM:  cost of 11 {{.*}} mul nsw <4 x i32>
+  %sext = sext <4 x i16> %a to <4 x i32>
+  %res = mul nsw <4 x i32> %sext, <i32 32767, i32 -32768, i32 32767, i32 -32769>
+  ret <4 x i32> %res
+}
+
+define <8 x i16> @slm-costs_16_v8_mul(<8 x i16> %a, <8 x i16> %b)  {
+entry:
+; SLM:  cost of 2 {{.*}} mul nsw <8 x i16>
+  %res = mul nsw <8 x i16> %a, %b
+  ret <8 x i16> %res
+}
+
+define <16 x i16> @slm-costs_16_v16_mul(<16 x i16> %a, <16 x i16> %b)  {
+entry:
+; SLM:  cost of 4 {{.*}} mul nsw <16 x i16>
+  %res = mul nsw <16 x i16> %a, %b
+  ret <16 x i16> %res
+}
+
+; 32bit mul
+define i32 @slm-costs_32_scalar_mul(i32 %a, i32 %b)  {
+entry:
+; SLM:  cost of 1 {{.*}} mul nsw i32
+  %res = mul nsw i32 %a, %b
+  ret i32 %res 
+}
+
+define <2 x i32> @slm-costs_32_v2_mul(<2 x i32> %a, <2 x i32> %b)  {
+entry:
+; SLM:  cost of 11 {{.*}} mul nsw <2 x i32>
+  %res = mul nsw <2 x i32> %a, %b
+  ret <2 x i32> %res
+}
+
+define <4 x i32> @slm-costs_32_v4_mul(<4 x i32> %a, <4 x i32> %b)  {
+entry:
+; SLM:  cost of 11 {{.*}} mul nsw <4 x i32>
+  %res = mul nsw <4 x i32> %a, %b
+  ret <4 x i32> %res
+}
+
+define <8 x i32> @slm-costs_32_v8_mul(<8 x i32> %a, <8 x i32> %b)  {
+entry:
+; SLM:  cost of 22 {{.*}} mul nsw <8 x i32>
+  %res = mul nsw <8 x i32> %a, %b
+  ret <8 x i32> %res
+}
+
+define <16 x i32> @slm-costs_32_v16_mul(<16 x i32> %a, <16 x i32> %b)  {
+entry:
+; SLM:  cost of 44 {{.*}} mul nsw <16 x i32>
+  %res = mul nsw <16 x i32> %a, %b
+  ret <16 x i32> %res
+}
+
+; 64bit mul
+define i64 @slm-costs_64_scalar_mul(i64 %a, i64 %b)  {
+entry:
+; SLM:  cost of 1 {{.*}} mul nsw i64
+  %res = mul nsw i64 %a, %b
+  ret i64 %res
+}
+
+define <2 x i64> @slm-costs_64_v2_mul(<2 x i64> %a, <2 x i64> %b)  {
+entry:
+; SLM:  cost of 11 {{.*}} mul nsw <2 x i64>
+  %res = mul nsw <2 x i64> %a, %b
+  ret <2 x i64> %res
+}
+
+define <4 x i64> @slm-costs_64_v4_mul(<4 x i64> %a, <4 x i64> %b)  {
+entry:
+; SLM:  cost of 22 {{.*}} mul nsw <4 x i64>
+  %res = mul nsw <4 x i64> %a, %b
+  ret <4 x i64> %res
+}
+
+define <8 x i64> @slm-costs_64_v8_mul(<8 x i64> %a, <8 x i64> %b)  {
+entry:
+; SLM:  cost of 44 {{.*}} mul nsw <8 x i64>
+  %res = mul nsw <8 x i64> %a, %b
+  ret <8 x i64> %res
+}
+
+define <16 x i64> @slm-costs_64_v16_mul(<16 x i64> %a, <16 x i64> %b)  {
+entry:
+; SLM:  cost of 88 {{.*}} mul nsw <16 x i64>
+  %res = mul nsw <16 x i64> %a, %b
+  ret <16 x i64> %res
+}
+
+; mulsd
+define double @slm-costs_mulsd(double %a, double %b)  {
+entry:
+; SLM:  cost of 2 {{.*}} fmul double
+  %res = fmul double %a, %b
+  ret double %res
+}
+
+; mulpd
+define <2 x double> @slm-costs_mulpd(<2 x double> %a, <2 x double> %b)  {
+entry:
+; SLM:  cost of 4 {{.*}} fmul <2 x double>
+  %res = fmul <2 x double> %a, %b
+  ret <2 x double> %res
+}
+
+; mulps
+define <4 x float> @slm-costs_mulps(<4 x float> %a, <4 x float> %b)  {
+entry:
+; SLM:  cost of 2 {{.*}} fmul <4 x float>
+  %res = fmul <4 x float> %a, %b
+  ret <4 x float> %res
+}
+
+; divss
+define float @slm-costs_divss(float %a, float %b)  {
+entry:
+; SLM:  cost of 17 {{.*}} fdiv float
+  %res = fdiv float %a, %b
+  ret float %res
+}
+
+; divps
+define <4 x float> @slm-costs_divps(<4 x float> %a, <4 x float> %b)  {
+entry:
+; SLM:  cost of 39 {{.*}} fdiv <4 x float>
+  %res = fdiv <4 x float> %a, %b
+  ret <4 x float> %res
+}
+
+; divsd
+define double @slm-costs_divsd(double %a, double %b)  {
+entry:
+; SLM:  cost of 32 {{.*}} fdiv double
+  %res = fdiv double %a, %b
+  ret double %res
+}
+
+; divpd
+define <2 x double> @slm-costs_divpd(<2 x double> %a, <2 x double> %b)  {
+entry:
+; SLM:  cost of 69 {{.*}} fdiv <2 x double>
+  %res = fdiv <2 x double> %a, %b
+  ret <2 x double> %res
+}
+
+; addpd
+define <2 x double> @slm-costs_addpd(<2 x double> %a, <2 x double> %b)  {
+entry:
+; SLM:  cost of 2 {{.*}} fadd <2 x double>
+  %res = fadd <2 x double> %a, %b
+  ret <2 x double> %res
+}
+
+; subpd
+define <2 x double> @slm-costs_subpd(<2 x double> %a, <2 x double> %b)  {
+entry:
+; SLM:  cost of 2 {{.*}} fsub <2 x double>
+  %res = fsub <2 x double> %a, %b
+  ret <2 x double> %res
+}
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 4.0.0 (cfe/trunk 287996)"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
Index: test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll
===================================================================
--- test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll
+++ test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll
@@ -0,0 +1,145 @@
+; REQUIRES: asserts
+; RUN: opt < %s -S -debug -loop-vectorize -mcpu=slm 2>&1 | FileCheck %s --check-prefix=SLM
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i8 @mul_i8(i8* %dataA, i8* %dataB, i32 %N) {
+entry:
+  %cmp12 = icmp eq i32 %N, 0
+  br i1 %cmp12, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %N to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  %phitmp = trunc i32 %add4 to i8
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  %acc.0.lcssa = phi i8 [ 0, %entry ], [ %phitmp, %for.cond.cleanup.loopexit ]
+  ret i8 %acc.0.lcssa
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %acc.013 = phi i32 [ %add4, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i8, i8* %dataA, i64 %indvars.iv
+  %0 = load i8, i8* %arrayidx, align 1
+  %conv = sext i8 %0 to i32
+  %arrayidx2 = getelementptr inbounds i8, i8* %dataB, i64 %indvars.iv
+  %1 = load i8, i8* %arrayidx2, align 1
+  %conv3 = sext i8 %1 to i32
+; Both mul operands are sext from i8.
+; Expected lowering: pmullw\sext sequence.
+; SLM:  cost of 3 for VF 4 {{.*}} mul nsw i32  
+  %mul = mul nsw i32 %conv3, %conv
+; The mul operands are zext and sext from i8.
+; Expected lowering: pmulhw\pmullw\pshuf sequence.
+; SLM:  cost of 5 for VF 4 {{.*}} mul nsw i32
+  %conv4 = zext i8 %1 to i32
+  %mul2 = mul nsw i32 %conv4, %conv
+  %sum0 = add i32 %mul, %mul2
+; Both mul operands are zext from i8.
+; Expected lowering: pmullw\zext sequence.
+; SLM:  cost of 3 for VF 4 {{.*}} mul nsw i32
+  %conv5 = zext i8 %0 to i32
+  %mul3 = mul nsw i32 %conv5, %conv4
+  %sum1 = add i32 %sum0, %mul3
+; The mul operands are sext from i8 and the constant -120.
+; Expected lowering: pmullw\sext sequence.
+; SLM:  cost of 3 for VF 4 {{.*}} mul nsw i32
+  %mul4 = mul nsw i32 -120, %conv3
+  %sum2 = add i32 %sum1, %mul4
+; The mul operands are sext from i8 and the constant 250.
+; Expected lowering: pmulhw\pmullw\pshuf sequence.
+; SLM:  cost of 5 for VF 4 {{.*}} mul nsw i32
+  %mul5 = mul nsw i32 250, %conv3
+  %sum3 = add i32 %sum2, %mul5
+; The mul operands are zext from i8 and the constant -120.
+; Expected lowering: pmulhw\pmullw\pshuf sequence.
+; SLM:  cost of 5 for VF 4 {{.*}} mul nsw i32
+  %mul6 = mul nsw i32 -120, %conv4
+  %sum4 = add i32 %sum3, %mul6
+; The mul operands are zext from i8 and the constant 250.
+; Expected lowering: pmullw\zext sequence.
+; SLM:  cost of 3 for VF 4 {{.*}} mul nsw i32
+  %mul7 = mul nsw i32 250, %conv4
+  %sum5 = add i32 %sum4, %mul7
+  %add = add i32 %acc.013, 5
+  %add4 = add i32 %add, %sum5
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+define i16 @mul_i16(i16* %dataA, i16* %dataB, i32 %N) {
+entry:
+  %cmp12 = icmp eq i32 %N, 0
+  br i1 %cmp12, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %N to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  %phitmp = trunc i32 %add4 to i16
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  %acc.0.lcssa = phi i16 [ 0, %entry ], [ %phitmp, %for.cond.cleanup.loopexit ]
+  ret i16 %acc.0.lcssa
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %acc.013 = phi i32 [ %add4, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i16, i16* %dataA, i64 %indvars.iv
+  %0 = load i16, i16* %arrayidx, align 1
+  %conv = sext i16 %0 to i32
+  %arrayidx2 = getelementptr inbounds i16, i16* %dataB, i64 %indvars.iv
+  %1 = load i16, i16* %arrayidx2, align 1
+  %conv3 = sext i16 %1 to i32
+; Both mul operands are sext from i16.
+; Expected lowering: pmulhw\pmullw\pshuf sequence.
+; SLM:  cost of 5 for VF 4 {{.*}} mul nsw i32  
+  %mul = mul nsw i32 %conv3, %conv
+; The mul operands are zext and sext from i16.
+; Expected lowering: pmulld.
+; SLM:  cost of 11 for VF 4 {{.*}} mul nsw i32
+  %conv4 = zext i16 %1 to i32
+  %mul2 = mul nsw i32 %conv4, %conv
+  %sum0 = add i32 %mul, %mul2
+; Both mul operands are zext from i16.
+; Expected lowering: pmulhw\pmullw\zext sequence.
+; SLM:  cost of 5 for VF 4 {{.*}} mul nsw i32
+  %conv5 = zext i16 %0 to i32
+  %mul3 = mul nsw i32 %conv5, %conv4
+  %sum1 = add i32 %sum0, %mul3
+; The mul operands are sext from i16 and the constant -32000.
+; Expected lowering: pmulhw\pmullw\sext sequence.
+; SLM:  cost of 5 for VF 4 {{.*}} mul nsw i32
+  %mul4 = mul nsw i32 -32000, %conv3
+  %sum2 = add i32 %sum1, %mul4
+; The mul operands are sext from i16 and the constant 64000.
+; Expected lowering: pmulld.
+; SLM:  cost of 11 for VF 4 {{.*}} mul nsw i32
+  %mul5 = mul nsw i32 64000, %conv3
+  %sum3 = add i32 %sum2, %mul5
+; The mul operands are zext from i16 and the constant -32000.
+; Expected lowering: pmulld.
+; SLM:  cost of 11 for VF 4 {{.*}} mul nsw i32
+  %mul6 = mul nsw i32 -32000, %conv4
+  %sum4 = add i32 %sum3, %mul6
+; The mul operands are zext from i16 and the constant 64000.
+; Expected lowering: pmulhw\pmullw\zext sequence.
+; SLM:  cost of 5 for VF 4 {{.*}} mul nsw i32
+  %mul7 = mul nsw i32 64000, %conv4
+  %sum5 = add i32 %sum4, %mul7
+  %add = add i32 %acc.013, 5
+  %add4 = add i32 %add, %sum5
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+
Index: unittests/IR/UserTest.cpp
===================================================================
--- unittests/IR/UserTest.cpp
+++ unittests/IR/UserTest.cpp
@@ -84,6 +84,22 @@
   EXPECT_FALSE(P.value_op_begin() >= P.value_op_end());
   EXPECT_EQ(10, std::distance(P.value_op_begin(), P.value_op_end()));
 
+  // const value op iteration
+  const PHINode *IP = &P;
+  EXPECT_TRUE(IP->value_op_begin() == IP->value_op_begin());
+  EXPECT_FALSE(IP->value_op_begin() == IP->value_op_end());
+  EXPECT_TRUE(IP->value_op_begin() != IP->value_op_end());
+  EXPECT_FALSE(IP->value_op_end() != IP->value_op_end());
+  EXPECT_TRUE(IP->value_op_begin() < IP->value_op_end());
+  EXPECT_FALSE(IP->value_op_begin() < IP->value_op_begin());
+  EXPECT_TRUE(IP->value_op_end() > IP->value_op_begin());
+  EXPECT_FALSE(IP->value_op_begin() > IP->value_op_begin());
+  EXPECT_TRUE(IP->value_op_begin() <= IP->value_op_begin());
+  EXPECT_FALSE(IP->value_op_end() <= IP->value_op_begin());
+  EXPECT_TRUE(IP->value_op_begin() >= IP->value_op_begin());
+  EXPECT_FALSE(IP->value_op_begin() >= IP->value_op_end());
+  EXPECT_EQ(10, std::distance(IP->value_op_begin(), IP->value_op_end()));
+
   User::value_op_iterator I = P.value_op_begin();
   I += 3;
   EXPECT_EQ(std::next(P.value_op_begin(), 3), I);
@@ -91,6 +107,15 @@
   I++;
   EXPECT_EQ(P.getOperand(6), I[2]);
   EXPECT_EQ(P.value_op_end(), (I - 2) + 8);
+
+  // const value op
+  User::const_value_op_iterator CI = IP->value_op_begin();
+  CI += 3;
+  EXPECT_EQ(std::next(IP->value_op_begin(), 3), CI);
+  EXPECT_EQ(IP->getOperand(3), *CI);
+  CI++;
+  EXPECT_EQ(IP->getOperand(6), CI[2]);
+  EXPECT_EQ(IP->value_op_end(), (CI - 2) + 8);
 }
 
 TEST(UserTest, PersonalityUser) {