Index: lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -49,8 +49,16 @@
     Mod(nullptr),
     HasUnsafeFPMath(false) { }
 
+  /// \brief Promotes uniform 16 bit operation to equivalent 32 bit operation by
+  /// zero extending operands to 32 bits, replacing 16 bit operation with
+  /// equivalent 32 bit operation, and truncating the result of 32 bit operation
+  /// back to 16 bits. Always returns true.
+  bool promoteUniformI16OpToI32Op(BinaryOperator &I) const;
+
   bool visitFDiv(BinaryOperator &I);
+  bool visitBinaryOperator(BinaryOperator &I);
+
   bool visitInstruction(Instruction &I) { return false; }
@@ -79,6 +87,25 @@
   return UnsafeDiv || CNum->isExactlyValue(+1.0);
 }
 
+bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(BinaryOperator &I) const {
+  assert(DA->isUniform(&I) && "Op must be uniform");
+  assert(I.getType()->isIntegerTy(16) && "Op must be 16 bits");
+
+  IRBuilder<> Builder(&I);
+  Builder.SetCurrentDebugLocation(I.getDebugLoc());
+
+  Value *ZExtOp0 = Builder.CreateZExt(I.getOperand(0), Builder.getInt32Ty());
+  Value *ZExtOp1 = Builder.CreateZExt(I.getOperand(1), Builder.getInt32Ty());
+  Value *ZExtRes = Builder.CreateBinOp(I.getOpcode(), ZExtOp0, ZExtOp1);
+  Value *TruncRes = Builder.CreateTrunc(ZExtRes, Builder.getInt16Ty());
+
+  I.replaceAllUsesWith(TruncRes);
+  I.dropAllReferences();
+  I.eraseFromParent();
+
+  return true;
+}
+
 // Insert an intrinsic for fast fdiv for safe math situations where we can
 // reduce precision. Leave fdiv for situations where the generic node is
 // expected to be optimized.
@@ -149,6 +176,16 @@
   return true;
 }
 
+bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
+  bool Changed = false;
+
+  // Promote uniform 16 bit operation to equivalent 32 bit operation.
+  if (DA->isUniform(&I) && I.getType()->isIntegerTy(16))
+    Changed |= promoteUniformI16OpToI32Op(I);
+
+  return Changed;
+}
+
 static bool hasUnsafeFPMath(const Function &F) {
   Attribute Attr = F.getFnAttribute("unsafe-fp-math");
   return Attr.getValueAsString() == "true";
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -540,6 +540,10 @@
 
 bool SITargetLowering::isTypeDesirableForOp(unsigned Op, EVT VT) const {
+  // i16 is not desirable unless it is a load or a store.
+  if (VT == MVT::i16 && Op != ISD::LOAD && Op != ISD::STORE)
+    return false;
+
   // SimplifySetCC uses this function to determine whether or not it should
   // create setcc with i1 operands. We don't have instructions for i1 setcc.
   if (VT == MVT::i1 && Op == ISD::SETCC)
Index: test/CodeGen/AMDGPU/mul_uint24.ll
===================================================================
--- test/CodeGen/AMDGPU/mul_uint24.ll
+++ test/CodeGen/AMDGPU/mul_uint24.ll
@@ -23,8 +23,8 @@
 ; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
 ; EG: 16
-; SI: v_mul_u32_u24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 16
+; SI: s_mul_i32
+; SI: s_sext_i32_i16
 define void @test_umul24_i16_sext(i32 addrspace(1)* %out, i16 %a, i16 %b) {
 entry:
   %mul = mul i16 %a, %b
@@ -34,9 +34,9 @@
 }
 
 ; FUNC-LABEL: {{^}}test_umul24_i16:
+; SI: s_mul_i32
 ; SI: s_and_b32
-; SI: v_mul_u32_u24_e32
-; SI: v_and_b32_e32
+; SI: v_mov_b32_e32
 define void @test_umul24_i16(i32 addrspace(1)* %out, i16 %a, i16 %b) {
 entry:
   %mul = mul i16 %a, %b
Index: test/CodeGen/AMDGPU/sdivrem24.ll
===================================================================
--- test/CodeGen/AMDGPU/sdivrem24.ll
+++ test/CodeGen/AMDGPU/sdivrem24.ll
@@ -22,10 +22,10 @@
 }
 
 ; FUNC-LABEL: {{^}}sdiv24_i16:
-; SI: v_cvt_f32_i32
-; SI: v_cvt_f32_i32
-; SI: v_rcp_f32
-; SI: v_cvt_i32_f32
+; SI: v_cvt_f32_u32_e32
+; SI: v_cvt_f32_u32_e32
+; SI: v_rcp_f32_e32
+; SI: v_cvt_u32_f32_e32
 
 ; EG: INT_TO_FLT
 ; EG-DAG: INT_TO_FLT
@@ -140,10 +140,10 @@
 }
 
 ; FUNC-LABEL: {{^}}srem24_i16:
-; SI: v_cvt_f32_i32
-; SI: v_cvt_f32_i32
-; SI: v_rcp_f32
-; SI: v_cvt_i32_f32
+; SI: v_cvt_f32_u32_e32
+; SI: v_cvt_f32_u32_e32
+; SI: v_rcp_f32_e32
+; SI: v_cvt_u32_f32_e32
 
 ; EG: INT_TO_FLT
 ; EG-DAG: INT_TO_FLT