diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1256,7 +1256,7 @@
 
 // Same as a 32-bit inreg
 def : GCNPat<
-  (i32 (sext i16:$src)),
+  (i32 (UniformUnaryFrag<sext> i16:$src)),
   (S_SEXT_I32_I16 $src)
 >;
 
@@ -1283,7 +1283,7 @@
 >;
 
 def : GCNPat <
-  (i64 (sext i16:$src)),
+  (i64 (UniformUnaryFrag<sext> i16:$src)),
     (REG_SEQUENCE SReg_64, (i32 (S_SEXT_I32_I16 $src)), sub0,
     (i32 (COPY_TO_REGCLASS (S_ASHR_I32 (i32 (S_SEXT_I32_I16 $src)), (S_MOV_B32 (i32 31))), SGPR_32)), sub1)
 >;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -402,6 +402,20 @@
 } // End SubtargetPredicate = isGFX8Plus
 } // End SchedRW = [Write64Bit]
 
+def : GCNPat<
+  (i64 (getDivergentFrag<sext>.ret i16:$src)),
+  (REG_SEQUENCE VReg_64,
+    (i32 (V_BFE_I32 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10)))), sub0,
+    (i32 (COPY_TO_REGCLASS
+      (V_ASHRREV_I32_e32 (S_MOV_B32 (i32 0x1f)), (i32 (V_BFE_I32 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10))))
+      ), VGPR_32)), sub1)
+>;
+
+def : GCNPat<
+  (i32 (getDivergentFrag<sext>.ret i16:$src)),
+  (i32 (V_BFE_I32 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10))))
+>;
+
 let SubtargetPredicate = isGFX6GFX7GFX10 in {
 def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
 } // End SubtargetPredicate = isGFX6GFX7GFX10
diff --git a/llvm/test/CodeGen/AMDGPU/sext-divergence-driven-isel.ll b/llvm/test/CodeGen/AMDGPU/sext-divergence-driven-isel.ll
new file mode 100755
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sext-divergence-driven-isel.ll
@@ -0,0 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s
+
+define amdgpu_kernel void @sext_i16_to_i32_uniform(i32 addrspace(1)* %out, i16 %a, i32 %b) {
+; GCN-LABEL: sext_i16_to_i32_uniform:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_sext_i32_i16 s0, s0
+; GCN-NEXT:    s_add_i32 s0, s1, s0
+; GCN-NEXT:    v_mov_b32_e32 v0, s0
+; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
+  %sext = sext i16 %a to i32
+  %res = add i32 %b, %sext
+  store i32 %res, i32 addrspace(1)* %out
+  ret void
+}
+
+
+define amdgpu_kernel void @sext_i16_to_i64_uniform(i64 addrspace(1)* %out, i16 %a, i64 %b) {
+; GCN-LABEL: sext_i16_to_i64_uniform:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; GCN-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x100000
+; GCN-NEXT:    s_add_u32 s0, s0, s2
+; GCN-NEXT:    s_addc_u32 s1, s1, s3
+; GCN-NEXT:    v_mov_b32_e32 v0, s0
+; GCN-NEXT:    v_mov_b32_e32 v1, s1
+; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
+  %sext = sext i16 %a to i64
+  %res = add i64 %b, %sext
+  store i64 %res, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @sext_i16_to_i32_divergent(i32 addrspace(1)* %out, i16 %a, i32 %b) {
+; GCN-LABEL: sext_i16_to_i32_divergent:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; GCN-NEXT:    s_load_dword s0, s[0:1], 0xb
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, s0, v0
+; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.truncated = trunc i32 %tid to i16
+  %divergent.a = add i16 %a, %tid.truncated
+  %sext = sext i16 %divergent.a to i32
+  store i32 %sext, i32 addrspace(1)* %out
+  ret void
+}
+
+
+define amdgpu_kernel void @sext_i16_to_i64_divergent(i64 addrspace(1)* %out, i16 %a, i64 %b) {
+; GCN-LABEL: sext_i16_to_i64_divergent:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; GCN-NEXT:    s_load_dword s0, s[0:1], 0xb
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, s0, v0
+; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.truncated = trunc i32 %tid to i16
+  %divergent.a = add i16 %a, %tid.truncated
+  %sext = sext i16 %divergent.a to i64
+  store i64 %sext, i64 addrspace(1)* %out
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable }
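
For reviewers unfamiliar with the two helpers the patterns rely on: UniformUnaryFrag<Op> and getDivergentFrag<Op>.ret gate a pattern on the SelectionDAG divergence bit, so the scalar S_SEXT_I32_I16 patterns fire only for uniform values while the new V_BFE_I32 patterns handle divergent ones. A minimal sketch of the uniform-side helper, assuming the standard PatFrag predicate machinery (the actual definition lives in the AMDGPU backend's .td files, not in this patch):

// Sketch only, not part of the patch: a unary fragment that matches Op
// only when the selected node is uniform per the divergence analysis.
class UniformUnaryFrag<SDPatternOperator Op> : PatFrag <
  (ops node:$src0),
  (Op $src0),
  [{ return !N->isDivergent(); }]
>;

getDivergentFrag<Op>.ret is the complement: it yields a PatFrag whose predicate checks N->isDivergent(), which is what the VOP3 patterns above match, as the divergent tests in sext-divergence-driven-isel.ll exercise.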