diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -263,7 +263,7 @@ def : GCNPat< (i32 (DivergentUnaryFrag i16:$src)), - (i32 (V_BFE_I32_e64 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10)))) + (i32 (V_BFE_I32_e64 i16:$src, (V_MOV_B32_e32 (i32 0)), (V_MOV_B32_e32 (i32 0x10)))) >; let isReMaterializable = 1 in { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir @@ -208,7 +208,9 @@ ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 16, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], implicit $exec ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir @@ -165,7 +165,9 @@ ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec - ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_AND_B32_e32_]], 0, 16, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_AND_B32_e32_]], [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], implicit $exec ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/v_bfe_i32.ll b/llvm/test/CodeGen/AMDGPU/v_bfe_i32.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/v_bfe_i32.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa --global-isel -verify-machineinstrs < %s | FileCheck --check-prefix=PREGFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --global-isel -mcpu=hawaii -verify-machineinstrs < %s | FileCheck --check-prefix=PREGFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --global-isel -mcpu=fiji -verify-machineinstrs < %s | FileCheck --check-prefix=PREGFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --global-isel -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --check-prefix=PREGFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --global-isel -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10PLUS %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --global-isel -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10PLUS %s + +define i32 @check_v_bfe(i16 %a) { +; PREGFX9-LABEL: check_v_bfe: +; PREGFX9: ; %bb.0: ; %entry +; PREGFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX9-NEXT: v_bfe_i32 v0, v0, 0, 16 +; PREGFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10PLUS-LABEL: check_v_bfe: +; GFX10PLUS: ; %bb.0: ; %entry +; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +entry: + %res = sext i16 %a to i32 + ret i32 %res +}