diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -866,6 +866,8 @@ unsigned getNSAMaxSize() const { return NSAMaxSize; } + bool hasGFX8Insts() const { return GFX8Insts; } + bool hasGFX10_AEncoding() const { return GFX10_AEncoding; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1768,6 +1768,10 @@ .addImm(0) // neg_lo .addImm(0) // neg_hi .addImm(0); // clamp + } else if (ST.hasGFX8Insts()) { + BuildMI(MBB, MI, DL, get(AMDGPU::V_LSHLREV_B64_e64), Dst) + .addImm(0) // shift width + .addReg(SrcOp.getReg()); } else { BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0)) diff --git a/llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir b/llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir --- a/llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir +++ b/llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir @@ -2,8 +2,7 @@ # RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX90A %s # GCN-LABEL: name: v_mov_b64_from_vgpr -# GFX900: $vgpr0 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1 -# GFX900: $vgpr1 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1 +# GFX900: $vgpr0_vgpr1 = V_LSHLREV_B64_e64 0, $vgpr2_vgpr3, implicit $exec # GFX90A: $vgpr0_vgpr1 = V_PK_MOV_B32 8, $vgpr2_vgpr3, 12, $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec name: v_mov_b64_from_vgpr body: | @@ -12,8 +11,7 @@ ... # GCN-LABEL: name: v_mov_b64_from_sgpr -# GFX900: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1 -# GFX900: $vgpr1 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit-def $vgpr0_vgpr1 +# GFX900: $vgpr0_vgpr1 = V_LSHLREV_B64_e64 0, $sgpr2_sgpr3, implicit $exec # GFX90A: $vgpr0_vgpr1 = V_PK_MOV_B32 8, $sgpr2_sgpr3, 12, $sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec name: v_mov_b64_from_sgpr body: | diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll @@ -555,8 +555,7 @@ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v1 ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v0 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v9, s30 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, s31 +; GFX9-O0-NEXT: v_lshlrev_b64 v[9:10], 0, s[30:31] ; GFX9-O0-NEXT: s_not_b64 exec, exec ; GFX9-O0-NEXT: s_or_saveexec_b64 s[30:31], -1 ; GFX9-O0-NEXT: v_writelane_b32 v11, s30, 6 @@ -592,8 +591,7 @@ ; GFX9-O0-NEXT: v_add_co_u32_e64 v2, s[40:41], v2, v4 ; GFX9-O0-NEXT: v_addc_co_u32_e64 v3, s[40:41], v3, v5, s[40:41] ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 +; GFX9-O0-NEXT: v_lshlrev_b64 v[0:1], 0, v[2:3] ; GFX9-O0-NEXT: s_mov_b32 s34, 0 ; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[36:39], s34 offset:4 ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffff400 @@ -660,10 +658,9 @@ ; GFX9-O3-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 ; GFX9-O3-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v7, vcc ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2 -; GFX9-O3-NEXT: v_mov_b32_e32 v1, v3 -; GFX9-O3-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 offset:4 +; GFX9-O3-NEXT: v_lshlrev_b64 v[0:1], 0, v[2:3] ; GFX9-O3-NEXT: s_addk_i32 s32, 0xf800 +; GFX9-O3-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 offset:4 ; GFX9-O3-NEXT: s_mov_b32 s33, s38 ; GFX9-O3-NEXT: s_or_saveexec_b64 s[30:31], -1 ; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload @@ -720,11 +717,9 @@ ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v6 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s40 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s41 +; GFX9-O0-NEXT: v_lshlrev_b64 v[1:2], 0, s[40:41] ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v9, v2 +; GFX9-O0-NEXT: v_lshlrev_b64 v[8:9], 0, v[1:2] ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v13 ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 ; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec @@ -732,20 +727,16 @@ ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v6 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s40 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s41 +; GFX9-O0-NEXT: v_lshlrev_b64 v[1:2], 0, s[40:41] ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2 +; GFX9-O0-NEXT: v_lshlrev_b64 v[6:7], 0, v[1:2] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v4 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s40 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s41 +; GFX9-O0-NEXT: v_lshlrev_b64 v[1:2], 0, s[40:41] ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2 +; GFX9-O0-NEXT: v_lshlrev_b64 v[3:4], 0, v[1:2] ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v9 ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7 @@ -784,24 +775,18 @@ ; GFX9-O3-NEXT: s_brev_b32 s35, -2 ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: s_waitcnt vmcnt(1) -; GFX9-O3-NEXT: v_mov_b32_e32 v1, s34 -; GFX9-O3-NEXT: v_mov_b32_e32 v2, s35 +; GFX9-O3-NEXT: v_lshlrev_b64 v[1:2], 0, s[34:35] ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: s_not_b64 exec, exec -; GFX9-O3-NEXT: v_mov_b32_e32 v3, s34 -; GFX9-O3-NEXT: v_mov_b32_e32 v4, s35 +; GFX9-O3-NEXT: v_lshlrev_b64 v[3:4], 0, s[34:35] ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) -; GFX9-O3-NEXT: v_mov_b32_e32 v5, s34 -; GFX9-O3-NEXT: v_mov_b32_e32 v6, s35 +; GFX9-O3-NEXT: v_lshlrev_b64 v[5:6], 0, s[34:35] ; GFX9-O3-NEXT: s_not_b64 exec, exec -; GFX9-O3-NEXT: v_mov_b32_e32 v7, v1 -; GFX9-O3-NEXT: v_mov_b32_e32 v9, v3 -; GFX9-O3-NEXT: v_mov_b32_e32 v8, v2 -; GFX9-O3-NEXT: v_mov_b32_e32 v10, v4 -; GFX9-O3-NEXT: v_mov_b32_e32 v11, v5 -; GFX9-O3-NEXT: v_mov_b32_e32 v12, v6 +; GFX9-O3-NEXT: v_lshlrev_b64 v[7:8], 0, v[1:2] +; GFX9-O3-NEXT: v_lshlrev_b64 v[9:10], 0, v[3:4] +; GFX9-O3-NEXT: v_lshlrev_b64 v[11:12], 0, v[5:6] ; GFX9-O3-NEXT: buffer_store_dwordx4 v[7:10], v0, s[4:7], 0 offen ; GFX9-O3-NEXT: buffer_store_dwordx2 v[11:12], v0, s[4:7], 0 offen offset:16 ; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll @@ -141,7 +141,7 @@ define amdgpu_kernel void @call_i64(<4 x i32> inreg %tmp14, i64 inreg %arg) { ; GFX9: s_load_dwordx2 s{{\[}}[[ARG_LO:[0-9]+]]:[[ARG_HI:[0-9]+]]{{\]}} -; GFX9-O0: s_mov_b64 s{{\[}}[[ZERO_LO:[0-9]+]]:[[ZERO_HI:[0-9]+]]{{\]}}, 0{{$}} +; GFX9-O0: s_mov_b64 s[[ZERO:\[[0-9:]+\]]], 0{{$}} ; GFX9-O0: v_mov_b32_e32 v0, s[[ARG_LO]] ; GFX9-O0: v_mov_b32_e32 v1, s[[ARG_HI]] ; GFX9-O0-DAG: v_mov_b32_e32 v10, v1 @@ -151,8 +151,7 @@ ; GFX9-O3-DAG: v_mov_b32_e32 v6, s[[ARG_LO]] ; GFX9: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v9, s[[ZERO_LO]] -; GFX9-O0-NEXT: v_mov_b32_e32 v10, s[[ZERO_HI]] +; GFX9-O0-NEXT: v_lshlrev_b64 v[9:10], 0, s[[ZERO]] ; GFX9-O3-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-O3-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_not_b64 exec, exec @@ -335,7 +334,7 @@ define amdgpu_kernel void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %arg) { ; GFX9: s_load_dwordx2 s{{\[}}[[ARG_LO:[0-9]+]]:[[ARG_HI:[0-9]+]]{{\]}} -; GFX9-O0: s_mov_b64 s{{\[}}[[ZERO_LO:[0-9]+]]:[[ZERO_HI:[0-9]+]]{{\]}}, 0{{$}} +; GFX9-O0: s_mov_b64 s[[ZERO:\[[0-9:]+\]]], 0{{$}} ; GFX9-O0: v_mov_b32_e32 v0, s[[ARG_LO]] ; GFX9-O0: v_mov_b32_e32 v1, s[[ARG_HI]] ; GFX9-O0-DAG: v_mov_b32_e32 v10, v1 @@ -345,8 +344,7 @@ ; GFX9-O3-DAG: v_mov_b32_e32 v6, s[[ARG_LO]] ; GFX9: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v9, s[[ZERO_LO]] -; GFX9-O0-NEXT: v_mov_b32_e32 v10, s[[ZERO_HI]] +; GFX9-O0-NEXT: v_lshlrev_b64 v[9:10], 0, s[[ZERO]] ; GFX9-O3-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-O3-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_not_b64 exec, exec