Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -122,6 +122,10 @@
     assert(MI.getDesc().getNumImplicitUses() == 1);
     return MI.getNumOperands() == 3;
   default:
+    if (isVOP1(MI) || isVOP3(MI))
+      return !MI.hasImplicitDef() &&
+             MI.getNumImplicitOperands() == MI.getDesc().getNumImplicitUses();
+
     return false;
   }
 }
Index: llvm/lib/Target/AMDGPU/VOP1Instructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -203,6 +203,7 @@
   let Inst{31-25} = 0x3f; //encoding
 }
 
+let isReMaterializable = 1 in {
 let SchedRW = [WriteDoubleCvt] in {
 // OMod clears exceptions when set in this instruction
 defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64_SPECIAL_OMOD, fp_to_sint>;
@@ -210,6 +211,7 @@
 let mayRaiseFPException = 0 in {
 defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
 }
+} // End isReMaterializable = 1
 
 defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
 defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll
@@ -82,8 +82,8 @@
 ; The offset to the dynamic shared memory array should be aligned on the
 ; maximal one.
 ; CHECK-LABEL: {{^}}dynamic_shared_array_4:
-; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x48
-; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
+; CHECK-DAG: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x48
+; CHECK-DAG: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
 ; CHECK: v_add_u32_e32 {{v[0-9]+}}, [[DYNLDS]], [[IDX]]
 define amdgpu_kernel void @dynamic_shared_array_4(i32 %idx) {
   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -101,8 +101,8 @@
 
 ; Honor the explicit alignment from the specified variable.
 ; CHECK-LABEL: {{^}}dynamic_shared_array_5:
-; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x44
-; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
+; CHECK-DAG: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x44
+; CHECK-DAG: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
 ; CHECK: v_add_u32_e32 {{v[0-9]+}}, [[DYNLDS]], [[IDX]]
 define amdgpu_kernel void @dynamic_shared_array_5(i32 %idx) {
   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -120,8 +120,8 @@
 
 ; Honor the explicit alignment from the specified variable.
 ; CHECK-LABEL: {{^}}dynamic_shared_array_6:
-; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x50
-; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
+; CHECK-DAG: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x50
+; CHECK-DAG: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
 ; CHECK: v_add_u32_e32 {{v[0-9]+}}, [[DYNLDS]], [[IDX]]
 define amdgpu_kernel void @dynamic_shared_array_6(i32 %idx) {
   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
Index: llvm/test/CodeGen/AMDGPU/fneg-combines.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fneg-combines.ll
+++ llvm/test/CodeGen/AMDGPU/fneg-combines.ll
@@ -1639,12 +1639,15 @@
   ret void
 }
 
+; FIXME: Second store could reuse the same 64 bit VGPR, but since
+; v_cvt_f32_f64_e32 became rematerializable, RA doesn't do it.
+
 ; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f64_to_f32:
 ; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
 ; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}}
 ; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}}
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:[[NEG_A_HI]]{{\]}}
 define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
Index: llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
+++ llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
@@ -68,7 +68,7 @@
 ; SI-DAG: v_cvt_f16_f32_e32 v[[CVTHI:[0-9]+]], v[[A_F32_1]]
 ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[CVTHI]]
 
-; VI: v_cvt_f16_f32_sdwa v[[R_F16_HI:[0-9]+]], v[[A_F32_1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-DAG: v_cvt_f16_f32_sdwa v[[R_F16_HI:[0-9]+]], v[[A_F32_1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
 
 ; SIVI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
 
Index: llvm/test/CodeGen/AMDGPU/half.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/half.ll
+++ llvm/test/CodeGen/AMDGPU/half.ll
@@ -400,13 +400,23 @@
 ; VI: v_cvt_f32_f16_sdwa
 ; GCN-NOT: v_cvt_f32_f16
 
-; GCN: v_cvt_f64_f32_e32
-; GCN: v_cvt_f64_f32_e32
-; GCN: v_cvt_f64_f32_e32
-; GCN-NOT: v_cvt_f64_f32_e32
+; SI: v_cvt_f64_f32_e32
+; SI-NOT: v_cvt_f64_f32_e32
+; SI: flat_store_dwordx2
+
+; SI: v_cvt_f64_f32_e32
+; SI: v_cvt_f64_f32_e32
+; SI-NOT: v_cvt_f64_f32_e32
+; SI: flat_store_dwordx4
+
+; VI: v_cvt_f64_f32_e32
+; VI: v_cvt_f64_f32_e32
+; VI: v_cvt_f64_f32_e32
+; VI-NOT: v_cvt_f64_f32_e32
+
+; VI-DAG: flat_store_dwordx4
+; VI-DAG: flat_store_dwordx2
 
-; GCN-DAG: flat_store_dwordx4
-; GCN-DAG: flat_store_dwordx2
 ; GCN: s_endpgm
 define amdgpu_kernel void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
   %val = load <3 x half>, <3 x half> addrspace(1)* %in
Index: llvm/test/CodeGen/AMDGPU/remat-vop.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/remat-vop.mir
+++ llvm/test/CodeGen/AMDGPU/remat-vop.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs --stress-regalloc=2 -start-before=greedy -stop-after=virtregrewriter -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs --stress-regalloc=2 -start-before=greedy -stop-after=virtregrewriter -o - %s | FileCheck -check-prefix=GCN %s
 
 ---
 name: test_remat_v_mov_b32_e32
@@ -121,15 +121,218 @@
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mov_b64_pseudo
     ; GCN: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 1, implicit $exec
+    ; GCN: renamable $vgpr2_vgpr3 = V_MOV_B64_PSEUDO 2, implicit $exec
     ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 2, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
     ; GCN: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 3, implicit $exec
     ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
     ; GCN: S_ENDPGM 0
-    %0:vreg_64 = V_MOV_B64_PSEUDO 1, implicit $exec
-    %1:vreg_64 = V_MOV_B64_PSEUDO 2, implicit $exec
-    %2:vreg_64 = V_MOV_B64_PSEUDO 3, implicit $exec
+    %0:vreg_64_align2 = V_MOV_B64_PSEUDO 1, implicit $exec
+    %1:vreg_64_align2 = V_MOV_B64_PSEUDO 2, implicit $exec
+    %2:vreg_64_align2 = V_MOV_B64_PSEUDO 3, implicit $exec
+    S_NOP 0, implicit %0
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_ENDPGM 0
+...
+---
+name: test_remat_v_cvt_i32_f64_e32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e32
+    ; GCN: dead renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+    ; GCN: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN: dead renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: S_ENDPGM 0
+    %0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+    %1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+    %2:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+    S_NOP 0, implicit %0
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_ENDPGM 0
+...
+---
+# Cannot rematerialize if MODE register is modified anywhere
+name: test_no_remat_v_cvt_i32_f64_e32_mode_def
+tracksRegLiveness: true
+machineFunctionInfo:
+  stackPtrOffsetReg: $sgpr32
+body: |
+  bb.0:
+    ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_mode_def
+    ; GCN: $mode = IMPLICIT_DEF
+    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: S_ENDPGM 0
+    $mode = IMPLICIT_DEF
+    %0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+    %1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+    %2:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+    S_NOP 0, implicit %0
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_ENDPGM 0
+...
+---
+name: test_remat_v_cvt_i32_f64_e64
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e64
+    ; GCN: dead renamable $vgpr0 = V_CVT_I32_F64_e64 0, 1, 0, 0, implicit $exec, implicit $mode
+    ; GCN: renamable $vgpr1 = V_CVT_I32_F64_e64 0, 2, 0, 0, implicit $exec, implicit $mode
+    ; GCN: dead renamable $vgpr0 = V_CVT_I32_F64_e64 0, 3, 0, 0, implicit $exec, implicit $mode
+    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e64 0, 1, 0, 0, implicit $exec, implicit $mode
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e64 0, 3, 0, 0, implicit $exec, implicit $mode
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: S_ENDPGM 0
+    %0:vgpr_32 = V_CVT_I32_F64_e64 0, 1, 0, 0, implicit $exec, implicit $mode
+    %1:vgpr_32 = V_CVT_I32_F64_e64 0, 2, 0, 0, implicit $exec, implicit $mode
+    %2:vgpr_32 = V_CVT_I32_F64_e64 0, 3, 0, 0, implicit $exec, implicit $mode
+    S_NOP 0, implicit %0
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_ENDPGM 0
+...
+---
+name: test_remat_v_cvt_i32_f64_e64_undef
+tracksRegLiveness: true
+machineFunctionInfo:
+  stackPtrOffsetReg: $sgpr32
+body: |
+  bb.0:
+    ; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e64_undef
+    ; GCN: dead renamable $vgpr0 = V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN: renamable $vgpr1 = V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN: dead renamable $vgpr0 = V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: S_ENDPGM 0
+    %1:vgpr_32 = V_CVT_I32_F64_e64 0, undef %0:vreg_64, 0, 0, implicit $exec, implicit $mode
+    %2:vgpr_32 = V_CVT_I32_F64_e64 0, undef %0:vreg_64, 0, 0, implicit $exec, implicit $mode
+    %3:vgpr_32 = V_CVT_I32_F64_e64 0, undef %0:vreg_64, 0, 0, implicit $exec, implicit $mode
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_NOP 0, implicit %3
+    S_ENDPGM 0
+...
+---
+name: test_no_remat_v_cvt_i32_f64_dpp
+tracksRegLiveness: true
+machineFunctionInfo:
+  stackPtrOffsetReg: $sgpr32
+body: |
+  bb.0:
+    ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_dpp
+    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_dpp undef $vgpr0, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN: renamable $vgpr1 = V_CVT_I32_F64_dpp undef $vgpr1, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_dpp undef $vgpr0, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: S_ENDPGM 0
+    %1:vgpr_32 = V_CVT_I32_F64_dpp undef %1:vgpr_32, 0, undef %0:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode
+    %2:vgpr_32 = V_CVT_I32_F64_dpp undef %2:vgpr_32, 0, undef %0:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode
+    %3:vgpr_32 = V_CVT_I32_F64_dpp undef %3:vgpr_32, 0, undef %0:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_NOP 0, implicit %3
+    S_ENDPGM 0
+...
+---
+name: test_no_remat_v_cvt_i32_f64_e32_imp_def
+tracksRegLiveness: true
+machineFunctionInfo:
+  stackPtrOffsetReg: $sgpr32
+body: |
+  bb.0:
+    ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_imp_def
+    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: S_ENDPGM 0
+    %0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+    %1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+    %2:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+    S_NOP 0, implicit %0
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_ENDPGM 0
+...
+---
+name: test_no_remat_v_cvt_i32_f64_e32_imp_use
+tracksRegLiveness: true
+machineFunctionInfo:
+  stackPtrOffsetReg: $sgpr32
+body: |
+  bb.0:
+    ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_imp_use
+    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit $m0
+    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit $m0
+    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit $m0
+    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: S_ENDPGM 0
+    %0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit $m0
+    %1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit $m0
+    %2:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit $m0
+    S_NOP 0, implicit %0
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_ENDPGM 0
+...
+---
+name: test_remat_v_cvt_f64_i32_e32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: test_remat_v_cvt_f64_i32_e32
+    ; GCN: renamable $vgpr0_vgpr1 = V_CVT_F64_I32_e32 1, implicit $exec, implicit $mode
+    ; GCN: renamable $vgpr2_vgpr3 = V_CVT_F64_I32_e32 2, implicit $exec, implicit $mode
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN: renamable $vgpr0_vgpr1 = V_CVT_F64_I32_e32 3, implicit $exec, implicit $mode
+    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: S_ENDPGM 0
+    %0:vreg_64_align2 = V_CVT_F64_I32_e32 1, implicit $exec, implicit $mode
+    %1:vreg_64_align2 = V_CVT_F64_I32_e32 2, implicit $exec, implicit $mode
+    %2:vreg_64_align2 = V_CVT_F64_I32_e32 3, implicit $exec, implicit $mode
     S_NOP 0, implicit %0
     S_NOP 0, implicit %1
     S_NOP 0, implicit %2