Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -122,6 +122,16 @@ assert(MI.getDesc().getNumImplicitUses() == 1); return MI.getNumOperands() == 3; default: + if (isVOP1(MI) || isVOP3(MI)) { + // Normally a use of exec would block rematerialization, but it is + // OK in this case to have an implicit exec read, as all VALU do. + // Another potential implicit use is mode register. The core logic of + // the RA will not attempt rematerialization if mode is set anywhere + // in the function; otherwise it is safe since mode is not changed. + return !MI.hasImplicitDef() && + MI.getNumImplicitOperands() == MI.getDesc().getNumImplicitUses(); + } + return false; } } Index: llvm/lib/Target/AMDGPU/VOP1Instructions.td =================================================================== --- llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -204,12 +204,14 @@ } let SchedRW = [WriteDoubleCvt] in { +let isReMaterializable = 1 in { // OMod clears exceptions when set in this instruction defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64_SPECIAL_OMOD, fp_to_sint>; let mayRaiseFPException = 0 in { defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>; } +} // End isReMaterializable = 1 defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>; defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>; Index: llvm/test/CodeGen/AMDGPU/remat-vop.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/remat-vop.mir +++ llvm/test/CodeGen/AMDGPU/remat-vop.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs --stress-regalloc=2 -start-before=greedy -stop-after=virtregrewriter -o - 
%s | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs --stress-regalloc=2 -start-before=greedy -stop-after=virtregrewriter -o - %s | FileCheck -check-prefix=GCN %s --- name: test_remat_v_mov_b32_e32 @@ -121,15 +121,218 @@ bb.0: ; GCN-LABEL: name: test_remat_v_mov_b64_pseudo ; GCN: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 1, implicit $exec + ; GCN: renamable $vgpr2_vgpr3 = V_MOV_B64_PSEUDO 2, implicit $exec ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 - ; GCN: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 2, implicit $exec - ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 ; GCN: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 3, implicit $exec ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 ; GCN: S_ENDPGM 0 - %0:vreg_64 = V_MOV_B64_PSEUDO 1, implicit $exec - %1:vreg_64 = V_MOV_B64_PSEUDO 2, implicit $exec - %2:vreg_64 = V_MOV_B64_PSEUDO 3, implicit $exec + %0:vreg_64_align2 = V_MOV_B64_PSEUDO 1, implicit $exec + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 2, implicit $exec + %2:vreg_64_align2 = V_MOV_B64_PSEUDO 3, implicit $exec + S_NOP 0, implicit %0 + S_NOP 0, implicit %1 + S_NOP 0, implicit %2 + S_ENDPGM 0 +... 
+--- +name: test_remat_v_cvt_i32_f64_e32 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e32 + ; GCN: dead renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode + ; GCN: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode + ; GCN: dead renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode + ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode + ; GCN: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: S_NOP 0, implicit killed renamable $vgpr1 + ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode + ; GCN: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: S_ENDPGM 0 + %0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode + %1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode + %2:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode + S_NOP 0, implicit %0 + S_NOP 0, implicit %1 + S_NOP 0, implicit %2 + S_ENDPGM 0 +... 
+--- +# Cannot rematerialize if MODE register is modified anywhere +name: test_no_remat_v_cvt_i32_f64_e32_mode_def +tracksRegLiveness: true +machineFunctionInfo: + stackPtrOffsetReg: $sgpr32 +body: | + bb.0: + ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_mode_def + ; GCN: $mode = IMPLICIT_DEF + ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode + ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode + ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode + ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) + ; GCN: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: S_NOP 0, implicit killed renamable $vgpr1 + ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; GCN: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: S_ENDPGM 0 + $mode = IMPLICIT_DEF + %0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode + %1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode + %2:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode + S_NOP 0, implicit %0 + S_NOP 0, implicit %1 + S_NOP 0, implicit %2 + S_ENDPGM 0 +... 
+--- +name: test_remat_v_cvt_i32_f64_e64 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e64 + ; GCN: dead renamable $vgpr0 = V_CVT_I32_F64_e64 0, 1, 0, 0, implicit $exec, implicit $mode + ; GCN: renamable $vgpr1 = V_CVT_I32_F64_e64 0, 2, 0, 0, implicit $exec, implicit $mode + ; GCN: dead renamable $vgpr0 = V_CVT_I32_F64_e64 0, 3, 0, 0, implicit $exec, implicit $mode + ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e64 0, 1, 0, 0, implicit $exec, implicit $mode + ; GCN: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: S_NOP 0, implicit killed renamable $vgpr1 + ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e64 0, 3, 0, 0, implicit $exec, implicit $mode + ; GCN: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: S_ENDPGM 0 + %0:vgpr_32 = V_CVT_I32_F64_e64 0, 1, 0, 0, implicit $exec, implicit $mode + %1:vgpr_32 = V_CVT_I32_F64_e64 0, 2, 0, 0, implicit $exec, implicit $mode + %2:vgpr_32 = V_CVT_I32_F64_e64 0, 3, 0, 0, implicit $exec, implicit $mode + S_NOP 0, implicit %0 + S_NOP 0, implicit %1 + S_NOP 0, implicit %2 + S_ENDPGM 0 +... 
+--- +name: test_remat_v_cvt_i32_f64_e64_undef +tracksRegLiveness: true +machineFunctionInfo: + stackPtrOffsetReg: $sgpr32 +body: | + bb.0: + ; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e64_undef + ; GCN: dead renamable $vgpr0 = V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode + ; GCN: renamable $vgpr1 = V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode + ; GCN: dead renamable $vgpr0 = V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode + ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode + ; GCN: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: S_NOP 0, implicit killed renamable $vgpr1 + ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode + ; GCN: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: S_ENDPGM 0 + %1:vgpr_32 = V_CVT_I32_F64_e64 0, undef %0:vreg_64, 0, 0, implicit $exec, implicit $mode + %2:vgpr_32 = V_CVT_I32_F64_e64 0, undef %0:vreg_64, 0, 0, implicit $exec, implicit $mode + %3:vgpr_32 = V_CVT_I32_F64_e64 0, undef %0:vreg_64, 0, 0, implicit $exec, implicit $mode + S_NOP 0, implicit %1 + S_NOP 0, implicit %2 + S_NOP 0, implicit %3 + S_ENDPGM 0 +... 
+--- +name: test_no_remat_v_cvt_i32_f64_dpp +tracksRegLiveness: true +machineFunctionInfo: + stackPtrOffsetReg: $sgpr32 +body: | + bb.0: + ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_dpp + ; GCN: renamable $vgpr0 = V_CVT_I32_F64_dpp undef $vgpr0, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode + ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN: renamable $vgpr1 = V_CVT_I32_F64_dpp undef $vgpr1, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode + ; GCN: renamable $vgpr0 = V_CVT_I32_F64_dpp undef $vgpr0, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode + ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) + ; GCN: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: S_NOP 0, implicit killed renamable $vgpr1 + ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; GCN: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: S_ENDPGM 0 + %1:vgpr_32 = V_CVT_I32_F64_dpp undef %1:vgpr_32, 0, undef %0:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode + %2:vgpr_32 = V_CVT_I32_F64_dpp undef %2:vgpr_32, 0, undef %0:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode + %3:vgpr_32 = V_CVT_I32_F64_dpp undef %3:vgpr_32, 0, undef %0:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode + S_NOP 0, implicit %1 + S_NOP 0, implicit %2 + S_NOP 0, implicit %3 + S_ENDPGM 0 +... 
+--- +name: test_no_remat_v_cvt_i32_f64_e32_imp_def +tracksRegLiveness: true +machineFunctionInfo: + stackPtrOffsetReg: $sgpr32 +body: | + bb.0: + ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_imp_def + ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0 + ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0 + ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0 + ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) + ; GCN: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: S_NOP 0, implicit killed renamable $vgpr1 + ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; GCN: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: S_ENDPGM 0 + %0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0 + %1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0 + %2:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0 + S_NOP 0, implicit %0 + S_NOP 0, implicit %1 + S_NOP 0, implicit %2 + S_ENDPGM 0 +... 
+--- +name: test_no_remat_v_cvt_i32_f64_e32_imp_use +tracksRegLiveness: true +machineFunctionInfo: + stackPtrOffsetReg: $sgpr32 +body: | + bb.0: + ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_imp_use + ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit $m0 + ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit $m0 + ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit $m0 + ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) + ; GCN: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: S_NOP 0, implicit killed renamable $vgpr1 + ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; GCN: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: S_ENDPGM 0 + %0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit $m0 + %1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit $m0 + %2:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit $m0 + S_NOP 0, implicit %0 + S_NOP 0, implicit %1 + S_NOP 0, implicit %2 + S_ENDPGM 0 +... 
+--- +name: test_remat_v_cvt_f64_i32_e32 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: test_remat_v_cvt_f64_i32_e32 + ; GCN: renamable $vgpr0_vgpr1 = V_CVT_F64_I32_e32 1, implicit $exec, implicit $mode + ; GCN: renamable $vgpr2_vgpr3 = V_CVT_F64_I32_e32 2, implicit $exec, implicit $mode + ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 + ; GCN: renamable $vgpr0_vgpr1 = V_CVT_F64_I32_e32 3, implicit $exec, implicit $mode + ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: S_ENDPGM 0 + %0:vreg_64_align2 = V_CVT_F64_I32_e32 1, implicit $exec, implicit $mode + %1:vreg_64_align2 = V_CVT_F64_I32_e32 2, implicit $exec, implicit $mode + %2:vreg_64_align2 = V_CVT_F64_I32_e32 3, implicit $exec, implicit $mode S_NOP 0, implicit %0 S_NOP 0, implicit %1 S_NOP 0, implicit %2