Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -107,19 +107,19 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, AAResults *AA) const { - if (isVOP1(MI) || isVOP3(MI) || isSDWA(MI)) { - // Normally VALU use of exec would block the rematerialization, but that - // is OK in this case to have an implicit exec read as all VALU do. - // We really want all of the generic logic for this except for this. - - // Another potential implicit use is mode register. The core logic of - // the RA will not attempt rematerialization if mode is set anywhere - // in the function, otherwise it is safe since mode is not changed. - return !MI.hasImplicitDef() && - MI.getNumImplicitOperands() == MI.getDesc().getNumImplicitUses(); - } - - return false; + // Normally VALU use of exec would block the rematerialization, but it + // is OK in this case to have an implicit exec read as all VALU do. + + // Another potential implicit use is mode register. The core logic of + // the RA will not attempt rematerialization if mode is set anywhere + // in the function, otherwise it is safe since mode is not changed. + + // The generic implementation bails on any virtual register use, which + // effectively prevents rematerialization. On AMDGPU we only need to + // check that the def is not tied, as tied operands model read-modify-write. + return !MI.hasImplicitDef() && + MI.getNumImplicitOperands() == MI.getDesc().getNumImplicitUses() && + !MI.getOperand(0).isTied(); } bool SIInstrInfo::isIgnorableUse(const MachineOperand &MO) const { Index: llvm/test/CodeGen/AMDGPU/remat-sop.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/remat-sop.mir +++ llvm/test/CodeGen/AMDGPU/remat-sop.mir @@ -51,6 +51,66 @@ S_NOP 0, implicit %2 S_ENDPGM 0 ... 
+# The liverange of %0 covers a point of rematerialization, source value is +# available. +--- +name: test_remat_s_mov_b32_vreg_src_long_lr +tracksRegLiveness: true +machineFunctionInfo: + stackPtrOffsetReg: $sgpr32 +body: | + bb.0: + ; GCN-LABEL: name: test_remat_s_mov_b32_vreg_src_long_lr + ; GCN: renamable $sgpr0 = IMPLICIT_DEF + ; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0 + ; GCN: S_NOP 0, implicit killed renamable $sgpr1 + ; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0 + ; GCN: S_NOP 0, implicit killed renamable $sgpr1 + ; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0 + ; GCN: S_NOP 0, implicit killed renamable $sgpr1 + ; GCN: S_NOP 0, implicit killed renamable $sgpr0 + ; GCN: S_ENDPGM 0 + %0:sreg_32 = IMPLICIT_DEF + %1:sreg_32 = S_MOV_B32 %0:sreg_32 + %2:sreg_32 = S_MOV_B32 %0:sreg_32 + %3:sreg_32 = S_MOV_B32 %0:sreg_32 + S_NOP 0, implicit %1 + S_NOP 0, implicit %2 + S_NOP 0, implicit %3 + S_NOP 0, implicit %0 + S_ENDPGM 0 +... +# The liverange of %0 does not cover a point of rematerialization, source value is +# unavailable and we do not want to artificially extend the liverange. 
+--- +name: test_no_remat_s_mov_b32_vreg_src_short_lr +tracksRegLiveness: true +machineFunctionInfo: + stackPtrOffsetReg: $sgpr32 +body: | + bb.0: + ; GCN-LABEL: name: test_no_remat_s_mov_b32_vreg_src_short_lr + ; GCN: renamable $sgpr0 = IMPLICIT_DEF + ; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0 + ; GCN: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.1, addrspace 5) + ; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0 + ; GCN: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5) + ; GCN: renamable $sgpr0 = S_MOV_B32 killed renamable $sgpr0 + ; GCN: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5) + ; GCN: S_NOP 0, implicit killed renamable $sgpr1 + ; GCN: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5) + ; GCN: S_NOP 0, implicit killed renamable $sgpr1 + ; GCN: S_NOP 0, implicit killed renamable $sgpr0 + ; GCN: S_ENDPGM 0 + %0:sreg_32 = IMPLICIT_DEF + %1:sreg_32 = S_MOV_B32 %0:sreg_32 + %2:sreg_32 = S_MOV_B32 %0:sreg_32 + %3:sreg_32 = S_MOV_B32 %0:sreg_32 + S_NOP 0, implicit %1 + S_NOP 0, implicit %2 + S_NOP 0, implicit %3 + S_ENDPGM 0 +... --- name: test_remat_s_mov_b64 tracksRegLiveness: true