Index: llvm/include/llvm/CodeGen/TargetInstrInfo.h =================================================================== --- llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -129,6 +129,14 @@ isReallyTriviallyReMaterializableGeneric(MI, AA))); } + /// Given that \p MO is a PhysReg use, return true if it is an ambient use + /// rather than a real one. An ambient use models the execution environment + /// and does not affect the execution of the instruction itself. Used to + /// allow rematerialization of an instruction using a predicate-like register. + virtual bool isAmbientPhysRegUse(const MachineOperand &MO) const { + return false; + } + protected: /// For instructions with opcodes for which the M_REMATERIALIZABLE flag is /// set, this hook lets the target specify whether the instruction is actually Index: llvm/lib/CodeGen/LiveRangeEdit.cpp =================================================================== --- llvm/lib/CodeGen/LiveRangeEdit.cpp +++ llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -113,9 +113,10 @@ if (!MO.isReg() || !MO.getReg() || !MO.readsReg()) continue; - // We can't remat physreg uses, unless it is a constant. + // We can't remat a physreg use unless it is constant or ambient, i.e. it + // does not affect the instruction's execution. 
if (Register::isPhysicalRegister(MO.getReg())) { - if (MRI.isConstantPhysReg(MO.getReg())) + if (MRI.isConstantPhysReg(MO.getReg()) || TII.isAmbientPhysRegUse(MO)) continue; return false; } Index: llvm/lib/Target/AMDGPU/SIInstrInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -181,6 +181,8 @@ bool isReallyTriviallyReMaterializable(const MachineInstr &MI, AAResults *AA) const override; + bool isAmbientPhysRegUse(const MachineOperand &MO) const override; + bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, int64_t &Offset2) const override; Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -122,6 +122,12 @@ return false; } +bool SIInstrInfo::isAmbientPhysRegUse(const MachineOperand &MO) const { + // Any implicit use of exec by VALU is not a real register read. + return MO.getReg() == AMDGPU::EXEC && MO.isImplicit() && + isVALU(*MO.getParent()); +} + bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const { Index: llvm/test/CodeGen/AMDGPU/remat-sop.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/remat-sop.mir +++ llvm/test/CodeGen/AMDGPU/remat-sop.mir @@ -23,6 +23,35 @@ S_ENDPGM 0 ... 
--- +name: test_no_remat_s_mov_b32_impuse_exec +tracksRegLiveness: true +machineFunctionInfo: + stackPtrOffsetReg: $sgpr32 +body: | + bb.0: + ; GCN-LABEL: name: test_no_remat_s_mov_b32_impuse_exec + ; GCN: $exec = IMPLICIT_DEF + ; GCN: renamable $sgpr0 = S_MOV_B32 1, implicit $exec + ; GCN: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.1, addrspace 5) + ; GCN: renamable $sgpr1 = S_MOV_B32 2, implicit $exec + ; GCN: renamable $sgpr0 = S_MOV_B32 3, implicit $exec + ; GCN: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5) + ; GCN: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5) + ; GCN: S_NOP 0, implicit killed renamable $sgpr0 + ; GCN: S_NOP 0, implicit killed renamable $sgpr1 + ; GCN: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5) + ; GCN: S_NOP 0, implicit killed renamable $sgpr0 + ; GCN: S_ENDPGM 0 + $exec = IMPLICIT_DEF + %0:sreg_32 = S_MOV_B32 1, implicit $exec + %1:sreg_32 = S_MOV_B32 2, implicit $exec + %2:sreg_32 = S_MOV_B32 3, implicit $exec + S_NOP 0, implicit %0 + S_NOP 0, implicit %1 + S_NOP 0, implicit %2 + S_ENDPGM 0 +... +--- name: test_remat_s_mov_b64 tracksRegLiveness: true body: | Index: llvm/test/CodeGen/AMDGPU/remat-vop.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/remat-vop.mir +++ llvm/test/CodeGen/AMDGPU/remat-vop.mir @@ -52,6 +52,31 @@ S_ENDPGM 0 ... 
--- +name: test_remat_v_mov_b32_e32_exec_def +tracksRegLiveness: true +machineFunctionInfo: + stackPtrOffsetReg: $sgpr32 +body: | + bb.0: + ; GCN-LABEL: name: test_remat_v_mov_b32_e32_exec_def + ; GCN: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec + ; GCN: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec + ; GCN: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: S_NOP 0, implicit killed renamable $vgpr1 + ; GCN: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec + ; GCN: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: $exec = S_ANDN2_B64_term $exec, undef renamable $sgpr0_sgpr1, implicit-def $scc + ; GCN: S_ENDPGM 0 + %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 2, implicit $exec + %2:vgpr_32 = V_MOV_B32_e32 3, implicit $exec + S_NOP 0, implicit %0 + S_NOP 0, implicit %1 + S_NOP 0, implicit %2 + $exec = S_ANDN2_B64_term $exec, undef %4:sreg_64, implicit-def $scc + S_ENDPGM 0 +... +--- name: test_remat_v_mov_b32_e64 tracksRegLiveness: true body: |