Index: llvm/lib/Target/AMDGPU/SIInstrInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -691,6 +691,9 @@ return MO.isReg() && RI.isVGPR(MRI, MO.getReg());}); } + /// Return true if the instruction modifies the mode register. + static bool modifiesModeRegister(const MachineInstr &MI); + /// Whether we must prevent this instruction from executing with EXEC = 0. bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const; Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3011,6 +3011,20 @@ Opcode == AMDGPU::DS_GWS_BARRIER; } +bool SIInstrInfo::modifiesModeRegister(const MachineInstr &MI) { + // Skip the full operand and register alias search modifiesRegister + // does. There's only a handful of instructions that touch this, it's only an + // implicit def, and doesn't alias any other registers. + if (const MCPhysReg *ImpDef = MI.getDesc().getImplicitDefs()) { + for (; ImpDef && *ImpDef; ++ImpDef) { + if (*ImpDef == AMDGPU::MODE) + return true; + } + } + + return false; +} + bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const { unsigned Opcode = MI.getOpcode(); @@ -3036,6 +3050,10 @@ if (MI.isCall() || MI.isInlineAsm()) return true; // conservative assumption + // A mode change is a scalar operation that influences vector instructions. + if (modifiesModeRegister(MI)) + return true; + // These are like SALU instructions in terms of effects, so it's questionable // whether we should return true for those.
// Index: llvm/test/CodeGen/AMDGPU/remove-short-exec-branches.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/remove-short-exec-branches.mir @@ -0,0 +1,111 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-remove-short-exec-branches -amdgpu-skip-threshold=10 -verify-machineinstrs %s -o - | FileCheck %s +# Make sure mandatory skips are not removed around mode defs. +# FIXME: -amdgpu-skip-threshold seems to be backwards. + +--- + +name: need_skip_setreg_imm32_b32 +body: | + ; CHECK-LABEL: name: need_skip_setreg_imm32_b32 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0 + bb.0: + successors: %bb.1, %bb.2 + S_CBRANCH_EXECZ %bb.2, implicit $exec + + bb.1: + successors: %bb.2 + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode + + bb.2: + S_ENDPGM 0 +... + +--- + +name: need_skip_setreg_b32 +body: | + ; CHECK-LABEL: name: need_skip_setreg_b32 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: S_SETREG_B32 $sgpr0, 3, implicit-def $mode, implicit $mode + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0 + bb.0: + liveins: $sgpr0 + successors: %bb.1, %bb.2 + S_CBRANCH_EXECZ %bb.2, implicit $exec + + bb.1: + liveins: $sgpr0 + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + S_SETREG_B32 $sgpr0, 3, implicit-def $mode, implicit $mode + + bb.2: + S_ENDPGM 0 +... 
+ +--- + +name: need_skip_denorm_mode +body: | + ; CHECK-LABEL: name: need_skip_denorm_mode + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: S_DENORM_MODE 3, implicit-def $mode, implicit $mode + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0 + bb.0: + successors: %bb.1, %bb.2 + S_CBRANCH_EXECZ %bb.2, implicit $exec + + bb.1: + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + S_DENORM_MODE 3, implicit-def $mode, implicit $mode + + bb.2: + S_ENDPGM 0 +... + +--- + +name: need_skip_round_mode +body: | + ; CHECK-LABEL: name: need_skip_round_mode + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: S_ROUND_MODE 3, implicit-def $mode, implicit $mode + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0 + bb.0: + successors: %bb.1, %bb.2 + S_CBRANCH_EXECZ %bb.2, implicit $exec + + bb.1: + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + S_ROUND_MODE 3, implicit-def $mode, implicit $mode + + bb.2: + S_ENDPGM 0 +... 
Index: llvm/test/CodeGen/AMDGPU/skip-if-dead.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/skip-if-dead.ll +++ llvm/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -486,6 +486,24 @@ ret void } +; CHECK-LABEL: {{^}}skip_mode_switch: +; CHECK: s_and_saveexec_b64 +; CHECK-NEXT: s_cbranch_execz +; CHECK: s_setreg_imm32 +; CHECK: s_or_b64 exec, exec +define void @skip_mode_switch(i32 %arg) { +entry: + %cmp = icmp eq i32 %arg, 0 + br i1 %cmp, label %bb.0, label %bb.1 + +bb.0: + call void @llvm.amdgcn.s.setreg(i32 2049, i32 3) + br label %bb.1 + +bb.1: + ret void +} + declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) #2 declare float @llvm.amdgcn.interp.p2(float, float, i32 immarg, i32 immarg, i32) #2 declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) #3 @@ -494,6 +512,8 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 declare void @llvm.amdgcn.kill(i1) #0 +declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) + attributes #0 = { nounwind } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone speculatable }