# Patch summary. This text sits before the first "diff --git" header and is
# not part of any hunk.
#
# 1. llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
#    In the switch arm shared by V_MOV_B64_PSEUDO, V_ACCVGPR_READ_B32_e64 and
#    V_ACCVGPR_WRITE_B32_e64 (any earlier case labels lie above the hunk), the
#    result no longer compares MI.getNumOperands() against
#    MI.getDesc().getNumOperands(). The arm now asserts that the descriptor
#    declares 2 operands, 0 implicit defs and 1 implicit use, and returns true
#    only when the instruction carries exactly 3 operands in total.
#
# 2. llvm/test/CodeGen/AMDGPU/vgpr-remat.mir (new file)
#    Runs llc for gfx900 with -run-pass=simple-register-coalescing and checks
#    that %4.sub0 and %4.sub1 are each defined by their own
#    "V_MOV_B32_e32 0" in bb.0 instead of one being a copy of the other.
#
# NOTE(review): line breaks and leading indentation below were restored from a
# whitespace-collapsed copy of the patch; confirm the context lines against
# the target tree before applying.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -116,8 +116,11 @@
   case AMDGPU::V_MOV_B64_PSEUDO:
   case AMDGPU::V_ACCVGPR_READ_B32_e64:
   case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
-    // No implicit operands.
-    return MI.getNumOperands() == MI.getDesc().getNumOperands();
+    // No non-standard implicit operands.
+    assert(MI.getDesc().getNumOperands() == 2);
+    assert(MI.getDesc().getNumImplicitDefs() == 0);
+    assert(MI.getDesc().getNumImplicitUses() == 1);
+    return MI.getNumOperands() == 3;
   default:
     return false;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-remat.mir b/llvm/test/CodeGen/AMDGPU/vgpr-remat.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-remat.mir
@@ -0,0 +1,44 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=simple-register-coalescing -o - %s | FileCheck %s
+
+# Check that we get two move-immediates into %1 and %2, instead of a copy from
+# %1 to %2, because that would introduce a dependency and maybe a stall.
+---
+name: f
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: f
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK: liveins: $sgpr0
+  ; CHECK: undef %4.sub0:vreg_96 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK: %4.sub1:vreg_96 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0
+  ; CHECK: $exec = S_MOV_B64_term [[COPY]]
+  ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK: S_BRANCH %bb.1
+  ; CHECK: bb.1:
+  ; CHECK: successors: %bb.2(0x80000000)
+  ; CHECK: %4.sub0:vreg_96 = V_MUL_F32_e32 %4.sub0, %4.sub0, implicit $mode, implicit $exec
+  ; CHECK: %4.sub1:vreg_96 = V_MUL_F32_e32 %4.sub1, %4.sub1, implicit $mode, implicit $exec
+  ; CHECK: bb.2:
+  ; CHECK: S_ENDPGM 0, implicit %4
+  bb.0:
+    liveins: $sgpr0
+    %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %1:vgpr_32 = COPY %0:vgpr_32
+    %2:vgpr_32 = COPY %0:vgpr_32
+    %3:sreg_64 = COPY $sgpr0
+    $exec = S_MOV_B64_term %3:sreg_64
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    %1:vgpr_32 = V_MUL_F32_e32 %1:vgpr_32, %1:vgpr_32, implicit $mode, implicit $exec
+    %2:vgpr_32 = V_MUL_F32_e32 %2:vgpr_32, %2:vgpr_32, implicit $mode, implicit $exec
+
+  bb.2:
+    undef %4.sub0:vreg_96 = COPY %1:vgpr_32
+    %4.sub1:vreg_96 = COPY %2:vgpr_32
+    S_ENDPGM 0, implicit %4
+...