Index: lib/Target/AMDGPU/SIFoldOperands.cpp =================================================================== --- lib/Target/AMDGPU/SIFoldOperands.cpp +++ lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1349,6 +1349,8 @@ for (MachineBasicBlock *MBB : depth_first(&MF)) { MachineBasicBlock::iterator I, Next; + + MachineOperand *CurrentKnownM0Val = nullptr; for (I = MBB->begin(); I != MBB->end(); I = Next) { Next = std::next(I); MachineInstr &MI = *I; @@ -1361,6 +1363,25 @@ if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) || !tryFoldOMod(MI)) tryFoldClamp(MI); + + // Saw an unknown clobber of m0, so we no longer know what it is. + if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI)) + CurrentKnownM0Val = nullptr; + continue; + } + + // Specially track simple redefs of m0 to the same value in a block, so we + // can erase the later ones. + if (MI.getOperand(0).getReg() == AMDGPU::M0) { + MachineOperand &NewM0Val = MI.getOperand(1); + if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) { + MI.eraseFromParent(); + continue; + } + + // We aren't tracking other physical registers + CurrentKnownM0Val = (NewM0Val.isReg() && NewM0Val.getReg().isPhysical()) ? + nullptr : &NewM0Val; continue; } Index: test/CodeGen/AMDGPU/fold-operands-remove-m0-redef.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/fold-operands-remove-m0-redef.mir @@ -0,0 +1,352 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s + +--- | + define amdgpu_kernel void @redef_m0_same_copy() { ret void } + define amdgpu_kernel void @multi_redef_m0_same_copy() { ret void } + define amdgpu_kernel void @redef_m0_different_copy() { ret void } + define amdgpu_kernel void @redef_m0_mixed_copy0() { ret void } + define amdgpu_kernel void @redef_m0_mixed_copy1() { ret void } + define amdgpu_kernel void @redef_m0_same_mov_imm() { ret void } + define amdgpu_kernel void @redef_m0_different_inst0() { ret void } + define amdgpu_kernel void @redef_m0_different_inst1() { ret void } + define amdgpu_kernel void @redef_m0_mixed_read_m0() { ret void } + define amdgpu_kernel void @redef_m0_same_copy_call() { ret void } + define amdgpu_kernel void @redef_m0_same_copy_multi_block() { ret void } + define amdgpu_kernel void @redef_m0_copy_self() { ret void } + define amdgpu_kernel void @redef_m0_copy_physreg() { ret void } + + declare void @func() +... + +--- +name: redef_m0_same_copy +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_same_copy + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: multi_redef_m0_same_copy +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: multi_redef_m0_same_copy + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %1 + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: redef_m0_different_copy +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0, $sgpr1 + + ; GCN-LABEL: name: redef_m0_different_copy + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $m0 = COPY [[COPY2]] + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + %2:sgpr_32 = COPY $sgpr1 + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %2 + %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: redef_m0_mixed_copy0 +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0, $sgpr1 + + ; GCN-LABEL: name: redef_m0_mixed_copy0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $m0 = COPY [[COPY2]] + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + %2:sgpr_32 = COPY $sgpr1 + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %1 + $m0 = COPY %2 + %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: redef_m0_mixed_copy1 +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0, $sgpr1 + + ; GCN-LABEL: name: redef_m0_mixed_copy1 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $m0 = COPY [[COPY2]] + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + %2:sgpr_32 = COPY $sgpr1 + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %2 + $m0 = COPY %1 + %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: redef_m0_same_mov_imm +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_same_mov_imm + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = S_MOV_B32 -1 + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = S_MOV_B32 -1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = S_MOV_B32 -1 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: redef_m0_different_inst0 +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_different_inst0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $m0 = IMPLICIT_DEF + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = IMPLICIT_DEF + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: redef_m0_different_inst1 +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_different_inst1 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: S_NOP 0, implicit-def $m0 + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + S_NOP 0, implicit-def $m0 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: redef_m0_mixed_read_m0 +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0, $sgpr1 + + ; GCN-LABEL: name: redef_m0_mixed_read_m0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $m0 = COPY [[COPY2]] + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_2:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 128, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + %2:sgpr_32 = COPY $sgpr1 + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %2 + %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %2 + %5:vgpr_32 = DS_READ_B32 %0, 128, 0, implicit $m0, implicit $exec :: (load 4) +... + +--- +name: redef_m0_same_copy_call +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_same_copy_call + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: dead $sgpr30_sgpr31 = SI_CALL undef $sgpr6_sgpr7, @func, csr_amdgpu_highregs + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + dead $sgpr30_sgpr31 = SI_CALL undef $sgpr6_sgpr7, @func, csr_amdgpu_highregs + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: redef_m0_same_copy_multi_block +machineFunctionInfo: + isEntryFunction: true +body: | + ; GCN-LABEL: name: redef_m0_same_copy_multi_block + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: bb.1: + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + bb.0: + liveins: $vgpr0, $sgpr0 + + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + + bb.1: + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: redef_m0_copy_self +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_copy_self + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $m0 = COPY $m0 + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY $m0 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: redef_m0_copy_physreg +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_copy_physreg + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY $sgpr0 + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $sgpr0 = S_MOV_B32 0 + ; GCN: $m0 = COPY $sgpr0 + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY $sgpr0 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $sgpr0 = S_MOV_B32 0 + $m0 = COPY $sgpr0 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +...