Index: lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- lib/Target/AMDGPU/SIFoldOperands.cpp
+++ lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -591,6 +591,44 @@
   return false;
 }
 
+// Try to fold an instruction into a simpler one.
+//
+// Currently only handles V_CNDMASK_B32_e32, V_CNDMASK_B32_e64 and
+// V_CNDMASK_B64_PSEUDO whose src0 and src1 operands are identical: src2 (when
+// the opcode has one) and src1 are dropped, and MI is mutated in place into a
+// COPY of src0 if src0 is a register, or into the opcode returned by
+// getMovOpc(false) otherwise.
+//
+// Returns true if MI was rewritten, false if it was left untouched.
+static bool tryFoldInst(const SIInstrInfo *TII,
+                        MachineInstr *MI) {
+  unsigned Opc = MI->getOpcode();
+
+  if (Opc == AMDGPU::V_CNDMASK_B32_e32 ||
+      Opc == AMDGPU::V_CNDMASK_B32_e64 ||
+      Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
+    const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
+    const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
+    if (Src1->isIdenticalTo(*Src0)) {
+      DEBUG(dbgs() << "Folded " << *MI << " into ");
+      // Both indices are looked up from the original opcode; src2 is removed
+      // first so the src1 index is not shifted (src2 follows src1 in e64).
+      int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+      if (Src2Idx != -1)
+        MI->RemoveOperand(Src2Idx);
+      MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
+      // NOTE(review): the same mov opcode is used for V_CNDMASK_B64_PSEUDO
+      // with a non-register src0 -- confirm it is valid for a 64-bit result.
+      mutateCopyOp(*MI, TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY
+                                               : getMovOpc(false)));
+      DEBUG(dbgs() << *MI << '\n');
+      return true;
+    }
+  }
+
+  return false;
+}
+
 void SIFoldOperands::foldInstOperand(MachineInstr &MI,
                                      MachineOperand &OpToFold) const {
   // We need mutate the operands of new mov instructions to add implicit
@@ -692,6 +730,7 @@
       }
       DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
             static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
+      tryFoldInst(TII, Fold.UseMI);
     }
   }
 }
@@ -907,6 +946,8 @@
       Next = std::next(I);
       MachineInstr &MI = *I;
 
+      tryFoldInst(TII, &MI);
+
       if (!isFoldableCopy(MI)) {
         if (IsIEEEMode || !tryFoldOMod(MI))
           tryFoldClamp(MI);
Index: test/CodeGen/AMDGPU/fold-cndmask.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/fold-cndmask.mir
@@ -0,0 +1,34 @@
+# RUN: llc -march=amdgcn -run-pass si-fold-operands -verify-machineinstrs -o - %s | FileCheck %s
+
+# CHECK: %1 = V_MOV_B32_e32 0, implicit %exec
+# CHECK: %2 = V_MOV_B32_e32 0, implicit %exec
+# CHECK: %4 = COPY %3
+# CHECK: %5 = V_MOV_B32_e32 0, implicit %exec
+# CHECK: %6 = V_MOV_B32_e32 0, implicit %exec
+# CHECK: %7 = COPY %3
+
+---
+name: fold_cndmask
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sgpr_64 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: vgpr_32 }
+  - { id: 3, class: vgpr_32 }
+  - { id: 4, class: vgpr_32 }
+  - { id: 5, class: vgpr_32 }
+  - { id: 6, class: vgpr_32 }
+  - { id: 7, class: vgpr_32 }
+body: |
+  bb.0.entry:
+    %0 = IMPLICIT_DEF
+    %1 = V_CNDMASK_B32_e64 0, 0, %0, implicit %exec
+    %2 = V_CNDMASK_B32_e64 %1, %1, %0, implicit %exec
+    %3 = IMPLICIT_DEF
+    %4 = V_CNDMASK_B32_e64 %3, %3, %0, implicit %exec
+    %5 = COPY %1
+    %6 = V_CNDMASK_B32_e64 %5, 0, %0, implicit %exec
+    %vcc = IMPLICIT_DEF
+    %7 = V_CNDMASK_B32_e32 %3, %3, implicit %exec, implicit %vcc
+
+...