diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp --- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -349,6 +349,64 @@ return MadeChange; } + // This function removes redundant pairs of accumulator prime/unprime + // instructions. In some situations, it's possible the compiler inserts an + // accumulator prime instruction followed by an unprime instruction (e.g. + // when we store an accumulator after restoring it from a spill). If the + // accumulator is not used between the two, they can be removed. This + // function removes these redundant pairs from basic blocks. + // The algorithm is quite straightforward - every time we encounter a prime + // instruction, the primed register is added to a candidate set. Any use + // other than a prime removes the candidate from the set and any de-prime + // of a current candidate marks both the prime and de-prime for removal. + // This way we ensure we only remove prime/de-prime *pairs* with no + // intervening uses. + bool removeAccPrimeUnprime(MachineBasicBlock &MBB) { + DenseSet InstrsToErase; + // Initially, none of the acc registers are candidates. + SmallVector Candidates( + PPC::UACCRCRegClass.getNumRegs(), nullptr); + + for (MachineInstr &BBI : MBB.instrs()) { + unsigned Opc = BBI.getOpcode(); + // If we are visiting a xxmtacc instruction, we add it and its operand + // register to the candidate set. + if (Opc == PPC::XXMTACC) { + Register Acc = BBI.getOperand(0).getReg(); + assert(PPC::ACCRCRegClass.contains(Acc) && + "Unexpected register for XXMTACC"); + Candidates[Acc - PPC::ACC0] = &BBI; + } + // If we are visiting a xxmfacc instruction and its operand register is + // in the candidate set, we mark the two instructions for removal. + else if (Opc == PPC::XXMFACC) { + Register Acc = BBI.getOperand(0).getReg(); + assert(PPC::ACCRCRegClass.contains(Acc) && + "Unexpected register for XXMFACC"); + if (!Candidates[Acc - PPC::ACC0]) + continue; + InstrsToErase.insert(&BBI); + InstrsToErase.insert(Candidates[Acc - PPC::ACC0]); + } + // If we are visiting an instruction using an accumulator register + // as operand, we remove it from the candidate set. + else { + for (MachineOperand &Operand : BBI.operands()) { + if (!Operand.isReg()) + continue; + Register Reg = Operand.getReg(); + if (PPC::ACCRCRegClass.contains(Reg)) + Candidates[Reg - PPC::ACC0] = nullptr; + } + } + } + + for (MachineInstr *MI : InstrsToErase) + MI->eraseFromParent(); + NumRemovedInPreEmit += InstrsToErase.size(); + return !InstrsToErase.empty(); + } + bool runOnMachineFunction(MachineFunction &MF) override { if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) { // Remove UNENCODED_NOP even when this pass is disabled. @@ -370,6 +428,7 @@ for (MachineBasicBlock &MBB : MF) { Changed |= removeRedundantLIs(MBB, TRI); Changed |= addLinkerOpt(MBB, TRI); + Changed |= removeAccPrimeUnprime(MBB); for (MachineInstr &MI : MBB) { unsigned Opc = MI.getOpcode(); if (Opc == PPC::UNENCODED_NOP) { diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll --- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll +++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll @@ -16,8 +16,6 @@ ; LE-PAIRED-NEXT: plxv vs0, f@PCREL+112(0), 1 ; LE-PAIRED-NEXT: plxv vs3, f@PCREL+64(0), 1 ; LE-PAIRED-NEXT: plxv vs2, f@PCREL+80(0), 1 -; LE-PAIRED-NEXT: xxmtacc acc0 -; LE-PAIRED-NEXT: xxmfacc acc0 ; LE-PAIRED-NEXT: pstxv vs0, f@PCREL+176(0), 1 ; LE-PAIRED-NEXT: pstxv vs1, f@PCREL+160(0), 1 ; LE-PAIRED-NEXT: pstxv vs2, f@PCREL+144(0), 1 @@ -32,8 +30,6 @@ ; BE-PAIRED-NEXT: lxv vs0, 64(r3) ; BE-PAIRED-NEXT: lxv vs3, 112(r3) ; BE-PAIRED-NEXT: lxv vs2, 96(r3) -; BE-PAIRED-NEXT: xxmtacc acc0 -; BE-PAIRED-NEXT: xxmfacc acc0 ; BE-PAIRED-NEXT: stxv vs1, 144(r3) ; BE-PAIRED-NEXT: stxv vs0, 128(r3) ; BE-PAIRED-NEXT: stxv vs3, 176(r3) @@ -58,8 +54,6 @@ ; LE-PAIRED-NEXT: lxvx vs3, r5, r3 ; LE-PAIRED-NEXT: lxv vs2, 16(r6) ; LE-PAIRED-NEXT: sldi r3, r4, 6 -; LE-PAIRED-NEXT: xxmtacc acc0 -; LE-PAIRED-NEXT: xxmfacc acc0 ; LE-PAIRED-NEXT: stxvx vs3, r5, r3 ; LE-PAIRED-NEXT: add r3, r5, r3 ; LE-PAIRED-NEXT: stxv vs0, 48(r3) @@ -78,8 +72,6 @@ ; BE-PAIRED-NEXT: lxv vs1, 16(r6) ; BE-PAIRED-NEXT: lxv vs3, 48(r6) ; BE-PAIRED-NEXT: lxv vs2, 32(r6) -; BE-PAIRED-NEXT: xxmtacc acc0 -; BE-PAIRED-NEXT: xxmfacc acc0 ; BE-PAIRED-NEXT: stxvx vs0, r5, r3 ; BE-PAIRED-NEXT: add r3, r5, r3 ; BE-PAIRED-NEXT: stxv vs1, 16(r3) @@ -101,8 +93,6 @@ ; LE-PAIRED-NEXT: plxv vs0, f@PCREL+59(0), 1 ; LE-PAIRED-NEXT: plxv vs3, f@PCREL+11(0), 1 ; LE-PAIRED-NEXT: plxv vs2, f@PCREL+27(0), 1 -; LE-PAIRED-NEXT: xxmtacc acc0 -; LE-PAIRED-NEXT: xxmfacc acc0 ; LE-PAIRED-NEXT: pstxv vs0, f@PCREL+67(0), 1 ; LE-PAIRED-NEXT: pstxv vs1, f@PCREL+51(0), 1 ; LE-PAIRED-NEXT: pstxv vs2, f@PCREL+35(0), 1 @@ -122,8 +112,6 @@ ; BE-PAIRED-NEXT: li r4, 59 ; BE-PAIRED-NEXT: lxvx vs3, r3, r4 ; BE-PAIRED-NEXT: li r4, 35 -; BE-PAIRED-NEXT: xxmtacc acc0 -; BE-PAIRED-NEXT: xxmfacc acc0 ; BE-PAIRED-NEXT: stxvx vs1, r3, r4 ; BE-PAIRED-NEXT: li r4, 19 ; BE-PAIRED-NEXT: stxvx vs0, r3, r4 diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll @@ -16,8 +16,6 @@ ; CHECK-NEXT: xxlor vs1, v3, v3 ; CHECK-NEXT: xxlor vs2, v2, v2 ; CHECK-NEXT: xxlor vs3, v3, v3 -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: xxmfacc acc0 ; CHECK-NEXT: stxv vs0, 48(r3) ; CHECK-NEXT: stxv vs1, 32(r3) ; CHECK-NEXT: stxv vs2, 16(r3) @@ -31,8 +29,6 @@ ; CHECK-BE-NEXT: xxlor vs1, v3, v3 ; CHECK-BE-NEXT: xxlor vs2, v2, v2 ; CHECK-BE-NEXT: xxlor vs3, v3, v3 -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: xxmfacc acc0 ; CHECK-BE-NEXT: stxv vs1, 16(r3) ; CHECK-BE-NEXT: stxv vs0, 0(r3) ; CHECK-BE-NEXT: stxv vs3, 48(r3) @@ -77,8 +73,6 @@ ; CHECK-NEXT: xxlor vs2, v2, v2 ; CHECK-NEXT: xxlor vs3, v3, v3 ; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: xxmfacc acc0 ; CHECK-NEXT: stxv vs0, 48(r3) ; CHECK-NEXT: stxv vs1, 32(r3) ; CHECK-NEXT: stxv vs2, 16(r3) @@ -93,8 +87,6 @@ ; CHECK-BE-NEXT: xxlor vs2, v2, v2 ; CHECK-BE-NEXT: xxlor vs3, v3, v3 ; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: xxmfacc acc0 ; CHECK-BE-NEXT: stxv vs1, 16(r3) ; CHECK-BE-NEXT: stxv vs0, 0(r3) ; CHECK-BE-NEXT: stxv vs3, 48(r3) @@ -119,9 +111,6 @@ ; CHECK-NEXT: xxlor vs1, v3, v3 ; CHECK-NEXT: xxlor vs2, v2, v2 ; CHECK-NEXT: xxlor vs3, v3, v3 -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: xxmfacc acc0 ; CHECK-NEXT: stxv vs0, 48(r3) ; CHECK-NEXT: stxv vs1, 32(r3) ; CHECK-NEXT: stxv vs2, 16(r3) @@ -135,9 +124,6 @@ ; CHECK-BE-NEXT: xxlor vs1, v3, v3 ; CHECK-BE-NEXT: xxlor vs2, v2, v2 ; CHECK-BE-NEXT: xxlor vs3, v3, v3 -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: xxmfacc acc0 ; CHECK-BE-NEXT: stxv vs1, 16(r3) ; CHECK-BE-NEXT: stxv vs0, 0(r3) ; CHECK-BE-NEXT: stxv vs3, 48(r3) @@ -262,8 +248,6 @@ ; CHECK-NEXT: xvi4ger8pp acc0, v2, v2 ; CHECK-NEXT: .LBB7_3: # %if.end ; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: xxmfacc acc0 ; CHECK-NEXT: stxv vs0, 48(r3) ; CHECK-NEXT: stxv vs1, 32(r3) ; CHECK-NEXT: stxv vs2, 16(r3) @@ -286,8 +270,6 @@ ; CHECK-BE-NEXT: xvi4ger8pp acc0, v2, v2 ; CHECK-BE-NEXT: .LBB7_3: # %if.end ; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: xxmfacc acc0 ; CHECK-BE-NEXT: stxv vs1, 16(r3) ; CHECK-BE-NEXT: stxv vs0, 0(r3) ; CHECK-BE-NEXT: stxv vs3, 48(r3) @@ -637,8 +619,6 @@ ; CHECK-NEXT: lxvp vsp0, r1(r3) ; CHECK-NEXT: li r3, 32 ; CHECK-NEXT: lxvp vsp2, r1(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: xxmfacc acc0 ; CHECK-NEXT: stxv vs0, 112(r30) ; CHECK-NEXT: stxv vs1, 96(r30) ; CHECK-NEXT: stxv vs2, 80(r30) @@ -675,8 +655,6 @@ ; CHECK-BE-NEXT: lxvp vsp0, r1(r3) ; CHECK-BE-NEXT: li r3, 144 ; CHECK-BE-NEXT: lxvp vsp2, r1(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: xxmfacc acc0 ; CHECK-BE-NEXT: stxv vs3, 112(r30) ; CHECK-BE-NEXT: stxv vs2, 96(r30) ; CHECK-BE-NEXT: stxv vs1, 80(r30)