[PowerPC] Replace the CRSET/CRUNSET of a known CR-bit spill by UNENCODED_NOP

When lowerCRBitSpill() finds that the spilled CR bit was defined by CRSET or
CRUNSET, it materializes the known value with LIS/LI instead of extracting
the bit. If the spill also kills the bit and no instruction between the
definition and the spill reads it, the defining instruction is rewritten into
the new UNENCODED_NOP pseudo. PPCPreEmitPeephole erases every UNENCODED_NOP,
including when the pass is skipped or run with the late peephole disabled.

Index: llvm/lib/Target/PowerPC/PPCInstrInfo.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3177,6 +3177,11 @@
 // the function label.
 def UpdateGBR : PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI),
                 "#UpdateGBR", []>;
+// Pseudo-instruction marked for deletion. When deleting the instruction would
+// cause iterator invalidation in MIR transformation passes, this pseudo can be
+// used instead. It will be removed unconditionally at pre-emit time (prior to
+// branch selection).
+def UNENCODED_NOP: PPCEmitTimePseudo<(outs), (ins), "#UNENCODED_NOP", []>;
 
 // Standard shifts.  These are represented separately from the real shifts above
 // so that we can distinguish between shifts that allow 5-bit and 6-bit shift
Index: llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -161,8 +161,18 @@
     }
 
     bool runOnMachineFunction(MachineFunction &MF) override {
-      if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole)
-        return false;
+      if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
+        // Remove UNENCODED_NOP even with -ppc-late-peephole=false
+        SmallVector<MachineInstr *, 4> InstrsToErase;
+        for (MachineBasicBlock &MBB : MF)
+          for (MachineInstr &MI : MBB)
+            if (MI.getOpcode() == PPC::UNENCODED_NOP)
+              InstrsToErase.push_back(&MI);
+        for (MachineInstr *MI : InstrsToErase)
+          MI->eraseFromParent();
+        // Erasing instructions modifies the function, so report it.
+        return !InstrsToErase.empty();
+      }
       bool Changed = false;
       const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
       const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
@@ -171,6 +181,10 @@
         Changed |= removeRedundantLIs(MBB, TRI);
         for (MachineInstr &MI : MBB) {
           unsigned Opc = MI.getOpcode();
+          if (Opc == PPC::UNENCODED_NOP) {
+            InstrsToErase.push_back(&MI);
+            continue;
+          }
           // Detect self copies - these can result from running AADB.
           if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) {
             const MCInstrDesc &MCID = TII->get(Opc);
Index: llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -747,12 +747,18 @@
   Register SrcReg = MI.getOperand(0).getReg();
 
   // Search up the BB to find the definition of the CR bit.
-  MachineBasicBlock::reverse_iterator Ins;
+  MachineBasicBlock::reverse_iterator Ins = MI;
+  MachineBasicBlock::reverse_iterator Rend = MBB.rend();
+  ++Ins;
   unsigned CRBitSpillDistance = 0;
-  for (Ins = MI; Ins != MBB.rend(); Ins++) {
+  bool SeenUse = false;
+  for (; Ins != Rend; ++Ins) {
     // Definition found.
     if (Ins->modifiesRegister(SrcReg, TRI))
       break;
+    // Use found.
+    if (Ins->readsRegister(SrcReg, TRI))
+      SeenUse = true;
     // Unable to find CR bit definition within maximum search distance.
     if (CRBitSpillDistance == MaxCRBitSpillDist) {
       Ins = MI;
@@ -767,15 +773,18 @@
   if (Ins == MBB.rend())
     Ins = MI;
 
+  bool SpillsKnownBit = false;
   // There is no need to extract the CR bit if its value is already known.
   switch (Ins->getOpcode()) {
   case PPC::CRUNSET:
     BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LI8 : PPC::LI), Reg)
       .addImm(0);
+    SpillsKnownBit = true;
     break;
   case PPC::CRSET:
     BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LIS8 : PPC::LIS), Reg)
       .addImm(-32768);
+    SpillsKnownBit = true;
     break;
   default:
     // We need to move the CR field that contains the CR bit we are spilling.
@@ -803,8 +812,16 @@
                     .addReg(Reg, RegState::Kill),
                     FrameIndex);
 
+  bool KillsCRBit = MI.killsRegister(SrcReg, TRI);
   // Discard the pseudo instruction.
   MBB.erase(II);
+  // The defining CRSET/CRUNSET is dead when the spill was the last use of the
+  // bit and nothing read it in between. Turn it into UNENCODED_NOP instead of
+  // erasing it; the pseudo is removed later at pre-emit time.
+  if (SpillsKnownBit && KillsCRBit && !SeenUse) {
+    Ins->setDesc(TII.get(PPC::UNENCODED_NOP));
+    Ins->RemoveOperand(0);
+  }
 }
 
 void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
Index: llvm/test/CodeGen/PowerPC/knowCRBitSpill.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/knowCRBitSpill.ll
+++ llvm/test/CodeGen/PowerPC/knowCRBitSpill.ll
@@ -2,6 +2,8 @@
 ; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 \
 ; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -ppc-late-peephole=false -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
 
 
 ; For known CRBit spills, CRSET/CRUNSET, it is more efficient to just load and
@@ -21,7 +23,7 @@
 ; CHECK-DAG: mfocrf [[REG2:.*]], [[CREG]]
 ; CHECK-DAG: rlwinm [[REG2]], [[REG2]]
 ; CHECK: .LBB0_3:
-; CHECK-DAG: creqv [[CREG:.*]]*cr5+lt, [[CREG]]*cr5+lt, [[CREG]]*cr5+lt
+; CHECK-NOT: #UNENCODED_NOP
 ; CHECK: lis [[REG1:.*]], -32768
 ; CHECK: .LBB0_4:
 ; CHECK-NOT: mfocrf [[REG2:.*]], [[CREG]]
@@ -81,8 +83,8 @@
 define dso_local signext i32 @spillCRUNSET(%struct.p5rx* readonly %p1, i32 signext %p2, i32 signext %p3) {
 ; CHECK-LABEL: spillCRUNSET:
 ; CHECK: # %bb.0: # %entry
-; CHECK-DAG: crxor [[CREG:.*]]*cr5+lt, [[CREG]]*cr5+lt, [[CREG]]*cr5+lt
 ; CHECK-DAG: li [[REG1:.*]], 0
+; CHECK-NOT: #UNENCODED_NOP
 ; CHECK-NOT: mfocrf [[REG2:.*]], [[CREG]]
 ; CHECK-NOT: rlwinm [[REG2]], [[REG2]]
 ; CHECK: stw [[REG1]]