Index: lib/Target/PowerPC/PPCPreEmitPeephole.cpp
===================================================================
--- lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -33,6 +33,8 @@
           "Number of r+r instructions converted to r+i in pre-emit peephole");
 STATISTIC(NumRemovedInPreEmit,
           "Number of instructions deleted in pre-emit peephole");
+STATISTIC(NumRedundantPairsFound,
+          "Number of redundant pair instructions found in pre-emit peephole");
 
 static cl::opt<bool>
 RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(false),
@@ -40,6 +42,7 @@
 
 namespace {
   class PPCPreEmitPeephole : public MachineFunctionPass {
+    MachineInstr *hasRedundantPair(MachineInstr &MI) const;
   public:
     static char ID;
     PPCPreEmitPeephole() : MachineFunctionPass(ID) {
@@ -64,6 +67,7 @@
       for (MachineBasicBlock &MBB : MF) {
         for (MachineInstr &MI : MBB) {
           MachineInstr *DefMIToErase = nullptr;
+          MachineInstr *RedundantPairMI = nullptr;
           if (TII->convertToImmediateForm(MI, &DefMIToErase)) {
             Changed = true;
             NumRRConvertedInPreEmit++;
@@ -72,18 +76,76 @@
             if (DefMIToErase) {
               InstrsToErase.push_back(DefMIToErase);
             }
+          } else if ((RedundantPairMI = hasRedundantPair(MI))) {
+            // Erasing the redundant copy modifies the function, so the pass
+            // must report a change.
+            Changed = true;
+            InstrsToErase.push_back(RedundantPairMI);
+            NumRedundantPairsFound++;
           }
         }
       }
       for (MachineInstr *MI : InstrsToErase) {
         DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
         DEBUG(MI->dump());
+        assert(MI->getParent() &&
+               "The same instruction marked redundant multiple times?");
         MI->eraseFromParent();
         NumRemovedInPreEmit++;
       }
       return Changed;
     }
   };
+
+  /// Look for a register copy later in MI's block that merely undoes MI.
+  ///
+  /// For now, only handle mr X, Y -> mr Y, X with no clobber of X or Y in
+  /// between, as that is the only pattern known to come up.
+  ///
+  /// \param MI the candidate first copy (mr X, Y); on success the kill flags
+  ///        on Y are cleared from MI up to the redundant copy.
+  /// \returns the redundant copy back (mr Y, X), or nullptr if there is none.
+  MachineInstr *PPCPreEmitPeephole::hasRedundantPair(MachineInstr &MI) const {
+    unsigned Opc = MI.getOpcode();
+
+    // If this isn't an mr (an OR of a register with itself), return null.
+    if ((Opc != PPC::OR && Opc != PPC::OR8) ||
+        MI.getOperand(1).getReg() != MI.getOperand(2).getReg())
+      return nullptr;
+
+    unsigned DestReg = MI.getOperand(0).getReg();
+    unsigned SrcReg = MI.getOperand(1).getReg();
+    const MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
+    const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+
+    MachineBasicBlock::iterator StartIt = MI, EndIt = MI.getParent()->end();
+    ++StartIt;
+    for (; StartIt != EndIt; ++StartIt) {
+      // If we encounter an instruction that clobbers the destination,
+      // any subsequent copy of the original destination won't be copying
+      // the same value, return null.
+      if (StartIt->modifiesRegister(DestReg, TRI))
+        return nullptr;
+
+      // If we find an instruction that clobbers the source register, it is
+      // redundant if it's a copy from the original destination and we haven't
+      // exited above.
+      if (StartIt->modifiesRegister(SrcReg, TRI)) {
+        if (StartIt->getOpcode() != Opc ||
+            StartIt->getOperand(1).getReg() != DestReg ||
+            StartIt->getOperand(2).getReg() != DestReg)
+          return nullptr;
+
+        // Once the copy back is gone, the source register stays live through
+        // its former position. Clear the kill flags on it from the original
+        // instruction and from everything in between.
+        for (MachineBasicBlock::iterator It = MI; It != StartIt; ++It)
+          It->clearRegisterKills(SrcReg, TRI);
+        return &*StartIt;
+      }
+    }
+    return nullptr;
+  }
 }
 
 INITIALIZE_PASS(PPCPreEmitPeephole, DEBUG_TYPE, "PowerPC Pre-Emit Peephole",
Index: test/CodeGen/PowerPC/remove-cyclic-mr.ll
===================================================================
--- test/CodeGen/PowerPC/remove-cyclic-mr.ll
+++ test/CodeGen/PowerPC/remove-cyclic-mr.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 \
+; RUN:   -ppc-late-peephole -verify-machineinstrs | FileCheck %s
+%struct.x0 = type { i8 }
+
+@.str = private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+
+define void @_Z2x6v() {
+; CHECK-LABEL: _Z2x6v:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    std 0, 16(1)
+; CHECK-NEXT:    stdu 1, -64(1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r30, -16
+; CHECK-NEXT:    addi 3, 1, 40
+; CHECK-NEXT:    std 30, 48(1) # 8-byte Folded Spill
+; CHECK-NEXT:    bl _ZN2x02x1Ev
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mr 30, 3
+; CHECK-NEXT:    addis 12, 2, .L.str@toc@ha
+; CHECK-NEXT:    addi 4, 12, .L.str@toc@l
+; Note: previously there was an mr 3, 30 here.
+; CHECK-NEXT:    bl _ZN2x02x4EPKc
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    li 4, 0
+; CHECK-NEXT:    mr 3, 30
+; CHECK-NEXT:    bl _ZN2x0lsIiEEvT_
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    ld 30, 48(1) # 8-byte Folded Reload
+; CHECK-NEXT:    addi 1, 1, 64
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+entry:
+  %ref.tmp = alloca %struct.x0, align 1
+  %0 = getelementptr inbounds %struct.x0, %struct.x0* %ref.tmp, i64 0, i32 0
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %0)
+  %call = call dereferenceable(1) %struct.x0* @_ZN2x02x1Ev(%struct.x0* nonnull %ref.tmp)
+  call void @_ZN2x02x4EPKc(%struct.x0* nonnull %call, i8* nonnull getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i64 0, i64 0))
+  call void @_ZN2x0lsIiEEvT_(%struct.x0* nonnull %call, i32 signext 0)
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %0)
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+
+declare dereferenceable(1) %struct.x0* @_ZN2x02x1Ev(%struct.x0*)
+
+declare void @_ZN2x0lsIiEEvT_(%struct.x0*, i32 signext)
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+
+declare void @_ZN2x02x4EPKc(%struct.x0*, i8*)