Index: lib/Target/PowerPC/CMakeLists.txt =================================================================== --- lib/Target/PowerPC/CMakeLists.txt +++ lib/Target/PowerPC/CMakeLists.txt @@ -42,6 +42,7 @@ PPCVSXFMAMutate.cpp PPCVSXSwapRemoval.cpp PPCExpandISEL.cpp + PPCRegCopyElim.cpp ) add_subdirectory(AsmParser) Index: lib/Target/PowerPC/PPC.h =================================================================== --- lib/Target/PowerPC/PPC.h +++ lib/Target/PowerPC/PPC.h @@ -49,6 +49,7 @@ FunctionPass *createPPCTLSDynamicCallPass(); FunctionPass *createPPCBoolRetToIntPass(); FunctionPass *createPPCExpandISELPass(); + FunctionPass *createPPCRegCopyElimPass(); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); bool LowerPPCMachineOperandToMCOperand(const MachineOperand &MO, @@ -59,6 +60,7 @@ void initializePPCBoolRetToIntPass(PassRegistry&); void initializePPCExpandISELPass(PassRegistry &); void initializePPCTLSDynamicCallPass(PassRegistry &); + void initializePPCRegCopyElimPass(PassRegistry &); extern char &PPCVSXFMAMutateID; namespace PPCII { Index: lib/Target/PowerPC/PPCRegCopyElim.cpp =================================================================== --- /dev/null +++ lib/Target/PowerPC/PPCRegCopyElim.cpp @@ -0,0 +1,473 @@ +//===----- PPCRegCopyElim.cpp -- Redundant Register Copy Elimination -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +// +// This pass aims to eliminate redundancy related to physical registers +// after the register allocation. +// So far, we eliminate the following two types of redundant register copies. +// +// 1) intra-BB redundant register copy +// li Y, 0 li X, 0 +// mr X, Y => (erase mr) +// .. .. +// 2) inter-BB partially redundant register copy +// BB1-------- BB1-------- +// | .. 
| | .. | +// | mr Y, X | | (erase) | +// | .. | | .. | +// with ----------- with ----------- +// 1 pred / | 1 pred / | +// BB-------- | BB-------- BB--------- | BB--------- +// | .. | | | .. | => | mr Y, X | | | .. | +// | .. | | | .. | | .. | | | mr X, Y | +// ---------- | ---------- ----------- | ----------- +// | / with | / with +// BB2-------- 1 succ BB2-------- 1 succ +// | .. | | .. | +// | mr X, Y | | (erase) | +// | .. | | .. | +// ----------- ----------- +// +//===---------------------------------------------------------------------===// + +#include "PPCInstrBuilder.h" +#include "PPCTargetMachine.h" + +using namespace llvm; + +#define DEBUG_TYPE "ppc-regcopy-elim" + +namespace { + +struct PPCRegCopyElim : public MachineFunctionPass { + + static char ID; + const PPCInstrInfo *TII; + MachineFunction *MF; + + PPCRegCopyElim() : MachineFunctionPass(ID) { + initializePPCRegCopyElimPass(*PassRegistry::getPassRegistry()); + } + +private: + // Initialize class variables. + void initialize(MachineFunction &MFParm); + + // Perform peepholes. + bool eliminateRedundantRegCopy(void); + +public: + // Main entry point for this pass. + bool runOnMachineFunction(MachineFunction &MF) override { + if (skipFunction(*MF.getFunction())) + return false; + initialize(MF); + bool Simplified = false; + + Simplified |= eliminateRedundantRegCopy(); + return Simplified; + } +}; + +// Initialize class variables. +void PPCRegCopyElim::initialize(MachineFunction &MFParm) { + MF = &MFParm; + TII = MF->getSubtarget().getInstrInfo(); + DEBUG(dbgs() << + "*** PowerPC Redundant Register Copy Elimination pass ***\n\n"); +} + +// If MI is a register copy, this function returns true and sets the +// source and destination operands in SrcOperand and DstOperand. +// Otherwise it returns false and leaves SrcOperand and DstOperand untouched. +// Kill flags are not examined here; see isLastUse for liveness checks. 
+static bool isRegCopy(MachineInstr &MI, MachineOperand* &SrcOperand, + MachineOperand* &DstOperand) { + // Refer to PPCInstrInfo::copyPhysReg to find opcodes used for copying + // the value in a physical register for each register class. + // Currently we do not optimize VSX registers (copied by xxlor), + // which may involve different register types, i.e. FPR and VRF. + unsigned Opc = MI.getOpcode(); + if (Opc == PPC::FMR) { + DstOperand = &MI.getOperand(0); + SrcOperand = &MI.getOperand(1); + return true; + } + if ((Opc == PPC::OR8 || Opc == PPC::OR || Opc == PPC::VOR) && + MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) { + DstOperand = &MI.getOperand(0); + SrcOperand = &MI.getOperand(1); + return true; + } + return false; +} + +// Returns true if MI is the last user of Reg. +// The result should be equivalent to the kill flag. +static bool isLastUse(MachineInstr &MI, unsigned Reg, + const TargetRegisterInfo *TRI) { + MachineBasicBlock *MBB = MI.getParent(); + MachineBasicBlock::iterator MBBI = MI, MBBIE = MBB->end(); + for (MBBI++; MBBI != MBBIE; MBBI++) { + if (MBBI->isDebugValue()) + continue; + bool SrcKilled = false; + for (auto &MO : MBBI->operands()) { + if (MO.isReg() && !MO.isDef() && + TRI->isSuperOrSubRegisterEq(MO.getReg(), Reg)) + return false; + + if ((MO.isReg() && MO.isDef() && + TRI->isSubRegisterEq(MO.getReg(), Reg)) || + (MO.isRegMask() && MO.clobbersPhysReg(Reg))) + SrcKilled = true; // we may still have use of Reg in this MI + } + if (SrcKilled) + return true; + } + + // We look for Reg in the successor BB live-ins. + for (auto &SuccMBB : MBB->successors()) + for (auto &LiveIn : SuccMBB->liveins()) + if (TRI->isSuperOrSubRegisterEq(LiveIn.PhysReg, Reg)) + return false; + + return true; +} + +// We cannot optimize instructions with some special opcodes. 
+static bool isUnsafeToOptimize(MachineInstr &MI) { + unsigned Opcode = MI.getOpcode(); + if (Opcode == PPC::GETtlsldADDR || Opcode == PPC::GETtlsldADDR32) + return true; + return false; +} + +// We iterate backward from MBBRI to find the def of SrcReg. +// Returns the defining MI, or nullptr (IsClobbered is true on a conflict). +static MachineInstr* +findDefBackward(MachineBasicBlock::reverse_iterator &MBBRI, + MachineBasicBlock &MBB, unsigned DstReg, unsigned SrcReg, + bool &IsClobbered, bool &IsDstUsed, + SmallVector &OperandsToRewrite, + const TargetRegisterInfo *TRI) { + IsDstUsed = false; + MachineBasicBlock::reverse_iterator MBBRIE = MBB.rend(); + SmallVector TmpOperands; + for (; MBBRI != MBBRIE; MBBRI++) { + if (MBBRI->isDebugValue()) + continue; + + // If this instruction defines the source reg of the copy, this is the + // target for our optimization. + int Idx = MBBRI->findRegisterDefOperandIdx(SrcReg, false, true, TRI); + if (Idx != -1) { + MachineOperand DefMO = MBBRI->getOperand(Idx); + if (DefMO.isReg() && DefMO.getReg() == SrcReg && + !DefMO.isTied() && !DefMO.isImplicit() && + !isUnsafeToOptimize(*MBBRI)) { + IsClobbered = false; + return &(*MBBRI); + } + else { + IsClobbered = true; + return nullptr; + } + } + + // If this instruction conflicts with the reg copy, we give up here. + // MachineInstr->readsRegister() cannot be used since it does not catch + // accesses to a sub-register. + for (auto &MO : MBBRI->operands()) { + if (MO.isRegMask() && (MO.clobbersPhysReg(SrcReg) || + MO.clobbersPhysReg(DstReg))) { + IsClobbered = true; + return nullptr; + } + if (MO.isReg()) { + if (TRI->isSuperOrSubRegisterEq(MO.getReg(), DstReg)) { + if (!MO.isDef()) + IsDstUsed = true; + else { + IsClobbered = true; + return nullptr; + } + } + if (TRI->isSuperOrSubRegisterEq(MO.getReg(), SrcReg)) { + // If this instruction uses source register of register copy, + // we need to remember the operand to replace it with destination + // register of copy. 
For example: + // li r4, 1 li r3, 1 + // cmplwi r4, 0 -> cmplwi r3, 0 + // mr r3, r4 (erase mr) + if (MO.getReg() == SrcReg) + TmpOperands.push_back(&MO); + else { + IsClobbered = true; + return nullptr; + } + } + } + } + + if (!TmpOperands.empty()) { + OperandsToRewrite.append(TmpOperands.begin(), TmpOperands.end()); + TmpOperands.clear(); + } + } + IsClobbered = false; + return nullptr; +} + +bool PPCRegCopyElim::eliminateRedundantRegCopy(void) { + const PPCSubtarget *PPCSubTarget = &MF->getSubtarget(); + const PPCFrameLowering *TFI = PPCSubTarget->getFrameLowering(); + const PPCRegisterInfo *TRI = PPCSubTarget->getRegisterInfo(); + bool Simplified = false; + + auto ResetKillFlags = [](MachineInstr *MI, unsigned Reg) { + for (auto &MO : MI->uses()) + if (MO.isReg() && MO.getReg() == Reg) + MO.setIsKill(false); + }; + + SmallPtrSet WorkSet; + for (MachineBasicBlock &MBB : *MF) + WorkSet.insert(&MBB); + + while (!WorkSet.empty()) { + MachineBasicBlock & MBB = **(WorkSet.begin()); + WorkSet.erase(&MBB); + SmallPtrSet Visited; + bool CurrentInstrErased = false; + do { + CurrentInstrErased = false; + for (MachineInstr &CopyMI : MBB) { + if (CopyMI.isDebugValue()) + continue; + + MachineOperand *SrcMO = nullptr, *DstMO = nullptr; + if (isRegCopy(CopyMI, SrcMO, DstMO)) { + // We avoid processing the same MI twice when we re-iterate this BB. + if (!Visited.insert(&CopyMI).second) + continue; + + unsigned DstReg = DstMO->getReg(); + unsigned SrcReg = SrcMO->getReg(); + + // We do not optimize copies involving special registers, such as + // the stack pointer or the frame pointer. 
+ auto isSpecialReg = [&](unsigned Reg) { + if (Reg == PPC::X1 || Reg == PPC::R1 || + Reg == PPC::X2 || Reg == PPC::R2 || + (TFI->needsFP(*MF) && (Reg == PPC::X31 || Reg == PPC::R31)) || + (TRI->hasBasePointer(*MF) && + (Reg == TRI->getBaseRegister(*MF) || Reg == PPC::X30))) + return true; + return false; + }; + if (isSpecialReg(SrcReg) || isSpecialReg(DstReg)) + continue; + + MachineInstr *DefMI = nullptr; + bool IsClobbered = false; + SmallVector OperandsToRewrite1; + + // We iterate instructions backward to find the def of the source + // of register copy or instructions conflicting with register copy + // (e.g. instructions that use destination reg of copy). + bool IsDstUsed = false; + MachineBasicBlock::reverse_iterator MBBRI = CopyMI; + DefMI = findDefBackward(++MBBRI, MBB, DstReg, SrcReg, + IsClobbered, IsDstUsed, OperandsToRewrite1, TRI); + if (IsClobbered) + continue; + + // Def for the source of the register copy is found in the BB. + if (DefMI) { + if (!IsDstUsed && DefMI->getOperand(0).getReg() == SrcReg) { + // We optimize within this block if possible. + bool IsKill = isLastUse(CopyMI, SrcReg, TRI); + // We can eliminate a redundant reg copy pair. + if (isRegCopy(*DefMI, SrcMO, DstMO) && + DefMI->getOpcode() == CopyMI.getOpcode() && + SrcReg == DstMO->getReg() && DstReg == SrcMO->getReg()) { + if (IsKill) + DefMI->eraseFromParent(); + else + ResetKillFlags(DefMI, DstReg); + } + else { + // If the SrcReg of the reg copy is not used afterwards, + // we avoid the register copy by modifying the def of SrcReg. + if (!IsKill) + continue; + DefMI->getOperand(0).setReg(DstReg); + } + + DEBUG(dbgs() << "Eliminating redundant register copy:\n"); + DEBUG(CopyMI.dump()); + + CopyMI.eraseFromParent(); + for (auto &MO : OperandsToRewrite1) + MO->setReg(DstReg); + CurrentInstrErased = true; + break; + } + continue; + } + + // If no def and no conflicting instruction is found in this BB, + // from here, we handle redundancy among multiple BBs. 
+ // We currently support the following CFGs. + // + // Pred1MBB + // / | / + // (SideMBB) | (Pred2MBB) + // / | / + // MBB + // + // 1) MBB doesn't have more than two predecessors. + // 2) Pred1MBB doesn't have more than two successors. + // 3) SideMBB and Pred2MBB are optional. + // 4) Pred2MBB has only one successor (MBB). + // 5) SideMBB has only one predecessor (Pred1MBB). + // 6) SideMBB and Pred2MBB may be the same BB. + + // So far, we do not optimize if this BB has more than two + // predecessors not to increase the number of total instructions. + if (MBB.pred_size() > 2) + continue; + + // Our optimization uses DstReg instead of SrcReg to bring data + // among BBs, so we need to confirm DstReg is not already used. + bool DstLive = false; + for (auto &LiveIn : MBB.liveins()) + if (TRI->isSuperOrSubRegisterEq(LiveIn.PhysReg, DstReg)) { + DstLive = true; + break; + } + if (DstLive) + continue; + + auto FirstPredBB = MBB.pred_begin(); + for (auto &Pred1MBB : MBB.predecessors()) { + // We check the control flow among BBs before iterating. + // If the BBs do not match the above patterns we support, + // we give up here. + MachineBasicBlock *Pred2MBB = nullptr, *SideMBB = nullptr; + if (Pred1MBB->succ_size() > 2) + continue; + if (MBB.pred_size() == 2) { + Pred2MBB = (Pred1MBB == *FirstPredBB) ? *(FirstPredBB + 1): + * FirstPredBB; + if (Pred2MBB->succ_size() > 1) + continue; + } + if (Pred1MBB->succ_size() == 2) { + auto FirstSuccBB = Pred1MBB->succ_begin(); + SideMBB = (&MBB == *FirstSuccBB) ? *(FirstSuccBB + 1): + * FirstSuccBB; + if (SideMBB->pred_size() > 1) + continue; + } + + // As we did for the current MBB, we iterate instructions backward + // to find the def of the source of register copy, starting from + // the end of the BB. 
+ DefMI = nullptr; + SmallVector OperandsToRewrite2; + MachineBasicBlock::reverse_iterator MBBRI = Pred1MBB->rbegin(); + DefMI = findDefBackward(MBBRI, *Pred1MBB, DstReg, SrcReg, + IsClobbered, IsDstUsed, OperandsToRewrite2, + TRI); + + // We keep going iff the def of the reg copy is another reg copy + // with reversed src and dst. + if (!(DefMI && isRegCopy(*DefMI, SrcMO, DstMO) && + DefMI->getOpcode() == CopyMI.getOpcode() && + SrcReg == DstMO->getReg() && DstReg == SrcMO->getReg())) + continue; + + DEBUG(dbgs() << "Optimizing partially redundant reg copy pair:\n"); + DEBUG(DefMI->dump()); + DEBUG(CopyMI.dump()); + + // Here, we have found a partially redundant register copy pair. + // If the SrcReg of second reg copy is not used after it, + // we can eliminate or move both reg copy instructions; + // i.e. if we have SideMBB, we move the first reg copy to its entry. + // Otherwise, we can erase it. + bool IsKill = isLastUse(CopyMI, SrcReg, TRI); + if (IsKill) { + if (SideMBB) { + SideMBB->splice(SideMBB->getFirstNonDebugInstr(), Pred1MBB, + MachineBasicBlock::iterator(DefMI)); + SideMBB->removeLiveIn(SrcReg); + if (!SideMBB->isLiveIn(DstReg)) + SideMBB->addLiveIn(DstReg); + } + else + DefMI->eraseFromParent(); + } + + // We move the second reg copy to Pred2MBB by inserting before + // the first terminator instruction (or at the end if no + // terminator in the BB). + if (Pred2MBB) { + MachineBasicBlock::iterator T = Pred2MBB->getFirstTerminator(); + if (T == Pred2MBB->end()) + Pred2MBB->push_back(MBB.remove(&CopyMI)); + else + Pred2MBB->splice(T, &MBB, MachineBasicBlock::iterator(CopyMI)); + // The moved register copy may be further optimized. + // So we will optimize this basic block later again. + WorkSet.insert(Pred2MBB); + } + else + CopyMI.eraseFromParent(); + + if (IsKill) + MBB.removeLiveIn(SrcReg); + MBB.addLiveIn(DstReg); + + // We touch up some operands if we have found any. 
+ for (auto &MO : OperandsToRewrite1) + MO->setReg(DstReg); + for (auto &MO : OperandsToRewrite2) + MO->setReg(DstReg); + + if (!IsKill && DefMI->killsRegister(DstReg)) + ResetKillFlags(DefMI, DstReg); + + CurrentInstrErased = true; + break; + } + + // If the current instruction has been eliminated, + // we cannot continue iteration, so restart it. + if (CurrentInstrErased) + break; + } + } + Simplified |= CurrentInstrErased; // sticky; covers both erase paths + } while(CurrentInstrErased); + } + return Simplified; +} + +} // end anonymous namespace + +INITIALIZE_PASS_BEGIN(PPCRegCopyElim, DEBUG_TYPE, + "PowerPC Redundant Register Copy Elimination", false, false) +INITIALIZE_PASS_END(PPCRegCopyElim, DEBUG_TYPE, + "PowerPC Redundant Register Copy Elimination", false, false) + +char PPCRegCopyElim::ID = 0; +FunctionPass* +llvm::createPPCRegCopyElimPass() { return new PPCRegCopyElim(); } Index: lib/Target/PowerPC/PPCTargetMachine.cpp =================================================================== --- lib/Target/PowerPC/PPCTargetMachine.cpp +++ lib/Target/PowerPC/PPCTargetMachine.cpp @@ -68,6 +68,10 @@ opt DisableMIPeephole("disable-ppc-peephole", cl::Hidden, cl::desc("Disable machine peepholes for PPC")); +static cl:: +opt DisableRegCopyElim("disable-ppc-regcopy-elim", cl::Hidden, + cl::desc("Disable register copy elimination for PPC")); + static cl::opt EnableGEPOpt("ppc-gep-opt", cl::Hidden, cl::desc("Enable optimizations on complex GEPs"), @@ -98,6 +102,7 @@ initializePPCBoolRetToIntPass(PR); initializePPCExpandISELPass(PR); initializePPCTLSDynamicCallPass(PR); + initializePPCRegCopyElimPass(PR); } /// Return the datalayout string of a subtarget. @@ -435,8 +440,14 @@ void PPCPassConfig::addPreEmitPass() { addPass(createPPCExpandISELPass()); - if (getOptLevel() != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) { + // We try to eliminate redundant register copies among physical registers. 
+ if (!DisableRegCopyElim) + addPass(createPPCRegCopyElimPass()); + addPass(createPPCEarlyReturnPass(), false); + } + // Must run branch selection immediately preceding the asm printer. addPass(createPPCBranchSelectionPass(), false); } Index: test/CodeGen/PowerPC/ppc64-byval-align.ll =================================================================== --- test/CodeGen/PowerPC/ppc64-byval-align.ll +++ test/CodeGen/PowerPC/ppc64-byval-align.ll @@ -24,8 +24,7 @@ ret void } ; CHECK-LABEL: @caller1 -; CHECK: mr [[REG:[0-9]+]], 3 -; CHECK: mr 7, [[REG]] +; CHECK: mr 7, 3 ; CHECK: bl test1 define i64 @callee2(%struct.pad* byval nocapture readnone %x, i32 signext %y, %struct.test* byval align 16 nocapture readonly %z) { Index: test/CodeGen/PowerPC/redundant_regcopy.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/redundant_regcopy.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s + +define i8* @func1(i8* %a, i1 %b) { +; CHECK-LABEL: @func1 +; CHECK: bc +; CHECK: # BB#1 +; CHECK: mr 30, 3 +; CHECK: bl callee +; CHECK: mr 3, 30 +; CHECK: .LBB0_2: +; CHECK: blr +entry: + br i1 %b, label %exit, label %foo + +foo: + call void @callee() + br label %exit + +exit: + ret i8* %a +} + +declare void @callee() Index: test/CodeGen/PowerPC/redundant_regcopy_1.mir =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/redundant_regcopy_1.mir @@ -0,0 +1,49 @@ +# test for BB-local register copy elimination +# RUN: llc -run-pass ppc-regcopy-elim -verify-machineinstrs -o - %s | FileCheck %s + +--- | + target datalayout = "E-m:e-i64:64-n32:64" + target triple = "powerpc64le-unknown-linux-gnu" + define signext i32 @func(i32 signext %i) { + entry: + ret i32 %i + } + +... 
+--- +name: func +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%x3' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + liveins: %x3 + + ; CHECK-LABEL: bb.0.entry: + ; CHECK: %x3 = ADDI8 killed %x3, 1 + ; CHECK-NOT: OR8 + %x4 = ADDI8 killed %x3, 1 + %cr0 = CMPLDI %x4, 0 + %x3 = OR8 %x4, killed %x4 + +... + Index: test/CodeGen/PowerPC/redundant_regcopy_2.mir =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/redundant_regcopy_2.mir @@ -0,0 +1,75 @@ +# test for BB-local register copy elimination +# RUN: llc -run-pass ppc-regcopy-elim -verify-machineinstrs -o - %s | FileCheck %s + +--- | + target datalayout = "E-m:e-i64:64-n32:64" + target triple = "powerpc64le-unknown-linux-gnu" + define i8* @func(i8* %a, i1 %b) { + entry: + br i1 %b, label %exit, label %foo + + foo: ; preds = %entry + br label %exit + + exit: ; preds = %foo, %entry + ret i8* %a + } + +... 
+--- +name: func +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%x3' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + successors: %bb.2.exit(0x40000000), %bb.1.foo(0x40000000) + liveins: %x3 + + ; CHECK-LABEL: bb.0.entry: + ; CHECK-NOT: OR8 + ; CHECK: %cr0 = CMPLDI %x3, 0 + %x4 = OR8 %x3, %x3 + %cr0 = CMPLDI %x3, 0 + BCC 76, killed %cr0, %bb.2.exit + B %bb.1.foo + + bb.1.foo: + liveins: %x4 + + ; CHECK-LABEL: bb.1.foo: + ; CHECK: %x4 = OR8 %x3, %x3 + ; CHECK: %x3 = ADDI8 %x4, 123 + %x4 = ADDI8 %x4, 123 + B %bb.2.exit + + bb.2.exit: + liveins: %x4 + ; CHECK-LABEL: bb.2.exit: + ; CHECK-NOT: OR8 + ; CHECK: BLR8 + %cr0 = CMPLDI %x4, 0 + %x3 = OR8 %x4, killed %x4 + BLR8 implicit %lr8, implicit %rm + +... 
+ Index: test/CodeGen/PowerPC/remove-cyclic-mr.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/remove-cyclic-mr.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 \ +; RUN: -verify-machineinstrs | FileCheck %s +%struct.x0 = type { i8 } + +@.str = private unnamed_addr constant [1 x i8] zeroinitializer, align 1 + +define void @_Z2x6v() { +; CHECK-LABEL: _Z2x6v: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: std 0, 16(1) +; CHECK-NEXT: stdu 1, -64(1) +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: addi 3, 1, 40 +; CHECK-NEXT: std 30, 48(1) # 8-byte Folded Spill +; CHECK-NEXT: bl _ZN2x02x1Ev +; CHECK-NEXT: nop +; CHECK-NEXT: mr 30, 3 +; CHECK-NEXT: addis 12, 2, .L.str@toc@ha +; CHECK-NEXT: addi 4, 12, .L.str@toc@l +; Note: previously there was an mr 3, 30 here. 
+; CHECK-NEXT: bl _ZN2x02x4EPKc +; CHECK-NEXT: nop +; CHECK-NEXT: li 4, 0 +; CHECK-NEXT: mr 3, 30 +; CHECK-NEXT: bl _ZN2x0lsIiEEvT_ +; CHECK-NEXT: nop +; CHECK-NEXT: ld 30, 48(1) # 8-byte Folded Reload +; CHECK-NEXT: addi 1, 1, 64 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr +entry: + %ref.tmp = alloca %struct.x0, align 1 + %0 = getelementptr inbounds %struct.x0, %struct.x0* %ref.tmp, i64 0, i32 0 + call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %0) + %call = call dereferenceable(1) %struct.x0* @_ZN2x02x1Ev(%struct.x0* nonnull %ref.tmp) + call void @_ZN2x02x4EPKc(%struct.x0* nonnull %call, i8* nonnull getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i64 0, i64 0)) + call void @_ZN2x0lsIiEEvT_(%struct.x0* nonnull %call, i32 signext 0) + call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %0) + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) + +declare dereferenceable(1) %struct.x0* @_ZN2x02x1Ev(%struct.x0*) + +declare void @_ZN2x0lsIiEEvT_(%struct.x0*, i32 signext) + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) + +declare void @_ZN2x02x4EPKc(%struct.x0*, i8*) +