Index: lib/Target/PowerPC/PPC.h =================================================================== --- lib/Target/PowerPC/PPC.h +++ lib/Target/PowerPC/PPC.h @@ -59,6 +59,7 @@ void initializePPCBoolRetToIntPass(PassRegistry&); void initializePPCExpandISELPass(PassRegistry &); void initializePPCTLSDynamicCallPass(PassRegistry &); + void initializePPCMIPeepholePass(PassRegistry&); extern char &PPCVSXFMAMutateID; namespace PPCII { Index: lib/Target/PowerPC/PPCInstrInfo.h =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.h +++ lib/Target/PowerPC/PPCInstrInfo.h @@ -297,19 +297,22 @@ const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const; static int getRecordFormOpcode(unsigned Opcode); - bool isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, - const unsigned PhiDepth) const; + bool isSignOrZeroExtended(const unsigned Reg, bool SignExt, + const unsigned PhiDepth, + const MachineRegisterInfo *MRI) const; /// Return true if the output of the instruction is always a sign-extended, /// i.e. 0 to 31-th bits are same as 32-th bit. - bool isSignExtended(const MachineInstr &MI, const unsigned depth = 0) const { - return isSignOrZeroExtended(MI, true, depth); + bool isSignExtended(const unsigned Reg, + const MachineRegisterInfo *MRI) const { + return isSignOrZeroExtended(Reg, true, 0, MRI); } /// Return true if the output of the instruction is always zero-extended, /// i.e. 
0 to 31-th bits are all zeros - bool isZeroExtended(const MachineInstr &MI, const unsigned depth = 0) const { - return isSignOrZeroExtended(MI, false, depth); + bool isZeroExtended(const unsigned Reg, + const MachineRegisterInfo *MRI) const { + return isSignOrZeroExtended(Reg, false, 0, MRI); } }; Index: lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.cpp +++ lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1639,15 +1639,15 @@ bool noSub = false; if (isPPC64) { if (is32BitSignedCompare) { - // We can perform this optimization only if MI is sign-extending. - if (isSignExtended(*MI)) + // We can perform this optimization only if SrcReg is sign-extending. + if (isSignExtended(SrcReg, MRI)) noSub = true; else return false; } else if (is32BitUnsignedCompare) { - // We can perform this optimization, equality only, if MI is + // We can perform this optimization, equality only, if SrcReg is // zero-extending. - if (isZeroExtended(*MI)) { + if (isZeroExtended(SrcReg, MRI)) { noSub = true; equalityOnly = true; } else @@ -2161,8 +2161,15 @@ // This function returns true if the machine instruction // always outputs a value by sign-extending a 32 bit value, // i.e. 0 to 31-th bits are same as 32-th bit. 
-static bool isSignExtendingOp(const MachineInstr &MI) { - int Opcode = MI.getOpcode(); +static bool isSignExtendingOp(const unsigned Reg, + const MachineRegisterInfo *MRI) { + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return false; + + MachineInstr *MI = MRI->getVRegDef(Reg); + if (!MI) return false; + + int Opcode = MI->getOpcode(); if (Opcode == PPC::LI || Opcode == PPC::LI8 || Opcode == PPC::LIS || Opcode == PPC::LIS8 || Opcode == PPC::SRAW || Opcode == PPC::SRAWo || @@ -2173,12 +2180,8 @@ Opcode == PPC::LHA8 || Opcode == PPC::LHAX8 || Opcode == PPC::LBZ || Opcode == PPC::LBZX || Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 || - Opcode == PPC::LBZU || Opcode == PPC::LBZUX || - Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 || Opcode == PPC::LHZ || Opcode == PPC::LHZX || Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 || - Opcode == PPC::LHZU || Opcode == PPC::LHZUX || - Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 || Opcode == PPC::EXTSB || Opcode == PPC::EXTSBo || Opcode == PPC::EXTSH || Opcode == PPC::EXTSHo || Opcode == PPC::EXTSB8 || Opcode == PPC::EXTSH8 || @@ -2187,13 +2190,21 @@ Opcode == PPC::EXTSB8_32_64) return true; - if (Opcode == PPC::RLDICL && MI.getOperand(3).getImm() >= 33) + // The first def of LBZU/LHZU is sign extended. 
+ if ((Opcode == PPC::LBZU || Opcode == PPC::LBZUX || + Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 || + Opcode == PPC::LHZU || Opcode == PPC::LHZUX || + Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8) && + MI->getOperand(0).getReg() == Reg) + return true; + + if (Opcode == PPC::RLDICL && MI->getOperand(3).getImm() >= 33) return true; if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo || Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo) && - MI.getOperand(3).getImm() > 0 && - MI.getOperand(3).getImm() <= MI.getOperand(4).getImm()) + MI->getOperand(3).getImm() > 0 && + MI->getOperand(3).getImm() <= MI->getOperand(4).getImm()) return true; return false; @@ -2201,13 +2212,20 @@ // This function returns true if the machine instruction // always outputs zeros in higher 32 bits. -static bool isZeroExtendingOp(const MachineInstr &MI) { - int Opcode = MI.getOpcode(); +static bool isZeroExtendingOp(const unsigned Reg, + const MachineRegisterInfo *MRI) { + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return false; + + MachineInstr *MI = MRI->getVRegDef(Reg); + if (!MI) return false; + + int Opcode = MI->getOpcode(); // The 16-bit immediate is sign-extended in li/lis. // If the most significant bit is zero, all higher bits are zero. 
if (Opcode == PPC::LI || Opcode == PPC::LI8 || Opcode == PPC::LIS || Opcode == PPC::LIS8) { - int64_t Imm = MI.getOperand(1).getImm(); + int64_t Imm = MI->getOperand(1).getImm(); if (((uint64_t)Imm & ~0x7FFFuLL) == 0) return true; } @@ -2217,18 +2235,18 @@ if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo || Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo || Opcode == PPC::RLDICL_32_64) && - MI.getOperand(3).getImm() >= 32) + MI->getOperand(3).getImm() >= 32) return true; if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) && - MI.getOperand(3).getImm() >= 32 && - MI.getOperand(3).getImm() <= 63 - MI.getOperand(2).getImm()) + MI->getOperand(3).getImm() >= 32 && + MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm()) return true; if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo || Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo || Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) && - MI.getOperand(3).getImm() <= MI.getOperand(4).getImm()) + MI->getOperand(3).getImm() <= MI->getOperand(4).getImm()) return true; // There are other instructions that clear higher 32-bits. 
@@ -2244,25 +2262,29 @@ Opcode == PPC::SLWI || Opcode == PPC::SLWIo || Opcode == PPC::SRWI || Opcode == PPC::SRWIo || Opcode == PPC::LWZ || Opcode == PPC::LWZX || - Opcode == PPC::LWZU || Opcode == PPC::LWZUX || Opcode == PPC::LWBRX || Opcode == PPC::LHBRX || Opcode == PPC::LHZ || Opcode == PPC::LHZX || - Opcode == PPC::LHZU || Opcode == PPC::LHZUX || Opcode == PPC::LBZ || Opcode == PPC::LBZX || - Opcode == PPC::LBZU || Opcode == PPC::LBZUX || Opcode == PPC::LWZ8 || Opcode == PPC::LWZX8 || - Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8 || Opcode == PPC::LWBRX8 || Opcode == PPC::LHBRX8 || Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 || - Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 || Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 || - Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 || Opcode == PPC::ANDIo || Opcode == PPC::ANDISo || Opcode == PPC::ROTRWI || Opcode == PPC::ROTRWIo || Opcode == PPC::EXTLWI || Opcode == PPC::EXTLWIo || Opcode == PPC::MFVSRWZ) return true; + // The first def of LBZU/LHZU/LWZU is zero extended.
+ if ((Opcode == PPC::LBZU || Opcode == PPC::LBZUX || + Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 || + Opcode == PPC::LHZU || Opcode == PPC::LHZUX || + Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 || + Opcode == PPC::LWZU || Opcode == PPC::LWZUX || + Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8) && + MI->getOperand(0).getReg() == Reg) + return true; + return false; } @@ -2271,29 +2293,34 @@ const unsigned MAX_DEPTH = 1; bool -PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, - const unsigned Depth) const { - const MachineFunction *MF = MI.getParent()->getParent(); - const MachineRegisterInfo *MRI = &MF->getRegInfo(); +PPCInstrInfo::isSignOrZeroExtended(const unsigned Reg, bool SignExt, + const unsigned Depth, + const MachineRegisterInfo *MRI) const { + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return false; + + MachineInstr *MI = MRI->getVRegDef(Reg); + if (!MI) return false; // If we know this instruction returns sign- or zero-extended result, // return true. - if (SignExt ? isSignExtendingOp(MI): - isZeroExtendingOp(MI)) + if (SignExt ? isSignExtendingOp(Reg, MRI): + isZeroExtendingOp(Reg, MRI)) return true; - switch (MI.getOpcode()) { + switch (MI->getOpcode()) { case PPC::COPY: { - unsigned SrcReg = MI.getOperand(1).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); // In both ELFv1 and v2 ABI, method parameters and the return value // are sign- or zero-extended. + const MachineFunction *MF = MI->getParent()->getParent(); if (MF->getSubtarget().isSVR4ABI()) { const PPCFunctionInfo *FuncInfo = MF->getInfo(); // We check the ZExt/SExt flags for a method parameter. - if (MI.getParent()->getBasicBlock() == + if (MI->getParent()->getBasicBlock() == &MF->getFunction()->getEntryBlock()) { - unsigned VReg = MI.getOperand(0).getReg(); + unsigned VReg = MI->getOperand(0).getReg(); if (MF->getRegInfo().isLiveIn(VReg)) return SignExt ? 
FuncInfo->isLiveInSExt(VReg) : FuncInfo->isLiveInZExt(VReg); @@ -2306,9 +2333,9 @@ // ADJCALLSTACKUP 32, 0, %R1, %R1 // %vreg5 = COPY %X3; G8RC:%vreg5 if (SrcReg == PPC::X3) { - const MachineBasicBlock *MBB = MI.getParent(); + const MachineBasicBlock *MBB = MI->getParent(); MachineBasicBlock::const_instr_iterator II = - MachineBasicBlock::const_instr_iterator(&MI); + MachineBasicBlock::const_instr_iterator(MI); if (II != MBB->instr_begin() && (--II)->getOpcode() == PPC::ADJCALLSTACKUP) { const MachineInstr &CallMI = *(--II); @@ -2330,37 +2357,36 @@ } // If this is a copy from another register, we recursively check source. - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) - return false; - const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); - if (SrcMI != NULL) - return isSignOrZeroExtended(*SrcMI, SignExt, Depth); - - return false; + return isSignOrZeroExtended(SrcReg, SignExt, Depth, MRI); } + // Logical operation with 16-bit immediate does not change the upper 48 bits. + // So, we track the operand register as we do for register copy. case PPC::ANDIo: - case PPC::ANDISo: case PPC::ORI: - case PPC::ORIS: case PPC::XORI: - case PPC::XORIS: case PPC::ANDIo8: - case PPC::ANDISo8: case PPC::ORI8: + case PPC::XORI8: { + unsigned SrcReg = MI->getOperand(1).getReg(); + return isSignOrZeroExtended(SrcReg, SignExt, Depth, MRI); + } + + // Logical operation with shifted 16-bit immediate does not change the upper + // 32 bits. So, we track the operand register only for zero extension. + case PPC::ANDISo: + case PPC::ORIS: + case PPC::XORIS: + case PPC::ANDISo8: case PPC::ORIS8: - case PPC::XORI8: case PPC::XORIS8: { - // logical operation with 16-bit immediate does not change the upper bits. - // So, we track the operand register as we do for register copy. 
- unsigned SrcReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) - return false; - const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); - if (SrcMI != NULL) - return isSignOrZeroExtended(*SrcMI, SignExt, Depth); - - return false; + if (SignExt) { + uint16_t Imm = MI->getOperand(2).getImm(); + if (Imm & 0x8000) + return false; + } + unsigned SrcReg = MI->getOperand(1).getReg(); + return isSignOrZeroExtended(SrcReg, SignExt, Depth, MRI); } // If all incoming values are sign-/zero-extended, @@ -2377,21 +2403,17 @@ // The input registers for PHI are operand 1, 3, ... // The input registers for others are operand 1 and 2. unsigned E = 3, D = 1; - if (MI.getOpcode() == PPC::PHI) { - E = MI.getNumOperands(); + if (MI->getOpcode() == PPC::PHI) { + E = MI->getNumOperands(); D = 2; } for (unsigned I = 1; I != E; I += D) { - if (MI.getOperand(I).isReg()) { - unsigned SrcReg = MI.getOperand(I).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) - return false; - const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); - if (SrcMI == NULL || !isSignOrZeroExtended(*SrcMI, SignExt, Depth+1)) - return false; - } - else + if (!MI->getOperand(I).isReg()) + return false; + + unsigned SrcReg = MI->getOperand(I).getReg(); + if (!isSignOrZeroExtended(SrcReg, SignExt, Depth+1, MRI)) return false; } return true; Index: lib/Target/PowerPC/PPCMIPeephole.cpp =================================================================== --- lib/Target/PowerPC/PPCMIPeephole.cpp +++ lib/Target/PowerPC/PPCMIPeephole.cpp @@ -50,10 +50,6 @@ cl::desc("enable elimination of zero-extensions"), cl::init(false), cl::Hidden); -namespace llvm { - void initializePPCMIPeepholePass(PassRegistry&); -} - namespace { struct PPCMIPeephole : public MachineFunctionPass { @@ -126,7 +122,9 @@ // This function returns number of known zero bits in output of MI // starting from the most significant bit. 
static unsigned -getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) { +getKnownLeadingZeroCount(const unsigned Reg, const PPCInstrInfo *TII, + const MachineRegisterInfo *MRI) { + MachineInstr *MI = MRI->getVRegDef(Reg); unsigned Opcode = MI->getOpcode(); if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo || Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo) @@ -170,7 +168,7 @@ Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8) return 56; - if (TII->isZeroExtended(*MI)) + if (TII->isZeroExtended(Reg, MRI)) return 32; return 0;
@@ -519,7 +517,7 @@ Simplified = true; NumEliminatedSExt++; } else if (MI.getOpcode() == PPC::EXTSW_32_64 && - TII->isSignExtended(*SrcMI)) { + TII->isSignExtended(NarrowReg, MRI)) { // We can eliminate EXTSW if the input is known to be already // sign-extended. DEBUG(dbgs() << "Removing redundant sign-extension\n"); @@ -571,7 +569,8 @@ SrcMI = MRI->getVRegDef(CopyReg); } - unsigned KnownZeroCount = getKnownLeadingZeroCount(SrcMI, TII); + SrcReg = SrcMI->getOperand(0).getReg(); + unsigned KnownZeroCount = getKnownLeadingZeroCount(SrcReg, TII, MRI); if (MI.getOperand(3).getImm() <= KnownZeroCount) { DEBUG(dbgs() << "Removing redundant zero-extension\n"); BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), Index: lib/Target/PowerPC/PPCTargetMachine.cpp =================================================================== --- lib/Target/PowerPC/PPCTargetMachine.cpp +++ lib/Target/PowerPC/PPCTargetMachine.cpp @@ -98,6 +98,7 @@ initializePPCBoolRetToIntPass(PR); initializePPCExpandISELPass(PR); initializePPCTLSDynamicCallPass(PR); + initializePPCMIPeepholePass(PR); } /// Return the datalayout string of a subtarget. Index: test/CodeGen/PowerPC/sext_elimination.mir =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/sext_elimination.mir @@ -0,0 +1,68 @@ +# test for redundant sign-/zero-extension elimination in the PPC MI peephole pass +# RUN: llc -run-pass ppc-mi-peepholes -ppc-eliminate-signext -ppc-eliminate-zeroext -verify-machineinstrs -o - %s | FileCheck %s + +--- | + target datalayout = "E-m:e-i64:64-n32:64" + target triple = "powerpc64le-unknown-linux-gnu" + define i8* @func(i8* %a) { + entry: + ret i8* %a + } + +...
+--- +name: func +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%x3', virtual-reg: '%0' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + liveins: %x3 + + ; CHECK-LABEL: bb.0.entry: + ; CHECK: %4:g8rc = EXTSW_32_64 %3 + ; CHECK: %5:g8rc = INSERT_SUBREG %15, %1, %subreg.sub_32 + ; CHECK: %7:g8rc = EXTSW_32_64 %6 + ; CHECK: %9:g8rc = INSERT_SUBREG %16, %8, %subreg.sub_32 + ; CHECK: %11:g8rc = INSERT_SUBREG %17, %10, %subreg.sub_32 + ; CHECK: %14:g8rc = COPY %1 + + %0:g8rc_nox0 = COPY %x3 + %1:gprc, %2:g8rc_nox0 = LBZU 0, %0:g8rc_nox0 + %3:gprc = COPY %2:g8rc_nox0 + %4:g8rc = EXTSW_32_64 %3:gprc ; should not be eliminated + %5:g8rc = EXTSW_32_64 %1:gprc + + %6:gprc = ORIS %1:gprc, 32768 ; should not be eliminated + %7:g8rc = EXTSW_32_64 %6:gprc + + %8:gprc = ORIS %1:gprc, 32767 + %9:g8rc = EXTSW_32_64 %8:gprc + + %10:gprc = ORI %1:gprc, 32768 + %11:g8rc = EXTSW_32_64 %10:gprc + + %12:g8rc = IMPLICIT_DEF + %13:g8rc = INSERT_SUBREG %12:g8rc, %1:gprc, %subreg.sub_32 + %14:g8rc = RLDICL %13:g8rc, 0, 32 + +...