Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp =================================================================== --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -68,10 +68,25 @@ #define DEBUG_TYPE "ppc-codegen" +STATISTIC(NumSextSetcc, + "Number of (sext(setcc)) nodes expanded into GPR sequence."); +STATISTIC(NumZextSetcc, + "Number of (zext(setcc)) nodes expanded into GPR sequence."); +STATISTIC(NumSelectSetcc, + "Number of (select (setcc), PwrOf2, 0) nodes calculated in GPR."); +STATISTIC(NumSelectCCPow2True, + "Number of (select_cc a, b, PwrOf2, 0) nodes calculated in GPR."); +STATISTIC(NumSelectCCPow2False, + "Number of (select_cc a, b, 0, PwrOf2) nodes calculated in GPR."); +STATISTIC(NumComparisonOps, + "Number of cmp(a, b) => { 0, 1, -1 } calculated in GPR."); STATISTIC(SignExtensionsAdded, "Number of sign extensions for compare inputs added."); STATISTIC(ZeroExtensionsAdded, "Number of zero extensions for compare inputs added."); +STATISTIC(NumLogicOpsOnComparison, + "Number of logical ops on i1 values calculated in GPR."); + // FIXME: Remove this once the bug has been fixed! cl::opt ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden); @@ -277,6 +292,7 @@ int64_t RHSValue, SDLoc dl); SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, int64_t RHSValue, SDLoc dl); + SDValue getSETCCInGPR(SDValue Compare, bool IsSext, bool InvertCC = false); void PeepholePPC64(); void PeepholePPC64ZExt(); @@ -2497,26 +2513,262 @@ // Handle select (setcc), pwrOf2, 0. 
bool PPCDAGToDAGISel::trySELECT(SDNode *N) { - return false; + if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) + return false; + if (N->getOperand(0).getOpcode() != ISD::SETCC) + return false; + SDValue TrueVal = N->getOperand(1); + SDValue FalseVal = N->getOperand(2); + ConstantSDNode *TrueConst = dyn_cast(TrueVal); + ConstantSDNode *FalseConst = dyn_cast(FalseVal); + if (!TrueConst || !FalseConst || !isPowerOf2_64(TrueConst->getSExtValue()) || + !FalseConst->isNullValue()) + return false; + + // Get the result of the comparison in a GPR. + SDValue SCCInGPR = getSETCCInGPR(N->getOperand(0), false); + if (!SCCInGPR) + return false; + + SDLoc dl(N); + EVT InputVT = SCCInGPR.getValueType(); + EVT OutputVT = N->getValueType(0); + bool Input32Bit = InputVT == MVT::i32; + bool Output32Bit = OutputVT == MVT::i32; + + SDValue ConvOp = addExtOrTrunc(SCCInGPR, Input32Bit, Output32Bit); + + // Shift the result of the comparison as required. + if (Output32Bit) { + uint32_t ShAmt = Log2_32(TrueConst->getSExtValue()); + SDValue Ops[] = { ConvOp, getI32Imm(ShAmt, dl), getI32Imm(0, dl), + getI32Imm(31 - ShAmt, dl) }; + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + } else { + uint64_t ShAmt = Log2_64(TrueConst->getSExtValue()); + CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, ConvOp, + getI64Imm(ShAmt, dl), getI64Imm(63 - ShAmt, dl)); + } + NumSelectSetcc++; + return true; } /// If this node is a sign/zero extension of an integer comparison, /// it can usually be computed in GPR's rather than using comparison /// instructions and ISEL. 
bool PPCDAGToDAGISel::tryEXTEND(SDNode *N) { + if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) + return false; + assert((N->getOpcode() == ISD::ZERO_EXTEND || + N->getOpcode() == ISD::SIGN_EXTEND) && + "Expecting a zero/sign extend node!"); + if (N->getOperand(0).getOpcode() != ISD::SETCC) + return false; + + SDLoc dl(N); + SDValue WideRes = getSETCCInGPR(N->getOperand(0), + N->getOpcode() == ISD::SIGN_EXTEND); + + bool Inputs32Bit = N->getOperand(0).getOperand(0).getValueType() == MVT::i32; + bool Output32Bit = N->getValueType(0) == MVT::i32; + if (!WideRes) + return false; + + if (N->getOpcode() == ISD::SIGN_EXTEND) + NumSextSetcc++; + else + NumZextSetcc++; + + SDValue ConvOp = addExtOrTrunc(WideRes, Inputs32Bit, Output32Bit); + ReplaceNode(N, ConvOp.getNode()); + + return true; +} + +// Is this a comparison operator (i.e. returns 0 if the values are equal, +// -1 if the first value is less and 1 if the first value is greater)? +static bool isComparisonOp(SDNode *N) { + if (N->getOpcode() != ISD::SELECT_CC) + return false; + SDValue DefFalse = N->getOperand(3); + SDValue MyLHS = N->getOperand(0); + SDValue MyRHS = N->getOperand(1); + ISD::CondCode MyCC = cast(N->getOperand(4))->get(); + ConstantSDNode *MyTrueConst = dyn_cast(N->getOperand(2)); + if (!MyTrueConst) + return false; + + // The false value may be a zero/sign extended result of a setcc. 
+ if (DefFalse.getOpcode() == ISD::ZERO_EXTEND || + DefFalse.getOpcode() == ISD::SIGN_EXTEND) { + if (DefFalse.getOperand(0).getOpcode() != ISD::SETCC) + return false; + SDValue SetCC = DefFalse.getOperand(0); + if (SetCC.getOperand(0) != MyLHS || SetCC.getOperand(1) != MyRHS) + return false; + ISD::CondCode DefCC = cast(SetCC.getOperand(2))->get(); + if (MyTrueConst->getSExtValue() == 1 && MyCC == ISD::SETUGT && + DefFalse.getOpcode() == ISD::SIGN_EXTEND && + (DefCC == ISD::SETNE || DefCC == ISD::SETULT)) + return true; + if (MyTrueConst->isAllOnesValue() && MyCC == ISD::SETULT && + DefFalse.getOpcode() == ISD::ZERO_EXTEND && + DefCC == ISD::SETNE) + return true; + return false; + } + + if (DefFalse.getOpcode() != ISD::SELECT_CC) + return false; + SDValue DefLHS = DefFalse.getOperand(0); + SDValue DefRHS = DefFalse.getOperand(1); + if (MyLHS != DefLHS || MyRHS != DefRHS) + return false; + ConstantSDNode *DefTrueConst = + dyn_cast(DefFalse.getOperand(2)); + ConstantSDNode *DefFalseConst = + dyn_cast(DefFalse.getOperand(3)); + ISD::CondCode DefCC = cast(DefFalse.getOperand(4))->get(); + if (!DefTrueConst || !DefFalseConst || !MyTrueConst) + return false; + if (MyTrueConst->isNullValue() && MyCC == ISD::SETEQ && + DefTrueConst->isAllOnesValue() && DefFalseConst->getSExtValue() == 1 && + DefCC == ISD::SETULT) + return true; + if (MyTrueConst->isNullValue() && MyCC == ISD::SETEQ && + DefTrueConst->getSExtValue() == 1 && DefFalseConst->isAllOnesValue() && + DefCC == ISD::SETUGT) + return true; return false; } // Handle select_cc %a, %b, pwrOf2, 0 // and select_cc %a, %b, 0, pwrOf2 bool PPCDAGToDAGISel::trySELECT_CC(SDNode *N) { - return false; + if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) + return false; + SDValue TrueVal = N->getOperand(2); + SDValue FalseVal = N->getOperand(3); + ConstantSDNode *TrueConst = dyn_cast(TrueVal); + ConstantSDNode *FalseConst = dyn_cast(FalseVal); + EVT InputVT = N->getOperand(0).getValueType(); + EVT OutputVT = 
TrueVal.getValueType(); + SDLoc dl(N); + + // Can only do this if comparing and returning integers. + if ((InputVT != MVT::i32 && InputVT != MVT::i64) || + (OutputVT != MVT::i32 && OutputVT != MVT::i64)) + return false; + + bool Input32Bit = InputVT == MVT::i32; + bool Output32Bit = OutputVT == MVT::i32; + + // Comparison operators will typically be in the DAG as SELECT_CC nodes. + if (isComparisonOp(N)) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Subtract = + SDValue(CurDAG->getMachineNode(Input32Bit ? PPC::SUBF : PPC::SUBF8, dl, + InputVT, RHS, LHS), 0); + SDValue SubC = + SDValue(CurDAG->getMachineNode(Input32Bit ? PPC::SUBFC : PPC::SUBFC8, dl, + InputVT, MVT::Glue, LHS, RHS), 1); + SDValue SubExt1 = + SDValue(CurDAG->getMachineNode(Input32Bit ? PPC::SUBFE : PPC::SUBFE8, dl, + InputVT, MVT::Glue, RHS, LHS, SubC), 0); + SDValue SubExt2 = + SDValue(CurDAG->getMachineNode(Input32Bit ? PPC::SUBFE : PPC::SUBFE8, dl, + InputVT, SubExt1, Subtract, + SubExt1.getValue(1)), 0); + SDValue ConvOp = addExtOrTrunc(SubExt2, Input32Bit, Output32Bit); + ReplaceNode(N, ConvOp.getNode()); + NumComparisonOps++; + return true; + } + + if (!TrueConst || !FalseConst) + return false; + + SDValue SCCInGPR; + uint64_t ShAmt = 0; + // Is this select_cc %a, %b, 0, pwrOf2 + if (TrueConst->isNullValue() && + isPowerOf2_64(FalseConst->getSExtValue())) { + SCCInGPR = getSETCCInGPR(SDValue(N, 0), false, true); + if (!SCCInGPR) + return false; + NumSelectCCPow2False++; + ShAmt = Log2_64(FalseConst->getSExtValue()); + } else if (FalseConst->isNullValue() && + isPowerOf2_64(TrueConst->getSExtValue())) { + // Is this select_cc %a, %b, pwrOf2, 0 + SCCInGPR = getSETCCInGPR(SDValue(N, 0), false); + if (!SCCInGPR) + return false; + NumSelectCCPow2True++; + ShAmt = Log2_64(TrueConst->getSExtValue()); + } else + return false; + + SDValue ConvOp = addExtOrTrunc(SCCInGPR, Input32Bit, Output32Bit); + + if (ShAmt && Output32Bit) { + SDValue Ops[] = { ConvOp, 
getI32Imm(ShAmt, dl), getI32Imm(0, dl), + getI32Imm(31 - ShAmt, dl) }; + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + } else if (ShAmt) { + CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, ConvOp, + getI64Imm(ShAmt, dl), getI64Imm(63 - ShAmt, dl)); + } else + ReplaceNode(N, ConvOp.getNode()); + + return true; } // Logical operatnions on comparison results end up being lowered to CR-logical // instructions which tend to have a high issue-to-issue latency. bool PPCDAGToDAGISel::tryLogicOpOfCompares(SDNode *N) { - return false; + if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) + return false; + SDLoc dl(N); + if (N->getValueType(0) != MVT::i1) + return false; + assert(N->getOperand(0).getOpcode() == ISD::SETCC && + N->getOperand(1).getOpcode() == ISD::SETCC && + (N->getOpcode() == ISD::AND || N->getOpcode() == ISD::OR || + N->getOpcode() == ISD::XOR) && + "Expected a logical operation on setcc results."); + + SDValue SCC1InGPR = getSETCCInGPR(N->getOperand(0), false); + SDValue SCC2InGPR = getSETCCInGPR(N->getOperand(1), false); + if (!SCC1InGPR || !SCC2InGPR) + return false; + + EVT VT1 = SCC1InGPR.getValueType(); + EVT VT2 = SCC2InGPR.getValueType(); + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unknown logical operation."); + case ISD::AND: Opc = PPC::AND8o; break; + case ISD::OR: Opc = PPC::OR8o; break; + case ISD::XOR: Opc = PPC::XOR8o; break; + } + + if (VT1 == MVT::i32) + SCC1InGPR = addExtOrTrunc(SCC1InGPR, true, false); + if (VT2 == MVT::i32) + SCC2InGPR = addExtOrTrunc(SCC2InGPR, true, false); + SDValue LogicOp = + SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i64, MVT::Glue, SCC1InGPR, + SCC2InGPR), 0); + SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); + SDValue SRIdxVal = + CurDAG->getTargetConstant(PPC::sub_gt, dl, MVT::i32); + CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg, + SRIdxVal, LogicOp.getValue(1)); + NumLogicOpsOnComparison++; + + return true; } /// If the value isn't 
guaranteed to be sign-extended to 64-bits, extend it. @@ -3094,6 +3346,48 @@ } } +/// Returns an equivalent of a SETCC node but with the result the same width as +/// the inputs. This can also be used for SELECT_CC if either the true or false +/// value is a power of two while the other is zero. +SDValue PPCDAGToDAGISel::getSETCCInGPR(SDValue Compare, bool IsSext, + bool InvertCC) { + assert((Compare.getOpcode() == ISD::SETCC || + Compare.getOpcode() == ISD::SELECT_CC) && + "An ISD::SETCC node required here."); + + SDValue LHS = Compare.getOperand(0); + SDValue RHS = Compare.getOperand(1); + bool IsSelectCC = Compare.getOpcode() == ISD::SELECT_CC; + ISD::CondCode CC = + cast(Compare.getOperand(IsSelectCC ? 4 : 2))->get(); + EVT InputVT = LHS.getValueType(); + if (InputVT != MVT::i32 && InputVT != MVT::i64) + return SDValue(); + + SDLoc dl(Compare); + ConstantSDNode *RHSConst = dyn_cast(RHS); + bool Inputs32Bit = InputVT == MVT::i32; + int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX; + + if (InvertCC) + CC = ISD::getSetCCInverse(CC, true); + if (ISD::isSignedIntSetCC(CC) && Inputs32Bit) { + LHS = signExtendInputIfNeeded(LHS); + RHS = signExtendInputIfNeeded(RHS); + } else if (ISD::isUnsignedIntSetCC(CC) && Inputs32Bit) { + LHS = zeroExtendInputIfNeeded(LHS); + RHS = zeroExtendInputIfNeeded(RHS); + } + + if (Inputs32Bit && IsSext) + return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl); + else if (Inputs32Bit) + return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl); + else if (IsSext) + return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl); + return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl); +} + void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { // Transfer memoperands. 
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -3388,6 +3388,7 @@ if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); + FuncInfo->addLiveInAttr(VReg, Flags); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store; @@ -3422,6 +3423,7 @@ break; unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + FuncInfo->addLiveInAttr(VReg, Flags); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Addr = FIN; if (j) { @@ -3458,6 +3460,7 @@ // types to avoid forcing arguments to memory unnecessarily. if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); + FuncInfo->addLiveInAttr(VReg, Flags); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) @@ -3503,6 +3506,7 @@ // since otherwise we never run out of FPRs before running out // of GPRs. unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); + FuncInfo->addLiveInAttr(VReg, Flags); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::f32) { Index: lib/Target/PowerPC/PPCInstrInfo.h =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.h +++ lib/Target/PowerPC/PPCInstrInfo.h @@ -290,6 +290,22 @@ return Reg >= PPC::V0 && Reg <= PPC::V31; } const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const; + + bool isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, + const unsigned PhiDepth) const; + + /// Return true if the output of the instruction is always a sign-extended, + /// i.e. 0 to 31-th bits are same as 32-th bit. 
+ bool isSignExtended(const MachineInstr &MI, const unsigned depth = 0) const { + return isSignOrZeroExtended(MI, true, depth); + } + + /// Return true if the output of the instruction is always zero-extended, + /// i.e. 0 to 31-th bits are all zeros + bool isZeroExtended(const MachineInstr &MI, const unsigned depth = 0) const { + return isSignOrZeroExtended(MI, false, depth); + } + }; } Index: lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.cpp +++ lib/Target/PowerPC/PPCInstrInfo.cpp @@ -254,6 +254,7 @@ switch (MI.getOpcode()) { default: return false; case PPC::EXTSW: + case PPC::EXTSW_32: case PPC::EXTSW_32_64: SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); @@ -1930,3 +1931,200 @@ return &PPC::VSRCRegClass; return RC; } + +// This function returns true if the machine instruction +// always outputs a value by extending a 32 bit value, +// i.e. 0 to 31-th bits are same as 32-th bit +static bool +isSignExtendingOp(const MachineInstr &MI) { + int Opcode = MI.getOpcode(); + if (Opcode == PPC::LI || Opcode == PPC::LI8 || + Opcode == PPC::LIS || Opcode == PPC::LIS8 || + Opcode == PPC::SRAW || Opcode == PPC::SRAWo || + Opcode == PPC::SRAWI || Opcode == PPC::SRAWIo || + Opcode == PPC::LWA || Opcode == PPC::LWAX || + Opcode == PPC::LWA_32 || Opcode == PPC::LWAX_32 || + Opcode == PPC::LHA || Opcode == PPC::LHAX || + Opcode == PPC::LHA8 || Opcode == PPC::LHAX8 || + Opcode == PPC::EXTSB || Opcode == PPC::EXTSBo || + Opcode == PPC::EXTSH || Opcode == PPC::EXTSHo || + Opcode == PPC::EXTSB8 || Opcode == PPC::EXTSH8 || + Opcode == PPC::EXTSW || Opcode == PPC::EXTSWo || + Opcode == PPC::EXTSH8_32_64 || Opcode == PPC::EXTSW_32_64 || + Opcode == PPC::EXTSB8_32_64) + return true; + + else if (Opcode == PPC::RLDICL && MI.getOperand(3).getImm() >= 33) + return true; + + else if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo || + Opcode == PPC::RLWNM || Opcode == 
PPC::RLWNMo) && + MI.getOperand(3).getImm() > 0 && + MI.getOperand(3).getImm() <= MI.getOperand(4).getImm()) + return true; + + return false; +} + +// This function returns true if the machine instruction +// always outputs zeros in higher 32 bits +static bool +isZeroExtendingOp(const MachineInstr &MI) { + int Opcode = MI.getOpcode(); + // The 16-bit immediate is sign-extended in li/lis. + // If the most significant bit is zero, all higher bits are zero. + if (Opcode == PPC::LI || Opcode == PPC::LI8 || + Opcode == PPC::LIS || Opcode == PPC::LIS8) { + int64_t Imm = MI.getOperand(1).getImm(); + if (((uint64_t)Imm & ~0x7FFFuLL) == 0) + return true; + } + + // We have some variations of rotate-and-mask instructions + // that clear higher 32-bits + else if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo || + Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo || + Opcode == PPC::RLDICL_32_64) && + MI.getOperand(3).getImm() >= 32) + return true; + + else if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) && + MI.getOperand(3).getImm() >= 32 && + MI.getOperand(3).getImm() <= 63 - MI.getOperand(2).getImm()) + return true; + + else if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo || + Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo || + Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) && + MI.getOperand(3).getImm() <= MI.getOperand(4).getImm()) + return true; + + // These opcodes always produce a result whose higher 32 bits are zero + else if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZWo || + Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZWo || + Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8 || + Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo || + Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo || + Opcode == PPC::POPCNTD || + Opcode == PPC::SLW || Opcode == PPC::SLWo || + Opcode == PPC::SRW || Opcode == PPC::SRWo || + Opcode == PPC::SLW8 || Opcode == PPC::SRW8 || + Opcode == PPC::SLWI || Opcode == PPC::SLWIo || + Opcode == PPC::SRWI || Opcode == PPC::SRWIo || + Opcode == PPC::LWZ 
|| Opcode == PPC::LWZX || + Opcode == PPC::LHZ || Opcode == PPC::LHZX || + Opcode == PPC::LBZ || Opcode == PPC::LBZX || + Opcode == PPC::LWZ8 || Opcode == PPC::LWZX8 || + Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 || + Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 || + Opcode == PPC::ANDIo || Opcode == PPC::ANDISo || + Opcode == PPC::ROTRWI || Opcode == PPC::ROTRWIo || + Opcode == PPC::EXTLWI || Opcode == PPC::EXTLWIo) + return true; + + return false; +} + +// We limit the max depth to track incoming values of PHIs/ISELs to avoid +// excessive cost. +const unsigned MAX_PHI_DEPTH = 1; + +bool +PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, + const unsigned PhiDepth) const { + const MachineFunction *MF = MI.getParent()->getParent(); + const MachineRegisterInfo *MRI = &MF->getRegInfo(); + const PPCFunctionInfo *FuncInfo = MF->getInfo(); + const PPCSubtarget &Subtarget = MF->getSubtarget(); + + switch (MI.getOpcode()) { + case PPC::COPY: { + unsigned SrcReg = MI.getOperand(1).getReg(); + + // In both ELFv1 and v2 ABI, method parameters and the return value + // are sign- or zero-extended. + if (Subtarget.isSVR4ABI()) { + // We check the ZExt/SExt flags for a method parameter. + if (MI.getParent()->getBasicBlock() == + &MF->getFunction()->getEntryBlock()) { + unsigned VReg = MI.getOperand(0).getReg(); + if (MF->getRegInfo().isLiveIn(VReg)) + return SignExt ? FuncInfo->isLiveInSExt(VReg) : + FuncInfo->isLiveInZExt(VReg); + } + + // For a method return value, we check the ZExt/SExt flags in attribute. + // We assume the following code sequence for method call. + // ADJCALLSTACKDOWN 32, %R1, %R1 + // BL8_NOP ,... 
+ // ADJCALLSTACKUP 32, 0, %R1, %R1 + // %vreg5 = COPY %X3; G8RC:%vreg5 + if (SrcReg == PPC::X3) { + const MachineBasicBlock *MBB = MI.getParent(); + MachineBasicBlock::const_instr_iterator II = + MachineBasicBlock::const_instr_iterator(&MI); + if (II != MBB->instr_begin() && + (--II)->getOpcode() == PPC::ADJCALLSTACKUP) { + const MachineInstr &CallMI = *(--II); + if (CallMI.isCall() && CallMI.getOperand(0).isGlobal()) { + const Function *CalleeFn = + dyn_cast(CallMI.getOperand(0).getGlobal()); + const IntegerType *IntTy = + dyn_cast(CalleeFn->getReturnType()); + const AttributeList &Attrs = + CalleeFn->getAttributes().getRetAttributes(); + if (IntTy && IntTy->getBitWidth() <= 32) + return Attrs.hasAttribute(AttributeList::ReturnIndex, SignExt ? + Attribute::SExt : Attribute::ZExt); + } + } + } + } + + // If this is a copy from another register, we recursively check the source. + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return false; + const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + if (SrcMI != NULL) + return isSignOrZeroExtended(*SrcMI, SignExt, PhiDepth); + + return false; + } + + // If all incoming values are sign- or zero-extended, + // the output of the isel or phi is also sign- or zero-extended. + case PPC::ISEL: + case PPC::PHI: { + if (PhiDepth >= MAX_PHI_DEPTH) + return false; + + // The input registers for ISEL are operand 1 and 2. + // The input registers for PHI are operand 1, 3, ... 
+ unsigned e = 3, d = 1; + if (MI.getOpcode() == PPC::PHI) { + e = MI.getNumOperands(); + d = 2; + } + + for (unsigned i = 1; i != e; i += d) { + if (MI.getOperand(i).isReg()) { + unsigned SrcReg = MI.getOperand(i).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return false; + const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + if (SrcMI == NULL || !isSignOrZeroExtended(*SrcMI, SignExt, PhiDepth+1)) + return false; + } + else + return false; + } + return true; + } + + default: + return SignExt?isSignExtendingOp(MI): + isZeroExtendingOp(MI); + } + return false; +} + Index: lib/Target/PowerPC/PPCMIPeephole.cpp =================================================================== --- lib/Target/PowerPC/PPCMIPeephole.cpp +++ lib/Target/PowerPC/PPCMIPeephole.cpp @@ -27,11 +27,27 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" using namespace llvm; #define DEBUG_TYPE "ppc-mi-peepholes" +STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions"); +STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions"); +STATISTIC(NumConvertedToImmediateForm, + "Number of instructions converted to their immediate form."); + +static cl::opt + EnableSExtElimination("ppc-eliminate-signext", + cl::desc("enable elimination of sign-extensions"), + cl::init(true), cl::Hidden); + +static cl::opt + EnableZExtElimination("ppc-eliminate-zeroext", + cl::desc("enable elimination of zero-extensions"), + cl::init(true), cl::Hidden); + namespace llvm { void initializePPCMIPeepholePass(PassRegistry&); } @@ -336,6 +352,82 @@ } break; } + case PPC::EXTSW: + case PPC::EXTSW_32: + case PPC::EXTSW_32_64:{ + if (!EnableSExtElimination) break; + unsigned NarrowReg = MI.getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(NarrowReg)) + break; + MachineInstr *DefMI = MRI->getVRegDef(NarrowReg); + // If we've used a zero-extending load that we 
will sign-extend, + // just do a sign-extending load. + if (DefMI->getOpcode() == PPC::LWZ) { + if (!MRI->hasOneNonDBGUse(DefMI->getOperand(0).getReg())) + break; + unsigned Opc = + (MI.getOpcode() == PPC::EXTSW || + MI.getOpcode() == PPC::EXTSW_32_64) ? PPC::LWA : PPC::LWA_32; + DEBUG(dbgs() << "Removing redundant sign extension:\n"); + DEBUG(MI.dump()); + DEBUG(dbgs() << "As it was fed by:\n"); + DEBUG(DefMI->dump()); + DefMI->setDesc(TII->get(Opc)); + DefMI->getOperand(0).setReg(MI.getOperand(0).getReg()); + ToErase = &MI; + Simplified = true; + NumEliminatedSExt++; + } else if (MI.getOpcode()==PPC::EXTSW_32_64 && + TII->isSignExtended(*DefMI)) { + // We can eliminate EXTSW if the input is known to be already + // sign-extended. + DEBUG(dbgs() << "Removing redundant sign-extension\n"); + unsigned TmpReg = + MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF), + TmpReg); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG), + MI.getOperand(0).getReg()) + .addReg(TmpReg) + .addReg(NarrowReg) + .addImm(PPC::sub_32); + ToErase = &MI; + Simplified = true; + NumEliminatedSExt++; + } + break; + } + case PPC::RLDICL: { + // We can eliminate RLDICL for zero-extension + // if the input is known to be already zero-extended. 
+ if (!EnableZExtElimination) break; + if (MI.getOperand(2).getImm() != 0 || + MI.getOperand(3).getImm() > 32) + break; + unsigned SrcReg = MI.getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + break; + MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + + if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG && + SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg())) + break; + + MachineInstr *ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg()); + MachineInstr *SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg()); + if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break; + + if (TII->isZeroExtended(*SubRegMI)) { + DEBUG(dbgs() << "Removing redundant zero-extension\n"); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), + MI.getOperand(0).getReg()) + .addReg(SrcReg); + ToErase = &MI; + Simplified = true; + NumEliminatedZExt++; + } + break; + } } } // If the last instruction was marked for elimination, Index: lib/Target/PowerPC/PPCMachineFunctionInfo.h =================================================================== --- lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetCallingConv.h" namespace llvm { @@ -113,6 +114,10 @@ /// copies bool IsSplitCSR = false; + /// We keep track of attributes for each live-in virtual register + /// to use SExt/ZExt flags in later optimization. + std::vector> LiveInAttrs; + public: explicit PPCFunctionInfo(MachineFunction &MF) : MF(MF) {} @@ -175,6 +180,19 @@ unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; } void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; } + /// This function associates attributes for each live-in virtual register. 
+ void addLiveInAttr(unsigned VReg, ISD::ArgFlagsTy Flags) { + LiveInAttrs.push_back(std::make_pair(VReg, Flags)); + } + + /// This function returns true if the specified vreg is + /// a live-in register and sign-extended. + bool isLiveInSExt(unsigned VReg) const; + + /// This function returns true if the specified vreg is + /// a live-in register and zero-extended. + bool isLiveInZExt(unsigned VReg) const; + int getCRSpillFrameIndex() const { return CRSpillFrameIndex; } void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; } Index: lib/Target/PowerPC/PPCMachineFunctionInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCMachineFunctionInfo.cpp +++ lib/Target/PowerPC/PPCMachineFunctionInfo.cpp @@ -43,3 +43,17 @@ "func_toc" + Twine(MF.getFunctionNumber())); } + +bool PPCFunctionInfo::isLiveInSExt(unsigned VReg) const { + for (const std::pair &LiveIn : LiveInAttrs) + if (LiveIn.first == VReg) + return LiveIn.second.isSExt(); + return false; +} + +bool PPCFunctionInfo::isLiveInZExt(unsigned VReg) const { + for (const std::pair &LiveIn : LiveInAttrs) + if (LiveIn.first == VReg) + return LiveIn.second.isZExt(); + return false; +} Index: test/CodeGen/PowerPC/crbits.ll =================================================================== --- test/CodeGen/PowerPC/crbits.ll +++ test/CodeGen/PowerPC/crbits.ll @@ -187,11 +187,14 @@ ret i32 %and ; CHECK-LABEL: @test10 -; CHECK-DAG: cmpwi {{[0-9]+}}, 3, 0 -; CHECK-DAG: cmpwi {{[0-9]+}}, 4, 0 -; CHECK-DAG: li [[REG2:[0-9]+]], 1 -; CHECK-DAG: crorc [[REG3:[0-9]+]], -; CHECK: isel 3, 0, [[REG2]], [[REG3]] +; CHECK-DAG: cntlzw 3, 3 +; CHECK-DAG: cntlzw 4, 4 +; CHECK-DAG: srwi 3, 3, 5 +; CHECK-DAG: srwi 4, 4, 5 +; CHECK: xori 3, 3, 1 +; CHECK-DAG: li [[ONE:[0-9]+]], 1 +; CHECK-DAG: li [[ZERO:[0-9]+]], 0 +; CHECK: isel 3, [[ONE]], [[ZERO]], 1 ; CHECK: blr } Index: test/CodeGen/PowerPC/expand-isel.ll =================================================================== --- 
test/CodeGen/PowerPC/expand-isel.ll +++ test/CodeGen/PowerPC/expand-isel.ll @@ -212,13 +212,12 @@ ret i32 %retval.0 ; CHECK-LABEL: @testComplexISEL -; CHECK: bc 12, 2, [[TRUE:.LBB[0-9]+]] -; CHECK-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] -; CHECK-NEXT: [[TRUE]] -; CHECK-NEXT: addi r3, r12, 0 -; CHECK-NEXT: [[SUCCESSOR]] -; CHECK-NEXT: clrldi r3, r3, 32 -; CHECK-NEXT: blr +; CHECK-DAG: [[LI:r[0-9]+]], 1 +; CHECK-DAG: cmplwi [[LD:r[0-9]+]], 0 +; CHECK: bnelr cr0 +; CHECK: xor [[XOR:r[0-9]+]] +; CHECK: cntlzd [[CZ:r[0-9]+]], [[XOR]] +; CHECK: rldicl [[SH:r[0-9]+]], [[CZ]], 58, 63 } !1 = !{!2, !2, i64 0} Index: test/CodeGen/PowerPC/i1-ext-fold.ll =================================================================== --- test/CodeGen/PowerPC/i1-ext-fold.ll +++ test/CodeGen/PowerPC/i1-ext-fold.ll @@ -13,18 +13,10 @@ ; CHECK-LABEL: @foo ; CHECK-NO-ISEL-LABEL: @foo -; CHECK-DAG: cmpw -; CHECK-DAG: li [[REG1:[0-9]+]], 0 -; CHECK-DAG: li [[REG2:[0-9]+]], 16 -; CHECK: isel 3, [[REG2]], [[REG1]], +; CHECK-DAG: subf [[REG1:[0-9]+]], 4, 3 +; CHECK-DAG: rldicl [[REG2:[0-9]+]], [[REG1]], 1, 63 +; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG2]], 4 ; CHECK: blr - -; CHECK-NO-ISEL: bc 12, 0, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL: ori 3, 5, 0 -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: [[TRUE]] -; CHECK-NO-ISEL-NEXT: addi 3, 12, 0 -; CHECK-NO-ISEL-NEXT: blr } ; Function Attrs: nounwind readnone @@ -60,19 +52,13 @@ %shl = shl nuw nsw i32 %conv, 4 ret i32 %shl -; CHECK-LABEL: @foo3 -; CHECK-NO-ISEL-LABEL: @foo3 -; CHECK-DAG: cmpw -; CHECK-DAG: li [[REG1:[0-9]+]], 16 -; CHECK: isel 3, 0, [[REG1]], -; CHECK: blr +; CHECK-ALL-LABEL: @foo3 +; CHECK-ALL: subf [[SUB:[0-9]+]], 3, 4 +; CHECK-ALL: rldicl [[SH:[0-9]+]], [[SUB]], 33, 63 +; CHECK-ALL: xori [[FLIP:[0-9]+]], [[SH]], 1 +; CHECK-ALL: sldi 3, [[FLIP]], 4 +; CHECK-ALL: blr -; CHECK-NO-ISEL: bc 12, 1, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL: ori 3, 5, 0 -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: [[TRUE]] -; CHECK-NO-ISEL-NEXT: addi 3, 0, 0 -; 
CHECK-NO-ISEL-NEXT: blr } attributes #0 = { nounwind readnone } Index: test/CodeGen/PowerPC/no-pref-jumps.ll =================================================================== --- test/CodeGen/PowerPC/no-pref-jumps.ll +++ test/CodeGen/PowerPC/no-pref-jumps.ll @@ -11,9 +11,11 @@ br i1 %or.cond, label %if.then, label %if.else ; CHECK-LABEL: @foo -; CHECK: cmpwi -; CHECK: cmpwi -; CHECK: cror +; CHECK: subf +; CHECK: subf +; CHECK: rldicl +; CHECK: rldicl +; CHECK: or. ; CHECK: blr if.then: ; preds = %entry Index: test/CodeGen/PowerPC/optcmp.ll =================================================================== --- test/CodeGen/PowerPC/optcmp.ll +++ test/CodeGen/PowerPC/optcmp.ll @@ -127,8 +127,12 @@ ret i64 %conv1 ; CHECK: @foo2l -; CHECK: sld. 4, 3, 4 -; CHECK: std 4, 0(5) +; CHECK: sld 4, 3, 4 +; CHECK-NEXT: addi 3, 4, -1 +; CHECK-NEXT: std 4, 0(5) +; CHECK-NEXT: nor 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 1, 63 +; CHECK-NEXT: blr } define double @food(double %a, double %b, double* nocapture %c) #0 { Index: test/CodeGen/PowerPC/ppc-crbits-onoff.ll =================================================================== --- test/CodeGen/PowerPC/ppc-crbits-onoff.ll +++ test/CodeGen/PowerPC/ppc-crbits-onoff.ll @@ -12,19 +12,14 @@ %and = zext i1 %and3 to i32 ret i32 %and -; CHECK-LABEL: @crbitsoff -; CHECK-NO-ISEL-LABEL: @crbitsoff -; CHECK-DAG: cmplwi {{[0-9]+}}, 3, 0 -; CHECK-DAG: li [[REG2:[0-9]+]], 1 -; CHECK-DAG: cntlzw [[REG3:[0-9]+]], -; CHECK: isel [[REG4:[0-9]+]], 0, [[REG2]] -; CHECK-NO-ISEL: bc 12, 2, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL: ori 4, 5, 0 -; CHECK-NO-ISEL-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] -; CHECK-NO-ISEL: [[TRUE]] -; CHECK-NO-ISEL-NEXT: addi 4, 0, 0 -; CHECK: and 3, [[REG4]], [[REG3]] -; CHECK: blr +; CHECK-ALL-LABEL: @crbitsoff +; CHECK-ALL-DAG: cntlzw [[CNTv1:[0-9]+]], 3 +; CHECK-ALL-DAG: cntlzw [[CNTv2:[0-9]+]], 4 +; CHECK-ALL-DAG: srwi [[SH:[0-9]+]], [[CNTv1]], 5 +; CHECK-ALL-DAG: xori [[FLIP:[0-9]+]], [[SH]], 1 +; CHECK-ALL-DAG: rlwinm 
[[SH2:[0-9]+]], [[CNTv2]], 27, 5, 31 +; CHECK-ALL: and 3, [[FLIP]], [[SH2]] +; CHECK-ALL: blr } define signext i32 @crbitson(i32 signext %v1, i32 signext %v2) #1 { @@ -37,15 +32,20 @@ ; CHECK-LABEL: @crbitson ; CHECK-NO-ISEL-LABEL: @crbitson -; CHECK-DAG: cmpwi {{[0-9]+}}, 3, 0 -; CHECK-DAG: cmpwi {{[0-9]+}}, 4, 0 -; CHECK-DAG: li [[REG2:[0-9]+]], 1 -; CHECK-DAG: crorc [[REG3:[0-9]+]], -; CHECK: isel 3, 0, [[REG2]], [[REG3]] -; CHECK-NO-ISEL: bc 12, 20, [[TRUE:.LBB[0-9]+]] +; CHECK-DAG: cntlzw [[REG1:[0-9]+]], 3 +; CHECK-DAG: cntlzw [[REG2:[0-9]+]], 4 +; CHECK: srwi [[REG3:[0-9]+]], [[REG1]], 5 +; CHECK: srwi [[REG4:[0-9]+]], [[REG2]], 5 +; CHECK: xori [[REG5:[0-9]+]], [[REG3]], 1 +; CHECK: and. [[REG6:[0-9]+]], [[REG5]], [[REG4]] +; CHECK-DAG: li [[ONE:[0-9]+]], 1 +; CHECK-DAG: li [[ZERO:[0-9]+]], 0 +; CHECK: isel 3, [[ONE]], [[ZERO]], 1 +; CHECK-NO-ISEL: li [[ONE:[0-9]+]], 1 +; CHECK-NO-ISEL: bc 12, 1, [[TRUE:.LBB[0-9]+]] ; CHECK-NO-ISEL-NEXT: blr ; CHECK-NO-ISEL: [[TRUE]] -; CHECK-NO-ISEL-NEXT: addi 3, 0, 0 +; CHECK-NO-ISEL-NEXT: addi 3, [[ONE]], 0 ; CHECK-NO-ISEL-NEXT: blr ; CHECK: blr } Index: test/CodeGen/PowerPC/setcc-logic.ll =================================================================== --- test/CodeGen/PowerPC/setcc-logic.ll +++ test/CodeGen/PowerPC/setcc-logic.ll @@ -6,7 +6,7 @@ ; CHECK: # BB#0: ; CHECK-NEXT: or 3, 3, 4 ; CHECK-NEXT: cntlzw 3, 3 -; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31 +; CHECK-NEXT: srwi 3, 3, 5 ; CHECK-NEXT: blr %a = icmp eq i32 %P, 0 %b = icmp eq i32 %Q, 0 @@ -18,6 +18,7 @@ ; CHECK-LABEL: all_sign_bits_clear: ; CHECK: # BB#0: ; CHECK-NEXT: or 3, 3, 4 +; CHECK-NEXT: extsw 3, 3 ; CHECK-NEXT: nor 3, 3, 3 ; CHECK-NEXT: srwi 3, 3, 31 ; CHECK-NEXT: blr @@ -30,11 +31,11 @@ define zeroext i1 @all_bits_set(i32 %P, i32 %Q) { ; CHECK-LABEL: all_bits_set: ; CHECK: # BB#0: +; CHECK-NEXT: li 5, -1 ; CHECK-NEXT: and 3, 3, 4 -; CHECK-NEXT: li 5, 0 -; CHECK-NEXT: li 12, 1 -; CHECK-NEXT: cmpwi 0, 3, -1 -; CHECK-NEXT: isel 3, 12, 5, 2 +; CHECK-NEXT: 
xor 3, 3, 5 +; CHECK-NEXT: cntlzw 3, 3 +; CHECK-NEXT: srwi 3, 3, 5 ; CHECK-NEXT: blr %a = icmp eq i32 %P, -1 %b = icmp eq i32 %Q, -1 @@ -46,6 +47,7 @@ ; CHECK-LABEL: all_sign_bits_set: ; CHECK: # BB#0: ; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: extsw 3, 3 ; CHECK-NEXT: srwi 3, 3, 31 ; CHECK-NEXT: blr %a = icmp slt i32 %P, 0 @@ -59,8 +61,8 @@ ; CHECK: # BB#0: ; CHECK-NEXT: or 3, 3, 4 ; CHECK-NEXT: cntlzw 3, 3 -; CHECK-NEXT: nor 3, 3, 3 -; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31 +; CHECK-NEXT: srwi 3, 3, 5 +; CHECK-NEXT: xori 3, 3, 1 ; CHECK-NEXT: blr %a = icmp ne i32 %P, 0 %b = icmp ne i32 %Q, 0 @@ -72,6 +74,7 @@ ; CHECK-LABEL: any_sign_bits_set: ; CHECK: # BB#0: ; CHECK-NEXT: or 3, 3, 4 +; CHECK-NEXT: extsw 3, 3 ; CHECK-NEXT: srwi 3, 3, 31 ; CHECK-NEXT: blr %a = icmp slt i32 %P, 0 @@ -83,10 +86,12 @@ define zeroext i1 @any_bits_clear(i32 %P, i32 %Q) { ; CHECK-LABEL: any_bits_clear: ; CHECK: # BB#0: +; CHECK-NEXT: li 5, -1 ; CHECK-NEXT: and 3, 3, 4 -; CHECK-NEXT: li 5, 1 -; CHECK-NEXT: cmpwi 0, 3, -1 -; CHECK-NEXT: isel 3, 0, 5, 2 +; CHECK-NEXT: xor 3, 3, 5 +; CHECK-NEXT: cntlzw 3, 3 +; CHECK-NEXT: srwi 3, 3, 5 +; CHECK-NEXT: xori 3, 3, 1 ; CHECK-NEXT: blr %a = icmp ne i32 %P, -1 %b = icmp ne i32 %Q, -1 @@ -98,6 +103,7 @@ ; CHECK-LABEL: any_sign_bits_clear: ; CHECK: # BB#0: ; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: extsw 3, 3 ; CHECK-NEXT: nor 3, 3, 3 ; CHECK-NEXT: srwi 3, 3, 31 ; CHECK-NEXT: blr @@ -416,11 +422,11 @@ define zeroext i1 @ne_neg1_and_ne_zero(i64 %x) { ; CHECK-LABEL: ne_neg1_and_ne_zero: ; CHECK: # BB#0: +; CHECK-NEXT: li 4, 1 ; CHECK-NEXT: addi 3, 3, 1 -; CHECK-NEXT: li 4, 0 -; CHECK-NEXT: li 12, 1 -; CHECK-NEXT: cmpldi 3, 1 -; CHECK-NEXT: isel 3, 12, 4, 1 +; CHECK-NEXT: subfc 3, 3, 4 +; CHECK-NEXT: subfe 3, 4, 4 +; CHECK-NEXT: neg 3, 3 ; CHECK-NEXT: blr %cmp1 = icmp ne i64 %x, -1 %cmp2 = icmp ne i64 %x, 0 @@ -437,7 +443,7 @@ ; CHECK-NEXT: xor 3, 3, 4 ; CHECK-NEXT: or 3, 3, 5 ; CHECK-NEXT: cntlzw 3, 3 -; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31 +; CHECK-NEXT: 
srwi 3, 3, 5 ; CHECK-NEXT: blr %cmp1 = icmp eq i16 %a, %b %cmp2 = icmp eq i16 %c, %d @@ -452,8 +458,8 @@ ; CHECK-NEXT: xor 3, 3, 4 ; CHECK-NEXT: or 3, 3, 5 ; CHECK-NEXT: cntlzw 3, 3 -; CHECK-NEXT: nor 3, 3, 3 -; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31 +; CHECK-NEXT: srwi 3, 3, 5 +; CHECK-NEXT: xori 3, 3, 1 ; CHECK-NEXT: blr %cmp1 = icmp ne i32 %a, %b %cmp2 = icmp ne i32 %c, %d