Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp =================================================================== --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -77,6 +77,11 @@ "Number of sign extensions for compare inputs added."); STATISTIC(ZeroExtensionsAdded, "Number of zero extensions for compare inputs added."); +STATISTIC(NumLogicOpsOnComparison, + "Number of logical ops on i1 values calculated in GPR."); +STATISTIC(OmittedForNonExtendUses, + "Number of compares not eliminated as they have non-extending uses."); + // FIXME: Remove this once the bug has been fixed! cl::opt ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden); @@ -275,6 +280,8 @@ bool trySETCC(SDNode *N); bool tryEXTEND(SDNode *N); + bool tryLogicOpOfCompares(SDNode *N); + SDValue getLogicOpInGPR(SDValue LogicOp, bool KeepInGPR); SDValue signExtendInputIfNeeded(SDValue Input); SDValue zeroExtendInputIfNeeded(SDValue Input); SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv); @@ -2501,6 +2508,10 @@ return true; } +static bool isLogicOp(unsigned Opc) { + return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR; +} + /// If this node is a sign/zero extension of an integer comparison, /// it can usually be computed in GPR's rather than using comparison /// instructions and ISEL. We only do this on 64-bit targets for now @@ -2513,13 +2524,20 @@ N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!"); - if (N->getOperand(0).getOpcode() != ISD::SETCC) + SDValue WideRes; + // If we are zero-extending the result of a logical operation on i1 + // values, we can keep the values in GPRs. 
+ if (isLogicOp(N->getOperand(0).getOpcode()) && + N->getOperand(0).getValueType() == MVT::i1 && + N->getOpcode() == ISD::ZERO_EXTEND) + WideRes = getLogicOpInGPR(N->getOperand(0), true); + else if (N->getOperand(0).getOpcode() != ISD::SETCC) return false; - - SDValue WideRes = - getSETCCInGPR(N->getOperand(0), - N->getOpcode() == ISD::SIGN_EXTEND ? - SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig); + else + WideRes = + getSETCCInGPR(N->getOperand(0), + N->getOpcode() == ISD::SIGN_EXTEND ? + SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig); if (!WideRes) return false; @@ -2540,6 +2558,122 @@ return true; } +// Lower a logical operation on i1 values into a GPR sequence if possible. +// The result is kept in a GPR if KeepInGPR is set; otherwise it is a CR bit. +// Three types of inputs can be handled: +// - SETCC +// - TRUNCATE +// - Logical operation (AND/OR/XOR) +// There is also a special case that is handled (namely a complement operation +// achieved with xor %a, -1). +SDValue PPCDAGToDAGISel::getLogicOpInGPR(SDValue LogicOp, bool KeepInGPR) { + assert(isLogicOp(LogicOp.getOpcode()) && + "Can only handle logic operations here."); + assert(LogicOp.getValueType() == MVT::i1 && + "Can only handle logic operations on i1 values here."); + SDLoc dl(LogicOp); + SDValue Op1, Op2; + + // Special case: xor %a, -1 (i.e. a bitwise NOT of %a). + bool IsNot = isBitwiseNot(LogicOp); + + // Produces a GPR sequence for each operand of the binary logic operation. + // For SETCC, it produces the respective comparison, for TRUNCATE it truncates + // the value in a GPR and for logic operations, it will recursively produce + // a GPR sequence for the operation. 
+ auto getLogicOperand = [&] (SDValue Operand) -> SDValue { + unsigned OperandOpcode = Operand.getOpcode(); + if (OperandOpcode == ISD::SETCC) + return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig); + else if (OperandOpcode == ISD::TRUNCATE) { + SDValue InputOp = Operand.getOperand(0); + EVT InVT = InputOp.getValueType(); + // If this is a (xor (trunc to i1), -1), no need to truncate twice. + if (IsNot && !KeepInGPR) + return InVT == MVT::i64 ? InputOp : + addExtOrTrunc(InputOp, ExtOrTruncConversion::Ext); + return + SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 : + PPC::RLDICL, dl, InVT, InputOp, + getI64Imm(0, dl), getI64Imm(63, dl)), 0); + } else if (isLogicOp(OperandOpcode)) + return getLogicOpInGPR(Operand, true); + return SDValue(); + }; + Op1 = getLogicOperand(LogicOp.getOperand(0)); + Op2 = getLogicOperand(LogicOp.getOperand(1)); + + // If a GPR sequence can't be produced for the LHS we can't proceed. + // Not producing a GPR sequence for the RHS is only a problem if this isn't + // a bitwise negation operation. + if (!Op1 || (!Op2 && !IsNot)) + return SDValue(); + + NumLogicOpsOnComparison++; + + // We will use the inputs as 64-bit values, so extend any i32 operand. + if (Op1.getValueType() == MVT::i32) + Op1 = addExtOrTrunc(Op1, ExtOrTruncConversion::Ext); + if (!IsNot && Op2.getValueType() == MVT::i32) + Op2 = addExtOrTrunc(Op2, ExtOrTruncConversion::Ext); + + unsigned NewOpc; + switch (LogicOp.getOpcode()) { + default: llvm_unreachable("Unknown logic operation."); + case ISD::AND: NewOpc = KeepInGPR ? PPC::AND8 : PPC::AND8o; break; + case ISD::OR: NewOpc = KeepInGPR ? PPC::OR8 : PPC::OR8o; break; + case ISD::XOR: NewOpc = KeepInGPR ? PPC::XOR8 : PPC::XOR8o; break; + } + + SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); + // We can negate an i1 value (in a GPR) by clearing the top 63 bits and using + // the EQ bit. If we had a zero in the least significant bit, the result is + // zero and the EQ bit is set. Otherwise, it isn't set. 
+ if (IsNot) { + if (!KeepInGPR) { + SDValue WideOp = + SDValue(CurDAG->getMachineNode(PPC::RLDICLo, dl, MVT::i64, MVT::Glue, + Op1, getI64Imm(0, dl), + getI64Imm(63, dl)), 0); + SDValue SRIdxVal = + CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32); + return SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, + MVT::i1, CR0Reg, SRIdxVal, + WideOp.getValue(1)), 0); + } + Op2 = getI64Imm(1, dl); + NewOpc = PPC::XORI8; + } + + if (KeepInGPR) + return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, Op1, Op2), 0); + + SDValue WideOp = + SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, MVT::Glue, Op1, + Op2), 0); + SDValue SRIdxVal = + CurDAG->getTargetConstant(PPC::sub_gt, dl, MVT::i32); + return SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, + MVT::i1, CR0Reg, SRIdxVal, + WideOp.getValue(1)), 0); +} + +bool PPCDAGToDAGISel::tryLogicOpOfCompares(SDNode *N) { + if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) + return false; + SDLoc dl(N); + if (N->getValueType(0) != MVT::i1) + return false; + assert(isLogicOp(N->getOpcode()) && + "Expected a logic operation on setcc results."); + SDValue LoweredLogical = getLogicOpInGPR(SDValue(N, 0), false); + if (!LoweredLogical) + return false; + + ReplaceNode(N, LoweredLogical.getNode()); + return true; +} + /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it. /// Useful when emitting comparison code for 32-bit values without using /// the compare instruction (which only considers the lower 32-bits). 
@@ -2677,6 +2811,23 @@ } } +/// Returns true if \p Compare has a single use, or if every use is a +/// sign/zero extension, a SELECT or a logical operation (AND/OR/XOR). +/// Otherwise increments OmittedForNonExtendUses and returns false. +static bool allUsesExtend(SDValue Compare) { + if (Compare.hasOneUse()) + return true; + for (auto CompareUse : Compare.getNode()->uses()) + if (CompareUse->getOpcode() != ISD::SIGN_EXTEND && + CompareUse->getOpcode() != ISD::ZERO_EXTEND && + CompareUse->getOpcode() != ISD::SELECT && + !isLogicOp(CompareUse->getOpcode())) { + OmittedForNonExtendUses++; + return false; + } + return true; +} + /// Returns an equivalent of a SETCC node but with the result the same width as /// the inputs. This can nalso be used for SELECT_CC if either the true or false /// values is a power of two while the other is zero. @@ -2697,6 +2848,11 @@ if (InputVT != MVT::i32) return SDValue(); + // Don't convert this comparison to a GPR sequence because there are uses + // of the i1 result (i.e. uses that require the result in the CR). + if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare)) + return SDValue(); + SDLoc dl(Compare); ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX; @@ -2906,6 +3062,9 @@ } case ISD::AND: { + if (tryLogicOpOfCompares(N)) + return; + unsigned Imm, Imm2, SH, MB, ME; uint64_t Imm64; @@ -3025,6 +3184,9 @@ if (tryBitfieldInsert(N)) return; + if (tryLogicOpOfCompares(N)) + return; + short Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { @@ -3042,6 +3204,12 @@ // Other cases are autogenerated. 
break; } + case ISD::XOR: { + if (tryLogicOpOfCompares(N)) + return; + // Other cases are autogenerated. + break; + } case ISD::ADD: { short Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && Index: test/CodeGen/PowerPC/logic-ops-on-compares.ll =================================================================== --- test/CodeGen/PowerPC/logic-ops-on-compares.ll +++ test/CodeGen/PowerPC/logic-ops-on-compares.ll @@ -0,0 +1,42 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl + +; Function Attrs: nounwind +define signext i32 @test(i32 signext %a, i32 signext %b, i32 signext %c) { +; CHECK-LABEL: test: +; CHECK: xor r7, r3, r4 +; CHECK-NEXT: li r6, 55 +; CHECK-NEXT: xor r5, r5, r6 +; CHECK-NEXT: or r7, r7, r4 +; CHECK-NEXT: cntlzw r5, r5 +; CHECK-NEXT: cntlzw r6, r7 +; CHECK-NEXT: srwi r6, r6, 5 +; CHECK-NEXT: srwi r5, r5, 5 +; CHECK-NEXT: or. r5, r6, r5 +; CHECK-NEXT: bc 4, 1, .LBB0_2 +entry: + %tobool = icmp eq i32 %a, %b + %tobool1 = icmp eq i32 %b, 0 + %or.cond = and i1 %tobool, %tobool1 + %tobool3 = icmp eq i32 %c, 55 + %or.cond5 = or i1 %or.cond, %tobool3 + br i1 %or.cond5, label %if.end, label %if.then + +if.then: ; preds = %entry + %call = tail call signext i32 @foo(i32 signext %a) #2 + br label %return + +if.end: ; preds = %entry + %call4 = tail call signext i32 @bar(i32 signext %b) #2 + br label %return + +return: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ %call4, %if.end ], [ %call, %if.then ] + ret i32 %retval.0 +} +declare signext i32 @foo(i32 signext) +declare signext i32 @bar(i32 signext)