Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1734,7 +1734,7 @@
     return SDValue(N, 0);
 
   // fold (a+b) -> (a|b) iff a and b share no bits.
-  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
+  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::OR, VT)) &&
       VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1))
     return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
 
@@ -6373,7 +6373,7 @@
       isa<LoadSDNode>(N0.getOperand(0)) &&
       N0.getOperand(1).getOpcode() == ISD::Constant &&
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
-      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+      (!LegalOperations && TLI.isOperationLegalOrCustom(N0.getOpcode(), VT))) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
     if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
       bool DoXform = true;
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -488,6 +488,8 @@
   SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
                         RTLIB::Libcall Call) const;
+  SDValue LowerAND(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -144,6 +144,16 @@
   setOperationAction(ISD::XOR, MVT::i32, Custom);
   setOperationAction(ISD::XOR, MVT::i64, Custom);
 
+  // Custom lowering hooks are needed for OR so that or-trees
+  // of setccs can be folded into CCMP sequences.
+  setOperationAction(ISD::OR, MVT::i32, Custom);
+  setOperationAction(ISD::OR, MVT::i64, Custom);
+
+  // Likewise for AND, so that and-trees of setccs can be
+  // folded into CCMP sequences.
+  setOperationAction(ISD::AND, MVT::i32, Custom);
+  setOperationAction(ISD::AND, MVT::i64, Custom);
+
   // Virtually no operation on f128 is legal, but LLVM can't expand them when
   // there's a valid register class, so we need custom operations in most cases.
   setOperationAction(ISD::FABS, MVT::f128, Expand);
@@ -1541,6 +1551,32 @@
     return Cmp;
 }
 
+// Attempt to form conditional compare sequences for and/or trees
+// with setcc leaves.
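+//
+// Illustrative sketch, drawn from the tests below rather than guaranteed
+// by this function alone: for
+//   (or (setcc slt x, 1), (setcc slt y, 1))
+// we want a single flag-setting chain,
+//   cmp w1, #1 ; ccmp w0, #1, #8, ge ; cset w0, lt
+// instead of two independent cset results combined with an orr.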
+static SDValue tryLowerToAArch64Cmp(SDValue Op, SelectionDAG &DAG) {
+  EVT VT = Op.getValueType();
+  if (!Op->hasOneUse())
+    return Op;
+
+  SDLoc DL(Op);
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  bool CanPushNegate;
+  // Bail out (keeping the original operation) unless both operands are
+  // and/or trees with setcc leaves and the operands themselves are setccs.
+  if (!isConjunctionDisjunctionTree(LHS, CanPushNegate))
+    return Op;
+  if (!isConjunctionDisjunctionTree(RHS, CanPushNegate))
+    return Op;
+  if (LHS.getOpcode() != ISD::SETCC || RHS.getOpcode() != ISD::SETCC)
+    return Op;
+
+  // Compare the whole tree against zero and materialize its truth value
+  // with a CSEL: Op == 0 selects FVal (0), anything else selects TVal (1).
+  SDValue TVal = DAG.getConstant(1, DL, VT);
+  SDValue FVal = DAG.getConstant(0, DL, VT);
+  SDValue CCVal;
+  SDValue Cmp = getAArch64Cmp(Op, FVal, ISD::SETEQ, CCVal, DAG, DL);
+  return DAG.getNode(AArch64ISD::CSEL, DL, VT, FVal, TVal, CCVal, Cmp);
+}
+
 static std::pair<SDValue, SDValue> getAArch64XALUOOp(AArch64CC::CondCode &CC,
                                                      SDValue Op,
                                                      SelectionDAG &DAG) {
   assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
@@ -1662,6 +1698,22 @@
   return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first;
 }
 
+SDValue AArch64TargetLowering::LowerAND(SDValue Op, SelectionDAG &DAG) const {
+  if (Op.getValueType().isVector())
+    return LowerVectorAND(Op, DAG);
+
+  return tryLowerToAArch64Cmp(Op, DAG);
+}
+
+SDValue AArch64TargetLowering::LowerOR(SDValue Op, SelectionDAG &DAG) const {
+  if (Op.getValueType().isVector())
+    return LowerVectorOR(Op, DAG);
+
+  return tryLowerToAArch64Cmp(Op, DAG);
+}
+
 static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
   SDValue Sel = Op.getOperand(0);
   SDValue Other = Op.getOperand(1);
@@ -2316,9 +2368,9 @@
   case ISD::FCOPYSIGN:
     return LowerFCOPYSIGN(Op, DAG);
   case ISD::AND:
-    return LowerVectorAND(Op, DAG);
+    return LowerAND(Op, DAG);
   case ISD::OR:
-    return LowerVectorOR(Op, DAG);
+    return LowerOR(Op, DAG);
   case ISD::XOR:
     return LowerXOR(Op, DAG);
   case ISD::PREFETCH:
Index: test/CodeGen/AArch64/arm64-ccmp.ll
===================================================================
--- test/CodeGen/AArch64/arm64-ccmp.ll
+++ test/CodeGen/AArch64/arm64-ccmp.ll
@@ -389,21 +389,76 @@
   ret i32 %sel
 }
 
-; CHECK-LABEL: select_noccmp1
-define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
-; CHECK: cmp x0, #0
-; CHECK-NEXT: cset [[REG0:w[0-9]+]], lt
-; CHECK-NEXT: cmp x0, #13
-; CHECK-NOT: ccmp
+; CHECK-LABEL: single_noselect
+define i32 @single_noselect(i32 %A, i32 %B) #0 {
+; CHECK: cmp w1, #1
+; CHECK-NEXT: ccmp w0, #1, #8, ge
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+  %notlhs = icmp slt i32 %A, 1
+  %notrhs = icmp slt i32 %B, 1
+  %lnot = or i1 %notlhs, %notrhs
+  %conv = zext i1 %lnot to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: single_and_ext
+define i32 @single_and_ext(i32 %A, i32 %B, i32 %C) #0 {
+; CHECK: cmp w1, #2
+; CHECK-NEXT: ccmp w0, #4, #0, lt
+; CHECK-NEXT: cinc w0, w2, lt
+; CHECK-NEXT: ret
+  %cmp = icmp slt i32 %A, 4
+  %cmp1 = icmp slt i32 %B, 2
+  %and1 = and i1 %cmp, %cmp1
+  %conv = zext i1 %and1 to i32
+  %add = add nsw i32 %conv, %C
+  ret i32 %add
+}
+
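+; Both arms below feed a phi: the or-tree in %if.then and the and-tree in
+; %if.else should each lower to a cmp/ccmp/cset sequence, with a final csel
+; picking the result based on %C (w2), as the CHECK lines verify.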
+; CHECK-LABEL: single_noselect_phi
+define i32 @single_noselect_phi(i32 %A, i32 %B, i32 %C) #0 {
+; CHECK: cmp w1, #0
+; CHECK-NEXT: ccmp w0, #0, #4, gt
 ; CHECK-NEXT: cset [[REG1:w[0-9]+]], gt
-; CHECK-NEXT: cmp x2, #2
+; CHECK-NEXT: cmp w1, #2
+; CHECK-NEXT: ccmp w0, #4, #8, ge
 ; CHECK-NEXT: cset [[REG2:w[0-9]+]], lt
+; CHECK-NEXT: cmp w2, #0
+; CHECK-NEXT: csel w0, [[REG1]], [[REG2]], eq
+; CHECK-NEXT: ret
+entry:
+  %tobool = icmp eq i32 %C, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %cmp = icmp slt i32 %A, 4
+  %cmp1 = icmp slt i32 %B, 2
+  %0 = or i1 %cmp, %cmp1
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %cmp2 = icmp sgt i32 %A, 0
+  %cmp3 = icmp sgt i32 %B, 0
+  %1 = and i1 %cmp2, %cmp3
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %b.0.in = phi i1 [ %0, %if.then ], [ %1, %if.else ]
+  %conv = zext i1 %b.0.in to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: select_noccmp1
+define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
+; CHECK: cmp x0, #13
+; CHECK-NEXT: ccmp x0, #0, #0, gt
+; CHECK-NEXT: cset [[REG1:w[0-9]+]], lt
 ; CHECK-NEXT: cmp x2, #4
-; CHECK-NEXT: cset [[REG3:w[0-9]+]], gt
-; CHECK-NEXT: and [[REG4:w[0-9]+]], [[REG0]], [[REG1]]
-; CHECK-NEXT: and [[REG5:w[0-9]+]], [[REG2]], [[REG3]]
-; CHECK-NEXT: orr [[REG6:w[0-9]+]], [[REG4]], [[REG5]]
-; CHECK-NEXT: cmp [[REG6]], #0
+; CHECK-NEXT: ccmp x2, #2, #0, gt
+; CHECK-NEXT: cset [[REG2:w[0-9]+]], lt
+; CHECK-NEXT: orr [[REG3:w[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: cmp [[REG3]], #0
 ; CHECK-NEXT: csel x0, xzr, x3, ne
 ; CHECK-NEXT: ret
   %c0 = icmp slt i64 %v1, 0