Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19525,6 +19525,70 @@
                        Op0ExtV, Op1ExtV, Op->getOperand(2));
 }
 
+// Check whether N is a continuous comparison sequence (an OR tree of XORs).
+static bool
+isMemcmpPattern(SDValue N,
+                SmallVector<std::pair<SDValue, SDValue>, 16> &WorkList) {
+  // All the nodes must be either OR or XOR.
+  if (N->getOpcode() != ISD::OR && N->getOpcode() != ISD::XOR)
+    return false;
+
+  // The leaf nodes must be XOR; collect their operand pairs.
+  if (N->getOpcode() == ISD::XOR) {
+    WorkList.push_back(std::make_pair(N->getOperand(0), N->getOperand(1)));
+    return true;
+  }
+
+  if (isMemcmpPattern(N->getOperand(0), WorkList) &&
+      isMemcmpPattern(N->getOperand(1), WorkList))
+    return true;
+  return false;
+}
+
+static SDValue performMemcmpCombine(SDNode *N,
+                                    TargetLowering::DAGCombinerInfo &DCI,
+                                    SelectionDAG &DAG) {
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  SmallVector<std::pair<SDValue, SDValue>, 16> WorkList;
+
+  // Try to express the conjunction "cmp 0 (or (xor A0 A1) (xor B0 B1))" as:
+  // cmp A0, A1; ccmp B0, B1, #0, eq; cset w0, inv(Cond)
+  if (!DCI.isBeforeLegalize() && VT.isScalarInteger() &&
+      (Cond == ISD::SETEQ || Cond == ISD::SETNE) && isNullConstant(RHS) &&
+      LHS->getOpcode() == ISD::OR && isMemcmpPattern(LHS, WorkList)) {
+
+    SDValue CCVal = DAG.getConstant(AArch64CC::EQ, DL, MVT_CC);
+    EVT TstVT = LHS->getValueType(0);
+    SDValue XOR0, XOR1;
+    std::tie(XOR0, XOR1) = WorkList[0];
+    SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL,
+                              DAG.getVTList(TstVT, MVT::i32), XOR0, XOR1);
+    SDValue Overflow = Cmp.getValue(1);
+    SDValue CCmp;
+    for (unsigned i = 1; i < WorkList.size(); i++) {
+      std::tie(XOR0, XOR1) = WorkList[i];
+      SDValue NZCVOp = DAG.getConstant(0, DL, MVT::i32);
+      CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, XOR0, XOR1, NZCVOp,
+                         CCVal, Overflow);
+      Overflow = CCmp;
+    }
+
+    // Invert CSEL's operands.
+    SDValue TVal = DAG.getConstant(1, DL, VT);
+    SDValue FVal = DAG.getConstant(0, DL, VT);
+    AArch64CC::CondCode CC = changeIntCCToAArch64CC(Cond);
+    AArch64CC::CondCode InvCC = AArch64CC::getInvertedCondCode(CC);
+    return DAG.getNode(AArch64ISD::CSEL, DL, VT, FVal, TVal,
+                       DAG.getConstant(InvCC, DL, MVT::i32), CCmp);
+  }
+
+  return SDValue();
+}
+
 static SDValue performSETCCCombine(SDNode *N,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    SelectionDAG &DAG) {
@@ -19585,34 +19649,9 @@
     }
   }
 
-  // Try to express conjunction "cmp 0 (or (xor A0 A1) (xor B0 B1))" as:
-  // cmp A0, A0; ccmp A0, B1, 0, eq; cmp inv(Cond) flag
-  if (!DCI.isBeforeLegalize() && VT.isScalarInteger() &&
-      (Cond == ISD::SETEQ || Cond == ISD::SETNE) && isNullConstant(RHS) &&
-      LHS->getOpcode() == ISD::OR &&
-      (LHS.getOperand(0)->getOpcode() == ISD::XOR &&
-       LHS.getOperand(1)->getOpcode() == ISD::XOR) &&
-      LHS.hasOneUse() && LHS.getOperand(0)->hasOneUse() &&
-      LHS.getOperand(1)->hasOneUse()) {
-    SDValue XOR0 = LHS.getOperand(0);
-    SDValue XOR1 = LHS.getOperand(1);
-    SDValue CCVal = DAG.getConstant(AArch64CC::EQ, DL, MVT_CC);
-    EVT TstVT = LHS->getValueType(0);
-    SDValue Cmp =
-        DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(TstVT, MVT::i32),
-                    XOR0.getOperand(0), XOR0.getOperand(1));
-    SDValue Overflow = Cmp.getValue(1);
-    SDValue NZCVOp = DAG.getConstant(0, DL, MVT::i32);
-    SDValue CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, XOR1.getOperand(0),
-                               XOR1.getOperand(1), NZCVOp, CCVal, Overflow);
-    // Invert CSEL's operands.
-    SDValue TVal = DAG.getConstant(1, DL, VT);
-    SDValue FVal = DAG.getConstant(0, DL, VT);
-    AArch64CC::CondCode CC = changeIntCCToAArch64CC(Cond);
-    AArch64CC::CondCode InvCC = AArch64CC::getInvertedCondCode(CC);
-    return DAG.getNode(AArch64ISD::CSEL, DL, VT, FVal, TVal,
-                       DAG.getConstant(InvCC, DL, MVT::i32), CCmp);
-  }
+  // Try to perform the memcmp combine when the result is tested for [in]equality with 0.
+  if (SDValue V = performMemcmpCombine(N, DCI, DAG))
+    return V;
 
   return SDValue();
 }
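Note: the cmp/ccmp chain built by performMemcmpCombine evaluates a short-circuit conjunction of word equalities. Each CCMP performs its comparison only while the incoming flags are still EQ; otherwise it loads the immediate NZCV value #0 (a not-equal state), so the final EQ flag is set iff every pair compared equal. A minimal C++ model of the emitted sequence (ccmpChainEq and the plain arrays are illustrative, not part of the patch):

    #include <cstddef>
    #include <cstdint>

    // Models: cmp A[0], B[0]; ccmp A[i], B[i], #0, eq; ...; cset w0, eq
    static bool ccmpChainEq(const uint64_t *A, const uint64_t *B, size_t N) {
      bool Eq = A[0] == B[0];        // SUBS sets the initial flags
      for (size_t I = 1; I < N; ++I)
        Eq = Eq && (A[I] == B[I]);   // each CCMP keeps or kills EQ
      return Eq;                     // CSET materializes the result
    }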
Index: llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
===================================================================
--- llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
+++ llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
@@ -70,17 +70,13 @@
 ; CHECKN-NEXT:    ldp x8, x9, [x0]
 ; CHECKN-NEXT:    ldp x10, x11, [x1]
 ; CHECKN-NEXT:    ldr x12, [x0, #16]
-; CHECKN-NEXT:    ldr x13, [x1, #16]
-; CHECKN-NEXT:    ldur x14, [x0, #23]
-; CHECKN-NEXT:    eor x8, x8, x10
-; CHECKN-NEXT:    ldur x15, [x1, #23]
-; CHECKN-NEXT:    eor x9, x9, x11
-; CHECKN-NEXT:    eor x10, x12, x13
-; CHECKN-NEXT:    orr x8, x8, x9
-; CHECKN-NEXT:    eor x11, x14, x15
-; CHECKN-NEXT:    orr x9, x10, x11
-; CHECKN-NEXT:    orr x8, x8, x9
-; CHECKN-NEXT:    cmp x8, #0
+; CHECKN-NEXT:    cmp x8, x10
+; CHECKN-NEXT:    ldr x8, [x1, #16]
+; CHECKN-NEXT:    ccmp x9, x11, #0, eq
+; CHECKN-NEXT:    ldur x9, [x0, #23]
+; CHECKN-NEXT:    ldur x10, [x1, #23]
+; CHECKN-NEXT:    ccmp x12, x8, #0, eq
+; CHECKN-NEXT:    ccmp x9, x10, #0, eq
 ; CHECKN-NEXT:    cset w0, eq
 ; CHECKN-NEXT:    ret
 ;
Index: llvm/test/CodeGen/AArch64/bcmp.ll
===================================================================
--- llvm/test/CodeGen/AArch64/bcmp.ll
+++ llvm/test/CodeGen/AArch64/bcmp.ll
@@ -39,6 +39,7 @@
   ret i1 %r
 }
 
+; or (and (xor a, b), C1), (and (xor c, d), C2)
 define i1 @bcmp3(ptr %a, ptr %b) {
 ; CHECK-LABEL: bcmp3:
 ; CHECK:       // %bb.0:
@@ -70,6 +71,7 @@
   ret i1 %r
 }
 
+; or (xor a, b), (and (xor c, d), C2)
 define i1 @bcmp5(ptr %a, ptr %b) {
 ; CHECK-LABEL: bcmp5:
 ; CHECK:       // %bb.0:
@@ -88,6 +90,7 @@
   ret i1 %r
 }
 
+; or (xor a, b), (and (xor c, d), C2)
 define i1 @bcmp6(ptr %a, ptr %b) {
 ; CHECK-LABEL: bcmp6:
 ; CHECK:       // %bb.0:
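Note on the pattern annotations above (my reading of the tests, not text from the patch): for sizes that are not a multiple of the load width, the trailing bytes are loaded zero-extended and masked, so their XOR leaves sit under an AND. isMemcmpPattern only walks OR and XOR nodes, so bcmp3/bcmp5/bcmp6 keep their existing lowering; only pure or/xor trees, such as bcmp7 and the larger sizes below, take the ccmp path. A scalar model of the bcmp5 shape, with illustrative names and C2 = 0xff assumed:

    #include <cstdint>

    // "or (xor a, b), (and (xor c, d), C2)" for a 4-byte word plus 1 byte.
    static bool bcmp5Eq(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
      uint32_t T = (a ^ b) | ((c ^ d) & 0xff); // C2 masks the live byte
      return T == 0;                           // equal iff both lanes are 0
    }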
@@ -106,6 +109,7 @@
   ret i1 %r
 }
 
+; or (xor a, b), (xor c, d)
 define i1 @bcmp7(ptr %a, ptr %b) {
 ; CHECK-LABEL: bcmp7:
 ; CHECK:       // %bb.0:
@@ -295,13 +299,10 @@
 ; CHECK-NEXT:    ldp x8, x9, [x0]
 ; CHECK-NEXT:    ldp x10, x11, [x1]
 ; CHECK-NEXT:    ldr x12, [x0, #16]
-; CHECK-NEXT:    ldr x13, [x1, #16]
-; CHECK-NEXT:    eor x8, x8, x10
-; CHECK-NEXT:    eor x9, x9, x11
-; CHECK-NEXT:    eor x10, x12, x13
-; CHECK-NEXT:    orr x8, x8, x9
-; CHECK-NEXT:    orr x8, x8, x10
-; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    cmp x8, x10
+; CHECK-NEXT:    ldr x8, [x1, #16]
+; CHECK-NEXT:    ccmp x9, x11, #0, eq
+; CHECK-NEXT:    ccmp x12, x8, #0, eq
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %cr = call i32 @bcmp(ptr %a, ptr %b, i64 24)
@@ -365,20 +366,15 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp x8, x9, [x0]
 ; CHECK-NEXT:    ldp x10, x11, [x1]
-; CHECK-NEXT:    ldp x12, x13, [x0, #16]
-; CHECK-NEXT:    ldp x14, x15, [x1, #16]
-; CHECK-NEXT:    eor x8, x8, x10
-; CHECK-NEXT:    eor x9, x9, x11
-; CHECK-NEXT:    ldur x10, [x0, #30]
-; CHECK-NEXT:    orr x8, x8, x9
+; CHECK-NEXT:    cmp x8, x10
+; CHECK-NEXT:    ccmp x9, x11, #0, eq
 ; CHECK-NEXT:    ldur x11, [x1, #30]
-; CHECK-NEXT:    eor x12, x12, x14
-; CHECK-NEXT:    eor x13, x13, x15
-; CHECK-NEXT:    orr x9, x12, x13
-; CHECK-NEXT:    eor x10, x10, x11
-; CHECK-NEXT:    orr x8, x8, x9
-; CHECK-NEXT:    orr x8, x8, x10
-; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    ldp x8, x9, [x0, #16]
+; CHECK-NEXT:    ldp x12, x10, [x1, #16]
+; CHECK-NEXT:    ccmp x8, x12, #0, eq
+; CHECK-NEXT:    ldur x8, [x0, #30]
+; CHECK-NEXT:    ccmp x9, x10, #0, eq
+; CHECK-NEXT:    ccmp x8, x11, #0, eq
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %cr = call i32 @bcmp(ptr %a, ptr %b, i64 38)
@@ -391,24 +387,18 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp x8, x9, [x0]
 ; CHECK-NEXT:    ldp x10, x11, [x1]
-; CHECK-NEXT:    ldp x12, x13, [x0, #16]
-; CHECK-NEXT:    ldp x14, x15, [x1, #16]
-; CHECK-NEXT:    eor x8, x8, x10
-; CHECK-NEXT:    eor x9, x9, x11
-; CHECK-NEXT:    ldr x16, [x0, #32]
-; CHECK-NEXT:    orr x8, x8, x9
-; CHECK-NEXT:    ldr x17, [x1, #32]
-; CHECK-NEXT:    ldur x18, [x0, #37]
-; CHECK-NEXT:    eor x10, x12, x14
-; CHECK-NEXT:    ldur x0, [x1, #37]
-; CHECK-NEXT:    eor x11, x13, x15
-; CHECK-NEXT:    eor x12, x16, x17
-; CHECK-NEXT:    orr x9, x10, x11
-; CHECK-NEXT:    orr x8, x8, x9
-; CHECK-NEXT:    eor x13, x18, x0
-; CHECK-NEXT:    orr x10, x12, x13
-; CHECK-NEXT:    orr x8, x8, x10
-; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    cmp x8, x10
+; CHECK-NEXT:    ccmp x9, x11, #0, eq
+; CHECK-NEXT:    ldr x11, [x1, #32]
+; CHECK-NEXT:    ldp x8, x9, [x0, #16]
+; CHECK-NEXT:    ldp x12, x10, [x1, #16]
+; CHECK-NEXT:    ccmp x8, x12, #0, eq
+; CHECK-NEXT:    ldr x8, [x0, #32]
+; CHECK-NEXT:    ccmp x9, x10, #0, eq
+; CHECK-NEXT:    ldur x9, [x0, #37]
+; CHECK-NEXT:    ldur x10, [x1, #37]
+; CHECK-NEXT:    ccmp x8, x11, #0, eq
+; CHECK-NEXT:    ccmp x9, x10, #0, eq
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %cr = call i32 @bcmp(ptr %a, ptr %b, i64 45)
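Note: the odd sizes rely on overlapping tail loads (my summary; the sketch below is illustrative, not part of the patch). For bcmp38 the 8-byte words are loaded at offsets 0, 8, 16, 24 and 30, so the last word re-reads bytes 30 and 31; re-comparing bytes is harmless for an equality test and avoids narrower tail compares. Five pairs yield one cmp plus four ccmps; bcmp45 (offsets 0, 8, 16, 24, 32, 37) yields one cmp plus five ccmps:

    #include <cstdint>
    #include <cstring>

    // 38-byte equality via five overlapping 8-byte words (30 = 38 - 8).
    static bool eq38(const unsigned char *P, const unsigned char *Q) {
      static const unsigned Off[5] = {0, 8, 16, 24, 30};
      bool Eq = true;
      for (unsigned I = 0; I < 5; ++I) {
        uint64_t A, B;
        std::memcpy(&A, P + Off[I], 8); // unaligned load, like ldur
        std::memcpy(&B, Q + Off[I], 8);
        Eq = Eq && (A == B);            // folds into the cmp/ccmp chain
      }
      return Eq;
    }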
@@ -421,28 +411,20 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp x8, x9, [x0]
 ; CHECK-NEXT:    ldp x10, x11, [x1]
-; CHECK-NEXT:    ldp x12, x13, [x0, #16]
-; CHECK-NEXT:    ldp x14, x15, [x1, #16]
-; CHECK-NEXT:    eor x8, x8, x10
-; CHECK-NEXT:    eor x9, x9, x11
-; CHECK-NEXT:    ldp x16, x17, [x0, #32]
-; CHECK-NEXT:    orr x8, x8, x9
-; CHECK-NEXT:    ldp x18, x2, [x1, #32]
-; CHECK-NEXT:    eor x12, x12, x14
-; CHECK-NEXT:    eor x13, x13, x15
-; CHECK-NEXT:    ldp x3, x0, [x0, #48]
-; CHECK-NEXT:    orr x9, x12, x13
-; CHECK-NEXT:    ldp x10, x11, [x1, #48]
-; CHECK-NEXT:    eor x14, x16, x18
-; CHECK-NEXT:    eor x15, x17, x2
-; CHECK-NEXT:    orr x12, x14, x15
-; CHECK-NEXT:    orr x8, x8, x9
-; CHECK-NEXT:    eor x10, x3, x10
-; CHECK-NEXT:    eor x11, x0, x11
-; CHECK-NEXT:    orr x10, x10, x11
-; CHECK-NEXT:    orr x9, x12, x10
-; CHECK-NEXT:    orr x8, x8, x9
-; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    cmp x8, x10
+; CHECK-NEXT:    ccmp x9, x11, #0, eq
+; CHECK-NEXT:    ldp x8, x9, [x0, #16]
+; CHECK-NEXT:    ldp x12, x10, [x1, #16]
+; CHECK-NEXT:    ccmp x8, x12, #0, eq
+; CHECK-NEXT:    ldp x8, x11, [x1, #32]
+; CHECK-NEXT:    ccmp x9, x10, #0, eq
+; CHECK-NEXT:    ldp x9, x10, [x0, #32]
+; CHECK-NEXT:    ccmp x9, x8, #0, eq
+; CHECK-NEXT:    ccmp x10, x11, #0, eq
+; CHECK-NEXT:    ldp x9, x10, [x0, #48]
+; CHECK-NEXT:    ldp x8, x11, [x1, #48]
+; CHECK-NEXT:    ccmp x9, x8, #0, eq
+; CHECK-NEXT:    ccmp x10, x11, #0, eq
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %cr = call i32 @bcmp(ptr %a, ptr %b, i64 64)
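Note: for reference, a source-level reproducer (illustrative, not part of the patch). A memcmp equality test against a constant size is turned into a bcmp-style check, which this combine lowers to the single cmp/ccmp/cset chain seen in the updated checks:

    #include <cstring>

    // e.g. clang -O2 --target=aarch64 (illustrative invocation)
    bool eq24(const void *A, const void *B) {
      return std::memcmp(A, B, 24) == 0; // expands to the cmp/ccmp chain
    }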