Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19463,6 +19463,35 @@
     }
   }
 
+  // Try to express conjunction "cmp 0 (or (xor A0 A1) (xor B0 B1))" as:
+  // cmp A0, A1; ccmp B0, B1, 0, eq; cset Cond
+  // Skip BRCOND users, as the setcc may be combined into the branch instead.
+  if (!DCI.isBeforeLegalize() && VT.isScalarInteger() &&
+      (Cond == ISD::SETEQ || Cond == ISD::SETNE) && isNullConstant(RHS) &&
+      LHS->getOpcode() == ISD::OR &&
+      (LHS.getOperand(0)->getOpcode() == ISD::XOR &&
+       LHS.getOperand(1)->getOpcode() == ISD::XOR) &&
+      LHS.getOperand(0)->hasOneUse() && LHS.getOperand(1)->hasOneUse() &&
+      N->use_begin()->getOpcode() != ISD::BRCOND) {
+    SDValue XOR0 = LHS.getOperand(0);
+    SDValue XOR1 = LHS.getOperand(1);
+    SDValue CCVal = DAG.getConstant(0, DL, MVT_CC);
+    EVT TstVT = LHS->getValueType(0);
+    SDValue Cmp =
+        DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(TstVT, MVT::Glue),
+                    XOR0.getOperand(0), XOR0.getOperand(1));
+    SDValue Overflow = Cmp.getValue(1);
+    SDValue CCmp = DAG.getNode(
+        AArch64ISD::CCMP, DL, MVT_CC, XOR1.getOperand(0), XOR1.getOperand(1),
+        DAG.getConstant(AArch64CC::EQ, DL, MVT_CC), CCVal, Overflow);
+    // Select 1 when Cond holds on the combined flags, 0 otherwise.
+    SDValue TVal = DAG.getConstant(1, DL, VT);
+    SDValue FVal = DAG.getConstant(0, DL, VT);
+    AArch64CC::CondCode CC0 = changeIntCCToAArch64CC(Cond);
+    SDValue NZCVOp = DAG.getConstant(CC0, DL, MVT::i32);
+    return DAG.getNode(AArch64ISD::CSEL, DL, VT, TVal, FVal, NZCVOp, CCmp);
+  }
+
   return SDValue();
 }
 
Index: llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
===================================================================
--- llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
+++ llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
@@ -12,11 +12,9 @@
 ; CHECKN-NEXT:    ldr x9, [x1]
 ; CHECKN-NEXT:    ldur x10, [x0, #7]
 ; CHECKN-NEXT:    ldur x11, [x1, #7]
-; CHECKN-NEXT:    eor x8, x8, x9
-; CHECKN-NEXT:    eor x9, x10, x11
-; CHECKN-NEXT:    orr x8, x8, x9
-; CHECKN-NEXT:    cmp x8, #0
-; CHECKN-NEXT:    cset w0, eq
+; CHECKN-NEXT:    cmp x8, x9
+; CHECKN-NEXT:    ccmp x10, x11, #0, eq
+; CHECKN-NEXT:    cset w0, eq
 ; CHECKN-NEXT:    ret
 ;
 ; CHECKS-LABEL: test_b2:
@@ -44,11 +42,9 @@
 ; CHECKN-NEXT:    ldr x9, [x1]
 ; CHECKN-NEXT:    ldur x10, [x0, #7]
 ; CHECKN-NEXT:    ldur x11, [x1, #7]
-; CHECKN-NEXT:    eor x8, x8, x9
-; CHECKN-NEXT:    eor x9, x10, x11
-; CHECKN-NEXT:    orr x8, x8, x9
-; CHECKN-NEXT:    cmp x8, #0
-; CHECKN-NEXT:    cset w0, eq
+; CHECKN-NEXT:    cmp x8, x9
+; CHECKN-NEXT:    ccmp x10, x11, #0, eq
+; CHECKN-NEXT:    cset w0, eq
 ; CHECKN-NEXT:    ret
 ;
 ; CHECKS-LABEL: test_b2_align8:
Index: llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
===================================================================
--- llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
+++ llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
@@ -128,3 +128,33 @@
   %cmp2 = icmp ne i64 %cast, zeroinitializer
   ret i1 %cmp2
 }
+
+define i1 @combine_setcc_eq0_conjunction_xor_or(ptr %a, ptr %b) {
+; CHECK-LABEL: combine_setcc_eq0_conjunction_xor_or:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp x8, x9, [x0]
+; CHECK-NEXT:    ldp x10, x11, [x1]
+; CHECK-NEXT:    cmp x8, x10
+; CHECK-NEXT:    ccmp x9, x11, #0, eq
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %bcmp = tail call i32 @bcmp(ptr dereferenceable(16) %a, ptr dereferenceable(16) %b, i64 16)
+  %cmp = icmp eq i32 %bcmp, 0
+  ret i1 %cmp
+}
+
+define i1 @combine_setcc_ne0_conjunction_xor_or(ptr %a, ptr %b) {
+; CHECK-LABEL: combine_setcc_ne0_conjunction_xor_or:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp x8, x9, [x0]
+; CHECK-NEXT:    ldp x10, x11, [x1]
+; CHECK-NEXT:    cmp x8, x10
+; CHECK-NEXT:    ccmp x9, x11, #0, eq
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
+  %bcmp = tail call i32 @bcmp(ptr dereferenceable(16) %a, ptr dereferenceable(16) %b, i64 16)
+  %cmp = icmp ne i32 %bcmp, 0
+  ret i1 %cmp
+}
+
+declare i32 @bcmp(ptr nocapture, ptr nocapture, i64)
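Note (illustration only, not part of the patch): the transform is sound because CCMP performs its comparison only when the incoming condition holds, and otherwise loads the immediate nzcv value; with nzcv #0 the Z flag is cleared, so "eq" cannot hold. The minimal stand-alone C++ sketch below (hypothetical helper name, SETEQ case only) models the emitted "cmp A0, A1; ccmp B0, B1, #0, eq; cset eq" sequence and checks it against the original "(or (xor A0 A1) (xor B0 B1)) == 0" predicate:

    #include <cassert>
    #include <cstdint>

    // Model of the emitted sequence for the SETEQ case; only the Z flag
    // matters for eq/ne.
    static bool eqChain(uint64_t A0, uint64_t A1, uint64_t B0, uint64_t B1) {
      bool Z = (A0 == A1);   // cmp A0, A1
      Z = Z && (B0 == B1);   // ccmp B0, B1, #0, eq (nzcv #0 clears Z)
      return Z;              // cset w0, eq
    }

    int main() {
      for (uint64_t A0 : {0, 1})
        for (uint64_t A1 : {0, 1})
          for (uint64_t B0 : {0, 1})
            for (uint64_t B1 : {0, 1})
              // Must match setcc (or (xor A0 A1) (xor B0 B1)), 0, eq.
              assert(eqChain(A0, A1, B0, B1) ==
                     (((A0 ^ A1) | (B0 ^ B1)) == 0));
      return 0;
    }

For SETNE the flag chain is identical and only the final cset condition is inverted (cset w0, ne), as the tests below exercise.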
Index: llvm/test/CodeGen/AArch64/i128-cmp.ll
===================================================================
--- llvm/test/CodeGen/AArch64/i128-cmp.ll
+++ llvm/test/CodeGen/AArch64/i128-cmp.ll
@@ -6,11 +6,9 @@
 define i1 @cmp_i128_eq(i128 %a, i128 %b) {
 ; CHECK-LABEL: cmp_i128_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor x8, x1, x3
-; CHECK-NEXT:    eor x9, x0, x2
-; CHECK-NEXT:    orr x8, x9, x8
-; CHECK-NEXT:    cmp x8, #0
-; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    cmp x0, x2
+; CHECK-NEXT:    ccmp x1, x3, #0, eq
+; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %cmp = icmp eq i128 %a, %b
   ret i1 %cmp
@@ -19,11 +17,9 @@
 define i1 @cmp_i128_ne(i128 %a, i128 %b) {
 ; CHECK-LABEL: cmp_i128_ne:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor x8, x1, x3
-; CHECK-NEXT:    eor x9, x0, x2
-; CHECK-NEXT:    orr x8, x9, x8
-; CHECK-NEXT:    cmp x8, #0
-; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    cmp x0, x2
+; CHECK-NEXT:    ccmp x1, x3, #0, eq
+; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i128 %a, %b
   ret i1 %cmp
Index: llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
===================================================================
--- llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
+++ llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
@@ -68,13 +68,11 @@
 ; AARCH-NEXT:    adds x11, x12, x11
 ; AARCH-NEXT:    adc x12, x13, x14
 ; AARCH-NEXT:    adds x10, x11, x10
-; AARCH-NEXT:    adc x9, x12, x9
 ; AARCH-NEXT:    asr x11, x1, #63
-; AARCH-NEXT:    eor x9, x9, x11
-; AARCH-NEXT:    eor x10, x10, x11
-; AARCH-NEXT:    orr x9, x10, x9
-; AARCH-NEXT:    cmp x9, #0
-; AARCH-NEXT:    cset w9, ne
+; AARCH-NEXT:    adc x9, x12, x9
+; AARCH-NEXT:    cmp x10, x11
+; AARCH-NEXT:    ccmp x9, x11, #0, eq
+; AARCH-NEXT:    cset w9, ne
 ; AARCH-NEXT:    tbz x8, #63, .LBB1_2
 ; AARCH-NEXT:    // %bb.1: // %Entry
 ; AARCH-NEXT:    eor x8, x3, #0x8000000000000000