Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8601,29 +8601,17 @@
   if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && isNullConstant(RHS) &&
       LHS->getOpcode() == ISD::OR && LHS->hasOneUse() &&
       isOrXorChain(LHS, NumXors, WorkList)) {
-    SDValue CCVal = DAG.getConstant(AArch64CC::EQ, DL, MVT_CC);
-    EVT TstVT = LHS->getValueType(0);
     SDValue XOR0, XOR1;
     std::tie(XOR0, XOR1) = WorkList[0];
-    SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL,
-                              DAG.getVTList(TstVT, MVT::i32), XOR0, XOR1);
-    SDValue Overflow = Cmp.getValue(1);
-    SDValue CCmp;
+    SDValue Cmp = DAG.getSetCC(DL, VT, XOR0, XOR1, ISD::SETNE);
     for (unsigned I = 1; I < WorkList.size(); I++) {
       std::tie(XOR0, XOR1) = WorkList[I];
-      SDValue NZCVOp = DAG.getConstant(0, DL, MVT::i32);
-      CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, XOR0, XOR1, NZCVOp,
-                         CCVal, Overflow);
-      Overflow = CCmp;
+      SDValue CmpChain = DAG.getSetCC(DL, VT, XOR0, XOR1, ISD::SETNE);
+      Cmp = DAG.getNode(ISD::OR, DL, VT, Cmp, CmpChain);
     }
 
     // Exit early by inverting the condition, which help reduce indentations.
-    SDValue TVal = DAG.getConstant(1, DL, VT);
-    SDValue FVal = DAG.getConstant(0, DL, VT);
-    AArch64CC::CondCode CC = changeIntCCToAArch64CC(Cond);
-    AArch64CC::CondCode InvCC = AArch64CC::getInvertedCondCode(CC);
-    return DAG.getNode(AArch64ISD::CSEL, DL, VT, FVal, TVal,
-                       DAG.getConstant(InvCC, DL, MVT::i32), CCmp);
+    return DAG.getSetCC(DL, VT, Cmp, DAG.getConstant(0, DL, VT), Cond);
   }
 
   return SDValue();
@@ -8664,11 +8652,6 @@
     }
   }
 
-  // Address some cases folded And in the stage of `Optimized type-legalized
-  // selection`
-  if (SDValue V = performOrXorChainCombine(Op.getNode(), DAG))
-    return V;
-
   if (LHS.getValueType().isInteger()) {
     SDValue CCVal;
     SDValue Cmp = getAArch64Cmp(
@@ -19741,9 +19724,8 @@
   }
 
   // Try to perform the memcmp when the result is tested for [in]equality with 0
-  if (!DCI.isBeforeLegalize())
-    if (SDValue V = performOrXorChainCombine(N, DAG))
-      return V;
+  if (SDValue V = performOrXorChainCombine(N, DAG))
+    return V;
 
   return SDValue();
 }
Index: llvm/test/CodeGen/AArch64/atomicrmw-O0.ll
===================================================================
--- llvm/test/CodeGen/AArch64/atomicrmw-O0.ll
+++ llvm/test/CodeGen/AArch64/atomicrmw-O0.ll
@@ -249,7 +249,7 @@
 ; NOLSE-NEXT:    cset w8, ne
 ; NOLSE-NEXT:    str x10, [sp, #32] // 8-byte Folded Spill
 ; NOLSE-NEXT:    str x9, [sp, #40] // 8-byte Folded Spill
-; NOLSE-NEXT:    tbnz w8, #0, .LBB4_1
+; NOLSE-NEXT:    cbnz w8, .LBB4_1
 ; NOLSE-NEXT:    b .LBB4_6
 ; NOLSE-NEXT:  .LBB4_6: // %atomicrmw.end
 ; NOLSE-NEXT:    ldr x1, [sp, #8] // 8-byte Folded Reload
@@ -288,7 +288,7 @@
 ; LSE-NEXT:    cset w8, ne
 ; LSE-NEXT:    str x10, [sp, #32] // 8-byte Folded Spill
 ; LSE-NEXT:    str x9, [sp, #40] // 8-byte Folded Spill
-; LSE-NEXT:    tbnz w8, #0, .LBB4_1
+; LSE-NEXT:    cbnz w8, .LBB4_1
 ; LSE-NEXT:    b .LBB4_2
 ; LSE-NEXT:  .LBB4_2: // %atomicrmw.end
 ; LSE-NEXT:    ldr x1, [sp, #8] // 8-byte Folded Reload
@@ -645,7 +645,7 @@
 ; NOLSE-NEXT:    cset w8, ne
 ; NOLSE-NEXT:    str x10, [sp, #32] // 8-byte Folded Spill
 ; NOLSE-NEXT:    str x9, [sp, #40] // 8-byte Folded Spill
-; NOLSE-NEXT:    tbnz w8, #0, .LBB9_1
+; NOLSE-NEXT:    cbnz w8, .LBB9_1
 ; NOLSE-NEXT:    b .LBB9_6
 ; NOLSE-NEXT:  .LBB9_6: // %atomicrmw.end
 ; NOLSE-NEXT:    ldr x1, [sp, #8] // 8-byte Folded Reload
@@ -688,7 +688,7 @@
 ; LSE-NEXT:    cset w8, ne
 ; LSE-NEXT:    str x10, [sp, #32] // 8-byte Folded Spill
 ; LSE-NEXT:    str x9, [sp, #40] // 8-byte Folded Spill
-; LSE-NEXT:    tbnz w8, #0, .LBB9_1
+; LSE-NEXT:    cbnz w8, .LBB9_1
 ; LSE-NEXT:    b .LBB9_2
 ; LSE-NEXT:  .LBB9_2: // %atomicrmw.end
 ; LSE-NEXT:    ldr x1, [sp, #8] // 8-byte Folded Reload
Index: llvm/test/CodeGen/AArch64/bcmp.ll
===================================================================
--- llvm/test/CodeGen/AArch64/bcmp.ll
+++ llvm/test/CodeGen/AArch64/bcmp.ll
@@ -133,19 +133,16 @@
   ret i1 %r
 }
 
-; TODO: or (xor a, b), (and (xor c, d), C2)
+; or (xor a, b), (and (xor c, d), C2)
 define i1 @bcmp9(ptr %a, ptr %b) {
 ; CHECK-LABEL: bcmp9:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrb w9, [x0, #8]
-; CHECK-NEXT:    ldrb w10, [x1, #8]
 ; CHECK-NEXT:    ldr x8, [x0]
-; CHECK-NEXT:    ldr x11, [x1]
-; CHECK-NEXT:    eor w9, w9, w10
-; CHECK-NEXT:    and x9, x9, #0xff
-; CHECK-NEXT:    eor x8, x8, x11
-; CHECK-NEXT:    orr x8, x8, x9
-; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    ldr x9, [x1]
+; CHECK-NEXT:    ldrb w10, [x0, #8]
+; CHECK-NEXT:    ldrb w11, [x1, #8]
+; CHECK-NEXT:    cmp x8, x9
+; CHECK-NEXT:    ccmp x10, x11, #0, eq
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %cr = call i32 @bcmp(ptr %a, ptr %b, i64 9)
@@ -156,15 +153,12 @@
 define i1 @bcmp10(ptr %a, ptr %b) {
 ; CHECK-LABEL: bcmp10:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrh w9, [x0, #8]
-; CHECK-NEXT:    ldrh w10, [x1, #8]
 ; CHECK-NEXT:    ldr x8, [x0]
-; CHECK-NEXT:    ldr x11, [x1]
-; CHECK-NEXT:    eor w9, w9, w10
-; CHECK-NEXT:    and x9, x9, #0xffff
-; CHECK-NEXT:    eor x8, x8, x11
-; CHECK-NEXT:    orr x8, x8, x9
-; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    ldr x9, [x1]
+; CHECK-NEXT:    ldrh w10, [x0, #8]
+; CHECK-NEXT:    ldrh w11, [x1, #8]
+; CHECK-NEXT:    cmp x8, x9
+; CHECK-NEXT:    ccmp x10, x11, #0, eq
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %cr = call i32 @bcmp(ptr %a, ptr %b, i64 10)
@@ -195,10 +189,8 @@
 ; CHECK-NEXT:    ldr x9, [x1]
 ; CHECK-NEXT:    ldr w10, [x0, #8]
 ; CHECK-NEXT:    ldr w11, [x1, #8]
-; CHECK-NEXT:    eor x8, x8, x9
-; CHECK-NEXT:    eor w9, w10, w11
-; CHECK-NEXT:    orr x8, x8, x9
-; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    cmp x8, x9
+; CHECK-NEXT:    ccmp x10, x11, #0, eq
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %cr = call i32 @bcmp(ptr %a, ptr %b, i64 12)
@@ -274,13 +266,10 @@
 ; CHECK-NEXT:    ldp x8, x9, [x0]
 ; CHECK-NEXT:    ldp x10, x11, [x1]
 ; CHECK-NEXT:    ldr w12, [x0, #16]
-; CHECK-NEXT:    ldr w13, [x1, #16]
-; CHECK-NEXT:    eor x8, x8, x10
-; CHECK-NEXT:    eor x9, x9, x11
-; CHECK-NEXT:    eor w10, w12, w13
-; CHECK-NEXT:    orr x8, x8, x9
-; CHECK-NEXT:    orr x8, x8, x10
-; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    cmp x8, x10
+; CHECK-NEXT:    ldr w8, [x1, #16]
+; CHECK-NEXT:    ccmp x9, x11, #0, eq
+; CHECK-NEXT:    ccmp x12, x8, #0, eq
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %cr = call i32 @bcmp(ptr %a, ptr %b, i64 20)
@@ -311,17 +300,13 @@
 ; CHECK-NEXT:    ldp x8, x9, [x0]
 ; CHECK-NEXT:    ldp x10, x11, [x1]
 ; CHECK-NEXT:    ldr x12, [x0, #16]
-; CHECK-NEXT:    ldr x13, [x1, #16]
-; CHECK-NEXT:    ldr w14, [x0, #24]
-; CHECK-NEXT:    eor x8, x8, x10
-; CHECK-NEXT:    ldr w15, [x1, #24]
-; CHECK-NEXT:    eor x9, x9, x11
-; CHECK-NEXT:    eor x10, x12, x13
-; CHECK-NEXT:    orr x8, x8, x9
-; CHECK-NEXT:    eor w11, w14, w15
-; CHECK-NEXT:    orr x9, x10, x11
-; CHECK-NEXT:    orr x8, x8, x9
-; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    cmp x8, x10
+; CHECK-NEXT:    ldr x8, [x1, #16]
+; CHECK-NEXT:    ccmp x9, x11, #0, eq
+; CHECK-NEXT:    ldr w9, [x0, #24]
+; CHECK-NEXT:    ldr w10, [x1, #24]
+; CHECK-NEXT:    ccmp x12, x8, #0, eq
+; CHECK-NEXT:    ccmp x9, x10, #0, eq
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %cr = call i32 @bcmp(ptr %a, ptr %b, i64 28)
@@ -334,21 +319,15 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp x8, x9, [x0]
 ; CHECK-NEXT:    ldp x10, x11, [x1]
-; CHECK-NEXT:    ldp x12, x13, [x0, #16]
-; CHECK-NEXT:    ldp x14, x15, [x1, #16]
-; CHECK-NEXT:    eor x8, x8, x10
-; CHECK-NEXT:    eor x9, x9, x11
-; CHECK-NEXT:    ldrb w16, [x0, #32]
-; CHECK-NEXT:    orr x8, x8, x9
-; CHECK-NEXT:    ldrb w17, [x1, #32]
-; CHECK-NEXT:    eor x10, x12, x14
-; CHECK-NEXT:    eor x11, x13, x15
-; CHECK-NEXT:    eor w12, w16, w17
-; CHECK-NEXT:    orr x9, x10, x11
-; CHECK-NEXT:    and x10, x12, #0xff
-; CHECK-NEXT:    orr x8, x8, x9
-; CHECK-NEXT:    orr x8, x8, x10
-; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    cmp x8, x10
+; CHECK-NEXT:    ccmp x9, x11, #0, eq
+; CHECK-NEXT:    ldrb w11, [x1, #32]
+; CHECK-NEXT:    ldp x8, x9, [x0, #16]
+; CHECK-NEXT:    ldp x12, x10, [x1, #16]
+; CHECK-NEXT:    ccmp x8, x12, #0, eq
+; CHECK-NEXT:    ldrb w8, [x0, #32]
+; CHECK-NEXT:    ccmp x9, x10, #0, eq
+; CHECK-NEXT:    ccmp x8, x11, #0, eq
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %cr = call i32 @bcmp(ptr %a, ptr %b, i64 33)
Index: llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
===================================================================
--- llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
+++ llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
@@ -190,11 +190,10 @@
 define i32 @combine_setcc_glue(i128 noundef %x, i128 noundef %y) {
 ; CHECK-LABEL: combine_setcc_glue:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cmp x0, x2
-; CHECK-NEXT:    cset w8, eq
-; CHECK-NEXT:    ccmp x1, x3, #0, eq
-; CHECK-NEXT:    cset w9, eq
-; CHECK-NEXT:    orr w0, w9, w8
+; CHECK-NEXT:    cmp x1, x3
+; CHECK-NEXT:    ccmp x0, x2, #0, eq
+; CHECK-NEXT:    ccmp x0, x2, #4, ne
+; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
 entry:
   %cmp3 = icmp eq i128 %x, %y
@@ -222,8 +221,11 @@
 ; CHECK-NEXT:    csel x10, x1, x9, lo
 ; CHECK-NEXT:    subs x8, x2, x8
 ; CHECK-NEXT:    sbc x9, x3, x10
-; CHECK-NEXT:    ccmp x3, x10, #0, eq
-; CHECK-NEXT:    b.ne .LBB12_1
+; CHECK-NEXT:    cset w11, ne
+; CHECK-NEXT:    cmp x3, x10
+; CHECK-NEXT:    cset w10, ne
+; CHECK-NEXT:    orr w10, w11, w10
+; CHECK-NEXT:    cbnz w10, .LBB12_1
 ; CHECK-NEXT:  // %bb.2: // %do.end
 ; CHECK-NEXT:    mov x0, xzr
 ; CHECK-NEXT:    mov x1, xzr
Index: llvm/test/CodeGen/AArch64/i128-cmp.ll
===================================================================
--- llvm/test/CodeGen/AArch64/i128-cmp.ll
+++ llvm/test/CodeGen/AArch64/i128-cmp.ll
@@ -6,8 +6,8 @@
 define i1 @cmp_i128_eq(i128 %a, i128 %b) {
 ; CHECK-LABEL: cmp_i128_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp x0, x2
-; CHECK-NEXT:    ccmp x1, x3, #0, eq
+; CHECK-NEXT:    cmp x1, x3
+; CHECK-NEXT:    ccmp x0, x2, #0, eq
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %cmp = icmp eq i128 %a, %b
@@ -17,8 +17,8 @@
 define i1 @cmp_i128_ne(i128 %a, i128 %b) {
 ; CHECK-LABEL: cmp_i128_ne:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp x0, x2
-; CHECK-NEXT:    ccmp x1, x3, #0, eq
+; CHECK-NEXT:    cmp x1, x3
+; CHECK-NEXT:    ccmp x0, x2, #0, eq
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i128 %a, %b
Index: llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
===================================================================
--- llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
+++ llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
@@ -68,10 +68,10 @@
 ; AARCH-NEXT:    adds x11, x12, x11
 ; AARCH-NEXT:    adc x12, x13, x14
 ; AARCH-NEXT:    adds x10, x11, x10
-; AARCH-NEXT:    asr x11, x1, #63
 ; AARCH-NEXT:    adc x9, x12, x9
-; AARCH-NEXT:    cmp x10, x11
-; AARCH-NEXT:    ccmp x9, x11, #0, eq
+; AARCH-NEXT:    asr x11, x1, #63
+; AARCH-NEXT:    cmp x9, x11
+; AARCH-NEXT:    ccmp x10, x11, #0, eq
 ; AARCH-NEXT:    cset w9, ne
 ; AARCH-NEXT:    tbz x8, #63, .LBB1_2
 ; AARCH-NEXT:  // %bb.1: // %Entry