Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17147,6 +17147,31 @@
   return SDValue();
 }
 
+// Combines for S forms of generic opcodes (AArch64ISD::ANDS into ISD::AND for
+// example). NOTE: This could be used for ADDS and SUBS too, if we can find test
+// cases.
+static SDValue performANDSCombine(SDNode *N,
+                                  TargetLowering::DAGCombinerInfo &DCI) {
+  SDLoc DL(N);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  EVT VT = N->getValueType(0);
+
+  // If the flag result isn't used, convert back to a generic opcode.
+  if (!N->hasAnyUseOfValue(1)) {
+    SDValue Res = DCI.DAG.getNode(ISD::AND, DL, VT, LHS, RHS);
+    return DCI.DAG.getMergeValues({Res, DCI.DAG.getConstant(0, DL, MVT::i32)},
+                                  DL);
+  }
+
+  // Combine identical generic nodes into this node, re-using the result.
+  if (SDNode *GenericAddSub =
+          DCI.DAG.getNodeIfExists(ISD::AND, DCI.DAG.getVTList(VT), {LHS, RHS}))
+    DCI.CombineTo(GenericAddSub, SDValue(N, 0));
+
+  return SDValue();
+}
+
 static SDValue performSetCCPunpkCombine(SDNode *N, SelectionDAG &DAG) {
   // setcc_merge_zero pred
   //   (sign_extend (extract_subvector (setcc_merge_zero ... pred ...))), 0, ne
@@ -18190,6 +18215,8 @@
     return performTBZCombine(N, DCI, DAG);
   case AArch64ISD::CSEL:
     return performCSELCombine(N, DCI, DAG);
+  case AArch64ISD::ANDS:
+    return performANDSCombine(N, DCI);
   case AArch64ISD::DUP:
     return performPostLD1Combine(N, DCI, false);
   case AArch64ISD::NVCAST:
Index: llvm/test/CodeGen/AArch64/peephole-and-tst.ll
===================================================================
--- llvm/test/CodeGen/AArch64/peephole-and-tst.ll
+++ llvm/test/CodeGen/AArch64/peephole-and-tst.ll
@@ -126,8 +126,7 @@
 define i64 @test_and1(i64 %x, i64 %y) {
 ; CHECK-LABEL: test_and1:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x3
-; CHECK-NEXT: tst x0, #0x3
+; CHECK-NEXT: ands x8, x0, #0x3
 ; CHECK-NEXT: csel x0, x8, x1, eq
 ; CHECK-NEXT: ret
   %a = and i64 %x, 3
@@ -151,22 +150,20 @@
 define i64 @test_and3(i64 %x, i64 %y) {
 ; CHECK-LABEL: test_and3:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT: .cfi_def_cfa_offset 32
 ; CHECK-NEXT: .cfi_offset w19, -8
 ; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w21, -24
 ; CHECK-NEXT: .cfi_offset w30, -32
 ; CHECK-NEXT: mov x20, x0
-; CHECK-NEXT: and x21, x0, #0x3
 ; CHECK-NEXT: mov x0, xzr
 ; CHECK-NEXT: mov x19, x1
 ; CHECK-NEXT: bl callee
-; CHECK-NEXT: tst x20, #0x3
-; CHECK-NEXT: csel x0, x21, x19, eq
+; CHECK-NEXT: ands x8, x20, #0x3
+; CHECK-NEXT: csel x0, x8, x19, eq
 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
   %a = and i64 %x, 3
   %b = call i64 @callee(i64 0)
@@ -185,8 +182,7 @@
 ; CHECK-NEXT: mov x19, x0
 ; CHECK-NEXT: ands x0, x0, #0x3
 ; CHECK-NEXT: bl callee
-; CHECK-NEXT: tst x19, #0x3
-; CHECK-NEXT: and x8, x19, #0x3
+; CHECK-NEXT: ands x8, x19, #0x3
 ; CHECK-NEXT: csel x0, x8, x0, eq
 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
 ; CHECK-NEXT: ret
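
For reference, below is a minimal standalone reproducer in the spirit of test_and1 above (an editor's sketch, not part of the patch): the function name and the icmp/select lines are assumptions, since only the `%a = and i64 %x, 3` line of the test body is visible in the hunk. Compiled with something like `llc -mtriple=aarch64`, the and / icmp-eq-0 / select pattern is the shape that the new ANDS handling lets the backend lower to a single flag-setting `ands` feeding `csel`, instead of a separate `and` plus `tst` as checked in the old CHECK lines.

; Sketch only: every name other than %a is assumed, not copied from the test file.
define i64 @and_flags_example(i64 %x, i64 %y) {
  %a = and i64 %x, 3
  %cmp = icmp eq i64 %a, 0
  %sel = select i1 %cmp, i64 %a, i64 %y
  ret i64 %sel
}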