Index: llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -2570,7 +2570,8 @@ // bits that are implicitly ANDed off by the above opcodes and if so, skip // the AND. uint64_t MaskImm; - if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm)) + if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) && + !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm)) return false; if (countTrailingOnes(MaskImm) < Bits) Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1673,14 +1673,22 @@ // we combine a (CMP (sub 0, op1), op2) into a CMN instruction ? Opcode = AArch64ISD::ADDS; LHS = LHS.getOperand(1); - } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) && - !isUnsignedIntSetCC(CC)) { - // Similarly, (CMP (and X, Y), 0) can be implemented with a TST - // (a.k.a. ANDS) except that the flags are only guaranteed to work for one - // of the signed comparisons. - Opcode = AArch64ISD::ANDS; - RHS = LHS.getOperand(1); - LHS = LHS.getOperand(0); + } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) { + if (LHS.getOpcode() == ISD::AND) { + // Similarly, (CMP (and X, Y), 0) can be implemented with a TST + // (a.k.a. ANDS) except that the flags are only guaranteed to work for one + // of the signed comparisons. + const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl, + DAG.getVTList(VT, MVT_CC), + LHS.getOperand(0), + LHS.getOperand(1)); + // Replace all users of (and X, Y) with newly generated (ands X, Y) + DAG.ReplaceAllUsesWith(LHS, ANDSNode); + return ANDSNode.getValue(1); + } else if (LHS.getOpcode() == AArch64ISD::ANDS) { + // Use result of ANDS + return LHS.getValue(1); + } } return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS) Index: llvm/test/CodeGen/AArch64/funnel-shift.ll =================================================================== --- llvm/test/CodeGen/AArch64/funnel-shift.ll +++ llvm/test/CodeGen/AArch64/funnel-shift.ll @@ -18,12 +18,11 @@ define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: fshl_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: and w9, w2, #0x1f +; CHECK-NEXT: ands w9, w2, #0x1f ; CHECK-NEXT: neg w9, w9 ; CHECK-NEXT: lsl w8, w0, w2 ; CHECK-NEXT: lsr w9, w1, w9 ; CHECK-NEXT: orr w8, w8, w9 -; CHECK-NEXT: tst w2, #0x1f ; CHECK-NEXT: csel w0, w0, w8, eq ; CHECK-NEXT: ret %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z) @@ -146,12 +145,11 @@ define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: fshr_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: and w9, w2, #0x1f +; CHECK-NEXT: ands w9, w2, #0x1f ; CHECK-NEXT: neg w9, w9 ; CHECK-NEXT: lsr w8, w1, w2 ; CHECK-NEXT: lsl w9, w0, w9 ; CHECK-NEXT: orr w8, w9, w8 -; CHECK-NEXT: tst w2, #0x1f ; CHECK-NEXT: csel w0, w1, w8, eq ; CHECK-NEXT: ret %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z) Index: llvm/test/CodeGen/AArch64/peephole-and-tst.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/peephole-and-tst.ll @@ -0,0 +1,81 @@ +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s + +%struct.anon = type { i32*, i32* } + +@ptr_wrapper = common dso_local local_unnamed_addr global %struct.anon* null, align 8 + +define dso_local i32 @test_func_i32_two_uses(i32 %in, i32 %bit, i32 %mask) local_unnamed_addr { +entry: + %0 = load %struct.anon*, %struct.anon** @ptr_wrapper, align 8 + %result = getelementptr inbounds %struct.anon, %struct.anon* %0, i64 0, i32 1 + %tobool2 = icmp ne i32 %mask, 0 + br label %do.body + +do.body: ; preds = %4, %entry +; CHECK-LABEL: test_func_i32_two_uses: +; CHECK: ands [[DSTREG:w[0-9]+]] +; Usage #1 +; CHECK: cmp [[DSTREG]] +; Usage #2 +; CHECK: cbz [[DSTREG]] + %bit.addr.0 = phi i32 [ %bit, %entry ], [ %shl, %4 ] + %retval1.0 = phi i32 [ 0, %entry ], [ %retval1.1, %4 ] + %and = and i32 %bit.addr.0, %in + %tobool = icmp eq i32 %and, 0 + %not.tobool = xor i1 %tobool, true + %inc = zext i1 %not.tobool to i32 + %retval1.1 = add nuw nsw i32 %retval1.0, %inc + %1 = xor i1 %tobool, true + %2 = or i1 %tobool2, %1 + %dummy = and i32 %mask, %in + %use_and = icmp eq i32 %and, %dummy + %dummy_or = or i1 %use_and, %2 + br i1 %dummy_or, label %3, label %4 + +3: ; preds = %do.body + store i32* null, i32** %result, align 8 + br label %4 + +4: ; preds = %do.body, %3 + %shl = shl i32 %bit.addr.0, 1 + %tobool6 = icmp eq i32 %shl, 0 + br i1 %tobool6, label %do.end, label %do.body + +do.end: ; preds = %4 + ret i32 %retval1.1 +} + +define dso_local i32 @test_func_i64_one_use(i64 %in, i64 %bit, i64 %mask) local_unnamed_addr #0 { +entry: + %0 = load %struct.anon*, %struct.anon** @ptr_wrapper, align 8 + %result = getelementptr inbounds %struct.anon, %struct.anon* %0, i64 0, i32 1 + %tobool2 = icmp ne i64 %mask, 0 + br label %do.body + +do.body: ; preds = %4, %entry +; CHECK-LABEL: test_func_i64_one_use: +; CHECK: ands [[DSTREG:x[0-9]+]], [[SRCREG1:x[0-9]+]], [[SRCREG2:x[0-9]+]] +; CHECK-NEXT: orr [[DSTREG]], [[SRCREG_ORR:x[0-9]+]], [[DSTREG]] + %bit.addr.0 = phi i64 [ %bit, %entry ], [ %shl, %4 ] + %retval1.0 = phi i32 [ 0, %entry ], [ %retval1.1, %4 ] + %and = and i64 %bit.addr.0, %in + %tobool = icmp eq i64 %and, 0 + %not.tobool = xor i1 %tobool, true + %inc = zext i1 %not.tobool to i32 + %retval1.1 = add nuw nsw i32 %retval1.0, %inc + %1 = xor i1 %tobool, true + %2 = or i1 %tobool2, %1 + br i1 %2, label %3, label %4 + +3: ; preds = %do.body + store i32* null, i32** %result, align 8 + br label %4 + +4: ; preds = %do.body, %3 + %shl = shl i64 %bit.addr.0, 1 + %tobool6 = icmp eq i64 %shl, 0 + br i1 %tobool6, label %do.end, label %do.body + +do.end: ; preds = %4 + ret i32 %retval1.1 +} Index: llvm/test/CodeGen/AArch64/shift-by-signext.ll =================================================================== --- llvm/test/CodeGen/AArch64/shift-by-signext.ll +++ llvm/test/CodeGen/AArch64/shift-by-signext.ll @@ -80,12 +80,11 @@ define i32 @n6_fshl(i32 %x, i32 %y, i8 %shamt) nounwind { ; CHECK-LABEL: n6_fshl: ; CHECK: // %bb.0: -; CHECK-NEXT: and w9, w2, #0x1f +; CHECK-NEXT: ands w9, w2, #0x1f ; CHECK-NEXT: neg w9, w9 ; CHECK-NEXT: lsl w8, w0, w2 ; CHECK-NEXT: lsr w9, w1, w9 ; CHECK-NEXT: orr w8, w8, w9 -; CHECK-NEXT: tst w2, #0x1f ; CHECK-NEXT: csel w0, w0, w8, eq ; CHECK-NEXT: ret %shamt_wide = sext i8 %shamt to i32 @@ -95,12 +94,11 @@ define i32 @n7_fshr(i32 %x, i32 %y, i8 %shamt) nounwind { ; CHECK-LABEL: n7_fshr: ; CHECK: // %bb.0: -; CHECK-NEXT: and w9, w2, #0x1f +; CHECK-NEXT: ands w9, w2, #0x1f ; CHECK-NEXT: neg w9, w9 ; CHECK-NEXT: lsr w8, w1, w2 ; CHECK-NEXT: lsl w9, w0, w9 ; CHECK-NEXT: orr w8, w9, w8 -; CHECK-NEXT: tst w2, #0x1f ; CHECK-NEXT: csel w0, w1, w8, eq ; CHECK-NEXT: ret %shamt_wide = sext i8 %shamt to i32