diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -546,6 +546,9 @@
     setOperationAction(ISD::CTPOP, MVT::i64, Legal);
     setOperationAction(ISD::CTPOP, MVT::i128, Expand);
     setOperationAction(ISD::PARITY, MVT::i128, Expand);
+    setOperationAction(ISD::CTTZ, MVT::i32, Legal);
+    setOperationAction(ISD::CTTZ, MVT::i64, Legal);
+    setOperationAction(ISD::CTTZ, MVT::i128, Expand);
   } else {
     setOperationAction(ISD::CTPOP, MVT::i32, Custom);
     setOperationAction(ISD::CTPOP, MVT::i64, Custom);
@@ -932,6 +935,8 @@
 
   setTargetDAGCombine(ISD::GlobalAddress);
 
+  setTargetDAGCombine(ISD::CTLZ);
+
   // In case of strict alignment, avoid an excessive number of byte wide stores.
   MaxStoresPerMemsetOptSize = 8;
   MaxStoresPerMemset =
@@ -20280,6 +20285,17 @@
                      DAG.getConstant(MinOffset, DL, MVT::i64));
 }
 
+static SDValue performCTLZCombine(SDNode *N, SelectionDAG &DAG,
+                                  const AArch64Subtarget *Subtarget) {
+  SDValue BR = N->getOperand(0);
+  if (!Subtarget->hasCSSC() || BR.getOpcode() != ISD::BITREVERSE ||
+      !BR.getValueType().isScalarInteger())
+    return SDValue();
+
+  SDLoc DL(N);
+  return DAG.getNode(ISD::CTTZ, DL, BR.getValueType(), BR.getOperand(0));
+}
+
 // Turns the vector of indices into a vector of byte offstes by scaling Offset
 // by (BitWidth / 8).
 static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset,
@@ -21185,6 +21201,8 @@
     break;
   case ISD::GlobalAddress:
     return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
+  case ISD::CTLZ:
+    return performCTLZCombine(N, DAG, Subtarget);
   }
   return SDValue();
 }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -8556,7 +8556,7 @@
 //===----------------------------------------------------------------------===//
 defm ABS : OneOperandData<0b001000, "abs">, Requires<[HasCSSC]>;
 defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>;
-defm CTZ : OneOperandData<0b000110, "ctz">, Requires<[HasCSSC]>;
+defm CTZ : OneOperandData<0b000110, "ctz", cttz>, Requires<[HasCSSC]>;
 
 defm SMAX : ComparisonOp<0, 0, "smax">, Requires<[HasCSSC]>;
 defm SMIN : ComparisonOp<0, 1, "smin">, Requires<[HasCSSC]>;
diff --git a/llvm/test/CodeGen/AArch64/gpr_cttz.ll b/llvm/test/CodeGen/AArch64/gpr_cttz.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/gpr_cttz.ll
@@ -0,0 +1,172 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu -mattr=+cssc | FileCheck %s -check-prefix=CHECK-CSSC
+
+define i4 @cttz4(i4 %x) {
+; CHECK-LABEL: cttz4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, #0x10
+; CHECK-NEXT:    rbit w8, w8
+; CHECK-NEXT:    clz w0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz4:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    orr w8, w0, #0x10
+; CHECK-CSSC-NEXT:    ctz w0, w8
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i4 @llvm.cttz.i4(i4 %x, i1 false)
+  ret i4 %ctz
+}
+
+define i8 @cttz8(i8 %x) {
+; CHECK-LABEL: cttz8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, #0x100
+; CHECK-NEXT:    rbit w8, w8
+; CHECK-NEXT:    clz w0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz8:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    orr w8, w0, #0x100
+; CHECK-CSSC-NEXT:    ctz w0, w8
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i8 @llvm.cttz.i8(i8 %x, i1 false)
+  ret i8 %ctz
+}
+
+define i16 @cttz16(i16 %x) {
+; CHECK-LABEL: cttz16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, #0x10000
+; CHECK-NEXT:    rbit w8, w8
+; CHECK-NEXT:    clz w0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz16:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    orr w8, w0, #0x10000
+; CHECK-CSSC-NEXT:    ctz w0, w8
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i16 @llvm.cttz.i16(i16 %x, i1 false)
+  ret i16 %ctz
+}
+
+define i17 @cttz17(i17 %x) {
+; CHECK-LABEL: cttz17:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, #0x20000
+; CHECK-NEXT:    rbit w8, w8
+; CHECK-NEXT:    clz w0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz17:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    orr w8, w0, #0x20000
+; CHECK-CSSC-NEXT:    ctz w0, w8
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i17 @llvm.cttz.i17(i17 %x, i1 false)
+  ret i17 %ctz
+}
+
+define i32 @cttz32(i32 %x) nounwind readnone {
+; CHECK-LABEL: cttz32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rbit w8, w0
+; CHECK-NEXT:    clz w0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz32:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    ctz w0, w0
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+  ret i32 %ctz
+}
+
+define i64 @cttz64(i64 %x) nounwind readnone {
+; CHECK-LABEL: cttz64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rbit x8, x0
+; CHECK-NEXT:    clz x0, x8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz64:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    ctz x0, x0
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i64 @llvm.cttz.i64(i64 %x, i1 false)
+  ret i64 %ctz
+}
+
+define i128 @cttz128(i128 %x) nounwind readnone {
+; CHECK-LABEL: cttz128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rbit x9, x1
+; CHECK-NEXT:    rbit x8, x0
+; CHECK-NEXT:    clz x9, x9
+; CHECK-NEXT:    clz x8, x8
+; CHECK-NEXT:    add x9, x9, #64
+; CHECK-NEXT:    cmp x0, #0
+; CHECK-NEXT:    csel x0, x8, x9, ne
+; CHECK-NEXT:    mov x1, xzr
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz128:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    ctz x9, x1
+; CHECK-CSSC-NEXT:    ctz x8, x0
+; CHECK-CSSC-NEXT:    add x9, x9, #64
+; CHECK-CSSC-NEXT:    cmp x0, #0
+; CHECK-CSSC-NEXT:    csel x0, x8, x9, ne
+; CHECK-CSSC-NEXT:    mov x1, xzr
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i128 @llvm.cttz.i128(i128 %x, i1 false)
+  ret i128 %ctz
+}
+
+define i32 @cttz32combine(i32 %x) nounwind readnone {
+; CHECK-LABEL: cttz32combine:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rbit w8, w0
+; CHECK-NEXT:    clz w0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz32combine:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    ctz w0, w0
+; CHECK-CSSC-NEXT:    ret
+  %rev = tail call i32 @llvm.bitreverse.i32(i32 %x)
+  %ctz = tail call i32 @llvm.ctlz.i32(i32 %rev, i1 false)
+  ret i32 %ctz
+}
+
+define i64 @cttz64combine(i64 %x) nounwind readnone {
+; CHECK-LABEL: cttz64combine:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rbit x8, x0
+; CHECK-NEXT:    clz x0, x8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz64combine:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    ctz x0, x0
+; CHECK-CSSC-NEXT:    ret
+  %rev = tail call i64 @llvm.bitreverse.i64(i64 %x)
+  %ctz = tail call i64 @llvm.ctlz.i64(i64 %rev, i1 false)
+  ret i64 %ctz
+}
+
+declare i4 @llvm.cttz.i4(i4, i1) nounwind readnone
+declare i8 @llvm.cttz.i8(i8, i1) nounwind readnone
+declare i16 @llvm.cttz.i16(i16, i1) nounwind readnone
+declare i17 @llvm.cttz.i17(i17, i1) nounwind readnone
+declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
+declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
+declare i128 @llvm.cttz.i128(i128, i1) nounwind readnone
+
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+declare i32 @llvm.bitreverse.i32(i32) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
+declare i64 @llvm.bitreverse.i64(i64) nounwind readnone