diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -545,6 +545,9 @@
     setOperationAction(ISD::CTPOP, MVT::i64, Legal);
     setOperationAction(ISD::CTPOP, MVT::i128, Expand);
     setOperationAction(ISD::PARITY, MVT::i128, Expand);
+    setOperationAction(ISD::CTTZ, MVT::i32, Legal);
+    setOperationAction(ISD::CTTZ, MVT::i64, Legal);
+    setOperationAction(ISD::CTTZ, MVT::i128, Expand);
   } else {
     setOperationAction(ISD::CTPOP, MVT::i32, Custom);
     setOperationAction(ISD::CTPOP, MVT::i64, Custom);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -8494,7 +8494,18 @@
 //===----------------------------------------------------------------------===//
 defm ABS : OneOperandData<0b001000, "abs">, Requires<[HasCSSC]>;
 defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>;
-defm CTZ : OneOperandData<0b000110, "ctz">, Requires<[HasCSSC]>;
+defm CTZ : OneOperandData<0b000110, "ctz", cttz>, Requires<[HasCSSC]>;
+
+// Also select CTZ for an explicit ctlz(bitreverse(x)), which computes the same
+// value as cttz(x).
+// TODO(SDCOMP-61561): Before upstreaming, confirm that this is what upstream
+// would want. See the Gerrit review comments.
+let Predicates = [HasCSSC] in {
+  def : Pat<(i32 (ctlz (bitreverse GPR32:$Rn))),
+            (CTZWr GPR32:$Rn)>;
+  def : Pat<(i64 (ctlz (bitreverse GPR64:$Rn))),
+            (CTZXr GPR64:$Rn)>;
+}
 
 defm SMAX : ComparisonOp<0, 0, "smax">, Requires<[HasCSSC]>;
 defm SMIN : ComparisonOp<0, 1, "smin">, Requires<[HasCSSC]>;
diff --git a/llvm/test/CodeGen/AArch64/gpr_cttz.ll b/llvm/test/CodeGen/AArch64/gpr_cttz.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/gpr_cttz.ll
@@ -0,0 +1,172 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu -mattr=+cssc | FileCheck %s -check-prefix=CHECK-CSSC
+
+define i4 @cttz4(i4 %x) {
+; CHECK-LABEL: cttz4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, #0x10
+; CHECK-NEXT:    rbit w8, w8
+; CHECK-NEXT:    clz w0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz4:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    orr w8, w0, #0x10
+; CHECK-CSSC-NEXT:    ctz w0, w8
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i4 @llvm.cttz.i4(i4 %x)
+  ret i4 %ctz
+}
+
+define i8 @cttz8(i8 %x) {
+; CHECK-LABEL: cttz8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, #0x100
+; CHECK-NEXT:    rbit w8, w8
+; CHECK-NEXT:    clz w0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz8:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    orr w8, w0, #0x100
+; CHECK-CSSC-NEXT:    ctz w0, w8
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i8 @llvm.cttz.i8(i8 %x)
+  ret i8 %ctz
+}
+
+define i16 @cttz16(i16 %x) {
+; CHECK-LABEL: cttz16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, #0x10000
+; CHECK-NEXT:    rbit w8, w8
+; CHECK-NEXT:    clz w0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz16:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    orr w8, w0, #0x10000
+; CHECK-CSSC-NEXT:    ctz w0, w8
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i16 @llvm.cttz.i16(i16 %x)
+  ret i16 %ctz
+}
+
+define i17 @cttz17(i17 %x) {
+; CHECK-LABEL: cttz17:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, #0x20000
+; CHECK-NEXT:    rbit w8, w8
+; CHECK-NEXT:    clz w0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz17:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    orr w8, w0, #0x20000
+; CHECK-CSSC-NEXT:    ctz w0, w8
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i17 @llvm.cttz.i17(i17 %x)
+  ret i17 %ctz
+}
+
+define i32 @cttz32(i32 %x) nounwind readnone {
+; CHECK-LABEL: cttz32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rbit w8, w0
+; CHECK-NEXT:    clz w0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz32:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    ctz w0, w0
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i32 @llvm.cttz.i32(i32 %x)
+  ret i32 %ctz
+}
+
+define i64 @cttz64(i64 %x) nounwind readnone {
+; CHECK-LABEL: cttz64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rbit x8, x0
+; CHECK-NEXT:    clz x0, x8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz64:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    ctz x0, x0
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i64 @llvm.cttz.i64(i64 %x)
+  ret i64 %ctz
+}
+
+define i128 @cttz128(i128 %x) nounwind readnone {
+; CHECK-LABEL: cttz128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rbit x9, x1
+; CHECK-NEXT:    rbit x8, x0
+; CHECK-NEXT:    clz x9, x9
+; CHECK-NEXT:    clz x8, x8
+; CHECK-NEXT:    add x9, x9, #64
+; CHECK-NEXT:    cmp x0, #0
+; CHECK-NEXT:    csel x0, x8, x9, ne
+; CHECK-NEXT:    mov x1, xzr
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz128:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    ctz x9, x1
+; CHECK-CSSC-NEXT:    ctz x8, x0
+; CHECK-CSSC-NEXT:    add x9, x9, #64
+; CHECK-CSSC-NEXT:    cmp x0, #0
+; CHECK-CSSC-NEXT:    csel x0, x8, x9, ne
+; CHECK-CSSC-NEXT:    mov x1, xzr
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i128 @llvm.cttz.i128(i128 %x)
+  ret i128 %ctz
+}
+
+define i32 @cttz32combine(i32 %x) nounwind readnone {
+; CHECK-LABEL: cttz32combine:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rbit w8, w0
+; CHECK-NEXT:    clz w0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz32combine:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    ctz w0, w0
+; CHECK-CSSC-NEXT:    ret
+  %rev = tail call i32 @llvm.bitreverse.i32(i32 %x)
+  %ctz = tail call i32 @llvm.ctlz.i32(i32 %rev)
+  ret i32 %ctz
+}
+
+define i64 @cttz64combine(i64 %x) nounwind readnone {
+; CHECK-LABEL: cttz64combine:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rbit x8, x0
+; CHECK-NEXT:    clz x0, x8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz64combine:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    ctz x0, x0
+; CHECK-CSSC-NEXT:    ret
+  %rev = tail call i64 @llvm.bitreverse.i64(i64 %x)
+  %ctz = tail call i64 @llvm.ctlz.i64(i64 %rev)
+  ret i64 %ctz
+}
+
+declare i4 @llvm.cttz.i4(i4 %x) nounwind readnone
+declare i8 @llvm.cttz.i8(i8 %x) nounwind readnone
+declare i16 @llvm.cttz.i16(i16 %x) nounwind readnone
+declare i17 @llvm.cttz.i17(i17 %x) nounwind readnone
+declare i32 @llvm.cttz.i32(i32) nounwind readnone
+declare i64 @llvm.cttz.i64(i64) nounwind readnone
+declare i128 @llvm.cttz.i128(i128) nounwind readnone
+
+declare i32 @llvm.ctlz.i32(i32 %x) nounwind readnone
+declare i32 @llvm.bitreverse.i32(i32 %x) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64 %x) nounwind readnone
+declare i64 @llvm.bitreverse.i64(i64 %x) nounwind readnone