Index: llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2099,6 +2099,7 @@ case G_FSIN: case G_FSQRT: case G_BSWAP: + case G_CTLZ: return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy); case G_SHL: case G_LSHR: Index: llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -457,6 +457,10 @@ }) .minScalarSameAs(1, 0); + getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct( + {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32}) + .scalarize(1); + computeTables(); verify(*ST.getInstrInfo()); } Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -322,7 +322,7 @@ # DEBUG: .. type index coverage check SKIPPED: no rules defined # # DEBUG-NEXT: G_CTLZ (opcode {{[0-9]+}}): 2 type indices -# DEBUG: .. type index coverage check SKIPPED: no rules defined +# DEBUG: .. the first uncovered type index: 2, OK # # DEBUG-NEXT: G_CTLZ_ZERO_UNDEF (opcode {{[0-9]+}}): 2 type indices # DEBUG: .. 
type index coverage check SKIPPED: no rules defined Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-ctlz.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-ctlz.mir +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-ctlz.mir @@ -0,0 +1,200 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=arm64-unknown-unknown -global-isel -run-pass=instruction-select %s -o - | FileCheck %s + +name: test_v8s8 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0 + + ; CHECK-LABEL: name: test_v8s8 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[CLZv8i8_:%[0-9]+]]:fpr64 = CLZv8i8 [[COPY]] + ; CHECK: $d0 = COPY [[CLZv8i8_]] + ; CHECK: RET_ReallyLR implicit $d0 + %0:fpr(<8 x s8>) = COPY $d0 + %1:fpr(<8 x s8>) = G_CTLZ %0(<8 x s8>) + $d0 = COPY %1(<8 x s8>) + RET_ReallyLR implicit $d0 + +... +--- +name: test_v4s16 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0 + + ; CHECK-LABEL: name: test_v4s16 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[CLZv4i16_:%[0-9]+]]:fpr64 = CLZv4i16 [[COPY]] + ; CHECK: $d0 = COPY [[CLZv4i16_]] + ; CHECK: RET_ReallyLR implicit $d0 + %0:fpr(<4 x s16>) = COPY $d0 + %1:fpr(<4 x s16>) = G_CTLZ %0(<4 x s16>) + $d0 = COPY %1(<4 x s16>) + RET_ReallyLR implicit $d0 + +... 
+--- +name: test_v2s32 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0 + + ; CHECK-LABEL: name: test_v2s32 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[CLZv2i32_:%[0-9]+]]:fpr64 = CLZv2i32 [[COPY]] + ; CHECK: $d0 = COPY [[CLZv2i32_]] + ; CHECK: RET_ReallyLR implicit $d0 + %0:fpr(<2 x s32>) = COPY $d0 + %1:fpr(<2 x s32>) = G_CTLZ %0(<2 x s32>) + $d0 = COPY %1(<2 x s32>) + RET_ReallyLR implicit $d0 + +... +--- +name: test_s64 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0 + + ; CHECK-LABEL: name: test_s64 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]] + ; CHECK: [[CLZXr:%[0-9]+]]:gpr64 = CLZXr [[COPY1]] + ; CHECK: $d0 = COPY [[CLZXr]] + ; CHECK: RET_ReallyLR implicit $d0 + %0:fpr(s64) = COPY $d0 + %2:gpr(s64) = COPY %0(s64) + %1:gpr(s64) = G_CTLZ %2(s64) + $d0 = COPY %1(s64) + RET_ReallyLR implicit $d0 + +... +--- +name: test_s32 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $s0 + ; CHECK-LABEL: name: test_s32 + ; CHECK: liveins: $s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: [[CLZWr:%[0-9]+]]:gpr32 = CLZWr [[COPY1]] + ; CHECK: $s0 = COPY [[CLZWr]] + ; CHECK: RET_ReallyLR implicit $s0 + %0:fpr(s32) = COPY $s0 + %2:gpr(s32) = COPY %0(s32) + %1:gpr(s32) = G_CTLZ %2(s32) + $s0 = COPY %1(s32) + RET_ReallyLR implicit $s0 + +... 
+--- +name: test_v16s8 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: test_v16s8 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[CLZv16i8_:%[0-9]+]]:fpr128 = CLZv16i8 [[COPY]] + ; CHECK: $q0 = COPY [[CLZv16i8_]] + ; CHECK: RET_ReallyLR implicit $q0 + %0:fpr(<16 x s8>) = COPY $q0 + %1:fpr(<16 x s8>) = G_CTLZ %0(<16 x s8>) + $q0 = COPY %1(<16 x s8>) + RET_ReallyLR implicit $q0 + +... +--- +name: test_v8s16 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: test_v8s16 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[CLZv8i16_:%[0-9]+]]:fpr128 = CLZv8i16 [[COPY]] + ; CHECK: $q0 = COPY [[CLZv8i16_]] + ; CHECK: RET_ReallyLR implicit $q0 + %0:fpr(<8 x s16>) = COPY $q0 + %1:fpr(<8 x s16>) = G_CTLZ %0(<8 x s16>) + $q0 = COPY %1(<8 x s16>) + RET_ReallyLR implicit $q0 + +... +--- +name: test_v4s32 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: test_v4s32 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[CLZv4i32_:%[0-9]+]]:fpr128 = CLZv4i32 [[COPY]] + ; CHECK: $q0 = COPY [[CLZv4i32_]] + ; CHECK: RET_ReallyLR implicit $q0 + %0:fpr(<4 x s32>) = COPY $q0 + %1:fpr(<4 x s32>) = G_CTLZ %0(<4 x s32>) + $q0 = COPY %1(<4 x s32>) + RET_ReallyLR implicit $q0 + +... 
+--- +name: test_v2s64 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0 + ; CHECK-LABEL: name: test_v2s64 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(<2 x s64>) = COPY $q0 + ; CHECK: [[CTLZ:%[0-9]+]]:fpr(<2 x s64>) = G_CTLZ [[COPY]](<2 x s64>) + ; CHECK: $q0 = COPY [[CTLZ]](<2 x s64>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:fpr(<2 x s64>) = COPY $q0 + %1:fpr(<2 x s64>) = G_CTLZ %0(<2 x s64>) + $q0 = COPY %1(<2 x s64>) + RET_ReallyLR implicit $q0 Index: llvm/trunk/test/CodeGen/AArch64/arm64-clrsb.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-clrsb.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-clrsb.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 | FileCheck %s +; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 -O0 -pass-remarks-missed=gisel* -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=GISEL,FALLBACK target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -7,6 +8,7 @@ declare i64 @llvm.ctlz.i64(i64, i1) #1 ; Function Attrs: nounwind ssp +; FALLBACK-NOT: remark{{.*}}clrsb32 define i32 @clrsb32(i32 %x) #2 { entry: %shr = ashr i32 %x, 31 @@ -18,9 +20,15 @@ ret i32 %0 ; CHECK-LABEL: clrsb32 ; CHECK: cls [[TEMP:w[0-9]+]], [[TEMP]] + +; FIXME: We should produce the same result here to save some code size. After +; that, we can remove the GISEL special casing.
+; GISEL-LABEL: clrsb32 +; GISEL: clz } ; Function Attrs: nounwind ssp +; FALLBACK-NOT: remark{{.*}}clrsb64 define i64 @clrsb64(i64 %x) #3 { entry: %shr = ashr i64 %x, 63 @@ -32,4 +40,6 @@ ret i64 %0 ; CHECK-LABEL: clrsb64 ; CHECK: cls [[TEMP:x[0-9]+]], [[TEMP]] +; GISEL-LABEL: clrsb64 +; GISEL: clz } Index: llvm/trunk/test/CodeGen/AArch64/arm64-vclz.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-vclz.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-vclz.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* -mtriple=arm64-eabi -aarch64-neon-syntax=apple 2>&1 | FileCheck %s --check-prefixes=FALLBACK,CHECK +; FALLBACK-NOT: remark{{.*}}test_vclz_u8 define <8 x i8> @test_vclz_u8(<8 x i8> %a) nounwind readnone ssp { ; CHECK-LABEL: test_vclz_u8: ; CHECK: clz.8b v0, v0 @@ -8,6 +10,7 @@ ret <8 x i8> %vclz.i } +; FALLBACK-NOT: remark{{.*}}test_vclz_s8 define <8 x i8> @test_vclz_s8(<8 x i8> %a) nounwind readnone ssp { ; CHECK-LABEL: test_vclz_s8: ; CHECK: clz.8b v0, v0 @@ -16,6 +19,7 @@ ret <8 x i8> %vclz.i } +; FALLBACK-NOT: remark{{.*}}test_vclz_u16 define <4 x i16> @test_vclz_u16(<4 x i16> %a) nounwind readnone ssp { ; CHECK-LABEL: test_vclz_u16: ; CHECK: clz.4h v0, v0 @@ -24,6 +28,7 @@ ret <4 x i16> %vclz1.i } +; FALLBACK-NOT: remark{{.*}}test_vclz_s16 define <4 x i16> @test_vclz_s16(<4 x i16> %a) nounwind readnone ssp { ; CHECK-LABEL: test_vclz_s16: ; CHECK: clz.4h v0, v0 @@ -32,6 +37,7 @@ ret <4 x i16> %vclz1.i } +; FALLBACK-NOT: remark{{.*}}test_vclz_u32 define <2 x i32> @test_vclz_u32(<2 x i32> %a) nounwind readnone ssp { ; CHECK-LABEL: test_vclz_u32: ; CHECK: clz.2s v0, v0 @@ -40,6 +46,7 @@ ret <2 x i32> %vclz1.i } +; FALLBACK-NOT: remark{{.*}}test_vclz_s32 define <2 x i32> @test_vclz_s32(<2 x i32> %a) nounwind readnone ssp { ; CHECK-LABEL: test_vclz_s32: ; CHECK: clz.2s v0, v0 @@ -48,18 +55,21
@@ ret <2 x i32> %vclz1.i } +; FALLBACK-NOT: remark{{.*}}test_vclz_u64 define <1 x i64> @test_vclz_u64(<1 x i64> %a) nounwind readnone ssp { ; CHECK-LABEL: test_vclz_u64: %vclz1.i = tail call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %a, i1 false) nounwind ret <1 x i64> %vclz1.i } +; FALLBACK-NOT: remark{{.*}}test_vclz_s64 define <1 x i64> @test_vclz_s64(<1 x i64> %a) nounwind readnone ssp { ; CHECK-LABEL: test_vclz_s64: %vclz1.i = tail call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %a, i1 false) nounwind ret <1 x i64> %vclz1.i } +; FALLBACK-NOT: remark{{.*}}test_vclzq_u8 define <16 x i8> @test_vclzq_u8(<16 x i8> %a) nounwind readnone ssp { ; CHECK-LABEL: test_vclzq_u8: ; CHECK: clz.16b v0, v0 @@ -68,6 +78,7 @@ ret <16 x i8> %vclz.i } +; FALLBACK-NOT: remark{{.*}}test_vclzq_s8 define <16 x i8> @test_vclzq_s8(<16 x i8> %a) nounwind readnone ssp { ; CHECK-LABEL: test_vclzq_s8: ; CHECK: clz.16b v0, v0 @@ -76,6 +87,7 @@ ret <16 x i8> %vclz.i } +; FALLBACK-NOT: remark{{.*}}test_vclzq_u16 define <8 x i16> @test_vclzq_u16(<8 x i16> %a) nounwind readnone ssp { ; CHECK-LABEL: test_vclzq_u16: ; CHECK: clz.8h v0, v0 @@ -84,6 +96,7 @@ ret <8 x i16> %vclz1.i } +; FALLBACK-NOT: remark{{.*}}test_vclzq_s16 define <8 x i16> @test_vclzq_s16(<8 x i16> %a) nounwind readnone ssp { ; CHECK-LABEL: test_vclzq_s16: ; CHECK: clz.8h v0, v0 @@ -92,6 +105,7 @@ ret <8 x i16> %vclz1.i } +; FALLBACK-NOT: remark{{.*}}test_vclzq_u32 define <4 x i32> @test_vclzq_u32(<4 x i32> %a) nounwind readnone ssp { ; CHECK-LABEL: test_vclzq_u32: ; CHECK: clz.4s v0, v0 @@ -100,6 +114,7 @@ ret <4 x i32> %vclz1.i } +; FALLBACK-NOT: remark{{.*}}test_vclzq_s32 define <4 x i32> @test_vclzq_s32(<4 x i32> %a) nounwind readnone ssp { ; CHECK-LABEL: test_vclzq_s32: ; CHECK: clz.4s v0, v0 @@ -108,12 +123,14 @@ ret <4 x i32> %vclz1.i } +; FALLBACK-NOT: remark{{.*}}test_vclzq_u64 define <2 x i64> @test_vclzq_u64(<2 x i64> %a) nounwind readnone ssp { ; CHECK-LABEL: test_vclzq_u64: %vclz1.i = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x 
i64> %a, i1 false) nounwind ret <2 x i64> %vclz1.i } +; FALLBACK-NOT: remark{{.*}}test_vclzq_s64 define <2 x i64> @test_vclzq_s64(<2 x i64> %a) nounwind readnone ssp { ; CHECK-LABEL: test_vclzq_s64: %vclz1.i = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) nounwind