diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4173,10 +4173,26 @@
   case AArch64ISD::SETCC_MERGE_ZERO:
     return Reinterpret;
   case ISD::INTRINSIC_WO_CHAIN:
-    if (InOp.getConstantOperandVal(0) == Intrinsic::aarch64_sve_ptrue)
+    switch (InOp.getConstantOperandVal(0)) {
+    case Intrinsic::aarch64_sve_ptrue:
+    case Intrinsic::aarch64_sve_cmpeq_wide:
+    case Intrinsic::aarch64_sve_cmpne_wide:
+    case Intrinsic::aarch64_sve_cmpge_wide:
+    case Intrinsic::aarch64_sve_cmpgt_wide:
+    case Intrinsic::aarch64_sve_cmplt_wide:
+    case Intrinsic::aarch64_sve_cmple_wide:
+    case Intrinsic::aarch64_sve_cmphs_wide:
+    case Intrinsic::aarch64_sve_cmphi_wide:
+    case Intrinsic::aarch64_sve_cmplo_wide:
+    case Intrinsic::aarch64_sve_cmpls_wide:
       return Reinterpret;
+    }
   }
 
+  // Splat vectors of 1 will generate ptrue instructions
+  if (ISD::isConstantSplatVectorAllOnes(InOp.getNode()))
+    return Reinterpret;
+
   // Otherwise, zero the newly introduced lanes.
   SDValue Mask = getPTrue(DAG, DL, InVT, AArch64SVEPredPattern::all);
   SDValue MaskReinterpret =
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret-no-streaming.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret-no-streaming.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret-no-streaming.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+; This test should belong in sve-intrinsics-reinterpret.ll, but uses types
+; that are invalid with sve-streaming
+
+define <vscale x 16 x i1> @reinterpret_bool_from_splat() {
+; CHECK-LABEL: reinterpret_bool_from_splat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ret
+  %ins = insertelement <vscale x 2 x i1> undef, i1 1, i32 0
+  %splat = shufflevector <vscale x 2 x i1> %ins, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %splat)
+  ret <vscale x 16 x i1> %out
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
+
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
@@ -102,7 +102,22 @@
   ret <vscale x 16 x i1> %2
 }
 
+; The first reinterpret should prevent the second one from being simplified as a nop
+define <vscale x 16 x i1> @chained_reinterpret() {
+; CHECK-LABEL: chained_reinterpret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    ret
+  %in = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %cast2 = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %in)
+  %out = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %cast2)
+  ret <vscale x 16 x i1> %out
+}
+
 declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 immarg)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpgt.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 
 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv16i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
@@ -46,9 +46,43 @@
   ret i32 %conv
 }
 
+define i32 @cmpeq_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpeq_wide_nxv8i16:
+; CHECK: cmpeq p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
+define i32 @cmpeq_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpeq_wide_nxv4i32:
+; CHECK: cmpeq p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
 
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
@@ -46,9 +46,43 @@
   ret i32 %conv
 }
 
+define i32 @cmpge_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpge_wide_nxv8i16:
+; CHECK: cmpge p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpge.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
+define i32 @cmpge_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpge_wide_nxv4i32:
+; CHECK: cmpge p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpge.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
 
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
@@ -46,9 +46,43 @@
   ret i32 %conv
 }
 
+define i32 @cmpgt_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpgt_wide_nxv8i16:
+; CHECK: cmpgt p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
+define i32 @cmpgt_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpgt_wide_nxv4i32:
+; CHECK: cmpgt p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
 
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
@@ -46,9 +46,43 @@
   ret i32 %conv
 }
 
+define i32 @cmphi_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmphi_wide_nxv8i16:
+; CHECK: cmphi p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmphi.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
+define i32 @cmphi_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmphi_wide_nxv4i32:
+; CHECK: cmphi p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmphi.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
 
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
@@ -46,9 +46,43 @@
   ret i32 %conv
 }
 
+define i32 @cmphs_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmphs_wide_nxv8i16:
+; CHECK: cmphs p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmphs.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
+define i32 @cmphs_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmphs_wide_nxv4i32:
+; CHECK: cmphs p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmphs.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
 
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
@@ -31,9 +31,43 @@
   ret i32 %conv
 }
 
+define i32 @cmple_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmple_wide_nxv8i16:
+; CHECK: cmple p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmple.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
+define i32 @cmple_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmple_wide_nxv4i32:
+; CHECK: cmple p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmple.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmple.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmple.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
 
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll
@@ -31,9 +31,43 @@
   ret i32 %conv
 }
 
+define i32 @cmplo_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmplo_wide_nxv8i16:
+; CHECK: cmplo p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmplo.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
+define i32 @cmplo_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmplo_wide_nxv4i32:
+; CHECK: cmplo p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmplo.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmplo.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmplo.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmplo.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
 
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll
@@ -31,9 +31,43 @@
   ret i32 %conv
 }
 
+define i32 @cmpls_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpls_wide_nxv8i16:
+; CHECK: cmpls p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpls.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
+define i32 @cmpls_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpls_wide_nxv4i32:
+; CHECK: cmpls p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpls.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpls.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpls.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpls.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
 
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll
@@ -31,9 +31,43 @@
   ret i32 %conv
 }
 
+define i32 @cmplt_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmplt_wide_nxv8i16:
+; CHECK: cmplt p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
+define i32 @cmplt_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmplt_wide_nxv4i32:
+; CHECK: cmplt p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmplt.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmplt.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmplt.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
 
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
@@ -46,9 +46,43 @@
   ret i32 %conv
 }
 
+define i32 @cmpne_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpne_wide_nxv8i16:
+; CHECK: cmpne p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
+define i32 @cmpne_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpne_wide_nxv4i32:
+; CHECK: cmpne p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
 
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
--- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
@@ -587,5 +587,18 @@
   ret %2
 }
 
+; Splat for predicates
+; This guards optimizations that rely on splats of 1 being generated as a ptrue
+
+define <vscale x 2 x i1> @sve_splat_i1_allactive() {
+; CHECK-LABEL: sve_splat_i1_allactive:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ret
+  %ins = insertelement <vscale x 2 x i1> undef, i1 1, i32 0
+  %splat = shufflevector <vscale x 2 x i1> %ins, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+  ret <vscale x 2 x i1> %splat
+}
+
 ; +bf16 is required for the bfloat version.
 attributes #0 = { "target-features"="+sve,+bf16" }