diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1512,6 +1512,29 @@
       }
       return Cost;
     }
+    case Intrinsic::get_active_lane_mask: {
+      EVT ResVT = getTLI()->getValueType(DL, RetTy, true);
+      EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true);
+
+      // If the target does not expand the intrinsic, assume it is cheap to
+      // implement and charge only the result type's legalization cost.
+      if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgType)) {
+        std::pair<InstructionCost, MVT> LT =
+            getTLI()->getTypeLegalizationCost(DL, RetTy);
+        return LT.first;
+      }
+
+      // Otherwise cost the expansion: a uadd_sat on a vector of the argument
+      // type, with one lane per mask element, plus an ICMP_ULT compare.
+      Type *ExpRetTy = VectorType::get(
+          ICA.getArgTypes()[0], cast<VectorType>(RetTy)->getElementCount());
+      IntrinsicCostAttributes Attrs(Intrinsic::uadd_sat, ExpRetTy, {}, FMF);
+      InstructionCost Cost =
+          thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
+      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, ExpRetTy, RetTy,
+                                          CmpInst::ICMP_ULT, CostKind);
+      return Cost;
+    }
     }
 
     // Assume that we need to scalarize this intrinsic.
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -408,4 +408,77 @@
 declare <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
 declare <vscale x 4 x double> @llvm.experimental.vector.splice.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>, i32)
 
+define void @get_lane_mask() #0 {
+; CHECK-LABEL: 'get_lane_mask'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+  %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef)
+  %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef)
+  %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef)
+  %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef)
+
+  %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef)
+  %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef)
+  %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef)
+  %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef)
+
+  %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef)
+  %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef)
+
+  %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef)
+  %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef)
+  %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef)
+  %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef)
+
+  %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef)
+  %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
+  %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
+  %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
+
+  %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
+  %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
+
+  ret void
+}
+
+declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64, i64)
+declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64, i64)
+declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64, i64)
+declare <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64, i64)
+declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32, i32)
+declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32, i32)
+declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32, i32)
+declare <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32, i32)
+declare <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64, i64)
+declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16, i16)
+declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64, i64)
+declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64, i64)
+declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64, i64)
+declare <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64, i64)
+declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
+declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
+declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
+declare <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32, i32)
+declare <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64, i64)
+declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16, i16)
+
 attributes #0 = { "target-features"="+sve,+bf16" }
diff --git a/llvm/test/Analysis/CostModel/ARM/active_lane_mask.ll b/llvm/test/Analysis/CostModel/ARM/active_lane_mask.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/ARM/active_lane_mask.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -passes='print<cost-model>' 2>&1 -disable-output -mtriple=thumbv8m.main-none-eabi | FileCheck %s
+
+define void @get_lane_mask() {
+; CHECK-LABEL: 'get_lane_mask'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %mask_v8i1_i16 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i16(i16 undef, i16 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %mask_v4i1_i16 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i16(i16 undef, i16 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %mask_v2i1_i16 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i16(i16 undef, i16 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef)
+  %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef)
+  %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef)
+  %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef)
+
+  %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef)
+  %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
+  %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
+  %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
+
+  %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
+  %mask_v8i1_i16 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i16(i16 undef, i16 undef)
+  %mask_v4i1_i16 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i16(i16 undef, i16 undef)
+  %mask_v2i1_i16 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i16(i16 undef, i16 undef)
+
+  ret void
+}
+
+declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64, i64)
+declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64, i64)
+declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64, i64)
+declare <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64, i64)
+declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
+declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
+declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
+declare <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32, i32)
+declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16, i16)
+declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i16(i16, i16)
+declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i16(i16, i16)
+declare <2 x i1> @llvm.get.active.lane.mask.v2i1.i16(i16, i16)
diff --git a/llvm/test/Analysis/CostModel/RISCV/active_lane_mask.ll b/llvm/test/Analysis/CostModel/RISCV/active_lane_mask.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/active_lane_mask.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -passes='print<cost-model>' 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh | FileCheck %s
+
+define void @get_lane_mask() {
+; CHECK-LABEL: 'get_lane_mask'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv1i1_i64 = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv1i1_i32 = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef)
+  %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef)
+  %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef)
+  %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef)
+  %mask_nxv1i1_i64 = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 undef, i64 undef)
+
+  %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef)
+  %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef)
+  %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef)
+  %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef)
+  %mask_nxv1i1_i32 = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 undef, i32 undef)
+
+  %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef)
+  %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef)
+
+  %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef)
+  %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef)
+  %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef)
+  %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef)
+
+  %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef)
+  %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
+  %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
+  %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
+
+  %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
+  %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
+
+  ret void
+}
+
+declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64, i64)
+declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64, i64)
+declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64, i64)
+declare <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64, i64)
+declare <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64, i64)
+declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32, i32)
+declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32, i32)
+declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32, i32)
+declare <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32, i32)
+declare <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32, i32)
+declare <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64, i64)
+declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16, i16)
+declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64, i64)
+declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64, i64)
+declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64, i64)
+declare <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64, i64)
+declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
+declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
+declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
+declare <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32, i32)
+declare <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64, i64)
+declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16, i16)