Index: lib/Basic/Targets/ARM.cpp
===================================================================
--- lib/Basic/Targets/ARM.cpp
+++ lib/Basic/Targets/ARM.cpp
@@ -440,8 +440,17 @@
       HW_FP |= HW_FP_HP;
     } else if (Feature == "+fullfp16") {
       HasLegalHalfType = true;
+      HW_FP |= HW_FP_SP | HW_FP_DP | HW_FP_HP;
+      FPU |= VFP4FPU;
     } else if (Feature == "+dotprod") {
+      FPU |= NeonFPU;
+      HW_FP |= HW_FP_SP | HW_FP_DP;
       DotProd = true;
+    } else if (Feature == "+fp16fml") {
+      HW_FP |= HW_FP_HP;
+      HasLegalHalfType = true;
+      FPU |= VFP4FPU;
+      HW_FP |= HW_FP_SP | HW_FP_DP | HW_FP_HP;
     }
   }
   HW_FP &= ~HW_FP_remove;
Index: test/CodeGen/arm_neon_intrinsics.c
===================================================================
--- test/CodeGen/arm_neon_intrinsics.c
+++ test/CodeGen/arm_neon_intrinsics.c
@@ -4,6 +4,9 @@
 // RUN:  -disable-O0-optnone -emit-llvm -o - %s \
 // RUN:  | opt -S -mem2reg | FileCheck %s
 
+// RUN: %clang -O1 -target armv8a-linux-eabi -march=armv8a+fp16fml \
+// RUN:  -S -emit-llvm -o - %s | FileCheck %s.v8
+
 #include <arm_neon.h>
 
 // CHECK-LABEL: @test_vaba_s8(
Index: test/CodeGen/arm_neon_intrinsics.c.v8
===================================================================
--- /dev/null
+++ test/CodeGen/arm_neon_intrinsics.c.v8
@@ -0,0 +1,12234 @@
+// CHECK-LABEL: test_vaba_s8
+// CHECK: entry:
+// CHECK-NEXT: %vabd_v.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c)
+// CHECK-NEXT: %add.i = add <8 x i8> %vabd_v.i.i, %a
+// CHECK-NEXT: ret <8 x i8> %add.i
+// CHECK-LABEL: test_vaba_s16
+// CHECK: entry:
+// CHECK-NEXT: %vabd_v2.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c)
+// CHECK-NEXT: %add.i = add <4 x i16> %vabd_v2.i.i, %a
+// CHECK-NEXT: ret <4 x i16> %add.i
+// CHECK-LABEL: test_vaba_s32
+// CHECK: entry:
+// CHECK-NEXT: %vabd_v2.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c)
+// CHECK-NEXT: %add.i = add <2 x i32> %vabd_v2.i.i, %a
+// CHECK-NEXT: ret <2 x i32> %add.i
+// CHECK-LABEL: test_vaba_u8
+// CHECK: entry:
+// CHECK-NEXT: %vabd_v.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c)
+// CHECK-NEXT: %add.i = add <8 x i8> %vabd_v.i.i, %a
+// CHECK-NEXT: ret <8 x i8> %add.i
+// CHECK-LABEL: test_vaba_u16
+// CHECK: entry:
+// CHECK-NEXT: %vabd_v2.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c)
+// CHECK-NEXT: %add.i = add <4 x i16> %vabd_v2.i.i, %a
+// CHECK-NEXT: ret <4 x i16> %add.i
+// CHECK-LABEL: test_vaba_u32
+// CHECK: entry:
+// CHECK-NEXT: %vabd_v2.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c)
+// CHECK-NEXT: %add.i = add <2 x i32> %vabd_v2.i.i, %a
+// CHECK-NEXT: ret <2 x i32> %add.i
+// CHECK-LABEL: test_vabaq_s8
+// CHECK: entry:
+// CHECK-NEXT: %vabdq_v.i.i = tail call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %b, <16 x i8> %c)
+// CHECK-NEXT: %add.i = add <16 x i8> %vabdq_v.i.i, %a
+// CHECK-NEXT: ret <16 x i8> %add.i
+// CHECK-LABEL: test_vabaq_s16
+// CHECK: entry:
+// CHECK-NEXT: %vabdq_v2.i.i = tail call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %b, <8 x i16> %c)
+// CHECK-NEXT: %add.i = add <8 x i16> %vabdq_v2.i.i, %a
+// CHECK-NEXT: ret <8 x i16> %add.i
+// CHECK-LABEL: test_vabaq_s32
+// CHECK: entry:
+// CHECK-NEXT: %vabdq_v2.i.i = tail call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %b, <4 x i32> %c)
+// CHECK-NEXT: %add.i = add <4 x i32> %vabdq_v2.i.i, %a
+// CHECK-NEXT: ret <4 x i32> %add.i
+// CHECK-LABEL: test_vabaq_u8
+// CHECK: entry:
+// CHECK-NEXT: %vabdq_v.i.i = tail call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %b, <16 x i8> %c)
<16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %b, <16 x i8> %c) +// CHECK-NEXT: %add.i = add <16 x i8> %vabdq_v.i.i, %a +// CHECK-NEXT: ret <16 x i8> %add.i +// CHECK-LABEL: test_vabaq_u16 +// CHECK: entry: +// CHECK-NEXT: %vabdq_v2.i.i = tail call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %b, <8 x i16> %c) +// CHECK-NEXT: %add.i = add <8 x i16> %vabdq_v2.i.i, %a +// CHECK-NEXT: ret <8 x i16> %add.i +// CHECK-LABEL: test_vabaq_u32 +// CHECK: entry: +// CHECK-NEXT: %vabdq_v2.i.i = tail call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %b, <4 x i32> %c) +// CHECK-NEXT: %add.i = add <4 x i32> %vabdq_v2.i.i, %a +// CHECK-NEXT: ret <4 x i32> %add.i +// CHECK-LABEL: test_vabal_s8 +// CHECK: entry: +// CHECK-NEXT: %vabd_v.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c) +// CHECK-NEXT: %vmovl.i.i.i = zext <8 x i8> %vabd_v.i.i.i to <8 x i16> +// CHECK-NEXT: %add.i = add <8 x i16> %vmovl.i.i.i, %a +// CHECK-NEXT: ret <8 x i16> %add.i +// CHECK-LABEL: test_vabal_s16 +// CHECK: entry: +// CHECK-NEXT: %vabd_v2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c) +// CHECK-NEXT: %vmovl.i.i.i = zext <4 x i16> %vabd_v2.i.i.i to <4 x i32> +// CHECK-NEXT: %add.i = add <4 x i32> %vmovl.i.i.i, %a +// CHECK-NEXT: ret <4 x i32> %add.i +// CHECK-LABEL: test_vabal_s32 +// CHECK: entry: +// CHECK-NEXT: %vabd_v2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c) +// CHECK-NEXT: %vmovl.i.i.i = zext <2 x i32> %vabd_v2.i.i.i to <2 x i64> +// CHECK-NEXT: %add.i = add <2 x i64> %vmovl.i.i.i, %a +// CHECK-NEXT: ret <2 x i64> %add.i +// CHECK-LABEL: test_vabal_u8 +// CHECK: entry: +// CHECK-NEXT: %vabd_v.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c) +// CHECK-NEXT: %vmovl.i.i.i = zext <8 x i8> %vabd_v.i.i.i to <8 x i16> +// CHECK-NEXT: %add.i = add <8 x i16> %vmovl.i.i.i, %a +// CHECK-NEXT: ret <8 x i16> %add.i +// CHECK-LABEL: test_vabal_u16 +// CHECK: entry: +// CHECK-NEXT: %vabd_v2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c) +// CHECK-NEXT: %vmovl.i.i.i = zext <4 x i16> %vabd_v2.i.i.i to <4 x i32> +// CHECK-NEXT: %add.i = add <4 x i32> %vmovl.i.i.i, %a +// CHECK-NEXT: ret <4 x i32> %add.i +// CHECK-LABEL: test_vabal_u32 +// CHECK: entry: +// CHECK-NEXT: %vabd_v2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c) +// CHECK-NEXT: %vmovl.i.i.i = zext <2 x i32> %vabd_v2.i.i.i to <2 x i64> +// CHECK-NEXT: %add.i = add <2 x i64> %vmovl.i.i.i, %a +// CHECK-NEXT: ret <2 x i64> %add.i +// CHECK-LABEL: test_vabd_s8 +// CHECK: entry: +// CHECK-NEXT: %vabd_v.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vabd_v.i +// CHECK-LABEL: test_vabd_s16 +// CHECK: entry: +// CHECK-NEXT: %vabd_v2.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vabd_v2.i +// CHECK-LABEL: test_vabd_s32 +// CHECK: entry: +// CHECK-NEXT: %vabd_v2.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vabd_v2.i +// CHECK-LABEL: test_vabd_u8 +// CHECK: entry: +// CHECK-NEXT: %vabd_v.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vabd_v.i +// CHECK-LABEL: test_vabd_u16 +// CHECK: entry: +// CHECK-NEXT: %vabd_v2.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vabd_v2.i +// 
CHECK-LABEL: test_vabd_u32 +// CHECK: entry: +// CHECK-NEXT: %vabd_v2.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vabd_v2.i +// CHECK-LABEL: test_vabd_f32 +// CHECK: entry: +// CHECK-NEXT: %vabd_v2.i = tail call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %a, <2 x float> %b) +// CHECK-NEXT: ret <2 x float> %vabd_v2.i +// CHECK-LABEL: test_vabdq_s8 +// CHECK: entry: +// CHECK-NEXT: %vabdq_v.i = tail call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vabdq_v.i +// CHECK-LABEL: test_vabdq_s16 +// CHECK: entry: +// CHECK-NEXT: %vabdq_v2.i = tail call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vabdq_v2.i +// CHECK-LABEL: test_vabdq_s32 +// CHECK: entry: +// CHECK-NEXT: %vabdq_v2.i = tail call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vabdq_v2.i +// CHECK-LABEL: test_vabdq_u8 +// CHECK: entry: +// CHECK-NEXT: %vabdq_v.i = tail call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vabdq_v.i +// CHECK-LABEL: test_vabdq_u16 +// CHECK: entry: +// CHECK-NEXT: %vabdq_v2.i = tail call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vabdq_v2.i +// CHECK-LABEL: test_vabdq_u32 +// CHECK: entry: +// CHECK-NEXT: %vabdq_v2.i = tail call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vabdq_v2.i +// CHECK-LABEL: test_vabdq_f32 +// CHECK: entry: +// CHECK-NEXT: %vabdq_v2.i = tail call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %a, <4 x float> %b) +// CHECK-NEXT: ret <4 x float> %vabdq_v2.i +// CHECK-LABEL: test_vabdl_s8 +// CHECK: entry: +// CHECK-NEXT: %vabd_v.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: %vmovl.i.i = zext <8 x i8> %vabd_v.i.i to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %vmovl.i.i +// CHECK-LABEL: test_vabdl_s16 +// CHECK: entry: +// CHECK-NEXT: %vabd_v2.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: %vmovl.i.i = zext <4 x i16> %vabd_v2.i.i to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %vmovl.i.i +// CHECK-LABEL: test_vabdl_s32 +// CHECK: entry: +// CHECK-NEXT: %vabd_v2.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: %vmovl.i.i = zext <2 x i32> %vabd_v2.i.i to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %vmovl.i.i +// CHECK-LABEL: test_vabdl_u8 +// CHECK: entry: +// CHECK-NEXT: %vabd_v.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: %vmovl.i.i = zext <8 x i8> %vabd_v.i.i to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %vmovl.i.i +// CHECK-LABEL: test_vabdl_u16 +// CHECK: entry: +// CHECK-NEXT: %vabd_v2.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: %vmovl.i.i = zext <4 x i16> %vabd_v2.i.i to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %vmovl.i.i +// CHECK-LABEL: test_vabdl_u32 +// CHECK: entry: +// CHECK-NEXT: %vabd_v2.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: %vmovl.i.i = zext <2 x i32> %vabd_v2.i.i to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %vmovl.i.i +// CHECK-LABEL: test_vabs_s8 +// CHECK: entry: +// CHECK-NEXT: %vabs.i = tail call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %a) +// CHECK-NEXT: ret <8 x i8> %vabs.i +// CHECK-LABEL: 
test_vabs_s16 +// CHECK: entry: +// CHECK-NEXT: %vabs1.i = tail call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %a) +// CHECK-NEXT: ret <4 x i16> %vabs1.i +// CHECK-LABEL: test_vabs_s32 +// CHECK: entry: +// CHECK-NEXT: %vabs1.i = tail call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %a) +// CHECK-NEXT: ret <2 x i32> %vabs1.i +// CHECK-LABEL: test_vabs_f32 +// CHECK: entry: +// CHECK-NEXT: %vabs1.i = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) +// CHECK-NEXT: ret <2 x float> %vabs1.i +// CHECK-LABEL: test_vabsq_s8 +// CHECK: entry: +// CHECK-NEXT: %vabs.i = tail call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %a) +// CHECK-NEXT: ret <16 x i8> %vabs.i +// CHECK-LABEL: test_vabsq_s16 +// CHECK: entry: +// CHECK-NEXT: %vabs1.i = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %a) +// CHECK-NEXT: ret <8 x i16> %vabs1.i +// CHECK-LABEL: test_vabsq_s32 +// CHECK: entry: +// CHECK-NEXT: %vabs1.i = tail call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %a) +// CHECK-NEXT: ret <4 x i32> %vabs1.i +// CHECK-LABEL: test_vabsq_f32 +// CHECK: entry: +// CHECK-NEXT: %vabs1.i = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) +// CHECK-NEXT: ret <4 x float> %vabs1.i +// CHECK-LABEL: test_vadd_s8 +// CHECK: entry: +// CHECK-NEXT: %add.i = add <8 x i8> %b, %a +// CHECK-NEXT: ret <8 x i8> %add.i +// CHECK-LABEL: test_vadd_s16 +// CHECK: entry: +// CHECK-NEXT: %add.i = add <4 x i16> %b, %a +// CHECK-NEXT: ret <4 x i16> %add.i +// CHECK-LABEL: test_vadd_s32 +// CHECK: entry: +// CHECK-NEXT: %add.i = add <2 x i32> %b, %a +// CHECK-NEXT: ret <2 x i32> %add.i +// CHECK-LABEL: test_vadd_s64 +// CHECK: entry: +// CHECK-NEXT: %add.i = add <1 x i64> %b, %a +// CHECK-NEXT: ret <1 x i64> %add.i +// CHECK-LABEL: test_vadd_f32 +// CHECK: entry: +// CHECK-NEXT: %add.i = fadd <2 x float> %a, %b +// CHECK-NEXT: ret <2 x float> %add.i +// CHECK-LABEL: test_vadd_u8 +// CHECK: entry: +// CHECK-NEXT: %add.i = add <8 x i8> %b, %a +// CHECK-NEXT: ret <8 x i8> %add.i +// CHECK-LABEL: test_vadd_u16 +// CHECK: entry: +// CHECK-NEXT: %add.i = add <4 x i16> %b, %a +// CHECK-NEXT: ret <4 x i16> %add.i +// CHECK-LABEL: test_vadd_u32 +// CHECK: entry: +// CHECK-NEXT: %add.i = add <2 x i32> %b, %a +// CHECK-NEXT: ret <2 x i32> %add.i +// CHECK-LABEL: test_vadd_u64 +// CHECK: entry: +// CHECK-NEXT: %add.i = add <1 x i64> %b, %a +// CHECK-NEXT: ret <1 x i64> %add.i +// CHECK-LABEL: test_vaddq_s8 +// CHECK: entry: +// CHECK-NEXT: %add.i = add <16 x i8> %b, %a +// CHECK-NEXT: ret <16 x i8> %add.i +// CHECK-LABEL: test_vaddq_s16 +// CHECK: entry: +// CHECK-NEXT: %add.i = add <8 x i16> %b, %a +// CHECK-NEXT: ret <8 x i16> %add.i +// CHECK-LABEL: test_vaddq_s32 +// CHECK: entry: +// CHECK-NEXT: %add.i = add <4 x i32> %b, %a +// CHECK-NEXT: ret <4 x i32> %add.i +// CHECK-LABEL: test_vaddq_s64 +// CHECK: entry: +// CHECK-NEXT: %add.i = add <2 x i64> %b, %a +// CHECK-NEXT: ret <2 x i64> %add.i +// CHECK-LABEL: test_vaddq_f32 +// CHECK: entry: +// CHECK-NEXT: %add.i = fadd <4 x float> %a, %b +// CHECK-NEXT: ret <4 x float> %add.i +// CHECK-LABEL: test_vaddq_u8 +// CHECK: entry: +// CHECK-NEXT: %add.i = add <16 x i8> %b, %a +// CHECK-NEXT: ret <16 x i8> %add.i +// CHECK-LABEL: test_vaddq_u16 +// CHECK: entry: +// CHECK-NEXT: %add.i = add <8 x i16> %b, %a +// CHECK-NEXT: ret <8 x i16> %add.i +// CHECK-LABEL: test_vaddq_u32 +// CHECK: entry: +// CHECK-NEXT: %add.i = add <4 x i32> %b, %a +// CHECK-NEXT: ret <4 x i32> %add.i +// CHECK-LABEL: test_vaddq_u64 +// CHECK: entry: +// CHECK-NEXT: %add.i = add <2 x i64> %b, %a 
+// CHECK-NEXT: ret <2 x i64> %add.i +// CHECK-LABEL: test_vaddhn_s16 +// CHECK: entry: +// CHECK-NEXT: %vaddhn.i = add <8 x i16> %b, %a +// CHECK-NEXT: %vaddhn1.i = lshr <8 x i16> %vaddhn.i, +// CHECK-NEXT: %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %vaddhn2.i +// CHECK-LABEL: test_vaddhn_s32 +// CHECK: entry: +// CHECK-NEXT: %vaddhn.i = add <4 x i32> %b, %a +// CHECK-NEXT: %vaddhn1.i = lshr <4 x i32> %vaddhn.i, +// CHECK-NEXT: %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %vaddhn2.i +// CHECK-LABEL: test_vaddhn_s64 +// CHECK: entry: +// CHECK-NEXT: %vaddhn.i = add <2 x i64> %b, %a +// CHECK-NEXT: %vaddhn1.i = lshr <2 x i64> %vaddhn.i, +// CHECK-NEXT: %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %vaddhn2.i +// CHECK-LABEL: test_vaddhn_u16 +// CHECK: entry: +// CHECK-NEXT: %vaddhn.i = add <8 x i16> %b, %a +// CHECK-NEXT: %vaddhn1.i = lshr <8 x i16> %vaddhn.i, +// CHECK-NEXT: %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %vaddhn2.i +// CHECK-LABEL: test_vaddhn_u32 +// CHECK: entry: +// CHECK-NEXT: %vaddhn.i = add <4 x i32> %b, %a +// CHECK-NEXT: %vaddhn1.i = lshr <4 x i32> %vaddhn.i, +// CHECK-NEXT: %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %vaddhn2.i +// CHECK-LABEL: test_vaddhn_u64 +// CHECK: entry: +// CHECK-NEXT: %vaddhn.i = add <2 x i64> %b, %a +// CHECK-NEXT: %vaddhn1.i = lshr <2 x i64> %vaddhn.i, +// CHECK-NEXT: %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %vaddhn2.i +// CHECK-LABEL: test_vaddl_s8 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = sext <8 x i8> %a to <8 x i16> +// CHECK-NEXT: %vmovl.i3.i = sext <8 x i8> %b to <8 x i16> +// CHECK-NEXT: %add.i = add nsw <8 x i16> %vmovl.i3.i, %vmovl.i.i +// CHECK-NEXT: ret <8 x i16> %add.i +// CHECK-LABEL: test_vaddl_s16 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = sext <4 x i16> %a to <4 x i32> +// CHECK-NEXT: %vmovl.i3.i = sext <4 x i16> %b to <4 x i32> +// CHECK-NEXT: %add.i = add nsw <4 x i32> %vmovl.i3.i, %vmovl.i.i +// CHECK-NEXT: ret <4 x i32> %add.i +// CHECK-LABEL: test_vaddl_s32 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = sext <2 x i32> %a to <2 x i64> +// CHECK-NEXT: %vmovl.i3.i = sext <2 x i32> %b to <2 x i64> +// CHECK-NEXT: %add.i = add nsw <2 x i64> %vmovl.i3.i, %vmovl.i.i +// CHECK-NEXT: ret <2 x i64> %add.i +// CHECK-LABEL: test_vaddl_u8 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = zext <8 x i8> %a to <8 x i16> +// CHECK-NEXT: %vmovl.i3.i = zext <8 x i8> %b to <8 x i16> +// CHECK-NEXT: %add.i = add nuw nsw <8 x i16> %vmovl.i3.i, %vmovl.i.i +// CHECK-NEXT: ret <8 x i16> %add.i +// CHECK-LABEL: test_vaddl_u16 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = zext <4 x i16> %a to <4 x i32> +// CHECK-NEXT: %vmovl.i3.i = zext <4 x i16> %b to <4 x i32> +// CHECK-NEXT: %add.i = add nuw nsw <4 x i32> %vmovl.i3.i, %vmovl.i.i +// CHECK-NEXT: ret <4 x i32> %add.i +// CHECK-LABEL: test_vaddl_u32 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = zext <2 x i32> %a to <2 x i64> +// CHECK-NEXT: %vmovl.i3.i = zext <2 x i32> %b to <2 x i64> +// CHECK-NEXT: %add.i = add nuw nsw <2 x i64> %vmovl.i3.i, %vmovl.i.i +// CHECK-NEXT: ret <2 x i64> %add.i +// CHECK-LABEL: test_vaddw_s8 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = sext <8 x i8> %b to <8 x i16> +// CHECK-NEXT: %add.i = add <8 x i16> %vmovl.i.i, %a +// CHECK-NEXT: ret <8 x i16> %add.i +// CHECK-LABEL: test_vaddw_s16 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i 
= sext <4 x i16> %b to <4 x i32> +// CHECK-NEXT: %add.i = add <4 x i32> %vmovl.i.i, %a +// CHECK-NEXT: ret <4 x i32> %add.i +// CHECK-LABEL: test_vaddw_s32 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = sext <2 x i32> %b to <2 x i64> +// CHECK-NEXT: %add.i = add <2 x i64> %vmovl.i.i, %a +// CHECK-NEXT: ret <2 x i64> %add.i +// CHECK-LABEL: test_vaddw_u8 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = zext <8 x i8> %b to <8 x i16> +// CHECK-NEXT: %add.i = add <8 x i16> %vmovl.i.i, %a +// CHECK-NEXT: ret <8 x i16> %add.i +// CHECK-LABEL: test_vaddw_u16 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = zext <4 x i16> %b to <4 x i32> +// CHECK-NEXT: %add.i = add <4 x i32> %vmovl.i.i, %a +// CHECK-NEXT: ret <4 x i32> %add.i +// CHECK-LABEL: test_vaddw_u32 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = zext <2 x i32> %b to <2 x i64> +// CHECK-NEXT: %add.i = add <2 x i64> %vmovl.i.i, %a +// CHECK-NEXT: ret <2 x i64> %add.i +// CHECK-LABEL: test_vand_s8 +// CHECK: entry: +// CHECK-NEXT: %and.i = and <8 x i8> %b, %a +// CHECK-NEXT: ret <8 x i8> %and.i +// CHECK-LABEL: test_vand_s16 +// CHECK: entry: +// CHECK-NEXT: %and.i = and <4 x i16> %b, %a +// CHECK-NEXT: ret <4 x i16> %and.i +// CHECK-LABEL: test_vand_s32 +// CHECK: entry: +// CHECK-NEXT: %and.i = and <2 x i32> %b, %a +// CHECK-NEXT: ret <2 x i32> %and.i +// CHECK-LABEL: test_vand_s64 +// CHECK: entry: +// CHECK-NEXT: %and.i = and <1 x i64> %b, %a +// CHECK-NEXT: ret <1 x i64> %and.i +// CHECK-LABEL: test_vand_u8 +// CHECK: entry: +// CHECK-NEXT: %and.i = and <8 x i8> %b, %a +// CHECK-NEXT: ret <8 x i8> %and.i +// CHECK-LABEL: test_vand_u16 +// CHECK: entry: +// CHECK-NEXT: %and.i = and <4 x i16> %b, %a +// CHECK-NEXT: ret <4 x i16> %and.i +// CHECK-LABEL: test_vand_u32 +// CHECK: entry: +// CHECK-NEXT: %and.i = and <2 x i32> %b, %a +// CHECK-NEXT: ret <2 x i32> %and.i +// CHECK-LABEL: test_vand_u64 +// CHECK: entry: +// CHECK-NEXT: %and.i = and <1 x i64> %b, %a +// CHECK-NEXT: ret <1 x i64> %and.i +// CHECK-LABEL: test_vandq_s8 +// CHECK: entry: +// CHECK-NEXT: %and.i = and <16 x i8> %b, %a +// CHECK-NEXT: ret <16 x i8> %and.i +// CHECK-LABEL: test_vandq_s16 +// CHECK: entry: +// CHECK-NEXT: %and.i = and <8 x i16> %b, %a +// CHECK-NEXT: ret <8 x i16> %and.i +// CHECK-LABEL: test_vandq_s32 +// CHECK: entry: +// CHECK-NEXT: %and.i = and <4 x i32> %b, %a +// CHECK-NEXT: ret <4 x i32> %and.i +// CHECK-LABEL: test_vandq_s64 +// CHECK: entry: +// CHECK-NEXT: %and.i = and <2 x i64> %b, %a +// CHECK-NEXT: ret <2 x i64> %and.i +// CHECK-LABEL: test_vandq_u8 +// CHECK: entry: +// CHECK-NEXT: %and.i = and <16 x i8> %b, %a +// CHECK-NEXT: ret <16 x i8> %and.i +// CHECK-LABEL: test_vandq_u16 +// CHECK: entry: +// CHECK-NEXT: %and.i = and <8 x i16> %b, %a +// CHECK-NEXT: ret <8 x i16> %and.i +// CHECK-LABEL: test_vandq_u32 +// CHECK: entry: +// CHECK-NEXT: %and.i = and <4 x i32> %b, %a +// CHECK-NEXT: ret <4 x i32> %and.i +// CHECK-LABEL: test_vandq_u64 +// CHECK: entry: +// CHECK-NEXT: %and.i = and <2 x i64> %b, %a +// CHECK-NEXT: ret <2 x i64> %and.i +// CHECK-LABEL: test_vbic_s8 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <8 x i8> %b, +// CHECK-NEXT: %and.i = and <8 x i8> %neg.i, %a +// CHECK-NEXT: ret <8 x i8> %and.i +// CHECK-LABEL: test_vbic_s16 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <4 x i16> %b, +// CHECK-NEXT: %and.i = and <4 x i16> %neg.i, %a +// CHECK-NEXT: ret <4 x i16> %and.i +// CHECK-LABEL: test_vbic_s32 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <2 x i32> %b, +// CHECK-NEXT: %and.i = and <2 x i32> %neg.i, %a +// CHECK-NEXT: ret 
<2 x i32> %and.i +// CHECK-LABEL: test_vbic_s64 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <1 x i64> %b, +// CHECK-NEXT: %and.i = and <1 x i64> %neg.i, %a +// CHECK-NEXT: ret <1 x i64> %and.i +// CHECK-LABEL: test_vbic_u8 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <8 x i8> %b, +// CHECK-NEXT: %and.i = and <8 x i8> %neg.i, %a +// CHECK-NEXT: ret <8 x i8> %and.i +// CHECK-LABEL: test_vbic_u16 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <4 x i16> %b, +// CHECK-NEXT: %and.i = and <4 x i16> %neg.i, %a +// CHECK-NEXT: ret <4 x i16> %and.i +// CHECK-LABEL: test_vbic_u32 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <2 x i32> %b, +// CHECK-NEXT: %and.i = and <2 x i32> %neg.i, %a +// CHECK-NEXT: ret <2 x i32> %and.i +// CHECK-LABEL: test_vbic_u64 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <1 x i64> %b, +// CHECK-NEXT: %and.i = and <1 x i64> %neg.i, %a +// CHECK-NEXT: ret <1 x i64> %and.i +// CHECK-LABEL: test_vbicq_s8 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <16 x i8> %b, +// CHECK-NEXT: %and.i = and <16 x i8> %neg.i, %a +// CHECK-NEXT: ret <16 x i8> %and.i +// CHECK-LABEL: test_vbicq_s16 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <8 x i16> %b, +// CHECK-NEXT: %and.i = and <8 x i16> %neg.i, %a +// CHECK-NEXT: ret <8 x i16> %and.i +// CHECK-LABEL: test_vbicq_s32 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <4 x i32> %b, +// CHECK-NEXT: %and.i = and <4 x i32> %neg.i, %a +// CHECK-NEXT: ret <4 x i32> %and.i +// CHECK-LABEL: test_vbicq_s64 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <2 x i64> %b, +// CHECK-NEXT: %and.i = and <2 x i64> %neg.i, %a +// CHECK-NEXT: ret <2 x i64> %and.i +// CHECK-LABEL: test_vbicq_u8 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <16 x i8> %b, +// CHECK-NEXT: %and.i = and <16 x i8> %neg.i, %a +// CHECK-NEXT: ret <16 x i8> %and.i +// CHECK-LABEL: test_vbicq_u16 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <8 x i16> %b, +// CHECK-NEXT: %and.i = and <8 x i16> %neg.i, %a +// CHECK-NEXT: ret <8 x i16> %and.i +// CHECK-LABEL: test_vbicq_u32 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <4 x i32> %b, +// CHECK-NEXT: %and.i = and <4 x i32> %neg.i, %a +// CHECK-NEXT: ret <4 x i32> %and.i +// CHECK-LABEL: test_vbicq_u64 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <2 x i64> %b, +// CHECK-NEXT: %and.i = and <2 x i64> %neg.i, %a +// CHECK-NEXT: ret <2 x i64> %and.i +// CHECK-LABEL: test_vbsl_s8 +// CHECK: entry: +// CHECK-NEXT: %vbsl_v.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) +// CHECK-NEXT: ret <8 x i8> %vbsl_v.i +// CHECK-LABEL: test_vbsl_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <8 x i8> +// CHECK-NEXT: %1 = bitcast <4 x i16> %b to <8 x i8> +// CHECK-NEXT: %2 = bitcast <4 x i16> %c to <8 x i8> +// CHECK-NEXT: %vbsl_v.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) +// CHECK-NEXT: %3 = bitcast <8 x i8> %vbsl_v.i to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %3 +// CHECK-LABEL: test_vbsl_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <8 x i8> +// CHECK-NEXT: %1 = bitcast <2 x i32> %b to <8 x i8> +// CHECK-NEXT: %2 = bitcast <2 x i32> %c to <8 x i8> +// CHECK-NEXT: %vbsl_v.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) +// CHECK-NEXT: %3 = bitcast <8 x i8> %vbsl_v.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %3 +// CHECK-LABEL: test_vbsl_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <8 x i8> +// CHECK-NEXT: %1 = bitcast <1 x i64> %b to <8 x i8> +// CHECK-NEXT: %2 = 
bitcast <1 x i64> %c to <8 x i8> +// CHECK-NEXT: %vbsl_v.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) +// CHECK-NEXT: %3 = bitcast <8 x i8> %vbsl_v.i to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %3 +// CHECK-LABEL: test_vbsl_u8 +// CHECK: entry: +// CHECK-NEXT: %vbsl_v.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) +// CHECK-NEXT: ret <8 x i8> %vbsl_v.i +// CHECK-LABEL: test_vbsl_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <8 x i8> +// CHECK-NEXT: %1 = bitcast <4 x i16> %b to <8 x i8> +// CHECK-NEXT: %2 = bitcast <4 x i16> %c to <8 x i8> +// CHECK-NEXT: %vbsl_v.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) +// CHECK-NEXT: %3 = bitcast <8 x i8> %vbsl_v.i to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %3 +// CHECK-LABEL: test_vbsl_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <8 x i8> +// CHECK-NEXT: %1 = bitcast <2 x i32> %b to <8 x i8> +// CHECK-NEXT: %2 = bitcast <2 x i32> %c to <8 x i8> +// CHECK-NEXT: %vbsl_v.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) +// CHECK-NEXT: %3 = bitcast <8 x i8> %vbsl_v.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %3 +// CHECK-LABEL: test_vbsl_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <8 x i8> +// CHECK-NEXT: %1 = bitcast <1 x i64> %b to <8 x i8> +// CHECK-NEXT: %2 = bitcast <1 x i64> %c to <8 x i8> +// CHECK-NEXT: %vbsl_v.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) +// CHECK-NEXT: %3 = bitcast <8 x i8> %vbsl_v.i to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %3 +// CHECK-LABEL: test_vbsl_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <8 x i8> +// CHECK-NEXT: %1 = bitcast <2 x float> %b to <8 x i8> +// CHECK-NEXT: %2 = bitcast <2 x float> %c to <8 x i8> +// CHECK-NEXT: %vbsl_v.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) +// CHECK-NEXT: %3 = bitcast <8 x i8> %vbsl_v.i to <2 x float> +// CHECK-NEXT: ret <2 x float> %3 +// CHECK-LABEL: test_vbsl_p8 +// CHECK: entry: +// CHECK-NEXT: %vbsl_v.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) +// CHECK-NEXT: ret <8 x i8> %vbsl_v.i +// CHECK-LABEL: test_vbsl_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <8 x i8> +// CHECK-NEXT: %1 = bitcast <4 x i16> %b to <8 x i8> +// CHECK-NEXT: %2 = bitcast <4 x i16> %c to <8 x i8> +// CHECK-NEXT: %vbsl_v.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) +// CHECK-NEXT: %3 = bitcast <8 x i8> %vbsl_v.i to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %3 +// CHECK-LABEL: test_vbslq_s8 +// CHECK: entry: +// CHECK-NEXT: %vbslq_v.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) +// CHECK-NEXT: ret <16 x i8> %vbslq_v.i +// CHECK-LABEL: test_vbslq_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <16 x i8> +// CHECK-NEXT: %1 = bitcast <8 x i16> %b to <16 x i8> +// CHECK-NEXT: %2 = bitcast <8 x i16> %c to <16 x i8> +// CHECK-NEXT: %vbslq_v.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) +// CHECK-NEXT: %3 = bitcast <16 x i8> %vbslq_v.i to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %3 +// CHECK-LABEL: test_vbslq_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <16 x i8> +// CHECK-NEXT: %1 = bitcast <4 x i32> %b to <16 x i8> +// CHECK-NEXT: %2 = bitcast <4 x 
i32> %c to <16 x i8> +// CHECK-NEXT: %vbslq_v.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) +// CHECK-NEXT: %3 = bitcast <16 x i8> %vbslq_v.i to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %3 +// CHECK-LABEL: test_vbslq_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <16 x i8> +// CHECK-NEXT: %1 = bitcast <2 x i64> %b to <16 x i8> +// CHECK-NEXT: %2 = bitcast <2 x i64> %c to <16 x i8> +// CHECK-NEXT: %vbslq_v.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) +// CHECK-NEXT: %3 = bitcast <16 x i8> %vbslq_v.i to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %3 +// CHECK-LABEL: test_vbslq_u8 +// CHECK: entry: +// CHECK-NEXT: %vbslq_v.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) +// CHECK-NEXT: ret <16 x i8> %vbslq_v.i +// CHECK-LABEL: test_vbslq_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <16 x i8> +// CHECK-NEXT: %1 = bitcast <8 x i16> %b to <16 x i8> +// CHECK-NEXT: %2 = bitcast <8 x i16> %c to <16 x i8> +// CHECK-NEXT: %vbslq_v.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) +// CHECK-NEXT: %3 = bitcast <16 x i8> %vbslq_v.i to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %3 +// CHECK-LABEL: test_vbslq_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <16 x i8> +// CHECK-NEXT: %1 = bitcast <4 x i32> %b to <16 x i8> +// CHECK-NEXT: %2 = bitcast <4 x i32> %c to <16 x i8> +// CHECK-NEXT: %vbslq_v.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) +// CHECK-NEXT: %3 = bitcast <16 x i8> %vbslq_v.i to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %3 +// CHECK-LABEL: test_vbslq_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <16 x i8> +// CHECK-NEXT: %1 = bitcast <2 x i64> %b to <16 x i8> +// CHECK-NEXT: %2 = bitcast <2 x i64> %c to <16 x i8> +// CHECK-NEXT: %vbslq_v.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) +// CHECK-NEXT: %3 = bitcast <16 x i8> %vbslq_v.i to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %3 +// CHECK-LABEL: test_vbslq_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <16 x i8> +// CHECK-NEXT: %1 = bitcast <4 x float> %b to <16 x i8> +// CHECK-NEXT: %2 = bitcast <4 x float> %c to <16 x i8> +// CHECK-NEXT: %vbslq_v.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) +// CHECK-NEXT: %3 = bitcast <16 x i8> %vbslq_v.i to <4 x float> +// CHECK-NEXT: ret <4 x float> %3 +// CHECK-LABEL: test_vbslq_p8 +// CHECK: entry: +// CHECK-NEXT: %vbslq_v.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) +// CHECK-NEXT: ret <16 x i8> %vbslq_v.i +// CHECK-LABEL: test_vbslq_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <16 x i8> +// CHECK-NEXT: %1 = bitcast <8 x i16> %b to <16 x i8> +// CHECK-NEXT: %2 = bitcast <8 x i16> %c to <16 x i8> +// CHECK-NEXT: %vbslq_v.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) +// CHECK-NEXT: %3 = bitcast <16 x i8> %vbslq_v.i to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %3 +// CHECK-LABEL: test_vcage_f32 +// CHECK: entry: +// CHECK-NEXT: %vcage_v2.i = tail call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %a, <2 x float> %b) +// CHECK-NEXT: ret <2 x i32> %vcage_v2.i +// CHECK-LABEL: test_vcageq_f32 +// CHECK: entry: +// CHECK-NEXT: %vcageq_v2.i = tail call <4 x i32> 
@llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %a, <4 x float> %b) +// CHECK-NEXT: ret <4 x i32> %vcageq_v2.i +// CHECK-LABEL: test_vcagt_f32 +// CHECK: entry: +// CHECK-NEXT: %vcagt_v2.i = tail call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %a, <2 x float> %b) +// CHECK-NEXT: ret <2 x i32> %vcagt_v2.i +// CHECK-LABEL: test_vcagtq_f32 +// CHECK: entry: +// CHECK-NEXT: %vcagtq_v2.i = tail call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %a, <4 x float> %b) +// CHECK-NEXT: ret <4 x i32> %vcagtq_v2.i +// CHECK-LABEL: test_vcale_f32 +// CHECK: entry: +// CHECK-NEXT: %vcale_v2.i = tail call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %b, <2 x float> %a) +// CHECK-NEXT: ret <2 x i32> %vcale_v2.i +// CHECK-LABEL: test_vcaleq_f32 +// CHECK: entry: +// CHECK-NEXT: %vcaleq_v2.i = tail call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %b, <4 x float> %a) +// CHECK-NEXT: ret <4 x i32> %vcaleq_v2.i +// CHECK-LABEL: test_vcalt_f32 +// CHECK: entry: +// CHECK-NEXT: %vcalt_v2.i = tail call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %b, <2 x float> %a) +// CHECK-NEXT: ret <2 x i32> %vcalt_v2.i +// CHECK-LABEL: test_vcaltq_f32 +// CHECK: entry: +// CHECK-NEXT: %vcaltq_v2.i = tail call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %b, <4 x float> %a) +// CHECK-NEXT: ret <4 x i32> %vcaltq_v2.i +// CHECK-LABEL: test_vceq_s8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp eq <8 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %sext.i +// CHECK-LABEL: test_vceq_s16 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp eq <4 x i16> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %sext.i +// CHECK-LABEL: test_vceq_s32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp eq <2 x i32> %a, %b +// CHECK-NEXT: %sext.i = sext <2 x i1> %cmp.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %sext.i +// CHECK-LABEL: test_vceq_f32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = fcmp oeq <2 x float> %a, %b +// CHECK-NEXT: %sext.i = sext <2 x i1> %cmp.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %sext.i +// CHECK-LABEL: test_vceq_u8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp eq <8 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %sext.i +// CHECK-LABEL: test_vceq_u16 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp eq <4 x i16> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %sext.i +// CHECK-LABEL: test_vceq_u32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp eq <2 x i32> %a, %b +// CHECK-NEXT: %sext.i = sext <2 x i1> %cmp.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %sext.i +// CHECK-LABEL: test_vceq_p8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp eq <8 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %sext.i +// CHECK-LABEL: test_vceqq_s8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp eq <16 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <16 x i1> %cmp.i to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %sext.i +// CHECK-LABEL: test_vceqq_s16 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp eq <8 x i16> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %sext.i +// CHECK-LABEL: test_vceqq_s32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp eq <4 x i32> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %sext.i +// CHECK-LABEL: test_vceqq_f32 
+// CHECK: entry: +// CHECK-NEXT: %cmp.i = fcmp oeq <4 x float> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %sext.i +// CHECK-LABEL: test_vceqq_u8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp eq <16 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <16 x i1> %cmp.i to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %sext.i +// CHECK-LABEL: test_vceqq_u16 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp eq <8 x i16> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %sext.i +// CHECK-LABEL: test_vceqq_u32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp eq <4 x i32> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %sext.i +// CHECK-LABEL: test_vceqq_p8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp eq <16 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <16 x i1> %cmp.i to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %sext.i +// CHECK-LABEL: test_vcge_s8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp sge <8 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %sext.i +// CHECK-LABEL: test_vcge_s16 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp sge <4 x i16> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %sext.i +// CHECK-LABEL: test_vcge_s32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp sge <2 x i32> %a, %b +// CHECK-NEXT: %sext.i = sext <2 x i1> %cmp.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %sext.i +// CHECK-LABEL: test_vcge_f32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = fcmp oge <2 x float> %a, %b +// CHECK-NEXT: %sext.i = sext <2 x i1> %cmp.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %sext.i +// CHECK-LABEL: test_vcge_u8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp uge <8 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %sext.i +// CHECK-LABEL: test_vcge_u16 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp uge <4 x i16> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %sext.i +// CHECK-LABEL: test_vcge_u32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp uge <2 x i32> %a, %b +// CHECK-NEXT: %sext.i = sext <2 x i1> %cmp.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %sext.i +// CHECK-LABEL: test_vcgeq_s8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp sge <16 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <16 x i1> %cmp.i to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %sext.i +// CHECK-LABEL: test_vcgeq_s16 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp sge <8 x i16> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %sext.i +// CHECK-LABEL: test_vcgeq_s32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp sge <4 x i32> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %sext.i +// CHECK-LABEL: test_vcgeq_f32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = fcmp oge <4 x float> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %sext.i +// CHECK-LABEL: test_vcgeq_u8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp uge <16 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <16 x i1> %cmp.i to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %sext.i +// CHECK-LABEL: test_vcgeq_u16 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp uge <8 x i16> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %sext.i +// CHECK-LABEL: 
test_vcgeq_u32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp uge <4 x i32> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %sext.i +// CHECK-LABEL: test_vcgt_s8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp sgt <8 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %sext.i +// CHECK-LABEL: test_vcgt_s16 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp sgt <4 x i16> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %sext.i +// CHECK-LABEL: test_vcgt_s32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp sgt <2 x i32> %a, %b +// CHECK-NEXT: %sext.i = sext <2 x i1> %cmp.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %sext.i +// CHECK-LABEL: test_vcgt_f32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = fcmp ogt <2 x float> %a, %b +// CHECK-NEXT: %sext.i = sext <2 x i1> %cmp.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %sext.i +// CHECK-LABEL: test_vcgt_u8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp ugt <8 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %sext.i +// CHECK-LABEL: test_vcgt_u16 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp ugt <4 x i16> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %sext.i +// CHECK-LABEL: test_vcgt_u32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp ugt <2 x i32> %a, %b +// CHECK-NEXT: %sext.i = sext <2 x i1> %cmp.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %sext.i +// CHECK-LABEL: test_vcgtq_s8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp sgt <16 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <16 x i1> %cmp.i to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %sext.i +// CHECK-LABEL: test_vcgtq_s16 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp sgt <8 x i16> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %sext.i +// CHECK-LABEL: test_vcgtq_s32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp sgt <4 x i32> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %sext.i +// CHECK-LABEL: test_vcgtq_f32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = fcmp ogt <4 x float> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %sext.i +// CHECK-LABEL: test_vcgtq_u8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp ugt <16 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <16 x i1> %cmp.i to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %sext.i +// CHECK-LABEL: test_vcgtq_u16 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp ugt <8 x i16> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %sext.i +// CHECK-LABEL: test_vcgtq_u32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp ugt <4 x i32> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %sext.i +// CHECK-LABEL: test_vcle_s8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp sle <8 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %sext.i +// CHECK-LABEL: test_vcle_s16 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp sle <4 x i16> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %sext.i +// CHECK-LABEL: test_vcle_s32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp sle <2 x i32> %a, %b +// CHECK-NEXT: %sext.i = sext <2 x i1> %cmp.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %sext.i +// 
CHECK-LABEL: test_vcle_f32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = fcmp ole <2 x float> %a, %b +// CHECK-NEXT: %sext.i = sext <2 x i1> %cmp.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %sext.i +// CHECK-LABEL: test_vcle_u8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp ule <8 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %sext.i +// CHECK-LABEL: test_vcle_u16 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp ule <4 x i16> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %sext.i +// CHECK-LABEL: test_vcle_u32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp ule <2 x i32> %a, %b +// CHECK-NEXT: %sext.i = sext <2 x i1> %cmp.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %sext.i +// CHECK-LABEL: test_vcleq_s8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp sle <16 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <16 x i1> %cmp.i to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %sext.i +// CHECK-LABEL: test_vcleq_s16 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp sle <8 x i16> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %sext.i +// CHECK-LABEL: test_vcleq_s32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp sle <4 x i32> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %sext.i +// CHECK-LABEL: test_vcleq_f32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = fcmp ole <4 x float> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %sext.i +// CHECK-LABEL: test_vcleq_u8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp ule <16 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <16 x i1> %cmp.i to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %sext.i +// CHECK-LABEL: test_vcleq_u16 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp ule <8 x i16> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %sext.i +// CHECK-LABEL: test_vcleq_u32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp ule <4 x i32> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %sext.i +// CHECK-LABEL: test_vcls_s8 +// CHECK: entry: +// CHECK-NEXT: %vcls_v.i = tail call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a) +// CHECK-NEXT: ret <8 x i8> %vcls_v.i +// CHECK-LABEL: test_vcls_s16 +// CHECK: entry: +// CHECK-NEXT: %vcls_v1.i = tail call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a) +// CHECK-NEXT: ret <4 x i16> %vcls_v1.i +// CHECK-LABEL: test_vcls_s32 +// CHECK: entry: +// CHECK-NEXT: %vcls_v1.i = tail call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a) +// CHECK-NEXT: ret <2 x i32> %vcls_v1.i +// CHECK-LABEL: test_vclsq_s8 +// CHECK: entry: +// CHECK-NEXT: %vclsq_v.i = tail call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a) +// CHECK-NEXT: ret <16 x i8> %vclsq_v.i +// CHECK-LABEL: test_vclsq_s16 +// CHECK: entry: +// CHECK-NEXT: %vclsq_v1.i = tail call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a) +// CHECK-NEXT: ret <8 x i16> %vclsq_v1.i +// CHECK-LABEL: test_vclsq_s32 +// CHECK: entry: +// CHECK-NEXT: %vclsq_v1.i = tail call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a) +// CHECK-NEXT: ret <4 x i32> %vclsq_v1.i +// CHECK-LABEL: test_vclt_s8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp slt <8 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %sext.i +// CHECK-LABEL: test_vclt_s16 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp slt <4 x i16> %a, %b +// 
CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %sext.i +// CHECK-LABEL: test_vclt_s32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp slt <2 x i32> %a, %b +// CHECK-NEXT: %sext.i = sext <2 x i1> %cmp.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %sext.i +// CHECK-LABEL: test_vclt_f32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = fcmp olt <2 x float> %a, %b +// CHECK-NEXT: %sext.i = sext <2 x i1> %cmp.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %sext.i +// CHECK-LABEL: test_vclt_u8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp ult <8 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %sext.i +// CHECK-LABEL: test_vclt_u16 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp ult <4 x i16> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %sext.i +// CHECK-LABEL: test_vclt_u32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp ult <2 x i32> %a, %b +// CHECK-NEXT: %sext.i = sext <2 x i1> %cmp.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %sext.i +// CHECK-LABEL: test_vcltq_s8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp slt <16 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <16 x i1> %cmp.i to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %sext.i +// CHECK-LABEL: test_vcltq_s16 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp slt <8 x i16> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %sext.i +// CHECK-LABEL: test_vcltq_s32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp slt <4 x i32> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %sext.i +// CHECK-LABEL: test_vcltq_f32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = fcmp olt <4 x float> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %sext.i +// CHECK-LABEL: test_vcltq_u8 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp ult <16 x i8> %a, %b +// CHECK-NEXT: %sext.i = sext <16 x i1> %cmp.i to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %sext.i +// CHECK-LABEL: test_vcltq_u16 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp ult <8 x i16> %a, %b +// CHECK-NEXT: %sext.i = sext <8 x i1> %cmp.i to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %sext.i +// CHECK-LABEL: test_vcltq_u32 +// CHECK: entry: +// CHECK-NEXT: %cmp.i = icmp ult <4 x i32> %a, %b +// CHECK-NEXT: %sext.i = sext <4 x i1> %cmp.i to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %sext.i +// CHECK-LABEL: test_vclz_s8 +// CHECK: entry: +// CHECK-NEXT: %vclz_v.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) +// CHECK-NEXT: ret <8 x i8> %vclz_v.i +// CHECK-LABEL: test_vclz_s16 +// CHECK: entry: +// CHECK-NEXT: %vclz_v1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) +// CHECK-NEXT: ret <4 x i16> %vclz_v1.i +// CHECK-LABEL: test_vclz_s32 +// CHECK: entry: +// CHECK-NEXT: %vclz_v1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) +// CHECK-NEXT: ret <2 x i32> %vclz_v1.i +// CHECK-LABEL: test_vclz_u8 +// CHECK: entry: +// CHECK-NEXT: %vclz_v.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) +// CHECK-NEXT: ret <8 x i8> %vclz_v.i +// CHECK-LABEL: test_vclz_u16 +// CHECK: entry: +// CHECK-NEXT: %vclz_v1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) +// CHECK-NEXT: ret <4 x i16> %vclz_v1.i +// CHECK-LABEL: test_vclz_u32 +// CHECK: entry: +// CHECK-NEXT: %vclz_v1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) +// CHECK-NEXT: ret <2 x i32> %vclz_v1.i +// 
CHECK-LABEL: test_vclzq_s8 +// CHECK: entry: +// CHECK-NEXT: %vclzq_v.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +// CHECK-NEXT: ret <16 x i8> %vclzq_v.i +// CHECK-LABEL: test_vclzq_s16 +// CHECK: entry: +// CHECK-NEXT: %vclzq_v1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +// CHECK-NEXT: ret <8 x i16> %vclzq_v1.i +// CHECK-LABEL: test_vclzq_s32 +// CHECK: entry: +// CHECK-NEXT: %vclzq_v1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) +// CHECK-NEXT: ret <4 x i32> %vclzq_v1.i +// CHECK-LABEL: test_vclzq_u8 +// CHECK: entry: +// CHECK-NEXT: %vclzq_v.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +// CHECK-NEXT: ret <16 x i8> %vclzq_v.i +// CHECK-LABEL: test_vclzq_u16 +// CHECK: entry: +// CHECK-NEXT: %vclzq_v1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +// CHECK-NEXT: ret <8 x i16> %vclzq_v1.i +// CHECK-LABEL: test_vclzq_u32 +// CHECK: entry: +// CHECK-NEXT: %vclzq_v1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) +// CHECK-NEXT: ret <4 x i32> %vclzq_v1.i +// CHECK-LABEL: test_vcnt_u8 +// CHECK: entry: +// CHECK-NEXT: %vcnt_v.i = tail call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) +// CHECK-NEXT: ret <8 x i8> %vcnt_v.i +// CHECK-LABEL: test_vcnt_s8 +// CHECK: entry: +// CHECK-NEXT: %vcnt_v.i = tail call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) +// CHECK-NEXT: ret <8 x i8> %vcnt_v.i +// CHECK-LABEL: test_vcnt_p8 +// CHECK: entry: +// CHECK-NEXT: %vcnt_v.i = tail call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) +// CHECK-NEXT: ret <8 x i8> %vcnt_v.i +// CHECK-LABEL: test_vcntq_u8 +// CHECK: entry: +// CHECK-NEXT: %vcntq_v.i = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) +// CHECK-NEXT: ret <16 x i8> %vcntq_v.i +// CHECK-LABEL: test_vcntq_s8 +// CHECK: entry: +// CHECK-NEXT: %vcntq_v.i = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) +// CHECK-NEXT: ret <16 x i8> %vcntq_v.i +// CHECK-LABEL: test_vcntq_p8 +// CHECK: entry: +// CHECK-NEXT: %vcntq_v.i = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) +// CHECK-NEXT: ret <16 x i8> %vcntq_v.i +// CHECK-LABEL: test_vcombine_s8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> +// CHECK-NEXT: ret <16 x i8> %shuffle.i +// CHECK-LABEL: test_vcombine_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> +// CHECK-NEXT: ret <8 x i16> %shuffle.i +// CHECK-LABEL: test_vcombine_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> +// CHECK-NEXT: ret <4 x i32> %shuffle.i +// CHECK-LABEL: test_vcombine_s64 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> +// CHECK-NEXT: ret <2 x i64> %shuffle.i +// CHECK-LABEL: test_vcombine_f16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> +// CHECK-NEXT: ret <8 x half> %shuffle.i +// CHECK-LABEL: test_vcombine_f32 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> +// CHECK-NEXT: ret <4 x float> %shuffle.i +// CHECK-LABEL: test_vcombine_u8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> +// CHECK-NEXT: ret <16 x i8> %shuffle.i +// CHECK-LABEL: test_vcombine_u16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> +// CHECK-NEXT: ret <8 x i16> %shuffle.i +// CHECK-LABEL: test_vcombine_u32 +// CHECK: entry: +// 
CHECK-NEXT: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> +// CHECK-NEXT: ret <4 x i32> %shuffle.i +// CHECK-LABEL: test_vcombine_u64 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> +// CHECK-NEXT: ret <2 x i64> %shuffle.i +// CHECK-LABEL: test_vcombine_p8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> +// CHECK-NEXT: ret <16 x i8> %shuffle.i +// CHECK-LABEL: test_vcombine_p16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> +// CHECK-NEXT: ret <8 x i16> %shuffle.i +// CHECK-LABEL: test_vcreate_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64 %a to <8 x i8> +// CHECK-NEXT: %vclz_v.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %0, i1 false) +// CHECK-NEXT: ret <8 x i8> %vclz_v.i +// CHECK-LABEL: test_vcreate_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64 %a to <4 x i16> +// CHECK-NEXT: %vclz_v1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %0, i1 false) +// CHECK-NEXT: ret <4 x i16> %vclz_v1.i +// CHECK-LABEL: test_vcreate_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64 %a to <2 x i32> +// CHECK-NEXT: %vclz_v1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %0, i1 false) +// CHECK-NEXT: ret <2 x i32> %vclz_v1.i +// CHECK-LABEL: test_vcreate_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64 %a to <4 x half> +// CHECK-NEXT: ret <4 x half> %0 +// CHECK-LABEL: test_vcreate_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64 %a to <2 x float> +// CHECK-NEXT: ret <2 x float> %0 +// CHECK-LABEL: test_vcreate_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64 %a to <8 x i8> +// CHECK-NEXT: %vclz_v.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %0, i1 false) +// CHECK-NEXT: ret <8 x i8> %vclz_v.i +// CHECK-LABEL: test_vcreate_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64 %a to <4 x i16> +// CHECK-NEXT: %vclz_v1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %0, i1 false) +// CHECK-NEXT: ret <4 x i16> %vclz_v1.i +// CHECK-LABEL: test_vcreate_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64 %a to <2 x i32> +// CHECK-NEXT: %vclz_v1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %0, i1 false) +// CHECK-NEXT: ret <2 x i32> %vclz_v1.i +// CHECK-LABEL: test_vcreate_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = insertelement <1 x i64> undef, i64 %a, i32 0 +// CHECK-NEXT: %add.i = shl <1 x i64> %0, +// CHECK-NEXT: ret <1 x i64> %add.i +// CHECK-LABEL: test_vcreate_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64 %a to <8 x i8> +// CHECK-NEXT: %vcnt_v.i = tail call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %0) +// CHECK-NEXT: ret <8 x i8> %vcnt_v.i +// CHECK-LABEL: test_vcreate_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64 %a to <8 x i8> +// CHECK-NEXT: %vbsl_v.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %0, <8 x i8> %0, <8 x i8> %0) +// CHECK-NEXT: %1 = bitcast <8 x i8> %vbsl_v.i to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %1 +// CHECK-LABEL: test_vcreate_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = insertelement <1 x i64> undef, i64 %a, i32 0 +// CHECK-NEXT: %add.i = shl <1 x i64> %0, +// CHECK-NEXT: ret <1 x i64> %add.i +// CHECK-LABEL: test_vcvt_f16_f32 +// CHECK: entry: +// CHECK-NEXT: %vcvt_f16_f321.i = tail call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %a) +// CHECK-NEXT: %0 = bitcast <4 x i16> %vcvt_f16_f321.i to <4 x half> +// CHECK-NEXT: ret <4 x half> %0 +// CHECK-LABEL: test_vcvt_f32_s32 +// CHECK: entry: +// CHECK-NEXT: %vcvt.i = sitofp <2 x 
i32> %a to <2 x float> +// CHECK-NEXT: ret <2 x float> %vcvt.i +// CHECK-LABEL: test_vcvt_f32_u32 +// CHECK: entry: +// CHECK-NEXT: %vcvt.i = uitofp <2 x i32> %a to <2 x float> +// CHECK-NEXT: ret <2 x float> %vcvt.i +// CHECK-LABEL: test_vcvtq_f32_s32 +// CHECK: entry: +// CHECK-NEXT: %vcvt.i = sitofp <4 x i32> %a to <4 x float> +// CHECK-NEXT: ret <4 x float> %vcvt.i +// CHECK-LABEL: test_vcvtq_f32_u32 +// CHECK: entry: +// CHECK-NEXT: %vcvt.i = uitofp <4 x i32> %a to <4 x float> +// CHECK-NEXT: ret <4 x float> %vcvt.i +// CHECK-LABEL: test_vcvt_f32_f16 +// CHECK: entry: +// CHECK-NEXT: %vcvt_f32_f16.i = bitcast <4 x half> %a to <4 x i16> +// CHECK-NEXT: %vcvt_f32_f161.i = tail call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %vcvt_f32_f16.i) +// CHECK-NEXT: ret <4 x float> %vcvt_f32_f161.i +// CHECK-LABEL: test_vcvt_n_f32_s32 +// CHECK: entry: +// CHECK-NEXT: %vcvt_n1 = tail call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %a, i32 1) +// CHECK-NEXT: ret <2 x float> %vcvt_n1 +// CHECK-LABEL: test_vcvt_n_f32_u32 +// CHECK: entry: +// CHECK-NEXT: %vcvt_n1 = tail call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %a, i32 1) +// CHECK-NEXT: ret <2 x float> %vcvt_n1 +// CHECK-LABEL: test_vcvtq_n_f32_s32 +// CHECK: entry: +// CHECK-NEXT: %vcvt_n1 = tail call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %a, i32 3) +// CHECK-NEXT: ret <4 x float> %vcvt_n1 +// CHECK-LABEL: test_vcvtq_n_f32_u32 +// CHECK: entry: +// CHECK-NEXT: %vcvt_n1 = tail call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %a, i32 3) +// CHECK-NEXT: ret <4 x float> %vcvt_n1 +// CHECK-LABEL: test_vcvt_n_s32_f32 +// CHECK: entry: +// CHECK-NEXT: %vcvt_n1 = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %a, i32 1) +// CHECK-NEXT: ret <2 x i32> %vcvt_n1 +// CHECK-LABEL: test_vcvtq_n_s32_f32 +// CHECK: entry: +// CHECK-NEXT: %vcvt_n1 = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %a, i32 3) +// CHECK-NEXT: ret <4 x i32> %vcvt_n1 +// CHECK-LABEL: test_vcvt_n_u32_f32 +// CHECK: entry: +// CHECK-NEXT: %vcvt_n1 = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %a, i32 1) +// CHECK-NEXT: ret <2 x i32> %vcvt_n1 +// CHECK-LABEL: test_vcvtq_n_u32_f32 +// CHECK: entry: +// CHECK-NEXT: %vcvt_n1 = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %a, i32 3) +// CHECK-NEXT: ret <4 x i32> %vcvt_n1 +// CHECK-LABEL: test_vcvt_s32_f32 +// CHECK: entry: +// CHECK-NEXT: %vcvt.i = fptosi <2 x float> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %vcvt.i +// CHECK-LABEL: test_vcvtq_s32_f32 +// CHECK: entry: +// CHECK-NEXT: %vcvt.i = fptosi <4 x float> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %vcvt.i +// CHECK-LABEL: test_vcvt_u32_f32 +// CHECK: entry: +// CHECK-NEXT: %vcvt.i = fptoui <2 x float> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %vcvt.i +// CHECK-LABEL: test_vcvtq_u32_f32 +// CHECK: entry: +// CHECK-NEXT: %vcvt.i = fptoui <4 x float> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %vcvt.i +// CHECK-LABEL: test_vdup_lane_u8 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> +// CHECK-NEXT: ret <8 x i8> %shuffle +// CHECK-LABEL: test_vdup_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> +// CHECK-NEXT: ret <4 x i16> %shuffle +// CHECK-LABEL: test_vdup_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %a, <2 x i32> undef, <2 x i32> +// CHECK-NEXT: ret <2 x 
i32> %shuffle +// CHECK-LABEL: test_vdup_lane_s8 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +// CHECK-NEXT: ret <8 x i8> %shuffle +// CHECK-LABEL: test_vdup_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: ret <4 x i16> %shuffle +// CHECK-LABEL: test_vdup_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %a, <2 x i32> undef, <2 x i32> <i32 1, i32 1> +// CHECK-NEXT: ret <2 x i32> %shuffle +// CHECK-LABEL: test_vdup_lane_p8 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +// CHECK-NEXT: ret <8 x i8> %shuffle +// CHECK-LABEL: test_vdup_lane_p16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: ret <4 x i16> %shuffle +// CHECK-LABEL: test_vdup_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> <i32 1, i32 1> +// CHECK-NEXT: ret <2 x float> %shuffle +// CHECK-LABEL: test_vdupq_lane_u8 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <8 x i8> %a, <8 x i8> undef, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +// CHECK-NEXT: ret <16 x i8> %shuffle +// CHECK-LABEL: test_vdupq_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %a, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: ret <8 x i16> %shuffle +// CHECK-LABEL: test_vdupq_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> +// CHECK-NEXT: ret <4 x i32> %shuffle +// CHECK-LABEL: test_vdupq_lane_s8 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <8 x i8> %a, <8 x i8> undef, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +// CHECK-NEXT: ret <16 x i8> %shuffle +// CHECK-LABEL: test_vdupq_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %a, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: ret <8 x i16> %shuffle +// CHECK-LABEL: test_vdupq_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> +// CHECK-NEXT: ret <4 x i32> %shuffle +// CHECK-LABEL: test_vdupq_lane_p8 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <8 x i8> %a, <8 x i8> undef, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +// CHECK-NEXT: ret <16 x i8> %shuffle +// CHECK-LABEL: test_vdupq_lane_p16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %a, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: ret <8 x i16> %shuffle +// CHECK-LABEL: test_vdupq_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> +// CHECK-NEXT: ret <4 x float> %shuffle +// CHECK-LABEL: test_vdup_lane_s64 +// CHECK: entry: +// CHECK-NEXT: ret <1 x i64> %a +// CHECK-LABEL: test_vdup_lane_u64 +// CHECK: entry: +// CHECK-NEXT: ret <1 x i64> %a +// CHECK-LABEL: test_vdupq_lane_s64 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <1 x i64> %a, <1 x i64> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: ret <2 x i64> %shuffle +// CHECK-LABEL: test_vdupq_lane_u64 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <1 x i64> %a, <1 x i64> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: ret <2 x i64> %shuffle +// CHECK-LABEL: test_vdup_n_u8 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <8 x i8> undef, i8 %a, i32 0 +// CHECK-NEXT: %vecinit7.i = shufflevector <8 x i8> %vecinit.i, <8 x i8> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i8> %vecinit7.i +// CHECK-LABEL: test_vdup_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = 
insertelement <4 x i16> undef, i16 %a, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x i16> %vecinit3.i +// CHECK-LABEL: test_vdup_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %a, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: ret <2 x i32> %vecinit1.i +// CHECK-LABEL: test_vdup_n_s8 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <8 x i8> undef, i8 %a, i32 0 +// CHECK-NEXT: %vecinit7.i = shufflevector <8 x i8> %vecinit.i, <8 x i8> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i8> %vecinit7.i +// CHECK-LABEL: test_vdup_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %a, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x i16> %vecinit3.i +// CHECK-LABEL: test_vdup_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %a, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: ret <2 x i32> %vecinit1.i +// CHECK-LABEL: test_vdup_n_p8 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <8 x i8> undef, i8 %a, i32 0 +// CHECK-NEXT: %vecinit7.i = shufflevector <8 x i8> %vecinit.i, <8 x i8> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i8> %vecinit7.i +// CHECK-LABEL: test_vdup_n_p16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %a, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x i16> %vecinit3.i +// CHECK-LABEL: test_vdup_n_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = load half, half* %a, align 2, !tbaa !3 +// CHECK-NEXT: %vecinit = insertelement <4 x half> undef, half %0, i32 0 +// CHECK-NEXT: %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x half> %vecinit3 +// CHECK-LABEL: test_vdup_n_f32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x float> undef, float %a, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x float> %vecinit.i, <2 x float> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: ret <2 x float> %vecinit1.i +// CHECK-LABEL: test_vdupq_n_u8 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <16 x i8> undef, i8 %a, i32 0 +// CHECK-NEXT: %vecinit15.i = shufflevector <16 x i8> %vecinit.i, <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: ret <16 x i8> %vecinit15.i +// CHECK-LABEL: test_vdupq_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <8 x i16> undef, i16 %a, i32 0 +// CHECK-NEXT: %vecinit7.i = shufflevector <8 x i16> %vecinit.i, <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i16> %vecinit7.i +// CHECK-LABEL: test_vdupq_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i32> undef, i32 %a, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x i32> %vecinit3.i +// CHECK-LABEL: test_vdupq_n_s8 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <16 x i8> undef, i8 %a, i32 0 +// CHECK-NEXT: %vecinit15.i = shufflevector <16 x i8> %vecinit.i, <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: ret <16 x 
i8> %vecinit15.i +// CHECK-LABEL: test_vdupq_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <8 x i16> undef, i16 %a, i32 0 +// CHECK-NEXT: %vecinit7.i = shufflevector <8 x i16> %vecinit.i, <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i16> %vecinit7.i +// CHECK-LABEL: test_vdupq_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i32> undef, i32 %a, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x i32> %vecinit3.i +// CHECK-LABEL: test_vdupq_n_p8 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <16 x i8> undef, i8 %a, i32 0 +// CHECK-NEXT: %vecinit15.i = shufflevector <16 x i8> %vecinit.i, <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: ret <16 x i8> %vecinit15.i +// CHECK-LABEL: test_vdupq_n_p16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <8 x i16> undef, i16 %a, i32 0 +// CHECK-NEXT: %vecinit7.i = shufflevector <8 x i16> %vecinit.i, <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i16> %vecinit7.i +// CHECK-LABEL: test_vdupq_n_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = load half, half* %a, align 2, !tbaa !3 +// CHECK-NEXT: %vecinit = insertelement <8 x half> undef, half %0, i32 0 +// CHECK-NEXT: %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x half> %vecinit7 +// CHECK-LABEL: test_vdupq_n_f32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x float> undef, float %a, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x float> %vecinit3.i +// CHECK-LABEL: test_vdup_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <1 x i64> undef, i64 %a, i32 0 +// CHECK-NEXT: %add.i = shl <1 x i64> %vecinit.i, <i64 1> +// CHECK-NEXT: ret <1 x i64> %add.i +// CHECK-LABEL: test_vdup_n_u64 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <1 x i64> undef, i64 %a, i32 0 +// CHECK-NEXT: %add.i = shl <1 x i64> %vecinit.i, <i64 1> +// CHECK-NEXT: ret <1 x i64> %add.i +// CHECK-LABEL: test_vdupq_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i64> undef, i64 %a, i32 0 +// CHECK-NEXT: %0 = shl <2 x i64> %vecinit.i, <i64 1, i64 undef> +// CHECK-NEXT: %add.i = shufflevector <2 x i64> %0, <2 x i64> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: ret <2 x i64> %add.i +// CHECK-LABEL: test_vdupq_n_u64 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i64> undef, i64 %a, i32 0 +// CHECK-NEXT: %0 = shl <2 x i64> %vecinit.i, <i64 1, i64 undef> +// CHECK-NEXT: %add.i = shufflevector <2 x i64> %0, <2 x i64> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: ret <2 x i64> %add.i +// CHECK-LABEL: test_veor_s8 +// CHECK: entry: +// CHECK-NEXT: %xor.i = xor <8 x i8> %b, %a +// CHECK-NEXT: ret <8 x i8> %xor.i +// CHECK-LABEL: test_veor_s16 +// CHECK: entry: +// CHECK-NEXT: %xor.i = xor <4 x i16> %b, %a +// CHECK-NEXT: ret <4 x i16> %xor.i +// CHECK-LABEL: test_veor_s32 +// CHECK: entry: +// CHECK-NEXT: %xor.i = xor <2 x i32> %b, %a +// CHECK-NEXT: ret <2 x i32> %xor.i +// CHECK-LABEL: test_veor_s64 +// CHECK: entry: +// CHECK-NEXT: %xor.i = xor <1 x i64> %b, %a +// CHECK-NEXT: ret <1 x i64> %xor.i +// CHECK-LABEL: test_veor_u8 +// CHECK: entry: +// CHECK-NEXT: %xor.i = xor <8 x i8> %b, %a +// CHECK-NEXT: ret <8 x i8> %xor.i +// CHECK-LABEL: test_veor_u16 +// CHECK: entry: +// CHECK-NEXT: %xor.i = xor <4 x i16> %b, %a +// CHECK-NEXT: ret 
<4 x i16> %xor.i +// CHECK-LABEL: test_veor_u32 +// CHECK: entry: +// CHECK-NEXT: %xor.i = xor <2 x i32> %b, %a +// CHECK-NEXT: ret <2 x i32> %xor.i +// CHECK-LABEL: test_veor_u64 +// CHECK: entry: +// CHECK-NEXT: %xor.i = xor <1 x i64> %b, %a +// CHECK-NEXT: ret <1 x i64> %xor.i +// CHECK-LABEL: test_veorq_s8 +// CHECK: entry: +// CHECK-NEXT: %xor.i = xor <16 x i8> %b, %a +// CHECK-NEXT: ret <16 x i8> %xor.i +// CHECK-LABEL: test_veorq_s16 +// CHECK: entry: +// CHECK-NEXT: %xor.i = xor <8 x i16> %b, %a +// CHECK-NEXT: ret <8 x i16> %xor.i +// CHECK-LABEL: test_veorq_s32 +// CHECK: entry: +// CHECK-NEXT: %xor.i = xor <4 x i32> %b, %a +// CHECK-NEXT: ret <4 x i32> %xor.i +// CHECK-LABEL: test_veorq_s64 +// CHECK: entry: +// CHECK-NEXT: %xor.i = xor <2 x i64> %b, %a +// CHECK-NEXT: ret <2 x i64> %xor.i +// CHECK-LABEL: test_veorq_u8 +// CHECK: entry: +// CHECK-NEXT: %xor.i = xor <16 x i8> %b, %a +// CHECK-NEXT: ret <16 x i8> %xor.i +// CHECK-LABEL: test_veorq_u16 +// CHECK: entry: +// CHECK-NEXT: %xor.i = xor <8 x i16> %b, %a +// CHECK-NEXT: ret <8 x i16> %xor.i +// CHECK-LABEL: test_veorq_u32 +// CHECK: entry: +// CHECK-NEXT: %xor.i = xor <4 x i32> %b, %a +// CHECK-NEXT: ret <4 x i32> %xor.i +// CHECK-LABEL: test_veorq_u64 +// CHECK: entry: +// CHECK-NEXT: %xor.i = xor <2 x i64> %b, %a +// CHECK-NEXT: ret <2 x i64> %xor.i +// CHECK-LABEL: test_vext_s8 +// CHECK: entry: +// CHECK-NEXT: %vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> +// CHECK-NEXT: ret <8 x i8> %vext +// CHECK-LABEL: test_vext_u8 +// CHECK: entry: +// CHECK-NEXT: %vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> +// CHECK-NEXT: ret <8 x i8> %vext +// CHECK-LABEL: test_vext_p8 +// CHECK: entry: +// CHECK-NEXT: %vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> +// CHECK-NEXT: ret <8 x i8> %vext +// CHECK-LABEL: test_vext_s16 +// CHECK: entry: +// CHECK-NEXT: %vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6> +// CHECK-NEXT: ret <4 x i16> %vext +// CHECK-LABEL: test_vext_u16 +// CHECK: entry: +// CHECK-NEXT: %vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6> +// CHECK-NEXT: ret <4 x i16> %vext +// CHECK-LABEL: test_vext_p16 +// CHECK: entry: +// CHECK-NEXT: %vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6> +// CHECK-NEXT: ret <4 x i16> %vext +// CHECK-LABEL: test_vext_s32 +// CHECK: entry: +// CHECK-NEXT: %vext = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 2> +// CHECK-NEXT: ret <2 x i32> %vext +// CHECK-LABEL: test_vext_u32 +// CHECK: entry: +// CHECK-NEXT: %vext = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 2> +// CHECK-NEXT: ret <2 x i32> %vext +// CHECK-LABEL: test_vext_s64 +// CHECK: entry: +// CHECK-NEXT: ret <1 x i64> %a +// CHECK-LABEL: test_vext_u64 +// CHECK: entry: +// CHECK-NEXT: ret <1 x i64> %a +// CHECK-LABEL: test_vext_f32 +// CHECK: entry: +// CHECK-NEXT: %vext = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 2> +// CHECK-NEXT: ret <2 x float> %vext +// CHECK-LABEL: test_vextq_s8 +// CHECK: entry: +// CHECK-NEXT: %vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30> +// CHECK-NEXT: ret <16 x i8> %vext +// CHECK-LABEL: test_vextq_u8 +// CHECK: entry: +// CHECK-NEXT: %vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30> +// CHECK-NEXT: ret <16 x i8> %vext +// CHECK-LABEL: test_vextq_p8 +// CHECK: entry: +// CHECK-NEXT: %vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30> +// CHECK-NEXT: ret <16 x i8> %vext +// CHECK-LABEL: test_vextq_s16 +// CHECK: entry: +// CHECK-NEXT: %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 
x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> +// CHECK-NEXT: ret <8 x i16> %vext +// CHECK-LABEL: test_vextq_u16 +// CHECK: entry: +// CHECK-NEXT: %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> +// CHECK-NEXT: ret <8 x i16> %vext +// CHECK-LABEL: test_vextq_p16 +// CHECK: entry: +// CHECK-NEXT: %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> +// CHECK-NEXT: ret <8 x i16> %vext +// CHECK-LABEL: test_vextq_s32 +// CHECK: entry: +// CHECK-NEXT: %vext = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6> +// CHECK-NEXT: ret <4 x i32> %vext +// CHECK-LABEL: test_vextq_u32 +// CHECK: entry: +// CHECK-NEXT: %vext = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6> +// CHECK-NEXT: ret <4 x i32> %vext +// CHECK-LABEL: test_vextq_s64 +// CHECK: entry: +// CHECK-NEXT: %vext = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2> +// CHECK-NEXT: ret <2 x i64> %vext +// CHECK-LABEL: test_vextq_u64 +// CHECK: entry: +// CHECK-NEXT: %vext = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2> +// CHECK-NEXT: ret <2 x i64> %vext +// CHECK-LABEL: test_vextq_f32 +// CHECK: entry: +// CHECK-NEXT: %vext = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6> +// CHECK-NEXT: ret <4 x float> %vext +// CHECK-LABEL: test_vfma_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %c, <2 x float> %a) +// CHECK-NEXT: ret <2 x float> %0 +// CHECK-LABEL: test_vfmaq_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %c, <4 x float> %a) +// CHECK-NEXT: ret <4 x float> %0 +// CHECK-LABEL: test_vfms_f32 +// CHECK: entry: +// CHECK-NEXT: %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b +// CHECK-NEXT: %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %sub.i, <2 x float> %c, <2 x float> %a) +// CHECK-NEXT: ret <2 x float> %0 +// CHECK-LABEL: test_vfmsq_f32 +// CHECK: entry: +// CHECK-NEXT: %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b +// CHECK-NEXT: %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i, <4 x float> %c, <4 x float> %a) +// CHECK-NEXT: ret <4 x float> %0 +// CHECK-LABEL: test_vget_high_s8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +// CHECK-NEXT: ret <8 x i8> %shuffle.i +// CHECK-LABEL: test_vget_high_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> +// CHECK-NEXT: ret <4 x i16> %shuffle.i +// CHECK-LABEL: test_vget_high_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> +// CHECK-NEXT: ret <2 x i32> %shuffle.i +// CHECK-LABEL: test_vget_high_s64 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1> +// CHECK-NEXT: ret <1 x i64> %shuffle.i +// CHECK-LABEL: test_vget_high_f16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> +// CHECK-NEXT: ret <4 x half> %shuffle.i +// CHECK-LABEL: test_vget_high_f32 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 2, i32 3> +// CHECK-NEXT: ret <2 x float> %shuffle.i +// CHECK-LABEL: test_vget_high_u8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +// CHECK-NEXT: ret <8 x i8> %shuffle.i +// CHECK-LABEL: test_vget_high_u16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> +// CHECK-NEXT: ret <4 x i16> %shuffle.i +// CHECK-LABEL: test_vget_high_u32 +// CHECK: entry: +// 
CHECK-NEXT: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> +// CHECK-NEXT: ret <2 x i32> %shuffle.i +// CHECK-LABEL: test_vget_high_u64 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1> +// CHECK-NEXT: ret <1 x i64> %shuffle.i +// CHECK-LABEL: test_vget_high_p8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +// CHECK-NEXT: ret <8 x i8> %shuffle.i +// CHECK-LABEL: test_vget_high_p16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> +// CHECK-NEXT: ret <4 x i16> %shuffle.i +// CHECK-LABEL: test_vget_lane_u8 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <8 x i8> %a, i32 7 +// CHECK-NEXT: ret i8 %vget_lane +// CHECK-LABEL: test_vget_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <4 x i16> %a, i32 3 +// CHECK-NEXT: ret i16 %vget_lane +// CHECK-LABEL: test_vget_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <2 x i32> %a, i32 1 +// CHECK-NEXT: ret i32 %vget_lane +// CHECK-LABEL: test_vget_lane_s8 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <8 x i8> %a, i32 7 +// CHECK-NEXT: ret i8 %vget_lane +// CHECK-LABEL: test_vget_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <4 x i16> %a, i32 3 +// CHECK-NEXT: ret i16 %vget_lane +// CHECK-LABEL: test_vget_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <2 x i32> %a, i32 1 +// CHECK-NEXT: ret i32 %vget_lane +// CHECK-LABEL: test_vget_lane_p8 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <8 x i8> %a, i32 7 +// CHECK-NEXT: ret i8 %vget_lane +// CHECK-LABEL: test_vget_lane_p16 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <4 x i16> %a, i32 3 +// CHECK-NEXT: ret i16 %vget_lane +// CHECK-LABEL: test_vget_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <2 x float> %a, i32 1 +// CHECK-NEXT: ret float %vget_lane +// CHECK-LABEL: test_vget_lane_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <4 x half> %a, i32 1 +// CHECK-NEXT: %conv = fpext half %0 to float +// CHECK-NEXT: ret float %conv +// CHECK-LABEL: test_vgetq_lane_u8 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <16 x i8> %a, i32 15 +// CHECK-NEXT: ret i8 %vget_lane +// CHECK-LABEL: test_vgetq_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <8 x i16> %a, i32 7 +// CHECK-NEXT: ret i16 %vget_lane +// CHECK-LABEL: test_vgetq_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <4 x i32> %a, i32 3 +// CHECK-NEXT: ret i32 %vget_lane +// CHECK-LABEL: test_vgetq_lane_s8 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <16 x i8> %a, i32 15 +// CHECK-NEXT: ret i8 %vget_lane +// CHECK-LABEL: test_vgetq_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <8 x i16> %a, i32 7 +// CHECK-NEXT: ret i16 %vget_lane +// CHECK-LABEL: test_vgetq_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <4 x i32> %a, i32 3 +// CHECK-NEXT: ret i32 %vget_lane +// CHECK-LABEL: test_vgetq_lane_p8 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <16 x i8> %a, i32 15 +// CHECK-NEXT: ret i8 %vget_lane +// CHECK-LABEL: test_vgetq_lane_p16 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <8 x i16> %a, i32 7 +// CHECK-NEXT: ret i16 %vget_lane +// CHECK-LABEL: test_vgetq_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <4 x 
float> %a, i32 3 +// CHECK-NEXT: ret float %vget_lane +// CHECK-LABEL: test_vgetq_lane_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <8 x half> %a, i32 3 +// CHECK-NEXT: %conv = fpext half %0 to float +// CHECK-NEXT: ret float %conv +// CHECK-LABEL: test_vget_lane_s64 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <1 x i64> %a, i32 0 +// CHECK-NEXT: ret i64 %vget_lane +// CHECK-LABEL: test_vget_lane_u64 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <1 x i64> %a, i32 0 +// CHECK-NEXT: ret i64 %vget_lane +// CHECK-LABEL: test_vgetq_lane_s64 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <2 x i64> %a, i32 1 +// CHECK-NEXT: ret i64 %vget_lane +// CHECK-LABEL: test_vgetq_lane_u64 +// CHECK: entry: +// CHECK-NEXT: %vget_lane = extractelement <2 x i64> %a, i32 1 +// CHECK-NEXT: ret i64 %vget_lane +// CHECK-LABEL: test_vget_low_s8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +// CHECK-NEXT: ret <8 x i8> %shuffle.i +// CHECK-LABEL: test_vget_low_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +// CHECK-NEXT: ret <4 x i16> %shuffle.i +// CHECK-LABEL: test_vget_low_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1> +// CHECK-NEXT: ret <2 x i32> %shuffle.i +// CHECK-LABEL: test_vget_low_s64 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer +// CHECK-NEXT: ret <1 x i64> %shuffle.i +// CHECK-LABEL: test_vget_low_f16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +// CHECK-NEXT: ret <4 x half> %shuffle.i +// CHECK-LABEL: test_vget_low_f32 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1> +// CHECK-NEXT: ret <2 x float> %shuffle.i +// CHECK-LABEL: test_vget_low_u8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +// CHECK-NEXT: ret <8 x i8> %shuffle.i +// CHECK-LABEL: test_vget_low_u16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +// CHECK-NEXT: ret <4 x i16> %shuffle.i +// CHECK-LABEL: test_vget_low_u32 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1> +// CHECK-NEXT: ret <2 x i32> %shuffle.i +// CHECK-LABEL: test_vget_low_u64 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer +// CHECK-NEXT: ret <1 x i64> %shuffle.i +// CHECK-LABEL: test_vget_low_p8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +// CHECK-NEXT: ret <8 x i8> %shuffle.i +// CHECK-LABEL: test_vget_low_p16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +// CHECK-NEXT: ret <4 x i16> %shuffle.i +// CHECK-LABEL: test_vhadd_s8 +// CHECK: entry: +// CHECK-NEXT: %vhadd_v.i = tail call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vhadd_v.i +// CHECK-LABEL: test_vhadd_s16 +// CHECK: entry: +// CHECK-NEXT: %vhadd_v2.i = tail call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vhadd_v2.i +// CHECK-LABEL: test_vhadd_s32 +// CHECK: entry: +// CHECK-NEXT: %vhadd_v2.i = tail call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x 
i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vhadd_v2.i +// CHECK-LABEL: test_vhadd_u8 +// CHECK: entry: +// CHECK-NEXT: %vhadd_v.i = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vhadd_v.i +// CHECK-LABEL: test_vhadd_u16 +// CHECK: entry: +// CHECK-NEXT: %vhadd_v2.i = tail call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vhadd_v2.i +// CHECK-LABEL: test_vhadd_u32 +// CHECK: entry: +// CHECK-NEXT: %vhadd_v2.i = tail call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vhadd_v2.i +// CHECK-LABEL: test_vhaddq_s8 +// CHECK: entry: +// CHECK-NEXT: %vhaddq_v.i = tail call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vhaddq_v.i +// CHECK-LABEL: test_vhaddq_s16 +// CHECK: entry: +// CHECK-NEXT: %vhaddq_v2.i = tail call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vhaddq_v2.i +// CHECK-LABEL: test_vhaddq_s32 +// CHECK: entry: +// CHECK-NEXT: %vhaddq_v2.i = tail call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vhaddq_v2.i +// CHECK-LABEL: test_vhaddq_u8 +// CHECK: entry: +// CHECK-NEXT: %vhaddq_v.i = tail call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vhaddq_v.i +// CHECK-LABEL: test_vhaddq_u16 +// CHECK: entry: +// CHECK-NEXT: %vhaddq_v2.i = tail call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vhaddq_v2.i +// CHECK-LABEL: test_vhaddq_u32 +// CHECK: entry: +// CHECK-NEXT: %vhaddq_v2.i = tail call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vhaddq_v2.i +// CHECK-LABEL: test_vhsub_s8 +// CHECK: entry: +// CHECK-NEXT: %vhsub_v.i = tail call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vhsub_v.i +// CHECK-LABEL: test_vhsub_s16 +// CHECK: entry: +// CHECK-NEXT: %vhsub_v2.i = tail call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vhsub_v2.i +// CHECK-LABEL: test_vhsub_s32 +// CHECK: entry: +// CHECK-NEXT: %vhsub_v2.i = tail call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vhsub_v2.i +// CHECK-LABEL: test_vhsub_u8 +// CHECK: entry: +// CHECK-NEXT: %vhsub_v.i = tail call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vhsub_v.i +// CHECK-LABEL: test_vhsub_u16 +// CHECK: entry: +// CHECK-NEXT: %vhsub_v2.i = tail call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vhsub_v2.i +// CHECK-LABEL: test_vhsub_u32 +// CHECK: entry: +// CHECK-NEXT: %vhsub_v2.i = tail call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vhsub_v2.i +// CHECK-LABEL: test_vhsubq_s8 +// CHECK: entry: +// CHECK-NEXT: %vhsubq_v.i = tail call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vhsubq_v.i +// CHECK-LABEL: test_vhsubq_s16 +// CHECK: entry: +// CHECK-NEXT: %vhsubq_v2.i = tail call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vhsubq_v2.i +// CHECK-LABEL: test_vhsubq_s32 +// CHECK: entry: +// CHECK-NEXT: %vhsubq_v2.i = tail call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 
x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vhsubq_v2.i +// CHECK-LABEL: test_vhsubq_u8 +// CHECK: entry: +// CHECK-NEXT: %vhsubq_v.i = tail call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vhsubq_v.i +// CHECK-LABEL: test_vhsubq_u16 +// CHECK: entry: +// CHECK-NEXT: %vhsubq_v2.i = tail call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vhsubq_v2.i +// CHECK-LABEL: test_vhsubq_u32 +// CHECK: entry: +// CHECK-NEXT: %vhsubq_v2.i = tail call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vhsubq_v2.i +// CHECK-LABEL: test_vld1q_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i8* %a to <16 x i8>* +// CHECK-NEXT: %1 = load <16 x i8>, <16 x i8>* %0, align 1 +// CHECK-NEXT: ret <16 x i8> %1 +// CHECK-LABEL: test_vld1q_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i16* %a to <8 x i16>* +// CHECK-NEXT: %1 = load <8 x i16>, <8 x i16>* %0, align 2 +// CHECK-NEXT: ret <8 x i16> %1 +// CHECK-LABEL: test_vld1q_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i32* %a to <4 x i32>* +// CHECK-NEXT: %1 = load <4 x i32>, <4 x i32>* %0, align 4 +// CHECK-NEXT: ret <4 x i32> %1 +// CHECK-LABEL: test_vld1q_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64* %a to <2 x i64>* +// CHECK-NEXT: %1 = load <2 x i64>, <2 x i64>* %0, align 8 +// CHECK-NEXT: ret <2 x i64> %1 +// CHECK-LABEL: test_vld1q_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i8* %a to <16 x i8>* +// CHECK-NEXT: %1 = load <16 x i8>, <16 x i8>* %0, align 1 +// CHECK-NEXT: ret <16 x i8> %1 +// CHECK-LABEL: test_vld1q_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i16* %a to <8 x i16>* +// CHECK-NEXT: %1 = load <8 x i16>, <8 x i16>* %0, align 2 +// CHECK-NEXT: ret <8 x i16> %1 +// CHECK-LABEL: test_vld1q_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i32* %a to <4 x i32>* +// CHECK-NEXT: %1 = load <4 x i32>, <4 x i32>* %0, align 4 +// CHECK-NEXT: ret <4 x i32> %1 +// CHECK-LABEL: test_vld1q_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64* %a to <2 x i64>* +// CHECK-NEXT: %1 = load <2 x i64>, <2 x i64>* %0, align 8 +// CHECK-NEXT: ret <2 x i64> %1 +// CHECK-LABEL: test_vld1q_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast half* %a to <8 x half>* +// CHECK-NEXT: %1 = load <8 x half>, <8 x half>* %0, align 2 +// CHECK-NEXT: ret <8 x half> %1 +// CHECK-LABEL: test_vld1q_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast float* %a to <4 x float>* +// CHECK-NEXT: %1 = load <4 x float>, <4 x float>* %0, align 4 +// CHECK-NEXT: ret <4 x float> %1 +// CHECK-LABEL: test_vld1q_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i8* %a to <16 x i8>* +// CHECK-NEXT: %1 = load <16 x i8>, <16 x i8>* %0, align 1 +// CHECK-NEXT: ret <16 x i8> %1 +// CHECK-LABEL: test_vld1q_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i16* %a to <8 x i16>* +// CHECK-NEXT: %1 = load <8 x i16>, <8 x i16>* %0, align 2 +// CHECK-NEXT: ret <8 x i16> %1 +// CHECK-LABEL: test_vld1_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i8* %a to <8 x i8>* +// CHECK-NEXT: %1 = load <8 x i8>, <8 x i8>* %0, align 1 +// CHECK-NEXT: ret <8 x i8> %1 +// CHECK-LABEL: test_vld1_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i16* %a to <4 x i16>* +// CHECK-NEXT: %1 = load <4 x i16>, <4 x i16>* %0, align 2 +// CHECK-NEXT: ret <4 x i16> %1 +// CHECK-LABEL: test_vld1_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i32* %a to <2 x i32>* +// CHECK-NEXT: %1 = load <2 x i32>, <2 x i32>* %0, align 4 +// 
CHECK-NEXT: ret <2 x i32> %1 +// CHECK-LABEL: test_vld1_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64* %a to <1 x i64>* +// CHECK-NEXT: %1 = load <1 x i64>, <1 x i64>* %0, align 8 +// CHECK-NEXT: ret <1 x i64> %1 +// CHECK-LABEL: test_vld1_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i8* %a to <8 x i8>* +// CHECK-NEXT: %1 = load <8 x i8>, <8 x i8>* %0, align 1 +// CHECK-NEXT: ret <8 x i8> %1 +// CHECK-LABEL: test_vld1_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i16* %a to <4 x i16>* +// CHECK-NEXT: %1 = load <4 x i16>, <4 x i16>* %0, align 2 +// CHECK-NEXT: ret <4 x i16> %1 +// CHECK-LABEL: test_vld1_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i32* %a to <2 x i32>* +// CHECK-NEXT: %1 = load <2 x i32>, <2 x i32>* %0, align 4 +// CHECK-NEXT: ret <2 x i32> %1 +// CHECK-LABEL: test_vld1_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64* %a to <1 x i64>* +// CHECK-NEXT: %1 = load <1 x i64>, <1 x i64>* %0, align 8 +// CHECK-NEXT: ret <1 x i64> %1 +// CHECK-LABEL: test_vld1_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast half* %a to <4 x half>* +// CHECK-NEXT: %1 = load <4 x half>, <4 x half>* %0, align 2 +// CHECK-NEXT: ret <4 x half> %1 +// CHECK-LABEL: test_vld1_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast float* %a to <2 x float>* +// CHECK-NEXT: %1 = load <2 x float>, <2 x float>* %0, align 4 +// CHECK-NEXT: ret <2 x float> %1 +// CHECK-LABEL: test_vld1_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i8* %a to <8 x i8>* +// CHECK-NEXT: %1 = load <8 x i8>, <8 x i8>* %0, align 1 +// CHECK-NEXT: ret <8 x i8> %1 +// CHECK-LABEL: test_vld1_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i16* %a to <4 x i16>* +// CHECK-NEXT: %1 = load <4 x i16>, <4 x i16>* %0, align 2 +// CHECK-NEXT: ret <4 x i16> %1 +// CHECK-LABEL: test_vld1q_dup_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = load i8, i8* %a, align 1 +// CHECK-NEXT: %1 = insertelement <16 x i8> undef, i8 %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: ret <16 x i8> %lane +// CHECK-LABEL: test_vld1q_dup_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = load i16, i16* %a, align 2 +// CHECK-NEXT: %1 = insertelement <8 x i16> undef, i16 %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i16> %lane +// CHECK-LABEL: test_vld1q_dup_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = load i32, i32* %a, align 4 +// CHECK-NEXT: %1 = insertelement <4 x i32> undef, i32 %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x i32> %lane +// CHECK-LABEL: test_vld1q_dup_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = load i64, i64* %a, align 8 +// CHECK-NEXT: %1 = insertelement <2 x i64> undef, i64 %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: ret <2 x i64> %lane +// CHECK-LABEL: test_vld1q_dup_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = load i8, i8* %a, align 1 +// CHECK-NEXT: %1 = insertelement <16 x i8> undef, i8 %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: ret <16 x i8> %lane +// CHECK-LABEL: test_vld1q_dup_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = load i16, i16* %a, align 2 +// CHECK-NEXT: %1 = insertelement <8 x i16> undef, i16 %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i16> 
%lane +// CHECK-LABEL: test_vld1q_dup_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = load i32, i32* %a, align 4 +// CHECK-NEXT: %1 = insertelement <4 x i32> undef, i32 %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x i32> %lane +// CHECK-LABEL: test_vld1q_dup_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = load i64, i64* %a, align 8 +// CHECK-NEXT: %1 = insertelement <2 x i64> undef, i64 %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: ret <2 x i64> %lane +// CHECK-LABEL: test_vld1q_dup_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = load half, half* %a, align 2 +// CHECK-NEXT: %1 = insertelement <8 x half> undef, half %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <8 x half> %1, <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x half> %lane +// CHECK-LABEL: test_vld1q_dup_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = load float, float* %a, align 4 +// CHECK-NEXT: %1 = insertelement <4 x float> undef, float %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x float> %lane +// CHECK-LABEL: test_vld1q_dup_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = load i8, i8* %a, align 1 +// CHECK-NEXT: %1 = insertelement <16 x i8> undef, i8 %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: ret <16 x i8> %lane +// CHECK-LABEL: test_vld1q_dup_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = load i16, i16* %a, align 2 +// CHECK-NEXT: %1 = insertelement <8 x i16> undef, i16 %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i16> %lane +// CHECK-LABEL: test_vld1_dup_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = load i8, i8* %a, align 1 +// CHECK-NEXT: %1 = insertelement <8 x i8> undef, i8 %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i8> %lane +// CHECK-LABEL: test_vld1_dup_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = load i16, i16* %a, align 2 +// CHECK-NEXT: %1 = insertelement <4 x i16> undef, i16 %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x i16> %lane +// CHECK-LABEL: test_vld1_dup_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = load i32, i32* %a, align 4 +// CHECK-NEXT: %1 = insertelement <2 x i32> undef, i32 %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: ret <2 x i32> %lane +// CHECK-LABEL: test_vld1_dup_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = load i64, i64* %a, align 8 +// CHECK-NEXT: %1 = insertelement <1 x i64> undef, i64 %0, i32 0 +// CHECK-NEXT: ret <1 x i64> %1 +// CHECK-LABEL: test_vld1_dup_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = load i8, i8* %a, align 1 +// CHECK-NEXT: %1 = insertelement <8 x i8> undef, i8 %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i8> %lane +// CHECK-LABEL: test_vld1_dup_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = load i16, i16* %a, align 2 +// CHECK-NEXT: %1 = insertelement <4 x i16> undef, i16 %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x i16> %lane +// CHECK-LABEL: test_vld1_dup_s32 +// CHECK: entry: +// 
CHECK-NEXT: %0 = load i32, i32* %a, align 4 +// CHECK-NEXT: %1 = insertelement <2 x i32> undef, i32 %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: ret <2 x i32> %lane +// CHECK-LABEL: test_vld1_dup_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = load i64, i64* %a, align 8 +// CHECK-NEXT: %1 = insertelement <1 x i64> undef, i64 %0, i32 0 +// CHECK-NEXT: ret <1 x i64> %1 +// CHECK-LABEL: test_vld1_dup_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = load half, half* %a, align 2 +// CHECK-NEXT: %1 = insertelement <4 x half> undef, half %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <4 x half> %1, <4 x half> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x half> %lane +// CHECK-LABEL: test_vld1_dup_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = load float, float* %a, align 4 +// CHECK-NEXT: %1 = insertelement <2 x float> undef, float %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: ret <2 x float> %lane +// CHECK-LABEL: test_vld1_dup_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = load i8, i8* %a, align 1 +// CHECK-NEXT: %1 = insertelement <8 x i8> undef, i8 %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i8> %lane +// CHECK-LABEL: test_vld1_dup_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = load i16, i16* %a, align 2 +// CHECK-NEXT: %1 = insertelement <4 x i16> undef, i16 %0, i32 0 +// CHECK-NEXT: %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x i16> %lane +// CHECK-LABEL: test_vld1q_lane_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = load i8, i8* %a, align 1 +// CHECK-NEXT: %vld1_lane = insertelement <16 x i8> %b, i8 %0, i32 15 +// CHECK-NEXT: ret <16 x i8> %vld1_lane +// CHECK-LABEL: test_vld1q_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = load i16, i16* %a, align 2 +// CHECK-NEXT: %vld1_lane = insertelement <8 x i16> %b, i16 %0, i32 7 +// CHECK-NEXT: ret <8 x i16> %vld1_lane +// CHECK-LABEL: test_vld1q_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = load i32, i32* %a, align 4 +// CHECK-NEXT: %vld1_lane = insertelement <4 x i32> %b, i32 %0, i32 3 +// CHECK-NEXT: ret <4 x i32> %vld1_lane +// CHECK-LABEL: test_vld1q_lane_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = shufflevector <2 x i64> %b, <2 x i64> undef, <1 x i32> zeroinitializer +// CHECK-NEXT: %1 = bitcast i64* %a to <1 x i64>* +// CHECK-NEXT: %2 = load <1 x i64>, <1 x i64>* %1, align 8 +// CHECK-NEXT: %vld1q_lane = shufflevector <1 x i64> %0, <1 x i64> %2, <2 x i32> <i32 0, i32 1> +// CHECK-NEXT: ret <2 x i64> %vld1q_lane +// CHECK-LABEL: test_vld1q_lane_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = load i8, i8* %a, align 1 +// CHECK-NEXT: %vld1_lane = insertelement <16 x i8> %b, i8 %0, i32 15 +// CHECK-NEXT: ret <16 x i8> %vld1_lane +// CHECK-LABEL: test_vld1q_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = load i16, i16* %a, align 2 +// CHECK-NEXT: %vld1_lane = insertelement <8 x i16> %b, i16 %0, i32 7 +// CHECK-NEXT: ret <8 x i16> %vld1_lane +// CHECK-LABEL: test_vld1q_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = load i32, i32* %a, align 4 +// CHECK-NEXT: %vld1_lane = insertelement <4 x i32> %b, i32 %0, i32 3 +// CHECK-NEXT: ret <4 x i32> %vld1_lane +// CHECK-LABEL: test_vld1q_lane_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = shufflevector <2 x i64> %b, <2 x i64> undef, <1 x i32> zeroinitializer +// CHECK-NEXT: %1 = bitcast i64* %a to <1 x i64>* +// CHECK-NEXT: %2 = load <1 x i64>, <1 x 
i64>* %1, align 8 +// CHECK-NEXT: %vld1q_lane = shufflevector <1 x i64> %0, <1 x i64> %2, <2 x i32> <i32 0, i32 1> +// CHECK-NEXT: ret <2 x i64> %vld1q_lane +// CHECK-LABEL: test_vld1q_lane_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = load half, half* %a, align 2 +// CHECK-NEXT: %vld1_lane = insertelement <8 x half> %b, half %0, i32 7 +// CHECK-NEXT: ret <8 x half> %vld1_lane +// CHECK-LABEL: test_vld1q_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = load float, float* %a, align 4 +// CHECK-NEXT: %vld1_lane = insertelement <4 x float> %b, float %0, i32 3 +// CHECK-NEXT: ret <4 x float> %vld1_lane +// CHECK-LABEL: test_vld1q_lane_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = load i8, i8* %a, align 1 +// CHECK-NEXT: %vld1_lane = insertelement <16 x i8> %b, i8 %0, i32 15 +// CHECK-NEXT: ret <16 x i8> %vld1_lane +// CHECK-LABEL: test_vld1q_lane_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = load i16, i16* %a, align 2 +// CHECK-NEXT: %vld1_lane = insertelement <8 x i16> %b, i16 %0, i32 7 +// CHECK-NEXT: ret <8 x i16> %vld1_lane +// CHECK-LABEL: test_vld1_lane_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = load i8, i8* %a, align 1 +// CHECK-NEXT: %vld1_lane = insertelement <8 x i8> %b, i8 %0, i32 7 +// CHECK-NEXT: ret <8 x i8> %vld1_lane +// CHECK-LABEL: test_vld1_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = load i16, i16* %a, align 2 +// CHECK-NEXT: %vld1_lane = insertelement <4 x i16> %b, i16 %0, i32 3 +// CHECK-NEXT: ret <4 x i16> %vld1_lane +// CHECK-LABEL: test_vld1_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = load i32, i32* %a, align 4 +// CHECK-NEXT: %vld1_lane = insertelement <2 x i32> %b, i32 %0, i32 1 +// CHECK-NEXT: ret <2 x i32> %vld1_lane +// CHECK-LABEL: test_vld1_lane_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = load i64, i64* %a, align 8 +// CHECK-NEXT: %vld1_lane = insertelement <1 x i64> undef, i64 %0, i32 0 +// CHECK-NEXT: ret <1 x i64> %vld1_lane +// CHECK-LABEL: test_vld1_lane_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = load i8, i8* %a, align 1 +// CHECK-NEXT: %vld1_lane = insertelement <8 x i8> %b, i8 %0, i32 7 +// CHECK-NEXT: ret <8 x i8> %vld1_lane +// CHECK-LABEL: test_vld1_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = load i16, i16* %a, align 2 +// CHECK-NEXT: %vld1_lane = insertelement <4 x i16> %b, i16 %0, i32 3 +// CHECK-NEXT: ret <4 x i16> %vld1_lane +// CHECK-LABEL: test_vld1_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = load i32, i32* %a, align 4 +// CHECK-NEXT: %vld1_lane = insertelement <2 x i32> %b, i32 %0, i32 1 +// CHECK-NEXT: ret <2 x i32> %vld1_lane +// CHECK-LABEL: test_vld1_lane_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = load i64, i64* %a, align 8 +// CHECK-NEXT: %vld1_lane = insertelement <1 x i64> undef, i64 %0, i32 0 +// CHECK-NEXT: ret <1 x i64> %vld1_lane +// CHECK-LABEL: test_vld1_lane_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = load half, half* %a, align 2 +// CHECK-NEXT: %vld1_lane = insertelement <4 x half> %b, half %0, i32 3 +// CHECK-NEXT: ret <4 x half> %vld1_lane +// CHECK-LABEL: test_vld1_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = load float, float* %a, align 4 +// CHECK-NEXT: %vld1_lane = insertelement <2 x float> %b, float %0, i32 1 +// CHECK-NEXT: ret <2 x float> %vld1_lane +// CHECK-LABEL: test_vld1_lane_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = load i8, i8* %a, align 1 +// CHECK-NEXT: %vld1_lane = insertelement <8 x i8> %b, i8 %0, i32 7 +// CHECK-NEXT: ret <8 x i8> %vld1_lane +// CHECK-LABEL: test_vld1_lane_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = load i16, i16* %a, align 2 +// CHECK-NEXT: %vld1_lane = insertelement <4 x i16> %b, i16 %0, i32 3 +// 
CHECK-NEXT: ret <4 x i16> %vld1_lane +// CHECK-LABEL: test_vld2q_u8 +// CHECK: entry: +// CHECK-NEXT: %vld2q_v = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8.p0i8(i8* %a, i32 1) +// CHECK-NEXT: %vld2q_v.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2q_v, 0 +// CHECK-NEXT: %vld2q_v.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2q_v, 1 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx1 = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <16 x i8> %vld2q_v.fca.0.extract, <16 x i8>* %__ret.sroa.0.0..sroa_idx1, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <16 x i8> %vld2q_v.fca.1.extract, <16 x i8>* %__ret.sroa.4.0..sroa_idx4, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld2q_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i16* %a to i8* +// CHECK-NEXT: %vld2q_v = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0i8(i8* %0, i32 2) +// CHECK-NEXT: %vld2q_v.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2q_v, 0 +// CHECK-NEXT: %vld2q_v.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2q_v, 1 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <8 x i16> %vld2q_v.fca.0.extract, <8 x i16>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <8 x i16> %vld2q_v.fca.1.extract, <8 x i16>* %__ret.sroa.4.0..sroa_idx2, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld2q_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i32* %a to i8* +// CHECK-NEXT: %vld2q_v = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* %0, i32 4) +// CHECK-NEXT: %vld2q_v.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2q_v, 0 +// CHECK-NEXT: %vld2q_v.fca.1.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2q_v, 1 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <4 x i32> %vld2q_v.fca.0.extract, <4 x i32>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <4 x i32> %vld2q_v.fca.1.extract, <4 x i32>* %__ret.sroa.4.0..sroa_idx2, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld2q_s8 +// CHECK: entry: +// CHECK-NEXT: %vld2q_v = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8.p0i8(i8* %a, i32 1) +// CHECK-NEXT: %vld2q_v.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2q_v, 0 +// CHECK-NEXT: %vld2q_v.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2q_v, 1 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx1 = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <16 x i8> %vld2q_v.fca.0.extract, <16 x i8>* %__ret.sroa.0.0..sroa_idx1, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <16 x i8> %vld2q_v.fca.1.extract, <16 x i8>* %__ret.sroa.4.0..sroa_idx4, align 8 +// CHECK-NEXT: ret 
void +// CHECK-LABEL: test_vld2q_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i16* %a to i8* +// CHECK-NEXT: %vld2q_v = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0i8(i8* %0, i32 2) +// CHECK-NEXT: %vld2q_v.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2q_v, 0 +// CHECK-NEXT: %vld2q_v.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2q_v, 1 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <8 x i16> %vld2q_v.fca.0.extract, <8 x i16>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <8 x i16> %vld2q_v.fca.1.extract, <8 x i16>* %__ret.sroa.4.0..sroa_idx2, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld2q_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i32* %a to i8* +// CHECK-NEXT: %vld2q_v = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* %0, i32 4) +// CHECK-NEXT: %vld2q_v.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2q_v, 0 +// CHECK-NEXT: %vld2q_v.fca.1.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2q_v, 1 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <4 x i32> %vld2q_v.fca.0.extract, <4 x i32>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <4 x i32> %vld2q_v.fca.1.extract, <4 x i32>* %__ret.sroa.4.0..sroa_idx2, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld2q_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast half* %a to i8* +// CHECK-NEXT: %vld2q_v = tail call { <8 x half>, <8 x half> } @llvm.arm.neon.vld2.v8f16.p0i8(i8* %0, i32 2) +// CHECK-NEXT: %vld2q_v.fca.0.extract = extractvalue { <8 x half>, <8 x half> } %vld2q_v, 0 +// CHECK-NEXT: %vld2q_v.fca.1.extract = extractvalue { <8 x half>, <8 x half> } %vld2q_v, 1 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <8 x half> %vld2q_v.fca.0.extract, <8 x half>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <8 x half> %vld2q_v.fca.1.extract, <8 x half>* %__ret.sroa.4.0..sroa_idx2, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld2q_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast float* %a to i8* +// CHECK-NEXT: %vld2q_v = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32.p0i8(i8* %0, i32 4) +// CHECK-NEXT: %vld2q_v.fca.0.extract = extractvalue { <4 x float>, <4 x float> } %vld2q_v, 0 +// CHECK-NEXT: %vld2q_v.fca.1.extract = extractvalue { <4 x float>, <4 x float> } %vld2q_v, 1 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <4 x float> %vld2q_v.fca.0.extract, <4 x float>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <4 x float> 
%vld2q_v.fca.1.extract, <4 x float>* %__ret.sroa.4.0..sroa_idx2, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld2q_p8 +// CHECK: entry: +// CHECK-NEXT: %vld2q_v = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8.p0i8(i8* %a, i32 1) +// CHECK-NEXT: %vld2q_v.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2q_v, 0 +// CHECK-NEXT: %vld2q_v.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2q_v, 1 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx1 = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <16 x i8> %vld2q_v.fca.0.extract, <16 x i8>* %__ret.sroa.0.0..sroa_idx1, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <16 x i8> %vld2q_v.fca.1.extract, <16 x i8>* %__ret.sroa.4.0..sroa_idx4, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld2q_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i16* %a to i8* +// CHECK-NEXT: %vld2q_v = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0i8(i8* %0, i32 2) +// CHECK-NEXT: %vld2q_v.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2q_v, 0 +// CHECK-NEXT: %vld2q_v.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2q_v, 1 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <8 x i16> %vld2q_v.fca.0.extract, <8 x i16>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <8 x i16> %vld2q_v.fca.1.extract, <8 x i16>* %__ret.sroa.4.0..sroa_idx2, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld2_u8 +// CHECK: entry: +// CHECK-NEXT: %vld2_v = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0i8(i8* %a, i32 1) +// CHECK-NEXT: %vld2_v.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_v, 0 +// CHECK-NEXT: %vld2_v.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_v, 1 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx1 = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <8 x i8> %vld2_v.fca.0.extract, <8 x i8>* %__ret.sroa.0.0..sroa_idx1, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <8 x i8> %vld2_v.fca.1.extract, <8 x i8>* %__ret.sroa.4.0..sroa_idx4, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld2_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i16* %a to i8* +// CHECK-NEXT: %vld2_v = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0i8(i8* %0, i32 2) +// CHECK-NEXT: %vld2_v.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_v, 0 +// CHECK-NEXT: %vld2_v.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_v, 1 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <4 x i16> %vld2_v.fca.0.extract, <4 x i16>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <4 x i16> %vld2_v.fca.1.extract, <4 x i16>* 
%__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2_u32
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i32* %a to i8*
+// CHECK-NEXT: %vld2_v = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32.p0i8(i8* %0, i32 4)
+// CHECK-NEXT: %vld2_v.fca.0.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2_v, 0
+// CHECK-NEXT: %vld2_v.fca.1.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <2 x i32> %vld2_v.fca.0.extract, <2 x i32>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <2 x i32> %vld2_v.fca.1.extract, <2 x i32>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2_u64
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i64* %a to i8*
+// CHECK-NEXT: %vld2_v = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64.p0i8(i8* %0, i32 8)
+// CHECK-NEXT: %vld2_v.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_v, 0
+// CHECK-NEXT: %vld2_v.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <1 x i64> %vld2_v.fca.0.extract, <1 x i64>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <1 x i64> %vld2_v.fca.1.extract, <1 x i64>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2_s8
+// CHECK: entry:
+// CHECK-NEXT: %vld2_v = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0i8(i8* %a, i32 1)
+// CHECK-NEXT: %vld2_v.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_v, 0
+// CHECK-NEXT: %vld2_v.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx1 = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i8> %vld2_v.fca.0.extract, <8 x i8>* %__ret.sroa.0.0..sroa_idx1, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i8> %vld2_v.fca.1.extract, <8 x i8>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2_s16
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld2_v = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0i8(i8* %0, i32 2)
+// CHECK-NEXT: %vld2_v.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_v, 0
+// CHECK-NEXT: %vld2_v.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x i16> %vld2_v.fca.0.extract, <4 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x i16> %vld2_v.fca.1.extract, <4 x i16>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2_s32
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i32* %a to i8*
+// CHECK-NEXT: %vld2_v = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32.p0i8(i8* %0, i32 4)
+// CHECK-NEXT: %vld2_v.fca.0.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2_v, 0
+// CHECK-NEXT: %vld2_v.fca.1.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <2 x i32> %vld2_v.fca.0.extract, <2 x i32>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <2 x i32> %vld2_v.fca.1.extract, <2 x i32>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2_s64
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i64* %a to i8*
+// CHECK-NEXT: %vld2_v = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64.p0i8(i8* %0, i32 8)
+// CHECK-NEXT: %vld2_v.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_v, 0
+// CHECK-NEXT: %vld2_v.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <1 x i64> %vld2_v.fca.0.extract, <1 x i64>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <1 x i64> %vld2_v.fca.1.extract, <1 x i64>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2_f16
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast half* %a to i8*
+// CHECK-NEXT: %vld2_v = tail call { <4 x half>, <4 x half> } @llvm.arm.neon.vld2.v4f16.p0i8(i8* %0, i32 2)
+// CHECK-NEXT: %vld2_v.fca.0.extract = extractvalue { <4 x half>, <4 x half> } %vld2_v, 0
+// CHECK-NEXT: %vld2_v.fca.1.extract = extractvalue { <4 x half>, <4 x half> } %vld2_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x half> %vld2_v.fca.0.extract, <4 x half>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x half> %vld2_v.fca.1.extract, <4 x half>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2_f32
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast float* %a to i8*
+// CHECK-NEXT: %vld2_v = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32.p0i8(i8* %0, i32 4)
+// CHECK-NEXT: %vld2_v.fca.0.extract = extractvalue { <2 x float>, <2 x float> } %vld2_v, 0
+// CHECK-NEXT: %vld2_v.fca.1.extract = extractvalue { <2 x float>, <2 x float> } %vld2_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <2 x float> %vld2_v.fca.0.extract, <2 x float>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <2 x float> %vld2_v.fca.1.extract, <2 x float>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2_p8
+// CHECK: entry:
+// CHECK-NEXT: %vld2_v = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0i8(i8* %a, i32 1)
+// CHECK-NEXT: %vld2_v.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_v, 0
+// CHECK-NEXT: %vld2_v.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx1 = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i8> %vld2_v.fca.0.extract, <8 x i8>* %__ret.sroa.0.0..sroa_idx1, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i8> %vld2_v.fca.1.extract, <8 x i8>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2_p16
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld2_v = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0i8(i8* %0, i32 2)
+// CHECK-NEXT: %vld2_v.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_v, 0
+// CHECK-NEXT: %vld2_v.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x i16> %vld2_v.fca.0.extract, <4 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x i16> %vld2_v.fca.1.extract, <4 x i16>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: ret void
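+//
+// The vld2q_lane expectations below assume driver functions of the usual
+// arm_neon_intrinsics.c shape (a sketch based on the ACLE signatures; the
+// exact drivers live in the .c file), e.g.:
+//   uint16x8x2_t test_vld2q_lane_u16(uint16_t const *a, uint16x8x2_t b) {
+//     return vld2q_lane_u16(a, b, 7);
+//   }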
+// CHECK-LABEL: test_vld2q_lane_u16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32>
+// CHECK-NEXT: %4 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld2q_lane_v = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* %4, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, i32 7, i32 2)
+// CHECK-NEXT: %vld2q_lane_v.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2q_lane_v, 0
+// CHECK-NEXT: %vld2q_lane_v.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2q_lane_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i16> %vld2q_lane_v.fca.0.extract, <8 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i16> %vld2q_lane_v.fca.1.extract, <8 x i16>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2q_lane_u32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x i32> %b.sroa.0.0.vec.expand, <4 x i32> %b.sroa.0.8.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x i32> %2, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x i32> %3, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x i32> %b.sroa.3.16.vec.expand, <4 x i32> %b.sroa.3.24.vec.expand, <4 x i32>
+// CHECK-NEXT: %4 = bitcast i32* %a to i8*
+// CHECK-NEXT: %vld2q_lane_v = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32.p0i8(i8* %4, <4 x i32> %b.sroa.0.8.vecblend, <4 x i32> %b.sroa.3.24.vecblend, i32 3, i32 4)
+// CHECK-NEXT: %vld2q_lane_v.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2q_lane_v, 0
+// CHECK-NEXT: %vld2q_lane_v.fca.1.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2q_lane_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x i32> %vld2q_lane_v.fca.0.extract, <4 x i32>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x i32> %vld2q_lane_v.fca.1.extract, <4 x i32>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2q_lane_s16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32>
+// CHECK-NEXT: %4 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld2q_lane_v = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* %4, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, i32 7, i32 2)
+// CHECK-NEXT: %vld2q_lane_v.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2q_lane_v, 0
+// CHECK-NEXT: %vld2q_lane_v.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2q_lane_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i16> %vld2q_lane_v.fca.0.extract, <8 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i16> %vld2q_lane_v.fca.1.extract, <8 x i16>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2q_lane_s32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x i32> %b.sroa.0.0.vec.expand, <4 x i32> %b.sroa.0.8.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x i32> %2, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x i32> %3, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x i32> %b.sroa.3.16.vec.expand, <4 x i32> %b.sroa.3.24.vec.expand, <4 x i32>
+// CHECK-NEXT: %4 = bitcast i32* %a to i8*
+// CHECK-NEXT: %vld2q_lane_v = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32.p0i8(i8* %4, <4 x i32> %b.sroa.0.8.vecblend, <4 x i32> %b.sroa.3.24.vecblend, i32 3, i32 4)
+// CHECK-NEXT: %vld2q_lane_v.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2q_lane_v, 0
+// CHECK-NEXT: %vld2q_lane_v.fca.1.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2q_lane_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x i32> %vld2q_lane_v.fca.0.extract, <4 x i32>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x i32> %vld2q_lane_v.fca.1.extract, <4 x i32>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2q_lane_f16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x half> %0, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x half> %1, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x half> %b.sroa.0.0.vec.expand, <8 x half> %b.sroa.0.8.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x half> %2, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x half> %3, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x half> %b.sroa.3.16.vec.expand, <8 x half> %b.sroa.3.24.vec.expand, <8 x i32>
+// CHECK-NEXT: %4 = bitcast half* %a to i8*
+// CHECK-NEXT: %vld2q_lane_v = tail call { <8 x half>, <8 x half> } @llvm.arm.neon.vld2lane.v8f16.p0i8(i8* %4, <8 x half> %b.sroa.0.8.vecblend, <8 x half> %b.sroa.3.24.vecblend, i32 7, i32 2)
+// CHECK-NEXT: %vld2q_lane_v.fca.0.extract = extractvalue { <8 x half>, <8 x half> } %vld2q_lane_v, 0
+// CHECK-NEXT: %vld2q_lane_v.fca.1.extract = extractvalue { <8 x half>, <8 x half> } %vld2q_lane_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x half> %vld2q_lane_v.fca.0.extract, <8 x half>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x half> %vld2q_lane_v.fca.1.extract, <8 x half>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2q_lane_f32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x float> %b.sroa.0.0.vec.expand, <4 x float> %b.sroa.0.8.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x float> %b.sroa.3.16.vec.expand, <4 x float> %b.sroa.3.24.vec.expand, <4 x i32>
+// CHECK-NEXT: %4 = bitcast float* %a to i8*
+// CHECK-NEXT: %vld2q_lane_v = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32.p0i8(i8* %4, <4 x float> %b.sroa.0.8.vecblend, <4 x float> %b.sroa.3.24.vecblend, i32 3, i32 4)
+// CHECK-NEXT: %vld2q_lane_v.fca.0.extract = extractvalue { <4 x float>, <4 x float> } %vld2q_lane_v, 0
+// CHECK-NEXT: %vld2q_lane_v.fca.1.extract = extractvalue { <4 x float>, <4 x float> } %vld2q_lane_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x float> %vld2q_lane_v.fca.0.extract, <4 x float>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x float> %vld2q_lane_v.fca.1.extract, <4 x float>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2q_lane_p16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32>
+// CHECK-NEXT: %4 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld2q_lane_v = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* %4, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, i32 7, i32 2)
+// CHECK-NEXT: %vld2q_lane_v.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2q_lane_v, 0
+// CHECK-NEXT: %vld2q_lane_v.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2q_lane_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i16> %vld2q_lane_v.fca.0.extract, <8 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i16> %vld2q_lane_v.fca.1.extract, <8 x i16>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
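+//
+// The 64-bit vld2_lane expectations below assume drivers of the same shape
+// (a sketch based on the ACLE signatures), e.g.:
+//   uint8x8x2_t test_vld2_lane_u8(uint8_t const *a, uint8x8x2_t b) {
+//     return vld2_lane_u8(a, b, 7);
+//   }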
+// CHECK-LABEL: test_vld2_lane_u8
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8>
+// CHECK-NEXT: %vld2_lane_v = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> %0, <8 x i8> %1, i32 7, i32 1)
+// CHECK-NEXT: %vld2_lane_v.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_lane_v, 0
+// CHECK-NEXT: %vld2_lane_v.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_lane_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx3 = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i8> %vld2_lane_v.fca.0.extract, <8 x i8>* %__ret.sroa.0.0..sroa_idx3, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx6 = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i8> %vld2_lane_v.fca.1.extract, <8 x i8>* %__ret.sroa.4.0..sroa_idx6, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2_lane_u16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %2 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld2_lane_v = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %2, <4 x i16> %0, <4 x i16> %1, i32 3, i32 2)
+// CHECK-NEXT: %vld2_lane_v.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_lane_v, 0
+// CHECK-NEXT: %vld2_lane_v.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_lane_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x i16> %vld2_lane_v.fca.0.extract, <4 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x i16> %vld2_lane_v.fca.1.extract, <4 x i16>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2_lane_u32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32>
+// CHECK-NEXT: %2 = bitcast i32* %a to i8*
+// CHECK-NEXT: %vld2_lane_v = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %2, <2 x i32> %0, <2 x i32> %1, i32 1, i32 4)
+// CHECK-NEXT: %vld2_lane_v.fca.0.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2_lane_v, 0
+// CHECK-NEXT: %vld2_lane_v.fca.1.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2_lane_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <2 x i32> %vld2_lane_v.fca.0.extract, <2 x i32>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <2 x i32> %vld2_lane_v.fca.1.extract, <2 x i32>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2_lane_s8
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8>
+// CHECK-NEXT: %vld2_lane_v = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> %0, <8 x i8> %1, i32 7, i32 1)
+// CHECK-NEXT: %vld2_lane_v.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_lane_v, 0
+// CHECK-NEXT: %vld2_lane_v.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_lane_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx3 = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i8> %vld2_lane_v.fca.0.extract, <8 x i8>* %__ret.sroa.0.0..sroa_idx3, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx6 = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i8> %vld2_lane_v.fca.1.extract, <8 x i8>* %__ret.sroa.4.0..sroa_idx6, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2_lane_s16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %2 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld2_lane_v = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %2, <4 x i16> %0, <4 x i16> %1, i32 3, i32 2)
+// CHECK-NEXT: %vld2_lane_v.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_lane_v, 0
+// CHECK-NEXT: %vld2_lane_v.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_lane_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x i16> %vld2_lane_v.fca.0.extract, <4 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x i16> %vld2_lane_v.fca.1.extract, <4 x i16>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2_lane_s32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32>
+// CHECK-NEXT: %2 = bitcast i32* %a to i8*
+// CHECK-NEXT: %vld2_lane_v = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %2, <2 x i32> %0, <2 x i32> %1, i32 1, i32 4)
+// CHECK-NEXT: %vld2_lane_v.fca.0.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2_lane_v, 0
+// CHECK-NEXT: %vld2_lane_v.fca.1.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2_lane_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <2 x i32> %vld2_lane_v.fca.0.extract, <2 x i32>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <2 x i32> %vld2_lane_v.fca.1.extract, <2 x i32>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2_lane_f16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x half>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x half>
+// CHECK-NEXT: %2 = bitcast half* %a to i8*
+// CHECK-NEXT: %vld2_lane_v = tail call { <4 x half>, <4 x half> } @llvm.arm.neon.vld2lane.v4f16.p0i8(i8* %2, <4 x half> %0, <4 x half> %1, i32 3, i32 2)
+// CHECK-NEXT: %vld2_lane_v.fca.0.extract = extractvalue { <4 x half>, <4 x half> } %vld2_lane_v, 0
+// CHECK-NEXT: %vld2_lane_v.fca.1.extract = extractvalue { <4 x half>, <4 x half> } %vld2_lane_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x half> %vld2_lane_v.fca.0.extract, <4 x half>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x half> %vld2_lane_v.fca.1.extract, <4 x half>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2_lane_f32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x float>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x float>
+// CHECK-NEXT: %2 = bitcast float* %a to i8*
+// CHECK-NEXT: %vld2_lane_v = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32.p0i8(i8* %2, <2 x float> %0, <2 x float> %1, i32 1, i32 4)
+// CHECK-NEXT: %vld2_lane_v.fca.0.extract = extractvalue { <2 x float>, <2 x float> } %vld2_lane_v, 0
+// CHECK-NEXT: %vld2_lane_v.fca.1.extract = extractvalue { <2 x float>, <2 x float> } %vld2_lane_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <2 x float> %vld2_lane_v.fca.0.extract, <2 x float>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <2 x float> %vld2_lane_v.fca.1.extract, <2 x float>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2_lane_p8
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8>
+// CHECK-NEXT: %vld2_lane_v = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> %0, <8 x i8> %1, i32 7, i32 1)
+// CHECK-NEXT: %vld2_lane_v.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_lane_v, 0
+// CHECK-NEXT: %vld2_lane_v.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_lane_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx3 = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i8> %vld2_lane_v.fca.0.extract, <8 x i8>* %__ret.sroa.0.0..sroa_idx3, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx6 = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i8> %vld2_lane_v.fca.1.extract, <8 x i8>* %__ret.sroa.4.0..sroa_idx6, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld2_lane_p16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %2 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld2_lane_v = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %2, <4 x i16> %0, <4 x i16> %1, i32 3, i32 2)
+// CHECK-NEXT: %vld2_lane_v.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_lane_v, 0
+// CHECK-NEXT: %vld2_lane_v.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_lane_v, 1
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x i16> %vld2_lane_v.fca.0.extract, <4 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x i16> %vld2_lane_v.fca.1.extract, <4 x i16>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
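+//
+// The three-register vld3q expectations below assume drivers of the form
+// (a sketch based on the ACLE signatures), e.g.:
+//   uint8x16x3_t test_vld3q_u8(uint8_t const *a) { return vld3q_u8(a); }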
+// CHECK-LABEL: test_vld3q_u8
+// CHECK: entry:
+// CHECK-NEXT: %vld3q_v = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8* %a, i32 1)
+// CHECK-NEXT: %vld3q_v.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3q_v, 0
+// CHECK-NEXT: %vld3q_v.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3q_v, 1
+// CHECK-NEXT: %vld3q_v.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3q_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx1 = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <16 x i8> %vld3q_v.fca.0.extract, <16 x i8>* %__ret.sroa.0.0..sroa_idx1, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <16 x i8> %vld3q_v.fca.1.extract, <16 x i8>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx7 = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <16 x i8> %vld3q_v.fca.2.extract, <16 x i8>* %__ret.sroa.5.0..sroa_idx7, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3q_u16
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld3q_v = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0i8(i8* %0, i32 2)
+// CHECK-NEXT: %vld3q_v.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3q_v, 0
+// CHECK-NEXT: %vld3q_v.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3q_v, 1
+// CHECK-NEXT: %vld3q_v.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3q_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i16> %vld3q_v.fca.0.extract, <8 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i16> %vld3q_v.fca.1.extract, <8 x i16>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <8 x i16> %vld3q_v.fca.2.extract, <8 x i16>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3q_u32
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i32* %a to i8*
+// CHECK-NEXT: %vld3q_v = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0i8(i8* %0, i32 4)
+// CHECK-NEXT: %vld3q_v.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3q_v, 0
+// CHECK-NEXT: %vld3q_v.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3q_v, 1
+// CHECK-NEXT: %vld3q_v.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3q_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x i32> %vld3q_v.fca.0.extract, <4 x i32>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x i32> %vld3q_v.fca.1.extract, <4 x i32>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <4 x i32> %vld3q_v.fca.2.extract, <4 x i32>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3q_s8
+// CHECK: entry:
+// CHECK-NEXT: %vld3q_v = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8* %a, i32 1)
+// CHECK-NEXT: %vld3q_v.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3q_v, 0
+// CHECK-NEXT: %vld3q_v.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3q_v, 1
+// CHECK-NEXT: %vld3q_v.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3q_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx1 = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <16 x i8> %vld3q_v.fca.0.extract, <16 x i8>* %__ret.sroa.0.0..sroa_idx1, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <16 x i8> %vld3q_v.fca.1.extract, <16 x i8>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx7 = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <16 x i8> %vld3q_v.fca.2.extract, <16 x i8>* %__ret.sroa.5.0..sroa_idx7, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3q_s16
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld3q_v = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0i8(i8* %0, i32 2)
+// CHECK-NEXT: %vld3q_v.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3q_v, 0
+// CHECK-NEXT: %vld3q_v.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3q_v, 1
+// CHECK-NEXT: %vld3q_v.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3q_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i16> %vld3q_v.fca.0.extract, <8 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i16> %vld3q_v.fca.1.extract, <8 x i16>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <8 x i16> %vld3q_v.fca.2.extract, <8 x i16>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3q_s32
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i32* %a to i8*
+// CHECK-NEXT: %vld3q_v = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0i8(i8* %0, i32 4)
+// CHECK-NEXT: %vld3q_v.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3q_v, 0
+// CHECK-NEXT: %vld3q_v.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3q_v, 1
+// CHECK-NEXT: %vld3q_v.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3q_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x i32> %vld3q_v.fca.0.extract, <4 x i32>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x i32> %vld3q_v.fca.1.extract, <4 x i32>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <4 x i32> %vld3q_v.fca.2.extract, <4 x i32>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3q_f16
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast half* %a to i8*
+// CHECK-NEXT: %vld3q_v = tail call { <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld3.v8f16.p0i8(i8* %0, i32 2)
+// CHECK-NEXT: %vld3q_v.fca.0.extract = extractvalue { <8 x half>, <8 x half>, <8 x half> } %vld3q_v, 0
+// CHECK-NEXT: %vld3q_v.fca.1.extract = extractvalue { <8 x half>, <8 x half>, <8 x half> } %vld3q_v, 1
+// CHECK-NEXT: %vld3q_v.fca.2.extract = extractvalue { <8 x half>, <8 x half>, <8 x half> } %vld3q_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x half> %vld3q_v.fca.0.extract, <8 x half>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x half> %vld3q_v.fca.1.extract, <8 x half>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <8 x half> %vld3q_v.fca.2.extract, <8 x half>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3q_f32
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast float* %a to i8*
+// CHECK-NEXT: %vld3q_v = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32.p0i8(i8* %0, i32 4)
+// CHECK-NEXT: %vld3q_v.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3q_v, 0
+// CHECK-NEXT: %vld3q_v.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3q_v, 1
+// CHECK-NEXT: %vld3q_v.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3q_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x float> %vld3q_v.fca.0.extract, <4 x float>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x float> %vld3q_v.fca.1.extract, <4 x float>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <4 x float> %vld3q_v.fca.2.extract, <4 x float>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3q_p8
+// CHECK: entry:
+// CHECK-NEXT: %vld3q_v = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8* %a, i32 1)
+// CHECK-NEXT: %vld3q_v.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3q_v, 0
+// CHECK-NEXT: %vld3q_v.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3q_v, 1
+// CHECK-NEXT: %vld3q_v.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3q_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx1 = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <16 x i8> %vld3q_v.fca.0.extract, <16 x i8>* %__ret.sroa.0.0..sroa_idx1, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <16 x i8> %vld3q_v.fca.1.extract, <16 x i8>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx7 = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <16 x i8> %vld3q_v.fca.2.extract, <16 x i8>* %__ret.sroa.5.0..sroa_idx7, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3q_p16
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld3q_v = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0i8(i8* %0, i32 2)
+// CHECK-NEXT: %vld3q_v.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3q_v, 0
+// CHECK-NEXT: %vld3q_v.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3q_v, 1
+// CHECK-NEXT: %vld3q_v.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3q_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i16> %vld3q_v.fca.0.extract, <8 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i16> %vld3q_v.fca.1.extract, <8 x i16>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <8 x i16> %vld3q_v.fca.2.extract, <8 x i16>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
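+//
+// The 64-bit vld3 expectations below assume drivers of the form (a sketch
+// based on the ACLE signatures), e.g.:
+//   uint8x8x3_t test_vld3_u8(uint8_t const *a) { return vld3_u8(a); }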
+// CHECK-LABEL: test_vld3_u8
+// CHECK: entry:
+// CHECK-NEXT: %vld3_v = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8.p0i8(i8* %a, i32 1)
+// CHECK-NEXT: %vld3_v.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_v, 0
+// CHECK-NEXT: %vld3_v.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_v, 1
+// CHECK-NEXT: %vld3_v.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx1 = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i8> %vld3_v.fca.0.extract, <8 x i8>* %__ret.sroa.0.0..sroa_idx1, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i8> %vld3_v.fca.1.extract, <8 x i8>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx7 = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <8 x i8> %vld3_v.fca.2.extract, <8 x i8>* %__ret.sroa.5.0..sroa_idx7, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3_u16
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld3_v = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0i8(i8* %0, i32 2)
+// CHECK-NEXT: %vld3_v.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_v, 0
+// CHECK-NEXT: %vld3_v.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_v, 1
+// CHECK-NEXT: %vld3_v.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x i16> %vld3_v.fca.0.extract, <4 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x i16> %vld3_v.fca.1.extract, <4 x i16>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <4 x i16> %vld3_v.fca.2.extract, <4 x i16>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3_u32
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i32* %a to i8*
+// CHECK-NEXT: %vld3_v = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0i8(i8* %0, i32 4)
+// CHECK-NEXT: %vld3_v.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_v, 0
+// CHECK-NEXT: %vld3_v.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_v, 1
+// CHECK-NEXT: %vld3_v.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <2 x i32> %vld3_v.fca.0.extract, <2 x i32>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <2 x i32> %vld3_v.fca.1.extract, <2 x i32>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <2 x i32> %vld3_v.fca.2.extract, <2 x i32>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3_u64
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i64* %a to i8*
+// CHECK-NEXT: %vld3_v = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* %0, i32 8)
+// CHECK-NEXT: %vld3_v.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_v, 0
+// CHECK-NEXT: %vld3_v.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_v, 1
+// CHECK-NEXT: %vld3_v.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <1 x i64> %vld3_v.fca.0.extract, <1 x i64>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <1 x i64> %vld3_v.fca.1.extract, <1 x i64>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <1 x i64> %vld3_v.fca.2.extract, <1 x i64>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3_s8
+// CHECK: entry:
+// CHECK-NEXT: %vld3_v = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8.p0i8(i8* %a, i32 1)
+// CHECK-NEXT: %vld3_v.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_v, 0
+// CHECK-NEXT: %vld3_v.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_v, 1
+// CHECK-NEXT: %vld3_v.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx1 = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i8> %vld3_v.fca.0.extract, <8 x i8>* %__ret.sroa.0.0..sroa_idx1, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i8> %vld3_v.fca.1.extract, <8 x i8>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx7 = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <8 x i8> %vld3_v.fca.2.extract, <8 x i8>* %__ret.sroa.5.0..sroa_idx7, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3_s16
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld3_v = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0i8(i8* %0, i32 2)
+// CHECK-NEXT: %vld3_v.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_v, 0
+// CHECK-NEXT: %vld3_v.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_v, 1
+// CHECK-NEXT: %vld3_v.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x i16> %vld3_v.fca.0.extract, <4 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x i16> %vld3_v.fca.1.extract, <4 x i16>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <4 x i16> %vld3_v.fca.2.extract, <4 x i16>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3_s32
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i32* %a to i8*
+// CHECK-NEXT: %vld3_v = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0i8(i8* %0, i32 4)
+// CHECK-NEXT: %vld3_v.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_v, 0
+// CHECK-NEXT: %vld3_v.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_v, 1
+// CHECK-NEXT: %vld3_v.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <2 x i32> %vld3_v.fca.0.extract, <2 x i32>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <2 x i32> %vld3_v.fca.1.extract, <2 x i32>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <2 x i32> %vld3_v.fca.2.extract, <2 x i32>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3_s64
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i64* %a to i8*
+// CHECK-NEXT: %vld3_v = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* %0, i32 8)
+// CHECK-NEXT: %vld3_v.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_v, 0
+// CHECK-NEXT: %vld3_v.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_v, 1
+// CHECK-NEXT: %vld3_v.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <1 x i64> %vld3_v.fca.0.extract, <1 x i64>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <1 x i64> %vld3_v.fca.1.extract, <1 x i64>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <1 x i64> %vld3_v.fca.2.extract, <1 x i64>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3_f16
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast half* %a to i8*
+// CHECK-NEXT: %vld3_v = tail call { <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld3.v4f16.p0i8(i8* %0, i32 2)
+// CHECK-NEXT: %vld3_v.fca.0.extract = extractvalue { <4 x half>, <4 x half>, <4 x half> } %vld3_v, 0
+// CHECK-NEXT: %vld3_v.fca.1.extract = extractvalue { <4 x half>, <4 x half>, <4 x half> } %vld3_v, 1
+// CHECK-NEXT: %vld3_v.fca.2.extract = extractvalue { <4 x half>, <4 x half>, <4 x half> } %vld3_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x half> %vld3_v.fca.0.extract, <4 x half>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x half> %vld3_v.fca.1.extract, <4 x half>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <4 x half> %vld3_v.fca.2.extract, <4 x half>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3_f32
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast float* %a to i8*
+// CHECK-NEXT: %vld3_v = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32.p0i8(i8* %0, i32 4)
+// CHECK-NEXT: %vld3_v.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_v, 0
+// CHECK-NEXT: %vld3_v.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_v, 1
+// CHECK-NEXT: %vld3_v.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <2 x float> %vld3_v.fca.0.extract, <2 x float>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <2 x float> %vld3_v.fca.1.extract, <2 x float>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <2 x float> %vld3_v.fca.2.extract, <2 x float>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3_p8
+// CHECK: entry:
+// CHECK-NEXT: %vld3_v = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8.p0i8(i8* %a, i32 1)
+// CHECK-NEXT: %vld3_v.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_v, 0
+// CHECK-NEXT: %vld3_v.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_v, 1
+// CHECK-NEXT: %vld3_v.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx1 = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i8> %vld3_v.fca.0.extract, <8 x i8>* %__ret.sroa.0.0..sroa_idx1, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i8> %vld3_v.fca.1.extract, <8 x i8>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx7 = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <8 x i8> %vld3_v.fca.2.extract, <8 x i8>* %__ret.sroa.5.0..sroa_idx7, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3_p16
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld3_v = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0i8(i8* %0, i32 2)
+// CHECK-NEXT: %vld3_v.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_v, 0
+// CHECK-NEXT: %vld3_v.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_v, 1
+// CHECK-NEXT: %vld3_v.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x i16> %vld3_v.fca.0.extract, <4 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x i16> %vld3_v.fca.1.extract, <4 x i16>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <4 x i16> %vld3_v.fca.2.extract, <4 x i16>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: ret void
%b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, <8 x i16> %b.sroa.6.40.vecblend, i32 7, i32 2) +// CHECK-NEXT: %vld3q_lane_v.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3q_lane_v, 0 +// CHECK-NEXT: %vld3q_lane_v.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3q_lane_v, 1 +// CHECK-NEXT: %vld3q_lane_v.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3q_lane_v, 2 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <8 x i16> %vld3q_lane_v.fca.0.extract, <8 x i16>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx6 = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <8 x i16> %vld3q_lane_v.fca.1.extract, <8 x i16>* %__ret.sroa.4.0..sroa_idx6, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx8 = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <8 x i16> %vld3q_lane_v.fca.2.extract, <8 x i16>* %__ret.sroa.5.0..sroa_idx8, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld3q_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x i32> %b.sroa.0.0.vec.expand, <4 x i32> %b.sroa.0.8.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x i32> %2, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x i32> %3, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x i32> %b.sroa.3.16.vec.expand, <4 x i32> %b.sroa.3.24.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <2 x i32> %4, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <4 x i32> %b.sroa.6.32.vec.expand, <4 x i32> %b.sroa.6.40.vec.expand, <4 x i32> +// CHECK-NEXT: %6 = bitcast i32* %a to i8* +// CHECK-NEXT: %vld3q_lane_v = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32.p0i8(i8* %6, <4 x i32> %b.sroa.0.8.vecblend, <4 x i32> %b.sroa.3.24.vecblend, <4 x i32> %b.sroa.6.40.vecblend, i32 3, i32 4) +// CHECK-NEXT: %vld3q_lane_v.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3q_lane_v, 0 +// CHECK-NEXT: 
%vld3q_lane_v.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3q_lane_v, 1 +// CHECK-NEXT: %vld3q_lane_v.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3q_lane_v, 2 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <4 x i32> %vld3q_lane_v.fca.0.extract, <4 x i32>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx6 = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <4 x i32> %vld3q_lane_v.fca.1.extract, <4 x i32>* %__ret.sroa.4.0..sroa_idx6, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx8 = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <4 x i32> %vld3q_lane_v.fca.2.extract, <4 x i32>* %__ret.sroa.5.0..sroa_idx8, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld3q_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x i16> %4, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x i16> %5, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x i16> %b.sroa.6.32.vec.expand, <8 x i16> %b.sroa.6.40.vec.expand, <8 x i32> +// CHECK-NEXT: %6 = bitcast i16* %a to i8* +// CHECK-NEXT: %vld3q_lane_v = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* %6, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, <8 x i16> %b.sroa.6.40.vecblend, i32 7, i32 2) +// CHECK-NEXT: %vld3q_lane_v.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3q_lane_v, 0 +// CHECK-NEXT: %vld3q_lane_v.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3q_lane_v, 1 +// CHECK-NEXT: %vld3q_lane_v.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3q_lane_v, 2 +// CHECK-NEXT: 
%__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <8 x i16> %vld3q_lane_v.fca.0.extract, <8 x i16>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx6 = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <8 x i16> %vld3q_lane_v.fca.1.extract, <8 x i16>* %__ret.sroa.4.0..sroa_idx6, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx8 = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <8 x i16> %vld3q_lane_v.fca.2.extract, <8 x i16>* %__ret.sroa.5.0..sroa_idx8, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld3q_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x i32> %b.sroa.0.0.vec.expand, <4 x i32> %b.sroa.0.8.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x i32> %2, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x i32> %3, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x i32> %b.sroa.3.16.vec.expand, <4 x i32> %b.sroa.3.24.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <2 x i32> %4, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <4 x i32> %b.sroa.6.32.vec.expand, <4 x i32> %b.sroa.6.40.vec.expand, <4 x i32> +// CHECK-NEXT: %6 = bitcast i32* %a to i8* +// CHECK-NEXT: %vld3q_lane_v = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32.p0i8(i8* %6, <4 x i32> %b.sroa.0.8.vecblend, <4 x i32> %b.sroa.3.24.vecblend, <4 x i32> %b.sroa.6.40.vecblend, i32 3, i32 4) +// CHECK-NEXT: %vld3q_lane_v.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3q_lane_v, 0 +// CHECK-NEXT: %vld3q_lane_v.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3q_lane_v, 1 +// CHECK-NEXT: %vld3q_lane_v.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3q_lane_v, 2 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <4 x i32> %vld3q_lane_v.fca.0.extract, <4 x i32>* %__ret.sroa.0.0..sroa_idx, align 8 +// 
CHECK-NEXT: %__ret.sroa.4.0..sroa_idx6 = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <4 x i32> %vld3q_lane_v.fca.1.extract, <4 x i32>* %__ret.sroa.4.0..sroa_idx6, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx8 = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <4 x i32> %vld3q_lane_v.fca.2.extract, <4 x i32>* %__ret.sroa.5.0..sroa_idx8, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld3q_lane_f16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x half> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x half> %0, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x half> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x half> %1, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x half> %b.sroa.0.0.vec.expand, <8 x half> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x half> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x half> %2, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x half> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x half> %3, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x half> %b.sroa.3.16.vec.expand, <8 x half> %b.sroa.3.24.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x half> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x half> %4, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x half> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x half> %5, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x half> %b.sroa.6.32.vec.expand, <8 x half> %b.sroa.6.40.vec.expand, <8 x i32> +// CHECK-NEXT: %6 = bitcast half* %a to i8* +// CHECK-NEXT: %vld3q_lane_v = tail call { <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld3lane.v8f16.p0i8(i8* %6, <8 x half> %b.sroa.0.8.vecblend, <8 x half> %b.sroa.3.24.vecblend, <8 x half> %b.sroa.6.40.vecblend, i32 7, i32 2) +// CHECK-NEXT: %vld3q_lane_v.fca.0.extract = extractvalue { <8 x half>, <8 x half>, <8 x half> } %vld3q_lane_v, 0 +// CHECK-NEXT: %vld3q_lane_v.fca.1.extract = extractvalue { <8 x half>, <8 x half>, <8 x half> } %vld3q_lane_v, 1 +// CHECK-NEXT: %vld3q_lane_v.fca.2.extract = extractvalue { <8 x half>, <8 x half>, <8 x half> } %vld3q_lane_v, 2 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <8 x half> %vld3q_lane_v.fca.0.extract, <8 x half>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx6 = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <8 x half> 
%vld3q_lane_v.fca.1.extract, <8 x half>* %__ret.sroa.4.0..sroa_idx6, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx8 = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <8 x half> %vld3q_lane_v.fca.2.extract, <8 x half>* %__ret.sroa.5.0..sroa_idx8, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld3q_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x float> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x float> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x float> %b.sroa.0.0.vec.expand, <4 x float> %b.sroa.0.8.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x float> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x float> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x float> %b.sroa.3.16.vec.expand, <4 x float> %b.sroa.3.24.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <2 x float> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <2 x float> %4, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <2 x float> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <4 x float> %b.sroa.6.32.vec.expand, <4 x float> %b.sroa.6.40.vec.expand, <4 x i32> +// CHECK-NEXT: %6 = bitcast float* %a to i8* +// CHECK-NEXT: %vld3q_lane_v = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32.p0i8(i8* %6, <4 x float> %b.sroa.0.8.vecblend, <4 x float> %b.sroa.3.24.vecblend, <4 x float> %b.sroa.6.40.vecblend, i32 3, i32 4) +// CHECK-NEXT: %vld3q_lane_v.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3q_lane_v, 0 +// CHECK-NEXT: %vld3q_lane_v.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3q_lane_v, 1 +// CHECK-NEXT: %vld3q_lane_v.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3q_lane_v, 2 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <4 x float> %vld3q_lane_v.fca.0.extract, <4 x float>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx6 = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <4 x float> %vld3q_lane_v.fca.1.extract, <4 x float>* %__ret.sroa.4.0..sroa_idx6, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx8 = getelementptr 
inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <4 x float> %vld3q_lane_v.fca.2.extract, <4 x float>* %__ret.sroa.5.0..sroa_idx8, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld3q_lane_p16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x i16> %4, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x i16> %5, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x i16> %b.sroa.6.32.vec.expand, <8 x i16> %b.sroa.6.40.vec.expand, <8 x i32> +// CHECK-NEXT: %6 = bitcast i16* %a to i8* +// CHECK-NEXT: %vld3q_lane_v = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* %6, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, <8 x i16> %b.sroa.6.40.vecblend, i32 7, i32 2) +// CHECK-NEXT: %vld3q_lane_v.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3q_lane_v, 0 +// CHECK-NEXT: %vld3q_lane_v.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3q_lane_v, 1 +// CHECK-NEXT: %vld3q_lane_v.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3q_lane_v, 2 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <8 x i16> %vld3q_lane_v.fca.0.extract, <8 x i16>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx6 = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <8 x i16> %vld3q_lane_v.fca.1.extract, <8 x i16>* %__ret.sroa.4.0..sroa_idx6, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx8 = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <8 x i16> %vld3q_lane_v.fca.2.extract, <8 x i16>* %__ret.sroa.5.0..sroa_idx8, align 8 +// CHECK-NEXT: ret void +// 
CHECK-LABEL: test_vld3_lane_u8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8> +// CHECK-NEXT: %vld3_lane_v = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, i32 7, i32 1) +// CHECK-NEXT: %vld3_lane_v.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane_v, 0 +// CHECK-NEXT: %vld3_lane_v.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane_v, 1 +// CHECK-NEXT: %vld3_lane_v.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane_v, 2 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx5 = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <8 x i8> %vld3_lane_v.fca.0.extract, <8 x i8>* %__ret.sroa.0.0..sroa_idx5, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx8 = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <8 x i8> %vld3_lane_v.fca.1.extract, <8 x i8>* %__ret.sroa.4.0..sroa_idx8, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx11 = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <8 x i8> %vld3_lane_v.fca.2.extract, <8 x i8>* %__ret.sroa.5.0..sroa_idx11, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld3_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %3 = bitcast i16* %a to i8* +// CHECK-NEXT: %vld3_lane_v = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* %3, <4 x i16> %0, <4 x i16> %1, <4 x i16> %2, i32 3, i32 2) +// CHECK-NEXT: %vld3_lane_v.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane_v, 0 +// CHECK-NEXT: %vld3_lane_v.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane_v, 1 +// CHECK-NEXT: %vld3_lane_v.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane_v, 2 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <4 x i16> %vld3_lane_v.fca.0.extract, <4 x i16>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx6 = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <4 x i16> %vld3_lane_v.fca.1.extract, <4 x i16>* %__ret.sroa.4.0..sroa_idx6, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx8 = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <4 x i16> %vld3_lane_v.fca.2.extract, <4 x i16>* %__ret.sroa.5.0..sroa_idx8, align 8 
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3_lane_u32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32>
+// CHECK-NEXT: %3 = bitcast i32* %a to i8*
+// CHECK-NEXT: %vld3_lane_v = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* %3, <2 x i32> %0, <2 x i32> %1, <2 x i32> %2, i32 1, i32 4)
+// CHECK-NEXT: %vld3_lane_v.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane_v, 0
+// CHECK-NEXT: %vld3_lane_v.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane_v, 1
+// CHECK-NEXT: %vld3_lane_v.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <2 x i32> %vld3_lane_v.fca.0.extract, <2 x i32>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx6 = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <2 x i32> %vld3_lane_v.fca.1.extract, <2 x i32>* %__ret.sroa.4.0..sroa_idx6, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx8 = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <2 x i32> %vld3_lane_v.fca.2.extract, <2 x i32>* %__ret.sroa.5.0..sroa_idx8, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3_lane_s8
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8>
+// CHECK-NEXT: %vld3_lane_v = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, i32 7, i32 1)
+// CHECK-NEXT: %vld3_lane_v.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane_v, 0
+// CHECK-NEXT: %vld3_lane_v.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane_v, 1
+// CHECK-NEXT: %vld3_lane_v.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx5 = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i8> %vld3_lane_v.fca.0.extract, <8 x i8>* %__ret.sroa.0.0..sroa_idx5, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx8 = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i8> %vld3_lane_v.fca.1.extract, <8 x i8>* %__ret.sroa.4.0..sroa_idx8, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx11 = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <8 x i8> %vld3_lane_v.fca.2.extract, <8 x i8>* %__ret.sroa.5.0..sroa_idx11, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3_lane_s16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16>
+// CHECK-NEXT: %3 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld3_lane_v = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* %3, <4 x i16> %0, <4 x i16> %1, <4 x i16> %2, i32 3, i32 2)
+// CHECK-NEXT: %vld3_lane_v.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane_v, 0
+// CHECK-NEXT: %vld3_lane_v.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane_v, 1
+// CHECK-NEXT: %vld3_lane_v.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x i16> %vld3_lane_v.fca.0.extract, <4 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx6 = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x i16> %vld3_lane_v.fca.1.extract, <4 x i16>* %__ret.sroa.4.0..sroa_idx6, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx8 = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <4 x i16> %vld3_lane_v.fca.2.extract, <4 x i16>* %__ret.sroa.5.0..sroa_idx8, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3_lane_s32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32>
+// CHECK-NEXT: %3 = bitcast i32* %a to i8*
+// CHECK-NEXT: %vld3_lane_v = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* %3, <2 x i32> %0, <2 x i32> %1, <2 x i32> %2, i32 1, i32 4)
+// CHECK-NEXT: %vld3_lane_v.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane_v, 0
+// CHECK-NEXT: %vld3_lane_v.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane_v, 1
+// CHECK-NEXT: %vld3_lane_v.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <2 x i32> %vld3_lane_v.fca.0.extract, <2 x i32>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx6 = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <2 x i32> %vld3_lane_v.fca.1.extract, <2 x i32>* %__ret.sroa.4.0..sroa_idx6, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx8 = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <2 x i32> %vld3_lane_v.fca.2.extract, <2 x i32>* %__ret.sroa.5.0..sroa_idx8, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3_lane_f16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x half>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x half>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x half>
+// CHECK-NEXT: %3 = bitcast half* %a to i8*
+// CHECK-NEXT: %vld3_lane_v = tail call { <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld3lane.v4f16.p0i8(i8* %3, <4 x half> %0, <4 x half> %1, <4 x half> %2, i32 3, i32 2)
+// CHECK-NEXT: %vld3_lane_v.fca.0.extract = extractvalue { <4 x half>, <4 x half>, <4 x half> } %vld3_lane_v, 0
+// CHECK-NEXT: %vld3_lane_v.fca.1.extract = extractvalue { <4 x half>, <4 x half>, <4 x half> } %vld3_lane_v, 1
+// CHECK-NEXT: %vld3_lane_v.fca.2.extract = extractvalue { <4 x half>, <4 x half>, <4 x half> } %vld3_lane_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x half> %vld3_lane_v.fca.0.extract, <4 x half>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx6 = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x half> %vld3_lane_v.fca.1.extract, <4 x half>* %__ret.sroa.4.0..sroa_idx6, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx8 = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <4 x half> %vld3_lane_v.fca.2.extract, <4 x half>* %__ret.sroa.5.0..sroa_idx8, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3_lane_f32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x float>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x float>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x float>
+// CHECK-NEXT: %3 = bitcast float* %a to i8*
+// CHECK-NEXT: %vld3_lane_v = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32.p0i8(i8* %3, <2 x float> %0, <2 x float> %1, <2 x float> %2, i32 1, i32 4)
+// CHECK-NEXT: %vld3_lane_v.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_lane_v, 0
+// CHECK-NEXT: %vld3_lane_v.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_lane_v, 1
+// CHECK-NEXT: %vld3_lane_v.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_lane_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <2 x float> %vld3_lane_v.fca.0.extract, <2 x float>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx6 = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <2 x float> %vld3_lane_v.fca.1.extract, <2 x float>* %__ret.sroa.4.0..sroa_idx6, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx8 = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <2 x float> %vld3_lane_v.fca.2.extract, <2 x float>* %__ret.sroa.5.0..sroa_idx8, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3_lane_p8
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8>
+// CHECK-NEXT: %vld3_lane_v = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, i32 7, i32 1)
+// CHECK-NEXT: %vld3_lane_v.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane_v, 0
+// CHECK-NEXT: %vld3_lane_v.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane_v, 1
+// CHECK-NEXT: %vld3_lane_v.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx5 = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i8> %vld3_lane_v.fca.0.extract, <8 x i8>* %__ret.sroa.0.0..sroa_idx5, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx8 = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i8> %vld3_lane_v.fca.1.extract, <8 x i8>* %__ret.sroa.4.0..sroa_idx8, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx11 = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <8 x i8> %vld3_lane_v.fca.2.extract, <8 x i8>* %__ret.sroa.5.0..sroa_idx11, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld3_lane_p16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16>
+// CHECK-NEXT: %3 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld3_lane_v = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* %3, <4 x i16> %0, <4 x i16> %1, <4 x i16> %2, i32 3, i32 2)
+// CHECK-NEXT: %vld3_lane_v.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane_v, 0
+// CHECK-NEXT: %vld3_lane_v.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane_v, 1
+// CHECK-NEXT: %vld3_lane_v.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane_v, 2
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x i16> %vld3_lane_v.fca.0.extract, <4 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx6 = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x i16> %vld3_lane_v.fca.1.extract, <4 x i16>* %__ret.sroa.4.0..sroa_idx6, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx8 = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <4 x i16> %vld3_lane_v.fca.2.extract, <4 x i16>* %__ret.sroa.5.0..sroa_idx8, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld4q_u8
+// CHECK: entry:
+// CHECK-NEXT: %vld4q_v = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8.p0i8(i8* %a, i32 1)
+// CHECK-NEXT: %vld4q_v.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4q_v, 0
+// CHECK-NEXT: %vld4q_v.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4q_v, 1
+// CHECK-NEXT: %vld4q_v.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4q_v, 2
+// CHECK-NEXT: %vld4q_v.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4q_v, 3
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx1 = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <16 x i8> %vld4q_v.fca.0.extract, <16 x i8>* %__ret.sroa.0.0..sroa_idx1, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <16 x i8> %vld4q_v.fca.1.extract, <16 x i8>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx7 = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <16 x i8> %vld4q_v.fca.2.extract, <16 x i8>* %__ret.sroa.5.0..sroa_idx7, align 8
+// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx10 = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* %agg.result, i32 0, i32 0, i32 3
+// CHECK-NEXT: store <16 x i8> %vld4q_v.fca.3.extract, <16 x i8>* %__ret.sroa.6.0..sroa_idx10, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld4q_u16
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld4q_v = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0i8(i8* %0, i32 2)
+// CHECK-NEXT: %vld4q_v.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_v, 0
+// CHECK-NEXT: %vld4q_v.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_v, 1
+// CHECK-NEXT: %vld4q_v.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_v, 2
+// CHECK-NEXT: %vld4q_v.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_v, 3
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i16> %vld4q_v.fca.0.extract, <8 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i16> %vld4q_v.fca.1.extract, <8 x i16>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <8 x i16> %vld4q_v.fca.2.extract, <8 x i16>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx6 = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* %agg.result, i32 0, i32 0, i32 3
+// CHECK-NEXT: store <8 x i16> %vld4q_v.fca.3.extract, <8 x i16>* %__ret.sroa.6.0..sroa_idx6, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld4q_u32
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i32* %a to i8*
+// CHECK-NEXT: %vld4q_v = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0i8(i8* %0, i32 4)
+// CHECK-NEXT: %vld4q_v.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4q_v, 0
+// CHECK-NEXT: %vld4q_v.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4q_v, 1
+// CHECK-NEXT: %vld4q_v.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4q_v, 2
+// CHECK-NEXT: %vld4q_v.fca.3.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4q_v, 3
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x i32> %vld4q_v.fca.0.extract, <4 x i32>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x i32> %vld4q_v.fca.1.extract, <4 x i32>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <4 x i32> %vld4q_v.fca.2.extract, <4 x i32>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx6 = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* %agg.result, i32 0, i32 0, i32 3
+// CHECK-NEXT: store <4 x i32> %vld4q_v.fca.3.extract, <4 x i32>* %__ret.sroa.6.0..sroa_idx6, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld4q_s8
+// CHECK: entry:
+// CHECK-NEXT: %vld4q_v = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8.p0i8(i8* %a, i32 1)
+// CHECK-NEXT: %vld4q_v.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4q_v, 0
+// CHECK-NEXT: %vld4q_v.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4q_v, 1
+// CHECK-NEXT: %vld4q_v.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4q_v, 2
+// CHECK-NEXT: %vld4q_v.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4q_v, 3
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx1 = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <16 x i8> %vld4q_v.fca.0.extract, <16 x i8>* %__ret.sroa.0.0..sroa_idx1, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <16 x i8> %vld4q_v.fca.1.extract, <16 x i8>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx7 = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <16 x i8> %vld4q_v.fca.2.extract, <16 x i8>* %__ret.sroa.5.0..sroa_idx7, align 8
+// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx10 = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* %agg.result, i32 0, i32 0, i32 3
+// CHECK-NEXT: store <16 x i8> %vld4q_v.fca.3.extract, <16 x i8>* %__ret.sroa.6.0..sroa_idx10, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld4q_s16
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld4q_v = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0i8(i8* %0, i32 2)
+// CHECK-NEXT: %vld4q_v.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_v, 0
+// CHECK-NEXT: %vld4q_v.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_v, 1
+// CHECK-NEXT: %vld4q_v.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_v, 2
+// CHECK-NEXT: %vld4q_v.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_v, 3
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i16> %vld4q_v.fca.0.extract, <8 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i16> %vld4q_v.fca.1.extract, <8 x i16>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <8 x i16> %vld4q_v.fca.2.extract, <8 x i16>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx6 = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* %agg.result, i32 0, i32 0, i32 3
+// CHECK-NEXT: store <8 x i16> %vld4q_v.fca.3.extract, <8 x i16>* %__ret.sroa.6.0..sroa_idx6, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld4q_s32
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i32* %a to i8*
+// CHECK-NEXT: %vld4q_v = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0i8(i8* %0, i32 4)
+// CHECK-NEXT: %vld4q_v.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4q_v, 0
+// CHECK-NEXT: %vld4q_v.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4q_v, 1
+// CHECK-NEXT: %vld4q_v.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4q_v, 2
+// CHECK-NEXT: %vld4q_v.fca.3.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4q_v, 3
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x i32> %vld4q_v.fca.0.extract, <4 x i32>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x i32> %vld4q_v.fca.1.extract, <4 x i32>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <4 x i32> %vld4q_v.fca.2.extract, <4 x i32>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx6 = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* %agg.result, i32 0, i32 0, i32 3
+// CHECK-NEXT: store <4 x i32> %vld4q_v.fca.3.extract, <4 x i32>* %__ret.sroa.6.0..sroa_idx6, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld4q_f16
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast half* %a to i8*
+// CHECK-NEXT: %vld4q_v = tail call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld4.v8f16.p0i8(i8* %0, i32 2)
+// CHECK-NEXT: %vld4q_v.fca.0.extract = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %vld4q_v, 0
+// CHECK-NEXT: %vld4q_v.fca.1.extract = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %vld4q_v, 1
+// CHECK-NEXT: %vld4q_v.fca.2.extract = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %vld4q_v, 2
+// CHECK-NEXT: %vld4q_v.fca.3.extract = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %vld4q_v, 3
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x half> %vld4q_v.fca.0.extract, <8 x half>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x half> %vld4q_v.fca.1.extract, <8 x half>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <8 x half> %vld4q_v.fca.2.extract, <8 x half>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx6 = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* %agg.result, i32 0, i32 0, i32 3
+// CHECK-NEXT: store <8 x half> %vld4q_v.fca.3.extract, <8 x half>* %__ret.sroa.6.0..sroa_idx6, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld4q_f32
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast float* %a to i8*
+// CHECK-NEXT: %vld4q_v = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32.p0i8(i8* %0, i32 4)
+// CHECK-NEXT: %vld4q_v.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4q_v, 0
+// CHECK-NEXT: %vld4q_v.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4q_v, 1
+// CHECK-NEXT: %vld4q_v.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4q_v, 2
+// CHECK-NEXT: %vld4q_v.fca.3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4q_v, 3
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x float> %vld4q_v.fca.0.extract, <4 x float>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x float> %vld4q_v.fca.1.extract, <4 x float>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <4 x float> %vld4q_v.fca.2.extract, <4 x float>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx6 = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* %agg.result, i32 0, i32 0, i32 3
+// CHECK-NEXT: store <4 x float> %vld4q_v.fca.3.extract, <4 x float>* %__ret.sroa.6.0..sroa_idx6, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld4q_p8
+// CHECK: entry:
+// CHECK-NEXT: %vld4q_v = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8.p0i8(i8* %a, i32 1)
+// CHECK-NEXT: %vld4q_v.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4q_v, 0
+// CHECK-NEXT: %vld4q_v.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4q_v, 1
+// CHECK-NEXT: %vld4q_v.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4q_v, 2
+// CHECK-NEXT: %vld4q_v.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4q_v, 3
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx1 = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <16 x i8> %vld4q_v.fca.0.extract, <16 x i8>* %__ret.sroa.0.0..sroa_idx1, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <16 x i8> %vld4q_v.fca.1.extract, <16 x i8>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx7 = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <16 x i8> %vld4q_v.fca.2.extract, <16 x i8>* %__ret.sroa.5.0..sroa_idx7, align 8
+// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx10 = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* %agg.result, i32 0, i32 0, i32 3
+// CHECK-NEXT: store <16 x i8> %vld4q_v.fca.3.extract, <16 x i8>* %__ret.sroa.6.0..sroa_idx10, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld4q_p16
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld4q_v = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0i8(i8* %0, i32 2)
+// CHECK-NEXT: %vld4q_v.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_v, 0
+// CHECK-NEXT: %vld4q_v.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_v, 1
+// CHECK-NEXT: %vld4q_v.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_v, 2
+// CHECK-NEXT: %vld4q_v.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_v, 3
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i16> %vld4q_v.fca.0.extract, <8 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i16> %vld4q_v.fca.1.extract, <8 x i16>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <8 x i16> %vld4q_v.fca.2.extract, <8 x i16>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx6 = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* %agg.result, i32 0, i32 0, i32 3
+// CHECK-NEXT: store <8 x i16> %vld4q_v.fca.3.extract, <8 x i16>* %__ret.sroa.6.0..sroa_idx6, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld4_u8
+// CHECK: entry:
+// CHECK-NEXT: %vld4_v = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8.p0i8(i8* %a, i32 1)
+// CHECK-NEXT: %vld4_v.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_v, 0
+// CHECK-NEXT: %vld4_v.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_v, 1
+// CHECK-NEXT: %vld4_v.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_v, 2
+// CHECK-NEXT: %vld4_v.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_v, 3
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx1 = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i8> %vld4_v.fca.0.extract, <8 x i8>* %__ret.sroa.0.0..sroa_idx1, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i8> %vld4_v.fca.1.extract, <8 x i8>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx7 = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <8 x i8> %vld4_v.fca.2.extract, <8 x i8>* %__ret.sroa.5.0..sroa_idx7, align 8
+// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx10 = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* %agg.result, i32 0, i32 0, i32 3
+// CHECK-NEXT: store <8 x i8> %vld4_v.fca.3.extract, <8 x i8>* %__ret.sroa.6.0..sroa_idx10, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld4_u16
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld4_v = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0i8(i8* %0, i32 2)
+// CHECK-NEXT: %vld4_v.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_v, 0
+// CHECK-NEXT: %vld4_v.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_v, 1
+// CHECK-NEXT: %vld4_v.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_v, 2
+// CHECK-NEXT: %vld4_v.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_v, 3
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x i16> %vld4_v.fca.0.extract, <4 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x i16> %vld4_v.fca.1.extract, <4 x i16>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <4 x i16> %vld4_v.fca.2.extract, <4 x i16>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx6 = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* %agg.result, i32 0, i32 0, i32 3
+// CHECK-NEXT: store <4 x i16> %vld4_v.fca.3.extract, <4 x i16>* %__ret.sroa.6.0..sroa_idx6, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld4_u32
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i32* %a to i8*
+// CHECK-NEXT: %vld4_v = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0i8(i8* %0, i32 4)
+// CHECK-NEXT: %vld4_v.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4_v, 0
+// CHECK-NEXT: %vld4_v.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4_v, 1
+// CHECK-NEXT: %vld4_v.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4_v, 2
+// CHECK-NEXT: %vld4_v.fca.3.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4_v, 3
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <2 x i32> %vld4_v.fca.0.extract, <2 x i32>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <2 x i32> %vld4_v.fca.1.extract, <2 x i32>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <2 x i32> %vld4_v.fca.2.extract, <2 x i32>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx6 = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* %agg.result, i32 0, i32 0, i32 3
+// CHECK-NEXT: store <2 x i32> %vld4_v.fca.3.extract, <2 x i32>* %__ret.sroa.6.0..sroa_idx6, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld4_u64
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i64* %a to i8*
+// CHECK-NEXT: %vld4_v = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* %0, i32 8)
+// CHECK-NEXT: %vld4_v.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4_v, 0
+// CHECK-NEXT: %vld4_v.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4_v, 1
+// CHECK-NEXT: %vld4_v.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4_v, 2
+// CHECK-NEXT: %vld4_v.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4_v, 3
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <1 x i64> %vld4_v.fca.0.extract, <1 x i64>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <1 x i64> %vld4_v.fca.1.extract, <1 x i64>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <1 x i64> %vld4_v.fca.2.extract, <1 x i64>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx6 = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* %agg.result, i32 0, i32 0, i32 3
+// CHECK-NEXT: store <1 x i64> %vld4_v.fca.3.extract, <1 x i64>* %__ret.sroa.6.0..sroa_idx6, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld4_s8
+// CHECK: entry:
+// CHECK-NEXT: %vld4_v = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8.p0i8(i8* %a, i32 1)
+// CHECK-NEXT: %vld4_v.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_v, 0
+// CHECK-NEXT: %vld4_v.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_v, 1
+// CHECK-NEXT: %vld4_v.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_v, 2
+// CHECK-NEXT: %vld4_v.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_v, 3
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx1 = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <8 x i8> %vld4_v.fca.0.extract, <8 x i8>* %__ret.sroa.0.0..sroa_idx1, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <8 x i8> %vld4_v.fca.1.extract, <8 x i8>* %__ret.sroa.4.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx7 = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <8 x i8> %vld4_v.fca.2.extract, <8 x i8>* %__ret.sroa.5.0..sroa_idx7, align 8
+// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx10 = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* %agg.result, i32 0, i32 0, i32 3
+// CHECK-NEXT: store <8 x i8> %vld4_v.fca.3.extract, <8 x i8>* %__ret.sroa.6.0..sroa_idx10, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld4_s16
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i16* %a to i8*
+// CHECK-NEXT: %vld4_v = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0i8(i8* %0, i32 2)
+// CHECK-NEXT: %vld4_v.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_v, 0
+// CHECK-NEXT: %vld4_v.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_v, 1
+// CHECK-NEXT: %vld4_v.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_v, 2
+// CHECK-NEXT: %vld4_v.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_v, 3
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <4 x i16> %vld4_v.fca.0.extract, <4 x i16>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <4 x i16> %vld4_v.fca.1.extract, <4 x i16>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* %agg.result, i32 0, i32 0, i32 2
+// CHECK-NEXT: store <4 x i16> %vld4_v.fca.2.extract, <4 x i16>* %__ret.sroa.5.0..sroa_idx4, align 8
+// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx6 = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* %agg.result, i32 0, i32 0, i32 3
+// CHECK-NEXT: store <4 x i16> %vld4_v.fca.3.extract, <4 x i16>* %__ret.sroa.6.0..sroa_idx6, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vld4_s32
+// CHECK: entry:
+// CHECK-NEXT: %0 = bitcast i32* %a to i8*
+// CHECK-NEXT: %vld4_v = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0i8(i8* %0, i32 4)
+// CHECK-NEXT: %vld4_v.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4_v, 0
+// CHECK-NEXT: %vld4_v.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4_v, 1
+// CHECK-NEXT: %vld4_v.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4_v, 2
+// CHECK-NEXT: %vld4_v.fca.3.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4_v, 3
+// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: store <2 x i32> %vld4_v.fca.0.extract, <2 x i32>* %__ret.sroa.0.0..sroa_idx, align 8
+// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: store <2 x i32> %vld4_v.fca.1.extract, <2 x i32>* %__ret.sroa.4.0..sroa_idx2, align 8
+// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.int32x2x4_t,
%struct.int32x2x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <2 x i32> %vld4_v.fca.2.extract, <2 x i32>* %__ret.sroa.5.0..sroa_idx4, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx6 = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <2 x i32> %vld4_v.fca.3.extract, <2 x i32>* %__ret.sroa.6.0..sroa_idx6, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64* %a to i8* +// CHECK-NEXT: %vld4_v = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* %0, i32 8) +// CHECK-NEXT: %vld4_v.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4_v, 0 +// CHECK-NEXT: %vld4_v.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4_v, 1 +// CHECK-NEXT: %vld4_v.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4_v, 2 +// CHECK-NEXT: %vld4_v.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <1 x i64> %vld4_v.fca.0.extract, <1 x i64>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <1 x i64> %vld4_v.fca.1.extract, <1 x i64>* %__ret.sroa.4.0..sroa_idx2, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <1 x i64> %vld4_v.fca.2.extract, <1 x i64>* %__ret.sroa.5.0..sroa_idx4, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx6 = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <1 x i64> %vld4_v.fca.3.extract, <1 x i64>* %__ret.sroa.6.0..sroa_idx6, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast half* %a to i8* +// CHECK-NEXT: %vld4_v = tail call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld4.v4f16.p0i8(i8* %0, i32 2) +// CHECK-NEXT: %vld4_v.fca.0.extract = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %vld4_v, 0 +// CHECK-NEXT: %vld4_v.fca.1.extract = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %vld4_v, 1 +// CHECK-NEXT: %vld4_v.fca.2.extract = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %vld4_v, 2 +// CHECK-NEXT: %vld4_v.fca.3.extract = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %vld4_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <4 x half> %vld4_v.fca.0.extract, <4 x half>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <4 x half> %vld4_v.fca.1.extract, <4 x half>* %__ret.sroa.4.0..sroa_idx2, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <4 x half> %vld4_v.fca.2.extract, <4 x half>* %__ret.sroa.5.0..sroa_idx4, align 8 +// CHECK-NEXT: 
%__ret.sroa.6.0..sroa_idx6 = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <4 x half> %vld4_v.fca.3.extract, <4 x half>* %__ret.sroa.6.0..sroa_idx6, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast float* %a to i8* +// CHECK-NEXT: %vld4_v = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32.p0i8(i8* %0, i32 4) +// CHECK-NEXT: %vld4_v.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4_v, 0 +// CHECK-NEXT: %vld4_v.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4_v, 1 +// CHECK-NEXT: %vld4_v.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4_v, 2 +// CHECK-NEXT: %vld4_v.fca.3.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <2 x float> %vld4_v.fca.0.extract, <2 x float>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <2 x float> %vld4_v.fca.1.extract, <2 x float>* %__ret.sroa.4.0..sroa_idx2, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <2 x float> %vld4_v.fca.2.extract, <2 x float>* %__ret.sroa.5.0..sroa_idx4, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx6 = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <2 x float> %vld4_v.fca.3.extract, <2 x float>* %__ret.sroa.6.0..sroa_idx6, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4_p8 +// CHECK: entry: +// CHECK-NEXT: %vld4_v = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8.p0i8(i8* %a, i32 1) +// CHECK-NEXT: %vld4_v.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_v, 0 +// CHECK-NEXT: %vld4_v.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_v, 1 +// CHECK-NEXT: %vld4_v.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_v, 2 +// CHECK-NEXT: %vld4_v.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx1 = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <8 x i8> %vld4_v.fca.0.extract, <8 x i8>* %__ret.sroa.0.0..sroa_idx1, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx4 = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <8 x i8> %vld4_v.fca.1.extract, <8 x i8>* %__ret.sroa.4.0..sroa_idx4, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx7 = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <8 x i8> %vld4_v.fca.2.extract, <8 x i8>* %__ret.sroa.5.0..sroa_idx7, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx10 = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <8 x i8> %vld4_v.fca.3.extract, <8 x i8>* 
%__ret.sroa.6.0..sroa_idx10, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i16* %a to i8* +// CHECK-NEXT: %vld4_v = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0i8(i8* %0, i32 2) +// CHECK-NEXT: %vld4_v.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_v, 0 +// CHECK-NEXT: %vld4_v.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_v, 1 +// CHECK-NEXT: %vld4_v.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_v, 2 +// CHECK-NEXT: %vld4_v.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <4 x i16> %vld4_v.fca.0.extract, <4 x i16>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx2 = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <4 x i16> %vld4_v.fca.1.extract, <4 x i16>* %__ret.sroa.4.0..sroa_idx2, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx4 = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <4 x i16> %vld4_v.fca.2.extract, <4 x i16>* %__ret.sroa.5.0..sroa_idx4, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx6 = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <4 x i16> %vld4_v.fca.3.extract, <4 x i16>* %__ret.sroa.6.0..sroa_idx6, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4q_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x i16> %4, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x i16> %5, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: 
%b.sroa.6.40.vecblend = shufflevector <8 x i16> %b.sroa.6.32.vec.expand, <8 x i16> %b.sroa.6.40.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6 +// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <4 x i16> %6, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7 +// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <4 x i16> %7, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <8 x i16> %b.sroa.9.48.vec.expand, <8 x i16> %b.sroa.9.56.vec.expand, <8 x i32> +// CHECK-NEXT: %8 = bitcast i16* %a to i8* +// CHECK-NEXT: %vld4q_lane_v = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* %8, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, <8 x i16> %b.sroa.6.40.vecblend, <8 x i16> %b.sroa.9.56.vecblend, i32 7, i32 2) +// CHECK-NEXT: %vld4q_lane_v.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_lane_v, 0 +// CHECK-NEXT: %vld4q_lane_v.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_lane_v, 1 +// CHECK-NEXT: %vld4q_lane_v.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_lane_v, 2 +// CHECK-NEXT: %vld4q_lane_v.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_lane_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <8 x i16> %vld4q_lane_v.fca.0.extract, <8 x i16>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx8 = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <8 x i16> %vld4q_lane_v.fca.1.extract, <8 x i16>* %__ret.sroa.4.0..sroa_idx8, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx10 = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <8 x i16> %vld4q_lane_v.fca.2.extract, <8 x i16>* %__ret.sroa.5.0..sroa_idx10, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx12 = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <8 x i16> %vld4q_lane_v.fca.3.extract, <8 x i16>* %__ret.sroa.6.0..sroa_idx12, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4q_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x i32> %b.sroa.0.0.vec.expand, <4 x i32> %b.sroa.0.8.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x i32> %2, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = 
extractvalue [8 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x i32> %3, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x i32> %b.sroa.3.16.vec.expand, <4 x i32> %b.sroa.3.24.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <2 x i32> %4, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <4 x i32> %b.sroa.6.32.vec.expand, <4 x i32> %b.sroa.6.40.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6 +// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <2 x i32> %6, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7 +// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <2 x i32> %7, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <4 x i32> %b.sroa.9.48.vec.expand, <4 x i32> %b.sroa.9.56.vec.expand, <4 x i32> +// CHECK-NEXT: %8 = bitcast i32* %a to i8* +// CHECK-NEXT: %vld4q_lane_v = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32.p0i8(i8* %8, <4 x i32> %b.sroa.0.8.vecblend, <4 x i32> %b.sroa.3.24.vecblend, <4 x i32> %b.sroa.6.40.vecblend, <4 x i32> %b.sroa.9.56.vecblend, i32 3, i32 4) +// CHECK-NEXT: %vld4q_lane_v.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4q_lane_v, 0 +// CHECK-NEXT: %vld4q_lane_v.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4q_lane_v, 1 +// CHECK-NEXT: %vld4q_lane_v.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4q_lane_v, 2 +// CHECK-NEXT: %vld4q_lane_v.fca.3.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4q_lane_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <4 x i32> %vld4q_lane_v.fca.0.extract, <4 x i32>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx8 = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <4 x i32> %vld4q_lane_v.fca.1.extract, <4 x i32>* %__ret.sroa.4.0..sroa_idx8, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx10 = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <4 x i32> %vld4q_lane_v.fca.2.extract, <4 x i32>* %__ret.sroa.5.0..sroa_idx10, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx12 = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <4 x i32> %vld4q_lane_v.fca.3.extract, <4 x i32>* %__ret.sroa.6.0..sroa_idx12, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4q_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x 
i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x i16> %4, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x i16> %5, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x i16> %b.sroa.6.32.vec.expand, <8 x i16> %b.sroa.6.40.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6 +// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <4 x i16> %6, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7 +// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <4 x i16> %7, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <8 x i16> %b.sroa.9.48.vec.expand, <8 x i16> %b.sroa.9.56.vec.expand, <8 x i32> +// CHECK-NEXT: %8 = bitcast i16* %a to i8* +// CHECK-NEXT: %vld4q_lane_v = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* %8, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, <8 x i16> %b.sroa.6.40.vecblend, <8 x i16> %b.sroa.9.56.vecblend, i32 7, i32 2) +// CHECK-NEXT: %vld4q_lane_v.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_lane_v, 0 +// CHECK-NEXT: %vld4q_lane_v.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_lane_v, 1 +// CHECK-NEXT: %vld4q_lane_v.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_lane_v, 2 +// CHECK-NEXT: %vld4q_lane_v.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_lane_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <8 x i16> %vld4q_lane_v.fca.0.extract, <8 x i16>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx8 = getelementptr 
inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <8 x i16> %vld4q_lane_v.fca.1.extract, <8 x i16>* %__ret.sroa.4.0..sroa_idx8, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx10 = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <8 x i16> %vld4q_lane_v.fca.2.extract, <8 x i16>* %__ret.sroa.5.0..sroa_idx10, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx12 = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <8 x i16> %vld4q_lane_v.fca.3.extract, <8 x i16>* %__ret.sroa.6.0..sroa_idx12, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4q_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x i32> %b.sroa.0.0.vec.expand, <4 x i32> %b.sroa.0.8.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x i32> %2, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x i32> %3, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x i32> %b.sroa.3.16.vec.expand, <4 x i32> %b.sroa.3.24.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <2 x i32> %4, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <4 x i32> %b.sroa.6.32.vec.expand, <4 x i32> %b.sroa.6.40.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6 +// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <2 x i32> %6, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7 +// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <2 x i32> %7, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <4 x i32> %b.sroa.9.48.vec.expand, <4 x i32> %b.sroa.9.56.vec.expand, <4 x i32> +// CHECK-NEXT: %8 = bitcast i32* %a to i8* +// CHECK-NEXT: %vld4q_lane_v = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32.p0i8(i8* %8, <4 x i32> %b.sroa.0.8.vecblend, <4 x i32> %b.sroa.3.24.vecblend, <4 x i32> %b.sroa.6.40.vecblend, <4 x i32> 
%b.sroa.9.56.vecblend, i32 3, i32 4) +// CHECK-NEXT: %vld4q_lane_v.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4q_lane_v, 0 +// CHECK-NEXT: %vld4q_lane_v.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4q_lane_v, 1 +// CHECK-NEXT: %vld4q_lane_v.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4q_lane_v, 2 +// CHECK-NEXT: %vld4q_lane_v.fca.3.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4q_lane_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <4 x i32> %vld4q_lane_v.fca.0.extract, <4 x i32>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx8 = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <4 x i32> %vld4q_lane_v.fca.1.extract, <4 x i32>* %__ret.sroa.4.0..sroa_idx8, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx10 = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <4 x i32> %vld4q_lane_v.fca.2.extract, <4 x i32>* %__ret.sroa.5.0..sroa_idx10, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx12 = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <4 x i32> %vld4q_lane_v.fca.3.extract, <4 x i32>* %__ret.sroa.6.0..sroa_idx12, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4q_lane_f16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x half> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x half> %0, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x half> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x half> %1, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x half> %b.sroa.0.0.vec.expand, <8 x half> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x half> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x half> %2, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x half> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x half> %3, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x half> %b.sroa.3.16.vec.expand, <8 x half> %b.sroa.3.24.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x half> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x half> %4, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x half> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x half> %5, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x half> %b.sroa.6.32.vec.expand, <8 x half> %b.sroa.6.40.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.6.extract = 
extractvalue [8 x i64] %b.coerce, 6 +// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <4 x half> +// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <4 x half> %6, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7 +// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <4 x half> +// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <4 x half> %7, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <8 x half> %b.sroa.9.48.vec.expand, <8 x half> %b.sroa.9.56.vec.expand, <8 x i32> +// CHECK-NEXT: %8 = bitcast half* %a to i8* +// CHECK-NEXT: %vld4q_lane_v = tail call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld4lane.v8f16.p0i8(i8* %8, <8 x half> %b.sroa.0.8.vecblend, <8 x half> %b.sroa.3.24.vecblend, <8 x half> %b.sroa.6.40.vecblend, <8 x half> %b.sroa.9.56.vecblend, i32 7, i32 2) +// CHECK-NEXT: %vld4q_lane_v.fca.0.extract = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %vld4q_lane_v, 0 +// CHECK-NEXT: %vld4q_lane_v.fca.1.extract = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %vld4q_lane_v, 1 +// CHECK-NEXT: %vld4q_lane_v.fca.2.extract = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %vld4q_lane_v, 2 +// CHECK-NEXT: %vld4q_lane_v.fca.3.extract = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %vld4q_lane_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <8 x half> %vld4q_lane_v.fca.0.extract, <8 x half>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx8 = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <8 x half> %vld4q_lane_v.fca.1.extract, <8 x half>* %__ret.sroa.4.0..sroa_idx8, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx10 = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <8 x half> %vld4q_lane_v.fca.2.extract, <8 x half>* %__ret.sroa.5.0..sroa_idx10, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx12 = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <8 x half> %vld4q_lane_v.fca.3.extract, <8 x half>* %__ret.sroa.6.0..sroa_idx12, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4q_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x float> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x float> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x float> %b.sroa.0.0.vec.expand, <4 x float> %b.sroa.0.8.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x float> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 
%b.coerce.fca.3.extract to <2 x float> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x float> %b.sroa.3.16.vec.expand, <4 x float> %b.sroa.3.24.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <2 x float> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <2 x float> %4, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <2 x float> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <4 x float> %b.sroa.6.32.vec.expand, <4 x float> %b.sroa.6.40.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6 +// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <2 x float> +// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <2 x float> %6, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7 +// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <2 x float> +// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <2 x float> %7, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <4 x float> %b.sroa.9.48.vec.expand, <4 x float> %b.sroa.9.56.vec.expand, <4 x i32> +// CHECK-NEXT: %8 = bitcast float* %a to i8* +// CHECK-NEXT: %vld4q_lane_v = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32.p0i8(i8* %8, <4 x float> %b.sroa.0.8.vecblend, <4 x float> %b.sroa.3.24.vecblend, <4 x float> %b.sroa.6.40.vecblend, <4 x float> %b.sroa.9.56.vecblend, i32 3, i32 4) +// CHECK-NEXT: %vld4q_lane_v.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4q_lane_v, 0 +// CHECK-NEXT: %vld4q_lane_v.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4q_lane_v, 1 +// CHECK-NEXT: %vld4q_lane_v.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4q_lane_v, 2 +// CHECK-NEXT: %vld4q_lane_v.fca.3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4q_lane_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <4 x float> %vld4q_lane_v.fca.0.extract, <4 x float>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx8 = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <4 x float> %vld4q_lane_v.fca.1.extract, <4 x float>* %__ret.sroa.4.0..sroa_idx8, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx10 = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <4 x float> %vld4q_lane_v.fca.2.extract, <4 x float>* %__ret.sroa.5.0..sroa_idx10, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx12 = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <4 x float> %vld4q_lane_v.fca.3.extract, <4 x float>* %__ret.sroa.6.0..sroa_idx12, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4q_lane_p16 +// CHECK: entry: +// CHECK-NEXT: 
%b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x i16> %4, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x i16> %5, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x i16> %b.sroa.6.32.vec.expand, <8 x i16> %b.sroa.6.40.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6 +// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <4 x i16> %6, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7 +// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <4 x i16> %7, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <8 x i16> %b.sroa.9.48.vec.expand, <8 x i16> %b.sroa.9.56.vec.expand, <8 x i32> +// CHECK-NEXT: %8 = bitcast i16* %a to i8* +// CHECK-NEXT: %vld4q_lane_v = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* %8, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, <8 x i16> %b.sroa.6.40.vecblend, <8 x i16> %b.sroa.9.56.vecblend, i32 7, i32 2) +// CHECK-NEXT: %vld4q_lane_v.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_lane_v, 0 +// CHECK-NEXT: %vld4q_lane_v.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_lane_v, 1 +// CHECK-NEXT: %vld4q_lane_v.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_lane_v, 2 +// CHECK-NEXT: %vld4q_lane_v.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4q_lane_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <8 x i16> %vld4q_lane_v.fca.0.extract, <8 x i16>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: 
%__ret.sroa.4.0..sroa_idx8 = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <8 x i16> %vld4q_lane_v.fca.1.extract, <8 x i16>* %__ret.sroa.4.0..sroa_idx8, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx10 = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <8 x i16> %vld4q_lane_v.fca.2.extract, <8 x i16>* %__ret.sroa.5.0..sroa_idx10, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx12 = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <8 x i16> %vld4q_lane_v.fca.3.extract, <8 x i16>* %__ret.sroa.6.0..sroa_idx12, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4_lane_u8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8> +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <8 x i8> +// CHECK-NEXT: %vld4_lane_v = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, i32 7, i32 1) +// CHECK-NEXT: %vld4_lane_v.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_lane_v, 0 +// CHECK-NEXT: %vld4_lane_v.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_lane_v, 1 +// CHECK-NEXT: %vld4_lane_v.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_lane_v, 2 +// CHECK-NEXT: %vld4_lane_v.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_lane_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx7 = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <8 x i8> %vld4_lane_v.fca.0.extract, <8 x i8>* %__ret.sroa.0.0..sroa_idx7, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx10 = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <8 x i8> %vld4_lane_v.fca.1.extract, <8 x i8>* %__ret.sroa.4.0..sroa_idx10, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx13 = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <8 x i8> %vld4_lane_v.fca.2.extract, <8 x i8>* %__ret.sroa.5.0..sroa_idx13, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx16 = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <8 x i8> %vld4_lane_v.fca.3.extract, <8 x i8>* %__ret.sroa.6.0..sroa_idx16, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %1 = 
bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %4 = bitcast i16* %a to i8* +// CHECK-NEXT: %vld4_lane_v = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* %4, <4 x i16> %0, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 3, i32 2) +// CHECK-NEXT: %vld4_lane_v.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_lane_v, 0 +// CHECK-NEXT: %vld4_lane_v.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_lane_v, 1 +// CHECK-NEXT: %vld4_lane_v.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_lane_v, 2 +// CHECK-NEXT: %vld4_lane_v.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_lane_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <4 x i16> %vld4_lane_v.fca.0.extract, <4 x i16>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx8 = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <4 x i16> %vld4_lane_v.fca.1.extract, <4 x i16>* %__ret.sroa.4.0..sroa_idx8, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx10 = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <4 x i16> %vld4_lane_v.fca.2.extract, <4 x i16>* %__ret.sroa.5.0..sroa_idx10, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx12 = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <4 x i16> %vld4_lane_v.fca.3.extract, <4 x i16>* %__ret.sroa.6.0..sroa_idx12, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32> +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32> +// CHECK-NEXT: %4 = bitcast i32* %a to i8* +// CHECK-NEXT: %vld4_lane_v = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* %4, <2 x i32> %0, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, i32 1, i32 4) +// CHECK-NEXT: %vld4_lane_v.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4_lane_v, 0 +// CHECK-NEXT: %vld4_lane_v.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4_lane_v, 1 +// CHECK-NEXT: %vld4_lane_v.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4_lane_v, 2 +// CHECK-NEXT: %vld4_lane_v.fca.3.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4_lane_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <2 x i32> %vld4_lane_v.fca.0.extract, <2 x i32>* 
%__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx8 = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <2 x i32> %vld4_lane_v.fca.1.extract, <2 x i32>* %__ret.sroa.4.0..sroa_idx8, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx10 = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <2 x i32> %vld4_lane_v.fca.2.extract, <2 x i32>* %__ret.sroa.5.0..sroa_idx10, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx12 = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <2 x i32> %vld4_lane_v.fca.3.extract, <2 x i32>* %__ret.sroa.6.0..sroa_idx12, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4_lane_s8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8> +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <8 x i8> +// CHECK-NEXT: %vld4_lane_v = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, i32 7, i32 1) +// CHECK-NEXT: %vld4_lane_v.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_lane_v, 0 +// CHECK-NEXT: %vld4_lane_v.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_lane_v, 1 +// CHECK-NEXT: %vld4_lane_v.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_lane_v, 2 +// CHECK-NEXT: %vld4_lane_v.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_lane_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx7 = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <8 x i8> %vld4_lane_v.fca.0.extract, <8 x i8>* %__ret.sroa.0.0..sroa_idx7, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx10 = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <8 x i8> %vld4_lane_v.fca.1.extract, <8 x i8>* %__ret.sroa.4.0..sroa_idx10, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx13 = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <8 x i8> %vld4_lane_v.fca.2.extract, <8 x i8>* %__ret.sroa.5.0..sroa_idx13, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx16 = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <8 x i8> %vld4_lane_v.fca.3.extract, <8 x i8>* %__ret.sroa.6.0..sroa_idx16, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 
%b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %4 = bitcast i16* %a to i8* +// CHECK-NEXT: %vld4_lane_v = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* %4, <4 x i16> %0, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 3, i32 2) +// CHECK-NEXT: %vld4_lane_v.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_lane_v, 0 +// CHECK-NEXT: %vld4_lane_v.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_lane_v, 1 +// CHECK-NEXT: %vld4_lane_v.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_lane_v, 2 +// CHECK-NEXT: %vld4_lane_v.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_lane_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <4 x i16> %vld4_lane_v.fca.0.extract, <4 x i16>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx8 = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <4 x i16> %vld4_lane_v.fca.1.extract, <4 x i16>* %__ret.sroa.4.0..sroa_idx8, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx10 = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <4 x i16> %vld4_lane_v.fca.2.extract, <4 x i16>* %__ret.sroa.5.0..sroa_idx10, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx12 = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <4 x i16> %vld4_lane_v.fca.3.extract, <4 x i16>* %__ret.sroa.6.0..sroa_idx12, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32> +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32> +// CHECK-NEXT: %4 = bitcast i32* %a to i8* +// CHECK-NEXT: %vld4_lane_v = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* %4, <2 x i32> %0, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, i32 1, i32 4) +// CHECK-NEXT: %vld4_lane_v.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4_lane_v, 0 +// CHECK-NEXT: %vld4_lane_v.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4_lane_v, 1 +// CHECK-NEXT: %vld4_lane_v.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4_lane_v, 2 +// CHECK-NEXT: %vld4_lane_v.fca.3.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4_lane_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <2 x i32> 
%vld4_lane_v.fca.0.extract, <2 x i32>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx8 = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <2 x i32> %vld4_lane_v.fca.1.extract, <2 x i32>* %__ret.sroa.4.0..sroa_idx8, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx10 = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <2 x i32> %vld4_lane_v.fca.2.extract, <2 x i32>* %__ret.sroa.5.0..sroa_idx10, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx12 = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <2 x i32> %vld4_lane_v.fca.3.extract, <2 x i32>* %__ret.sroa.6.0..sroa_idx12, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4_lane_f16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x half> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x half> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x half> +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x half> +// CHECK-NEXT: %4 = bitcast half* %a to i8* +// CHECK-NEXT: %vld4_lane_v = tail call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld4lane.v4f16.p0i8(i8* %4, <4 x half> %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, i32 3, i32 2) +// CHECK-NEXT: %vld4_lane_v.fca.0.extract = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %vld4_lane_v, 0 +// CHECK-NEXT: %vld4_lane_v.fca.1.extract = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %vld4_lane_v, 1 +// CHECK-NEXT: %vld4_lane_v.fca.2.extract = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %vld4_lane_v, 2 +// CHECK-NEXT: %vld4_lane_v.fca.3.extract = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %vld4_lane_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <4 x half> %vld4_lane_v.fca.0.extract, <4 x half>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx8 = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <4 x half> %vld4_lane_v.fca.1.extract, <4 x half>* %__ret.sroa.4.0..sroa_idx8, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx10 = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <4 x half> %vld4_lane_v.fca.2.extract, <4 x half>* %__ret.sroa.5.0..sroa_idx10, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx12 = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <4 x half> %vld4_lane_v.fca.3.extract, <4 x half>* %__ret.sroa.6.0..sroa_idx12, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: 
%b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x float> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x float> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x float> +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x float> +// CHECK-NEXT: %4 = bitcast float* %a to i8* +// CHECK-NEXT: %vld4_lane_v = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32.p0i8(i8* %4, <2 x float> %0, <2 x float> %1, <2 x float> %2, <2 x float> %3, i32 1, i32 4) +// CHECK-NEXT: %vld4_lane_v.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4_lane_v, 0 +// CHECK-NEXT: %vld4_lane_v.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4_lane_v, 1 +// CHECK-NEXT: %vld4_lane_v.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4_lane_v, 2 +// CHECK-NEXT: %vld4_lane_v.fca.3.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4_lane_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <2 x float> %vld4_lane_v.fca.0.extract, <2 x float>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx8 = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <2 x float> %vld4_lane_v.fca.1.extract, <2 x float>* %__ret.sroa.4.0..sroa_idx8, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx10 = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <2 x float> %vld4_lane_v.fca.2.extract, <2 x float>* %__ret.sroa.5.0..sroa_idx10, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx12 = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <2 x float> %vld4_lane_v.fca.3.extract, <2 x float>* %__ret.sroa.6.0..sroa_idx12, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4_lane_p8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8> +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <8 x i8> +// CHECK-NEXT: %vld4_lane_v = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, i32 7, i32 1) +// CHECK-NEXT: %vld4_lane_v.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_lane_v, 0 +// CHECK-NEXT: %vld4_lane_v.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_lane_v, 1 +// CHECK-NEXT: %vld4_lane_v.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_lane_v, 2 +// CHECK-NEXT: %vld4_lane_v.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } 
%vld4_lane_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx7 = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <8 x i8> %vld4_lane_v.fca.0.extract, <8 x i8>* %__ret.sroa.0.0..sroa_idx7, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx10 = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <8 x i8> %vld4_lane_v.fca.1.extract, <8 x i8>* %__ret.sroa.4.0..sroa_idx10, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx13 = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <8 x i8> %vld4_lane_v.fca.2.extract, <8 x i8>* %__ret.sroa.5.0..sroa_idx13, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx16 = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <8 x i8> %vld4_lane_v.fca.3.extract, <8 x i8>* %__ret.sroa.6.0..sroa_idx16, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vld4_lane_p16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %4 = bitcast i16* %a to i8* +// CHECK-NEXT: %vld4_lane_v = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* %4, <4 x i16> %0, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 3, i32 2) +// CHECK-NEXT: %vld4_lane_v.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_lane_v, 0 +// CHECK-NEXT: %vld4_lane_v.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_lane_v, 1 +// CHECK-NEXT: %vld4_lane_v.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_lane_v, 2 +// CHECK-NEXT: %vld4_lane_v.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_lane_v, 3 +// CHECK-NEXT: %__ret.sroa.0.0..sroa_idx = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: store <4 x i16> %vld4_lane_v.fca.0.extract, <4 x i16>* %__ret.sroa.0.0..sroa_idx, align 8 +// CHECK-NEXT: %__ret.sroa.4.0..sroa_idx8 = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: store <4 x i16> %vld4_lane_v.fca.1.extract, <4 x i16>* %__ret.sroa.4.0..sroa_idx8, align 8 +// CHECK-NEXT: %__ret.sroa.5.0..sroa_idx10 = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* %agg.result, i32 0, i32 0, i32 2 +// CHECK-NEXT: store <4 x i16> %vld4_lane_v.fca.2.extract, <4 x i16>* %__ret.sroa.5.0..sroa_idx10, align 8 +// CHECK-NEXT: %__ret.sroa.6.0..sroa_idx12 = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* %agg.result, i32 0, i32 0, i32 3 +// CHECK-NEXT: store <4 x i16> %vld4_lane_v.fca.3.extract, <4 x i16>* %__ret.sroa.6.0..sroa_idx12, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vmax_s8 +// CHECK: entry: +// CHECK-NEXT: %vmax_v.i = tail call <8 x i8> 
@llvm.arm.neon.vmaxs.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vmax_v.i +// CHECK-LABEL: test_vmax_s16 +// CHECK: entry: +// CHECK-NEXT: %vmax_v2.i = tail call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vmax_v2.i +// CHECK-LABEL: test_vmax_s32 +// CHECK: entry: +// CHECK-NEXT: %vmax_v2.i = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vmax_v2.i +// CHECK-LABEL: test_vmax_u8 +// CHECK: entry: +// CHECK-NEXT: %vmax_v.i = tail call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vmax_v.i +// CHECK-LABEL: test_vmax_u16 +// CHECK: entry: +// CHECK-NEXT: %vmax_v2.i = tail call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vmax_v2.i +// CHECK-LABEL: test_vmax_u32 +// CHECK: entry: +// CHECK-NEXT: %vmax_v2.i = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vmax_v2.i +// CHECK-LABEL: test_vmax_f32 +// CHECK: entry: +// CHECK-NEXT: %vmax_v2.i = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %b) +// CHECK-NEXT: ret <2 x float> %vmax_v2.i +// CHECK-LABEL: test_vmaxq_s8 +// CHECK: entry: +// CHECK-NEXT: %vmaxq_v.i = tail call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vmaxq_v.i +// CHECK-LABEL: test_vmaxq_s16 +// CHECK: entry: +// CHECK-NEXT: %vmaxq_v2.i = tail call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vmaxq_v2.i +// CHECK-LABEL: test_vmaxq_s32 +// CHECK: entry: +// CHECK-NEXT: %vmaxq_v2.i = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vmaxq_v2.i +// CHECK-LABEL: test_vmaxq_u8 +// CHECK: entry: +// CHECK-NEXT: %vmaxq_v.i = tail call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vmaxq_v.i +// CHECK-LABEL: test_vmaxq_u16 +// CHECK: entry: +// CHECK-NEXT: %vmaxq_v2.i = tail call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vmaxq_v2.i +// CHECK-LABEL: test_vmaxq_u32 +// CHECK: entry: +// CHECK-NEXT: %vmaxq_v2.i = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vmaxq_v2.i +// CHECK-LABEL: test_vmaxq_f32 +// CHECK: entry: +// CHECK-NEXT: %vmaxq_v2.i = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %b) +// CHECK-NEXT: ret <4 x float> %vmaxq_v2.i +// CHECK-LABEL: test_vmin_s8 +// CHECK: entry: +// CHECK-NEXT: %vmin_v.i = tail call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vmin_v.i +// CHECK-LABEL: test_vmin_s16 +// CHECK: entry: +// CHECK-NEXT: %vmin_v2.i = tail call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vmin_v2.i +// CHECK-LABEL: test_vmin_s32 +// CHECK: entry: +// CHECK-NEXT: %vmin_v2.i = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vmin_v2.i +// CHECK-LABEL: test_vmin_u8 +// CHECK: entry: +// CHECK-NEXT: %vmin_v.i = tail call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vmin_v.i +// CHECK-LABEL: test_vmin_u16 +// CHECK: entry: +// CHECK-NEXT: %vmin_v2.i = tail call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %a, <4 x i16> %b) +// 
CHECK-NEXT: ret <4 x i16> %vmin_v2.i +// CHECK-LABEL: test_vmin_u32 +// CHECK: entry: +// CHECK-NEXT: %vmin_v2.i = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vmin_v2.i +// CHECK-LABEL: test_vmin_f32 +// CHECK: entry: +// CHECK-NEXT: %vmin_v2.i = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %a, <2 x float> %b) +// CHECK-NEXT: ret <2 x float> %vmin_v2.i +// CHECK-LABEL: test_vminq_s8 +// CHECK: entry: +// CHECK-NEXT: %vminq_v.i = tail call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vminq_v.i +// CHECK-LABEL: test_vminq_s16 +// CHECK: entry: +// CHECK-NEXT: %vminq_v2.i = tail call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vminq_v2.i +// CHECK-LABEL: test_vminq_s32 +// CHECK: entry: +// CHECK-NEXT: %vminq_v2.i = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vminq_v2.i +// CHECK-LABEL: test_vminq_u8 +// CHECK: entry: +// CHECK-NEXT: %vminq_v.i = tail call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vminq_v.i +// CHECK-LABEL: test_vminq_u16 +// CHECK: entry: +// CHECK-NEXT: %vminq_v2.i = tail call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vminq_v2.i +// CHECK-LABEL: test_vminq_u32 +// CHECK: entry: +// CHECK-NEXT: %vminq_v2.i = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vminq_v2.i +// CHECK-LABEL: test_vminq_f32 +// CHECK: entry: +// CHECK-NEXT: %vminq_v2.i = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %a, <4 x float> %b) +// CHECK-NEXT: ret <4 x float> %vminq_v2.i +// CHECK-LABEL: test_vmla_s8 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <8 x i8> %c, %b +// CHECK-NEXT: %add.i = add <8 x i8> %mul.i, %a +// CHECK-NEXT: ret <8 x i8> %add.i +// CHECK-LABEL: test_vmla_s16 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <4 x i16> %c, %b +// CHECK-NEXT: %add.i = add <4 x i16> %mul.i, %a +// CHECK-NEXT: ret <4 x i16> %add.i +// CHECK-LABEL: test_vmla_s32 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <2 x i32> %c, %b +// CHECK-NEXT: %add.i = add <2 x i32> %mul.i, %a +// CHECK-NEXT: ret <2 x i32> %add.i +// CHECK-LABEL: test_vmla_f32 +// CHECK: entry: +// CHECK-NEXT: %mul.i = fmul <2 x float> %b, %c +// CHECK-NEXT: %add.i = fadd <2 x float> %mul.i, %a +// CHECK-NEXT: ret <2 x float> %add.i +// CHECK-LABEL: test_vmla_u8 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <8 x i8> %c, %b +// CHECK-NEXT: %add.i = add <8 x i8> %mul.i, %a +// CHECK-NEXT: ret <8 x i8> %add.i +// CHECK-LABEL: test_vmla_u16 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <4 x i16> %c, %b +// CHECK-NEXT: %add.i = add <4 x i16> %mul.i, %a +// CHECK-NEXT: ret <4 x i16> %add.i +// CHECK-LABEL: test_vmla_u32 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <2 x i32> %c, %b +// CHECK-NEXT: %add.i = add <2 x i32> %mul.i, %a +// CHECK-NEXT: ret <2 x i32> %add.i +// CHECK-LABEL: test_vmlaq_s8 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <16 x i8> %c, %b +// CHECK-NEXT: %add.i = add <16 x i8> %mul.i, %a +// CHECK-NEXT: ret <16 x i8> %add.i +// CHECK-LABEL: test_vmlaq_s16 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <8 x i16> %c, %b +// CHECK-NEXT: %add.i = add <8 x i16> %mul.i, %a +// CHECK-NEXT: ret <8 x i16> %add.i +// CHECK-LABEL: test_vmlaq_s32 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <4 x i32> %c, %b +// 
CHECK-NEXT: %add.i = add <4 x i32> %mul.i, %a +// CHECK-NEXT: ret <4 x i32> %add.i
+// CHECK-LABEL: test_vmlaq_f32 +// CHECK: entry: +// CHECK-NEXT: %mul.i = fmul <4 x float> %b, %c +// CHECK-NEXT: %add.i = fadd <4 x float> %mul.i, %a +// CHECK-NEXT: ret <4 x float> %add.i
+// CHECK-LABEL: test_vmlaq_u8 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <16 x i8> %c, %b +// CHECK-NEXT: %add.i = add <16 x i8> %mul.i, %a +// CHECK-NEXT: ret <16 x i8> %add.i
+// CHECK-LABEL: test_vmlaq_u16 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <8 x i16> %c, %b +// CHECK-NEXT: %add.i = add <8 x i16> %mul.i, %a +// CHECK-NEXT: ret <8 x i16> %add.i
+// CHECK-LABEL: test_vmlaq_u32 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <4 x i32> %c, %b +// CHECK-NEXT: %add.i = add <4 x i32> %mul.i, %a +// CHECK-NEXT: ret <4 x i32> %add.i
+// CHECK-LABEL: test_vmlal_s8 +// CHECK: entry: +// CHECK-NEXT: %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c) +// CHECK-NEXT: %add.i = add <8 x i16> %vmull.i.i, %a +// CHECK-NEXT: ret <8 x i16> %add.i
+// CHECK-LABEL: test_vmlal_s16 +// CHECK: entry: +// CHECK-NEXT: %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c) +// CHECK-NEXT: %add.i = add <4 x i32> %vmull2.i.i, %a +// CHECK-NEXT: ret <4 x i32> %add.i
+// CHECK-LABEL: test_vmlal_s32 +// CHECK: entry: +// CHECK-NEXT: %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c) +// CHECK-NEXT: %add.i = add <2 x i64> %vmull2.i.i, %a +// CHECK-NEXT: ret <2 x i64> %add.i
+// CHECK-LABEL: test_vmlal_u8 +// CHECK: entry: +// CHECK-NEXT: %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c) +// CHECK-NEXT: %add.i = add <8 x i16> %vmull.i.i, %a +// CHECK-NEXT: ret <8 x i16> %add.i
+// CHECK-LABEL: test_vmlal_u16 +// CHECK: entry: +// CHECK-NEXT: %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c) +// CHECK-NEXT: %add.i = add <4 x i32> %vmull2.i.i, %a +// CHECK-NEXT: ret <4 x i32> %add.i
+// CHECK-LABEL: test_vmlal_u32 +// CHECK: entry: +// CHECK-NEXT: %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c) +// CHECK-NEXT: %add.i = add <2 x i64> %vmull2.i.i, %a +// CHECK-NEXT: ret <2 x i64> %add.i
+// CHECK-LABEL: test_vmlal_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) +// CHECK-NEXT: %add = add <4 x i32> %vmull2.i, %a +// CHECK-NEXT: ret <4 x i32> %add
+// CHECK-LABEL: test_vmlal_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1> +// CHECK-NEXT: %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) +// CHECK-NEXT: %add = add <2 x i64> %vmull2.i, %a +// CHECK-NEXT: ret <2 x i64> %add
+// CHECK-LABEL: test_vmlal_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) +// CHECK-NEXT: %add = add <4 x i32> %vmull2.i, %a +// CHECK-NEXT: ret <4 x i32> %add
+// CHECK-LABEL: test_vmlal_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1> +// CHECK-NEXT: %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) +// CHECK-NEXT: %add = add <2 x i64> %vmull2.i, %a +// CHECK-NEXT: ret <2 x i64> %add
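+// The _n tests that follow broadcast a scalar into a vector before the
+// widening multiply-accumulate. A minimal sketch of the presumed C source,
+// assuming the usual arm_neon_intrinsics.c signatures (illustrative only,
+// not taken from this patch):
+//   int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
+//     return vmlal_n_s16(a, b, c); // a + (widen(b) * splat(c))
+//   }
+// FileCheck ignores comment lines that carry no check prefix, so notes like
+// these do not affect the test.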
+// CHECK-LABEL: test_vmlal_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %c, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %vecinit3.i) +// CHECK-NEXT: %add.i = add <4 x i32> %vmull2.i.i, %a +// CHECK-NEXT: ret <4 x i32> %add.i
+// CHECK-LABEL: test_vmlal_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %c, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %vecinit1.i) +// CHECK-NEXT: %add.i = add <2 x i64> %vmull2.i.i, %a +// CHECK-NEXT: ret <2 x i64> %add.i
+// CHECK-LABEL: test_vmlal_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %c, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %vecinit3.i) +// CHECK-NEXT: %add.i = add <4 x i32> %vmull2.i.i, %a +// CHECK-NEXT: ret <4 x i32> %add.i
+// CHECK-LABEL: test_vmlal_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %c, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %vecinit1.i) +// CHECK-NEXT: %add.i = add <2 x i64> %vmull2.i.i, %a +// CHECK-NEXT: ret <2 x i64> %add.i
+// CHECK-LABEL: test_vmla_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: %mul = mul <4 x i16> %shuffle, %b +// CHECK-NEXT: %add = add <4 x i16> %mul, %a +// CHECK-NEXT: ret <4 x i16> %add
+// CHECK-LABEL: test_vmla_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1> +// CHECK-NEXT: %mul = mul <2 x i32> %shuffle, %b +// CHECK-NEXT: %add = add <2 x i32> %mul, %a +// CHECK-NEXT: ret <2 x i32> %add
+// CHECK-LABEL: test_vmla_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: %mul = mul <4 x i16> %shuffle, %b +// CHECK-NEXT: %add = add <4 x i16> %mul, %a +// CHECK-NEXT: ret <4 x i16> %add
+// CHECK-LABEL: test_vmla_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1> +// CHECK-NEXT: %mul = mul <2 x i32> %shuffle, %b +// CHECK-NEXT: %add = add <2 x i32> %mul, %a +// CHECK-NEXT: ret <2 x i32> %add
+// CHECK-LABEL: test_vmla_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x float> %c, <2 x float> undef, <2 x i32> <i32 1, i32 1> +// CHECK-NEXT: %mul = fmul <2 x float> %shuffle, %b +// CHECK-NEXT: %add = fadd <2 x float> %mul, %a +// CHECK-NEXT: ret <2 x float> %add
+// CHECK-LABEL: test_vmlaq_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: %mul = mul <8 x i16> %shuffle, %b +// CHECK-NEXT: %add = add <8 x i16> %mul, %a +// CHECK-NEXT: ret <8 x i16> %add
+// CHECK-LABEL: test_vmlaq_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> +// CHECK-NEXT: %mul = mul <4 x i32> %shuffle, %b +// CHECK-NEXT: %add = add <4 x i32> %mul, %a +// CHECK-NEXT: ret <4 x i32> %add
+// CHECK-LABEL: test_vmlaq_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: %mul = mul <8 x i16> %shuffle, %b +// CHECK-NEXT: %add = add <8 x i16> %mul, %a +// CHECK-NEXT: ret <8 x i16> %add
+// CHECK-LABEL: test_vmlaq_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> +// CHECK-NEXT: %mul = mul <4 x i32> %shuffle, %b +// CHECK-NEXT: %add = add <4 x i32> %mul, %a +// CHECK-NEXT: ret <4 x i32> %add
+// CHECK-LABEL: test_vmlaq_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x float> %c, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> +// CHECK-NEXT: %mul = fmul <4 x float> %shuffle, %b +// CHECK-NEXT: %add = fadd <4 x float> %mul, %a +// CHECK-NEXT: ret <4 x float> %add
+// CHECK-LABEL: test_vmla_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %c, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <4 x i16> %vecinit3.i, %b +// CHECK-NEXT: %add.i = add <4 x i16> %mul.i, %a +// CHECK-NEXT: ret <4 x i16> %add.i
+// CHECK-LABEL: test_vmla_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %c, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <2 x i32> %vecinit1.i, %b +// CHECK-NEXT: %add.i = add <2 x i32> %mul.i, %a +// CHECK-NEXT: ret <2 x i32> %add.i
+// CHECK-LABEL: test_vmla_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %c, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <4 x i16> %vecinit3.i, %b +// CHECK-NEXT: %add.i = add <4 x i16> %mul.i, %a +// CHECK-NEXT: ret <4 x i16> %add.i
+// CHECK-LABEL: test_vmla_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %c, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <2 x i32> %vecinit1.i, %b +// CHECK-NEXT: %add.i = add <2 x i32> %mul.i, %a +// CHECK-NEXT: ret <2 x i32> %add.i
+// CHECK-LABEL: test_vmla_n_f32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x float> undef, float %c, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x float> %vecinit.i, <2 x float> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = fmul <2 x float> %vecinit1.i, %b +// CHECK-NEXT: %add.i = fadd <2 x float> %mul.i, %a +// CHECK-NEXT: ret <2 x float> %add.i
+// CHECK-LABEL: test_vmlaq_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <8 x i16> undef, i16 %c, i32 0 +// CHECK-NEXT: %vecinit7.i = shufflevector <8 x i16> %vecinit.i, <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <8 x i16> %vecinit7.i, %b +// CHECK-NEXT: %add.i = add <8 x i16> %mul.i, %a +// CHECK-NEXT: ret <8 x i16> %add.i
+// CHECK-LABEL: test_vmlaq_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i32> undef, i32 %c, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <4 x i32>
%vecinit3.i, %b +// CHECK-NEXT: %add.i = add <4 x i32> %mul.i, %a +// CHECK-NEXT: ret <4 x i32> %add.i +// CHECK-LABEL: test_vmlaq_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <8 x i16> undef, i16 %c, i32 0 +// CHECK-NEXT: %vecinit7.i = shufflevector <8 x i16> %vecinit.i, <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <8 x i16> %vecinit7.i, %b +// CHECK-NEXT: %add.i = add <8 x i16> %mul.i, %a +// CHECK-NEXT: ret <8 x i16> %add.i +// CHECK-LABEL: test_vmlaq_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i32> undef, i32 %c, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <4 x i32> %vecinit3.i, %b +// CHECK-NEXT: %add.i = add <4 x i32> %mul.i, %a +// CHECK-NEXT: ret <4 x i32> %add.i +// CHECK-LABEL: test_vmlaq_n_f32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x float> undef, float %c, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = fmul <4 x float> %vecinit3.i, %b +// CHECK-NEXT: %add.i = fadd <4 x float> %mul.i, %a +// CHECK-NEXT: ret <4 x float> %add.i +// CHECK-LABEL: test_vmls_s8 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <8 x i8> %c, %b +// CHECK-NEXT: %sub.i = sub <8 x i8> %a, %mul.i +// CHECK-NEXT: ret <8 x i8> %sub.i +// CHECK-LABEL: test_vmls_s16 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <4 x i16> %c, %b +// CHECK-NEXT: %sub.i = sub <4 x i16> %a, %mul.i +// CHECK-NEXT: ret <4 x i16> %sub.i +// CHECK-LABEL: test_vmls_s32 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <2 x i32> %c, %b +// CHECK-NEXT: %sub.i = sub <2 x i32> %a, %mul.i +// CHECK-NEXT: ret <2 x i32> %sub.i +// CHECK-LABEL: test_vmls_f32 +// CHECK: entry: +// CHECK-NEXT: %mul.i = fmul <2 x float> %b, %c +// CHECK-NEXT: %sub.i = fsub <2 x float> %a, %mul.i +// CHECK-NEXT: ret <2 x float> %sub.i +// CHECK-LABEL: test_vmls_u8 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <8 x i8> %c, %b +// CHECK-NEXT: %sub.i = sub <8 x i8> %a, %mul.i +// CHECK-NEXT: ret <8 x i8> %sub.i +// CHECK-LABEL: test_vmls_u16 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <4 x i16> %c, %b +// CHECK-NEXT: %sub.i = sub <4 x i16> %a, %mul.i +// CHECK-NEXT: ret <4 x i16> %sub.i +// CHECK-LABEL: test_vmls_u32 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <2 x i32> %c, %b +// CHECK-NEXT: %sub.i = sub <2 x i32> %a, %mul.i +// CHECK-NEXT: ret <2 x i32> %sub.i +// CHECK-LABEL: test_vmlsq_s8 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <16 x i8> %c, %b +// CHECK-NEXT: %sub.i = sub <16 x i8> %a, %mul.i +// CHECK-NEXT: ret <16 x i8> %sub.i +// CHECK-LABEL: test_vmlsq_s16 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <8 x i16> %c, %b +// CHECK-NEXT: %sub.i = sub <8 x i16> %a, %mul.i +// CHECK-NEXT: ret <8 x i16> %sub.i +// CHECK-LABEL: test_vmlsq_s32 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <4 x i32> %c, %b +// CHECK-NEXT: %sub.i = sub <4 x i32> %a, %mul.i +// CHECK-NEXT: ret <4 x i32> %sub.i +// CHECK-LABEL: test_vmlsq_f32 +// CHECK: entry: +// CHECK-NEXT: %mul.i = fmul <4 x float> %b, %c +// CHECK-NEXT: %sub.i = fsub <4 x float> %a, %mul.i +// CHECK-NEXT: ret <4 x float> %sub.i +// CHECK-LABEL: test_vmlsq_u8 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <16 x i8> %c, %b +// CHECK-NEXT: %sub.i = sub <16 x i8> %a, %mul.i +// CHECK-NEXT: ret <16 x i8> %sub.i +// CHECK-LABEL: test_vmlsq_u16 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <8 x i16> %c, %b +// 
CHECK-NEXT: %sub.i = sub <8 x i16> %a, %mul.i +// CHECK-NEXT: ret <8 x i16> %sub.i
+// CHECK-LABEL: test_vmlsq_u32 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <4 x i32> %c, %b +// CHECK-NEXT: %sub.i = sub <4 x i32> %a, %mul.i +// CHECK-NEXT: ret <4 x i32> %sub.i
+// CHECK-LABEL: test_vmlsl_s8 +// CHECK: entry: +// CHECK-NEXT: %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c) +// CHECK-NEXT: %sub.i = sub <8 x i16> %a, %vmull.i.i +// CHECK-NEXT: ret <8 x i16> %sub.i
+// CHECK-LABEL: test_vmlsl_s16 +// CHECK: entry: +// CHECK-NEXT: %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c) +// CHECK-NEXT: %sub.i = sub <4 x i32> %a, %vmull2.i.i +// CHECK-NEXT: ret <4 x i32> %sub.i
+// CHECK-LABEL: test_vmlsl_s32 +// CHECK: entry: +// CHECK-NEXT: %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c) +// CHECK-NEXT: %sub.i = sub <2 x i64> %a, %vmull2.i.i +// CHECK-NEXT: ret <2 x i64> %sub.i
+// CHECK-LABEL: test_vmlsl_u8 +// CHECK: entry: +// CHECK-NEXT: %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c) +// CHECK-NEXT: %sub.i = sub <8 x i16> %a, %vmull.i.i +// CHECK-NEXT: ret <8 x i16> %sub.i
+// CHECK-LABEL: test_vmlsl_u16 +// CHECK: entry: +// CHECK-NEXT: %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c) +// CHECK-NEXT: %sub.i = sub <4 x i32> %a, %vmull2.i.i +// CHECK-NEXT: ret <4 x i32> %sub.i
+// CHECK-LABEL: test_vmlsl_u32 +// CHECK: entry: +// CHECK-NEXT: %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c) +// CHECK-NEXT: %sub.i = sub <2 x i64> %a, %vmull2.i.i +// CHECK-NEXT: ret <2 x i64> %sub.i
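+// The _lane variants below splat one lane of the last vector operand before
+// the widening multiply; that splat is what the shufflevector masks in the
+// checks encode. A minimal sketch of the presumed source, assuming lane 3 is
+// exercised (matching the lane-3/lane-1 convention of the vld4_lane tests
+// above; illustrative only):
+//   int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
+//     return vmlsl_lane_s16(a, b, c, 3); // a - (widen(b) * splat(c[3]))
+//   }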
+// CHECK-LABEL: test_vmlsl_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) +// CHECK-NEXT: %sub = sub <4 x i32> %a, %vmull2.i +// CHECK-NEXT: ret <4 x i32> %sub
+// CHECK-LABEL: test_vmlsl_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1> +// CHECK-NEXT: %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) +// CHECK-NEXT: %sub = sub <2 x i64> %a, %vmull2.i +// CHECK-NEXT: ret <2 x i64> %sub
+// CHECK-LABEL: test_vmlsl_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) +// CHECK-NEXT: %sub = sub <4 x i32> %a, %vmull2.i +// CHECK-NEXT: ret <4 x i32> %sub
+// CHECK-LABEL: test_vmlsl_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1> +// CHECK-NEXT: %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) +// CHECK-NEXT: %sub = sub <2 x i64> %a, %vmull2.i +// CHECK-NEXT: ret <2 x i64> %sub
+// CHECK-LABEL: test_vmlsl_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %c, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %vecinit3.i) +// CHECK-NEXT: %sub.i = sub <4 x i32> %a, %vmull2.i.i +// CHECK-NEXT: ret <4 x i32> %sub.i
+// CHECK-LABEL: test_vmlsl_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %c, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %vecinit1.i) +// CHECK-NEXT: %sub.i = sub <2 x i64> %a, %vmull2.i.i +// CHECK-NEXT: ret <2 x i64> %sub.i
+// CHECK-LABEL: test_vmlsl_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %c, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %vecinit3.i) +// CHECK-NEXT: %sub.i = sub <4 x i32> %a, %vmull2.i.i +// CHECK-NEXT: ret <4 x i32> %sub.i
+// CHECK-LABEL: test_vmlsl_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %c, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %vecinit1.i) +// CHECK-NEXT: %sub.i = sub <2 x i64> %a, %vmull2.i.i +// CHECK-NEXT: ret <2 x i64> %sub.i
+// CHECK-LABEL: test_vmls_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: %mul = mul <4 x i16> %shuffle, %b +// CHECK-NEXT: %sub = sub <4 x i16> %a, %mul +// CHECK-NEXT: ret <4 x i16> %sub
+// CHECK-LABEL: test_vmls_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1> +// CHECK-NEXT: %mul = mul <2 x i32> %shuffle, %b +// CHECK-NEXT: %sub = sub <2 x i32> %a, %mul +// CHECK-NEXT: ret <2 x i32> %sub
+// CHECK-LABEL: test_vmls_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: %mul = mul <4 x i16> %shuffle, %b +// CHECK-NEXT: %sub = sub <4 x i16> %a, %mul +// CHECK-NEXT: ret <4 x i16> %sub
+// CHECK-LABEL: test_vmls_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1> +// CHECK-NEXT: %mul = mul <2 x i32> %shuffle, %b +// CHECK-NEXT: %sub = sub <2 x i32> %a, %mul +// CHECK-NEXT: ret <2 x i32> %sub
+// CHECK-LABEL: test_vmls_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x float> %c, <2 x float> undef, <2 x i32> <i32 1, i32 1> +// CHECK-NEXT: %mul = fmul <2 x float> %shuffle, %b +// CHECK-NEXT: %sub = fsub <2 x float> %a, %mul +// CHECK-NEXT: ret <2 x float> %sub
+// CHECK-LABEL: test_vmlsq_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: %mul = mul <8 x i16> %shuffle, %b +// CHECK-NEXT: %sub = sub <8 x i16> %a, %mul +// CHECK-NEXT: ret <8 x i16> %sub
+// CHECK-LABEL: test_vmlsq_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> +// CHECK-NEXT: %mul = mul <4 x i32> %shuffle, %b +// CHECK-NEXT: %sub = sub <4 x i32> %a, %mul +// CHECK-NEXT: ret <4 x i32> %sub
+// CHECK-LABEL: test_vmlsq_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: %mul = mul <8 x i16> %shuffle, %b +// CHECK-NEXT: %sub = sub <8 x i16> %a, %mul +// CHECK-NEXT: ret <8 x i16> %sub
+// CHECK-LABEL: test_vmlsq_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> +// CHECK-NEXT: %mul = mul <4 x i32> %shuffle, %b +// CHECK-NEXT: %sub = sub <4 x i32> %a, %mul +// CHECK-NEXT: ret <4 x i32> %sub
+// CHECK-LABEL: test_vmlsq_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x float> %c, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> +// CHECK-NEXT: %mul = fmul <4 x float> %shuffle, %b +// CHECK-NEXT: %sub = fsub <4 x float> %a, %mul +// CHECK-NEXT: ret <4 x float> %sub
+// CHECK-LABEL: test_vmls_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %c, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <4 x i16> %vecinit3.i, %b +// CHECK-NEXT: %sub.i = sub <4 x i16> %a, %mul.i +// CHECK-NEXT: ret <4 x i16> %sub.i
+// CHECK-LABEL: test_vmls_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %c, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <2 x i32> %vecinit1.i, %b +// CHECK-NEXT: %sub.i = sub <2 x i32> %a, %mul.i +// CHECK-NEXT: ret <2 x i32> %sub.i
+// CHECK-LABEL: test_vmls_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %c, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <4 x i16> %vecinit3.i, %b +// CHECK-NEXT: %sub.i = sub <4 x i16> %a, %mul.i +// CHECK-NEXT: ret <4 x i16> %sub.i
+// CHECK-LABEL: test_vmls_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %c, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <2 x i32> %vecinit1.i, %b +// CHECK-NEXT: %sub.i = sub <2 x i32> %a, %mul.i +// CHECK-NEXT: ret <2 x i32> %sub.i
+// CHECK-LABEL: test_vmls_n_f32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x float> undef, float %c, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x float> %vecinit.i, <2 x float> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = fmul <2 x float> %vecinit1.i, %b +// CHECK-NEXT: %sub.i = fsub <2 x float> %a, %mul.i +// CHECK-NEXT: ret <2 x float> %sub.i
+// CHECK-LABEL: test_vmlsq_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <8 x i16> undef, i16 %c, i32 0 +// CHECK-NEXT: %vecinit7.i = shufflevector <8 x i16> %vecinit.i, <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <8 x i16> %vecinit7.i, %b +// CHECK-NEXT: %sub.i = sub <8 x i16> %a, %mul.i +// CHECK-NEXT: ret <8 x i16> %sub.i
+// CHECK-LABEL: test_vmlsq_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i32> undef, i32 %c, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <4 x i32> %vecinit3.i, %b +// CHECK-NEXT: %sub.i = sub <4 x i32> %a, %mul.i +// CHECK-NEXT: ret <4 x i32> %sub.i
+// CHECK-LABEL: test_vmlsq_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <8 x i16> undef, i16 %c, i32 0 +// CHECK-NEXT: %vecinit7.i = shufflevector <8 x i16> %vecinit.i, <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <8 x i16> %vecinit7.i, %b +// CHECK-NEXT: %sub.i = sub <8 x i16> %a, %mul.i +// CHECK-NEXT: ret <8 x i16> %sub.i
+// CHECK-LABEL: test_vmlsq_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i =
insertelement <4 x i32> undef, i32 %c, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <4 x i32> %vecinit3.i, %b +// CHECK-NEXT: %sub.i = sub <4 x i32> %a, %mul.i +// CHECK-NEXT: ret <4 x i32> %sub.i +// CHECK-LABEL: test_vmlsq_n_f32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x float> undef, float %c, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = fmul <4 x float> %vecinit3.i, %b +// CHECK-NEXT: %sub.i = fsub <4 x float> %a, %mul.i +// CHECK-NEXT: ret <4 x float> %sub.i +// CHECK-LABEL: test_vmovl_s8 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i = sext <8 x i8> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %vmovl.i +// CHECK-LABEL: test_vmovl_s16 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i = sext <4 x i16> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %vmovl.i +// CHECK-LABEL: test_vmovl_s32 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i = sext <2 x i32> %a to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %vmovl.i +// CHECK-LABEL: test_vmovl_u8 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i = zext <8 x i8> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %vmovl.i +// CHECK-LABEL: test_vmovl_u16 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i = zext <4 x i16> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %vmovl.i +// CHECK-LABEL: test_vmovl_u32 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i = zext <2 x i32> %a to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %vmovl.i +// CHECK-LABEL: test_vmovn_s16 +// CHECK: entry: +// CHECK-NEXT: %vmovn.i = trunc <8 x i16> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %vmovn.i +// CHECK-LABEL: test_vmovn_s32 +// CHECK: entry: +// CHECK-NEXT: %vmovn.i = trunc <4 x i32> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %vmovn.i +// CHECK-LABEL: test_vmovn_s64 +// CHECK: entry: +// CHECK-NEXT: %vmovn.i = trunc <2 x i64> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %vmovn.i +// CHECK-LABEL: test_vmovn_u16 +// CHECK: entry: +// CHECK-NEXT: %vmovn.i = trunc <8 x i16> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %vmovn.i +// CHECK-LABEL: test_vmovn_u32 +// CHECK: entry: +// CHECK-NEXT: %vmovn.i = trunc <4 x i32> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %vmovn.i +// CHECK-LABEL: test_vmovn_u64 +// CHECK: entry: +// CHECK-NEXT: %vmovn.i = trunc <2 x i64> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %vmovn.i +// CHECK-LABEL: test_vmov_n_u8 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <8 x i8> undef, i8 %a, i32 0 +// CHECK-NEXT: %vecinit7.i = shufflevector <8 x i8> %vecinit.i, <8 x i8> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i8> %vecinit7.i +// CHECK-LABEL: test_vmov_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %a, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x i16> %vecinit3.i +// CHECK-LABEL: test_vmov_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %a, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: ret <2 x i32> %vecinit1.i +// CHECK-LABEL: test_vmov_n_s8 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <8 x i8> undef, i8 %a, i32 0 +// CHECK-NEXT: %vecinit7.i = shufflevector <8 x i8> %vecinit.i, <8 x i8> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i8> %vecinit7.i +// 
CHECK-LABEL: test_vmov_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %a, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x i16> %vecinit3.i +// CHECK-LABEL: test_vmov_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %a, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: ret <2 x i32> %vecinit1.i +// CHECK-LABEL: test_vmov_n_p8 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <8 x i8> undef, i8 %a, i32 0 +// CHECK-NEXT: %vecinit7.i = shufflevector <8 x i8> %vecinit.i, <8 x i8> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i8> %vecinit7.i +// CHECK-LABEL: test_vmov_n_p16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %a, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x i16> %vecinit3.i +// CHECK-LABEL: test_vmov_n_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = load half, half* %a, align 2, !tbaa !3 +// CHECK-NEXT: %vecinit = insertelement <4 x half> undef, half %0, i32 0 +// CHECK-NEXT: %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x half> %vecinit3 +// CHECK-LABEL: test_vmov_n_f32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x float> undef, float %a, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x float> %vecinit.i, <2 x float> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: ret <2 x float> %vecinit1.i +// CHECK-LABEL: test_vmovq_n_u8 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <16 x i8> undef, i8 %a, i32 0 +// CHECK-NEXT: %vecinit15.i = shufflevector <16 x i8> %vecinit.i, <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: ret <16 x i8> %vecinit15.i +// CHECK-LABEL: test_vmovq_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <8 x i16> undef, i16 %a, i32 0 +// CHECK-NEXT: %vecinit7.i = shufflevector <8 x i16> %vecinit.i, <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i16> %vecinit7.i +// CHECK-LABEL: test_vmovq_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i32> undef, i32 %a, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x i32> %vecinit3.i +// CHECK-LABEL: test_vmovq_n_s8 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <16 x i8> undef, i8 %a, i32 0 +// CHECK-NEXT: %vecinit15.i = shufflevector <16 x i8> %vecinit.i, <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: ret <16 x i8> %vecinit15.i +// CHECK-LABEL: test_vmovq_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <8 x i16> undef, i16 %a, i32 0 +// CHECK-NEXT: %vecinit7.i = shufflevector <8 x i16> %vecinit.i, <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i16> %vecinit7.i +// CHECK-LABEL: test_vmovq_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i32> undef, i32 %a, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x i32> %vecinit3.i +// CHECK-LABEL: test_vmovq_n_p8 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <16 x i8> undef, i8 %a, i32 0 +// CHECK-NEXT: %vecinit15.i = shufflevector <16 x 
i8> %vecinit.i, <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: ret <16 x i8> %vecinit15.i
+// CHECK-LABEL: test_vmovq_n_p16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <8 x i16> undef, i16 %a, i32 0 +// CHECK-NEXT: %vecinit7.i = shufflevector <8 x i16> %vecinit.i, <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i16> %vecinit7.i
+// CHECK-LABEL: test_vmovq_n_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = load half, half* %a, align 2, !tbaa !3 +// CHECK-NEXT: %vecinit = insertelement <8 x half> undef, half %0, i32 0 +// CHECK-NEXT: %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x half> %vecinit7
+// CHECK-LABEL: test_vmovq_n_f32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x float> undef, float %a, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: ret <4 x float> %vecinit3.i
+// CHECK-LABEL: test_vmov_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <1 x i64> undef, i64 %a, i32 0 +// CHECK-NEXT: %add.i = shl <1 x i64> %vecinit.i, <i64 1> +// CHECK-NEXT: ret <1 x i64> %add.i
+// CHECK-LABEL: test_vmov_n_u64 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <1 x i64> undef, i64 %a, i32 0 +// CHECK-NEXT: %add.i = shl <1 x i64> %vecinit.i, <i64 1> +// CHECK-NEXT: ret <1 x i64> %add.i
+// CHECK-LABEL: test_vmovq_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i64> undef, i64 %a, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: ret <2 x i64> %vecinit1.i
+// CHECK-LABEL: test_vmovq_n_u64 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i64> undef, i64 %a, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: ret <2 x i64> %vecinit1.i
+// CHECK-LABEL: test_vmul_s8 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <8 x i8> %b, %a +// CHECK-NEXT: ret <8 x i8> %mul.i
+// CHECK-LABEL: test_vmul_s16 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <4 x i16> %b, %a +// CHECK-NEXT: ret <4 x i16> %mul.i
+// CHECK-LABEL: test_vmul_s32 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <2 x i32> %b, %a +// CHECK-NEXT: ret <2 x i32> %mul.i
+// CHECK-LABEL: test_vmul_f32 +// CHECK: entry: +// CHECK-NEXT: %mul.i = fmul <2 x float> %a, %b +// CHECK-NEXT: ret <2 x float> %mul.i
+// CHECK-LABEL: test_vmul_u8 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <8 x i8> %b, %a +// CHECK-NEXT: ret <8 x i8> %mul.i
+// CHECK-LABEL: test_vmul_u16 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <4 x i16> %b, %a +// CHECK-NEXT: ret <4 x i16> %mul.i
+// CHECK-LABEL: test_vmul_u32 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <2 x i32> %b, %a +// CHECK-NEXT: ret <2 x i32> %mul.i
+// CHECK-LABEL: test_vmulq_s8 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <16 x i8> %b, %a +// CHECK-NEXT: ret <16 x i8> %mul.i
+// CHECK-LABEL: test_vmulq_s16 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <8 x i16> %b, %a +// CHECK-NEXT: ret <8 x i16> %mul.i
+// CHECK-LABEL: test_vmulq_s32 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <4 x i32> %b, %a +// CHECK-NEXT: ret <4 x i32> %mul.i
+// CHECK-LABEL: test_vmulq_f32 +// CHECK: entry: +// CHECK-NEXT: %mul.i = fmul <4 x float> %a, %b +// CHECK-NEXT: ret <4 x float> %mul.i
+// CHECK-LABEL: test_vmulq_u8 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <16 x i8> %b, %a +// CHECK-NEXT: ret <16 x i8> %mul.i
+// CHECK-LABEL: test_vmulq_u16 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <8 x i16> %b, %a +// CHECK-NEXT: ret <8 x i16> %mul.i
+// CHECK-LABEL: test_vmulq_u32 +// CHECK: entry: +// CHECK-NEXT: %mul.i = mul <4 x i32> %b, %a +// CHECK-NEXT: ret <4 x i32> %mul.i
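+// vmull_* below are the plain widening multiplies that the accumulate forms
+// above build on. A minimal sketch of the presumed source (illustrative only,
+// assuming the usual test signature):
+//   int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
+//     return vmull_s8(a, b); // <8 x i8> * <8 x i8> -> <8 x i16>
+//   }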
+// CHECK-LABEL: test_vmull_s8 +// CHECK: entry: +// CHECK-NEXT: %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i16> %vmull.i
+// CHECK-LABEL: test_vmull_s16 +// CHECK: entry: +// CHECK-NEXT: %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i32> %vmull2.i
+// CHECK-LABEL: test_vmull_s32 +// CHECK: entry: +// CHECK-NEXT: %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i64> %vmull2.i
+// CHECK-LABEL: test_vmull_u8 +// CHECK: entry: +// CHECK-NEXT: %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i16> %vmull.i
+// CHECK-LABEL: test_vmull_u16 +// CHECK: entry: +// CHECK-NEXT: %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i32> %vmull2.i
+// CHECK-LABEL: test_vmull_u32 +// CHECK: entry: +// CHECK-NEXT: %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i64> %vmull2.i
+// CHECK-LABEL: test_vmull_p8 +// CHECK: entry: +// CHECK-NEXT: %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i16> %vmull.i
+// CHECK-LABEL: test_vmull_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %b, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle) +// CHECK-NEXT: ret <4 x i32> %vmull2.i
+// CHECK-LABEL: test_vmull_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> <i32 1, i32 1> +// CHECK-NEXT: %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle) +// CHECK-NEXT: ret <2 x i64> %vmull2.i
+// CHECK-LABEL: test_vmull_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %b, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle) +// CHECK-NEXT: ret <4 x i32> %vmull2.i
+// CHECK-LABEL: test_vmull_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> <i32 1, i32 1> +// CHECK-NEXT: %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle) +// CHECK-NEXT: ret <2 x i64> %vmull2.i
+// CHECK-LABEL: test_vmull_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %b, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %vmull5.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %vecinit3.i) +// CHECK-NEXT: ret <4 x i32> %vmull5.i
+// CHECK-LABEL: test_vmull_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %b, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: %vmull3.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %vecinit1.i) +// CHECK-NEXT: ret <2 x i64> %vmull3.i
CHECK-LABEL: test_vmull_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %b, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %vmull5.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %vecinit3.i) +// CHECK-NEXT: ret <4 x i32> %vmull5.i +// CHECK-LABEL: test_vmull_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %b, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: %vmull3.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %vecinit1.i) +// CHECK-NEXT: ret <2 x i64> %vmull3.i +// CHECK-LABEL: test_vmul_p8 +// CHECK: entry: +// CHECK-NEXT: %vmul_v.i = tail call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vmul_v.i +// CHECK-LABEL: test_vmulq_p8 +// CHECK: entry: +// CHECK-NEXT: %vmulq_v.i = tail call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vmulq_v.i +// CHECK-LABEL: test_vmul_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %b, <4 x i16> undef, <4 x i32> +// CHECK-NEXT: %mul = mul <4 x i16> %shuffle, %a +// CHECK-NEXT: ret <4 x i16> %mul +// CHECK-LABEL: test_vmul_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> +// CHECK-NEXT: %mul = mul <2 x i32> %shuffle, %a +// CHECK-NEXT: ret <2 x i32> %mul +// CHECK-LABEL: test_vmul_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x float> %b, <2 x float> undef, <2 x i32> +// CHECK-NEXT: %mul = fmul <2 x float> %shuffle, %a +// CHECK-NEXT: ret <2 x float> %mul +// CHECK-LABEL: test_vmul_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %b, <4 x i16> undef, <4 x i32> +// CHECK-NEXT: %mul = mul <4 x i16> %shuffle, %a +// CHECK-NEXT: ret <4 x i16> %mul +// CHECK-LABEL: test_vmul_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> +// CHECK-NEXT: %mul = mul <2 x i32> %shuffle, %a +// CHECK-NEXT: ret <2 x i32> %mul +// CHECK-LABEL: test_vmulq_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %b, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %mul = mul <8 x i16> %shuffle, %a +// CHECK-NEXT: ret <8 x i16> %mul +// CHECK-LABEL: test_vmulq_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %b, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %mul = mul <4 x i32> %shuffle, %a +// CHECK-NEXT: ret <4 x i32> %mul +// CHECK-LABEL: test_vmulq_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x float> %b, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %mul = fmul <4 x float> %shuffle, %a +// CHECK-NEXT: ret <4 x float> %mul +// CHECK-LABEL: test_vmulq_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %b, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %mul = mul <8 x i16> %shuffle, %a +// CHECK-NEXT: ret <8 x i16> %mul +// CHECK-LABEL: test_vmulq_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %b, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %mul = mul <4 x i32> %shuffle, %a +// CHECK-NEXT: ret <4 x i32> %mul +// CHECK-LABEL: test_vmul_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %b, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector 
<4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <4 x i16> %vecinit3.i, %a +// CHECK-NEXT: ret <4 x i16> %mul.i +// CHECK-LABEL: test_vmul_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %b, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <2 x i32> %vecinit1.i, %a +// CHECK-NEXT: ret <2 x i32> %mul.i +// CHECK-LABEL: test_vmul_n_f32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x float> undef, float %b, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x float> %vecinit.i, <2 x float> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = fmul <2 x float> %vecinit1.i, %a +// CHECK-NEXT: ret <2 x float> %mul.i +// CHECK-LABEL: test_vmul_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %b, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <4 x i16> %vecinit3.i, %a +// CHECK-NEXT: ret <4 x i16> %mul.i +// CHECK-LABEL: test_vmul_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %b, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <2 x i32> %vecinit1.i, %a +// CHECK-NEXT: ret <2 x i32> %mul.i +// CHECK-LABEL: test_vmulq_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <8 x i16> undef, i16 %b, i32 0 +// CHECK-NEXT: %vecinit7.i = shufflevector <8 x i16> %vecinit.i, <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <8 x i16> %vecinit7.i, %a +// CHECK-NEXT: ret <8 x i16> %mul.i +// CHECK-LABEL: test_vmulq_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i32> undef, i32 %b, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <4 x i32> %vecinit3.i, %a +// CHECK-NEXT: ret <4 x i32> %mul.i +// CHECK-LABEL: test_vmulq_n_f32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x float> undef, float %b, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = fmul <4 x float> %vecinit3.i, %a +// CHECK-NEXT: ret <4 x float> %mul.i +// CHECK-LABEL: test_vmulq_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <8 x i16> undef, i16 %b, i32 0 +// CHECK-NEXT: %vecinit7.i = shufflevector <8 x i16> %vecinit.i, <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <8 x i16> %vecinit7.i, %a +// CHECK-NEXT: ret <8 x i16> %mul.i +// CHECK-LABEL: test_vmulq_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i32> undef, i32 %b, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %mul.i = mul <4 x i32> %vecinit3.i, %a +// CHECK-NEXT: ret <4 x i32> %mul.i +// CHECK-LABEL: test_vmvn_s8 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> +// CHECK-NEXT: ret <8 x i8> %neg.i +// CHECK-LABEL: test_vmvn_s16 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1> +// CHECK-NEXT: ret <4 x i16> %neg.i +// CHECK-LABEL: test_vmvn_s32 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <2 x i32> %a, <i32 -1, i32 -1> +// CHECK-NEXT: ret <2 x i32> %neg.i +// CHECK-LABEL: test_vmvn_u8 +// CHECK: entry: 
+// CHECK-NEXT: %neg.i = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> +// CHECK-NEXT: ret <8 x i8> %neg.i +// CHECK-LABEL: test_vmvn_u16 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1> +// CHECK-NEXT: ret <4 x i16> %neg.i +// CHECK-LABEL: test_vmvn_u32 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <2 x i32> %a, <i32 -1, i32 -1> +// CHECK-NEXT: ret <2 x i32> %neg.i +// CHECK-LABEL: test_vmvn_p8 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> +// CHECK-NEXT: ret <8 x i8> %neg.i +// CHECK-LABEL: test_vmvnq_s8 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> +// CHECK-NEXT: ret <16 x i8> %neg.i +// CHECK-LABEL: test_vmvnq_s16 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> +// CHECK-NEXT: ret <8 x i16> %neg.i +// CHECK-LABEL: test_vmvnq_s32 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1> +// CHECK-NEXT: ret <4 x i32> %neg.i +// CHECK-LABEL: test_vmvnq_u8 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> +// CHECK-NEXT: ret <16 x i8> %neg.i +// CHECK-LABEL: test_vmvnq_u16 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> +// CHECK-NEXT: ret <8 x i16> %neg.i +// CHECK-LABEL: test_vmvnq_u32 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1> +// CHECK-NEXT: ret <4 x i32> %neg.i +// CHECK-LABEL: test_vmvnq_p8 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> +// CHECK-NEXT: ret <16 x i8> %neg.i +// CHECK-LABEL: test_vneg_s8 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <8 x i8> zeroinitializer, %a +// CHECK-NEXT: ret <8 x i8> %sub.i +// CHECK-LABEL: test_vneg_s16 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <4 x i16> zeroinitializer, %a +// CHECK-NEXT: ret <4 x i16> %sub.i +// CHECK-LABEL: test_vneg_s32 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <2 x i32> zeroinitializer, %a +// CHECK-NEXT: ret <2 x i32> %sub.i +// CHECK-LABEL: test_vneg_f32 +// CHECK: entry: +// CHECK-NEXT: %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a +// CHECK-NEXT: ret <2 x float> %sub.i +// CHECK-LABEL: test_vnegq_s8 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <16 x i8> zeroinitializer, %a +// CHECK-NEXT: ret <16 x i8> %sub.i +// CHECK-LABEL: test_vnegq_s16 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <8 x i16> zeroinitializer, %a +// CHECK-NEXT: ret <8 x i16> %sub.i +// CHECK-LABEL: test_vnegq_s32 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <4 x i32> zeroinitializer, %a +// CHECK-NEXT: ret <4 x i32> %sub.i +// CHECK-LABEL: test_vnegq_f32 +// CHECK: entry: +// CHECK-NEXT: %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a +// CHECK-NEXT: ret <4 x float> %sub.i +// CHECK-LABEL: test_vorn_s8 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> +// CHECK-NEXT: %or.i = or <8 x i8> %neg.i, %a +// CHECK-NEXT: ret <8 x i8> %or.i +// CHECK-LABEL: test_vorn_s16 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1> +// CHECK-NEXT: %or.i = or <4 x i16> %neg.i, %a +// CHECK-NEXT: ret <4 x i16> %or.i +// CHECK-LABEL: test_vorn_s32 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <2 x i32> %b, <i32 -1, i32 -1> +// CHECK-NEXT: %or.i = or <2 x i32> %neg.i, %a +// CHECK-NEXT: ret <2 x i32> %or.i +// CHECK-LABEL: test_vorn_s64 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <1 x i64> %b, <i64 -1> +// CHECK-NEXT: %or.i = or <1 x i64> %neg.i, %a +// CHECK-NEXT: ret <1 x i64> %or.i +// CHECK-LABEL: test_vorn_u8 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> +// CHECK-NEXT: %or.i = or <8 x i8> %neg.i, %a +// CHECK-NEXT: ret <8 x i8> %or.i +// CHECK-LABEL: test_vorn_u16 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1> +// CHECK-NEXT: %or.i = or <4 x i16> %neg.i, %a +// CHECK-NEXT: ret <4 x i16> 
%or.i +// CHECK-LABEL: test_vorn_u32 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <2 x i32> %b, <i32 -1, i32 -1> +// CHECK-NEXT: %or.i = or <2 x i32> %neg.i, %a +// CHECK-NEXT: ret <2 x i32> %or.i +// CHECK-LABEL: test_vorn_u64 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <1 x i64> %b, <i64 -1> +// CHECK-NEXT: %or.i = or <1 x i64> %neg.i, %a +// CHECK-NEXT: ret <1 x i64> %or.i +// CHECK-LABEL: test_vornq_s8 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> +// CHECK-NEXT: %or.i = or <16 x i8> %neg.i, %a +// CHECK-NEXT: ret <16 x i8> %or.i +// CHECK-LABEL: test_vornq_s16 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> +// CHECK-NEXT: %or.i = or <8 x i16> %neg.i, %a +// CHECK-NEXT: ret <8 x i16> %or.i +// CHECK-LABEL: test_vornq_s32 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1> +// CHECK-NEXT: %or.i = or <4 x i32> %neg.i, %a +// CHECK-NEXT: ret <4 x i32> %or.i +// CHECK-LABEL: test_vornq_s64 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <2 x i64> %b, <i64 -1, i64 -1> +// CHECK-NEXT: %or.i = or <2 x i64> %neg.i, %a +// CHECK-NEXT: ret <2 x i64> %or.i +// CHECK-LABEL: test_vornq_u8 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> +// CHECK-NEXT: %or.i = or <16 x i8> %neg.i, %a +// CHECK-NEXT: ret <16 x i8> %or.i +// CHECK-LABEL: test_vornq_u16 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> +// CHECK-NEXT: %or.i = or <8 x i16> %neg.i, %a +// CHECK-NEXT: ret <8 x i16> %or.i +// CHECK-LABEL: test_vornq_u32 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1> +// CHECK-NEXT: %or.i = or <4 x i32> %neg.i, %a +// CHECK-NEXT: ret <4 x i32> %or.i +// CHECK-LABEL: test_vornq_u64 +// CHECK: entry: +// CHECK-NEXT: %neg.i = xor <2 x i64> %b, <i64 -1, i64 -1> +// CHECK-NEXT: %or.i = or <2 x i64> %neg.i, %a +// CHECK-NEXT: ret <2 x i64> %or.i +// CHECK-LABEL: test_vorr_s8 +// CHECK: entry: +// CHECK-NEXT: %or.i = or <8 x i8> %b, %a +// CHECK-NEXT: ret <8 x i8> %or.i +// CHECK-LABEL: test_vorr_s16 +// CHECK: entry: +// CHECK-NEXT: %or.i = or <4 x i16> %b, %a +// CHECK-NEXT: ret <4 x i16> %or.i +// CHECK-LABEL: test_vorr_s32 +// CHECK: entry: +// CHECK-NEXT: %or.i = or <2 x i32> %b, %a +// CHECK-NEXT: ret <2 x i32> %or.i +// CHECK-LABEL: test_vorr_s64 +// CHECK: entry: +// CHECK-NEXT: %or.i = or <1 x i64> %b, %a +// CHECK-NEXT: ret <1 x i64> %or.i +// CHECK-LABEL: test_vorr_u8 +// CHECK: entry: +// CHECK-NEXT: %or.i = or <8 x i8> %b, %a +// CHECK-NEXT: ret <8 x i8> %or.i +// CHECK-LABEL: test_vorr_u16 +// CHECK: entry: +// CHECK-NEXT: %or.i = or <4 x i16> %b, %a +// CHECK-NEXT: ret <4 x i16> %or.i +// CHECK-LABEL: test_vorr_u32 +// CHECK: entry: +// CHECK-NEXT: %or.i = or <2 x i32> %b, %a +// CHECK-NEXT: ret <2 x i32> %or.i +// CHECK-LABEL: test_vorr_u64 +// CHECK: entry: +// CHECK-NEXT: %or.i = or <1 x i64> %b, %a +// CHECK-NEXT: ret <1 x i64> %or.i +// CHECK-LABEL: test_vorrq_s8 +// CHECK: entry: +// CHECK-NEXT: %or.i = or <16 x i8> %b, %a +// CHECK-NEXT: ret <16 x i8> %or.i +// CHECK-LABEL: test_vorrq_s16 +// CHECK: entry: +// CHECK-NEXT: %or.i = or <8 x i16> %b, %a +// CHECK-NEXT: ret <8 x i16> %or.i +// CHECK-LABEL: test_vorrq_s32 +// CHECK: entry: +// CHECK-NEXT: %or.i = or <4 x i32> %b, %a +// CHECK-NEXT: ret <4 x i32> %or.i +// CHECK-LABEL: test_vorrq_s64 +// CHECK: entry: +// CHECK-NEXT: %or.i = or <2 x i64> %b, %a +// CHECK-NEXT: ret <2 x i64> %or.i +// CHECK-LABEL: test_vorrq_u8 +// CHECK: entry: +// CHECK-NEXT: %or.i = or <16 x i8> %b, %a +// CHECK-NEXT: ret <16 x i8> %or.i +// CHECK-LABEL: test_vorrq_u16 +// CHECK: entry: +// CHECK-NEXT: %or.i = or <8 x i16> %b, %a +// 
CHECK-NEXT: ret <8 x i16> %or.i +// CHECK-LABEL: test_vorrq_u32 +// CHECK: entry: +// CHECK-NEXT: %or.i = or <4 x i32> %b, %a +// CHECK-NEXT: ret <4 x i32> %or.i +// CHECK-LABEL: test_vorrq_u64 +// CHECK: entry: +// CHECK-NEXT: %or.i = or <2 x i64> %b, %a +// CHECK-NEXT: ret <2 x i64> %or.i +// CHECK-LABEL: test_vpadal_s8 +// CHECK: entry: +// CHECK-NEXT: %vpadal_v1.i = tail call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) +// CHECK-NEXT: ret <4 x i16> %vpadal_v1.i +// CHECK-LABEL: test_vpadal_s16 +// CHECK: entry: +// CHECK-NEXT: %vpadal_v2.i = tail call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) +// CHECK-NEXT: ret <2 x i32> %vpadal_v2.i +// CHECK-LABEL: test_vpadal_s32 +// CHECK: entry: +// CHECK-NEXT: %vpadal_v2.i = tail call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) +// CHECK-NEXT: ret <1 x i64> %vpadal_v2.i +// CHECK-LABEL: test_vpadal_u8 +// CHECK: entry: +// CHECK-NEXT: %vpadal_v1.i = tail call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) +// CHECK-NEXT: ret <4 x i16> %vpadal_v1.i +// CHECK-LABEL: test_vpadal_u16 +// CHECK: entry: +// CHECK-NEXT: %vpadal_v2.i = tail call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) +// CHECK-NEXT: ret <2 x i32> %vpadal_v2.i +// CHECK-LABEL: test_vpadal_u32 +// CHECK: entry: +// CHECK-NEXT: %vpadal_v2.i = tail call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) +// CHECK-NEXT: ret <1 x i64> %vpadal_v2.i +// CHECK-LABEL: test_vpadalq_s8 +// CHECK: entry: +// CHECK-NEXT: %vpadalq_v1.i = tail call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) +// CHECK-NEXT: ret <8 x i16> %vpadalq_v1.i +// CHECK-LABEL: test_vpadalq_s16 +// CHECK: entry: +// CHECK-NEXT: %vpadalq_v2.i = tail call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) +// CHECK-NEXT: ret <4 x i32> %vpadalq_v2.i +// CHECK-LABEL: test_vpadalq_s32 +// CHECK: entry: +// CHECK-NEXT: %vpadalq_v2.i = tail call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) +// CHECK-NEXT: ret <2 x i64> %vpadalq_v2.i +// CHECK-LABEL: test_vpadalq_u8 +// CHECK: entry: +// CHECK-NEXT: %vpadalq_v1.i = tail call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) +// CHECK-NEXT: ret <8 x i16> %vpadalq_v1.i +// CHECK-LABEL: test_vpadalq_u16 +// CHECK: entry: +// CHECK-NEXT: %vpadalq_v2.i = tail call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) +// CHECK-NEXT: ret <4 x i32> %vpadalq_v2.i +// CHECK-LABEL: test_vpadalq_u32 +// CHECK: entry: +// CHECK-NEXT: %vpadalq_v2.i = tail call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) +// CHECK-NEXT: ret <2 x i64> %vpadalq_v2.i +// CHECK-LABEL: test_vpadd_s8 +// CHECK: entry: +// CHECK-NEXT: %vpadd_v.i = tail call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vpadd_v.i +// CHECK-LABEL: test_vpadd_s16 +// CHECK: entry: +// CHECK-NEXT: %vpadd_v2.i = tail call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vpadd_v2.i +// CHECK-LABEL: test_vpadd_s32 +// CHECK: entry: +// CHECK-NEXT: %vpadd_v2.i = tail call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vpadd_v2.i +// CHECK-LABEL: test_vpadd_u8 +// CHECK: entry: +// CHECK-NEXT: %vpadd_v.i = tail call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x 
i8> %vpadd_v.i +// CHECK-LABEL: test_vpadd_u16 +// CHECK: entry: +// CHECK-NEXT: %vpadd_v2.i = tail call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vpadd_v2.i +// CHECK-LABEL: test_vpadd_u32 +// CHECK: entry: +// CHECK-NEXT: %vpadd_v2.i = tail call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vpadd_v2.i +// CHECK-LABEL: test_vpadd_f32 +// CHECK: entry: +// CHECK-NEXT: %vpadd_v2.i = tail call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %a, <2 x float> %b) +// CHECK-NEXT: ret <2 x float> %vpadd_v2.i +// CHECK-LABEL: test_vpaddl_s8 +// CHECK: entry: +// CHECK-NEXT: %vpaddl.i = tail call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a) +// CHECK-NEXT: ret <4 x i16> %vpaddl.i +// CHECK-LABEL: test_vpaddl_s16 +// CHECK: entry: +// CHECK-NEXT: %vpaddl1.i = tail call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %a) +// CHECK-NEXT: ret <2 x i32> %vpaddl1.i +// CHECK-LABEL: test_vpaddl_s32 +// CHECK: entry: +// CHECK-NEXT: %vpaddl1.i = tail call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %a) +// CHECK-NEXT: ret <1 x i64> %vpaddl1.i +// CHECK-LABEL: test_vpaddl_u8 +// CHECK: entry: +// CHECK-NEXT: %vpaddl.i = tail call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %a) +// CHECK-NEXT: ret <4 x i16> %vpaddl.i +// CHECK-LABEL: test_vpaddl_u16 +// CHECK: entry: +// CHECK-NEXT: %vpaddl1.i = tail call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %a) +// CHECK-NEXT: ret <2 x i32> %vpaddl1.i +// CHECK-LABEL: test_vpaddl_u32 +// CHECK: entry: +// CHECK-NEXT: %vpaddl1.i = tail call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %a) +// CHECK-NEXT: ret <1 x i64> %vpaddl1.i +// CHECK-LABEL: test_vpaddlq_s8 +// CHECK: entry: +// CHECK-NEXT: %vpaddl.i = tail call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %a) +// CHECK-NEXT: ret <8 x i16> %vpaddl.i +// CHECK-LABEL: test_vpaddlq_s16 +// CHECK: entry: +// CHECK-NEXT: %vpaddl1.i = tail call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %a) +// CHECK-NEXT: ret <4 x i32> %vpaddl1.i +// CHECK-LABEL: test_vpaddlq_s32 +// CHECK: entry: +// CHECK-NEXT: %vpaddl1.i = tail call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %a) +// CHECK-NEXT: ret <2 x i64> %vpaddl1.i +// CHECK-LABEL: test_vpaddlq_u8 +// CHECK: entry: +// CHECK-NEXT: %vpaddl.i = tail call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %a) +// CHECK-NEXT: ret <8 x i16> %vpaddl.i +// CHECK-LABEL: test_vpaddlq_u16 +// CHECK: entry: +// CHECK-NEXT: %vpaddl1.i = tail call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %a) +// CHECK-NEXT: ret <4 x i32> %vpaddl1.i +// CHECK-LABEL: test_vpaddlq_u32 +// CHECK: entry: +// CHECK-NEXT: %vpaddl1.i = tail call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a) +// CHECK-NEXT: ret <2 x i64> %vpaddl1.i +// CHECK-LABEL: test_vpmax_s8 +// CHECK: entry: +// CHECK-NEXT: %vpmax_v.i = tail call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vpmax_v.i +// CHECK-LABEL: test_vpmax_s16 +// CHECK: entry: +// CHECK-NEXT: %vpmax_v2.i = tail call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vpmax_v2.i +// CHECK-LABEL: test_vpmax_s32 +// CHECK: entry: +// CHECK-NEXT: %vpmax_v2.i = tail call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vpmax_v2.i +// CHECK-LABEL: test_vpmax_u8 +// CHECK: entry: +// CHECK-NEXT: %vpmax_v.i = tail 
call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vpmax_v.i +// CHECK-LABEL: test_vpmax_u16 +// CHECK: entry: +// CHECK-NEXT: %vpmax_v2.i = tail call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vpmax_v2.i +// CHECK-LABEL: test_vpmax_u32 +// CHECK: entry: +// CHECK-NEXT: %vpmax_v2.i = tail call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vpmax_v2.i +// CHECK-LABEL: test_vpmax_f32 +// CHECK: entry: +// CHECK-NEXT: %vpmax_v2.i = tail call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %a, <2 x float> %b) +// CHECK-NEXT: ret <2 x float> %vpmax_v2.i +// CHECK-LABEL: test_vpmin_s8 +// CHECK: entry: +// CHECK-NEXT: %vpmin_v.i = tail call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vpmin_v.i +// CHECK-LABEL: test_vpmin_s16 +// CHECK: entry: +// CHECK-NEXT: %vpmin_v2.i = tail call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vpmin_v2.i +// CHECK-LABEL: test_vpmin_s32 +// CHECK: entry: +// CHECK-NEXT: %vpmin_v2.i = tail call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vpmin_v2.i +// CHECK-LABEL: test_vpmin_u8 +// CHECK: entry: +// CHECK-NEXT: %vpmin_v.i = tail call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vpmin_v.i +// CHECK-LABEL: test_vpmin_u16 +// CHECK: entry: +// CHECK-NEXT: %vpmin_v2.i = tail call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vpmin_v2.i +// CHECK-LABEL: test_vpmin_u32 +// CHECK: entry: +// CHECK-NEXT: %vpmin_v2.i = tail call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vpmin_v2.i +// CHECK-LABEL: test_vpmin_f32 +// CHECK: entry: +// CHECK-NEXT: %vpmin_v2.i = tail call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %a, <2 x float> %b) +// CHECK-NEXT: ret <2 x float> %vpmin_v2.i +// CHECK-LABEL: test_vqabs_s8 +// CHECK: entry: +// CHECK-NEXT: %vqabs_v.i = tail call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %a) +// CHECK-NEXT: ret <8 x i8> %vqabs_v.i +// CHECK-LABEL: test_vqabs_s16 +// CHECK: entry: +// CHECK-NEXT: %vqabs_v1.i = tail call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %a) +// CHECK-NEXT: ret <4 x i16> %vqabs_v1.i +// CHECK-LABEL: test_vqabs_s32 +// CHECK: entry: +// CHECK-NEXT: %vqabs_v1.i = tail call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %a) +// CHECK-NEXT: ret <2 x i32> %vqabs_v1.i +// CHECK-LABEL: test_vqabsq_s8 +// CHECK: entry: +// CHECK-NEXT: %vqabsq_v.i = tail call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %a) +// CHECK-NEXT: ret <16 x i8> %vqabsq_v.i +// CHECK-LABEL: test_vqabsq_s16 +// CHECK: entry: +// CHECK-NEXT: %vqabsq_v1.i = tail call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %a) +// CHECK-NEXT: ret <8 x i16> %vqabsq_v1.i +// CHECK-LABEL: test_vqabsq_s32 +// CHECK: entry: +// CHECK-NEXT: %vqabsq_v1.i = tail call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %a) +// CHECK-NEXT: ret <4 x i32> %vqabsq_v1.i +// CHECK-LABEL: test_vqadd_s8 +// CHECK: entry: +// CHECK-NEXT: %vqadd_v.i = tail call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vqadd_v.i +// CHECK-LABEL: test_vqadd_s16 +// CHECK: entry: +// CHECK-NEXT: %vqadd_v2.i = tail call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x 
i16> %vqadd_v2.i +// CHECK-LABEL: test_vqadd_s32 +// CHECK: entry: +// CHECK-NEXT: %vqadd_v2.i = tail call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vqadd_v2.i +// CHECK-LABEL: test_vqadd_s64 +// CHECK: entry: +// CHECK-NEXT: %vqadd_v2.i = tail call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %a, <1 x i64> %b) +// CHECK-NEXT: ret <1 x i64> %vqadd_v2.i +// CHECK-LABEL: test_vqadd_u8 +// CHECK: entry: +// CHECK-NEXT: %vqadd_v.i = tail call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vqadd_v.i +// CHECK-LABEL: test_vqadd_u16 +// CHECK: entry: +// CHECK-NEXT: %vqadd_v2.i = tail call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vqadd_v2.i +// CHECK-LABEL: test_vqadd_u32 +// CHECK: entry: +// CHECK-NEXT: %vqadd_v2.i = tail call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vqadd_v2.i +// CHECK-LABEL: test_vqadd_u64 +// CHECK: entry: +// CHECK-NEXT: %vqadd_v2.i = tail call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %a, <1 x i64> %b) +// CHECK-NEXT: ret <1 x i64> %vqadd_v2.i +// CHECK-LABEL: test_vqaddq_s8 +// CHECK: entry: +// CHECK-NEXT: %vqaddq_v.i = tail call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vqaddq_v.i +// CHECK-LABEL: test_vqaddq_s16 +// CHECK: entry: +// CHECK-NEXT: %vqaddq_v2.i = tail call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vqaddq_v2.i +// CHECK-LABEL: test_vqaddq_s32 +// CHECK: entry: +// CHECK-NEXT: %vqaddq_v2.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vqaddq_v2.i +// CHECK-LABEL: test_vqaddq_s64 +// CHECK: entry: +// CHECK-NEXT: %vqaddq_v2.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %b) +// CHECK-NEXT: ret <2 x i64> %vqaddq_v2.i +// CHECK-LABEL: test_vqaddq_u8 +// CHECK: entry: +// CHECK-NEXT: %vqaddq_v.i = tail call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vqaddq_v.i +// CHECK-LABEL: test_vqaddq_u16 +// CHECK: entry: +// CHECK-NEXT: %vqaddq_v2.i = tail call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vqaddq_v2.i +// CHECK-LABEL: test_vqaddq_u32 +// CHECK: entry: +// CHECK-NEXT: %vqaddq_v2.i = tail call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vqaddq_v2.i +// CHECK-LABEL: test_vqaddq_u64 +// CHECK: entry: +// CHECK-NEXT: %vqaddq_v2.i = tail call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %a, <2 x i64> %b) +// CHECK-NEXT: ret <2 x i64> %vqaddq_v2.i +// CHECK-LABEL: test_vqdmlal_s16 +// CHECK: entry: +// CHECK-NEXT: %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) +// CHECK-NEXT: %vqdmlal_v3.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) +// CHECK-NEXT: ret <4 x i32> %vqdmlal_v3.i +// CHECK-LABEL: test_vqdmlal_s32 +// CHECK: entry: +// CHECK-NEXT: %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) +// CHECK-NEXT: %vqdmlal_v3.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) +// CHECK-NEXT: ret <2 x i64> %vqdmlal_v3.i +// CHECK-LABEL: test_vqdmlal_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %c, <4 
x i16> undef, <4 x i32> +// CHECK-NEXT: %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) +// CHECK-NEXT: %vqdmlal_v3.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) +// CHECK-NEXT: ret <4 x i32> %vqdmlal_v3.i +// CHECK-LABEL: test_vqdmlal_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> +// CHECK-NEXT: %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) +// CHECK-NEXT: %vqdmlal_v3.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) +// CHECK-NEXT: ret <2 x i64> %vqdmlal_v3.i +// CHECK-LABEL: test_vqdmlal_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %c, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %vqdmlal5.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %vecinit3.i) +// CHECK-NEXT: %vqdmlal_v6.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal5.i) +// CHECK-NEXT: ret <4 x i32> %vqdmlal_v6.i +// CHECK-LABEL: test_vqdmlal_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %c, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: %vqdmlal3.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %vecinit1.i) +// CHECK-NEXT: %vqdmlal_v4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal3.i) +// CHECK-NEXT: ret <2 x i64> %vqdmlal_v4.i +// CHECK-LABEL: test_vqdmlsl_s16 +// CHECK: entry: +// CHECK-NEXT: %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) +// CHECK-NEXT: %vqdmlsl_v3.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) +// CHECK-NEXT: ret <4 x i32> %vqdmlsl_v3.i +// CHECK-LABEL: test_vqdmlsl_s32 +// CHECK: entry: +// CHECK-NEXT: %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) +// CHECK-NEXT: %vqdmlsl_v3.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) +// CHECK-NEXT: ret <2 x i64> %vqdmlsl_v3.i +// CHECK-LABEL: test_vqdmlsl_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> +// CHECK-NEXT: %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) +// CHECK-NEXT: %vqdmlsl_v3.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) +// CHECK-NEXT: ret <4 x i32> %vqdmlsl_v3.i +// CHECK-LABEL: test_vqdmlsl_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> +// CHECK-NEXT: %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) +// CHECK-NEXT: %vqdmlsl_v3.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) +// CHECK-NEXT: ret <2 x i64> %vqdmlsl_v3.i +// CHECK-LABEL: test_vqdmlsl_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %c, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %vqdmlal5.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> 
%vecinit3.i) +// CHECK-NEXT: %vqdmlsl_v6.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal5.i) +// CHECK-NEXT: ret <4 x i32> %vqdmlsl_v6.i +// CHECK-LABEL: test_vqdmlsl_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %c, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: %vqdmlal3.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %vecinit1.i) +// CHECK-NEXT: %vqdmlsl_v4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal3.i) +// CHECK-NEXT: ret <2 x i64> %vqdmlsl_v4.i +// CHECK-LABEL: test_vqdmulh_s16 +// CHECK: entry: +// CHECK-NEXT: %vqdmulh_v2.i = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vqdmulh_v2.i +// CHECK-LABEL: test_vqdmulh_s32 +// CHECK: entry: +// CHECK-NEXT: %vqdmulh_v2.i = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vqdmulh_v2.i +// CHECK-LABEL: test_vqdmulhq_s16 +// CHECK: entry: +// CHECK-NEXT: %vqdmulhq_v2.i = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vqdmulhq_v2.i +// CHECK-LABEL: test_vqdmulhq_s32 +// CHECK: entry: +// CHECK-NEXT: %vqdmulhq_v2.i = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vqdmulhq_v2.i +// CHECK-LABEL: test_vqdmulh_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %b, <4 x i16> undef, <4 x i32> +// CHECK-NEXT: %vqdmulh_v2.i = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) +// CHECK-NEXT: ret <4 x i16> %vqdmulh_v2.i +// CHECK-LABEL: test_vqdmulh_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> +// CHECK-NEXT: %vqdmulh_v2.i = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) +// CHECK-NEXT: ret <2 x i32> %vqdmulh_v2.i +// CHECK-LABEL: test_vqdmulhq_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %b, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %vqdmulhq_v2.i = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) +// CHECK-NEXT: ret <8 x i16> %vqdmulhq_v2.i +// CHECK-LABEL: test_vqdmulhq_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %b, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %vqdmulhq_v2.i = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) +// CHECK-NEXT: ret <4 x i32> %vqdmulhq_v2.i +// CHECK-LABEL: test_vqdmulh_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %b, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %vqdmulh_v5.i = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %vecinit3.i) +// CHECK-NEXT: ret <4 x i16> %vqdmulh_v5.i +// CHECK-LABEL: test_vqdmulh_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %b, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: %vqdmulh_v3.i = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %vecinit1.i) +// CHECK-NEXT: ret <2 x i32> %vqdmulh_v3.i +// CHECK-LABEL: test_vqdmulhq_n_s16 +// 
CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <8 x i16> undef, i16 %b, i32 0 +// CHECK-NEXT: %vecinit7.i = shufflevector <8 x i16> %vecinit.i, <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: %vqdmulhq_v9.i = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %vecinit7.i) +// CHECK-NEXT: ret <8 x i16> %vqdmulhq_v9.i +// CHECK-LABEL: test_vqdmulhq_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i32> undef, i32 %b, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %vqdmulhq_v5.i = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %vecinit3.i) +// CHECK-NEXT: ret <4 x i32> %vqdmulhq_v5.i +// CHECK-LABEL: test_vqdmull_s16 +// CHECK: entry: +// CHECK-NEXT: %vqdmull_v2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i32> %vqdmull_v2.i +// CHECK-LABEL: test_vqdmull_s32 +// CHECK: entry: +// CHECK-NEXT: %vqdmull_v2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i64> %vqdmull_v2.i +// CHECK-LABEL: test_vqdmull_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %b, <4 x i16> undef, <4 x i32> +// CHECK-NEXT: %vqdmull_v2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) +// CHECK-NEXT: ret <4 x i32> %vqdmull_v2.i +// CHECK-LABEL: test_vqdmull_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> +// CHECK-NEXT: %vqdmull_v2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) +// CHECK-NEXT: ret <2 x i64> %vqdmull_v2.i +// CHECK-LABEL: test_vqdmull_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %b, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %vqdmull_v5.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %vecinit3.i) +// CHECK-NEXT: ret <4 x i32> %vqdmull_v5.i +// CHECK-LABEL: test_vqdmull_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %b, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: %vqdmull_v3.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %vecinit1.i) +// CHECK-NEXT: ret <2 x i64> %vqdmull_v3.i +// CHECK-LABEL: test_vqmovn_s16 +// CHECK: entry: +// CHECK-NEXT: %vqmovn_v1.i = tail call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %a) +// CHECK-NEXT: ret <8 x i8> %vqmovn_v1.i +// CHECK-LABEL: test_vqmovn_s32 +// CHECK: entry: +// CHECK-NEXT: %vqmovn_v1.i = tail call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %a) +// CHECK-NEXT: ret <4 x i16> %vqmovn_v1.i +// CHECK-LABEL: test_vqmovn_s64 +// CHECK: entry: +// CHECK-NEXT: %vqmovn_v1.i = tail call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %a) +// CHECK-NEXT: ret <2 x i32> %vqmovn_v1.i +// CHECK-LABEL: test_vqmovn_u16 +// CHECK: entry: +// CHECK-NEXT: %vqmovn_v1.i = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %a) +// CHECK-NEXT: ret <8 x i8> %vqmovn_v1.i +// CHECK-LABEL: test_vqmovn_u32 +// CHECK: entry: +// CHECK-NEXT: %vqmovn_v1.i = tail call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %a) +// CHECK-NEXT: ret <4 x i16> %vqmovn_v1.i +// CHECK-LABEL: test_vqmovn_u64 +// CHECK: entry: 
+// CHECK-NEXT: %vqmovn_v1.i = tail call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %a) +// CHECK-NEXT: ret <2 x i32> %vqmovn_v1.i +// CHECK-LABEL: test_vqmovun_s16 +// CHECK: entry: +// CHECK-NEXT: %vqmovun_v1.i = tail call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %a) +// CHECK-NEXT: ret <8 x i8> %vqmovun_v1.i +// CHECK-LABEL: test_vqmovun_s32 +// CHECK: entry: +// CHECK-NEXT: %vqmovun_v1.i = tail call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %a) +// CHECK-NEXT: ret <4 x i16> %vqmovun_v1.i +// CHECK-LABEL: test_vqmovun_s64 +// CHECK: entry: +// CHECK-NEXT: %vqmovun_v1.i = tail call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %a) +// CHECK-NEXT: ret <2 x i32> %vqmovun_v1.i +// CHECK-LABEL: test_vqneg_s8 +// CHECK: entry: +// CHECK-NEXT: %vqneg_v.i = tail call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %a) +// CHECK-NEXT: ret <8 x i8> %vqneg_v.i +// CHECK-LABEL: test_vqneg_s16 +// CHECK: entry: +// CHECK-NEXT: %vqneg_v1.i = tail call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %a) +// CHECK-NEXT: ret <4 x i16> %vqneg_v1.i +// CHECK-LABEL: test_vqneg_s32 +// CHECK: entry: +// CHECK-NEXT: %vqneg_v1.i = tail call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %a) +// CHECK-NEXT: ret <2 x i32> %vqneg_v1.i +// CHECK-LABEL: test_vqnegq_s8 +// CHECK: entry: +// CHECK-NEXT: %vqnegq_v.i = tail call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %a) +// CHECK-NEXT: ret <16 x i8> %vqnegq_v.i +// CHECK-LABEL: test_vqnegq_s16 +// CHECK: entry: +// CHECK-NEXT: %vqnegq_v1.i = tail call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %a) +// CHECK-NEXT: ret <8 x i16> %vqnegq_v1.i +// CHECK-LABEL: test_vqnegq_s32 +// CHECK: entry: +// CHECK-NEXT: %vqnegq_v1.i = tail call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %a) +// CHECK-NEXT: ret <4 x i32> %vqnegq_v1.i +// CHECK-LABEL: test_vqrdmulh_s16 +// CHECK: entry: +// CHECK-NEXT: %vqrdmulh_v2.i = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vqrdmulh_v2.i +// CHECK-LABEL: test_vqrdmulh_s32 +// CHECK: entry: +// CHECK-NEXT: %vqrdmulh_v2.i = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vqrdmulh_v2.i +// CHECK-LABEL: test_vqrdmulhq_s16 +// CHECK: entry: +// CHECK-NEXT: %vqrdmulhq_v2.i = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vqrdmulhq_v2.i +// CHECK-LABEL: test_vqrdmulhq_s32 +// CHECK: entry: +// CHECK-NEXT: %vqrdmulhq_v2.i = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vqrdmulhq_v2.i +// CHECK-LABEL: test_vqrdmulh_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %b, <4 x i16> undef, <4 x i32> +// CHECK-NEXT: %vqrdmulh_v2.i = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) +// CHECK-NEXT: ret <4 x i16> %vqrdmulh_v2.i +// CHECK-LABEL: test_vqrdmulh_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> +// CHECK-NEXT: %vqrdmulh_v2.i = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) +// CHECK-NEXT: ret <2 x i32> %vqrdmulh_v2.i +// CHECK-LABEL: test_vqrdmulhq_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <4 x i16> %b, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %vqrdmulhq_v2.i = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) +// CHECK-NEXT: ret <8 x i16> 
%vqrdmulhq_v2.i +// CHECK-LABEL: test_vqrdmulhq_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle = shufflevector <2 x i32> %b, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %vqrdmulhq_v2.i = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) +// CHECK-NEXT: ret <4 x i32> %vqrdmulhq_v2.i +// CHECK-LABEL: test_vqrdmulh_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i16> undef, i16 %b, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i16> %vecinit.i, <4 x i16> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %vqrdmulh_v5.i = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %vecinit3.i) +// CHECK-NEXT: ret <4 x i16> %vqrdmulh_v5.i +// CHECK-LABEL: test_vqrdmulh_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <2 x i32> undef, i32 %b, i32 0 +// CHECK-NEXT: %vecinit1.i = shufflevector <2 x i32> %vecinit.i, <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK-NEXT: %vqrdmulh_v3.i = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %vecinit1.i) +// CHECK-NEXT: ret <2 x i32> %vqrdmulh_v3.i +// CHECK-LABEL: test_vqrdmulhq_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <8 x i16> undef, i16 %b, i32 0 +// CHECK-NEXT: %vecinit7.i = shufflevector <8 x i16> %vecinit.i, <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: %vqrdmulhq_v9.i = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %vecinit7.i) +// CHECK-NEXT: ret <8 x i16> %vqrdmulhq_v9.i +// CHECK-LABEL: test_vqrdmulhq_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vecinit.i = insertelement <4 x i32> undef, i32 %b, i32 0 +// CHECK-NEXT: %vecinit3.i = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: %vqrdmulhq_v5.i = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %vecinit3.i) +// CHECK-NEXT: ret <4 x i32> %vqrdmulhq_v5.i +// CHECK-LABEL: test_vqrshl_s8 +// CHECK: entry: +// CHECK-NEXT: %vqrshl_v.i = tail call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vqrshl_v.i +// CHECK-LABEL: test_vqrshl_s16 +// CHECK: entry: +// CHECK-NEXT: %vqrshl_v2.i = tail call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vqrshl_v2.i +// CHECK-LABEL: test_vqrshl_s32 +// CHECK: entry: +// CHECK-NEXT: %vqrshl_v2.i = tail call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vqrshl_v2.i +// CHECK-LABEL: test_vqrshl_s64 +// CHECK: entry: +// CHECK-NEXT: %vqrshl_v2.i = tail call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %a, <1 x i64> %b) +// CHECK-NEXT: ret <1 x i64> %vqrshl_v2.i +// CHECK-LABEL: test_vqrshl_u8 +// CHECK: entry: +// CHECK-NEXT: %vqrshl_v.i = tail call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vqrshl_v.i +// CHECK-LABEL: test_vqrshl_u16 +// CHECK: entry: +// CHECK-NEXT: %vqrshl_v2.i = tail call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vqrshl_v2.i +// CHECK-LABEL: test_vqrshl_u32 +// CHECK: entry: +// CHECK-NEXT: %vqrshl_v2.i = tail call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vqrshl_v2.i +// CHECK-LABEL: test_vqrshl_u64 +// CHECK: entry: +// CHECK-NEXT: %vqrshl_v2.i = tail call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %a, <1 x i64> %b) +// CHECK-NEXT: ret <1 x i64> %vqrshl_v2.i 
+// CHECK-LABEL: test_vqrshlq_s8 +// CHECK: entry: +// CHECK-NEXT: %vqrshlq_v.i = tail call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vqrshlq_v.i +// CHECK-LABEL: test_vqrshlq_s16 +// CHECK: entry: +// CHECK-NEXT: %vqrshlq_v2.i = tail call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vqrshlq_v2.i +// CHECK-LABEL: test_vqrshlq_s32 +// CHECK: entry: +// CHECK-NEXT: %vqrshlq_v2.i = tail call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vqrshlq_v2.i +// CHECK-LABEL: test_vqrshlq_s64 +// CHECK: entry: +// CHECK-NEXT: %vqrshlq_v2.i = tail call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %a, <2 x i64> %b) +// CHECK-NEXT: ret <2 x i64> %vqrshlq_v2.i +// CHECK-LABEL: test_vqrshlq_u8 +// CHECK: entry: +// CHECK-NEXT: %vqrshlq_v.i = tail call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vqrshlq_v.i +// CHECK-LABEL: test_vqrshlq_u16 +// CHECK: entry: +// CHECK-NEXT: %vqrshlq_v2.i = tail call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vqrshlq_v2.i +// CHECK-LABEL: test_vqrshlq_u32 +// CHECK: entry: +// CHECK-NEXT: %vqrshlq_v2.i = tail call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vqrshlq_v2.i +// CHECK-LABEL: test_vqrshlq_u64 +// CHECK: entry: +// CHECK-NEXT: %vqrshlq_v2.i = tail call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %a, <2 x i64> %b) +// CHECK-NEXT: ret <2 x i64> %vqrshlq_v2.i +// CHECK-LABEL: test_vqrshrn_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vqrshrn_n1 = tail call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> %a, <8 x i16> ) +// CHECK-NEXT: ret <8 x i8> %vqrshrn_n1 +// CHECK-LABEL: test_vqrshrn_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vqrshrn_n1 = tail call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %a, <4 x i32> ) +// CHECK-NEXT: ret <4 x i16> %vqrshrn_n1 +// CHECK-LABEL: test_vqrshrn_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vqrshrn_n1 = tail call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> %a, <2 x i64> ) +// CHECK-NEXT: ret <2 x i32> %vqrshrn_n1 +// CHECK-LABEL: test_vqrshrn_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vqrshrn_n1 = tail call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> %a, <8 x i16> ) +// CHECK-NEXT: ret <8 x i8> %vqrshrn_n1 +// CHECK-LABEL: test_vqrshrn_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vqrshrn_n1 = tail call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> %a, <4 x i32> ) +// CHECK-NEXT: ret <4 x i16> %vqrshrn_n1 +// CHECK-LABEL: test_vqrshrn_n_u64 +// CHECK: entry: +// CHECK-NEXT: %vqrshrn_n1 = tail call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> %a, <2 x i64> ) +// CHECK-NEXT: ret <2 x i32> %vqrshrn_n1 +// CHECK-LABEL: test_vqrshrun_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vqrshrun_n1 = tail call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> %a, <8 x i16> ) +// CHECK-NEXT: ret <8 x i8> %vqrshrun_n1 +// CHECK-LABEL: test_vqrshrun_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vqrshrun_n1 = tail call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> %a, <4 x i32> ) +// CHECK-NEXT: ret <4 x i16> %vqrshrun_n1 +// CHECK-LABEL: test_vqrshrun_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vqrshrun_n1 = tail call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> %a, <2 x i64> ) +// CHECK-NEXT: ret <2 x i32> %vqrshrun_n1 +// CHECK-LABEL: test_vqshl_s8 +// CHECK: entry: +// CHECK-NEXT: 
%vqshl_v.i = tail call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vqshl_v.i +// CHECK-LABEL: test_vqshl_s16 +// CHECK: entry: +// CHECK-NEXT: %vqshl_v2.i = tail call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vqshl_v2.i +// CHECK-LABEL: test_vqshl_s32 +// CHECK: entry: +// CHECK-NEXT: %vqshl_v2.i = tail call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vqshl_v2.i +// CHECK-LABEL: test_vqshl_s64 +// CHECK: entry: +// CHECK-NEXT: %vqshl_v2.i = tail call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %a, <1 x i64> %b) +// CHECK-NEXT: ret <1 x i64> %vqshl_v2.i +// CHECK-LABEL: test_vqshl_u8 +// CHECK: entry: +// CHECK-NEXT: %vqshl_v.i = tail call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vqshl_v.i +// CHECK-LABEL: test_vqshl_u16 +// CHECK: entry: +// CHECK-NEXT: %vqshl_v2.i = tail call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vqshl_v2.i +// CHECK-LABEL: test_vqshl_u32 +// CHECK: entry: +// CHECK-NEXT: %vqshl_v2.i = tail call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vqshl_v2.i +// CHECK-LABEL: test_vqshl_u64 +// CHECK: entry: +// CHECK-NEXT: %vqshl_v2.i = tail call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %a, <1 x i64> %b) +// CHECK-NEXT: ret <1 x i64> %vqshl_v2.i +// CHECK-LABEL: test_vqshlq_s8 +// CHECK: entry: +// CHECK-NEXT: %vqshlq_v.i = tail call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vqshlq_v.i +// CHECK-LABEL: test_vqshlq_s16 +// CHECK: entry: +// CHECK-NEXT: %vqshlq_v2.i = tail call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vqshlq_v2.i +// CHECK-LABEL: test_vqshlq_s32 +// CHECK: entry: +// CHECK-NEXT: %vqshlq_v2.i = tail call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vqshlq_v2.i +// CHECK-LABEL: test_vqshlq_s64 +// CHECK: entry: +// CHECK-NEXT: %vqshlq_v2.i = tail call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> %b) +// CHECK-NEXT: ret <2 x i64> %vqshlq_v2.i +// CHECK-LABEL: test_vqshlq_u8 +// CHECK: entry: +// CHECK-NEXT: %vqshlq_v.i = tail call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vqshlq_v.i +// CHECK-LABEL: test_vqshlq_u16 +// CHECK: entry: +// CHECK-NEXT: %vqshlq_v2.i = tail call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vqshlq_v2.i +// CHECK-LABEL: test_vqshlq_u32 +// CHECK: entry: +// CHECK-NEXT: %vqshlq_v2.i = tail call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vqshlq_v2.i +// CHECK-LABEL: test_vqshlq_u64 +// CHECK: entry: +// CHECK-NEXT: %vqshlq_v2.i = tail call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> %b) +// CHECK-NEXT: ret <2 x i64> %vqshlq_v2.i +// CHECK-LABEL: test_vqshlu_n_s8 +// CHECK: entry: +// CHECK-NEXT: %vqshlu_n = tail call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK-NEXT: ret <8 x i8> %vqshlu_n +// CHECK-LABEL: test_vqshlu_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vqshlu_n1 = tail call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> %a, <4 x i16> ) +// CHECK-NEXT: ret <4 x i16> %vqshlu_n1 +// CHECK-LABEL: 
test_vqshlu_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vqshlu_n1 = tail call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> %a, <2 x i32> ) +// CHECK-NEXT: ret <2 x i32> %vqshlu_n1 +// CHECK-LABEL: test_vqshlu_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vqshlu_n1 = tail call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> %a, <1 x i64> ) +// CHECK-NEXT: ret <1 x i64> %vqshlu_n1 +// CHECK-LABEL: test_vqshluq_n_s8 +// CHECK: entry: +// CHECK-NEXT: %vqshlu_n = tail call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK-NEXT: ret <16 x i8> %vqshlu_n +// CHECK-LABEL: test_vqshluq_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vqshlu_n1 = tail call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> %a, <8 x i16> ) +// CHECK-NEXT: ret <8 x i16> %vqshlu_n1 +// CHECK-LABEL: test_vqshluq_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vqshlu_n1 = tail call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> %a, <4 x i32> ) +// CHECK-NEXT: ret <4 x i32> %vqshlu_n1 +// CHECK-LABEL: test_vqshluq_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vqshlu_n1 = tail call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> %a, <2 x i64> ) +// CHECK-NEXT: ret <2 x i64> %vqshlu_n1 +// CHECK-LABEL: test_vqshl_n_s8 +// CHECK: entry: +// CHECK-NEXT: %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK-NEXT: ret <8 x i8> %vqshl_n +// CHECK-LABEL: test_vqshl_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vqshl_n1 = tail call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> ) +// CHECK-NEXT: ret <4 x i16> %vqshl_n1 +// CHECK-LABEL: test_vqshl_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vqshl_n1 = tail call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> ) +// CHECK-NEXT: ret <2 x i32> %vqshl_n1 +// CHECK-LABEL: test_vqshl_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vqshl_n1 = tail call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %a, <1 x i64> ) +// CHECK-NEXT: ret <1 x i64> %vqshl_n1 +// CHECK-LABEL: test_vqshl_n_u8 +// CHECK: entry: +// CHECK-NEXT: %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK-NEXT: ret <8 x i8> %vqshl_n +// CHECK-LABEL: test_vqshl_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vqshl_n1 = tail call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> ) +// CHECK-NEXT: ret <4 x i16> %vqshl_n1 +// CHECK-LABEL: test_vqshl_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vqshl_n1 = tail call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> ) +// CHECK-NEXT: ret <2 x i32> %vqshl_n1 +// CHECK-LABEL: test_vqshl_n_u64 +// CHECK: entry: +// CHECK-NEXT: %vqshl_n1 = tail call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %a, <1 x i64> ) +// CHECK-NEXT: ret <1 x i64> %vqshl_n1 +// CHECK-LABEL: test_vqshlq_n_s8 +// CHECK: entry: +// CHECK-NEXT: %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK-NEXT: ret <16 x i8> %vqshl_n +// CHECK-LABEL: test_vqshlq_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vqshl_n1 = tail call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> ) +// CHECK-NEXT: ret <8 x i16> %vqshl_n1 +// CHECK-LABEL: test_vqshlq_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vqshl_n1 = tail call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> ) +// CHECK-NEXT: ret <4 x i32> %vqshl_n1 +// CHECK-LABEL: test_vqshlq_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vqshl_n1 = tail call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> ) +// CHECK-NEXT: ret <2 x i64> %vqshl_n1 +// CHECK-LABEL: test_vqshlq_n_u8 
+// CHECK: entry: +// CHECK-NEXT: %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK-NEXT: ret <16 x i8> %vqshl_n +// CHECK-LABEL: test_vqshlq_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vqshl_n1 = tail call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> ) +// CHECK-NEXT: ret <8 x i16> %vqshl_n1 +// CHECK-LABEL: test_vqshlq_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vqshl_n1 = tail call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> ) +// CHECK-NEXT: ret <4 x i32> %vqshl_n1 +// CHECK-LABEL: test_vqshlq_n_u64 +// CHECK: entry: +// CHECK-NEXT: %vqshl_n1 = tail call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> ) +// CHECK-NEXT: ret <2 x i64> %vqshl_n1 +// CHECK-LABEL: test_vqshrn_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vqshrn_n1 = tail call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> %a, <8 x i16> ) +// CHECK-NEXT: ret <8 x i8> %vqshrn_n1 +// CHECK-LABEL: test_vqshrn_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vqshrn_n1 = tail call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> %a, <4 x i32> ) +// CHECK-NEXT: ret <4 x i16> %vqshrn_n1 +// CHECK-LABEL: test_vqshrn_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vqshrn_n1 = tail call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> %a, <2 x i64> ) +// CHECK-NEXT: ret <2 x i32> %vqshrn_n1 +// CHECK-LABEL: test_vqshrn_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vqshrn_n1 = tail call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> %a, <8 x i16> ) +// CHECK-NEXT: ret <8 x i8> %vqshrn_n1 +// CHECK-LABEL: test_vqshrn_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vqshrn_n1 = tail call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> %a, <4 x i32> ) +// CHECK-NEXT: ret <4 x i16> %vqshrn_n1 +// CHECK-LABEL: test_vqshrn_n_u64 +// CHECK: entry: +// CHECK-NEXT: %vqshrn_n1 = tail call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> %a, <2 x i64> ) +// CHECK-NEXT: ret <2 x i32> %vqshrn_n1 +// CHECK-LABEL: test_vqshrun_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vqshrun_n1 = tail call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> %a, <8 x i16> ) +// CHECK-NEXT: ret <8 x i8> %vqshrun_n1 +// CHECK-LABEL: test_vqshrun_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vqshrun_n1 = tail call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %a, <4 x i32> ) +// CHECK-NEXT: ret <4 x i16> %vqshrun_n1 +// CHECK-LABEL: test_vqshrun_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vqshrun_n1 = tail call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> %a, <2 x i64> ) +// CHECK-NEXT: ret <2 x i32> %vqshrun_n1 +// CHECK-LABEL: test_vqsub_s8 +// CHECK: entry: +// CHECK-NEXT: %vqsub_v.i = tail call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vqsub_v.i +// CHECK-LABEL: test_vqsub_s16 +// CHECK: entry: +// CHECK-NEXT: %vqsub_v2.i = tail call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vqsub_v2.i +// CHECK-LABEL: test_vqsub_s32 +// CHECK: entry: +// CHECK-NEXT: %vqsub_v2.i = tail call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vqsub_v2.i +// CHECK-LABEL: test_vqsub_s64 +// CHECK: entry: +// CHECK-NEXT: %vqsub_v2.i = tail call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %a, <1 x i64> %b) +// CHECK-NEXT: ret <1 x i64> %vqsub_v2.i +// CHECK-LABEL: test_vqsub_u8 +// CHECK: entry: +// CHECK-NEXT: %vqsub_v.i = tail call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vqsub_v.i +// CHECK-LABEL: 
test_vqsub_u16 +// CHECK: entry: +// CHECK-NEXT: %vqsub_v2.i = tail call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vqsub_v2.i +// CHECK-LABEL: test_vqsub_u32 +// CHECK: entry: +// CHECK-NEXT: %vqsub_v2.i = tail call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vqsub_v2.i +// CHECK-LABEL: test_vqsub_u64 +// CHECK: entry: +// CHECK-NEXT: %vqsub_v2.i = tail call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %a, <1 x i64> %b) +// CHECK-NEXT: ret <1 x i64> %vqsub_v2.i +// CHECK-LABEL: test_vqsubq_s8 +// CHECK: entry: +// CHECK-NEXT: %vqsubq_v.i = tail call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vqsubq_v.i +// CHECK-LABEL: test_vqsubq_s16 +// CHECK: entry: +// CHECK-NEXT: %vqsubq_v2.i = tail call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vqsubq_v2.i +// CHECK-LABEL: test_vqsubq_s32 +// CHECK: entry: +// CHECK-NEXT: %vqsubq_v2.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vqsubq_v2.i +// CHECK-LABEL: test_vqsubq_s64 +// CHECK: entry: +// CHECK-NEXT: %vqsubq_v2.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %b) +// CHECK-NEXT: ret <2 x i64> %vqsubq_v2.i +// CHECK-LABEL: test_vqsubq_u8 +// CHECK: entry: +// CHECK-NEXT: %vqsubq_v.i = tail call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vqsubq_v.i +// CHECK-LABEL: test_vqsubq_u16 +// CHECK: entry: +// CHECK-NEXT: %vqsubq_v2.i = tail call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vqsubq_v2.i +// CHECK-LABEL: test_vqsubq_u32 +// CHECK: entry: +// CHECK-NEXT: %vqsubq_v2.i = tail call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vqsubq_v2.i +// CHECK-LABEL: test_vqsubq_u64 +// CHECK: entry: +// CHECK-NEXT: %vqsubq_v2.i = tail call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %a, <2 x i64> %b) +// CHECK-NEXT: ret <2 x i64> %vqsubq_v2.i +// CHECK-LABEL: test_vraddhn_s16 +// CHECK: entry: +// CHECK-NEXT: %vraddhn_v2.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i8> %vraddhn_v2.i +// CHECK-LABEL: test_vraddhn_s32 +// CHECK: entry: +// CHECK-NEXT: %vraddhn_v2.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i16> %vraddhn_v2.i +// CHECK-LABEL: test_vraddhn_s64 +// CHECK: entry: +// CHECK-NEXT: %vraddhn_v2.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b) +// CHECK-NEXT: ret <2 x i32> %vraddhn_v2.i +// CHECK-LABEL: test_vraddhn_u16 +// CHECK: entry: +// CHECK-NEXT: %vraddhn_v2.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i8> %vraddhn_v2.i +// CHECK-LABEL: test_vraddhn_u32 +// CHECK: entry: +// CHECK-NEXT: %vraddhn_v2.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i16> %vraddhn_v2.i +// CHECK-LABEL: test_vraddhn_u64 +// CHECK: entry: +// CHECK-NEXT: %vraddhn_v2.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b) +// CHECK-NEXT: ret <2 x i32> %vraddhn_v2.i +// CHECK-LABEL: test_vrecpe_f32 +// CHECK: entry: +// CHECK-NEXT: %vrecpe_v1.i = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %a) +// 
CHECK-NEXT: ret <2 x float> %vrecpe_v1.i +// CHECK-LABEL: test_vrecpe_u32 +// CHECK: entry: +// CHECK-NEXT: %vrecpe_v1.i = tail call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %a) +// CHECK-NEXT: ret <2 x i32> %vrecpe_v1.i +// CHECK-LABEL: test_vrecpeq_f32 +// CHECK: entry: +// CHECK-NEXT: %vrecpeq_v1.i = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %a) +// CHECK-NEXT: ret <4 x float> %vrecpeq_v1.i +// CHECK-LABEL: test_vrecpeq_u32 +// CHECK: entry: +// CHECK-NEXT: %vrecpeq_v1.i = tail call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %a) +// CHECK-NEXT: ret <4 x i32> %vrecpeq_v1.i +// CHECK-LABEL: test_vrecps_f32 +// CHECK: entry: +// CHECK-NEXT: %vrecps_v2.i = tail call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %a, <2 x float> %b) +// CHECK-NEXT: ret <2 x float> %vrecps_v2.i +// CHECK-LABEL: test_vrecpsq_f32 +// CHECK: entry: +// CHECK-NEXT: %vrecpsq_v2.i = tail call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %a, <4 x float> %b) +// CHECK-NEXT: ret <4 x float> %vrecpsq_v2.i +// CHECK-LABEL: test_vreinterpret_s8_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_s8_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_s8_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_s8_u8 +// CHECK: entry: +// CHECK-NEXT: ret <8 x i8> %a +// CHECK-LABEL: test_vreinterpret_s8_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_s8_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_s8_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_s8_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x half> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_s8_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x float> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_s8_p8 +// CHECK: entry: +// CHECK-NEXT: ret <8 x i8> %a +// CHECK-LABEL: test_vreinterpret_s8_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_s16_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_s16_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_s16_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_s16_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_s16_u16 +// CHECK: entry: +// CHECK-NEXT: ret <4 x i16> %a +// CHECK-LABEL: test_vreinterpret_s16_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_s16_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: 
test_vreinterpret_s16_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x half> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_s16_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x float> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_s16_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_s16_p16 +// CHECK: entry: +// CHECK-NEXT: ret <4 x i16> %a +// CHECK-LABEL: test_vreinterpret_s32_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vreinterpret_s32_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vreinterpret_s32_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vreinterpret_s32_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vreinterpret_s32_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vreinterpret_s32_u32 +// CHECK: entry: +// CHECK-NEXT: ret <2 x i32> %a +// CHECK-LABEL: test_vreinterpret_s32_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vreinterpret_s32_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x half> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vreinterpret_s32_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x float> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vreinterpret_s32_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vreinterpret_s32_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vreinterpret_s64_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vreinterpret_s64_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vreinterpret_s64_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vreinterpret_s64_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vreinterpret_s64_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vreinterpret_s64_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vreinterpret_s64_u64 +// CHECK: entry: +// CHECK-NEXT: ret <1 x i64> %a +// CHECK-LABEL: test_vreinterpret_s64_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x half> %a to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vreinterpret_s64_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x float> %a to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vreinterpret_s64_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: 
test_vreinterpret_s64_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vreinterpret_u8_s8 +// CHECK: entry: +// CHECK-NEXT: ret <8 x i8> %a +// CHECK-LABEL: test_vreinterpret_u8_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_u8_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_u8_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_u8_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_u8_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_u8_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_u8_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x half> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_u8_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x float> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_u8_p8 +// CHECK: entry: +// CHECK-NEXT: ret <8 x i8> %a +// CHECK-LABEL: test_vreinterpret_u8_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_u16_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_u16_s16 +// CHECK: entry: +// CHECK-NEXT: ret <4 x i16> %a +// CHECK-LABEL: test_vreinterpret_u16_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_u16_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_u16_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_u16_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_u16_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_u16_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x half> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_u16_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x float> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_u16_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_u16_p16 +// CHECK: entry: +// CHECK-NEXT: ret <4 x i16> %a +// CHECK-LABEL: test_vreinterpret_u32_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vreinterpret_u32_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vreinterpret_u32_s32 +// CHECK: entry: +// CHECK-NEXT: ret <2 x i32> %a +// 
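Every 64-bit vreinterpret above follows the same two-shape pattern: a single IR bitcast when the element types differ, and a bare ret %a when source and destination map to the same LLVM type (signed, unsigned, and polynomial vectors of one lane width all share it). A minimal sketch, assuming nothing beyond <arm_neon.h>:

#include <arm_neon.h>

int8x8_t reinterpret_example(float32x2_t a) {
  return vreinterpret_s8_f32(a);  // <2 x float> -> <8 x i8>: one bitcast, bits unchanged
}

uint8x8_t reinterpret_identity(int8x8_t a) {
  return vreinterpret_u8_s8(a);   // both are <8 x i8> in IR, so no instruction at all
}

The u32 and u64 reinterprets continue below.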
CHECK-LABEL: test_vreinterpret_u32_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vreinterpret_u32_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vreinterpret_u32_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vreinterpret_u32_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vreinterpret_u32_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x half> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vreinterpret_u32_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x float> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vreinterpret_u32_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vreinterpret_u32_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vreinterpret_u64_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vreinterpret_u64_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vreinterpret_u64_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vreinterpret_u64_s64 +// CHECK: entry: +// CHECK-NEXT: ret <1 x i64> %a +// CHECK-LABEL: test_vreinterpret_u64_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vreinterpret_u64_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vreinterpret_u64_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vreinterpret_u64_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x half> %a to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vreinterpret_u64_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x float> %a to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vreinterpret_u64_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vreinterpret_u64_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <1 x i64> +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vreinterpret_f16_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <4 x half> +// CHECK-NEXT: ret <4 x half> %0 +// CHECK-LABEL: test_vreinterpret_f16_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <4 x half> +// CHECK-NEXT: ret <4 x half> %0 +// CHECK-LABEL: test_vreinterpret_f16_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <4 x half> +// CHECK-NEXT: ret <4 x half> %0 +// CHECK-LABEL: test_vreinterpret_f16_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <4 x half> +// CHECK-NEXT: ret <4 x half> %0 +// CHECK-LABEL: test_vreinterpret_f16_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <4 x half> +// CHECK-NEXT: ret <4 x half> %0 +// CHECK-LABEL: 
test_vreinterpret_f16_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <4 x half> +// CHECK-NEXT: ret <4 x half> %0 +// CHECK-LABEL: test_vreinterpret_f16_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <4 x half> +// CHECK-NEXT: ret <4 x half> %0 +// CHECK-LABEL: test_vreinterpret_f16_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <4 x half> +// CHECK-NEXT: ret <4 x half> %0 +// CHECK-LABEL: test_vreinterpret_f16_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x float> %a to <4 x half> +// CHECK-NEXT: ret <4 x half> %0 +// CHECK-LABEL: test_vreinterpret_f16_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <4 x half> +// CHECK-NEXT: ret <4 x half> %0 +// CHECK-LABEL: test_vreinterpret_f16_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <4 x half> +// CHECK-NEXT: ret <4 x half> %0 +// CHECK-LABEL: test_vreinterpret_f32_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <2 x float> +// CHECK-NEXT: ret <2 x float> %0 +// CHECK-LABEL: test_vreinterpret_f32_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <2 x float> +// CHECK-NEXT: ret <2 x float> %0 +// CHECK-LABEL: test_vreinterpret_f32_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <2 x float> +// CHECK-NEXT: ret <2 x float> %0 +// CHECK-LABEL: test_vreinterpret_f32_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <2 x float> +// CHECK-NEXT: ret <2 x float> %0 +// CHECK-LABEL: test_vreinterpret_f32_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <2 x float> +// CHECK-NEXT: ret <2 x float> %0 +// CHECK-LABEL: test_vreinterpret_f32_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <2 x float> +// CHECK-NEXT: ret <2 x float> %0 +// CHECK-LABEL: test_vreinterpret_f32_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <2 x float> +// CHECK-NEXT: ret <2 x float> %0 +// CHECK-LABEL: test_vreinterpret_f32_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <2 x float> +// CHECK-NEXT: ret <2 x float> %0 +// CHECK-LABEL: test_vreinterpret_f32_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x half> %a to <2 x float> +// CHECK-NEXT: ret <2 x float> %0 +// CHECK-LABEL: test_vreinterpret_f32_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <2 x float> +// CHECK-NEXT: ret <2 x float> %0 +// CHECK-LABEL: test_vreinterpret_f32_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <2 x float> +// CHECK-NEXT: ret <2 x float> %0 +// CHECK-LABEL: test_vreinterpret_p8_s8 +// CHECK: entry: +// CHECK-NEXT: ret <8 x i8> %a +// CHECK-LABEL: test_vreinterpret_p8_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_p8_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_p8_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_p8_u8 +// CHECK: entry: +// CHECK-NEXT: ret <8 x i8> %a +// CHECK-LABEL: test_vreinterpret_p8_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_p8_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_p8_u64 +// CHECK: entry: +// CHECK-NEXT: 
%0 = bitcast <1 x i64> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_p8_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x half> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_p8_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x float> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_p8_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i16> %a to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vreinterpret_p16_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_p16_s16 +// CHECK: entry: +// CHECK-NEXT: ret <4 x i16> %a +// CHECK-LABEL: test_vreinterpret_p16_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_p16_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_p16_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_p16_u16 +// CHECK: entry: +// CHECK-NEXT: ret <4 x i16> %a +// CHECK-LABEL: test_vreinterpret_p16_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i32> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_p16_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <1 x i64> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_p16_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x half> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_p16_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x float> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpret_p16_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i8> %a to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_s8_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_s8_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_s8_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_s8_u8 +// CHECK: entry: +// CHECK-NEXT: ret <16 x i8> %a +// CHECK-LABEL: test_vreinterpretq_s8_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_s8_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_s8_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_s8_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x half> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_s8_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x float> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_s8_p8 +// CHECK: entry: +// CHECK-NEXT: ret <16 x i8> %a +// CHECK-LABEL: test_vreinterpretq_s8_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <16 x i8> +// CHECK-NEXT: ret 
<16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_s16_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_s16_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_s16_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_s16_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_s16_u16 +// CHECK: entry: +// CHECK-NEXT: ret <8 x i16> %a +// CHECK-LABEL: test_vreinterpretq_s16_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_s16_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_s16_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x half> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_s16_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x float> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_s16_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_s16_p16 +// CHECK: entry: +// CHECK-NEXT: ret <8 x i16> %a +// CHECK-LABEL: test_vreinterpretq_s32_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vreinterpretq_s32_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vreinterpretq_s32_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vreinterpretq_s32_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vreinterpretq_s32_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vreinterpretq_s32_u32 +// CHECK: entry: +// CHECK-NEXT: ret <4 x i32> %a +// CHECK-LABEL: test_vreinterpretq_s32_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vreinterpretq_s32_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x half> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vreinterpretq_s32_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x float> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vreinterpretq_s32_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vreinterpretq_s32_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vreinterpretq_s64_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vreinterpretq_s64_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vreinterpretq_s64_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to 
<2 x i64> +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vreinterpretq_s64_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vreinterpretq_s64_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vreinterpretq_s64_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vreinterpretq_s64_u64 +// CHECK: entry: +// CHECK-NEXT: ret <2 x i64> %a +// CHECK-LABEL: test_vreinterpretq_s64_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x half> %a to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vreinterpretq_s64_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x float> %a to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vreinterpretq_s64_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vreinterpretq_s64_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vreinterpretq_u8_s8 +// CHECK: entry: +// CHECK-NEXT: ret <16 x i8> %a +// CHECK-LABEL: test_vreinterpretq_u8_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_u8_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_u8_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_u8_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_u8_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_u8_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_u8_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x half> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_u8_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x float> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_u8_p8 +// CHECK: entry: +// CHECK-NEXT: ret <16 x i8> %a +// CHECK-LABEL: test_vreinterpretq_u8_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_u16_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_u16_s16 +// CHECK: entry: +// CHECK-NEXT: ret <8 x i16> %a +// CHECK-LABEL: test_vreinterpretq_u16_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_u16_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_u16_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_u16_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <8 x i16> +// CHECK-NEXT: ret <8 x 
i16> %0 +// CHECK-LABEL: test_vreinterpretq_u16_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_u16_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x half> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_u16_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x float> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_u16_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_u16_p16 +// CHECK: entry: +// CHECK-NEXT: ret <8 x i16> %a +// CHECK-LABEL: test_vreinterpretq_u32_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vreinterpretq_u32_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vreinterpretq_u32_s32 +// CHECK: entry: +// CHECK-NEXT: ret <4 x i32> %a +// CHECK-LABEL: test_vreinterpretq_u32_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vreinterpretq_u32_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vreinterpretq_u32_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vreinterpretq_u32_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vreinterpretq_u32_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x half> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vreinterpretq_u32_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x float> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vreinterpretq_u32_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vreinterpretq_u32_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vreinterpretq_u64_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vreinterpretq_u64_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vreinterpretq_u64_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vreinterpretq_u64_s64 +// CHECK: entry: +// CHECK-NEXT: ret <2 x i64> %a +// CHECK-LABEL: test_vreinterpretq_u64_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vreinterpretq_u64_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vreinterpretq_u64_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vreinterpretq_u64_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x half> %a to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vreinterpretq_u64_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x float> %a to 
<2 x i64> +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vreinterpretq_u64_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vreinterpretq_u64_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <2 x i64> +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vreinterpretq_f16_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <8 x half> +// CHECK-NEXT: ret <8 x half> %0 +// CHECK-LABEL: test_vreinterpretq_f16_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <8 x half> +// CHECK-NEXT: ret <8 x half> %0 +// CHECK-LABEL: test_vreinterpretq_f16_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <8 x half> +// CHECK-NEXT: ret <8 x half> %0 +// CHECK-LABEL: test_vreinterpretq_f16_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <8 x half> +// CHECK-NEXT: ret <8 x half> %0 +// CHECK-LABEL: test_vreinterpretq_f16_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <8 x half> +// CHECK-NEXT: ret <8 x half> %0 +// CHECK-LABEL: test_vreinterpretq_f16_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <8 x half> +// CHECK-NEXT: ret <8 x half> %0 +// CHECK-LABEL: test_vreinterpretq_f16_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <8 x half> +// CHECK-NEXT: ret <8 x half> %0 +// CHECK-LABEL: test_vreinterpretq_f16_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <8 x half> +// CHECK-NEXT: ret <8 x half> %0 +// CHECK-LABEL: test_vreinterpretq_f16_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x float> %a to <8 x half> +// CHECK-NEXT: ret <8 x half> %0 +// CHECK-LABEL: test_vreinterpretq_f16_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <8 x half> +// CHECK-NEXT: ret <8 x half> %0 +// CHECK-LABEL: test_vreinterpretq_f16_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <8 x half> +// CHECK-NEXT: ret <8 x half> %0 +// CHECK-LABEL: test_vreinterpretq_f32_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <4 x float> +// CHECK-NEXT: ret <4 x float> %0 +// CHECK-LABEL: test_vreinterpretq_f32_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <4 x float> +// CHECK-NEXT: ret <4 x float> %0 +// CHECK-LABEL: test_vreinterpretq_f32_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <4 x float> +// CHECK-NEXT: ret <4 x float> %0 +// CHECK-LABEL: test_vreinterpretq_f32_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <4 x float> +// CHECK-NEXT: ret <4 x float> %0 +// CHECK-LABEL: test_vreinterpretq_f32_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <4 x float> +// CHECK-NEXT: ret <4 x float> %0 +// CHECK-LABEL: test_vreinterpretq_f32_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <4 x float> +// CHECK-NEXT: ret <4 x float> %0 +// CHECK-LABEL: test_vreinterpretq_f32_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <4 x float> +// CHECK-NEXT: ret <4 x float> %0 +// CHECK-LABEL: test_vreinterpretq_f32_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <4 x float> +// CHECK-NEXT: ret <4 x float> %0 +// CHECK-LABEL: test_vreinterpretq_f32_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x half> %a to <4 x float> +// CHECK-NEXT: ret <4 x float> %0 +// CHECK-LABEL: test_vreinterpretq_f32_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <4 x float> +// CHECK-NEXT: ret <4 x float> %0 +// 
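The 128-bit vreinterpretq checks mirror the 64-bit ones exactly, just over q-register types; the <8 x half> cases rely on the half-precision vector types that this test configuration makes available. A minimal sketch:

#include <arm_neon.h>

float16x8_t reinterpretq_example(uint32x4_t a) {
  return vreinterpretq_f16_u32(a);  // <4 x i32> -> <8 x half> bitcast
}

The remaining f32 and polynomial reinterprets follow.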
CHECK-LABEL: test_vreinterpretq_f32_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <4 x float> +// CHECK-NEXT: ret <4 x float> %0 +// CHECK-LABEL: test_vreinterpretq_p8_s8 +// CHECK: entry: +// CHECK-NEXT: ret <16 x i8> %a +// CHECK-LABEL: test_vreinterpretq_p8_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_p8_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_p8_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_p8_u8 +// CHECK: entry: +// CHECK-NEXT: ret <16 x i8> %a +// CHECK-LABEL: test_vreinterpretq_p8_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_p8_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_p8_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_p8_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x half> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_p8_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x float> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_p8_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x i16> %a to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vreinterpretq_p16_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_p16_s16 +// CHECK: entry: +// CHECK-NEXT: ret <8 x i16> %a +// CHECK-LABEL: test_vreinterpretq_p16_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_p16_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_p16_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_p16_u16 +// CHECK: entry: +// CHECK-NEXT: ret <8 x i16> %a +// CHECK-LABEL: test_vreinterpretq_p16_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x i32> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_p16_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <2 x i64> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_p16_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <8 x half> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_p16_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <4 x float> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vreinterpretq_p16_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast <16 x i8> %a to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vrev16_s8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> +// CHECK-NEXT: ret <8 x i8> %shuffle.i +// CHECK-LABEL: test_vrev16_u8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> +// CHECK-NEXT: ret <8 x i8> 
%shuffle.i +// CHECK-LABEL: test_vrev16_p8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> +// CHECK-NEXT: ret <8 x i8> %shuffle.i +// CHECK-LABEL: test_vrev16q_s8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> +// CHECK-NEXT: ret <16 x i8> %shuffle.i +// CHECK-LABEL: test_vrev16q_u8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> +// CHECK-NEXT: ret <16 x i8> %shuffle.i +// CHECK-LABEL: test_vrev16q_p8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> +// CHECK-NEXT: ret <16 x i8> %shuffle.i +// CHECK-LABEL: test_vrev32_s8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> +// CHECK-NEXT: ret <8 x i8> %shuffle.i +// CHECK-LABEL: test_vrev32_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> +// CHECK-NEXT: ret <4 x i16> %shuffle.i +// CHECK-LABEL: test_vrev32_u8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> +// CHECK-NEXT: ret <8 x i8> %shuffle.i +// CHECK-LABEL: test_vrev32_u16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> +// CHECK-NEXT: ret <4 x i16> %shuffle.i +// CHECK-LABEL: test_vrev32_p8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> +// CHECK-NEXT: ret <8 x i8> %shuffle.i +// CHECK-LABEL: test_vrev32_p16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> +// CHECK-NEXT: ret <4 x i16> %shuffle.i +// CHECK-LABEL: test_vrev32q_s8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> +// CHECK-NEXT: ret <16 x i8> %shuffle.i +// CHECK-LABEL: test_vrev32q_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> +// CHECK-NEXT: ret <8 x i16> %shuffle.i +// CHECK-LABEL: test_vrev32q_u8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> +// CHECK-NEXT: ret <16 x i8> %shuffle.i +// CHECK-LABEL: test_vrev32q_u16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> +// CHECK-NEXT: ret <8 x i16> %shuffle.i +// CHECK-LABEL: test_vrev32q_p8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> +// CHECK-NEXT: ret <16 x i8> %shuffle.i +// CHECK-LABEL: test_vrev32q_p16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> +// CHECK-NEXT: ret <8 x i16> %shuffle.i +// CHECK-LABEL: test_vrev64_s8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> +// CHECK-NEXT: ret <8 x i8> %shuffle.i +// CHECK-LABEL: test_vrev64_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> +// CHECK-NEXT: ret <4 x i16> %shuffle.i +// CHECK-LABEL: test_vrev64_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> undef, <2 x i32> +// CHECK-NEXT: ret <2 x i32> %shuffle.i +// CHECK-LABEL: test_vrev64_u8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> +// CHECK-NEXT: ret <8 x i8> %shuffle.i +// CHECK-LABEL: test_vrev64_u16 +// 
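All of the vrev16/vrev32/vrev64 variants above lower to a single shufflevector whose constant mask reverses the elements within each 16-, 32-, or 64-bit group. The mask literals were dropped from this dump, but they are fully determined by the intrinsic semantics: for vrev64_u8 the mask is <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>. A minimal sketch:

#include <arm_neon.h>

uint8x8_t rev64_example(uint8x8_t a) {
  return vrev64_u8(a);  // reverses the 8 bytes within the single 64-bit doubleword
}

The body of test_vrev64_u16 continues below.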
CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> +// CHECK-NEXT: ret <4 x i16> %shuffle.i +// CHECK-LABEL: test_vrev64_u32 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> undef, <2 x i32> +// CHECK-NEXT: ret <2 x i32> %shuffle.i +// CHECK-LABEL: test_vrev64_p8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> +// CHECK-NEXT: ret <8 x i8> %shuffle.i +// CHECK-LABEL: test_vrev64_p16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> +// CHECK-NEXT: ret <4 x i16> %shuffle.i +// CHECK-LABEL: test_vrev64_f32 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> +// CHECK-NEXT: ret <2 x float> %shuffle.i +// CHECK-LABEL: test_vrev64q_s8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> +// CHECK-NEXT: ret <16 x i8> %shuffle.i +// CHECK-LABEL: test_vrev64q_s16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> +// CHECK-NEXT: ret <8 x i16> %shuffle.i +// CHECK-LABEL: test_vrev64q_s32 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> +// CHECK-NEXT: ret <4 x i32> %shuffle.i +// CHECK-LABEL: test_vrev64q_u8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> +// CHECK-NEXT: ret <16 x i8> %shuffle.i +// CHECK-LABEL: test_vrev64q_u16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> +// CHECK-NEXT: ret <8 x i16> %shuffle.i +// CHECK-LABEL: test_vrev64q_u32 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> +// CHECK-NEXT: ret <4 x i32> %shuffle.i +// CHECK-LABEL: test_vrev64q_p8 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> +// CHECK-NEXT: ret <16 x i8> %shuffle.i +// CHECK-LABEL: test_vrev64q_p16 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> +// CHECK-NEXT: ret <8 x i16> %shuffle.i +// CHECK-LABEL: test_vrev64q_f32 +// CHECK: entry: +// CHECK-NEXT: %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> +// CHECK-NEXT: ret <4 x float> %shuffle.i +// CHECK-LABEL: test_vrhadd_s8 +// CHECK: entry: +// CHECK-NEXT: %vrhadd_v.i = tail call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vrhadd_v.i +// CHECK-LABEL: test_vrhadd_s16 +// CHECK: entry: +// CHECK-NEXT: %vrhadd_v2.i = tail call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vrhadd_v2.i +// CHECK-LABEL: test_vrhadd_s32 +// CHECK: entry: +// CHECK-NEXT: %vrhadd_v2.i = tail call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vrhadd_v2.i +// CHECK-LABEL: test_vrhadd_u8 +// CHECK: entry: +// CHECK-NEXT: %vrhadd_v.i = tail call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vrhadd_v.i +// CHECK-LABEL: test_vrhadd_u16 +// CHECK: entry: +// CHECK-NEXT: %vrhadd_v2.i = tail call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vrhadd_v2.i +// CHECK-LABEL: test_vrhadd_u32 +// CHECK: entry: +// CHECK-NEXT: %vrhadd_v2.i = tail call <2 x i32> 
@llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vrhadd_v2.i +// CHECK-LABEL: test_vrhaddq_s8 +// CHECK: entry: +// CHECK-NEXT: %vrhaddq_v.i = tail call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vrhaddq_v.i +// CHECK-LABEL: test_vrhaddq_s16 +// CHECK: entry: +// CHECK-NEXT: %vrhaddq_v2.i = tail call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vrhaddq_v2.i +// CHECK-LABEL: test_vrhaddq_s32 +// CHECK: entry: +// CHECK-NEXT: %vrhaddq_v2.i = tail call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vrhaddq_v2.i +// CHECK-LABEL: test_vrhaddq_u8 +// CHECK: entry: +// CHECK-NEXT: %vrhaddq_v.i = tail call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vrhaddq_v.i +// CHECK-LABEL: test_vrhaddq_u16 +// CHECK: entry: +// CHECK-NEXT: %vrhaddq_v2.i = tail call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vrhaddq_v2.i +// CHECK-LABEL: test_vrhaddq_u32 +// CHECK: entry: +// CHECK-NEXT: %vrhaddq_v2.i = tail call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vrhaddq_v2.i +// CHECK-LABEL: test_vrshl_s8 +// CHECK: entry: +// CHECK-NEXT: %vrshl_v.i = tail call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vrshl_v.i +// CHECK-LABEL: test_vrshl_s16 +// CHECK: entry: +// CHECK-NEXT: %vrshl_v2.i = tail call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vrshl_v2.i +// CHECK-LABEL: test_vrshl_s32 +// CHECK: entry: +// CHECK-NEXT: %vrshl_v2.i = tail call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vrshl_v2.i +// CHECK-LABEL: test_vrshl_s64 +// CHECK: entry: +// CHECK-NEXT: %vrshl_v2.i = tail call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %a, <1 x i64> %b) +// CHECK-NEXT: ret <1 x i64> %vrshl_v2.i +// CHECK-LABEL: test_vrshl_u8 +// CHECK: entry: +// CHECK-NEXT: %vrshl_v.i = tail call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vrshl_v.i +// CHECK-LABEL: test_vrshl_u16 +// CHECK: entry: +// CHECK-NEXT: %vrshl_v2.i = tail call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vrshl_v2.i +// CHECK-LABEL: test_vrshl_u32 +// CHECK: entry: +// CHECK-NEXT: %vrshl_v2.i = tail call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vrshl_v2.i +// CHECK-LABEL: test_vrshl_u64 +// CHECK: entry: +// CHECK-NEXT: %vrshl_v2.i = tail call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %a, <1 x i64> %b) +// CHECK-NEXT: ret <1 x i64> %vrshl_v2.i +// CHECK-LABEL: test_vrshlq_s8 +// CHECK: entry: +// CHECK-NEXT: %vrshlq_v.i = tail call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vrshlq_v.i +// CHECK-LABEL: test_vrshlq_s16 +// CHECK: entry: +// CHECK-NEXT: %vrshlq_v2.i = tail call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vrshlq_v2.i +// CHECK-LABEL: test_vrshlq_s32 +// CHECK: entry: +// CHECK-NEXT: %vrshlq_v2.i = tail call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vrshlq_v2.i +// CHECK-LABEL: test_vrshlq_s64 +// 
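Unlike the _n forms, vrshl takes its shift counts from a second vector register: each lane of b shifts the corresponding lane of a, a negative count shifts right, and the result is rounded. A minimal sketch:

#include <arm_neon.h>

int8x8_t rshl_example(int8x8_t a, int8x8_t b) {
  return vrshl_s8(a, b);  // per-lane rounding shift; the sign of each b lane picks the direction
}

test_vrshlq_s64 continues below.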
CHECK: entry: +// CHECK-NEXT: %vrshlq_v2.i = tail call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %a, <2 x i64> %b) +// CHECK-NEXT: ret <2 x i64> %vrshlq_v2.i +// CHECK-LABEL: test_vrshlq_u8 +// CHECK: entry: +// CHECK-NEXT: %vrshlq_v.i = tail call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vrshlq_v.i +// CHECK-LABEL: test_vrshlq_u16 +// CHECK: entry: +// CHECK-NEXT: %vrshlq_v2.i = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vrshlq_v2.i +// CHECK-LABEL: test_vrshlq_u32 +// CHECK: entry: +// CHECK-NEXT: %vrshlq_v2.i = tail call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vrshlq_v2.i +// CHECK-LABEL: test_vrshlq_u64 +// CHECK: entry: +// CHECK-NEXT: %vrshlq_v2.i = tail call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %a, <2 x i64> %b) +// CHECK-NEXT: ret <2 x i64> %vrshlq_v2.i +// CHECK-LABEL: test_vrshrn_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vrshrn_n1 = tail call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> %a, <8 x i16> ) +// CHECK-NEXT: ret <8 x i8> %vrshrn_n1 +// CHECK-LABEL: test_vrshrn_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vrshrn_n1 = tail call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> %a, <4 x i32> ) +// CHECK-NEXT: ret <4 x i16> %vrshrn_n1 +// CHECK-LABEL: test_vrshrn_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vrshrn_n1 = tail call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> %a, <2 x i64> ) +// CHECK-NEXT: ret <2 x i32> %vrshrn_n1 +// CHECK-LABEL: test_vrshrn_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vrshrn_n1 = tail call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> %a, <8 x i16> ) +// CHECK-NEXT: ret <8 x i8> %vrshrn_n1 +// CHECK-LABEL: test_vrshrn_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vrshrn_n1 = tail call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> %a, <4 x i32> ) +// CHECK-NEXT: ret <4 x i16> %vrshrn_n1 +// CHECK-LABEL: test_vrshrn_n_u64 +// CHECK: entry: +// CHECK-NEXT: %vrshrn_n1 = tail call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> %a, <2 x i64> ) +// CHECK-NEXT: ret <2 x i32> %vrshrn_n1 +// CHECK-LABEL: test_vrshr_n_s8 +// CHECK: entry: +// CHECK-NEXT: %vrshr_n = tail call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK-NEXT: ret <8 x i8> %vrshr_n +// CHECK-LABEL: test_vrshr_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vrshr_n1 = tail call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %a, <4 x i16> ) +// CHECK-NEXT: ret <4 x i16> %vrshr_n1 +// CHECK-LABEL: test_vrshr_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vrshr_n1 = tail call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %a, <2 x i32> ) +// CHECK-NEXT: ret <2 x i32> %vrshr_n1 +// CHECK-LABEL: test_vrshr_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vrshr_n1 = tail call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %a, <1 x i64> ) +// CHECK-NEXT: ret <1 x i64> %vrshr_n1 +// CHECK-LABEL: test_vrshr_n_u8 +// CHECK: entry: +// CHECK-NEXT: %vrshr_n = tail call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK-NEXT: ret <8 x i8> %vrshr_n +// CHECK-LABEL: test_vrshr_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vrshr_n1 = tail call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %a, <4 x i16> ) +// CHECK-NEXT: ret <4 x i16> %vrshr_n1 +// CHECK-LABEL: test_vrshr_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vrshr_n1 = tail call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %a, <2 x i32> ) +// CHECK-NEXT: ret <2 x i32> %vrshr_n1 +// CHECK-LABEL: test_vrshr_n_u64 
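vrshr_n has no dedicated IR intrinsic: it reuses @llvm.arm.neon.vrshifts/vrshiftu with a splat of the negated immediate, since a rounding right shift by n is a rounding shift by -n; that negative splat is the constant operand elided from the calls above. A minimal sketch, with n = 1 as an assumed value:

#include <arm_neon.h>

int16x4_t rshr_example(int16x4_t a) {
  return vrshr_n_s16(a, 1);  // rounding right shift; 1 is an assumed in-range immediate
}

The body of test_vrshr_n_u64 follows.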
+// CHECK: entry: +// CHECK-NEXT: %vrshr_n1 = tail call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %a, <1 x i64> ) +// CHECK-NEXT: ret <1 x i64> %vrshr_n1 +// CHECK-LABEL: test_vrshrq_n_s8 +// CHECK: entry: +// CHECK-NEXT: %vrshr_n = tail call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK-NEXT: ret <16 x i8> %vrshr_n +// CHECK-LABEL: test_vrshrq_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vrshr_n1 = tail call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %a, <8 x i16> ) +// CHECK-NEXT: ret <8 x i16> %vrshr_n1 +// CHECK-LABEL: test_vrshrq_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vrshr_n1 = tail call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %a, <4 x i32> ) +// CHECK-NEXT: ret <4 x i32> %vrshr_n1 +// CHECK-LABEL: test_vrshrq_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vrshr_n1 = tail call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %a, <2 x i64> ) +// CHECK-NEXT: ret <2 x i64> %vrshr_n1 +// CHECK-LABEL: test_vrshrq_n_u8 +// CHECK: entry: +// CHECK-NEXT: %vrshr_n = tail call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK-NEXT: ret <16 x i8> %vrshr_n +// CHECK-LABEL: test_vrshrq_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vrshr_n1 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %a, <8 x i16> ) +// CHECK-NEXT: ret <8 x i16> %vrshr_n1 +// CHECK-LABEL: test_vrshrq_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vrshr_n1 = tail call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %a, <4 x i32> ) +// CHECK-NEXT: ret <4 x i32> %vrshr_n1 +// CHECK-LABEL: test_vrshrq_n_u64 +// CHECK: entry: +// CHECK-NEXT: %vrshr_n1 = tail call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %a, <2 x i64> ) +// CHECK-NEXT: ret <2 x i64> %vrshr_n1 +// CHECK-LABEL: test_vrsqrte_f32 +// CHECK: entry: +// CHECK-NEXT: %vrsqrte_v1.i = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %a) +// CHECK-NEXT: ret <2 x float> %vrsqrte_v1.i +// CHECK-LABEL: test_vrsqrte_u32 +// CHECK: entry: +// CHECK-NEXT: %vrsqrte_v1.i = tail call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %a) +// CHECK-NEXT: ret <2 x i32> %vrsqrte_v1.i +// CHECK-LABEL: test_vrsqrteq_f32 +// CHECK: entry: +// CHECK-NEXT: %vrsqrteq_v1.i = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %a) +// CHECK-NEXT: ret <4 x float> %vrsqrteq_v1.i +// CHECK-LABEL: test_vrsqrteq_u32 +// CHECK: entry: +// CHECK-NEXT: %vrsqrteq_v1.i = tail call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %a) +// CHECK-NEXT: ret <4 x i32> %vrsqrteq_v1.i +// CHECK-LABEL: test_vrsqrts_f32 +// CHECK: entry: +// CHECK-NEXT: %vrsqrts_v2.i = tail call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %a, <2 x float> %b) +// CHECK-NEXT: ret <2 x float> %vrsqrts_v2.i +// CHECK-LABEL: test_vrsqrtsq_f32 +// CHECK: entry: +// CHECK-NEXT: %vrsqrtsq_v2.i = tail call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %a, <4 x float> %b) +// CHECK-NEXT: ret <4 x float> %vrsqrtsq_v2.i +// CHECK-LABEL: test_vrsra_n_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = tail call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %b, <8 x i8> ) +// CHECK-NEXT: %vrsra_n = add <8 x i8> %0, %a +// CHECK-NEXT: ret <8 x i8> %vrsra_n +// CHECK-LABEL: test_vrsra_n_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = tail call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %b, <4 x i16> ) +// CHECK-NEXT: %vrsra_n = add <4 x i16> %0, %a +// CHECK-NEXT: ret <4 x i16> %vrsra_n +// CHECK-LABEL: test_vrsra_n_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = tail call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %b, <2 x i32> 
) +// CHECK-NEXT: %vrsra_n = add <2 x i32> %0, %a +// CHECK-NEXT: ret <2 x i32> %vrsra_n +// CHECK-LABEL: test_vrsra_n_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = tail call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %b, <1 x i64> ) +// CHECK-NEXT: %vrsra_n = add <1 x i64> %0, %a +// CHECK-NEXT: ret <1 x i64> %vrsra_n +// CHECK-LABEL: test_vrsra_n_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = tail call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %b, <8 x i8> ) +// CHECK-NEXT: %vrsra_n = add <8 x i8> %0, %a +// CHECK-NEXT: ret <8 x i8> %vrsra_n +// CHECK-LABEL: test_vrsra_n_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = tail call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %b, <4 x i16> ) +// CHECK-NEXT: %vrsra_n = add <4 x i16> %0, %a +// CHECK-NEXT: ret <4 x i16> %vrsra_n +// CHECK-LABEL: test_vrsra_n_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = tail call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %b, <2 x i32> ) +// CHECK-NEXT: %vrsra_n = add <2 x i32> %0, %a +// CHECK-NEXT: ret <2 x i32> %vrsra_n +// CHECK-LABEL: test_vrsra_n_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = tail call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %b, <1 x i64> ) +// CHECK-NEXT: %vrsra_n = add <1 x i64> %0, %a +// CHECK-NEXT: ret <1 x i64> %vrsra_n +// CHECK-LABEL: test_vrsraq_n_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = tail call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %b, <16 x i8> ) +// CHECK-NEXT: %vrsra_n = add <16 x i8> %0, %a +// CHECK-NEXT: ret <16 x i8> %vrsra_n +// CHECK-LABEL: test_vrsraq_n_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = tail call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %b, <8 x i16> ) +// CHECK-NEXT: %vrsra_n = add <8 x i16> %0, %a +// CHECK-NEXT: ret <8 x i16> %vrsra_n +// CHECK-LABEL: test_vrsraq_n_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = tail call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %b, <4 x i32> ) +// CHECK-NEXT: %vrsra_n = add <4 x i32> %0, %a +// CHECK-NEXT: ret <4 x i32> %vrsra_n +// CHECK-LABEL: test_vrsraq_n_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = tail call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %b, <2 x i64> ) +// CHECK-NEXT: %vrsra_n = add <2 x i64> %0, %a +// CHECK-NEXT: ret <2 x i64> %vrsra_n +// CHECK-LABEL: test_vrsraq_n_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = tail call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %b, <16 x i8> ) +// CHECK-NEXT: %vrsra_n = add <16 x i8> %0, %a +// CHECK-NEXT: ret <16 x i8> %vrsra_n +// CHECK-LABEL: test_vrsraq_n_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %b, <8 x i16> ) +// CHECK-NEXT: %vrsra_n = add <8 x i16> %0, %a +// CHECK-NEXT: ret <8 x i16> %vrsra_n +// CHECK-LABEL: test_vrsraq_n_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = tail call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %b, <4 x i32> ) +// CHECK-NEXT: %vrsra_n = add <4 x i32> %0, %a +// CHECK-NEXT: ret <4 x i32> %vrsra_n +// CHECK-LABEL: test_vrsraq_n_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = tail call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %b, <2 x i64> ) +// CHECK-NEXT: %vrsra_n = add <2 x i64> %0, %a +// CHECK-NEXT: ret <2 x i64> %vrsra_n +// CHECK-LABEL: test_vrsubhn_s16 +// CHECK: entry: +// CHECK-NEXT: %vrsubhn_v2.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i8> %vrsubhn_v2.i +// CHECK-LABEL: test_vrsubhn_s32 +// CHECK: entry: +// CHECK-NEXT: %vrsubhn_v2.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i16> 
%vrsubhn_v2.i +// CHECK-LABEL: test_vrsubhn_s64 +// CHECK: entry: +// CHECK-NEXT: %vrsubhn_v2.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) +// CHECK-NEXT: ret <2 x i32> %vrsubhn_v2.i +// CHECK-LABEL: test_vrsubhn_u16 +// CHECK: entry: +// CHECK-NEXT: %vrsubhn_v2.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i8> %vrsubhn_v2.i +// CHECK-LABEL: test_vrsubhn_u32 +// CHECK: entry: +// CHECK-NEXT: %vrsubhn_v2.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i16> %vrsubhn_v2.i +// CHECK-LABEL: test_vrsubhn_u64 +// CHECK: entry: +// CHECK-NEXT: %vrsubhn_v2.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) +// CHECK-NEXT: ret <2 x i32> %vrsubhn_v2.i +// CHECK-LABEL: test_vset_lane_u8 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <8 x i8> %b, i8 %a, i32 7 +// CHECK-NEXT: ret <8 x i8> %vset_lane +// CHECK-LABEL: test_vset_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <4 x i16> %b, i16 %a, i32 3 +// CHECK-NEXT: ret <4 x i16> %vset_lane +// CHECK-LABEL: test_vset_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <2 x i32> %b, i32 %a, i32 1 +// CHECK-NEXT: ret <2 x i32> %vset_lane +// CHECK-LABEL: test_vset_lane_s8 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <8 x i8> %b, i8 %a, i32 7 +// CHECK-NEXT: ret <8 x i8> %vset_lane +// CHECK-LABEL: test_vset_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <4 x i16> %b, i16 %a, i32 3 +// CHECK-NEXT: ret <4 x i16> %vset_lane +// CHECK-LABEL: test_vset_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <2 x i32> %b, i32 %a, i32 1 +// CHECK-NEXT: ret <2 x i32> %vset_lane +// CHECK-LABEL: test_vset_lane_p8 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <8 x i8> %b, i8 %a, i32 7 +// CHECK-NEXT: ret <8 x i8> %vset_lane +// CHECK-LABEL: test_vset_lane_p16 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <4 x i16> %b, i16 %a, i32 3 +// CHECK-NEXT: ret <4 x i16> %vset_lane +// CHECK-LABEL: test_vset_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <2 x float> %b, float %a, i32 1 +// CHECK-NEXT: ret <2 x float> %vset_lane +// CHECK-LABEL: test_vset_lane_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast half* %a to i16* +// CHECK-NEXT: %1 = load i16, i16* %0, align 2, !tbaa !3 +// CHECK-NEXT: %2 = bitcast <4 x half> %b to <4 x i16> +// CHECK-NEXT: %vset_lane = insertelement <4 x i16> %2, i16 %1, i32 1 +// CHECK-NEXT: %3 = bitcast <4 x i16> %vset_lane to <4 x half> +// CHECK-NEXT: ret <4 x half> %3 +// CHECK-LABEL: test_vsetq_lane_u8 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <16 x i8> %b, i8 %a, i32 15 +// CHECK-NEXT: ret <16 x i8> %vset_lane +// CHECK-LABEL: test_vsetq_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <8 x i16> %b, i16 %a, i32 7 +// CHECK-NEXT: ret <8 x i16> %vset_lane +// CHECK-LABEL: test_vsetq_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <4 x i32> %b, i32 %a, i32 3 +// CHECK-NEXT: ret <4 x i32> %vset_lane +// CHECK-LABEL: test_vsetq_lane_s8 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <16 x i8> %b, i8 %a, i32 15 +// CHECK-NEXT: ret <16 x i8> %vset_lane +// CHECK-LABEL: test_vsetq_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <8 x i16> %b, i16 %a, i32 7 +// CHECK-NEXT: ret <8 x i16> %vset_lane +// CHECK-LABEL: 
test_vsetq_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <4 x i32> %b, i32 %a, i32 3 +// CHECK-NEXT: ret <4 x i32> %vset_lane +// CHECK-LABEL: test_vsetq_lane_p8 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <16 x i8> %b, i8 %a, i32 15 +// CHECK-NEXT: ret <16 x i8> %vset_lane +// CHECK-LABEL: test_vsetq_lane_p16 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <8 x i16> %b, i16 %a, i32 7 +// CHECK-NEXT: ret <8 x i16> %vset_lane +// CHECK-LABEL: test_vsetq_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <4 x float> %b, float %a, i32 3 +// CHECK-NEXT: ret <4 x float> %vset_lane +// CHECK-LABEL: test_vsetq_lane_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast half* %a to i16* +// CHECK-NEXT: %1 = load i16, i16* %0, align 2, !tbaa !3 +// CHECK-NEXT: %2 = bitcast <8 x half> %b to <8 x i16> +// CHECK-NEXT: %vset_lane = insertelement <8 x i16> %2, i16 %1, i32 3 +// CHECK-NEXT: %3 = bitcast <8 x i16> %vset_lane to <8 x half> +// CHECK-NEXT: ret <8 x half> %3 +// CHECK-LABEL: test_vset_lane_s64 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <1 x i64> undef, i64 %a, i32 0 +// CHECK-NEXT: ret <1 x i64> %vset_lane +// CHECK-LABEL: test_vset_lane_u64 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <1 x i64> undef, i64 %a, i32 0 +// CHECK-NEXT: ret <1 x i64> %vset_lane +// CHECK-LABEL: test_vsetq_lane_s64 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <2 x i64> %b, i64 %a, i32 1 +// CHECK-NEXT: ret <2 x i64> %vset_lane +// CHECK-LABEL: test_vsetq_lane_u64 +// CHECK: entry: +// CHECK-NEXT: %vset_lane = insertelement <2 x i64> %b, i64 %a, i32 1 +// CHECK-NEXT: ret <2 x i64> %vset_lane +// CHECK-LABEL: test_vshl_s8 +// CHECK: entry: +// CHECK-NEXT: %vshl_v.i = tail call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vshl_v.i +// CHECK-LABEL: test_vshl_s16 +// CHECK: entry: +// CHECK-NEXT: %vshl_v2.i = tail call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vshl_v2.i +// CHECK-LABEL: test_vshl_s32 +// CHECK: entry: +// CHECK-NEXT: %vshl_v2.i = tail call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vshl_v2.i +// CHECK-LABEL: test_vshl_s64 +// CHECK: entry: +// CHECK-NEXT: %vshl_v2.i = tail call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %a, <1 x i64> %b) +// CHECK-NEXT: ret <1 x i64> %vshl_v2.i +// CHECK-LABEL: test_vshl_u8 +// CHECK: entry: +// CHECK-NEXT: %vshl_v.i = tail call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vshl_v.i +// CHECK-LABEL: test_vshl_u16 +// CHECK: entry: +// CHECK-NEXT: %vshl_v2.i = tail call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK-NEXT: ret <4 x i16> %vshl_v2.i +// CHECK-LABEL: test_vshl_u32 +// CHECK: entry: +// CHECK-NEXT: %vshl_v2.i = tail call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK-NEXT: ret <2 x i32> %vshl_v2.i +// CHECK-LABEL: test_vshl_u64 +// CHECK: entry: +// CHECK-NEXT: %vshl_v2.i = tail call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %a, <1 x i64> %b) +// CHECK-NEXT: ret <1 x i64> %vshl_v2.i +// CHECK-LABEL: test_vshlq_s8 +// CHECK: entry: +// CHECK-NEXT: %vshlq_v.i = tail call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vshlq_v.i +// CHECK-LABEL: test_vshlq_s16 +// CHECK: entry: +// CHECK-NEXT: %vshlq_v2.i = tail call <8 x 
i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vshlq_v2.i +// CHECK-LABEL: test_vshlq_s32 +// CHECK: entry: +// CHECK-NEXT: %vshlq_v2.i = tail call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vshlq_v2.i +// CHECK-LABEL: test_vshlq_s64 +// CHECK: entry: +// CHECK-NEXT: %vshlq_v2.i = tail call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %a, <2 x i64> %b) +// CHECK-NEXT: ret <2 x i64> %vshlq_v2.i +// CHECK-LABEL: test_vshlq_u8 +// CHECK: entry: +// CHECK-NEXT: %vshlq_v.i = tail call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK-NEXT: ret <16 x i8> %vshlq_v.i +// CHECK-LABEL: test_vshlq_u16 +// CHECK: entry: +// CHECK-NEXT: %vshlq_v2.i = tail call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK-NEXT: ret <8 x i16> %vshlq_v2.i +// CHECK-LABEL: test_vshlq_u32 +// CHECK: entry: +// CHECK-NEXT: %vshlq_v2.i = tail call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK-NEXT: ret <4 x i32> %vshlq_v2.i +// CHECK-LABEL: test_vshlq_u64 +// CHECK: entry: +// CHECK-NEXT: %vshlq_v2.i = tail call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %a, <2 x i64> %b) +// CHECK-NEXT: ret <2 x i64> %vshlq_v2.i +// CHECK-LABEL: test_vshll_n_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = sext <8 x i8> %a to <8 x i16> +// CHECK-NEXT: %vshll_n = shl nsw <8 x i16> %0, +// CHECK-NEXT: ret <8 x i16> %vshll_n +// CHECK-LABEL: test_vshll_n_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = sext <4 x i16> %a to <4 x i32> +// CHECK-NEXT: %vshll_n = shl nsw <4 x i32> %0, +// CHECK-NEXT: ret <4 x i32> %vshll_n +// CHECK-LABEL: test_vshll_n_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = sext <2 x i32> %a to <2 x i64> +// CHECK-NEXT: %vshll_n = shl nsw <2 x i64> %0, +// CHECK-NEXT: ret <2 x i64> %vshll_n +// CHECK-LABEL: test_vshll_n_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = zext <8 x i8> %a to <8 x i16> +// CHECK-NEXT: %vshll_n = shl nuw nsw <8 x i16> %0, +// CHECK-NEXT: ret <8 x i16> %vshll_n +// CHECK-LABEL: test_vshll_n_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = zext <4 x i16> %a to <4 x i32> +// CHECK-NEXT: %vshll_n = shl nuw nsw <4 x i32> %0, +// CHECK-NEXT: ret <4 x i32> %vshll_n +// CHECK-LABEL: test_vshll_n_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = zext <2 x i32> %a to <2 x i64> +// CHECK-NEXT: %vshll_n = shl nuw nsw <2 x i64> %0, +// CHECK-NEXT: ret <2 x i64> %vshll_n +// CHECK-LABEL: test_vshl_n_s8 +// CHECK: entry: +// CHECK-NEXT: %vshl_n = shl <8 x i8> %a, +// CHECK-NEXT: ret <8 x i8> %vshl_n +// CHECK-LABEL: test_vshl_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vshl_n = shl <4 x i16> %a, +// CHECK-NEXT: ret <4 x i16> %vshl_n +// CHECK-LABEL: test_vshl_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vshl_n = shl <2 x i32> %a, +// CHECK-NEXT: ret <2 x i32> %vshl_n +// CHECK-LABEL: test_vshl_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vshl_n = shl <1 x i64> %a, +// CHECK-NEXT: ret <1 x i64> %vshl_n +// CHECK-LABEL: test_vshl_n_u8 +// CHECK: entry: +// CHECK-NEXT: %vshl_n = shl <8 x i8> %a, +// CHECK-NEXT: ret <8 x i8> %vshl_n +// CHECK-LABEL: test_vshl_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vshl_n = shl <4 x i16> %a, +// CHECK-NEXT: ret <4 x i16> %vshl_n +// CHECK-LABEL: test_vshl_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vshl_n = shl <2 x i32> %a, +// CHECK-NEXT: ret <2 x i32> %vshl_n +// CHECK-LABEL: test_vshl_n_u64 +// CHECK: entry: +// CHECK-NEXT: %vshl_n = shl <1 x i64> %a, +// CHECK-NEXT: ret <1 x i64> %vshl_n +// CHECK-LABEL: test_vshlq_n_s8 +// 
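+//
+// A minimal sketch (my assumption; the real functions live in
+// test/CodeGen/arm_neon_intrinsics.c) of the C shape behind the
+// vshl/vshll_n/vshl_n expectations above, assuming #include <arm_neon.h>:
+//
+//   int8x8_t  test_vshl_s8(int8x8_t a, int8x8_t b) { return vshl_s8(a, b); }
+//   int16x8_t test_vshll_n_s8(int8x8_t a)          { return vshll_n_s8(a, 1); }
+//   int8x8_t  test_vshl_n_s8(int8x8_t a)           { return vshl_n_s8(a, 1); }
+//
+// Register shifts keep the @llvm.arm.neon.vshifts/vshiftu intrinsics, while
+// the _n forms fold to plain IR shl once the immediate is known; the shift
+// amounts (1 here) are only example values.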
CHECK: entry: +// CHECK-NEXT: %vshl_n = shl <16 x i8> %a, +// CHECK-NEXT: ret <16 x i8> %vshl_n +// CHECK-LABEL: test_vshlq_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vshl_n = shl <8 x i16> %a, +// CHECK-NEXT: ret <8 x i16> %vshl_n +// CHECK-LABEL: test_vshlq_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vshl_n = shl <4 x i32> %a, +// CHECK-NEXT: ret <4 x i32> %vshl_n +// CHECK-LABEL: test_vshlq_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vshl_n = shl <2 x i64> %a, +// CHECK-NEXT: ret <2 x i64> %vshl_n +// CHECK-LABEL: test_vshlq_n_u8 +// CHECK: entry: +// CHECK-NEXT: %vshl_n = shl <16 x i8> %a, +// CHECK-NEXT: ret <16 x i8> %vshl_n +// CHECK-LABEL: test_vshlq_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vshl_n = shl <8 x i16> %a, +// CHECK-NEXT: ret <8 x i16> %vshl_n +// CHECK-LABEL: test_vshlq_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vshl_n = shl <4 x i32> %a, +// CHECK-NEXT: ret <4 x i32> %vshl_n +// CHECK-LABEL: test_vshlq_n_u64 +// CHECK: entry: +// CHECK-NEXT: %vshl_n = shl <2 x i64> %a, +// CHECK-NEXT: ret <2 x i64> %vshl_n +// CHECK-LABEL: test_vshrn_n_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = lshr <8 x i16> %a, +// CHECK-NEXT: %vshrn_n = trunc <8 x i16> %0 to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %vshrn_n +// CHECK-LABEL: test_vshrn_n_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = lshr <4 x i32> %a, +// CHECK-NEXT: %vshrn_n = trunc <4 x i32> %0 to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %vshrn_n +// CHECK-LABEL: test_vshrn_n_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = lshr <2 x i64> %a, +// CHECK-NEXT: %vshrn_n = trunc <2 x i64> %0 to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %vshrn_n +// CHECK-LABEL: test_vshrn_n_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = lshr <8 x i16> %a, +// CHECK-NEXT: %vshrn_n = trunc <8 x i16> %0 to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %vshrn_n +// CHECK-LABEL: test_vshrn_n_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = lshr <4 x i32> %a, +// CHECK-NEXT: %vshrn_n = trunc <4 x i32> %0 to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %vshrn_n +// CHECK-LABEL: test_vshrn_n_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = lshr <2 x i64> %a, +// CHECK-NEXT: %vshrn_n = trunc <2 x i64> %0 to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %vshrn_n +// CHECK-LABEL: test_vshr_n_s8 +// CHECK: entry: +// CHECK-NEXT: %vshr_n = ashr <8 x i8> %a, +// CHECK-NEXT: ret <8 x i8> %vshr_n +// CHECK-LABEL: test_vshr_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vshr_n = ashr <4 x i16> %a, +// CHECK-NEXT: ret <4 x i16> %vshr_n +// CHECK-LABEL: test_vshr_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vshr_n = ashr <2 x i32> %a, +// CHECK-NEXT: ret <2 x i32> %vshr_n +// CHECK-LABEL: test_vshr_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vshr_n = ashr <1 x i64> %a, +// CHECK-NEXT: ret <1 x i64> %vshr_n +// CHECK-LABEL: test_vshr_n_u8 +// CHECK: entry: +// CHECK-NEXT: %vshr_n = lshr <8 x i8> %a, +// CHECK-NEXT: ret <8 x i8> %vshr_n +// CHECK-LABEL: test_vshr_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vshr_n = lshr <4 x i16> %a, +// CHECK-NEXT: ret <4 x i16> %vshr_n +// CHECK-LABEL: test_vshr_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vshr_n = lshr <2 x i32> %a, +// CHECK-NEXT: ret <2 x i32> %vshr_n +// CHECK-LABEL: test_vshr_n_u64 +// CHECK: entry: +// CHECK-NEXT: %vshr_n = lshr <1 x i64> %a, +// CHECK-NEXT: ret <1 x i64> %vshr_n +// CHECK-LABEL: test_vshrq_n_s8 +// CHECK: entry: +// CHECK-NEXT: %vshr_n = ashr <16 x i8> %a, +// CHECK-NEXT: ret <16 x i8> %vshr_n +// CHECK-LABEL: test_vshrq_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vshr_n = ashr <8 x i16> %a, +// CHECK-NEXT: ret <8 x i16> %vshr_n +// CHECK-LABEL: test_vshrq_n_s32 +// CHECK: 
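+//
+// Likewise for the plain and narrowing right shifts above (a sketch under the
+// same assumption; the splat shift constants are not visible in these lines,
+// so 1 is just an example immediate):
+//
+//   int8x8_t test_vshr_n_s8(int8x8_t a)    { return vshr_n_s8(a, 1); }
+//   int8x8_t test_vshrn_n_s16(int16x8_t a) { return vshrn_n_s16(a, 1); }
+//
+// Signed inputs produce ashr, unsigned inputs lshr, and vshrn_n adds a trunc
+// to the narrower element type.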
entry: +// CHECK-NEXT: %vshr_n = ashr <4 x i32> %a, +// CHECK-NEXT: ret <4 x i32> %vshr_n +// CHECK-LABEL: test_vshrq_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vshr_n = ashr <2 x i64> %a, +// CHECK-NEXT: ret <2 x i64> %vshr_n +// CHECK-LABEL: test_vshrq_n_u8 +// CHECK: entry: +// CHECK-NEXT: %vshr_n = lshr <16 x i8> %a, +// CHECK-NEXT: ret <16 x i8> %vshr_n +// CHECK-LABEL: test_vshrq_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vshr_n = lshr <8 x i16> %a, +// CHECK-NEXT: ret <8 x i16> %vshr_n +// CHECK-LABEL: test_vshrq_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vshr_n = lshr <4 x i32> %a, +// CHECK-NEXT: ret <4 x i32> %vshr_n +// CHECK-LABEL: test_vshrq_n_u64 +// CHECK: entry: +// CHECK-NEXT: %vshr_n = lshr <2 x i64> %a, +// CHECK-NEXT: ret <2 x i64> %vshr_n +// CHECK-LABEL: test_vsli_n_s8 +// CHECK: entry: +// CHECK-NEXT: %vsli_n = tail call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) +// CHECK-NEXT: ret <8 x i8> %vsli_n +// CHECK-LABEL: test_vsli_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> ) +// CHECK-NEXT: ret <4 x i16> %vsli_n2 +// CHECK-LABEL: test_vsli_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> ) +// CHECK-NEXT: ret <2 x i32> %vsli_n2 +// CHECK-LABEL: test_vsli_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %a, <1 x i64> %b, <1 x i64> ) +// CHECK-NEXT: ret <1 x i64> %vsli_n2 +// CHECK-LABEL: test_vsli_n_u8 +// CHECK: entry: +// CHECK-NEXT: %vsli_n = tail call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) +// CHECK-NEXT: ret <8 x i8> %vsli_n +// CHECK-LABEL: test_vsli_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> ) +// CHECK-NEXT: ret <4 x i16> %vsli_n2 +// CHECK-LABEL: test_vsli_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> ) +// CHECK-NEXT: ret <2 x i32> %vsli_n2 +// CHECK-LABEL: test_vsli_n_u64 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %a, <1 x i64> %b, <1 x i64> ) +// CHECK-NEXT: ret <1 x i64> %vsli_n2 +// CHECK-LABEL: test_vsli_n_p8 +// CHECK: entry: +// CHECK-NEXT: %vsli_n = tail call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) +// CHECK-NEXT: ret <8 x i8> %vsli_n +// CHECK-LABEL: test_vsli_n_p16 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> ) +// CHECK-NEXT: ret <4 x i16> %vsli_n2 +// CHECK-LABEL: test_vsliq_n_s8 +// CHECK: entry: +// CHECK-NEXT: %vsli_n = tail call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +// CHECK-NEXT: ret <16 x i8> %vsli_n +// CHECK-LABEL: test_vsliq_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) +// CHECK-NEXT: ret <8 x i16> %vsli_n2 +// CHECK-LABEL: test_vsliq_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) +// CHECK-NEXT: ret <4 x i32> %vsli_n2 +// CHECK-LABEL: test_vsliq_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <2 x i64> 
@llvm.arm.neon.vshiftins.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) +// CHECK-NEXT: ret <2 x i64> %vsli_n2 +// CHECK-LABEL: test_vsliq_n_u8 +// CHECK: entry: +// CHECK-NEXT: %vsli_n = tail call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +// CHECK-NEXT: ret <16 x i8> %vsli_n +// CHECK-LABEL: test_vsliq_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) +// CHECK-NEXT: ret <8 x i16> %vsli_n2 +// CHECK-LABEL: test_vsliq_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) +// CHECK-NEXT: ret <4 x i32> %vsli_n2 +// CHECK-LABEL: test_vsliq_n_u64 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) +// CHECK-NEXT: ret <2 x i64> %vsli_n2 +// CHECK-LABEL: test_vsliq_n_p8 +// CHECK: entry: +// CHECK-NEXT: %vsli_n = tail call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +// CHECK-NEXT: ret <16 x i8> %vsli_n +// CHECK-LABEL: test_vsliq_n_p16 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) +// CHECK-NEXT: ret <8 x i16> %vsli_n2 +// CHECK-LABEL: test_vsra_n_s8 +// CHECK: entry: +// CHECK-NEXT: %vsra_n = ashr <8 x i8> %b, +// CHECK-NEXT: %0 = add <8 x i8> %vsra_n, %a +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vsra_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vsra_n = ashr <4 x i16> %b, +// CHECK-NEXT: %0 = add <4 x i16> %vsra_n, %a +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vsra_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vsra_n = ashr <2 x i32> %b, +// CHECK-NEXT: %0 = add <2 x i32> %vsra_n, %a +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vsra_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vsra_n = ashr <1 x i64> %b, +// CHECK-NEXT: %0 = add <1 x i64> %vsra_n, %a +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vsra_n_u8 +// CHECK: entry: +// CHECK-NEXT: %vsra_n = lshr <8 x i8> %b, +// CHECK-NEXT: %0 = add <8 x i8> %vsra_n, %a +// CHECK-NEXT: ret <8 x i8> %0 +// CHECK-LABEL: test_vsra_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vsra_n = lshr <4 x i16> %b, +// CHECK-NEXT: %0 = add <4 x i16> %vsra_n, %a +// CHECK-NEXT: ret <4 x i16> %0 +// CHECK-LABEL: test_vsra_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vsra_n = lshr <2 x i32> %b, +// CHECK-NEXT: %0 = add <2 x i32> %vsra_n, %a +// CHECK-NEXT: ret <2 x i32> %0 +// CHECK-LABEL: test_vsra_n_u64 +// CHECK: entry: +// CHECK-NEXT: %vsra_n = lshr <1 x i64> %b, +// CHECK-NEXT: %0 = add <1 x i64> %vsra_n, %a +// CHECK-NEXT: ret <1 x i64> %0 +// CHECK-LABEL: test_vsraq_n_s8 +// CHECK: entry: +// CHECK-NEXT: %vsra_n = ashr <16 x i8> %b, +// CHECK-NEXT: %0 = add <16 x i8> %vsra_n, %a +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vsraq_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vsra_n = ashr <8 x i16> %b, +// CHECK-NEXT: %0 = add <8 x i16> %vsra_n, %a +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vsraq_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vsra_n = ashr <4 x i32> %b, +// CHECK-NEXT: %0 = add <4 x i32> %vsra_n, %a +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vsraq_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vsra_n = ashr <2 x i64> %b, +// CHECK-NEXT: %0 = add <2 x i64> %vsra_n, %a +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vsraq_n_u8 +// CHECK: entry: +// CHECK-NEXT: %vsra_n = lshr <16 x 
i8> %b, +// CHECK-NEXT: %0 = add <16 x i8> %vsra_n, %a +// CHECK-NEXT: ret <16 x i8> %0 +// CHECK-LABEL: test_vsraq_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vsra_n = lshr <8 x i16> %b, +// CHECK-NEXT: %0 = add <8 x i16> %vsra_n, %a +// CHECK-NEXT: ret <8 x i16> %0 +// CHECK-LABEL: test_vsraq_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vsra_n = lshr <4 x i32> %b, +// CHECK-NEXT: %0 = add <4 x i32> %vsra_n, %a +// CHECK-NEXT: ret <4 x i32> %0 +// CHECK-LABEL: test_vsraq_n_u64 +// CHECK: entry: +// CHECK-NEXT: %vsra_n = lshr <2 x i64> %b, +// CHECK-NEXT: %0 = add <2 x i64> %vsra_n, %a +// CHECK-NEXT: ret <2 x i64> %0 +// CHECK-LABEL: test_vsri_n_s8 +// CHECK: entry: +// CHECK-NEXT: %vsli_n = tail call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) +// CHECK-NEXT: ret <8 x i8> %vsli_n +// CHECK-LABEL: test_vsri_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> ) +// CHECK-NEXT: ret <4 x i16> %vsli_n2 +// CHECK-LABEL: test_vsri_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> ) +// CHECK-NEXT: ret <2 x i32> %vsli_n2 +// CHECK-LABEL: test_vsri_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %a, <1 x i64> %b, <1 x i64> ) +// CHECK-NEXT: ret <1 x i64> %vsli_n2 +// CHECK-LABEL: test_vsri_n_u8 +// CHECK: entry: +// CHECK-NEXT: %vsli_n = tail call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) +// CHECK-NEXT: ret <8 x i8> %vsli_n +// CHECK-LABEL: test_vsri_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> ) +// CHECK-NEXT: ret <4 x i16> %vsli_n2 +// CHECK-LABEL: test_vsri_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> ) +// CHECK-NEXT: ret <2 x i32> %vsli_n2 +// CHECK-LABEL: test_vsri_n_u64 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %a, <1 x i64> %b, <1 x i64> ) +// CHECK-NEXT: ret <1 x i64> %vsli_n2 +// CHECK-LABEL: test_vsri_n_p8 +// CHECK: entry: +// CHECK-NEXT: %vsli_n = tail call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) +// CHECK-NEXT: ret <8 x i8> %vsli_n +// CHECK-LABEL: test_vsri_n_p16 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> ) +// CHECK-NEXT: ret <4 x i16> %vsli_n2 +// CHECK-LABEL: test_vsriq_n_s8 +// CHECK: entry: +// CHECK-NEXT: %vsli_n = tail call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +// CHECK-NEXT: ret <16 x i8> %vsli_n +// CHECK-LABEL: test_vsriq_n_s16 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) +// CHECK-NEXT: ret <8 x i16> %vsli_n2 +// CHECK-LABEL: test_vsriq_n_s32 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) +// CHECK-NEXT: ret <4 x i32> %vsli_n2 +// CHECK-LABEL: test_vsriq_n_s64 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) +// CHECK-NEXT: ret <2 x i64> %vsli_n2 +// CHECK-LABEL: test_vsriq_n_u8 +// CHECK: 
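+//
+// The vsra/vrsra and vsli/vsri groups above pair a shift with an accumulate
+// or a bit-insert. Sketch of the C side (assumption, example immediates):
+//
+//   int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) { return vsra_n_s8(a, b, 1); }
+//   int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) { return vsri_n_s8(a, b, 1); }
+//
+// vsra_n folds to ashr/lshr plus add, whereas vsli_n and vsri_n both lower to
+// @llvm.arm.neon.vshiftins (vsri being the same intrinsic with the shift
+// vector negated), which is why the vsri tests reuse %vsli_n value names.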
entry: +// CHECK-NEXT: %vsli_n = tail call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +// CHECK-NEXT: ret <16 x i8> %vsli_n +// CHECK-LABEL: test_vsriq_n_u16 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) +// CHECK-NEXT: ret <8 x i16> %vsli_n2 +// CHECK-LABEL: test_vsriq_n_u32 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) +// CHECK-NEXT: ret <4 x i32> %vsli_n2 +// CHECK-LABEL: test_vsriq_n_u64 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) +// CHECK-NEXT: ret <2 x i64> %vsli_n2 +// CHECK-LABEL: test_vsriq_n_p8 +// CHECK: entry: +// CHECK-NEXT: %vsli_n = tail call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +// CHECK-NEXT: ret <16 x i8> %vsli_n +// CHECK-LABEL: test_vsriq_n_p16 +// CHECK: entry: +// CHECK-NEXT: %vsli_n2 = tail call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) +// CHECK-NEXT: ret <8 x i16> %vsli_n2 +// CHECK-LABEL: test_vst1q_u8 +// CHECK: entry: +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %0, <8 x i16> %b, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i32* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* %0, <4 x i32> %b, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* %0, <2 x i64> %b, i32 8) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_s8 +// CHECK: entry: +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %0, <8 x i16> %b, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i32* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* %0, <4 x i32> %b, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* %0, <2 x i64> %b, i32 8) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast half* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v8f16(i8* %0, <8 x half> %b, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast float* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %0, <4 x float> %b, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_p8 +// CHECK: entry: +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %0, <8 x 
i16> %b, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_u8 +// CHECK: entry: +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* %0, <4 x i16> %b, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i32* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* %0, <2 x i32> %b, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* %0, <1 x i64> %b, i32 8) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_s8 +// CHECK: entry: +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* %0, <4 x i16> %b, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i32* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* %0, <2 x i32> %b, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* %0, <1 x i64> %b, i32 8) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast half* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v4f16(i8* %0, <4 x half> %b, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast float* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* %0, <2 x float> %b, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_p8 +// CHECK: entry: +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* %0, <4 x i16> %b, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_lane_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <16 x i8> %b, i32 15 +// CHECK-NEXT: store i8 %0, i8* %a, align 1 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <8 x i16> %b, i32 7 +// CHECK-NEXT: store i16 %0, i16* %a, align 2 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <4 x i32> %b, i32 3 +// CHECK-NEXT: store i32 %0, i32* %a, align 4 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_lane_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64* %a to i8* +// CHECK-NEXT: %1 = shufflevector <2 x i64> %b, <2 x i64> undef, <1 x i32> +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* %0, <1 x i64> %1, i32 8) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_lane_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <16 x i8> %b, i32 15 +// CHECK-NEXT: store i8 %0, i8* %a, align 1 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <8 x i16> %b, i32 7 +// 
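+//
+// Store sketches for the vst1/vst1q_lane blocks above (assumption, as before):
+//
+//   void test_vst1_u8(uint8_t *a, uint8x8_t b)        { vst1_u8(a, b); }
+//   void test_vst1q_lane_u8(uint8_t *a, uint8x16_t b) { vst1q_lane_u8(a, b, 15); }
+//
+// Lane stores of 8/16/32-bit elements become extractelement plus an ordinary
+// store; the 64-bit lane forms keep @llvm.arm.neon.vst1 on a <1 x i64>
+// shuffled out of the input.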
CHECK-NEXT: store i16 %0, i16* %a, align 2 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <4 x i32> %b, i32 3 +// CHECK-NEXT: store i32 %0, i32* %a, align 4 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_lane_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = bitcast i64* %a to i8* +// CHECK-NEXT: %1 = shufflevector <2 x i64> %b, <2 x i64> undef, <1 x i32> +// CHECK-NEXT: tail call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* %0, <1 x i64> %1, i32 8) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_lane_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <8 x half> %b, i32 7 +// CHECK-NEXT: store half %0, half* %a, align 2 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <4 x float> %b, i32 3 +// CHECK-NEXT: store float %0, float* %a, align 4 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_lane_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <16 x i8> %b, i32 15 +// CHECK-NEXT: store i8 %0, i8* %a, align 1 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1q_lane_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <8 x i16> %b, i32 7 +// CHECK-NEXT: store i16 %0, i16* %a, align 2 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_lane_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <8 x i8> %b, i32 7 +// CHECK-NEXT: store i8 %0, i8* %a, align 1 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <4 x i16> %b, i32 3 +// CHECK-NEXT: store i16 %0, i16* %a, align 2 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <2 x i32> %b, i32 1 +// CHECK-NEXT: store i32 %0, i32* %a, align 4 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_lane_u64 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <1 x i64> %b, i32 0 +// CHECK-NEXT: store i64 %0, i64* %a, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_lane_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <8 x i8> %b, i32 7 +// CHECK-NEXT: store i8 %0, i8* %a, align 1 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <4 x i16> %b, i32 3 +// CHECK-NEXT: store i16 %0, i16* %a, align 2 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <2 x i32> %b, i32 1 +// CHECK-NEXT: store i32 %0, i32* %a, align 4 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_lane_s64 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <1 x i64> %b, i32 0 +// CHECK-NEXT: store i64 %0, i64* %a, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_lane_f16 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <4 x half> %b, i32 3 +// CHECK-NEXT: store half %0, half* %a, align 2 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <2 x float> %b, i32 1 +// CHECK-NEXT: store float %0, float* %a, align 4 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_lane_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <8 x i8> %b, i32 7 +// CHECK-NEXT: store i8 %0, i8* %a, align 1 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst1_lane_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = extractelement <4 x i16> %b, i32 3 +// CHECK-NEXT: store i16 %0, i16* %a, align 2 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2q_u8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = 
extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <8 x i8> %0, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <16 x i8> %b.sroa.0.0.vec.expand, <16 x i8> %b.sroa.0.8.vec.expand, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <8 x i8> %2, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <8 x i8> %3, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <16 x i8> %b.sroa.3.16.vec.expand, <16 x i8> %b.sroa.3.24.vec.expand, <16 x i32> +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> %b.sroa.0.8.vecblend, <16 x i8> %b.sroa.3.24.vecblend, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2q_u16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32> +// CHECK-NEXT: %4 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* %4, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2q_u32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x i32> %b.sroa.0.0.vec.expand, <4 x i32> %b.sroa.0.8.vec.expand, <4 x i32> +// CHECK-NEXT: 
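+//
+// The long shufflevector chains above come from the uint8x16x2_t (etc.)
+// argument being coerced to a [4 x i64] array at the ABI boundary; SROA then
+// rebuilds the two q registers before the @llvm.arm.neon.vst2 call. Sketch of
+// the C side (assumption):
+//
+//   void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) { vst2q_u8(a, b); }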
%b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x i32> %2, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x i32> %3, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x i32> %b.sroa.3.16.vec.expand, <4 x i32> %b.sroa.3.24.vec.expand, <4 x i32> +// CHECK-NEXT: %4 = bitcast i32* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* %4, <4 x i32> %b.sroa.0.8.vecblend, <4 x i32> %b.sroa.3.24.vecblend, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2q_s8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <8 x i8> %0, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <16 x i8> %b.sroa.0.0.vec.expand, <16 x i8> %b.sroa.0.8.vec.expand, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <8 x i8> %2, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <8 x i8> %3, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <16 x i8> %b.sroa.3.16.vec.expand, <16 x i8> %b.sroa.3.24.vec.expand, <16 x i32> +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> %b.sroa.0.8.vecblend, <16 x i8> %b.sroa.3.24.vecblend, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2q_s16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, 
<8 x i32> +// CHECK-NEXT: %4 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* %4, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2q_s32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x i32> %b.sroa.0.0.vec.expand, <4 x i32> %b.sroa.0.8.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x i32> %2, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x i32> %3, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x i32> %b.sroa.3.16.vec.expand, <4 x i32> %b.sroa.3.24.vec.expand, <4 x i32> +// CHECK-NEXT: %4 = bitcast i32* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* %4, <4 x i32> %b.sroa.0.8.vecblend, <4 x i32> %b.sroa.3.24.vecblend, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2q_f16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x half> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x half> %0, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x half> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x half> %1, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x half> %b.sroa.0.0.vec.expand, <8 x half> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x half> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x half> %2, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x half> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x half> %3, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x half> %b.sroa.3.16.vec.expand, <8 x half> %b.sroa.3.24.vec.expand, <8 x i32> +// CHECK-NEXT: %4 = bitcast half* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v8f16(i8* %4, <8 x half> %b.sroa.0.8.vecblend, <8 x half> %b.sroa.3.24.vecblend, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2q_f32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x float> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> +// 
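+//
+// Note that the trailing i32 operand on each @llvm.arm.neon.vst2 call above
+// is the pointee alignment in bytes (1 for 8-bit, 2 for 16-bit/f16, 4 for
+// 32-bit elements), not a lane index.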
CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x float> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x float> %b.sroa.0.0.vec.expand, <4 x float> %b.sroa.0.8.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x float> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x float> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x float> %b.sroa.3.16.vec.expand, <4 x float> %b.sroa.3.24.vec.expand, <4 x i32> +// CHECK-NEXT: %4 = bitcast float* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v4f32(i8* %4, <4 x float> %b.sroa.0.8.vecblend, <4 x float> %b.sroa.3.24.vecblend, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2q_p8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <8 x i8> %0, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <16 x i8> %b.sroa.0.0.vec.expand, <16 x i8> %b.sroa.0.8.vec.expand, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <8 x i8> %2, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <8 x i8> %3, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <16 x i8> %b.sroa.3.16.vec.expand, <16 x i8> %b.sroa.3.24.vec.expand, <16 x i32> +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> %b.sroa.0.8.vecblend, <16 x i8> %b.sroa.3.24.vecblend, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2q_p16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = 
shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32> +// CHECK-NEXT: %4 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* %4, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_u8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> %0, <8 x i8> %1, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_u16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %2 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* %2, <4 x i16> %0, <4 x i16> %1, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_u32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %2 = bitcast i32* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v2i32(i8* %2, <2 x i32> %0, <2 x i32> %1, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_u64 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = insertelement <1 x i64> undef, i64 %b.coerce.fca.0.extract, i32 0 +// CHECK-NEXT: %1 = insertelement <1 x i64> undef, i64 %b.coerce.fca.1.extract, i32 0 +// CHECK-NEXT: %2 = bitcast i64* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v1i64(i8* %2, <1 x i64> %0, <1 x i64> %1, i32 8) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_s8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> %0, <8 x i8> %1, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_s16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %2 = bitcast i16* 
%a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* %2, <4 x i16> %0, <4 x i16> %1, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_s32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %2 = bitcast i32* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v2i32(i8* %2, <2 x i32> %0, <2 x i32> %1, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_s64 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = insertelement <1 x i64> undef, i64 %b.coerce.fca.0.extract, i32 0 +// CHECK-NEXT: %1 = insertelement <1 x i64> undef, i64 %b.coerce.fca.1.extract, i32 0 +// CHECK-NEXT: %2 = bitcast i64* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v1i64(i8* %2, <1 x i64> %0, <1 x i64> %1, i32 8) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_f16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x half> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x half> +// CHECK-NEXT: %2 = bitcast half* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v4f16(i8* %2, <4 x half> %0, <4 x half> %1, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_f32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x float> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x float> +// CHECK-NEXT: %2 = bitcast float* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v2f32(i8* %2, <2 x float> %0, <2 x float> %1, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_p8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> %0, <8 x i8> %1, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_p16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %2 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* %2, <4 x i16> %0, <4 x i16> %1, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2q_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32> 
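+//
+// For the d-register vst2 tests the pair is coerced to [2 x i64] and simply
+// bitcast back, with no shuffling. Sketch of the C side (assumption):
+//
+//   void test_vst2_u8(uint8_t *a, uint8x8x2_t b)          { vst2_u8(a, b); }
+//   void test_vst2q_lane_u16(uint16_t *a, uint16x8x2_t b) { vst2q_lane_u16(a, b, 7); }
+//
+// The lane form lowers to @llvm.arm.neon.vst2lane, with the lane index (7)
+// and the alignment (2) as the trailing operands.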
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32> +// CHECK-NEXT: %4 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* %4, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, i32 7, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2q_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x i32> %b.sroa.0.0.vec.expand, <4 x i32> %b.sroa.0.8.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x i32> %2, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x i32> %3, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x i32> %b.sroa.3.16.vec.expand, <4 x i32> %b.sroa.3.24.vec.expand, <4 x i32> +// CHECK-NEXT: %4 = bitcast i32* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2lane.p0i8.v4i32(i8* %4, <4 x i32> %b.sroa.0.8.vecblend, <4 x i32> %b.sroa.3.24.vecblend, i32 3, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2q_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to 
<4 x i16> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32> +// CHECK-NEXT: %4 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* %4, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, i32 7, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2q_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x i32> %b.sroa.0.0.vec.expand, <4 x i32> %b.sroa.0.8.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x i32> %2, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x i32> %3, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x i32> %b.sroa.3.16.vec.expand, <4 x i32> %b.sroa.3.24.vec.expand, <4 x i32> +// CHECK-NEXT: %4 = bitcast i32* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2lane.p0i8.v4i32(i8* %4, <4 x i32> %b.sroa.0.8.vecblend, <4 x i32> %b.sroa.3.24.vecblend, i32 3, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2q_lane_f16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x half> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x half> %0, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x half> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x half> %1, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x half> %b.sroa.0.0.vec.expand, <8 x half> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x half> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x half> %2, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x half> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x half> %3, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x half> %b.sroa.3.16.vec.expand, <8 x half> %b.sroa.3.24.vec.expand, <8 x i32> +// 
CHECK-NEXT: %4 = bitcast half* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2lane.p0i8.v8f16(i8* %4, <8 x half> %b.sroa.0.8.vecblend, <8 x half> %b.sroa.3.24.vecblend, i32 7, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2q_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x float> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x float> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x float> %b.sroa.0.0.vec.expand, <4 x float> %b.sroa.0.8.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x float> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x float> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x float> %b.sroa.3.16.vec.expand, <4 x float> %b.sroa.3.24.vec.expand, <4 x i32> +// CHECK-NEXT: %4 = bitcast float* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2lane.p0i8.v4f32(i8* %4, <4 x float> %b.sroa.0.8.vecblend, <4 x float> %b.sroa.3.24.vecblend, i32 3, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2q_lane_p16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32> +// CHECK-NEXT: %4 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* %4, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, i32 7, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_lane_u8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 
%b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> %0, <8 x i8> %1, i32 7, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %2 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* %2, <4 x i16> %0, <4 x i16> %1, i32 3, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %2 = bitcast i32* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2lane.p0i8.v2i32(i8* %2, <2 x i32> %0, <2 x i32> %1, i32 1, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_lane_s8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> %0, <8 x i8> %1, i32 7, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %2 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* %2, <4 x i16> %0, <4 x i16> %1, i32 3, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %2 = bitcast i32* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2lane.p0i8.v2i32(i8* %2, <2 x i32> %0, <2 x i32> %1, i32 1, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_lane_f16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x half> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x half> +// CHECK-NEXT: %2 = bitcast half* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2lane.p0i8.v4f16(i8* %2, <4 x half> %0, <4 x half> %1, i32 3, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// 
CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x float> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x float> +// CHECK-NEXT: %2 = bitcast float* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2lane.p0i8.v2f32(i8* %2, <2 x float> %0, <2 x float> %1, i32 1, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_lane_p8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> %0, <8 x i8> %1, i32 7, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst2_lane_p16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %2 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* %2, <4 x i16> %0, <4 x i16> %1, i32 3, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3q_u8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <8 x i8> %0, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <16 x i8> %b.sroa.0.0.vec.expand, <16 x i8> %b.sroa.0.8.vec.expand, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <8 x i8> %2, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <8 x i8> %3, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <16 x i8> %b.sroa.3.16.vec.expand, <16 x i8> %b.sroa.3.24.vec.expand, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <8 x i8> %4, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <8 x i8> %5, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <16 x i8> %b.sroa.6.32.vec.expand, <16 x i8> %b.sroa.6.40.vec.expand, <16 x i32> +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> %b.sroa.0.8.vecblend, <16 x i8> %b.sroa.3.24.vecblend, <16 x i8> %b.sroa.6.40.vecblend, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: 
test_vst3q_u16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x i16> %4, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x i16> %5, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x i16> %b.sroa.6.32.vec.expand, <8 x i16> %b.sroa.6.40.vec.expand, <8 x i32> +// CHECK-NEXT: %6 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* %6, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, <8 x i16> %b.sroa.6.40.vecblend, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3q_u32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x i32> %b.sroa.0.0.vec.expand, <4 x i32> %b.sroa.0.8.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x i32> %2, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x i32> %3, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x i32> %b.sroa.3.16.vec.expand, <4 x i32> %b.sroa.3.24.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 
%b.coerce.fca.4.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <2 x i32> %4, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <4 x i32> %b.sroa.6.32.vec.expand, <4 x i32> %b.sroa.6.40.vec.expand, <4 x i32> +// CHECK-NEXT: %6 = bitcast i32* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* %6, <4 x i32> %b.sroa.0.8.vecblend, <4 x i32> %b.sroa.3.24.vecblend, <4 x i32> %b.sroa.6.40.vecblend, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3q_s8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <8 x i8> %0, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <16 x i8> %b.sroa.0.0.vec.expand, <16 x i8> %b.sroa.0.8.vec.expand, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <8 x i8> %2, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <8 x i8> %3, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <16 x i8> %b.sroa.3.16.vec.expand, <16 x i8> %b.sroa.3.24.vec.expand, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <8 x i8> %4, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <8 x i8> %5, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <16 x i8> %b.sroa.6.32.vec.expand, <16 x i8> %b.sroa.6.40.vec.expand, <16 x i32> +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> %b.sroa.0.8.vecblend, <16 x i8> %b.sroa.3.24.vecblend, <16 x i8> %b.sroa.6.40.vecblend, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3q_s16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: 
%b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x i16> %4, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x i16> %5, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x i16> %b.sroa.6.32.vec.expand, <8 x i16> %b.sroa.6.40.vec.expand, <8 x i32> +// CHECK-NEXT: %6 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* %6, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, <8 x i16> %b.sroa.6.40.vecblend, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3q_s32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x i32> %b.sroa.0.0.vec.expand, <4 x i32> %b.sroa.0.8.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x i32> %2, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x i32> %3, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x i32> %b.sroa.3.16.vec.expand, <4 x i32> %b.sroa.3.24.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <2 x i32> %4, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <4 x i32> %b.sroa.6.32.vec.expand, <4 x i32> %b.sroa.6.40.vec.expand, <4 x i32> +// CHECK-NEXT: %6 = bitcast i32* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* %6, <4 x i32> %b.sroa.0.8.vecblend, <4 x i32> 
%b.sroa.3.24.vecblend, <4 x i32> %b.sroa.6.40.vecblend, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3q_f16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x half> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x half> %0, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x half> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x half> %1, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x half> %b.sroa.0.0.vec.expand, <8 x half> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x half> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x half> %2, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x half> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x half> %3, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x half> %b.sroa.3.16.vec.expand, <8 x half> %b.sroa.3.24.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x half> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x half> %4, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x half> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x half> %5, <4 x half> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x half> %b.sroa.6.32.vec.expand, <8 x half> %b.sroa.6.40.vec.expand, <8 x i32> +// CHECK-NEXT: %6 = bitcast half* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v8f16(i8* %6, <8 x half> %b.sroa.0.8.vecblend, <8 x half> %b.sroa.3.24.vecblend, <8 x half> %b.sroa.6.40.vecblend, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3q_f32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x float> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x float> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x float> %b.sroa.0.0.vec.expand, <4 x float> %b.sroa.0.8.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x float> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x float> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x float> %b.sroa.3.16.vec.expand, 
<4 x float> %b.sroa.3.24.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <2 x float> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <2 x float> %4, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <2 x float> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <4 x float> %b.sroa.6.32.vec.expand, <4 x float> %b.sroa.6.40.vec.expand, <4 x i32> +// CHECK-NEXT: %6 = bitcast float* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v4f32(i8* %6, <4 x float> %b.sroa.0.8.vecblend, <4 x float> %b.sroa.3.24.vecblend, <4 x float> %b.sroa.6.40.vecblend, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3q_p8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <8 x i8> %0, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <16 x i8> %b.sroa.0.0.vec.expand, <16 x i8> %b.sroa.0.8.vec.expand, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <8 x i8> %2, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <8 x i8> %3, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <16 x i8> %b.sroa.3.16.vec.expand, <16 x i8> %b.sroa.3.24.vec.expand, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <8 x i8> %4, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <8 x i8> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <8 x i8> %5, <8 x i8> undef, <16 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <16 x i8> %b.sroa.6.32.vec.expand, <16 x i8> %b.sroa.6.40.vec.expand, <16 x i32> +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> %b.sroa.0.8.vecblend, <16 x i8> %b.sroa.3.24.vecblend, <16 x i8> %b.sroa.6.40.vecblend, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3q_p16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> 
%1, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x i16> %4, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x i16> %5, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x i16> %b.sroa.6.32.vec.expand, <8 x i16> %b.sroa.6.40.vec.expand, <8 x i32> +// CHECK-NEXT: %6 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* %6, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, <8 x i16> %b.sroa.6.40.vecblend, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3_u8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8> +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3_u16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %3 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* %3, <4 x i16> %0, <4 x i16> %1, <4 x i16> %2, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3_u32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32> +// CHECK-NEXT: %3 = bitcast i32* %a to i8* +// CHECK-NEXT: tail call void 
@llvm.arm.neon.vst3.p0i8.v2i32(i8* %3, <2 x i32> %0, <2 x i32> %1, <2 x i32> %2, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3_u64 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2 +// CHECK-NEXT: %0 = insertelement <1 x i64> undef, i64 %b.coerce.fca.0.extract, i32 0 +// CHECK-NEXT: %1 = insertelement <1 x i64> undef, i64 %b.coerce.fca.1.extract, i32 0 +// CHECK-NEXT: %2 = insertelement <1 x i64> undef, i64 %b.coerce.fca.2.extract, i32 0 +// CHECK-NEXT: %3 = bitcast i64* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* %3, <1 x i64> %0, <1 x i64> %1, <1 x i64> %2, i32 8) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3_s8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8> +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3_s16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %3 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* %3, <4 x i16> %0, <4 x i16> %1, <4 x i16> %2, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3_s32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32> +// CHECK-NEXT: %3 = bitcast i32* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v2i32(i8* %3, <2 x i32> %0, <2 x i32> %1, <2 x i32> %2, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3_s64 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2 +// CHECK-NEXT: %0 = insertelement <1 x i64> undef, i64 %b.coerce.fca.0.extract, i32 0 +// CHECK-NEXT: %1 = insertelement <1 x i64> undef, i64 %b.coerce.fca.1.extract, i32 0 +// CHECK-NEXT: %2 = insertelement <1 x i64> undef, i64 %b.coerce.fca.2.extract, i32 0 +// CHECK-NEXT: %3 = bitcast i64* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* %3, <1 x i64> %0, <1 x i64> %1, <1 x i64> %2, i32 8) +// 
CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3_f16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x half> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x half> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x half> +// CHECK-NEXT: %3 = bitcast half* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v4f16(i8* %3, <4 x half> %0, <4 x half> %1, <4 x half> %2, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3_f32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x float> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x float> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x float> +// CHECK-NEXT: %3 = bitcast float* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v2f32(i8* %3, <2 x float> %0, <2 x float> %1, <2 x float> %2, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3_p8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8> +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3_p16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %3 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* %3, <4 x i16> %0, <4 x i16> %1, <4 x i16> %2, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3q_lane_u16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: 
%b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x i16> %4, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x i16> %5, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x i16> %b.sroa.6.32.vec.expand, <8 x i16> %b.sroa.6.40.vec.expand, <8 x i32> +// CHECK-NEXT: %6 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* %6, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, <8 x i16> %b.sroa.6.40.vecblend, i32 7, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3q_lane_u32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x i32> %b.sroa.0.0.vec.expand, <4 x i32> %b.sroa.0.8.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x i32> %2, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x i32> %3, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x i32> %b.sroa.3.16.vec.expand, <4 x i32> %b.sroa.3.24.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <2 x i32> %4, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <4 x i32> %b.sroa.6.32.vec.expand, <4 x i32> %b.sroa.6.40.vec.expand, <4 x i32> +// CHECK-NEXT: %6 = bitcast i32* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3lane.p0i8.v4i32(i8* %6, <4 x i32> %b.sroa.0.8.vecblend, <4 x i32> %b.sroa.3.24.vecblend, <4 x i32> %b.sroa.6.40.vecblend, i32 3, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3q_lane_s16 
+// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x i16> %4, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5 +// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x i16> +// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x i16> %5, <4 x i16> undef, <8 x i32> +// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x i16> %b.sroa.6.32.vec.expand, <8 x i16> %b.sroa.6.40.vec.expand, <8 x i32> +// CHECK-NEXT: %6 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* %6, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, <8 x i16> %b.sroa.6.40.vecblend, i32 7, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst3q_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1 +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x i32> %b.sroa.0.0.vec.expand, <4 x i32> %b.sroa.0.8.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2 +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x i32> %2, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3 +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32> +// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x i32> %3, <2 x i32> undef, <4 x i32> +// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x i32> %b.sroa.3.16.vec.expand, <4 x i32> %b.sroa.3.24.vec.expand, <4 x i32> +// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4 +// CHECK-NEXT: %4 = bitcast i64 
%b.coerce.fca.4.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <2 x i32> %4, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <4 x i32> %b.sroa.6.32.vec.expand, <4 x i32> %b.sroa.6.40.vec.expand, <4 x i32>
+// CHECK-NEXT: %6 = bitcast i32* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst3lane.p0i8.v4i32(i8* %6, <4 x i32> %b.sroa.0.8.vecblend, <4 x i32> %b.sroa.3.24.vecblend, <4 x i32> %b.sroa.6.40.vecblend, i32 3, i32 4)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst3q_lane_f16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x half> %0, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x half> %1, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x half> %b.sroa.0.0.vec.expand, <8 x half> %b.sroa.0.8.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x half> %2, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x half> %3, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x half> %b.sroa.3.16.vec.expand, <8 x half> %b.sroa.3.24.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4
+// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x half> %4, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x half> %5, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x half> %b.sroa.6.32.vec.expand, <8 x half> %b.sroa.6.40.vec.expand, <8 x i32>
+// CHECK-NEXT: %6 = bitcast half* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst3lane.p0i8.v8f16(i8* %6, <8 x half> %b.sroa.0.8.vecblend, <8 x half> %b.sroa.3.24.vecblend, <8 x half> %b.sroa.6.40.vecblend, i32 7, i32 2)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst3q_lane_f32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x float> %b.sroa.0.0.vec.expand, <4 x float> %b.sroa.0.8.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x float> %b.sroa.3.16.vec.expand, <4 x float> %b.sroa.3.24.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4
+// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <2 x float> %4, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <4 x float> %b.sroa.6.32.vec.expand, <4 x float> %b.sroa.6.40.vec.expand, <4 x i32>
+// CHECK-NEXT: %6 = bitcast float* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst3lane.p0i8.v4f32(i8* %6, <4 x float> %b.sroa.0.8.vecblend, <4 x float> %b.sroa.3.24.vecblend, <4 x float> %b.sroa.6.40.vecblend, i32 3, i32 4)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst3q_lane_p16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [6 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [6 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [6 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [6 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [6 x i64] %b.coerce, 4
+// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x i16> %4, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [6 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x i16> %5, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x i16> %b.sroa.6.32.vec.expand, <8 x i16> %b.sroa.6.40.vec.expand, <8 x i32>
+// CHECK-NEXT: %6 = bitcast i16* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* %6, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, <8 x i16> %b.sroa.6.40.vecblend, i32 7, i32 2)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst3_lane_u8
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8>
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, i32 7, i32 1)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst3_lane_u16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16>
+// CHECK-NEXT: %3 = bitcast i16* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* %3, <4 x i16> %0, <4 x i16> %1, <4 x i16> %2, i32 3, i32 2)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst3_lane_u32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32>
+// CHECK-NEXT: %3 = bitcast i32* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst3lane.p0i8.v2i32(i8* %3, <2 x i32> %0, <2 x i32> %1, <2 x i32> %2, i32 1, i32 4)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst3_lane_s8
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8>
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, i32 7, i32 1)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst3_lane_s16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16>
+// CHECK-NEXT: %3 = bitcast i16* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* %3, <4 x i16> %0, <4 x i16> %1, <4 x i16> %2, i32 3, i32 2)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst3_lane_s32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32>
+// CHECK-NEXT: %3 = bitcast i32* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst3lane.p0i8.v2i32(i8* %3, <2 x i32> %0, <2 x i32> %1, <2 x i32> %2, i32 1, i32 4)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst3_lane_f16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x half>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x half>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x half>
+// CHECK-NEXT: %3 = bitcast half* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst3lane.p0i8.v4f16(i8* %3, <4 x half> %0, <4 x half> %1, <4 x half> %2, i32 3, i32 2)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst3_lane_f32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x float>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x float>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x float>
+// CHECK-NEXT: %3 = bitcast float* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst3lane.p0i8.v2f32(i8* %3, <2 x float> %0, <2 x float> %1, <2 x float> %2, i32 1, i32 4)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst3_lane_p8
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8>
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, i32 7, i32 1)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst3_lane_p16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [3 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [3 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [3 x i64] %b.coerce, 2
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16>
+// CHECK-NEXT: %3 = bitcast i16* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* %3, <4 x i16> %0, <4 x i16> %1, <4 x i16> %2, i32 3, i32 2)
+// CHECK-NEXT: ret void
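(For orientation: each CHECK-LABEL block in this file corresponds to a C wrapper in test/CodeGen/arm_neon_intrinsics.c that calls the matching intrinsic once; the wrappers themselves are not reproduced in this .v8 expectations file. A minimal sketch of what the vst3 lane-store tests presumably look like on the C side, with hypothetical parameter names:

// Hypothetical reconstruction of the C test the vst3_lane_p16 block above
// checks; the real definition lives in arm_neon_intrinsics.c, not here.
void test_vst3_lane_p16(poly16_t *a, poly16x4x3_t b) {
  // Stores lane 3 of each of the three d-register vectors; the expected IR
  // ends in @llvm.arm.neon.vst3lane.p0i8.v4i16(..., i32 3, i32 2), i.e.
  // lane index 3 followed by the alignment operand.
  vst3_lane_p16(a, b, 3);
}
)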
+// CHECK-LABEL: test_vst4q_u8
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <8 x i8> %0, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <16 x i8> %b.sroa.0.0.vec.expand, <16 x i8> %b.sroa.0.8.vec.expand, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <8 x i8> %2, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <8 x i8> %3, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <16 x i8> %b.sroa.3.16.vec.expand, <16 x i8> %b.sroa.3.24.vec.expand, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4
+// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <8 x i8> %4, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <8 x i8> %5, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <16 x i8> %b.sroa.6.32.vec.expand, <16 x i8> %b.sroa.6.40.vec.expand, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6
+// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <8 x i8> %6, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7
+// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <8 x i8> %7, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <16 x i8> %b.sroa.9.48.vec.expand, <16 x i8> %b.sroa.9.56.vec.expand, <16 x i32>
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> %b.sroa.0.8.vecblend, <16 x i8> %b.sroa.3.24.vecblend, <16 x i8> %b.sroa.6.40.vecblend, <16 x i8> %b.sroa.9.56.vecblend, i32 1)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4q_u16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4
+// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x i16> %4, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x i16> %5, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x i16> %b.sroa.6.32.vec.expand, <8 x i16> %b.sroa.6.40.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6
+// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <4 x i16> %6, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7
+// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <4 x i16> %7, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <8 x i16> %b.sroa.9.48.vec.expand, <8 x i16> %b.sroa.9.56.vec.expand, <8 x i32>
+// CHECK-NEXT: %8 = bitcast i16* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* %8, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, <8 x i16> %b.sroa.6.40.vecblend, <8 x i16> %b.sroa.9.56.vecblend, i32 2)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4q_u32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x i32> %b.sroa.0.0.vec.expand, <4 x i32> %b.sroa.0.8.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x i32> %2, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x i32> %3, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x i32> %b.sroa.3.16.vec.expand, <4 x i32> %b.sroa.3.24.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4
+// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <2 x i32> %4, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <4 x i32> %b.sroa.6.32.vec.expand, <4 x i32> %b.sroa.6.40.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6
+// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <2 x i32> %6, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7
+// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <2 x i32> %7, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <4 x i32> %b.sroa.9.48.vec.expand, <4 x i32> %b.sroa.9.56.vec.expand, <4 x i32>
+// CHECK-NEXT: %8 = bitcast i32* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* %8, <4 x i32> %b.sroa.0.8.vecblend, <4 x i32> %b.sroa.3.24.vecblend, <4 x i32> %b.sroa.6.40.vecblend, <4 x i32> %b.sroa.9.56.vecblend, i32 4)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4q_s8
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <8 x i8> %0, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <16 x i8> %b.sroa.0.0.vec.expand, <16 x i8> %b.sroa.0.8.vec.expand, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <8 x i8> %2, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <8 x i8> %3, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <16 x i8> %b.sroa.3.16.vec.expand, <16 x i8> %b.sroa.3.24.vec.expand, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4
+// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <8 x i8> %4, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <8 x i8> %5, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <16 x i8> %b.sroa.6.32.vec.expand, <16 x i8> %b.sroa.6.40.vec.expand, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6
+// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <8 x i8> %6, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7
+// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <8 x i8> %7, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <16 x i8> %b.sroa.9.48.vec.expand, <16 x i8> %b.sroa.9.56.vec.expand, <16 x i32>
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> %b.sroa.0.8.vecblend, <16 x i8> %b.sroa.3.24.vecblend, <16 x i8> %b.sroa.6.40.vecblend, <16 x i8> %b.sroa.9.56.vecblend, i32 1)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4q_s16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4
+// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x i16> %4, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x i16> %5, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x i16> %b.sroa.6.32.vec.expand, <8 x i16> %b.sroa.6.40.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6
+// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <4 x i16> %6, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7
+// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <4 x i16> %7, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <8 x i16> %b.sroa.9.48.vec.expand, <8 x i16> %b.sroa.9.56.vec.expand, <8 x i32>
+// CHECK-NEXT: %8 = bitcast i16* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* %8, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, <8 x i16> %b.sroa.6.40.vecblend, <8 x i16> %b.sroa.9.56.vecblend, i32 2)
+// CHECK-NEXT: ret void
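(The expand/blend shuffle pairs in the q-register tests above come from SROA reassembling each <16 x i8>/<8 x i16>/<4 x i32> argument out of two i64 halves of the [8 x i64] array the 64-byte struct is coerced to. A minimal sketch of the C side these checks presumably mirror, with assumed parameter names:

// Hypothetical mirror of the C test: int16x8x4_t is passed coerced to
// [8 x i64]; each <8 x i16> q vector is rebuilt from two i64 halves, which
// is exactly what the *.vec.expand / *.vecblend shufflevectors check for.
void test_vst4q_s16(int16_t *a, int16x8x4_t b) {
  vst4q_s16(a, b); // interleaved store of four <8 x i16> vectors
}
)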
+// CHECK-LABEL: test_vst4q_s32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x i32> %b.sroa.0.0.vec.expand, <4 x i32> %b.sroa.0.8.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x i32> %2, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x i32> %3, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x i32> %b.sroa.3.16.vec.expand, <4 x i32> %b.sroa.3.24.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4
+// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <2 x i32> %4, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <4 x i32> %b.sroa.6.32.vec.expand, <4 x i32> %b.sroa.6.40.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6
+// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <2 x i32> %6, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7
+// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <2 x i32> %7, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <4 x i32> %b.sroa.9.48.vec.expand, <4 x i32> %b.sroa.9.56.vec.expand, <4 x i32>
+// CHECK-NEXT: %8 = bitcast i32* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* %8, <4 x i32> %b.sroa.0.8.vecblend, <4 x i32> %b.sroa.3.24.vecblend, <4 x i32> %b.sroa.6.40.vecblend, <4 x i32> %b.sroa.9.56.vecblend, i32 4)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4q_f16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x half> %0, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x half> %1, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x half> %b.sroa.0.0.vec.expand, <8 x half> %b.sroa.0.8.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x half> %2, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x half> %3, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x half> %b.sroa.3.16.vec.expand, <8 x half> %b.sroa.3.24.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4
+// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x half> %4, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x half> %5, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x half> %b.sroa.6.32.vec.expand, <8 x half> %b.sroa.6.40.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6
+// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <4 x half> %6, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7
+// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <4 x half> %7, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <8 x half> %b.sroa.9.48.vec.expand, <8 x half> %b.sroa.9.56.vec.expand, <8 x i32>
+// CHECK-NEXT: %8 = bitcast half* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v8f16(i8* %8, <8 x half> %b.sroa.0.8.vecblend, <8 x half> %b.sroa.3.24.vecblend, <8 x half> %b.sroa.6.40.vecblend, <8 x half> %b.sroa.9.56.vecblend, i32 2)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4q_f32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x float> %b.sroa.0.0.vec.expand, <4 x float> %b.sroa.0.8.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x float> %b.sroa.3.16.vec.expand, <4 x float> %b.sroa.3.24.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4
+// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <2 x float> %4, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <4 x float> %b.sroa.6.32.vec.expand, <4 x float> %b.sroa.6.40.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6
+// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <2 x float> %6, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7
+// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <2 x float> %7, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <4 x float> %b.sroa.9.48.vec.expand, <4 x float> %b.sroa.9.56.vec.expand, <4 x i32>
+// CHECK-NEXT: %8 = bitcast float* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v4f32(i8* %8, <4 x float> %b.sroa.0.8.vecblend, <4 x float> %b.sroa.3.24.vecblend, <4 x float> %b.sroa.6.40.vecblend, <4 x float> %b.sroa.9.56.vecblend, i32 4)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4q_p8
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <8 x i8> %0, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <16 x i8> %b.sroa.0.0.vec.expand, <16 x i8> %b.sroa.0.8.vec.expand, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <8 x i8> %2, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <8 x i8> %3, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <16 x i8> %b.sroa.3.16.vec.expand, <16 x i8> %b.sroa.3.24.vec.expand, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4
+// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <8 x i8> %4, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <8 x i8> %5, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <16 x i8> %b.sroa.6.32.vec.expand, <16 x i8> %b.sroa.6.40.vec.expand, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6
+// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <8 x i8> %6, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7
+// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <8 x i8>
+// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <8 x i8> %7, <8 x i8> undef, <16 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <16 x i8> %b.sroa.9.48.vec.expand, <16 x i8> %b.sroa.9.56.vec.expand, <16 x i32>
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> %b.sroa.0.8.vecblend, <16 x i8> %b.sroa.3.24.vecblend, <16 x i8> %b.sroa.6.40.vecblend, <16 x i8> %b.sroa.9.56.vecblend, i32 1)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4q_p16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4
+// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x i16> %4, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x i16> %5, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x i16> %b.sroa.6.32.vec.expand, <8 x i16> %b.sroa.6.40.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6
+// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <4 x i16> %6, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7
+// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <4 x i16> %7, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <8 x i16> %b.sroa.9.48.vec.expand, <8 x i16> %b.sroa.9.56.vec.expand, <8 x i32>
+// CHECK-NEXT: %8 = bitcast i16* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* %8, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, <8 x i16> %b.sroa.6.40.vecblend, <8 x i16> %b.sroa.9.56.vecblend, i32 2)
+// CHECK-NEXT: ret void
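(The 64-bit d-register vst4 variants that follow are noticeably simpler: each struct element is exactly one i64, so a single bitcast per vector replaces the expand/blend shuffles. Presumed C shape, with assumed parameter names:

// Hypothetical mirror of the C test: uint8x8x4_t is coerced to [4 x i64],
// so each <8 x i8> comes from one bitcast and no shufflevectors are needed.
void test_vst4_u8(uint8_t *a, uint8x8x4_t b) {
  vst4_u8(a, b); // expected to lower to @llvm.arm.neon.vst4.p0i8.v8i8
}
)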
+// CHECK-LABEL: test_vst4_u8
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8>
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <8 x i8>
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, i32 1)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4_u16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16>
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16>
+// CHECK-NEXT: %4 = bitcast i16* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* %4, <4 x i16> %0, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 2)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4_u32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32>
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32>
+// CHECK-NEXT: %4 = bitcast i32* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* %4, <2 x i32> %0, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, i32 4)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4_u64
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %0 = insertelement <1 x i64> undef, i64 %b.coerce.fca.0.extract, i32 0
+// CHECK-NEXT: %1 = insertelement <1 x i64> undef, i64 %b.coerce.fca.1.extract, i32 0
+// CHECK-NEXT: %2 = insertelement <1 x i64> undef, i64 %b.coerce.fca.2.extract, i32 0
+// CHECK-NEXT: %3 = insertelement <1 x i64> undef, i64 %b.coerce.fca.3.extract, i32 0
+// CHECK-NEXT: %4 = bitcast i64* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* %4, <1 x i64> %0, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, i32 8)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4_s8
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8>
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <8 x i8>
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, i32 1)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4_s16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16>
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16>
+// CHECK-NEXT: %4 = bitcast i16* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* %4, <4 x i16> %0, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 2)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4_s32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32>
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32>
+// CHECK-NEXT: %4 = bitcast i32* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* %4, <2 x i32> %0, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, i32 4)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4_s64
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %0 = insertelement <1 x i64> undef, i64 %b.coerce.fca.0.extract, i32 0
+// CHECK-NEXT: %1 = insertelement <1 x i64> undef, i64 %b.coerce.fca.1.extract, i32 0
+// CHECK-NEXT: %2 = insertelement <1 x i64> undef, i64 %b.coerce.fca.2.extract, i32 0
+// CHECK-NEXT: %3 = insertelement <1 x i64> undef, i64 %b.coerce.fca.3.extract, i32 0
+// CHECK-NEXT: %4 = bitcast i64* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* %4, <1 x i64> %0, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, i32 8)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4_f16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x half>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x half>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x half>
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x half>
+// CHECK-NEXT: %4 = bitcast half* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v4f16(i8* %4, <4 x half> %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, i32 2)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4_f32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x float>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x float>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x float>
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x float>
+// CHECK-NEXT: %4 = bitcast float* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v2f32(i8* %4, <2 x float> %0, <2 x float> %1, <2 x float> %2, <2 x float> %3, i32 4)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4_p8
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8>
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <8 x i8>
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, i32 1)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4_p16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16>
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16>
+// CHECK-NEXT: %4 = bitcast i16* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* %4, <4 x i16> %0, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 2)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4q_lane_u16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4
+// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x i16> %4, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x i16> %5, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x i16> %b.sroa.6.32.vec.expand, <8 x i16> %b.sroa.6.40.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6
+// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <4 x i16> %6, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7
+// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <4 x i16> %7, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <8 x i16> %b.sroa.9.48.vec.expand, <8 x i16> %b.sroa.9.56.vec.expand, <8 x i32>
+// CHECK-NEXT: %8 = bitcast i16* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* %8, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, <8 x i16> %b.sroa.6.40.vecblend, <8 x i16> %b.sroa.9.56.vecblend, i32 7, i32 2)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4q_lane_u32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x i32> %b.sroa.0.0.vec.expand, <4 x i32> %b.sroa.0.8.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x i32> %2, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x i32> %3, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x i32> %b.sroa.3.16.vec.expand, <4 x i32> %b.sroa.3.24.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4
+// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <2 x i32> %4, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <4 x i32> %b.sroa.6.32.vec.expand, <4 x i32> %b.sroa.6.40.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6
+// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <2 x i32> %6, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7
+// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <2 x i32> %7, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <4 x i32> %b.sroa.9.48.vec.expand, <4 x i32> %b.sroa.9.56.vec.expand, <4 x i32>
+// CHECK-NEXT: %8 = bitcast i32* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4lane.p0i8.v4i32(i8* %8, <4 x i32> %b.sroa.0.8.vecblend, <4 x i32> %b.sroa.3.24.vecblend, <4 x i32> %b.sroa.6.40.vecblend, <4 x i32> %b.sroa.9.56.vecblend, i32 3, i32 4)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4q_lane_s16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4
+// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x i16> %4, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x i16> %5, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x i16> %b.sroa.6.32.vec.expand, <8 x i16> %b.sroa.6.40.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6
+// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <4 x i16> %6, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7
+// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <4 x i16> %7, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <8 x i16> %b.sroa.9.48.vec.expand, <8 x i16> %b.sroa.9.56.vec.expand, <8 x i32>
+// CHECK-NEXT: %8 = bitcast i16* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* %8, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, <8 x i16> %b.sroa.6.40.vecblend, <8 x i16> %b.sroa.9.56.vecblend, i32 7, i32 2)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4q_lane_s32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x i32> %b.sroa.0.0.vec.expand, <4 x i32> %b.sroa.0.8.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x i32> %2, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x i32> %3, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x i32> %b.sroa.3.16.vec.expand, <4 x i32> %b.sroa.3.24.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4
+// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <2 x i32> %4, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <4 x i32> %b.sroa.6.32.vec.expand, <4 x i32> %b.sroa.6.40.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6
+// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <2 x i32> %6, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7
+// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <2 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <2 x i32> %7, <2 x i32> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <4 x i32> %b.sroa.9.48.vec.expand, <4 x i32> %b.sroa.9.56.vec.expand, <4 x i32>
+// CHECK-NEXT: %8 = bitcast i32* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4lane.p0i8.v4i32(i8* %8, <4 x i32> %b.sroa.0.8.vecblend, <4 x i32> %b.sroa.3.24.vecblend, <4 x i32> %b.sroa.6.40.vecblend, <4 x i32> %b.sroa.9.56.vecblend, i32 3, i32 4)
+// CHECK-NEXT: ret void
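(The f16 variants below are the ones this patch actually enables: the ARM.cpp hunk makes +fp16fml set HasLegalHalfType, so half is a legal type and the checks can expect native <4 x half>/<8 x half> IR. Presumed C shape, with assumed parameter names:

// Hypothetical mirror of the C test: under +fp16fml the float16x8x4_t struct
// lowers to <8 x half> vectors and an @llvm.arm.neon.vst4lane.p0i8.v8f16 call.
void test_vst4q_lane_f16(float16_t *a, float16x8x4_t b) {
  vst4q_lane_f16(a, b, 7);
}
)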
+// CHECK-LABEL: test_vst4q_lane_f16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x half> %0, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x half> %1, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x half> %b.sroa.0.0.vec.expand, <8 x half> %b.sroa.0.8.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x half> %2, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x half> %3, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x half> %b.sroa.3.16.vec.expand, <8 x half> %b.sroa.3.24.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4
+// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x half> %4, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x half> %5, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x half> %b.sroa.6.32.vec.expand, <8 x half> %b.sroa.6.40.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6
+// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <4 x half> %6, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7
+// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <4 x half>
+// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <4 x half> %7, <4 x half> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <8 x half> %b.sroa.9.48.vec.expand, <8 x half> %b.sroa.9.56.vec.expand, <8 x i32>
+// CHECK-NEXT: %8 = bitcast half* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4lane.p0i8.v8f16(i8* %8, <8 x half> %b.sroa.0.8.vecblend, <8 x half> %b.sroa.3.24.vecblend, <8 x half> %b.sroa.6.40.vecblend, <8 x half> %b.sroa.9.56.vecblend, i32 7, i32 2)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4q_lane_f32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <4 x float> %b.sroa.0.0.vec.expand, <4 x float> %b.sroa.0.8.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <4 x float> %b.sroa.3.16.vec.expand, <4 x float> %b.sroa.3.24.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4
+// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <2 x float> %4, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <4 x float> %b.sroa.6.32.vec.expand, <4 x float> %b.sroa.6.40.vec.expand, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6
+// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <2 x float> %6, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7
+// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <2 x float>
+// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <2 x float> %7, <2 x float> undef, <4 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <4 x float> %b.sroa.9.48.vec.expand, <4 x float> %b.sroa.9.56.vec.expand, <4 x i32>
+// CHECK-NEXT: %8 = bitcast float* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4lane.p0i8.v4f32(i8* %8, <4 x float> %b.sroa.0.8.vecblend, <4 x float> %b.sroa.3.24.vecblend, <4 x float> %b.sroa.6.40.vecblend, <4 x float> %b.sroa.9.56.vecblend, i32 3, i32 4)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4q_lane_p16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [8 x i64] %b.coerce, 0
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.0.0.vec.expand = shufflevector <4 x i16> %0, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [8 x i64] %b.coerce, 1
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.0.8.vec.expand = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.0.8.vecblend = shufflevector <8 x i16> %b.sroa.0.0.vec.expand, <8 x i16> %b.sroa.0.8.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [8 x i64] %b.coerce, 2
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.3.16.vec.expand = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [8 x i64] %b.coerce, 3
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.3.24.vec.expand = shufflevector <4 x i16> %3, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.3.24.vecblend = shufflevector <8 x i16> %b.sroa.3.16.vec.expand, <8 x i16> %b.sroa.3.24.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.4.extract = extractvalue [8 x i64] %b.coerce, 4
+// CHECK-NEXT: %4 = bitcast i64 %b.coerce.fca.4.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.6.32.vec.expand = shufflevector <4 x i16> %4, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.5.extract = extractvalue [8 x i64] %b.coerce, 5
+// CHECK-NEXT: %5 = bitcast i64 %b.coerce.fca.5.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.6.40.vec.expand = shufflevector <4 x i16> %5, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.6.40.vecblend = shufflevector <8 x i16> %b.sroa.6.32.vec.expand, <8 x i16> %b.sroa.6.40.vec.expand, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.6.extract = extractvalue [8 x i64] %b.coerce, 6
+// CHECK-NEXT: %6 = bitcast i64 %b.coerce.fca.6.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.9.48.vec.expand = shufflevector <4 x i16> %6, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.coerce.fca.7.extract = extractvalue [8 x i64] %b.coerce, 7
+// CHECK-NEXT: %7 = bitcast i64 %b.coerce.fca.7.extract to <4 x i16>
+// CHECK-NEXT: %b.sroa.9.56.vec.expand = shufflevector <4 x i16> %7, <4 x i16> undef, <8 x i32>
+// CHECK-NEXT: %b.sroa.9.56.vecblend = shufflevector <8 x i16> %b.sroa.9.48.vec.expand, <8 x i16> %b.sroa.9.56.vec.expand, <8 x i32>
+// CHECK-NEXT: %8 = bitcast i16* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* %8, <8 x i16> %b.sroa.0.8.vecblend, <8 x i16> %b.sroa.3.24.vecblend, <8 x i16> %b.sroa.6.40.vecblend, <8 x i16> %b.sroa.9.56.vecblend, i32 7, i32 2)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4_lane_u8
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8>
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <8 x i8>
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, i32 7, i32 1)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4_lane_u16
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16>
+// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16>
+// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16>
+// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16>
+// CHECK-NEXT: %4 = bitcast i16* %a to i8*
+// CHECK-NEXT: tail call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* %4, <4 x i16> %0, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 3, i32 2)
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vst4_lane_u32
+// CHECK: entry:
+// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0
+// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1
+// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2
+// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3
+// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x
i32> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32> +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32> +// CHECK-NEXT: %4 = bitcast i32* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst4lane.p0i8.v2i32(i8* %4, <2 x i32> %0, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, i32 1, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst4_lane_s8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8> +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <8 x i8> +// CHECK-NEXT: tail call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, i32 7, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst4_lane_s16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %4 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* %4, <4 x i16> %0, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 3, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst4_lane_s32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x i32> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x i32> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x i32> +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x i32> +// CHECK-NEXT: %4 = bitcast i32* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst4lane.p0i8.v2i32(i8* %4, <2 x i32> %0, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, i32 1, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst4_lane_f16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x half> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x half> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x half> +// CHECK-NEXT: %3 = 
bitcast i64 %b.coerce.fca.3.extract to <4 x half> +// CHECK-NEXT: %4 = bitcast half* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst4lane.p0i8.v4f16(i8* %4, <4 x half> %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, i32 3, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst4_lane_f32 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <2 x float> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <2 x float> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <2 x float> +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <2 x float> +// CHECK-NEXT: %4 = bitcast float* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst4lane.p0i8.v2f32(i8* %4, <2 x float> %0, <2 x float> %1, <2 x float> %2, <2 x float> %3, i32 1, i32 4) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst4_lane_p8 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <8 x i8> +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <8 x i8> +// CHECK-NEXT: tail call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, i32 7, i32 1) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vst4_lane_p16 +// CHECK: entry: +// CHECK-NEXT: %b.coerce.fca.0.extract = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %b.coerce.fca.1.extract = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %b.coerce.fca.2.extract = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %b.coerce.fca.3.extract = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %b.coerce.fca.0.extract to <4 x i16> +// CHECK-NEXT: %1 = bitcast i64 %b.coerce.fca.1.extract to <4 x i16> +// CHECK-NEXT: %2 = bitcast i64 %b.coerce.fca.2.extract to <4 x i16> +// CHECK-NEXT: %3 = bitcast i64 %b.coerce.fca.3.extract to <4 x i16> +// CHECK-NEXT: %4 = bitcast i16* %a to i8* +// CHECK-NEXT: tail call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* %4, <4 x i16> %0, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 3, i32 2) +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vsub_s8 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <8 x i8> %a, %b +// CHECK-NEXT: ret <8 x i8> %sub.i +// CHECK-LABEL: test_vsub_s16 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <4 x i16> %a, %b +// CHECK-NEXT: ret <4 x i16> %sub.i +// CHECK-LABEL: test_vsub_s32 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <2 x i32> %a, %b +// CHECK-NEXT: ret <2 x i32> %sub.i +// CHECK-LABEL: test_vsub_s64 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <1 x i64> %a, %b +// CHECK-NEXT: ret <1 x i64> %sub.i +// CHECK-LABEL: test_vsub_f32 +// CHECK: entry: +// CHECK-NEXT: %sub.i = fsub <2 x float> %a, %b +// CHECK-NEXT: ret <2 x float> %sub.i +// CHECK-LABEL: test_vsub_u8 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <8 x 
i8> %a, %b +// CHECK-NEXT: ret <8 x i8> %sub.i +// CHECK-LABEL: test_vsub_u16 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <4 x i16> %a, %b +// CHECK-NEXT: ret <4 x i16> %sub.i +// CHECK-LABEL: test_vsub_u32 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <2 x i32> %a, %b +// CHECK-NEXT: ret <2 x i32> %sub.i +// CHECK-LABEL: test_vsub_u64 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <1 x i64> %a, %b +// CHECK-NEXT: ret <1 x i64> %sub.i +// CHECK-LABEL: test_vsubq_s8 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <16 x i8> %a, %b +// CHECK-NEXT: ret <16 x i8> %sub.i +// CHECK-LABEL: test_vsubq_s16 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <8 x i16> %a, %b +// CHECK-NEXT: ret <8 x i16> %sub.i +// CHECK-LABEL: test_vsubq_s32 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <4 x i32> %a, %b +// CHECK-NEXT: ret <4 x i32> %sub.i +// CHECK-LABEL: test_vsubq_s64 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <2 x i64> %a, %b +// CHECK-NEXT: ret <2 x i64> %sub.i +// CHECK-LABEL: test_vsubq_f32 +// CHECK: entry: +// CHECK-NEXT: %sub.i = fsub <4 x float> %a, %b +// CHECK-NEXT: ret <4 x float> %sub.i +// CHECK-LABEL: test_vsubq_u8 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <16 x i8> %a, %b +// CHECK-NEXT: ret <16 x i8> %sub.i +// CHECK-LABEL: test_vsubq_u16 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <8 x i16> %a, %b +// CHECK-NEXT: ret <8 x i16> %sub.i +// CHECK-LABEL: test_vsubq_u32 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <4 x i32> %a, %b +// CHECK-NEXT: ret <4 x i32> %sub.i +// CHECK-LABEL: test_vsubq_u64 +// CHECK: entry: +// CHECK-NEXT: %sub.i = sub <2 x i64> %a, %b +// CHECK-NEXT: ret <2 x i64> %sub.i +// CHECK-LABEL: test_vsubhn_s16 +// CHECK: entry: +// CHECK-NEXT: %vsubhn.i = sub <8 x i16> %a, %b +// CHECK-NEXT: %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> +// CHECK-NEXT: %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %vsubhn2.i +// CHECK-LABEL: test_vsubhn_s32 +// CHECK: entry: +// CHECK-NEXT: %vsubhn.i = sub <4 x i32> %a, %b +// CHECK-NEXT: %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16> +// CHECK-NEXT: %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %vsubhn2.i +// CHECK-LABEL: test_vsubhn_s64 +// CHECK: entry: +// CHECK-NEXT: %vsubhn.i = sub <2 x i64> %a, %b +// CHECK-NEXT: %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32> +// CHECK-NEXT: %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %vsubhn2.i +// CHECK-LABEL: test_vsubhn_u16 +// CHECK: entry: +// CHECK-NEXT: %vsubhn.i = sub <8 x i16> %a, %b +// CHECK-NEXT: %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> +// CHECK-NEXT: %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %vsubhn2.i +// CHECK-LABEL: test_vsubhn_u32 +// CHECK: entry: +// CHECK-NEXT: %vsubhn.i = sub <4 x i32> %a, %b +// CHECK-NEXT: %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16> +// CHECK-NEXT: %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %vsubhn2.i +// CHECK-LABEL: test_vsubhn_u64 +// CHECK: entry: +// CHECK-NEXT: %vsubhn.i = sub <2 x i64> %a, %b +// CHECK-NEXT: %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32> +// CHECK-NEXT: %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %vsubhn2.i +// CHECK-LABEL: test_vsubl_s8 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = sext <8 x i8> %a to <8 x i16> +// CHECK-NEXT: %vmovl.i3.i = sext <8 x i8> %b to <8 x i16> +// CHECK-NEXT: %sub.i = sub nsw <8 x i16> %vmovl.i.i, %vmovl.i3.i +// CHECK-NEXT: ret <8 x i16> %sub.i +// CHECK-LABEL: test_vsubl_s16 +// CHECK: entry: +//
CHECK-NEXT: %vmovl.i.i = sext <4 x i16> %a to <4 x i32> +// CHECK-NEXT: %vmovl.i3.i = sext <4 x i16> %b to <4 x i32> +// CHECK-NEXT: %sub.i = sub nsw <4 x i32> %vmovl.i.i, %vmovl.i3.i +// CHECK-NEXT: ret <4 x i32> %sub.i +// CHECK-LABEL: test_vsubl_s32 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = sext <2 x i32> %a to <2 x i64> +// CHECK-NEXT: %vmovl.i3.i = sext <2 x i32> %b to <2 x i64> +// CHECK-NEXT: %sub.i = sub nsw <2 x i64> %vmovl.i.i, %vmovl.i3.i +// CHECK-NEXT: ret <2 x i64> %sub.i +// CHECK-LABEL: test_vsubl_u8 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = zext <8 x i8> %a to <8 x i16> +// CHECK-NEXT: %vmovl.i3.i = zext <8 x i8> %b to <8 x i16> +// CHECK-NEXT: %sub.i = sub nsw <8 x i16> %vmovl.i.i, %vmovl.i3.i +// CHECK-NEXT: ret <8 x i16> %sub.i +// CHECK-LABEL: test_vsubl_u16 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = zext <4 x i16> %a to <4 x i32> +// CHECK-NEXT: %vmovl.i3.i = zext <4 x i16> %b to <4 x i32> +// CHECK-NEXT: %sub.i = sub nsw <4 x i32> %vmovl.i.i, %vmovl.i3.i +// CHECK-NEXT: ret <4 x i32> %sub.i +// CHECK-LABEL: test_vsubl_u32 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = zext <2 x i32> %a to <2 x i64> +// CHECK-NEXT: %vmovl.i3.i = zext <2 x i32> %b to <2 x i64> +// CHECK-NEXT: %sub.i = sub nsw <2 x i64> %vmovl.i.i, %vmovl.i3.i +// CHECK-NEXT: ret <2 x i64> %sub.i +// CHECK-LABEL: test_vsubw_s8 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = sext <8 x i8> %b to <8 x i16> +// CHECK-NEXT: %sub.i = sub <8 x i16> %a, %vmovl.i.i +// CHECK-NEXT: ret <8 x i16> %sub.i +// CHECK-LABEL: test_vsubw_s16 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = sext <4 x i16> %b to <4 x i32> +// CHECK-NEXT: %sub.i = sub <4 x i32> %a, %vmovl.i.i +// CHECK-NEXT: ret <4 x i32> %sub.i +// CHECK-LABEL: test_vsubw_s32 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = sext <2 x i32> %b to <2 x i64> +// CHECK-NEXT: %sub.i = sub <2 x i64> %a, %vmovl.i.i +// CHECK-NEXT: ret <2 x i64> %sub.i +// CHECK-LABEL: test_vsubw_u8 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = zext <8 x i8> %b to <8 x i16> +// CHECK-NEXT: %sub.i = sub <8 x i16> %a, %vmovl.i.i +// CHECK-NEXT: ret <8 x i16> %sub.i +// CHECK-LABEL: test_vsubw_u16 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = zext <4 x i16> %b to <4 x i32> +// CHECK-NEXT: %sub.i = sub <4 x i32> %a, %vmovl.i.i +// CHECK-NEXT: ret <4 x i32> %sub.i +// CHECK-LABEL: test_vsubw_u32 +// CHECK: entry: +// CHECK-NEXT: %vmovl.i.i = zext <2 x i32> %b to <2 x i64> +// CHECK-NEXT: %sub.i = sub <2 x i64> %a, %vmovl.i.i +// CHECK-NEXT: ret <2 x i64> %sub.i +// CHECK-LABEL: test_vtbl1_u8 +// CHECK: entry: +// CHECK-NEXT: %vtbl1.i = tail call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vtbl1.i +// CHECK-LABEL: test_vtbl1_s8 +// CHECK: entry: +// CHECK-NEXT: %vtbl1.i = tail call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vtbl1.i +// CHECK-LABEL: test_vtbl1_p8 +// CHECK: entry: +// CHECK-NEXT: %vtbl1.i = tail call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vtbl1.i +// CHECK-LABEL: test_vtbl2_u8 +// CHECK: entry: +// CHECK-NEXT: %__p0.coerce.fca.0.extract.i = extractvalue [2 x i64] %a.coerce, 0 +// CHECK-NEXT: %__p0.coerce.fca.1.extract.i = extractvalue [2 x i64] %a.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %__p0.coerce.fca.0.extract.i to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %__p0.coerce.fca.1.extract.i to <8 x i8> +// CHECK-NEXT: %vtbl2.i = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %0, <8 x i8> %1, <8 x i8> %b) +// CHECK-NEXT: ret <8 x 
i8> %vtbl2.i +// CHECK-LABEL: test_vtbl2_s8 +// CHECK: entry: +// CHECK-NEXT: %__p0.coerce.fca.0.extract.i = extractvalue [2 x i64] %a.coerce, 0 +// CHECK-NEXT: %__p0.coerce.fca.1.extract.i = extractvalue [2 x i64] %a.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %__p0.coerce.fca.0.extract.i to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %__p0.coerce.fca.1.extract.i to <8 x i8> +// CHECK-NEXT: %vtbl2.i = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %0, <8 x i8> %1, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vtbl2.i +// CHECK-LABEL: test_vtbl2_p8 +// CHECK: entry: +// CHECK-NEXT: %__p0.coerce.fca.0.extract.i = extractvalue [2 x i64] %a.coerce, 0 +// CHECK-NEXT: %__p0.coerce.fca.1.extract.i = extractvalue [2 x i64] %a.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %__p0.coerce.fca.0.extract.i to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %__p0.coerce.fca.1.extract.i to <8 x i8> +// CHECK-NEXT: %vtbl2.i = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %0, <8 x i8> %1, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vtbl2.i +// CHECK-LABEL: test_vtbl3_u8 +// CHECK: entry: +// CHECK-NEXT: %__p0.coerce.fca.0.extract.i = extractvalue [3 x i64] %a.coerce, 0 +// CHECK-NEXT: %__p0.coerce.fca.1.extract.i = extractvalue [3 x i64] %a.coerce, 1 +// CHECK-NEXT: %__p0.coerce.fca.2.extract.i = extractvalue [3 x i64] %a.coerce, 2 +// CHECK-NEXT: %0 = bitcast i64 %__p0.coerce.fca.0.extract.i to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %__p0.coerce.fca.1.extract.i to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %__p0.coerce.fca.2.extract.i to <8 x i8> +// CHECK-NEXT: %vtbl3.i = tail call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vtbl3.i +// CHECK-LABEL: test_vtbl3_s8 +// CHECK: entry: +// CHECK-NEXT: %__p0.coerce.fca.0.extract.i = extractvalue [3 x i64] %a.coerce, 0 +// CHECK-NEXT: %__p0.coerce.fca.1.extract.i = extractvalue [3 x i64] %a.coerce, 1 +// CHECK-NEXT: %__p0.coerce.fca.2.extract.i = extractvalue [3 x i64] %a.coerce, 2 +// CHECK-NEXT: %0 = bitcast i64 %__p0.coerce.fca.0.extract.i to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %__p0.coerce.fca.1.extract.i to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %__p0.coerce.fca.2.extract.i to <8 x i8> +// CHECK-NEXT: %vtbl3.i = tail call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vtbl3.i +// CHECK-LABEL: test_vtbl3_p8 +// CHECK: entry: +// CHECK-NEXT: %__p0.coerce.fca.0.extract.i = extractvalue [3 x i64] %a.coerce, 0 +// CHECK-NEXT: %__p0.coerce.fca.1.extract.i = extractvalue [3 x i64] %a.coerce, 1 +// CHECK-NEXT: %__p0.coerce.fca.2.extract.i = extractvalue [3 x i64] %a.coerce, 2 +// CHECK-NEXT: %0 = bitcast i64 %__p0.coerce.fca.0.extract.i to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %__p0.coerce.fca.1.extract.i to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %__p0.coerce.fca.2.extract.i to <8 x i8> +// CHECK-NEXT: %vtbl3.i = tail call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vtbl3.i +// CHECK-LABEL: test_vtbl4_u8 +// CHECK: entry: +// CHECK-NEXT: %__p0.coerce.fca.0.extract.i = extractvalue [4 x i64] %a.coerce, 0 +// CHECK-NEXT: %__p0.coerce.fca.1.extract.i = extractvalue [4 x i64] %a.coerce, 1 +// CHECK-NEXT: %__p0.coerce.fca.2.extract.i = extractvalue [4 x i64] %a.coerce, 2 +// CHECK-NEXT: %__p0.coerce.fca.3.extract.i = extractvalue [4 x i64] %a.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %__p0.coerce.fca.0.extract.i to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 
%__p0.coerce.fca.1.extract.i to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %__p0.coerce.fca.2.extract.i to <8 x i8> +// CHECK-NEXT: %3 = bitcast i64 %__p0.coerce.fca.3.extract.i to <8 x i8> +// CHECK-NEXT: %vtbl4.i = tail call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vtbl4.i +// CHECK-LABEL: test_vtbl4_s8 +// CHECK: entry: +// CHECK-NEXT: %__p0.coerce.fca.0.extract.i = extractvalue [4 x i64] %a.coerce, 0 +// CHECK-NEXT: %__p0.coerce.fca.1.extract.i = extractvalue [4 x i64] %a.coerce, 1 +// CHECK-NEXT: %__p0.coerce.fca.2.extract.i = extractvalue [4 x i64] %a.coerce, 2 +// CHECK-NEXT: %__p0.coerce.fca.3.extract.i = extractvalue [4 x i64] %a.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %__p0.coerce.fca.0.extract.i to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %__p0.coerce.fca.1.extract.i to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %__p0.coerce.fca.2.extract.i to <8 x i8> +// CHECK-NEXT: %3 = bitcast i64 %__p0.coerce.fca.3.extract.i to <8 x i8> +// CHECK-NEXT: %vtbl4.i = tail call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vtbl4.i +// CHECK-LABEL: test_vtbl4_p8 +// CHECK: entry: +// CHECK-NEXT: %__p0.coerce.fca.0.extract.i = extractvalue [4 x i64] %a.coerce, 0 +// CHECK-NEXT: %__p0.coerce.fca.1.extract.i = extractvalue [4 x i64] %a.coerce, 1 +// CHECK-NEXT: %__p0.coerce.fca.2.extract.i = extractvalue [4 x i64] %a.coerce, 2 +// CHECK-NEXT: %__p0.coerce.fca.3.extract.i = extractvalue [4 x i64] %a.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %__p0.coerce.fca.0.extract.i to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %__p0.coerce.fca.1.extract.i to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %__p0.coerce.fca.2.extract.i to <8 x i8> +// CHECK-NEXT: %3 = bitcast i64 %__p0.coerce.fca.3.extract.i to <8 x i8> +// CHECK-NEXT: %vtbl4.i = tail call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, <8 x i8> %b) +// CHECK-NEXT: ret <8 x i8> %vtbl4.i +// CHECK-LABEL: test_vtbx1_u8 +// CHECK: entry: +// CHECK-NEXT: %vtbx1.i = tail call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) +// CHECK-NEXT: ret <8 x i8> %vtbx1.i +// CHECK-LABEL: test_vtbx1_s8 +// CHECK: entry: +// CHECK-NEXT: %vtbx1.i = tail call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) +// CHECK-NEXT: ret <8 x i8> %vtbx1.i +// CHECK-LABEL: test_vtbx1_p8 +// CHECK: entry: +// CHECK-NEXT: %vtbx1.i = tail call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) +// CHECK-NEXT: ret <8 x i8> %vtbx1.i +// CHECK-LABEL: test_vtbx2_u8 +// CHECK: entry: +// CHECK-NEXT: %__p1.coerce.fca.0.extract.i = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %__p1.coerce.fca.1.extract.i = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %__p1.coerce.fca.0.extract.i to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %__p1.coerce.fca.1.extract.i to <8 x i8> +// CHECK-NEXT: %vtbx2.i = tail call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %c) +// CHECK-NEXT: ret <8 x i8> %vtbx2.i +// CHECK-LABEL: test_vtbx2_s8 +// CHECK: entry: +// CHECK-NEXT: %__p1.coerce.fca.0.extract.i = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %__p1.coerce.fca.1.extract.i = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %__p1.coerce.fca.0.extract.i to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %__p1.coerce.fca.1.extract.i to <8 x i8> +// CHECK-NEXT: %vtbx2.i = tail call <8 
x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %c) +// CHECK-NEXT: ret <8 x i8> %vtbx2.i +// CHECK-LABEL: test_vtbx2_p8 +// CHECK: entry: +// CHECK-NEXT: %__p1.coerce.fca.0.extract.i = extractvalue [2 x i64] %b.coerce, 0 +// CHECK-NEXT: %__p1.coerce.fca.1.extract.i = extractvalue [2 x i64] %b.coerce, 1 +// CHECK-NEXT: %0 = bitcast i64 %__p1.coerce.fca.0.extract.i to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %__p1.coerce.fca.1.extract.i to <8 x i8> +// CHECK-NEXT: %vtbx2.i = tail call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %c) +// CHECK-NEXT: ret <8 x i8> %vtbx2.i +// CHECK-LABEL: test_vtbx3_u8 +// CHECK: entry: +// CHECK-NEXT: %__p1.coerce.fca.0.extract.i = extractvalue [3 x i64] %b.coerce, 0 +// CHECK-NEXT: %__p1.coerce.fca.1.extract.i = extractvalue [3 x i64] %b.coerce, 1 +// CHECK-NEXT: %__p1.coerce.fca.2.extract.i = extractvalue [3 x i64] %b.coerce, 2 +// CHECK-NEXT: %0 = bitcast i64 %__p1.coerce.fca.0.extract.i to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %__p1.coerce.fca.1.extract.i to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %__p1.coerce.fca.2.extract.i to <8 x i8> +// CHECK-NEXT: %vtbx3.i = tail call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %c) +// CHECK-NEXT: ret <8 x i8> %vtbx3.i +// CHECK-LABEL: test_vtbx3_s8 +// CHECK: entry: +// CHECK-NEXT: %__p1.coerce.fca.0.extract.i = extractvalue [3 x i64] %b.coerce, 0 +// CHECK-NEXT: %__p1.coerce.fca.1.extract.i = extractvalue [3 x i64] %b.coerce, 1 +// CHECK-NEXT: %__p1.coerce.fca.2.extract.i = extractvalue [3 x i64] %b.coerce, 2 +// CHECK-NEXT: %0 = bitcast i64 %__p1.coerce.fca.0.extract.i to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %__p1.coerce.fca.1.extract.i to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %__p1.coerce.fca.2.extract.i to <8 x i8> +// CHECK-NEXT: %vtbx3.i = tail call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %c) +// CHECK-NEXT: ret <8 x i8> %vtbx3.i +// CHECK-LABEL: test_vtbx3_p8 +// CHECK: entry: +// CHECK-NEXT: %__p1.coerce.fca.0.extract.i = extractvalue [3 x i64] %b.coerce, 0 +// CHECK-NEXT: %__p1.coerce.fca.1.extract.i = extractvalue [3 x i64] %b.coerce, 1 +// CHECK-NEXT: %__p1.coerce.fca.2.extract.i = extractvalue [3 x i64] %b.coerce, 2 +// CHECK-NEXT: %0 = bitcast i64 %__p1.coerce.fca.0.extract.i to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %__p1.coerce.fca.1.extract.i to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %__p1.coerce.fca.2.extract.i to <8 x i8> +// CHECK-NEXT: %vtbx3.i = tail call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %c) +// CHECK-NEXT: ret <8 x i8> %vtbx3.i +// CHECK-LABEL: test_vtbx4_u8 +// CHECK: entry: +// CHECK-NEXT: %__p1.coerce.fca.0.extract.i = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %__p1.coerce.fca.1.extract.i = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %__p1.coerce.fca.2.extract.i = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %__p1.coerce.fca.3.extract.i = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %__p1.coerce.fca.0.extract.i to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %__p1.coerce.fca.1.extract.i to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %__p1.coerce.fca.2.extract.i to <8 x i8> +// CHECK-NEXT: %3 = bitcast i64 %__p1.coerce.fca.3.extract.i to <8 x i8> +// CHECK-NEXT: %vtbx4.i = tail call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, <8 x i8> %c) +// CHECK-NEXT: ret <8 x 
i8> %vtbx4.i +// CHECK-LABEL: test_vtbx4_s8 +// CHECK: entry: +// CHECK-NEXT: %__p1.coerce.fca.0.extract.i = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %__p1.coerce.fca.1.extract.i = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %__p1.coerce.fca.2.extract.i = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %__p1.coerce.fca.3.extract.i = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %__p1.coerce.fca.0.extract.i to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %__p1.coerce.fca.1.extract.i to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %__p1.coerce.fca.2.extract.i to <8 x i8> +// CHECK-NEXT: %3 = bitcast i64 %__p1.coerce.fca.3.extract.i to <8 x i8> +// CHECK-NEXT: %vtbx4.i = tail call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, <8 x i8> %c) +// CHECK-NEXT: ret <8 x i8> %vtbx4.i +// CHECK-LABEL: test_vtbx4_p8 +// CHECK: entry: +// CHECK-NEXT: %__p1.coerce.fca.0.extract.i = extractvalue [4 x i64] %b.coerce, 0 +// CHECK-NEXT: %__p1.coerce.fca.1.extract.i = extractvalue [4 x i64] %b.coerce, 1 +// CHECK-NEXT: %__p1.coerce.fca.2.extract.i = extractvalue [4 x i64] %b.coerce, 2 +// CHECK-NEXT: %__p1.coerce.fca.3.extract.i = extractvalue [4 x i64] %b.coerce, 3 +// CHECK-NEXT: %0 = bitcast i64 %__p1.coerce.fca.0.extract.i to <8 x i8> +// CHECK-NEXT: %1 = bitcast i64 %__p1.coerce.fca.1.extract.i to <8 x i8> +// CHECK-NEXT: %2 = bitcast i64 %__p1.coerce.fca.2.extract.i to <8 x i8> +// CHECK-NEXT: %3 = bitcast i64 %__p1.coerce.fca.3.extract.i to <8 x i8> +// CHECK-NEXT: %vtbx4.i = tail call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> %0, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, <8 x i8> %c) +// CHECK-NEXT: ret <8 x i8> %vtbx4.i +// CHECK-LABEL: test_vtrn_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> +// CHECK-NEXT: store <8 x i8> %vtrn.i, <8 x i8>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> +// CHECK-NEXT: store <8 x i8> %vtrn1.i, <8 x i8>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vtrn_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK-NEXT: store <4 x i16> %vtrn.i, <4 x i16>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK-NEXT: store <4 x i16> %vtrn1.i, <4 x i16>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vtrn_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK-NEXT: store <2 x i32> %vtrn.i, <2 x i32>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK-NEXT: store <2 x i32> %vtrn1.i, <2 x i32>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vtrn_u8 +// CHECK: 
entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> +// CHECK-NEXT: store <8 x i8> %vtrn.i, <8 x i8>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> +// CHECK-NEXT: store <8 x i8> %vtrn1.i, <8 x i8>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vtrn_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK-NEXT: store <4 x i16> %vtrn.i, <4 x i16>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK-NEXT: store <4 x i16> %vtrn1.i, <4 x i16>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vtrn_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK-NEXT: store <2 x i32> %vtrn.i, <2 x i32>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK-NEXT: store <2 x i32> %vtrn1.i, <2 x i32>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vtrn_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vtrn.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> +// CHECK-NEXT: store <2 x float> %vtrn.i, <2 x float>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vtrn1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> +// CHECK-NEXT: store <2 x float> %vtrn1.i, <2 x float>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vtrn_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> +// CHECK-NEXT: store <8 x i8> %vtrn.i, <8 x i8>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> +// CHECK-NEXT: store <8 x i8> %vtrn1.i, <8 x i8>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vtrn_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK-NEXT: store <4 x i16> %vtrn.i, <4 x i16>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK-NEXT: store <4 x i16> %vtrn1.i, <4 x i16>* %1, align 8 +// 
CHECK-NEXT: ret void +// CHECK-LABEL: test_vtrnq_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> +// CHECK-NEXT: store <16 x i8> %vtrn.i, <16 x i8>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> +// CHECK-NEXT: store <16 x i8> %vtrn1.i, <16 x i8>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vtrnq_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> +// CHECK-NEXT: store <8 x i16> %vtrn.i, <8 x i16>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> +// CHECK-NEXT: store <8 x i16> %vtrn1.i, <8 x i16>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vtrnq_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vtrn.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> +// CHECK-NEXT: store <4 x i32> %vtrn.i, <4 x i32>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vtrn1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> +// CHECK-NEXT: store <4 x i32> %vtrn1.i, <4 x i32>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vtrnq_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> +// CHECK-NEXT: store <16 x i8> %vtrn.i, <16 x i8>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> +// CHECK-NEXT: store <16 x i8> %vtrn1.i, <16 x i8>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vtrnq_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> +// CHECK-NEXT: store <8 x i16> %vtrn.i, <8 x i16>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> +// CHECK-NEXT: store <8 x i16> %vtrn1.i, <8 x i16>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vtrnq_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vtrn.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> +// CHECK-NEXT: store <4 x i32> %vtrn.i, <4 x i32>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vtrn1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> +// CHECK-NEXT:
store <4 x i32> %vtrn1.i, <4 x i32>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vtrnq_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vtrn.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> +// CHECK-NEXT: store <4 x float> %vtrn.i, <4 x float>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vtrn1.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> +// CHECK-NEXT: store <4 x float> %vtrn1.i, <4 x float>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vtrnq_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> +// CHECK-NEXT: store <16 x i8> %vtrn.i, <16 x i8>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> +// CHECK-NEXT: store <16 x i8> %vtrn1.i, <16 x i8>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vtrnq_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK-NEXT: store <8 x i16> %vtrn.i, <8 x i16>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK-NEXT: store <8 x i16> %vtrn1.i, <8 x i16>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vtst_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = and <8 x i8> %b, %a +// CHECK-NEXT: %1 = icmp ne <8 x i8> %0, zeroinitializer +// CHECK-NEXT: %vtst.i = sext <8 x i1> %1 to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %vtst.i +// CHECK-LABEL: test_vtst_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = and <4 x i16> %b, %a +// CHECK-NEXT: %1 = icmp ne <4 x i16> %0, zeroinitializer +// CHECK-NEXT: %vtst.i = sext <4 x i1> %1 to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %vtst.i +// CHECK-LABEL: test_vtst_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = and <2 x i32> %b, %a +// CHECK-NEXT: %1 = icmp ne <2 x i32> %0, zeroinitializer +// CHECK-NEXT: %vtst.i = sext <2 x i1> %1 to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %vtst.i +// CHECK-LABEL: test_vtst_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = and <8 x i8> %b, %a +// CHECK-NEXT: %1 = icmp ne <8 x i8> %0, zeroinitializer +// CHECK-NEXT: %vtst.i = sext <8 x i1> %1 to <8 x i8> +// CHECK-NEXT: ret <8 x i8> %vtst.i +// CHECK-LABEL: test_vtst_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = and <4 x i16> %b, %a +// CHECK-NEXT: %1 = icmp ne <4 x i16> %0, zeroinitializer +// CHECK-NEXT: %vtst.i = sext <4 x i1> %1 to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %vtst.i +// CHECK-LABEL: test_vtst_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = and <2 x i32> %b, %a +// CHECK-NEXT: %1 = icmp ne <2 x i32> %0, zeroinitializer +// CHECK-NEXT: %vtst.i = sext <2 x i1> %1 to <2 x i32> +// CHECK-NEXT: ret <2 x i32> %vtst.i +// CHECK-LABEL: test_vtst_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = and <8 x i8> %b, %a +// CHECK-NEXT: %1 = icmp ne <8 x i8> %0, zeroinitializer +// CHECK-NEXT: %vtst.i = sext <8 x i1> %1 to <8 
x i8> +// CHECK-NEXT: ret <8 x i8> %vtst.i +// CHECK-LABEL: test_vtst_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = and <4 x i16> %b, %a +// CHECK-NEXT: %1 = icmp ne <4 x i16> %0, zeroinitializer +// CHECK-NEXT: %vtst.i = sext <4 x i1> %1 to <4 x i16> +// CHECK-NEXT: ret <4 x i16> %vtst.i +// CHECK-LABEL: test_vtstq_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = and <16 x i8> %b, %a +// CHECK-NEXT: %1 = icmp ne <16 x i8> %0, zeroinitializer +// CHECK-NEXT: %vtst.i = sext <16 x i1> %1 to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %vtst.i +// CHECK-LABEL: test_vtstq_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = and <8 x i16> %b, %a +// CHECK-NEXT: %1 = icmp ne <8 x i16> %0, zeroinitializer +// CHECK-NEXT: %vtst.i = sext <8 x i1> %1 to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %vtst.i +// CHECK-LABEL: test_vtstq_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = and <4 x i32> %b, %a +// CHECK-NEXT: %1 = icmp ne <4 x i32> %0, zeroinitializer +// CHECK-NEXT: %vtst.i = sext <4 x i1> %1 to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %vtst.i +// CHECK-LABEL: test_vtstq_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = and <16 x i8> %b, %a +// CHECK-NEXT: %1 = icmp ne <16 x i8> %0, zeroinitializer +// CHECK-NEXT: %vtst.i = sext <16 x i1> %1 to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %vtst.i +// CHECK-LABEL: test_vtstq_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = and <8 x i16> %b, %a +// CHECK-NEXT: %1 = icmp ne <8 x i16> %0, zeroinitializer +// CHECK-NEXT: %vtst.i = sext <8 x i1> %1 to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %vtst.i +// CHECK-LABEL: test_vtstq_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = and <4 x i32> %b, %a +// CHECK-NEXT: %1 = icmp ne <4 x i32> %0, zeroinitializer +// CHECK-NEXT: %vtst.i = sext <4 x i1> %1 to <4 x i32> +// CHECK-NEXT: ret <4 x i32> %vtst.i +// CHECK-LABEL: test_vtstq_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = and <16 x i8> %b, %a +// CHECK-NEXT: %1 = icmp ne <16 x i8> %0, zeroinitializer +// CHECK-NEXT: %vtst.i = sext <16 x i1> %1 to <16 x i8> +// CHECK-NEXT: ret <16 x i8> %vtst.i +// CHECK-LABEL: test_vtstq_p16 +// CHECK: entry: +// CHECK-NEXT: %0 = and <8 x i16> %b, %a +// CHECK-NEXT: %1 = icmp ne <8 x i16> %0, zeroinitializer +// CHECK-NEXT: %vtst.i = sext <8 x i1> %1 to <8 x i16> +// CHECK-NEXT: ret <8 x i16> %vtst.i +// CHECK-LABEL: test_vuzp_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vuzp.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> +// CHECK-NEXT: store <8 x i8> %vuzp.i, <8 x i8>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vuzp1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> +// CHECK-NEXT: store <8 x i8> %vuzp1.i, <8 x i8>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vuzp_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK-NEXT: store <4 x i16> %vuzp.i, <4 x i16>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK-NEXT: store <4 x i16> %vuzp1.i, <4 x i16>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vuzp_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.int32x2x2_t, 
%struct.int32x2x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK-NEXT: store <2 x i32> %vuzp.i, <2 x i32>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK-NEXT: store <2 x i32> %vuzp1.i, <2 x i32>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vuzp_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vuzp.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> +// CHECK-NEXT: store <8 x i8> %vuzp.i, <8 x i8>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vuzp1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> +// CHECK-NEXT: store <8 x i8> %vuzp1.i, <8 x i8>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vuzp_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK-NEXT: store <4 x i16> %vuzp.i, <4 x i16>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK-NEXT: store <4 x i16> %vuzp1.i, <4 x i16>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vuzp_u32 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK-NEXT: store <2 x i32> %vuzp.i, <2 x i32>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK-NEXT: store <2 x i32> %vuzp1.i, <2 x i32>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vuzp_f32 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vuzp.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> +// CHECK-NEXT: store <2 x float> %vuzp.i, <2 x float>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vuzp1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> +// CHECK-NEXT: store <2 x float> %vuzp1.i, <2 x float>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vuzp_p8 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vuzp.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> +// CHECK-NEXT: store <8 x i8> %vuzp.i, <8 x i8>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vuzp1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> +// CHECK-NEXT: store <8 x i8> %vuzp1.i, <8 x i8>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vuzp_p16 +// CHECK: entry: +// CHECK-NEXT: 
%0 = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK-NEXT: store <4 x i16> %vuzp.i, <4 x i16>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK-NEXT: store <4 x i16> %vuzp1.i, <4 x i16>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vuzpq_s8 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> +// CHECK-NEXT: store <16 x i8> %vuzp.i, <16 x i8>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vuzp1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> +// CHECK-NEXT: store <16 x i8> %vuzp1.i, <16 x i8>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vuzpq_s16 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK-NEXT: store <8 x i16> %vuzp.i, <8 x i16>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vuzp1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK-NEXT: store <8 x i16> %vuzp1.i, <8 x i16>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vuzpq_s32 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vuzp.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +// CHECK-NEXT: store <4 x i32> %vuzp.i, <4 x i32>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vuzp1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +// CHECK-NEXT: store <4 x i32> %vuzp1.i, <4 x i32>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vuzpq_u8 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> +// CHECK-NEXT: store <16 x i8> %vuzp.i, <16 x i8>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vuzp1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> +// CHECK-NEXT: store <16 x i8> %vuzp1.i, <16 x i8>* %1, align 8 +// CHECK-NEXT: ret void +// CHECK-LABEL: test_vuzpq_u16 +// CHECK: entry: +// CHECK-NEXT: %0 = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* %agg.result, i32 0, i32 0, i32 0 +// CHECK-NEXT: %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK-NEXT: store <8 x i16> %vuzp.i, <8 x i16>* %0, align 8 +// CHECK-NEXT: %1 = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* %agg.result, i32 0, i32 0, i32 1 +// CHECK-NEXT: %vuzp1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK-NEXT: store <8 x i16> %vuzp1.i, <8 x i16>* %1, align 8 +// CHECK-NEXT: ret void +// 
CHECK-LABEL: test_vuzpq_u32
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vuzp.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: store <4 x i32> %vuzp.i, <4 x i32>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vuzp1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: store <4 x i32> %vuzp1.i, <4 x i32>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vuzpq_f32
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vuzp.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: store <4 x float> %vuzp.i, <4 x float>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vuzp1.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: store <4 x float> %vuzp1.i, <4 x float>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vuzpq_p8
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+// CHECK-NEXT: store <16 x i8> %vuzp.i, <16 x i8>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vuzp1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+// CHECK-NEXT: store <16 x i8> %vuzp1.i, <16 x i8>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vuzpq_p16
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT: store <8 x i16> %vuzp.i, <8 x i16>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vuzp1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT: store <8 x i16> %vuzp1.i, <8 x i16>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vzip_s8
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: store <8 x i8> %vzip.i, <8 x i8>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vzip1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: store <8 x i8> %vzip1.i, <8 x i8>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vzip_s16
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vzip.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: store <4 x i16> %vzip.i, <4 x i16>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: store <4 x i16> %vzip1.i, <4 x i16>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vzip_s32
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: store <2 x i32> %vzip.i, <2 x i32>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: store <2 x i32> %vzip1.i, <2 x i32>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vzip_u8
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: store <8 x i8> %vzip.i, <8 x i8>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vzip1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: store <8 x i8> %vzip1.i, <8 x i8>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vzip_u16
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vzip.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: store <4 x i16> %vzip.i, <4 x i16>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: store <4 x i16> %vzip1.i, <4 x i16>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vzip_u32
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: store <2 x i32> %vzip.i, <2 x i32>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: store <2 x i32> %vzip1.i, <2 x i32>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vzip_f32
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vzip.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: store <2 x float> %vzip.i, <2 x float>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vzip1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: store <2 x float> %vzip1.i, <2 x float>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vzip_p8
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: store <8 x i8> %vzip.i, <8 x i8>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vzip1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: store <8 x i8> %vzip1.i, <8 x i8>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vzip_p16
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vzip.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: store <4 x i16> %vzip.i, <4 x i16>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: store <4 x i16> %vzip1.i, <4 x i16>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vzipq_s8
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+// CHECK-NEXT: store <16 x i8> %vzip.i, <16 x i8>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vzip1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+// CHECK-NEXT: store <16 x i8> %vzip1.i, <16 x i8>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vzipq_s16
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: store <8 x i16> %vzip.i, <8 x i16>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: store <8 x i16> %vzip1.i, <8 x i16>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vzipq_s32
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vzip.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: store <4 x i32> %vzip.i, <4 x i32>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vzip1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: store <4 x i32> %vzip1.i, <4 x i32>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vzipq_u8
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+// CHECK-NEXT: store <16 x i8> %vzip.i, <16 x i8>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vzip1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+// CHECK-NEXT: store <16 x i8> %vzip1.i, <16 x i8>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vzipq_u16
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: store <8 x i16> %vzip.i, <8 x i16>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: store <8 x i16> %vzip1.i, <8 x i16>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vzipq_u32
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vzip.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: store <4 x i32> %vzip.i, <4 x i32>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vzip1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: store <4 x i32> %vzip1.i, <4 x i32>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vzipq_f32
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vzip.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: store <4 x float> %vzip.i, <4 x float>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vzip1.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: store <4 x float> %vzip1.i, <4 x float>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vzipq_p8
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+// CHECK-NEXT: store <16 x i8> %vzip.i, <16 x i8>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vzip1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+// CHECK-NEXT: store <16 x i8> %vzip1.i, <16 x i8>* %1, align 8
+// CHECK-NEXT: ret void
+// CHECK-LABEL: test_vzipq_p16
+// CHECK: entry:
+// CHECK-NEXT: %0 = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* %agg.result, i32 0, i32 0, i32 0
+// CHECK-NEXT: %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: store <8 x i16> %vzip.i, <8 x i16>* %0, align 8
+// CHECK-NEXT: %1 = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* %agg.result, i32 0, i32 0, i32 1
+// CHECK-NEXT: %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: store <8 x i16> %vzip1.i, <8 x i16>* %1, align 8
+// CHECK-NEXT: ret void
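
Note: every vzip/vuzp check above follows one pattern: the intrinsic lowers to two shufflevector instructions (even/odd lane masks for vuzp, interleaving masks for vzip), and the two results are stored into the two fields of the sret aggregate %agg.result. As a minimal illustrative sketch (not part of the patch), the C source shape that produces these checks looks like the following; the actual definitions live in test/CodeGen/arm_neon_intrinsics.c:

    #include <arm_neon.h>

    /* vzipq_u32 interleaves the lanes of a and b. At -O1 Clang emits the
       two shufflevectors checked above (masks <0,4,1,5> and <2,6,3,7>)
       and stores them into the two halves of %agg.result. */
    uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
      return vzipq_u32(a, b);
    }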