Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -4238,6 +4238,13 @@
   }
 #endif
 
+  // Update the largest vector width if any arguments have vector types.
+  for (unsigned i = 0; i < IRCallArgs.size(); ++i) {
+    if (auto *VT = dyn_cast<llvm::VectorType>(IRCallArgs[i]->getType()))
+      LargestVectorWidth = std::max(LargestVectorWidth,
+                                    VT->getPrimitiveSizeInBits());
+  }
+
   // Compute the calling convention and attributes.
   unsigned CallingConv;
   llvm::AttributeList Attrs;
@@ -4318,6 +4325,11 @@
   if (!CI->getType()->isVoidTy())
     CI->setName("call");
 
+  // Update largest vector width from the return type.
+  if (auto *VT = dyn_cast<llvm::VectorType>(CI->getType()))
+    LargestVectorWidth = std::max(LargestVectorWidth,
+                                  VT->getPrimitiveSizeInBits());
+
   // Insert instrumentation or attach profile metadata at indirect call sites.
   // For more details, see the comment before the definition of
   // IPVK_IndirectCallTarget in InstrProfData.inc.
Index: lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- lib/CodeGen/CodeGenFunction.cpp
+++ lib/CodeGen/CodeGenFunction.cpp
@@ -430,7 +430,24 @@
     NormalCleanupDest = Address::invalid();
   }
 
-  // Add the required-vector-width attribute.
+  // Scan function arguments for vector width.
+  for (llvm::Argument &A : CurFn->args())
+    if (auto *VT = dyn_cast<llvm::VectorType>(A.getType()))
+      LargestVectorWidth = std::max(LargestVectorWidth,
+                                    VT->getPrimitiveSizeInBits());
+
+  // Update vector width based on return type.
+  if (auto *VT = dyn_cast<llvm::VectorType>(CurFn->getReturnType()))
+    LargestVectorWidth = std::max(LargestVectorWidth,
+                                  VT->getPrimitiveSizeInBits());
+
+  // Add the required-vector-width attribute. This contains the max width from:
+  // 1. min-vector-width attribute used in the source program.
+  // 2. Any builtins used that have a vector width specified.
+  // 3. Values passed in and out of inline assembly.
+  // 4. Width of vector arguments and return types for this function.
+  // 5. Width of vector arguments and return types for functions called by
+  //    this function.
   if (LargestVectorWidth != 0)
     CurFn->addFnAttr("min-legal-vector-width",
                      llvm::utostr(LargestVectorWidth));
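Taken together, the two hunks above mean a function's "min-legal-vector-width" now reflects vectors that appear anywhere on its call boundaries, not just in its own prototype. A minimal C sketch of the intended effect (the names `callee`, `caller`, and `leaf` are hypothetical and not part of the patch; actual attribute-group numbering depends on the rest of the translation unit):

    /* 256-bit vector type, written with the GCC/Clang vector extension. */
    typedef int v8si __attribute__((vector_size(32)));

    v8si callee(v8si x); /* defined elsewhere */

    /* 'caller' has no vector parameters or return type of its own, but the
       CGCall.cpp hunk scans each IR call's arguments and result, so the
       256-bit values flowing into and out of 'callee' should still raise
       caller's "min-legal-vector-width" to 256. */
    int caller(v8si *p) {
      v8si r = callee(*p);
      return r[0];
    }

    /* 'leaf' makes no calls; the CodeGenFunction.cpp hunk derives the width
       directly from its own argument and return types instead. */
    v8si leaf(v8si x) { return x + x; }

The test updates below are the fallout of this: NEON q-form intrinsics now land in a different attribute group than their 64-bit counterparts.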
Index: test/CodeGen/aarch64-neon-3v.c
===================================================================
--- test/CodeGen/aarch64-neon-3v.c
+++ test/CodeGen/aarch64-neon-3v.c
@@ -11,7 +11,7 @@
   return vand_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vandq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vandq_s8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b
 // CHECK: ret <16 x i8> [[AND_I]]
 int8x16_t test_vandq_s8(int8x16_t a, int8x16_t b) {
@@ -25,7 +25,7 @@
   return vand_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vandq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vandq_s16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b
 // CHECK: ret <8 x i16> [[AND_I]]
 int16x8_t test_vandq_s16(int16x8_t a, int16x8_t b) {
@@ -39,7 +39,7 @@
   return vand_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vandq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vandq_s32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b
 // CHECK: ret <4 x i32> [[AND_I]]
 int32x4_t test_vandq_s32(int32x4_t a, int32x4_t b) {
@@ -53,7 +53,7 @@
   return vand_s64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vandq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vandq_s64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b
 // CHECK: ret <2 x i64> [[AND_I]]
 int64x2_t test_vandq_s64(int64x2_t a, int64x2_t b) {
@@ -67,7 +67,7 @@
   return vand_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vandq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vandq_u8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b
 // CHECK: ret <16 x i8> [[AND_I]]
 uint8x16_t test_vandq_u8(uint8x16_t a, uint8x16_t b) {
@@ -81,7 +81,7 @@
   return vand_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vandq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vandq_u16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b
 // CHECK: ret <8 x i16> [[AND_I]]
 uint16x8_t test_vandq_u16(uint16x8_t a, uint16x8_t b) {
@@ -95,7 +95,7 @@
   return vand_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vandq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vandq_u32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b
 // CHECK: ret <4 x i32> [[AND_I]]
 uint32x4_t test_vandq_u32(uint32x4_t a, uint32x4_t b) {
@@ -109,7 +109,7 @@
   return vand_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vandq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vandq_u64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b
 // CHECK: ret <2 x i64> [[AND_I]]
 uint64x2_t test_vandq_u64(uint64x2_t a, uint64x2_t b) {
@@ -123,7 +123,7 @@
   return vorr_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vorrq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vorrq_s8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b
 // CHECK: ret <16 x i8> [[OR_I]]
 int8x16_t test_vorrq_s8(int8x16_t a, int8x16_t b) {
@@ -137,7 +137,7 @@
   return vorr_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vorrq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vorrq_s16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b
 // CHECK: ret <8 x i16> [[OR_I]]
 int16x8_t test_vorrq_s16(int16x8_t a, int16x8_t b) {
@@ -151,7 +151,7 @@
   return vorr_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vorrq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vorrq_s32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b
 // CHECK: ret <4 x i32> [[OR_I]]
 int32x4_t test_vorrq_s32(int32x4_t a, int32x4_t b) {
@@ -165,7 +165,7 @@
   return vorr_s64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vorrq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vorrq_s64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b
 // CHECK: ret <2 x i64> [[OR_I]]
 int64x2_t test_vorrq_s64(int64x2_t a, int64x2_t b) {
@@ -179,7 +179,7 @@
   return vorr_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vorrq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vorrq_u8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b
 // CHECK: ret <16 x i8> [[OR_I]]
 uint8x16_t test_vorrq_u8(uint8x16_t a, uint8x16_t b) {
@@ -193,7 +193,7 @@
   return vorr_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vorrq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vorrq_u16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b
 // CHECK: ret <8 x i16> [[OR_I]]
 uint16x8_t test_vorrq_u16(uint16x8_t a, uint16x8_t b) {
@@ -207,7 +207,7 @@
   return vorr_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vorrq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vorrq_u32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b
 // CHECK: ret <4 x i32> [[OR_I]]
 uint32x4_t test_vorrq_u32(uint32x4_t a, uint32x4_t b) {
@@ -221,7 +221,7 @@
   return vorr_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vorrq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vorrq_u64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b
 // CHECK: ret <2 x i64> [[OR_I]]
 uint64x2_t test_vorrq_u64(uint64x2_t a, uint64x2_t b) {
@@ -235,7 +235,7 @@
   return veor_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_veorq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_veorq_s8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b
 // CHECK: ret <16 x i8> [[XOR_I]]
 int8x16_t test_veorq_s8(int8x16_t a, int8x16_t b) {
@@ -249,7 +249,7 @@
   return veor_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_veorq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_veorq_s16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b
 // CHECK: ret <8 x i16> [[XOR_I]]
 int16x8_t test_veorq_s16(int16x8_t a, int16x8_t b) {
@@ -263,7 +263,7 @@
   return veor_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_veorq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_veorq_s32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b
 // CHECK: ret <4 x i32> [[XOR_I]]
 int32x4_t test_veorq_s32(int32x4_t a, int32x4_t b) {
@@ -277,7 +277,7 @@
   return veor_s64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_veorq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_veorq_s64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b
 // CHECK: ret <2 x i64> [[XOR_I]]
 int64x2_t test_veorq_s64(int64x2_t a, int64x2_t b) {
@@ -291,7 +291,7 @@
   return veor_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_veorq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_veorq_u8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b
 // CHECK: ret <16 x i8> [[XOR_I]]
 uint8x16_t test_veorq_u8(uint8x16_t a, uint8x16_t b) {
@@ -305,7 +305,7 @@
   return veor_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_veorq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_veorq_u16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b
 // CHECK: ret <8 x i16> [[XOR_I]]
 uint16x8_t test_veorq_u16(uint16x8_t a, uint16x8_t b) {
@@ -319,7 +319,7 @@
   return veor_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_veorq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_veorq_u32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b
 // CHECK: ret <4 x i32> [[XOR_I]]
 uint32x4_t test_veorq_u32(uint32x4_t a, uint32x4_t b) {
@@ -333,7 +333,7 @@
   return veor_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_veorq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_veorq_u64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b
 // CHECK: ret <2 x i64> [[XOR_I]]
 uint64x2_t test_veorq_u64(uint64x2_t a, uint64x2_t b) {
@@ -348,7 +348,7 @@
   return vbic_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vbicq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vbicq_s8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b,
 // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
 // CHECK: ret <16 x i8> [[AND_I]]
@@ -364,7 +364,7 @@
   return vbic_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vbicq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vbicq_s16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b,
 // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
 // CHECK: ret <8 x i16> [[AND_I]]
@@ -380,7 +380,7 @@
   return vbic_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vbicq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vbicq_s32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b,
 // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
 // CHECK: ret <4 x i32> [[AND_I]]
@@ -396,7 +396,7 @@
   return vbic_s64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vbicq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vbicq_s64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b,
 // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
 // CHECK: ret <2 x i64> [[AND_I]]
@@ -412,7 +412,7 @@
   return vbic_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vbicq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vbicq_u8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b,
 // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
 // CHECK: ret <16 x i8> [[AND_I]]
@@ -428,7 +428,7 @@
   return vbic_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vbicq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vbicq_u16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b,
 // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
 // CHECK: ret <8 x i16> [[AND_I]]
@@ -444,7 +444,7 @@
   return vbic_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vbicq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vbicq_u32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b,
 // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
 // CHECK: ret <4 x i32> [[AND_I]]
@@ -460,7 +460,7 @@
   return vbic_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vbicq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vbicq_u64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b,
 // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
 // CHECK: ret <2 x i64> [[AND_I]]
@@ -476,7 +476,7 @@
   return vorn_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vornq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vornq_s8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b,
 // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
 // CHECK: ret <16 x i8> [[OR_I]]
@@ -492,7 +492,7 @@
   return vorn_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vornq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vornq_s16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b,
 // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
 // CHECK: ret <8 x i16> [[OR_I]]
@@ -508,7 +508,7 @@
   return vorn_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vornq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vornq_s32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b,
 // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
 // CHECK: ret <4 x i32> [[OR_I]]
@@ -524,7 +524,7 @@
   return vorn_s64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vornq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vornq_s64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b,
 // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
 // CHECK: ret <2 x i64> [[OR_I]]
@@ -540,7 +540,7 @@
   return vorn_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vornq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vornq_u8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b,
 // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
 // CHECK: ret <16 x i8> [[OR_I]]
@@ -556,7 +556,7 @@
   return vorn_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vornq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vornq_u16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b,
 // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
 // CHECK: ret <8 x i16> [[OR_I]]
@@ -572,7 +572,7 @@
   return vorn_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vornq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vornq_u32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b,
 // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
 // CHECK: ret <4 x i32> [[OR_I]]
@@ -588,10 +588,13 @@
   return vorn_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vornq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vornq_u64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b,
 // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
 // CHECK: ret <2 x i64> [[OR_I]]
 uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) {
   return vornq_u64(a, b);
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
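The two trailing CHECK lines are the pattern each updated test file now ends with. Read as a sketch (the #0/#1 numbers are simply whichever attribute groups Clang happens to emit for this file, not stable identifiers): intrinsics that stay within 64-bit D registers keep a group carrying "min-legal-vector-width"="64", while the q-form intrinsics move to a group carrying "min-legal-vector-width"="128". A hypothetical pair mirroring the tests above (`narrow` and `wide` are illustrative names, not from the patch):

    #include <arm_neon.h>

    /* Only 64-bit vectors involved: expected to land in an attribute group
       with "min-legal-vector-width"="64". */
    int8x8_t narrow(int8x8_t a, int8x8_t b) { return vand_s8(a, b); }

    /* 128-bit vectors in and out: expected to land in a group with
       "min-legal-vector-width"="128". */
    int8x16_t wide(int8x16_t a, int8x16_t b) { return vandq_s8(a, b); }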
Index: test/CodeGen/aarch64-neon-across.c
===================================================================
--- test/CodeGen/aarch64-neon-across.c
+++ test/CodeGen/aarch64-neon-across.c
@@ -6,7 +6,7 @@
 #include <arm_neon.h>
 
 // CHECK-LABEL: define i16 @test_vaddlv_s8(<8 x i8> %a) #0 {
-// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a) #2
+// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
 // CHECK: ret i16 [[TMP0]]
 int16_t test_vaddlv_s8(int8x8_t a) {
@@ -14,14 +14,14 @@
 }
 
 // CHECK-LABEL: define i32 @test_vaddlv_s16(<4 x i16> %a) #0 {
-// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a) #2
+// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a) #3
 // CHECK: ret i32 [[VADDLV_I]]
 int32_t test_vaddlv_s16(int16x4_t a) {
   return vaddlv_s16(a);
 }
 
 // CHECK-LABEL: define i16 @test_vaddlv_u8(<8 x i8> %a) #0 {
-// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a) #2
+// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
 // CHECK: ret i16 [[TMP0]]
 uint16_t test_vaddlv_u8(uint8x8_t a) {
@@ -29,58 +29,58 @@
 }
 
 // CHECK-LABEL: define i32 @test_vaddlv_u16(<4 x i16> %a) #0 {
-// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a) #2
+// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a) #3
 // CHECK: ret i32 [[VADDLV_I]]
 uint32_t test_vaddlv_u16(uint16x4_t a) {
   return vaddlv_u16(a);
 }
 
-// CHECK-LABEL: define i16 @test_vaddlvq_s8(<16 x i8> %a) #0 {
-// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i16 @test_vaddlvq_s8(<16 x i8> %a) #1 {
+// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
 // CHECK: ret i16 [[TMP0]]
 int16_t test_vaddlvq_s8(int8x16_t a) {
   return vaddlvq_s8(a);
 }
 
-// CHECK-LABEL: define i32 @test_vaddlvq_s16(<8 x i16> %a) #0 {
-// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i32 @test_vaddlvq_s16(<8 x i16> %a) #1 {
+// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a) #3
 // CHECK: ret i32 [[VADDLV_I]]
 int32_t test_vaddlvq_s16(int16x8_t a) {
   return vaddlvq_s16(a);
 }
 
-// CHECK-LABEL: define i64 @test_vaddlvq_s32(<4 x i32> %a) #0 {
-// CHECK: [[VADDLVQ_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i64 @test_vaddlvq_s32(<4 x i32> %a) #1 {
+// CHECK: [[VADDLVQ_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a) #3
 // CHECK: ret i64 [[VADDLVQ_S32_I]]
 int64_t test_vaddlvq_s32(int32x4_t a) {
   return vaddlvq_s32(a);
 }
 
-// CHECK-LABEL: define i16 @test_vaddlvq_u8(<16 x i8> %a) #0 {
-// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i16 @test_vaddlvq_u8(<16 x i8> %a) #1 {
+// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
 // CHECK: ret i16 [[TMP0]]
 uint16_t test_vaddlvq_u8(uint8x16_t a) {
   return vaddlvq_u8(a);
 }
 
-// CHECK-LABEL: define i32 @test_vaddlvq_u16(<8 x i16> %a) #0 {
-// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i32 @test_vaddlvq_u16(<8 x i16> %a) #1 {
+// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a) #3
 // CHECK: ret i32 [[VADDLV_I]]
 uint32_t test_vaddlvq_u16(uint16x8_t a) {
   return vaddlvq_u16(a);
 }
 
-// CHECK-LABEL: define i64 @test_vaddlvq_u32(<4 x i32> %a) #0 {
-// CHECK: [[VADDLVQ_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i64 @test_vaddlvq_u32(<4 x i32> %a) #1 {
+// CHECK: [[VADDLVQ_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a) #3
 // CHECK: ret i64 [[VADDLVQ_U32_I]]
 uint64_t test_vaddlvq_u32(uint32x4_t a) {
   return vaddlvq_u32(a);
 }
 
 // CHECK-LABEL: define i8 @test_vmaxv_s8(<8 x i8> %a) #0 {
-// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a) #2
+// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 int8_t test_vmaxv_s8(int8x8_t a) {
@@ -88,7 +88,7 @@
 }
 
 // CHECK-LABEL: define i16 @test_vmaxv_s16(<4 x i16> %a) #0 {
-// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a) #2
+// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 int16_t test_vmaxv_s16(int16x4_t a) {
@@ -96,7 +96,7 @@
 }
 
 // CHECK-LABEL: define i8 @test_vmaxv_u8(<8 x i8> %a) #0 {
-// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a) #2
+// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 uint8_t test_vmaxv_u8(uint8x8_t a) {
@@ -104,61 +104,61 @@
 }
 
 // CHECK-LABEL: define i16 @test_vmaxv_u16(<4 x i16> %a) #0 {
-// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a) #2
+// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 uint16_t test_vmaxv_u16(uint16x4_t a) {
   return vmaxv_u16(a);
 }
 
-// CHECK-LABEL: define i8 @test_vmaxvq_s8(<16 x i8> %a) #0 {
-// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i8 @test_vmaxvq_s8(<16 x i8> %a) #1 {
+// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 int8_t test_vmaxvq_s8(int8x16_t a) {
   return vmaxvq_s8(a);
 }
 
-// CHECK-LABEL: define i16 @test_vmaxvq_s16(<8 x i16> %a) #0 {
-// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i16 @test_vmaxvq_s16(<8 x i16> %a) #1 {
+// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 int16_t test_vmaxvq_s16(int16x8_t a) {
   return vmaxvq_s16(a);
 }
 
-// CHECK-LABEL: define i32 @test_vmaxvq_s32(<4 x i32> %a) #0 {
-// CHECK: [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i32 @test_vmaxvq_s32(<4 x i32> %a) #1 {
+// CHECK: [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a) #3
 // CHECK: ret i32 [[VMAXVQ_S32_I]]
 int32_t test_vmaxvq_s32(int32x4_t a) {
   return vmaxvq_s32(a);
 }
 
-// CHECK-LABEL: define i8 @test_vmaxvq_u8(<16 x i8> %a) #0 {
-// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i8 @test_vmaxvq_u8(<16 x i8> %a) #1 {
+// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 uint8_t test_vmaxvq_u8(uint8x16_t a) {
   return vmaxvq_u8(a);
 }
 
-// CHECK-LABEL: define i16 @test_vmaxvq_u16(<8 x i16> %a) #0 {
-// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i16 @test_vmaxvq_u16(<8 x i16> %a) #1 {
+// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 uint16_t test_vmaxvq_u16(uint16x8_t a) {
   return vmaxvq_u16(a);
 }
 
-// CHECK-LABEL: define i32 @test_vmaxvq_u32(<4 x i32> %a) #0 {
-// CHECK: [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i32 @test_vmaxvq_u32(<4 x i32> %a) #1 {
+// CHECK: [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a) #3
 // CHECK: ret i32 [[VMAXVQ_U32_I]]
 uint32_t test_vmaxvq_u32(uint32x4_t a) {
   return vmaxvq_u32(a);
 }
 
 // CHECK-LABEL: define i8 @test_vminv_s8(<8 x i8> %a) #0 {
-// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a) #2
+// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 int8_t test_vminv_s8(int8x8_t a) {
@@ -166,7 +166,7 @@
 }
 
 // CHECK-LABEL: define i16 @test_vminv_s16(<4 x i16> %a) #0 {
-// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a) #2
+// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 int16_t test_vminv_s16(int16x4_t a) {
@@ -174,7 +174,7 @@
 }
 
 // CHECK-LABEL: define i8 @test_vminv_u8(<8 x i8> %a) #0 {
-// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) #2
+// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 uint8_t test_vminv_u8(uint8x8_t a) {
@@ -182,61 +182,61 @@
 }
 
 // CHECK-LABEL: define i16 @test_vminv_u16(<4 x i16> %a) #0 {
-// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a) #2
+// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 uint16_t test_vminv_u16(uint16x4_t a) {
   return vminv_u16(a);
 }
 
-// CHECK-LABEL: define i8 @test_vminvq_s8(<16 x i8> %a) #0 {
-// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i8 @test_vminvq_s8(<16 x i8> %a) #1 {
+// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 int8_t test_vminvq_s8(int8x16_t a) {
   return vminvq_s8(a);
 }
 
-// CHECK-LABEL: define i16 @test_vminvq_s16(<8 x i16> %a) #0 {
-// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i16 @test_vminvq_s16(<8 x i16> %a) #1 {
+// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 int16_t test_vminvq_s16(int16x8_t a) {
   return vminvq_s16(a);
 }
 
-// CHECK-LABEL: define i32 @test_vminvq_s32(<4 x i32> %a) #0 {
-// CHECK: [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i32 @test_vminvq_s32(<4 x i32> %a) #1 {
+// CHECK: [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a) #3
 // CHECK: ret i32 [[VMINVQ_S32_I]]
 int32_t test_vminvq_s32(int32x4_t a) {
   return vminvq_s32(a);
 }
 
-// CHECK-LABEL: define i8 @test_vminvq_u8(<16 x i8> %a) #0 {
-// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i8 @test_vminvq_u8(<16 x i8> %a) #1 {
+// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 uint8_t test_vminvq_u8(uint8x16_t a) {
   return vminvq_u8(a);
 }
 
-// CHECK-LABEL: define i16 @test_vminvq_u16(<8 x i16> %a) #0 {
-// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i16 @test_vminvq_u16(<8 x i16> %a) #1 {
+// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 uint16_t test_vminvq_u16(uint16x8_t a) {
   return vminvq_u16(a);
 }
 
-// CHECK-LABEL: define i32 @test_vminvq_u32(<4 x i32> %a) #0 {
-// CHECK: [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i32 @test_vminvq_u32(<4 x i32> %a) #1 {
+// CHECK: [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a) #3
 // CHECK: ret i32 [[VMINVQ_U32_I]]
 uint32_t test_vminvq_u32(uint32x4_t a) {
   return vminvq_u32(a);
 }
 
 // CHECK-LABEL: define i8 @test_vaddv_s8(<8 x i8> %a) #0 {
-// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a) #2
+// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 int8_t test_vaddv_s8(int8x8_t a) {
@@ -244,7 +244,7 @@
 }
 
 // CHECK-LABEL: define i16 @test_vaddv_s16(<4 x i16> %a) #0 {
-// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a) #2
+// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 int16_t test_vaddv_s16(int16x4_t a) {
@@ -252,7 +252,7 @@
 }
 
 // CHECK-LABEL: define i8 @test_vaddv_u8(<8 x i8> %a) #0 {
-// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a) #2
+// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 uint8_t test_vaddv_u8(uint8x8_t a) {
@@ -260,83 +260,86 @@
 }
 
 // CHECK-LABEL: define i16 @test_vaddv_u16(<4 x i16> %a) #0 {
-// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a) #2
+// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 uint16_t test_vaddv_u16(uint16x4_t a) {
   return vaddv_u16(a);
 }
 
-// CHECK-LABEL: define i8 @test_vaddvq_s8(<16 x i8> %a) #0 {
-// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i8 @test_vaddvq_s8(<16 x i8> %a) #1 {
+// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 int8_t test_vaddvq_s8(int8x16_t a) {
   return vaddvq_s8(a);
 }
 
-// CHECK-LABEL: define i16 @test_vaddvq_s16(<8 x i16> %a) #0 {
-// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i16 @test_vaddvq_s16(<8 x i16> %a) #1 {
+// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 int16_t test_vaddvq_s16(int16x8_t a) {
   return vaddvq_s16(a);
 }
 
-// CHECK-LABEL: define i32 @test_vaddvq_s32(<4 x i32> %a) #0 {
-// CHECK: [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i32 @test_vaddvq_s32(<4 x i32> %a) #1 {
+// CHECK: [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a) #3
 // CHECK: ret i32 [[VADDVQ_S32_I]]
 int32_t test_vaddvq_s32(int32x4_t a) {
   return vaddvq_s32(a);
 }
 
-// CHECK-LABEL: define i8 @test_vaddvq_u8(<16 x i8> %a) #0 {
-// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i8 @test_vaddvq_u8(<16 x i8> %a) #1 {
+// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 uint8_t test_vaddvq_u8(uint8x16_t a) {
   return vaddvq_u8(a);
 }
 
-// CHECK-LABEL: define i16 @test_vaddvq_u16(<8 x i16> %a) #0 {
-// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i16 @test_vaddvq_u16(<8 x i16> %a) #1 {
+// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 uint16_t test_vaddvq_u16(uint16x8_t a) {
   return vaddvq_u16(a);
 }
 
-// CHECK-LABEL: define i32 @test_vaddvq_u32(<4 x i32> %a) #0 {
-// CHECK: [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i32 @test_vaddvq_u32(<4 x i32> %a) #1 {
+// CHECK: [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> %a) #3
 // CHECK: ret i32 [[VADDVQ_U32_I]]
 uint32_t test_vaddvq_u32(uint32x4_t a) {
   return vaddvq_u32(a);
 }
 
-// CHECK-LABEL: define float @test_vmaxvq_f32(<4 x float> %a) #0 {
-// CHECK: [[VMAXVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define float @test_vmaxvq_f32(<4 x float> %a) #1 {
+// CHECK: [[VMAXVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a) #3
 // CHECK: ret float [[VMAXVQ_F32_I]]
 float32_t test_vmaxvq_f32(float32x4_t a) {
   return vmaxvq_f32(a);
 }
 
-// CHECK-LABEL: define float @test_vminvq_f32(<4 x float> %a) #0 {
-// CHECK: [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define float @test_vminvq_f32(<4 x float> %a) #1 {
+// CHECK: [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a) #3
 // CHECK: ret float [[VMINVQ_F32_I]]
 float32_t test_vminvq_f32(float32x4_t a) {
   return vminvq_f32(a);
 }
 
-// CHECK-LABEL: define float @test_vmaxnmvq_f32(<4 x float> %a) #0 {
-// CHECK: [[VMAXNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define float @test_vmaxnmvq_f32(<4 x float> %a) #1 {
+// CHECK: [[VMAXNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a) #3
 // CHECK: ret float [[VMAXNMVQ_F32_I]]
 float32_t test_vmaxnmvq_f32(float32x4_t a) {
   return vmaxnmvq_f32(a);
 }
 
-// CHECK-LABEL: define float @test_vminnmvq_f32(<4 x float> %a) #0 {
-// CHECK: [[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define float @test_vminnmvq_f32(<4 x float> %a) #1 {
+// CHECK: [[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a) #3
 // CHECK: ret float [[VMINNMVQ_F32_I]]
 float32_t test_vminnmvq_f32(float32x4_t a) {
   return vminnmvq_f32(a);
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
Index: test/CodeGen/aarch64-neon-fma.c
===================================================================
--- test/CodeGen/aarch64-neon-fma.c
+++ test/CodeGen/aarch64-neon-fma.c
@@ -14,7 +14,7 @@
   return vmla_n_f32(a, b, c);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlaq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlaq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #1 {
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
@@ -26,7 +26,7 @@
   return vmlaq_n_f32(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vmlaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
+// CHECK-LABEL: define <2 x double> @test_vmlaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 {
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
 // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
@@ -36,7 +36,7 @@
   return vmlaq_n_f64(a, b, c);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #1 {
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
@@ -58,7 +58,7 @@
   return vmls_n_f32(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vmlsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
+// CHECK-LABEL: define <2 x double> @test_vmlsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 {
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
 // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
@@ -77,7 +77,7 @@
   return vmla_lane_f32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
@@ -86,7 +86,7 @@
   return vmlaq_lane_f32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
@@ -95,7 +95,7 @@
   return vmla_laneq_f32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
@@ -113,7 +113,7 @@
   return vmls_lane_f32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
@@ -122,7 +122,7 @@
   return vmlsq_lane_f32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
@@ -131,7 +131,7 @@
   return vmls_laneq_f32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
@@ -149,7 +149,7 @@
   return vmla_lane_f32(a, b, v, 1);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
@@ -158,7 +158,7 @@
   return vmlaq_lane_f32(a, b, v, 1);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32>
 // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
@@ -167,7 +167,7 @@
   return vmla_laneq_f32(a, b, v, 3);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
@@ -185,7 +185,7 @@
   return vmls_lane_f32(a, b, v, 1);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
@@ -193,7 +193,7 @@
 float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
   return vmlsq_lane_f32(a, b, v, 1);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32>
 // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
@@ -202,7 +202,7 @@
   return vmls_laneq_f32(a, b, v, 3);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
@@ -211,7 +211,7 @@
   return vmlsq_laneq_f32(a, b, v, 3);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vfmaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
+// CHECK-LABEL: define <2 x double> @test_vfmaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 {
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
 // CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> [[VECINIT1_I]], <2 x double> %a)
@@ -220,12 +220,15 @@
   return vfmaq_n_f64(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vfmsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
+// CHECK-LABEL: define <2 x double> @test_vfmsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 {
 // CHECK: [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
-// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> [[VECINIT1_I]], <2 x double> %a) #2
+// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> [[VECINIT1_I]], <2 x double> %a) #3
 // CHECK: ret <2 x double> [[TMP6]]
 float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
   return vfmsq_n_f64(a, b, c);
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
Index: test/CodeGen/aarch64-neon-ldst-one.c
===================================================================
--- test/CodeGen/aarch64-neon-ldst-one.c
+++ test/CodeGen/aarch64-neon-ldst-one.c
@@ -152,7 +152,7 @@
   return vld1q_dup_p64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_dup_u8(i8* %a) #0 {
+// CHECK-LABEL: define <8 x i8> @test_vld1_dup_u8(i8* %a) #1 {
 // CHECK: [[TMP0:%.*]] = load i8, i8* %a
 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
@@ -161,7 +161,7 @@
   return vld1_dup_u8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_dup_u16(i16* %a) #0 {
+// CHECK-LABEL: define <4 x i16> @test_vld1_dup_u16(i16* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
 // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]]
@@ -172,7 +172,7 @@
   return vld1_dup_u16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vld1_dup_u32(i32* %a) #0 {
+// CHECK-LABEL: define <2 x i32> @test_vld1_dup_u32(i32* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
 // CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]]
@@ -183,7 +183,7 @@
   return vld1_dup_u32(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vld1_dup_u64(i64* %a) #0 {
+// CHECK-LABEL: define <1 x i64> @test_vld1_dup_u64(i64* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
 // CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
@@ -194,7 +194,7 @@
   return vld1_dup_u64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_dup_s8(i8* %a) #0 {
+// CHECK-LABEL: define <8 x i8> @test_vld1_dup_s8(i8* %a) #1 {
 // CHECK: [[TMP0:%.*]] = load i8, i8* %a
 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
@@ -203,7 +203,7 @@
   return vld1_dup_s8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_dup_s16(i16* %a) #0 {
+// CHECK-LABEL: define <4 x i16> @test_vld1_dup_s16(i16* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
 // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]]
@@ -214,7 +214,7 @@
   return vld1_dup_s16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vld1_dup_s32(i32* %a) #0 {
+// CHECK-LABEL: define <2 x i32> @test_vld1_dup_s32(i32* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
 // CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]]
@@ -225,7 +225,7 @@
   return vld1_dup_s32(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vld1_dup_s64(i64* %a) #0 {
+// CHECK-LABEL: define <1 x i64> @test_vld1_dup_s64(i64* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
 // CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
@@ -236,7 +236,7 @@
   return vld1_dup_s64(a);
 }
 
-// CHECK-LABEL: define <4 x half> @test_vld1_dup_f16(half* %a) #0 {
+// CHECK-LABEL: define <4 x half> @test_vld1_dup_f16(half* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to half*
 // CHECK: [[TMP2:%.*]] = load half, half* [[TMP1]]
@@ -247,7 +247,7 @@
   return vld1_dup_f16(a);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vld1_dup_f32(float* %a) #0 {
+// CHECK-LABEL: define <2 x float> @test_vld1_dup_f32(float* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float*
 // CHECK: [[TMP2:%.*]] = load float, float* [[TMP1]]
@@ -258,7 +258,7 @@
   return vld1_dup_f32(a);
 }
 
-// CHECK-LABEL: define <1 x double> @test_vld1_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define <1 x double> @test_vld1_dup_f64(double* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to double*
 // CHECK: [[TMP2:%.*]] = load double, double* [[TMP1]]
@@ -269,7 +269,7 @@
   return vld1_dup_f64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_dup_p8(i8* %a) #0 {
+// CHECK-LABEL: define <8 x i8> @test_vld1_dup_p8(i8* %a) #1 {
 // CHECK: [[TMP0:%.*]] = load i8, i8* %a
 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
@@ -278,7 +278,7 @@
   return vld1_dup_p8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_dup_p16(i16* %a) #0 {
+// CHECK-LABEL: define <4 x i16> @test_vld1_dup_p16(i16* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
 // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]]
@@ -289,7 +289,7 @@
   return vld1_dup_p16(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vld1_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define <1 x i64> @test_vld1_dup_p64(i64* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
 // CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
@@ -300,7 +300,7 @@
   return vld1_dup_p64(a);
 }
 
-// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_dup_u64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_dup_u64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
@@ -318,7 +318,7 @@
   return vld2q_dup_u64(a);
 }
 
-// CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_dup_s64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_dup_s64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
@@ -336,7 +336,7 @@
   return vld2q_dup_s64(a);
 }
 
-// CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_dup_f64(double* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
@@ -354,7 +354,7 @@
   return vld2q_dup_f64(a);
 }
 
-// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_dup_p64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
@@ -372,7 +372,7 @@
   return vld2q_dup_p64(a);
 }
 
-// CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_dup_f64(double* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
@@ -390,7 +390,7 @@
   return vld2_dup_f64(a);
 }
 
-// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_dup_p64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
@@ -408,7 +408,7 @@
   return vld2_dup_p64(a);
 }
 
-// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_dup_u64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_dup_u64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
@@ -427,7 +427,7 @@
 // [{{x[0-9]+|sp}}]
 }
 
-// CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_dup_s64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_dup_s64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
@@ -446,7 +446,7 @@
 // [{{x[0-9]+|sp}}]
 }
 
-// CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_dup_f64(double* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
@@ -465,7 +465,7 @@
 // [{{x[0-9]+|sp}}]
 }
 
-// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_dup_p64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
@@ -484,7 +484,7 @@
 // [{{x[0-9]+|sp}}]
 }
 
-// CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_dup_f64(double* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
@@ -503,7 +503,7 @@
 // [{{x[0-9]+|sp}}]
 }
 
-// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_dup_p64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
@@ -522,7 +522,7 @@
 // [{{x[0-9]+|sp}}]
 }
 
-// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_dup_u64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_dup_u64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
@@ -540,7 +540,7 @@
   return vld4q_dup_u64(a);
 }
 
-// CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_dup_s64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_dup_s64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
@@ -558,7 +558,7 @@
   return vld4q_dup_s64(a);
 }
 
-// CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_dup_f64(double* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
@@ -576,7 +576,7 @@
   return vld4q_dup_f64(a);
 }
 
-// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_dup_p64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
@@ -594,7 +594,7 @@
   return vld4q_dup_p64(a);
 }
 
-// CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_dup_f64(double* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
@@ -612,7 +612,7 @@
   return vld4_dup_f64(a);
 }
 
-// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_dup_p64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
@@ -786,7 +786,7 @@
   return vld1q_lane_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_lane_u8(i8* %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: define <8 x i8> @test_vld1_lane_u8(i8* %a, <8 x i8> %b) #1 {
 // CHECK: [[TMP0:%.*]] = load i8, i8* %a
 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
 // CHECK: ret <8 x i8> [[VLD1_LANE]]
@@ -794,7 +794,7 @@
   return vld1_lane_u8(a, b, 7);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_lane_u16(i16* %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: define <4 x i16> @test_vld1_lane_u16(i16* %a, <4 x i16> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
@@ -806,7 +806,7 @@
   return vld1_lane_u16(a, b, 3);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vld1_lane_u32(i32* %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: define <2 x i32> @test_vld1_lane_u32(i32* %a, <2 x i32> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
@@ -818,7 +818,7 @@
   return vld1_lane_u32(a, b, 1);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vld1_lane_u64(i64* %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: define <1 x i64> @test_vld1_lane_u64(i64* %a, <1 x i64> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
@@ -830,7 +830,7 @@
   return vld1_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) #1 {
 // CHECK: [[TMP0:%.*]] = load i8, i8* %a
 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
 // CHECK: ret <8 x i8> [[VLD1_LANE]]
@@ -838,7 +838,7 @@
   return vld1_lane_s8(a, b, 7);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
@@ -850,7 +850,7 @@
   return vld1_lane_s16(a, b, 3);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
@@ -862,7 +862,7 @@
   return vld1_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
@@ -874,7 +874,7 @@
   return vld1_lane_s64(a, b, 0);
 }
 
-// CHECK-LABEL: define <4 x half> @test_vld1_lane_f16(half* %a, <4 x half> %b) #0 {
+// CHECK-LABEL: define <4 x half> @test_vld1_lane_f16(half* %a, <4 x half> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
@@ -886,7 +886,7 @@
   return vld1_lane_f16(a, b, 3);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) #0 {
+// CHECK-LABEL: define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
@@ -898,7 +898,7 @@
   return vld1_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define <1 x double> @test_vld1_lane_f64(double* %a, <1 x double> %b) #0 {
+// CHECK-LABEL: define <1 x double> @test_vld1_lane_f64(double* %a, <1 x double> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
@@ -910,7 +910,7 @@
   return vld1_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_lane_p8(i8* %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: define <8 x i8> @test_vld1_lane_p8(i8* %a, <8 x i8> %b) #1 {
 // CHECK: [[TMP0:%.*]] = load i8, i8* %a
 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
 // CHECK: ret <8 x i8> [[VLD1_LANE]]
@@ -918,7 +918,7 @@
   return vld1_lane_p8(a, b, 7);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_lane_p16(i16* %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: define <4 x i16> @test_vld1_lane_p16(i16* %a, <4 x i16> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
@@ -930,7 +930,7 @@
   return vld1_lane_p16(a, b, 3);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vld1_lane_p64(i64* %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: define <1 x i64> @test_vld1_lane_p64(i64* %a, <1 x i64> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
@@ -942,7 +942,7 @@
   return vld1_lane_p64(a, b, 0);
 }
 
-// CHECK-LABEL: define %struct.int8x16x2_t @test_vld2q_lane_s8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #0 {
+// CHECK-LABEL: define %struct.int8x16x2_t @test_vld2q_lane_s8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK: [[SRC:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
@@ -971,7 +971,7 @@
   return vld2q_lane_s8(ptr, src, 15);
 }
 
-// CHECK-LABEL: define %struct.uint8x16x2_t @test_vld2q_lane_u8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #0 {
+// CHECK-LABEL: define %struct.uint8x16x2_t @test_vld2q_lane_u8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK: [[SRC:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
@@ -1000,7 +1000,7 @@
   return vld2q_lane_u8(ptr, src, 15);
 }
 
-// CHECK-LABEL: define %struct.poly8x16x2_t @test_vld2q_lane_p8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #0 {
+// CHECK-LABEL: define %struct.poly8x16x2_t @test_vld2q_lane_p8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK: [[SRC:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
@@ -1029,7 +1029,7 @@
   return vld2q_lane_p8(ptr, src, 15);
 }
 
-// CHECK-LABEL: define %struct.int8x16x3_t @test_vld3q_lane_s8(i8* %ptr, [3 x <16 x i8>] %src.coerce) #0 {
+// CHECK-LABEL: define %struct.int8x16x3_t @test_vld3q_lane_s8(i8* %ptr, [3 x <16 x i8>] %src.coerce) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK: [[SRC:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
@@ -1061,7 +1061,7 @@
   return vld3q_lane_s8(ptr, src, 15);
 }
 
-// CHECK-LABEL: define %struct.uint8x16x3_t @test_vld3q_lane_u8(i8* %ptr, [3 x <16 x i8>] %src.coerce) #0 {
+// CHECK-LABEL: define %struct.uint8x16x3_t @test_vld3q_lane_u8(i8* %ptr, [3 x <16 x i8>] %src.coerce) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
 // CHECK: [[SRC:%.*]] = alloca %struct.uint8x16x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
@@ -1093,7 +1093,7 @@
   return vld3q_lane_u8(ptr, src, 15);
 }
 
-// CHECK-LABEL: define %struct.uint16x8x2_t @test_vld2q_lane_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint16x8x2_t @test_vld2q_lane_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
@@ -1127,7 +1127,7 @@
   return vld2q_lane_u16(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld2q_lane_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld2q_lane_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
@@ -1161,7 +1161,7 @@
   return vld2q_lane_u32(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_lane_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_lane_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
@@ -1195,7 +1195,7 @@
   return vld2q_lane_u64(a, b, 1);
 }
 
-//
CHECK-LABEL: define %struct.int16x8x2_t @test_vld2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int16x8x2_t @test_vld2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 @@ -1229,7 +1229,7 @@ return vld2q_lane_s16(a, b, 7); } -// CHECK-LABEL: define %struct.int32x4x2_t @test_vld2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int32x4x2_t @test_vld2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 @@ -1263,7 +1263,7 @@ return vld2q_lane_s32(a, b, 3); } -// CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16 @@ -1297,7 +1297,7 @@ return vld2q_lane_s64(a, b, 1); } -// CHECK-LABEL: define %struct.float16x8x2_t @test_vld2q_lane_f16(half* %a, [2 x <8 x half>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float16x8x2_t @test_vld2q_lane_f16(half* %a, [2 x <8 x half>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 @@ -1331,7 +1331,7 @@ return vld2q_lane_f16(a, b, 7); } -// CHECK-LABEL: define %struct.float32x4x2_t @test_vld2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float32x4x2_t @test_vld2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 @@ -1365,7 +1365,7 @@ return vld2q_lane_f32(a, b, 3); } -// CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16 @@ -1399,7 +1399,7 @@ return vld2q_lane_f64(a, b, 1); } -// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld2q_lane_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld2q_lane_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 @@ -1433,7 +1433,7 @@ return vld2q_lane_p16(a, b, 7); } -// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_lane_p64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_lane_p64(i64* %a, [2 x <2 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16 // CHECK: [[B:%.*]] = alloca 
%struct.poly64x2x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16 @@ -1467,7 +1467,7 @@ return vld2q_lane_p64(a, b, 1); } -// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld2_lane_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld2_lane_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 @@ -1496,7 +1496,7 @@ return vld2_lane_u8(a, b, 7); } -// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld2_lane_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld2_lane_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 @@ -1530,7 +1530,7 @@ return vld2_lane_u16(a, b, 3); } -// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld2_lane_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld2_lane_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 @@ -1564,7 +1564,7 @@ return vld2_lane_u32(a, b, 1); } -// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld2_lane_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld2_lane_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8 @@ -1598,7 +1598,7 @@ return vld2_lane_u64(a, b, 0); } -// CHECK-LABEL: define %struct.int8x8x2_t @test_vld2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int8x8x2_t @test_vld2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 @@ -1627,7 +1627,7 @@ return vld2_lane_s8(a, b, 7); } -// CHECK-LABEL: define %struct.int16x4x2_t @test_vld2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int16x4x2_t @test_vld2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 @@ -1661,7 +1661,7 @@ return vld2_lane_s16(a, b, 3); } -// CHECK-LABEL: define %struct.int32x2x2_t @test_vld2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int32x2x2_t @test_vld2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8 @@ -1695,7 +1695,7 @@ return vld2_lane_s32(a, b, 1); } -// CHECK-LABEL: define %struct.int64x1x2_t @test_vld2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int64x1x2_t @test_vld2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8 // 
CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8 @@ -1729,7 +1729,7 @@ return vld2_lane_s64(a, b, 0); } -// CHECK-LABEL: define %struct.float16x4x2_t @test_vld2_lane_f16(half* %a, [2 x <4 x half>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float16x4x2_t @test_vld2_lane_f16(half* %a, [2 x <4 x half>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8 @@ -1763,7 +1763,7 @@ return vld2_lane_f16(a, b, 3); } -// CHECK-LABEL: define %struct.float32x2x2_t @test_vld2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float32x2x2_t @test_vld2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8 @@ -1797,7 +1797,7 @@ return vld2_lane_f32(a, b, 1); } -// CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8 @@ -1831,7 +1831,7 @@ return vld2_lane_f64(a, b, 0); } -// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld2_lane_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld2_lane_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8 @@ -1860,7 +1860,7 @@ return vld2_lane_p8(a, b, 7); } -// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld2_lane_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld2_lane_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 @@ -1894,7 +1894,7 @@ return vld2_lane_p16(a, b, 3); } -// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_lane_p64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_lane_p64(i64* %a, [2 x <1 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8 @@ -1928,7 +1928,7 @@ return vld2_lane_p64(a, b, 0); } -// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld3q_lane_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld3q_lane_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16 @@ -1967,7 +1967,7 @@ return vld3q_lane_u16(a, b, 7); } -// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld3q_lane_u32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld3q_lane_u32(i32* %a, [3 x 
<4 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16 @@ -2006,7 +2006,7 @@ return vld3q_lane_u32(a, b, 3); } -// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_lane_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_lane_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16 @@ -2045,7 +2045,7 @@ return vld3q_lane_u64(a, b, 1); } -// CHECK-LABEL: define %struct.int16x8x3_t @test_vld3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int16x8x3_t @test_vld3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16 @@ -2084,7 +2084,7 @@ return vld3q_lane_s16(a, b, 7); } -// CHECK-LABEL: define %struct.int32x4x3_t @test_vld3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int32x4x3_t @test_vld3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16 @@ -2123,7 +2123,7 @@ return vld3q_lane_s32(a, b, 3); } -// CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16 @@ -2162,7 +2162,7 @@ return vld3q_lane_s64(a, b, 1); } -// CHECK-LABEL: define %struct.float16x8x3_t @test_vld3q_lane_f16(half* %a, [3 x <8 x half>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float16x8x3_t @test_vld3q_lane_f16(half* %a, [3 x <8 x half>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16 @@ -2201,7 +2201,7 @@ return vld3q_lane_f16(a, b, 7); } -// CHECK-LABEL: define %struct.float32x4x3_t @test_vld3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float32x4x3_t @test_vld3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16 @@ -2240,7 +2240,7 @@ return vld3q_lane_f32(a, b, 3); } -// CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16 @@ -2279,7 +2279,7 @@ return vld3q_lane_f64(a, b, 1); } -// CHECK-LABEL: define %struct.poly8x16x3_t 
@test_vld3q_lane_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly8x16x3_t @test_vld3q_lane_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16 @@ -2311,7 +2311,7 @@ return vld3q_lane_p8(a, b, 15); } -// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld3q_lane_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld3q_lane_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 @@ -2350,7 +2350,7 @@ return vld3q_lane_p16(a, b, 7); } -// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_lane_p64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_lane_p64(i64* %a, [3 x <2 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16 @@ -2389,7 +2389,7 @@ return vld3q_lane_p64(a, b, 1); } -// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld3_lane_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld3_lane_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8 @@ -2421,7 +2421,7 @@ return vld3_lane_u8(a, b, 7); } -// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld3_lane_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld3_lane_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8 @@ -2460,7 +2460,7 @@ return vld3_lane_u16(a, b, 3); } -// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld3_lane_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld3_lane_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8 @@ -2499,7 +2499,7 @@ return vld3_lane_u32(a, b, 1); } -// CHECK-LABEL: define %struct.uint64x1x3_t @test_vld3_lane_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint64x1x3_t @test_vld3_lane_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8 @@ -2538,7 +2538,7 @@ return vld3_lane_u64(a, b, 0); } -// CHECK-LABEL: define %struct.int8x8x3_t @test_vld3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int8x8x3_t @test_vld3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8 @@ -2570,7 +2570,7 @@ return vld3_lane_s8(a, b, 7); 
} -// CHECK-LABEL: define %struct.int16x4x3_t @test_vld3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int16x4x3_t @test_vld3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8 @@ -2609,7 +2609,7 @@ return vld3_lane_s16(a, b, 3); } -// CHECK-LABEL: define %struct.int32x2x3_t @test_vld3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int32x2x3_t @test_vld3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8 @@ -2648,7 +2648,7 @@ return vld3_lane_s32(a, b, 1); } -// CHECK-LABEL: define %struct.int64x1x3_t @test_vld3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int64x1x3_t @test_vld3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8 @@ -2687,7 +2687,7 @@ return vld3_lane_s64(a, b, 0); } -// CHECK-LABEL: define %struct.float16x4x3_t @test_vld3_lane_f16(half* %a, [3 x <4 x half>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float16x4x3_t @test_vld3_lane_f16(half* %a, [3 x <4 x half>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8 @@ -2726,7 +2726,7 @@ return vld3_lane_f16(a, b, 3); } -// CHECK-LABEL: define %struct.float32x2x3_t @test_vld3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float32x2x3_t @test_vld3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8 @@ -2765,7 +2765,7 @@ return vld3_lane_f32(a, b, 1); } -// CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8 @@ -2804,7 +2804,7 @@ return vld3_lane_f64(a, b, 0); } -// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld3_lane_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld3_lane_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8 @@ -2836,7 +2836,7 @@ return vld3_lane_p8(a, b, 7); } -// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld3_lane_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld3_lane_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca 
%struct.poly16x4x3_t, align 8 @@ -2875,7 +2875,7 @@ return vld3_lane_p16(a, b, 3); } -// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_lane_p64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_lane_p64(i64* %a, [3 x <1 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8 @@ -2914,7 +2914,7 @@ return vld3_lane_p64(a, b, 0); } -// CHECK-LABEL: define %struct.uint8x16x4_t @test_vld4q_lane_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint8x16x4_t @test_vld4q_lane_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16 @@ -2949,7 +2949,7 @@ return vld4q_lane_u8(a, b, 15); } -// CHECK-LABEL: define %struct.uint16x8x4_t @test_vld4q_lane_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint16x8x4_t @test_vld4q_lane_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 @@ -2993,7 +2993,7 @@ return vld4q_lane_u16(a, b, 7); } -// CHECK-LABEL: define %struct.uint32x4x4_t @test_vld4q_lane_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint32x4x4_t @test_vld4q_lane_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16 @@ -3037,7 +3037,7 @@ return vld4q_lane_u32(a, b, 3); } -// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_lane_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_lane_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16 @@ -3081,7 +3081,7 @@ return vld4q_lane_u64(a, b, 1); } -// CHECK-LABEL: define %struct.int8x16x4_t @test_vld4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int8x16x4_t @test_vld4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16 @@ -3116,7 +3116,7 @@ return vld4q_lane_s8(a, b, 15); } -// CHECK-LABEL: define %struct.int16x8x4_t @test_vld4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int16x8x4_t @test_vld4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16 @@ -3160,7 +3160,7 @@ return vld4q_lane_s16(a, b, 7); } -// CHECK-LABEL: define %struct.int32x4x4_t @test_vld4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int32x4x4_t @test_vld4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16 // CHECK: 
[[B:%.*]] = alloca %struct.int32x4x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16 @@ -3204,7 +3204,7 @@ return vld4q_lane_s32(a, b, 3); } -// CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16 @@ -3248,7 +3248,7 @@ return vld4q_lane_s64(a, b, 1); } -// CHECK-LABEL: define %struct.float16x8x4_t @test_vld4q_lane_f16(half* %a, [4 x <8 x half>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float16x8x4_t @test_vld4q_lane_f16(half* %a, [4 x <8 x half>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16 @@ -3292,7 +3292,7 @@ return vld4q_lane_f16(a, b, 7); } -// CHECK-LABEL: define %struct.float32x4x4_t @test_vld4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float32x4x4_t @test_vld4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16 @@ -3336,7 +3336,7 @@ return vld4q_lane_f32(a, b, 3); } -// CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16 @@ -3380,7 +3380,7 @@ return vld4q_lane_f64(a, b, 1); } -// CHECK-LABEL: define %struct.poly8x16x4_t @test_vld4q_lane_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly8x16x4_t @test_vld4q_lane_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16 @@ -3415,7 +3415,7 @@ return vld4q_lane_p8(a, b, 15); } -// CHECK-LABEL: define %struct.poly16x8x4_t @test_vld4q_lane_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly16x8x4_t @test_vld4q_lane_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16 @@ -3459,7 +3459,7 @@ return vld4q_lane_p16(a, b, 7); } -// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_lane_p64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_lane_p64(i64* %a, [4 x <2 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16 @@ -3503,7 +3503,7 @@ return vld4q_lane_p64(a, b, 1); } -// CHECK-LABEL: define %struct.uint8x8x4_t @test_vld4_lane_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint8x8x4_t 
@test_vld4_lane_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8 @@ -3538,7 +3538,7 @@ return vld4_lane_u8(a, b, 7); } -// CHECK-LABEL: define %struct.uint16x4x4_t @test_vld4_lane_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint16x4x4_t @test_vld4_lane_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8 @@ -3582,7 +3582,7 @@ return vld4_lane_u16(a, b, 3); } -// CHECK-LABEL: define %struct.uint32x2x4_t @test_vld4_lane_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint32x2x4_t @test_vld4_lane_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8 @@ -3626,7 +3626,7 @@ return vld4_lane_u32(a, b, 1); } -// CHECK-LABEL: define %struct.uint64x1x4_t @test_vld4_lane_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint64x1x4_t @test_vld4_lane_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8 @@ -3670,7 +3670,7 @@ return vld4_lane_u64(a, b, 0); } -// CHECK-LABEL: define %struct.int8x8x4_t @test_vld4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int8x8x4_t @test_vld4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8 @@ -3705,7 +3705,7 @@ return vld4_lane_s8(a, b, 7); } -// CHECK-LABEL: define %struct.int16x4x4_t @test_vld4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int16x4x4_t @test_vld4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8 @@ -3749,7 +3749,7 @@ return vld4_lane_s16(a, b, 3); } -// CHECK-LABEL: define %struct.int32x2x4_t @test_vld4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int32x2x4_t @test_vld4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8 @@ -3793,7 +3793,7 @@ return vld4_lane_s32(a, b, 1); } -// CHECK-LABEL: define %struct.int64x1x4_t @test_vld4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int64x1x4_t @test_vld4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8 @@ -3837,7 +3837,7 @@ return vld4_lane_s64(a, b, 0); } -// CHECK-LABEL: define %struct.float16x4x4_t @test_vld4_lane_f16(half* %a, [4 x <4 x half>] %b.coerce) #0 { +// CHECK-LABEL: 
define %struct.float16x4x4_t @test_vld4_lane_f16(half* %a, [4 x <4 x half>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
@@ -3881,7 +3881,7 @@
return vld4_lane_f16(a, b, 3);
}

-// CHECK-LABEL: define %struct.float32x2x4_t @test_vld4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float32x2x4_t @test_vld4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
@@ -3925,7 +3925,7 @@
return vld4_lane_f32(a, b, 1);
}

-// CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
@@ -3969,7 +3969,7 @@
return vld4_lane_f64(a, b, 0);
}

-// CHECK-LABEL: define %struct.poly8x8x4_t @test_vld4_lane_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly8x8x4_t @test_vld4_lane_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
@@ -4004,7 +4004,7 @@
return vld4_lane_p8(a, b, 7);
}

-// CHECK-LABEL: define %struct.poly16x4x4_t @test_vld4_lane_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly16x4x4_t @test_vld4_lane_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
@@ -4048,7 +4048,7 @@
return vld4_lane_p16(a, b, 3);
}

-// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_lane_p64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_lane_p64(i64* %a, [4 x <1 x i64>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
@@ -4248,7 +4248,7 @@
vst1q_lane_p64(a, b, 1);
}

-// CHECK-LABEL: define void @test_vst1_lane_u8(i8* %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_u8(i8* %a, <8 x i8> %b) #1 {
// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK: store i8 [[TMP0]], i8* %a
// CHECK: ret void
@@ -4256,7 +4256,7 @@
vst1_lane_u8(a, b, 7);
}

-// CHECK-LABEL: define void @test_vst1_lane_u16(i16* %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_u16(i16* %a, <4 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
@@ -4268,7 +4268,7 @@
vst1_lane_u16(a, b, 3);
}

-// CHECK-LABEL: define void @test_vst1_lane_u32(i32* %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_u32(i32* %a, <2 x i32> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
@@ -4280,7 +4280,7 @@
vst1_lane_u32(a, b, 1);
}

-// CHECK-LABEL: define void @test_vst1_lane_u64(i64* %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_u64(i64* %a, <1 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
@@ -4292,7 +4292,7 @@
vst1_lane_u64(a, b, 0);
}

-// CHECK-LABEL: define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) #1 {
// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK: store i8 [[TMP0]], i8* %a
// CHECK: ret void
@@ -4300,7 +4300,7 @@
vst1_lane_s8(a, b, 7);
}

-// CHECK-LABEL: define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
@@ -4312,7 +4312,7 @@
vst1_lane_s16(a, b, 3);
}

-// CHECK-LABEL: define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
@@ -4324,7 +4324,7 @@
vst1_lane_s32(a, b, 1);
}

-// CHECK-LABEL: define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
@@ -4336,7 +4336,7 @@
vst1_lane_s64(a, b, 0);
}

-// CHECK-LABEL: define void @test_vst1_lane_f16(half* %a, <4 x half> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_f16(half* %a, <4 x half> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
@@ -4348,7 +4348,7 @@
vst1_lane_f16(a, b, 3);
}

-// CHECK-LABEL: define void @test_vst1_lane_f32(float* %a, <2 x float> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_f32(float* %a, <2 x float> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
@@ -4360,7 +4360,7 @@
vst1_lane_f32(a, b, 1);
}

-// CHECK-LABEL: define void @test_vst1_lane_f64(double* %a, <1 x double> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_f64(double* %a, <1 x double> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
@@ -4372,7 +4372,7 @@
vst1_lane_f64(a, b, 0);
}

-// CHECK-LABEL: define void @test_vst1_lane_p8(i8* %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_p8(i8* %a, <8 x i8> %b) #1 {
// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK: store i8 [[TMP0]], i8* %a
// CHECK: ret void
@@ -4380,7 +4380,7 @@
vst1_lane_p8(a, b, 7);
}

-// CHECK-LABEL: define void @test_vst1_lane_p16(i16* %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_p16(i16* %a, <4 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] =
bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> @@ -4392,7 +4392,7 @@ vst1_lane_p16(a, b, 3); } -// CHECK-LABEL: define void @test_vst1_lane_p64(i64* %a, <1 x i64> %b) #0 { +// CHECK-LABEL: define void @test_vst1_lane_p64(i64* %a, <1 x i64> %b) #1 { // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> @@ -4404,7 +4404,7 @@ vst1_lane_p64(a, b, 0); } -// CHECK-LABEL: define void @test_vst2q_lane_u8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_u8(i8* %a, [2 x <16 x i8>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0 @@ -4424,7 +4424,7 @@ vst2q_lane_u8(a, b, 15); } -// CHECK-LABEL: define void @test_vst2q_lane_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0 @@ -4449,7 +4449,7 @@ vst2q_lane_u16(a, b, 7); } -// CHECK-LABEL: define void @test_vst2q_lane_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0 @@ -4474,7 +4474,7 @@ vst2q_lane_u32(a, b, 3); } -// CHECK-LABEL: define void @test_vst2q_lane_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0 @@ -4499,7 +4499,7 @@ vst2q_lane_u64(a, b, 1); } -// CHECK-LABEL: define void @test_vst2q_lane_s8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_s8(i8* %a, [2 x <16 x i8>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0 @@ -4519,7 +4519,7 @@ vst2q_lane_s8(a, b, 15); } -// CHECK-LABEL: define void @test_vst2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0 @@ -4544,7 +4544,7 @@ vst2q_lane_s16(a, b, 7); } -// CHECK-LABEL: define void @test_vst2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_s32(i32* %a, [2 x <4 x i32>] 
%b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0 @@ -4569,7 +4569,7 @@ vst2q_lane_s32(a, b, 3); } -// CHECK-LABEL: define void @test_vst2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0 @@ -4594,7 +4594,7 @@ vst2q_lane_s64(a, b, 1); } -// CHECK-LABEL: define void @test_vst2q_lane_f16(half* %a, [2 x <8 x half>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_f16(half* %a, [2 x <8 x half>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0 @@ -4619,7 +4619,7 @@ vst2q_lane_f16(a, b, 7); } -// CHECK-LABEL: define void @test_vst2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0 @@ -4644,7 +4644,7 @@ vst2q_lane_f32(a, b, 3); } -// CHECK-LABEL: define void @test_vst2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0 @@ -4669,7 +4669,7 @@ vst2q_lane_f64(a, b, 1); } -// CHECK-LABEL: define void @test_vst2q_lane_p8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_p8(i8* %a, [2 x <16 x i8>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0 @@ -4689,7 +4689,7 @@ vst2q_lane_p8(a, b, 15); } -// CHECK-LABEL: define void @test_vst2q_lane_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0 @@ -4714,7 +4714,7 @@ vst2q_lane_p16(a, b, 7); } -// CHECK-LABEL: define void @test_vst2q_lane_p64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_p64(i64* %a, [2 x <2 x i64>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, 
%struct.poly64x2x2_t* [[B]], i32 0, i32 0 @@ -4739,7 +4739,7 @@ vst2q_lane_p64(a, b, 1); } -// CHECK-LABEL: define void @test_vst2_lane_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0 @@ -4759,7 +4759,7 @@ vst2_lane_u8(a, b, 7); } -// CHECK-LABEL: define void @test_vst2_lane_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0 @@ -4784,7 +4784,7 @@ vst2_lane_u16(a, b, 3); } -// CHECK-LABEL: define void @test_vst2_lane_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0 @@ -4809,7 +4809,7 @@ vst2_lane_u32(a, b, 1); } -// CHECK-LABEL: define void @test_vst2_lane_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0 @@ -4834,7 +4834,7 @@ vst2_lane_u64(a, b, 0); } -// CHECK-LABEL: define void @test_vst2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0 @@ -4854,7 +4854,7 @@ vst2_lane_s8(a, b, 7); } -// CHECK-LABEL: define void @test_vst2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0 @@ -4879,7 +4879,7 @@ vst2_lane_s16(a, b, 3); } -// CHECK-LABEL: define void @test_vst2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0 @@ -4904,7 +4904,7 @@ vst2_lane_s32(a, b, 1); } -// CHECK-LABEL: define void @test_vst2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca 
%struct.int64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0 @@ -4929,7 +4929,7 @@ vst2_lane_s64(a, b, 0); } -// CHECK-LABEL: define void @test_vst2_lane_f16(half* %a, [2 x <4 x half>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_f16(half* %a, [2 x <4 x half>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0 @@ -4954,7 +4954,7 @@ vst2_lane_f16(a, b, 3); } -// CHECK-LABEL: define void @test_vst2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0 @@ -4979,7 +4979,7 @@ vst2_lane_f32(a, b, 1); } -// CHECK-LABEL: define void @test_vst2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0 @@ -5004,7 +5004,7 @@ vst2_lane_f64(a, b, 0); } -// CHECK-LABEL: define void @test_vst2_lane_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0 @@ -5024,7 +5024,7 @@ vst2_lane_p8(a, b, 7); } -// CHECK-LABEL: define void @test_vst2_lane_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0 @@ -5049,7 +5049,7 @@ vst2_lane_p16(a, b, 3); } -// CHECK-LABEL: define void @test_vst2_lane_p64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_p64(i64* %a, [2 x <1 x i64>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[B]], i32 0, i32 0 @@ -5074,7 +5074,7 @@ vst2_lane_p64(a, b, 0); } -// CHECK-LABEL: define void @test_vst3q_lane_u8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst3q_lane_u8(i8* %a, [3 x <16 x i8>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0 @@ -5097,7 +5097,7 @@ vst3q_lane_u8(a, b, 15); 
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
@@ -5127,7 +5127,7 @@
   vst3q_lane_u16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_u32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_u32(i32* %a, [3 x <4 x i32>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
@@ -5157,7 +5157,7 @@
   vst3q_lane_u32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0
@@ -5187,7 +5187,7 @@
   vst3q_lane_u64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_s8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_s8(i8* %a, [3 x <16 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
@@ -5210,7 +5210,7 @@
   vst3q_lane_s8(a, b, 15);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
@@ -5240,7 +5240,7 @@
   vst3q_lane_s16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
@@ -5270,7 +5270,7 @@
   vst3q_lane_s32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0
@@ -5300,7 +5300,7 @@
   vst3q_lane_s64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_f16(half* %a, [3 x <8 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_f16(half* %a, [3 x <8 x half>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
@@ -5330,7 +5330,7 @@
   vst3q_lane_f16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
@@ -5360,7 +5360,7 @@
   vst3q_lane_f32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
@@ -5390,7 +5390,7 @@
   vst3q_lane_f64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
@@ -5413,7 +5413,7 @@
   vst3q_lane_p8(a, b, 15);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
@@ -5443,7 +5443,7 @@
   vst3q_lane_p16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_p64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_p64(i64* %a, [3 x <2 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[B]], i32 0, i32 0
@@ -5473,7 +5473,7 @@
   vst3q_lane_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
@@ -5496,7 +5496,7 @@
   vst3_lane_u8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
@@ -5526,7 +5526,7 @@
   vst3_lane_u16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
@@ -5556,7 +5556,7 @@
   vst3_lane_u32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
@@ -5586,7 +5586,7 @@
   vst3_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
@@ -5609,7 +5609,7 @@
   vst3_lane_s8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
@@ -5639,7 +5639,7 @@
   vst3_lane_s16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
@@ -5669,7 +5669,7 @@
   vst3_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
@@ -5699,7 +5699,7 @@
   vst3_lane_s64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_f16(half* %a, [3 x <4 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_f16(half* %a, [3 x <4 x half>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
@@ -5729,7 +5729,7 @@
   vst3_lane_f16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
@@ -5759,7 +5759,7 @@
   vst3_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
@@ -5789,7 +5789,7 @@
   vst3_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
@@ -5812,7 +5812,7 @@
   vst3_lane_p8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
@@ -5842,7 +5842,7 @@
   vst3_lane_p16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_p64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_p64(i64* %a, [3 x <1 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[B]], i32 0, i32 0
@@ -5872,7 +5872,7 @@
   vst3_lane_p64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
@@ -5898,7 +5898,7 @@
   vst4q_lane_u8(a, b, 15);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
@@ -5933,7 +5933,7 @@
   vst4q_lane_u16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
@@ -5968,7 +5968,7 @@
   vst4q_lane_u32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0
@@ -6003,7 +6003,7 @@
   vst4q_lane_u64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
@@ -6029,7 +6029,7 @@
   vst4q_lane_s8(a, b, 15);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
@@ -6064,7 +6064,7 @@
   vst4q_lane_s16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
@@ -6099,7 +6099,7 @@
   vst4q_lane_s32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0
@@ -6134,7 +6134,7 @@
   vst4q_lane_s64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_f16(half* %a, [4 x <8 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_f16(half* %a, [4 x <8 x half>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
@@ -6169,7 +6169,7 @@
   vst4q_lane_f16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
@@ -6204,7 +6204,7 @@
   vst4q_lane_f32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
@@ -6239,7 +6239,7 @@
   vst4q_lane_f64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
@@ -6265,7 +6265,7 @@
   vst4q_lane_p8(a, b, 15);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
@@ -6300,7 +6300,7 @@
   vst4q_lane_p16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_p64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_p64(i64* %a, [4 x <2 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[B]], i32 0, i32 0
@@ -6335,7 +6335,7 @@
   vst4q_lane_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
@@ -6361,7 +6361,7 @@
   vst4_lane_u8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
@@ -6396,7 +6396,7 @@
   vst4_lane_u16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
@@ -6431,7 +6431,7 @@
   vst4_lane_u32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
@@ -6466,7 +6466,7 @@
   vst4_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
@@ -6492,7 +6492,7 @@
   vst4_lane_s8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
@@ -6527,7 +6527,7 @@
   vst4_lane_s16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
@@ -6562,7 +6562,7 @@
   vst4_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
@@ -6597,7 +6597,7 @@
   vst4_lane_s64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_f16(half* %a, [4 x <4 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_f16(half* %a, [4 x <4 x half>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
@@ -6632,7 +6632,7 @@
   vst4_lane_f16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
@@ -6667,7 +6667,7 @@
   vst4_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
@@ -6702,7 +6702,7 @@
   vst4_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
@@ -6728,7 +6728,7 @@
   vst4_lane_p8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
@@ -6763,7 +6763,7 @@
   vst4_lane_p16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_p64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_p64(i64* %a, [4 x <1 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[B]], i32 0, i32 0
@@ -6797,3 +6797,7 @@
 void test_vst4_lane_p64(poly64_t *a, poly64x1x4_t b) {
   vst4_lane_p64(a, b, 0);
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="128"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="64"
+// CHECK-NOT: attributes #2 ={{.*}}"min-legal-vector-width"
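The three groups pinned down by these new checks mirror what the width bookkeeping sees in this file: #0 collects the definitions whose IR signatures carry 128-bit vectors, #1 the purely 64-bit ones, and the vst*_lane wrappers end up in #2 with no width attribute at all, because their aggregate parameters are coerced to arrays of vectors ([2 x <4 x half>], [3 x <16 x i8>], and so on), and an array is not an IR vector type. A minimal sketch of the three cases, assuming an aarch64 target and function names invented for illustration, not taken from the test suite:

    #include <arm_neon.h>

    /* <16 x i8> in and out -> expect "min-legal-vector-width"="128" */
    uint8x16_t pass_q(uint8x16_t v) { return v; }

    /* <8 x i8> in and out -> expect "min-legal-vector-width"="64" */
    uint8x8_t pass_d(uint8x8_t v) { return v; }

    /* float32x2x2_t arrives as [2 x <2 x float>], an array rather than a
       vector, so the signature scan records no width for this definition. */
    void store_two(float32_t *p, float32x2x2_t v) { vst2_f32(p, v); }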
Index: test/CodeGen/aarch64-neon-scalar-copy.c
===================================================================
--- test/CodeGen/aarch64-neon-scalar-copy.c
+++ test/CodeGen/aarch64-neon-scalar-copy.c
@@ -23,7 +23,7 @@
 }
 
 
-// CHECK-LABEL: define float @test_vdups_laneq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: define float @test_vdups_laneq_f32(<4 x float> %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
@@ -33,7 +33,7 @@
 }
 
 
-// CHECK-LABEL: define double @test_vdupd_laneq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: define double @test_vdupd_laneq_f64(<2 x double> %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -118,7 +118,7 @@
   return vdupd_lane_u64(a, 0);
 }
 
-// CHECK-LABEL: define i8 @test_vdupb_laneq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: define i8 @test_vdupb_laneq_s8(<16 x i8> %a) #1 {
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
 // CHECK: ret i8 [[VGETQ_LANE]]
 int8_t test_vdupb_laneq_s8(int8x16_t a) {
@@ -126,7 +126,7 @@
 }
 
 
-// CHECK-LABEL: define i16 @test_vduph_laneq_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: define i16 @test_vduph_laneq_s16(<8 x i16> %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -136,7 +136,7 @@
 }
 
 
-// CHECK-LABEL: define i32 @test_vdups_laneq_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: define i32 @test_vdups_laneq_s32(<4 x i32> %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -146,7 +146,7 @@
 }
 
 
-// CHECK-LABEL: define i64 @test_vdupd_laneq_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: define i64 @test_vdupd_laneq_s64(<2 x i64> %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -156,7 +156,7 @@
 }
 
 
-// CHECK-LABEL: define i8 @test_vdupb_laneq_u8(<16 x i8> %a) #0 {
+// CHECK-LABEL: define i8 @test_vdupb_laneq_u8(<16 x i8> %a) #1 {
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
 // CHECK: ret i8 [[VGETQ_LANE]]
 uint8_t test_vdupb_laneq_u8(uint8x16_t a) {
@@ -164,7 +164,7 @@
 }
 
 
-// CHECK-LABEL: define i16 @test_vduph_laneq_u16(<8 x i16> %a) #0 {
+// CHECK-LABEL: define i16 @test_vduph_laneq_u16(<8 x i16> %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -174,7 +174,7 @@
 }
 
 
-// CHECK-LABEL: define i32 @test_vdups_laneq_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: define i32 @test_vdups_laneq_u32(<4 x i32> %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -184,7 +184,7 @@
 }
 
 
-// CHECK-LABEL: define i64 @test_vdupd_laneq_u64(<2 x i64> %a) #0 {
+// CHECK-LABEL: define i64 @test_vdupd_laneq_u64(<2 x i64> %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -209,14 +209,14 @@
   return vduph_lane_p16(a, 3);
 }
 
-// CHECK-LABEL: define i8 @test_vdupb_laneq_p8(<16 x i8> %a) #0 {
+// CHECK-LABEL: define i8 @test_vdupb_laneq_p8(<16 x i8> %a) #1 {
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
 // CHECK: ret i8 [[VGETQ_LANE]]
 poly8_t test_vdupb_laneq_p8(poly8x16_t a) {
   return vdupb_laneq_p8(a, 15);
 }
 
-// CHECK-LABEL: define i16 @test_vduph_laneq_p16(<8 x i16> %a) #0 {
+// CHECK-LABEL: define i16 @test_vduph_laneq_p16(<8 x i16> %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -225,3 +225,5 @@
   return vduph_laneq_p16(a, 7);
 }
 
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
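What the renumbering in this file encodes is that the width requirement comes from the argument, not the result: every _laneq_ test above returns a scalar, yet its <16 x i8>/<8 x i16>/<4 x i32>/<2 x i64> parameter moves the definition from group #0 into the 128-bit group #1, while the _lane_ variants on 64-bit vectors stay at "64". A minimal sketch, assuming an aarch64 target and a function name invented for illustration:

    #include <arm_neon.h>

    /* Scalar return type, but the parameter is <16 x i8> in IR, so the
       definition should still carry "min-legal-vector-width"="128". */
    int8_t last_lane(int8x16_t v) {
      return vgetq_lane_s8(v, 15);
    }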
Index: test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
===================================================================
--- test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
+++ test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
@@ -26,7 +26,7 @@
   return vmuld_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define float @test_vmuls_laneq_f32(float %a, <4 x float> %b) #0 {
+// CHECK-LABEL: define float @test_vmuls_laneq_f32(float %a, <4 x float> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
@@ -36,7 +36,7 @@
   return vmuls_laneq_f32(a, b, 3);
 }
 
-// CHECK-LABEL: define double @test_vmuld_laneq_f64(double %a, <2 x double> %b) #0 {
+// CHECK-LABEL: define double @test_vmuld_laneq_f64(double %a, <2 x double> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -65,7 +65,7 @@
   return vmulxs_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define float @test_vmulxs_laneq_f32(float %a, <4 x float> %b) #0 {
+// CHECK-LABEL: define float @test_vmulxs_laneq_f32(float %a, <4 x float> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
@@ -85,7 +85,7 @@
   return vmulxd_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define double @test_vmulxd_laneq_f64(double %a, <2 x double> %b) #0 {
+// CHECK-LABEL: define double @test_vmulxd_laneq_f64(double %a, <2 x double> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -112,7 +112,7 @@
 }
 
 
-// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_0(<1 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_0(<1 x double> %a, <2 x double> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
 // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
@@ -128,7 +128,7 @@
   return vmulx_laneq_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_1(<1 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_1(<1 x double> %a, <2 x double> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
 // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
@@ -165,7 +165,7 @@
   return vfmad_lane_f64(a, b, c, 0);
 }
 
-// CHECK-LABEL: define double @test_vfmad_laneq_f64(double %a, double %b, <2 x double> %c) #0 {
+// CHECK-LABEL: define double @test_vfmad_laneq_f64(double %a, double %b, <2 x double> %c) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %c to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
 // CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -215,7 +215,7 @@
   return vfms_lane_f64(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <1 x double> @test_vfma_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #0 {
+// CHECK-LABEL: define <1 x double> @test_vfma_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
@@ -230,7 +230,7 @@
   return vfma_laneq_f64(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <1 x double> @test_vfms_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #0 {
+// CHECK-LABEL: define <1 x double> @test_vfms_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #1 {
 // CHECK: [[SUB:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b
 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
@@ -269,7 +269,7 @@
   return vqdmulls_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define i32 @test_vqdmullh_laneq_s16(i16 %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define i32 @test_vqdmullh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -282,7 +282,7 @@
   return vqdmullh_laneq_s16(a, b, 7);
 }
 
-// CHECK-LABEL: define i64 @test_vqdmulls_laneq_s32(i32 %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define i64 @test_vqdmulls_laneq_s32(i32 %a, <4 x i32> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -316,7 +316,7 @@
 }
 
 
-// CHECK-LABEL: define i16 @test_vqdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define i16 @test_vqdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -330,7 +330,7 @@
 }
 
 
-// CHECK-LABEL: define i32 @test_vqdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define i32 @test_vqdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -364,7 +364,7 @@
 }
 
 
-// CHECK-LABEL: define i16 @test_vqrdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define i16 @test_vqrdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -378,7 +378,7 @@
 }
 
 
-// CHECK-LABEL: define i32 @test_vqrdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define i32 @test_vqrdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -413,7 +413,7 @@
   return vqdmlals_lane_s32(a, b, c, 1);
 }
 
-// CHECK-LABEL: define i32 @test_vqdmlalh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #0 {
+// CHECK-LABEL: define i32 @test_vqdmlalh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %c to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -427,7 +427,7 @@
   return vqdmlalh_laneq_s16(a, b, c, 7);
 }
 
-// CHECK-LABEL: define i64 @test_vqdmlals_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #0 {
+// CHECK-LABEL: define i64 @test_vqdmlals_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %c to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -463,7 +463,7 @@
   return vqdmlsls_lane_s32(a, b, c, 1);
 }
 
-// CHECK-LABEL: define i32 @test_vqdmlslh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #0 {
+// CHECK-LABEL: define i32 @test_vqdmlslh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %c to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -477,7 +477,7 @@
   return vqdmlslh_laneq_s16(a, b, c, 7);
 }
 
-// CHECK-LABEL: define i64 @test_vqdmlsls_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #0 {
+// CHECK-LABEL: define i64 @test_vqdmlsls_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %c to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -513,7 +513,7 @@
   return result;
 }
 
-// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_2() #0 {
+// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_2() #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
 // CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32>
@@ -540,3 +540,6 @@
   result = vmulx_laneq_f64(arg1, arg3, 1);
   return result;
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
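In the tbl/tbx file that follows, the renumbering happens on two levels at once: the function attribute groups split by width (#0 stays the 64-bit group, the q-register definitions move to #1), and the extra group pushes the attribute set used at the intrinsic call sites from #2 up to #3. The call-site group itself says nothing about vector width; it is presumably the usual nounwind-style set, which is why only its index changes. A minimal sketch of a function expected to land in the 128-bit group, assuming an aarch64 target and a function name invented for illustration:

    #include <arm_neon.h>

    /* Parameters and result are <16 x i8> in IR, so the definition should
       end up with "min-legal-vector-width"="128"; the vqtbl1q_s8 call inside
       lowers to the @llvm.aarch64.neon.tbl1.v16i8 intrinsic checked below. */
    int8x16_t lookup_q(int8x16_t table, uint8x16_t index) {
      return vqtbl1q_s8(table, index);
    }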
Index: test/CodeGen/aarch64-neon-tbl.c
===================================================================
--- test/CodeGen/aarch64-neon-tbl.c
+++ test/CodeGen/aarch64-neon-tbl.c
@@ -7,14 +7,14 @@
 
 // CHECK-LABEL: define <8 x i8> @test_vtbl1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
 // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32>
-// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
+// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
 // CHECK: ret <8 x i8> [[VTBL11_I]]
 int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
   return vtbl1_s8(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vqtbl1_s8(<16 x i8> %a, <8 x i8> %b) #0 {
-// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #2
+// CHECK-LABEL: define <8 x i8> @test_vqtbl1_s8(<16 x i8> %a, <8 x i8> %b) #1 {
+// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #3
 // CHECK: ret <8 x i8> [[VTBL1_I]]
 int8x8_t test_vqtbl1_s8(int8x16_t a, int8x8_t b) {
   return vqtbl1_s8(a, b);
@@ -36,7 +36,7 @@
 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
 // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32>
-// CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
+// CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
 // CHECK: ret <8 x i8> [[VTBL13_I]]
 int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
   return vtbl2_s8(a, b);
@@ -57,7 +57,7 @@
 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #2
+// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #3
 // CHECK: ret <8 x i8> [[VTBL2_I]]
 int8x8_t test_vqtbl2_s8(int8x16x2_t a, int8x8_t b) {
   return vqtbl2_s8(a, b);
@@ -83,7 +83,7 @@
 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32>
 // CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32>
-// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #2
+// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #3
 // CHECK: ret <8 x i8> [[VTBL26_I]]
 int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
   return vtbl3_s8(a, b);
@@ -107,7 +107,7 @@
 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #2
+// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #3
 // CHECK: ret <8 x i8> [[VTBL3_I]]
 int8x8_t test_vqtbl3_s8(int8x16x3_t a, int8x8_t b) {
   return vqtbl3_s8(a, b);
@@ -136,7 +136,7 @@
 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32>
 // CHECK: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32>
-// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #2
+// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #3
 // CHECK: ret <8 x i8> [[VTBL28_I]]
 int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
   return vtbl4_s8(a, b);
@@ -163,20 +163,20 @@
 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #2
+// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #3
 // CHECK: ret <8 x i8> [[VTBL4_I]]
 int8x8_t test_vqtbl4_s8(int8x16x4_t a, int8x8_t b) {
   return vqtbl4_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
-// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #2
+// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_s8(<16 x i8> %a, <16 x i8> %b) #1 {
+// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #3
 // CHECK: ret <16 x i8> [[VTBL1_I]]
 int8x16_t test_vqtbl1q_s8(int8x16_t a, int8x16_t b) {
   return vqtbl1q_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_s8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_s8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
 // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK: [[A:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[A]], i32 0, i32 0
@@ -191,13 +191,13 @@
 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #2
+// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #3
 // CHECK: ret <16 x i8> [[VTBL2_I]]
 int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) {
   return vqtbl2q_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_s8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_s8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
 // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK: [[A:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[A]], i32 0, i32 0
@@ -215,13 +215,13 @@
 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #2
+// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #3
 // CHECK: ret <16 x i8> [[VTBL3_I]]
 int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) {
   return vqtbl3q_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_s8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_s8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
 // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK: [[A:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[A]], i32 0, i32 0
@@ -242,7 +242,7 @@
 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #2
+// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #3
 // CHECK: ret <16 x i8> [[VTBL4_I]]
 int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) {
   return vqtbl4q_s8(a, b);
@@ -250,7 +250,7 @@
 
 // CHECK-LABEL: define <8 x i8> @test_vtbx1_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
 // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32>
-// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #2
+// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #3
 // CHECK: [[TMP0:%.*]] = icmp uge <8 x i8> %c,
 // CHECK: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
 // CHECK: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], %a
@@ -278,7 +278,7 @@
 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
 // CHECK: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32>
-// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #2
+// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #3
 // CHECK: ret <8 x i8> [[VTBX13_I]]
 int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
   return vtbx2_s8(a, b, c);
@@ -304,7 +304,7 @@
 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32>
 // CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32>
-// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #2
+// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #3
 // CHECK: [[TMP4:%.*]] = icmp uge <8 x i8> %c,
 // CHECK: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
 // CHECK: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], %a
@@ -339,14 +339,14 @@
 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
 // CHECK: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32>
 // CHECK: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32>
-// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #2
+// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #3
 // CHECK: ret <8 x i8> [[VTBX28_I]]
 int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
   return vtbx4_s8(a, b, c);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vqtbx1_s8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #0 {
-// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #2
+// CHECK-LABEL: define <8 x i8> @test_vqtbx1_s8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #1 {
+// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #3
 // CHECK: ret <8 x i8> [[VTBX1_I]]
 int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, int8x8_t c) {
   return vqtbx1_s8(a, b, c);
@@ -367,7 +367,7 @@
 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #2
+// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #3
 // CHECK: ret <8 x i8> [[VTBX2_I]]
 int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, int8x8_t c) {
   return vqtbx2_s8(a, b, c);
@@ -391,7 +391,7 @@
 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #2
+// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #3
 // CHECK: ret <8 x i8> [[VTBX3_I]]
 int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, int8x8_t c) {
   return vqtbx3_s8(a, b, c);
@@ -418,20 +418,20 @@
 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #2
+// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #3
 // CHECK: ret <8 x i8> [[VTBX4_I]]
 int8x8_t test_vqtbx4_s8(int8x8_t a, int8x16x4_t b, int8x8_t c) {
   return vqtbx4_s8(a, b, c);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
-// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #2
+// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #1 {
+// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #3
 // CHECK: ret <16 x i8> [[VTBX1_I]]
 int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
   return vqtbx1q_s8(a, b, c);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_s8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_s8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
 // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
@@ -446,13 +446,13 @@
 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #2
+// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #3
 // CHECK: ret <16 x i8> [[VTBX2_I]]
 int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, int8x16_t c) {
   return vqtbx2q_s8(a, b, c);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_s8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_s8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
 // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
@@ -470,13 +470,13 @@
 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #2
+// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #3
 // CHECK: ret <16 x i8> [[VTBX3_I]]
 int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, int8x16_t c) {
   return vqtbx3q_s8(a, b, c);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_s8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_s8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
 // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
@@ -497,7 +497,7 @@
 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #2
+// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #3
 // CHECK: ret <16 x i8> [[VTBX4_I]]
 int8x16_t test_vqtbx4q_s8(int8x16_t a, int8x16x4_t b, int8x16_t c) {
   return vqtbx4q_s8(a, b, c);
@@ -505,14 +505,14 @@
 
 // CHECK-LABEL: define <8 x i8> @test_vtbl1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
 // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32>
-// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
+// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
 // CHECK: ret <8 x i8> [[VTBL11_I]]
 uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
   return vtbl1_u8(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vqtbl1_u8(<16 x i8> %a, <8 x i8> %b) #0 {
-// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #2
+// CHECK-LABEL: define <8 x i8> @test_vqtbl1_u8(<16 x i8> %a, <8 x i8> %b) #1 {
+// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #3
 // CHECK: ret <8 x i8> [[VTBL1_I]]
 uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) {
   return vqtbl1_u8(a, b);
@@ -534,7 +534,7 @@
 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
 // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32>
-// CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
@llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL13_I]] uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) { return vtbl2_u8(a, b); @@ -555,7 +555,7 @@ // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 -// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #2 +// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL2_I]] uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) { return vqtbl2_u8(a, b); @@ -581,7 +581,7 @@ // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> -// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #2 +// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL26_I]] uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) { return vtbl3_u8(a, b); @@ -605,7 +605,7 @@ // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 -// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #2 +// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL3_I]] uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) { return vqtbl3_u8(a, b); @@ -634,7 +634,7 @@ // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> -// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #2 +// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL28_I]] uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) { return vtbl4_u8(a, b); @@ -661,20 +661,20 @@ // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16 -// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #2 +// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 
x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL4_I]] uint8x8_t test_vqtbl4_u8(uint8x16x4_t a, uint8x8_t b) { return vqtbl4_u8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_u8(<16 x i8> %a, <16 x i8> %b) #0 { -// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #2 +// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_u8(<16 x i8> %a, <16 x i8> %b) #1 { +// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #3 // CHECK: ret <16 x i8> [[VTBL1_I]] uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) { return vqtbl1q_u8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_u8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_u8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 { // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[A]], i32 0, i32 0 @@ -689,13 +689,13 @@ // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 -// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #2 +// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #3 // CHECK: ret <16 x i8> [[VTBL2_I]] uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) { return vqtbl2q_u8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_u8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_u8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 { // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[A]], i32 0, i32 0 @@ -713,13 +713,13 @@ // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 -// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #2 +// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #3 // CHECK: ret <16 x i8> [[VTBL3_I]] uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) { return vqtbl3q_u8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_u8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_u8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 { // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[A]], i32 0, i32 0 @@ -740,7 +740,7 @@ // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds 
%struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16 -// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #2 +// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #3 // CHECK: ret <16 x i8> [[VTBL4_I]] uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) { return vqtbl4q_u8(a, b); @@ -748,7 +748,7 @@ // CHECK-LABEL: define <8 x i8> @test_vtbx1_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> -// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #2 +// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #3 // CHECK: [[TMP0:%.*]] = icmp uge <8 x i8> %c, // CHECK: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> // CHECK: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], %a @@ -776,7 +776,7 @@ // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 // CHECK: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> -// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #2 +// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX13_I]] uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) { return vtbx2_u8(a, b, c); @@ -802,7 +802,7 @@ // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> -// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #2 +// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #3 // CHECK: [[TMP4:%.*]] = icmp uge <8 x i8> %c, // CHECK: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8> // CHECK: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], %a @@ -837,14 +837,14 @@ // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 // CHECK: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> -// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #2 +// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX28_I]] uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) { return vtbx4_u8(a, b, c); } -// CHECK-LABEL: define <8 x i8> @test_vqtbx1_u8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #0 { -// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #2 +// CHECK-LABEL: define <8 x i8> @test_vqtbx1_u8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #1 { +// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX1_I]] uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) { return vqtbx1_u8(a, b, c); @@ -865,7 +865,7 @@ // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 -// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #2 +// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX2_I]] uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) { return vqtbx2_u8(a, b, c); @@ -889,7 +889,7 @@ // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 -// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #2 +// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX3_I]] uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) { return vqtbx3_u8(a, b, c); @@ -916,20 +916,20 @@ // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16 -// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #2 +// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX4_I]] uint8x8_t test_vqtbx4_u8(uint8x8_t a, uint8x16x4_t b, uint8x8_t c) { return vqtbx4_u8(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { -// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #2 +// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #1 { +// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #3 // CHECK: ret <16 x i8> [[VTBX1_I]] uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { return vqtbx1q_u8(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_u8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_u8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) 
#1 { // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0 @@ -944,13 +944,13 @@ // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 -// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #2 +// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #3 // CHECK: ret <16 x i8> [[VTBX2_I]] uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) { return vqtbx2q_u8(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_u8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_u8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 { // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0 @@ -968,13 +968,13 @@ // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 -// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #2 +// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #3 // CHECK: ret <16 x i8> [[VTBX3_I]] uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) { return vqtbx3q_u8(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_u8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_u8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 { // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0 @@ -995,7 +995,7 @@ // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16 -// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #2 +// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #3 // CHECK: ret <16 x i8> [[VTBX4_I]] uint8x16_t test_vqtbx4q_u8(uint8x16_t a, uint8x16x4_t b, uint8x16_t c) { 
return vqtbx4q_u8(a, b, c); @@ -1003,14 +1003,14 @@ // CHECK-LABEL: define <8 x i8> @test_vtbl1_p8(<8 x i8> %a, <8 x i8> %b) #0 { // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> -// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2 +// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL11_I]] poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) { return vtbl1_p8(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vqtbl1_p8(<16 x i8> %a, <8 x i8> %b) #0 { -// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #2 +// CHECK-LABEL: define <8 x i8> @test_vqtbl1_p8(<16 x i8> %a, <8 x i8> %b) #1 { +// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL1_I]] poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) { return vqtbl1_p8(a, b); @@ -1032,7 +1032,7 @@ // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> -// CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2 +// CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL13_I]] poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) { return vtbl2_p8(a, b); @@ -1053,7 +1053,7 @@ // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 -// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #2 +// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL2_I]] poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) { return vqtbl2_p8(a, b); @@ -1079,7 +1079,7 @@ // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> -// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #2 +// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL26_I]] poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) { return vtbl3_p8(a, b); @@ -1103,7 +1103,7 @@ // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 -// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #2 +// CHECK: 
[[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL3_I]] poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) { return vqtbl3_p8(a, b); @@ -1132,7 +1132,7 @@ // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> -// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #2 +// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL28_I]] poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) { return vtbl4_p8(a, b); @@ -1159,20 +1159,20 @@ // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16 -// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #2 +// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL4_I]] poly8x8_t test_vqtbl4_p8(poly8x16x4_t a, uint8x8_t b) { return vqtbl4_p8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_p8(<16 x i8> %a, <16 x i8> %b) #0 { -// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #2 +// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_p8(<16 x i8> %a, <16 x i8> %b) #1 { +// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #3 // CHECK: ret <16 x i8> [[VTBL1_I]] poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) { return vqtbl1q_p8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_p8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_p8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 { // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[A]], i32 0, i32 0 @@ -1187,13 +1187,13 @@ // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 -// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #2 +// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #3 // CHECK: ret <16 x i8> [[VTBL2_I]] poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) { return vqtbl2q_p8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_p8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_p8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 { 
// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[A]], i32 0, i32 0 @@ -1211,13 +1211,13 @@ // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 -// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #2 +// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #3 // CHECK: ret <16 x i8> [[VTBL3_I]] poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) { return vqtbl3q_p8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_p8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_p8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 { // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[A]], i32 0, i32 0 @@ -1238,7 +1238,7 @@ // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16 -// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #2 +// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #3 // CHECK: ret <16 x i8> [[VTBL4_I]] poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) { return vqtbl4q_p8(a, b); @@ -1246,7 +1246,7 @@ // CHECK-LABEL: define <8 x i8> @test_vtbx1_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> -// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #2 +// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #3 // CHECK: [[TMP0:%.*]] = icmp uge <8 x i8> %c, // CHECK: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> // CHECK: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], %a @@ -1274,7 +1274,7 @@ // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 // CHECK: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> -// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #2 +// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX13_I]] poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) { return vtbx2_p8(a, b, c); @@ -1300,7 +1300,7 @@ // CHECK: [[TMP3:%.*]] = load 
<8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> -// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #2 +// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #3 // CHECK: [[TMP4:%.*]] = icmp uge <8 x i8> %c, // CHECK: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8> // CHECK: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], %a @@ -1335,14 +1335,14 @@ // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 // CHECK: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> -// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #2 +// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX28_I]] poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) { return vtbx4_p8(a, b, c); } -// CHECK-LABEL: define <8 x i8> @test_vqtbx1_p8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #0 { -// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #2 +// CHECK-LABEL: define <8 x i8> @test_vqtbx1_p8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #1 { +// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX1_I]] poly8x8_t test_vqtbx1_p8(poly8x8_t a, uint8x16_t b, uint8x8_t c) { return vqtbx1_p8(a, b, c); @@ -1363,7 +1363,7 @@ // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 -// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #2 +// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX2_I]] poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) { return vqtbx2_p8(a, b, c); @@ -1387,7 +1387,7 @@ // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 -// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #2 +// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX3_I]] poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) { return vqtbx3_p8(a, b, c); @@ -1414,20 +1414,20 @@ // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, 
%struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16 -// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #2 +// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX4_I]] poly8x8_t test_vqtbx4_p8(poly8x8_t a, poly8x16x4_t b, uint8x8_t c) { return vqtbx4_p8(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { -// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #2 +// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #1 { +// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #3 // CHECK: ret <16 x i8> [[VTBX1_I]] poly8x16_t test_vqtbx1q_p8(poly8x16_t a, uint8x16_t b, uint8x16_t c) { return vqtbx1q_p8(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_p8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_p8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 { // CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0 @@ -1442,13 +1442,13 @@ // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 -// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #2 +// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #3 // CHECK: ret <16 x i8> [[VTBX2_I]] poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) { return vqtbx2q_p8(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_p8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_p8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 { // CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0 @@ -1466,13 +1466,13 @@ // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 -// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #2 +// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> 
@llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #3
// CHECK: ret <16 x i8> [[VTBX3_I]]
poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) {
  return vqtbx3q_p8(a, b, c);
}

-// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_p8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_p8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
@@ -1493,8 +1493,11 @@
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #2
+// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #3
// CHECK: ret <16 x i8> [[VTBX4_I]]
poly8x16_t test_vqtbx4q_p8(poly8x16_t a, poly8x16x4_t b, uint8x16_t c) {
  return vqtbx4q_p8(a, b, c);
}
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
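The renumbering in these tests follows one visible rule: definitions whose widest vector argument or return type is 64 bits stay in the attribute group checked as "min-legal-vector-width"="64", while the 128-bit q-form intrinsics move to the group checked as "min-legal-vector-width"="128". A minimal sketch of a reduced test in the same style; the file, function names, and RUN line are illustrative assumptions, not part of this patch:

// Hypothetical reduced test (sketch only; the RUN line and names below are
// assumptions, not lines from this patch).
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN:   -S -emit-llvm -o - %s | FileCheck %s
#include <arm_neon.h>

// The widest vector type here is 64 bits, so this definition should stay
// in the group carrying "min-legal-vector-width"="64".
// CHECK-LABEL: define <8 x i8> @narrow(<8 x i8> %a, <8 x i8> %b) #0 {
int8x8_t narrow(int8x8_t a, int8x8_t b) { return vadd_s8(a, b); }

// The q-form works on 128-bit vectors, so it should be tagged
// "min-legal-vector-width"="128" instead.
// CHECK-LABEL: define <16 x i8> @wide(<16 x i8> %a, <16 x i8> %b) #1 {
int8x16_t wide(int8x16_t a, int8x16_t b) { return vaddq_s8(a, b); }

// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"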
Index: test/CodeGen/aarch64-neon-vget.c
===================================================================
--- test/CodeGen/aarch64-neon-vget.c
+++ test/CodeGen/aarch64-neon-vget.c
@@ -97,14 +97,14 @@
  return vget_lane_f16(a, 1);
}

-// CHECK-LABEL: define i8 @test_vgetq_lane_u8(<16 x i8> %a) #0 {
+// CHECK-LABEL: define i8 @test_vgetq_lane_u8(<16 x i8> %a) #1 {
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGETQ_LANE]]
uint8_t test_vgetq_lane_u8(uint8x16_t a) {
  return vgetq_lane_u8(a, 15);
}

-// CHECK-LABEL: define i16 @test_vgetq_lane_u16(<8 x i16> %a) #0 {
+// CHECK-LABEL: define i16 @test_vgetq_lane_u16(<8 x i16> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -113,7 +113,7 @@
  return vgetq_lane_u16(a, 7);
}

-// CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -122,14 +122,14 @@
  return vgetq_lane_u32(a, 3);
}

-// CHECK-LABEL: define i8 @test_vgetq_lane_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: define i8 @test_vgetq_lane_s8(<16 x i8> %a) #1 {
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGETQ_LANE]]
int8_t test_vgetq_lane_s8(int8x16_t a) {
  return vgetq_lane_s8(a, 15);
}

-// CHECK-LABEL: define i16 @test_vgetq_lane_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: define i16 @test_vgetq_lane_s16(<8 x i16> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -138,7 +138,7 @@
  return vgetq_lane_s16(a, 7);
}

-// CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -147,14 +147,14 @@
  return vgetq_lane_s32(a, 3);
}

-// CHECK-LABEL: define i8 @test_vgetq_lane_p8(<16 x i8> %a) #0 {
+// CHECK-LABEL: define i8 @test_vgetq_lane_p8(<16 x i8> %a) #1 {
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGETQ_LANE]]
poly8_t test_vgetq_lane_p8(poly8x16_t a) {
  return vgetq_lane_p8(a, 15);
}

-// CHECK-LABEL: define i16 @test_vgetq_lane_p16(<8 x i16> %a) #0 {
+// CHECK-LABEL: define i16 @test_vgetq_lane_p16(<8 x i16> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -163,7 +163,7 @@
  return vgetq_lane_p16(a, 7);
}

-// CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
@@ -172,7 +172,7 @@
  return vgetq_lane_f32(a, 3);
}

-// CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #0 {
+// CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #1 {
// CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 16
// CHECK: [[__REINT1_244:%.*]] = alloca i16, align 2
// CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
@@ -208,7 +208,7 @@
  return vget_lane_u64(a, 0);
}

-// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -217,7 +217,7 @@
  return vgetq_lane_s64(a, 1);
}

-// CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #0 {
+// CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -324,14 +324,14 @@
  return vset_lane_f16(*a, b, 3);
}

-// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_u8(i8 %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_u8(i8 %a, <16 x i8> %b) #1 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
  return vsetq_lane_u8(a, b, 15);
}

-// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 %a, <8 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
@@ -340,7 +340,7 @@
  return vsetq_lane_u16(a, b, 7);
}

-// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
@@ -349,14 +349,14 @@
  return vsetq_lane_u32(a, b, 3);
}

-// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_s8(i8 %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_s8(i8 %a, <16 x i8> %b) #1 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
  return vsetq_lane_s8(a, b, 15);
}

-// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 %a, <8 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
@@ -365,7 +365,7 @@
  return vsetq_lane_s16(a, b, 7);
}

-// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
@@ -374,14 +374,14 @@
  return vsetq_lane_s32(a, b, 3);
}

-// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_p8(i8 %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_p8(i8 %a, <16 x i8> %b) #1 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) {
  return vsetq_lane_p8(a, b, 15);
}

-// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 %a, <8 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
@@ -390,7 +390,7 @@
  return vsetq_lane_p16(a, b, 7);
}

-// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #0 {
+// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 3
@@ -399,7 +399,7 @@
  return vsetq_lane_f32(a, b, 3);
}

-// CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #0 {
+// CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #1 {
// CHECK: [[__REINT_248:%.*]] = alloca half, align 2
// CHECK: [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
// CHECK: [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
@@ -439,7 +439,7 @@
  return vset_lane_u64(a, b, 0);
}

-// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
@@ -448,7 +448,7 @@
  return vsetq_lane_s64(a, b, 1);
}

-// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
@@ -456,3 +456,6 @@
uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
  return vsetq_lane_u64(a, b, 1);
}
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
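Worth noting from the file above: the vgetq_lane tests return plain scalars, yet they still move to the 128-bit group, because the 128-bit vector parameter alone is enough to raise the tracked width. A reduced illustration under the same assumptions as the earlier sketch (hypothetical name and RUN line):

// Hypothetical sketch; the name and RUN line are assumptions.
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN:   -S -emit-llvm -o - %s | FileCheck %s
#include <arm_neon.h>

// The result is a scalar i8, but the <16 x i8> argument is 128 bits wide,
// so the definition should still carry "min-legal-vector-width"="128".
// CHECK-LABEL: define i8 @last_lane(<16 x i8> %v) #0 {
uint8_t last_lane(uint8x16_t v) {
  return vgetq_lane_u8(v, 15);
}

// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="128"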
Index: test/CodeGen/aarch64-poly64.c
===================================================================
--- test/CodeGen/aarch64-poly64.c
+++ test/CodeGen/aarch64-poly64.c
@@ -14,7 +14,7 @@
  return vceq_p64(a, b);
}

-// CHECK-LABEL: define <2 x i64> @test_vceqq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vceqq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
@@ -31,7 +31,7 @@
  return vtst_p64(a, b);
}

-// CHECK-LABEL: define <2 x i64> @test_vtstq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vtstq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[TMP4:%.*]] = and <2 x i64> %a, %b
// CHECK: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
@@ -50,7 +50,7 @@
  return vbsl_p64(a, b, c);
}

-// CHECK-LABEL: define <2 x i64> @test_vbslq_p64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vbslq_p64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #1 {
// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %a, %b
// CHECK: [[TMP3:%.*]] = xor <2 x i64> %a, <i64 -1, i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %c
@@ -69,7 +69,7 @@
  return vget_lane_p64(v, 0);
}

-// CHECK-LABEL: define i64 @test_vgetq_lane_p64(<2 x i64> %v) #0 {
+// CHECK-LABEL: define i64 @test_vgetq_lane_p64(<2 x i64> %v) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -87,7 +87,7 @@
  return vset_lane_p64(a, v, 0);
}

-// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_p64(i64 %a, <2 x i64> %v) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_p64(i64 %a, <2 x i64> %v) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
@@ -109,7 +109,7 @@
}

-// CHECK-LABEL: define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
@@ -121,7 +121,7 @@
  return vcopyq_lane_p64(a, 1, b, 0);
}

-// CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -146,7 +146,7 @@
poly64x1_t test_vdup_n_p64(poly64_t a) {
  return vdup_n_p64(a);
}

-// CHECK-LABEL: define <2 x i64> @test_vdupq_n_p64(i64 %a) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vdupq_n_p64(i64 %a) #1 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VECINIT1_I]]
@@ -161,7 +161,7 @@
  return vmov_n_p64(a);
}

-// CHECK-LABEL: define <2 x i64> @test_vmovq_n_p64(i64 %a) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vmovq_n_p64(i64 %a) #1 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VECINIT1_I]]
@@ -176,21 +176,21 @@
  return vdup_lane_p64(vec, 0);
}

-// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_p64(<1 x i64> %vec) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_p64(<1 x i64> %vec) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %vec, <1 x i64> %vec, <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[SHUFFLE]]
poly64x2_t test_vdupq_lane_p64(poly64x1_t vec) {
  return vdupq_lane_p64(vec, 0);
}

-// CHECK-LABEL: define <2 x i64> @test_vdupq_laneq_p64(<2 x i64> %vec) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vdupq_laneq_p64(<2 x i64> %vec) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i64> %vec, <2 x i64> %vec, <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE]]
poly64x2_t test_vdupq_laneq_p64(poly64x2_t vec) {
  return vdupq_laneq_p64(vec, 1);
}

-// CHECK-LABEL: define <2 x i64> @test_vcombine_p64(<1 x i64> %low, <1 x i64> %high) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vcombine_p64(<1 x i64> %low, <1 x i64> %high) #1 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %low, <1 x i64> %high, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vcombine_p64(poly64x1_t low, poly64x1_t high) {
@@ -206,7 +206,7 @@
  return vld1_p64(ptr);
}

-// CHECK-LABEL: define <2 x i64> @test_vld1q_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vld1q_p64(i64* %ptr) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
@@ -226,7 +226,7 @@
  return vst1_p64(ptr, val);
}

-// CHECK-LABEL: define void @test_vst1q_p64(i64* %ptr, <2 x i64> %val) #0 {
+// CHECK-LABEL: define void @test_vst1q_p64(i64* %ptr, <2 x i64> %val) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %val to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
@@ -237,7 +237,7 @@
  return vst1q_p64(ptr, val);
}

-// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
@@ -255,7 +255,7 @@
  return vld2_p64(ptr);
}

-// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
@@ -273,7 +273,7 @@
  return vld2q_p64(ptr);
}

-// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
@@ -291,7 +291,7 @@
  return vld3_p64(ptr);
}

-// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
@@ -309,7 +309,7 @@
  return vld3q_p64(ptr);
}

-// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
@@ -327,7 +327,7 @@
  return vld4_p64(ptr);
}

-// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
@@ -345,7 +345,7 @@
  return vld4q_p64(ptr);
}

-// CHECK-LABEL: define void @test_vst2_p64(i64* %ptr, [2 x <1 x i64>] %val.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2_p64(i64* %ptr, [2 x <1 x i64>] %val.coerce) #2 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[VAL]], i32 0, i32 0
@@ -370,7 +370,7 @@
  return vst2_p64(ptr, val);
}

-// CHECK-LABEL: define void @test_vst2q_p64(i64* %ptr, [2 x <2 x i64>] %val.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2q_p64(i64* %ptr, [2 x <2 x i64>] %val.coerce) #2 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[VAL]], i32 0, i32 0
@@ -395,7 +395,7 @@
  return vst2q_p64(ptr, val);
}

-// CHECK-LABEL: define void @test_vst3_p64(i64* %ptr, [3 x <1 x i64>] %val.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_p64(i64* %ptr, [3 x <1 x i64>] %val.coerce) #2 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[VAL]], i32 0, i32 0
@@ -425,7 +425,7 @@
  return vst3_p64(ptr, val);
}

-// CHECK-LABEL: define void @test_vst3q_p64(i64* %ptr, [3 x <2 x i64>] %val.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_p64(i64* %ptr, [3 x <2 x i64>] %val.coerce) #2 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[VAL]], i32 0, i32 0
@@ -455,7 +455,7 @@
  return vst3q_p64(ptr, val);
}

-// CHECK-LABEL: define void @test_vst4_p64(i64* %ptr, [4 x <1 x i64>] %val.coerce) #0 {
%ptr, [4 x <1 x i64>] %val.coerce) #0 { +// CHECK-LABEL: define void @test_vst4_p64(i64* %ptr, [4 x <1 x i64>] %val.coerce) #2 { // CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[VAL]], i32 0, i32 0 @@ -490,7 +490,7 @@ return vst4_p64(ptr, val); } -// CHECK-LABEL: define void @test_vst4q_p64(i64* %ptr, [4 x <2 x i64>] %val.coerce) #0 { +// CHECK-LABEL: define void @test_vst4q_p64(i64* %ptr, [4 x <2 x i64>] %val.coerce) #2 { // CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[VAL]], i32 0, i32 0 @@ -537,7 +537,7 @@ } -// CHECK-LABEL: define <2 x i64> @test_vextq_p64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: define <2 x i64> @test_vextq_p64(<2 x i64> %a, <2 x i64> %b) #1 { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> @@ -548,42 +548,42 @@ return vextq_p64(a, b, 1); } -// CHECK-LABEL: define <2 x i64> @test_vzip1q_p64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: define <2 x i64> @test_vzip1q_p64(<2 x i64> %a, <2 x i64> %b) #1 { // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> // CHECK: ret <2 x i64> [[SHUFFLE_I]] poly64x2_t test_vzip1q_p64(poly64x2_t a, poly64x2_t b) { return vzip1q_p64(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vzip2q_p64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: define <2 x i64> @test_vzip2q_p64(<2 x i64> %a, <2 x i64> %b) #1 { // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> // CHECK: ret <2 x i64> [[SHUFFLE_I]] poly64x2_t test_vzip2q_p64(poly64x2_t a, poly64x2_t b) { return vzip2q_u64(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vuzp1q_p64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: define <2 x i64> @test_vuzp1q_p64(<2 x i64> %a, <2 x i64> %b) #1 { // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> // CHECK: ret <2 x i64> [[SHUFFLE_I]] poly64x2_t test_vuzp1q_p64(poly64x2_t a, poly64x2_t b) { return vuzp1q_p64(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vuzp2q_p64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: define <2 x i64> @test_vuzp2q_p64(<2 x i64> %a, <2 x i64> %b) #1 { // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> // CHECK: ret <2 x i64> [[SHUFFLE_I]] poly64x2_t test_vuzp2q_p64(poly64x2_t a, poly64x2_t b) { return vuzp2q_u64(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vtrn1q_p64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: define <2 x i64> @test_vtrn1q_p64(<2 x i64> %a, <2 x i64> %b) #1 { // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> // CHECK: ret <2 x i64> [[SHUFFLE_I]] poly64x2_t test_vtrn1q_p64(poly64x2_t a, poly64x2_t b) { return vtrn1q_p64(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vtrn2q_p64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: define <2 x i64> @test_vtrn2q_p64(<2 x i64> %a, <2 x i64> %b) #1 { // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> // CHECK: ret <2 x i64> [[SHUFFLE_I]] poly64x2_t test_vtrn2q_p64(poly64x2_t a, poly64x2_t b) { @@ -601,7 +601,7 @@ return vsri_n_p64(a, b, 33); } -// CHECK-LABEL: define <2 x 
i64> @test_vsriq_n_p64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: define <2 x i64> @test_vsriq_n_p64(<2 x i64> %a, <2 x i64> %b) #1 { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> @@ -612,3 +612,6 @@ return vsriq_n_p64(a, b, 64); } +// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64" +// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128" +// CHECK-NOT: attributes #2 ={{.*}}"min-legal-vector-width" Index: test/CodeGen/arm-neon-fma.c =================================================================== --- test/CodeGen/arm-neon-fma.c +++ test/CodeGen/arm-neon-fma.c @@ -8,14 +8,14 @@ #include // CHECK-LABEL: define <2 x float> @test_fma_order(<2 x float> %accum, <2 x float> %lhs, <2 x float> %rhs) #0 { -// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %rhs, <2 x float> %accum) #2 +// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %rhs, <2 x float> %accum) #3 // CHECK: ret <2 x float> [[TMP6]] float32x2_t test_fma_order(float32x2_t accum, float32x2_t lhs, float32x2_t rhs) { return vfma_f32(accum, lhs, rhs); } -// CHECK-LABEL: define <4 x float> @test_fmaq_order(<4 x float> %accum, <4 x float> %lhs, <4 x float> %rhs) #0 { -// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %rhs, <4 x float> %accum) #2 +// CHECK-LABEL: define <4 x float> @test_fmaq_order(<4 x float> %accum, <4 x float> %lhs, <4 x float> %rhs) #1 { +// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %rhs, <4 x float> %accum) #3 // CHECK: ret <4 x float> [[TMP6]] float32x4_t test_fmaq_order(float32x4_t accum, float32x4_t lhs, float32x4_t rhs) { return vfmaq_f32(accum, lhs, rhs); @@ -32,7 +32,7 @@ return vfma_n_f32(a, b, n); } -// CHECK-LABEL: define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 { +// CHECK-LABEL: define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #1 { // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %n, i32 2 @@ -44,3 +44,6 @@ float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n) { return vfmaq_n_f32(a, b, n); } + +// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64" +// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128" Index: test/CodeGen/arm-neon-numeric-maxmin.c =================================================================== --- test/CodeGen/arm-neon-numeric-maxmin.c +++ test/CodeGen/arm-neon-numeric-maxmin.c @@ -3,29 +3,32 @@ #include // CHECK-LABEL: define <2 x float> @test_vmaxnm_f32(<2 x float> %a, <2 x float> %b) #0 { -// CHECK: [[VMAXNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> %a, <2 x float> %b) #2 +// CHECK: [[VMAXNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> %a, <2 x float> %b) #3 // CHECK: ret <2 x float> [[VMAXNM_V2_I]] float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) { return vmaxnm_f32(a, b); } -// CHECK-LABEL: define <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 { -// CHECK: [[VMAXNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> %a, <4 x float> %b) #2 +// CHECK-LABEL: define <4 x float> 
@test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #1 { +// CHECK: [[VMAXNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> %a, <4 x float> %b) #3 // CHECK: ret <4 x float> [[VMAXNMQ_V2_I]] float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) { return vmaxnmq_f32(a, b); } // CHECK-LABEL: define <2 x float> @test_vminnm_f32(<2 x float> %a, <2 x float> %b) #0 { -// CHECK: [[VMINNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> %a, <2 x float> %b) #2 +// CHECK: [[VMINNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> %a, <2 x float> %b) #3 // CHECK: ret <2 x float> [[VMINNM_V2_I]] float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) { return vminnm_f32(a, b); } -// CHECK-LABEL: define <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #0 { -// CHECK: [[VMINNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> %a, <4 x float> %b) #2 +// CHECK-LABEL: define <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #1 { +// CHECK: [[VMINNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> %a, <4 x float> %b) #3 // CHECK: ret <4 x float> [[VMINNMQ_V2_I]] float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) { return vminnmq_f32(a, b); } + +// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64" +// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128" Index: test/CodeGen/arm-neon-vcvtX.c =================================================================== --- test/CodeGen/arm-neon-vcvtX.c +++ test/CodeGen/arm-neon-vcvtX.c @@ -3,113 +3,116 @@ #include // CHECK-LABEL: define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) #0 { -// CHECK: [[VCVTA_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %a) #2 +// CHECK: [[VCVTA_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %a) #3 // CHECK: ret <2 x i32> [[VCVTA_S32_V1_I]] int32x2_t test_vcvta_s32_f32(float32x2_t a) { return vcvta_s32_f32(a); } // CHECK-LABEL: define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) #0 { -// CHECK: [[VCVTA_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %a) #2 +// CHECK: [[VCVTA_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %a) #3 // CHECK: ret <2 x i32> [[VCVTA_U32_V1_I]] uint32x2_t test_vcvta_u32_f32(float32x2_t a) { return vcvta_u32_f32(a); } -// CHECK-LABEL: define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) #0 { -// CHECK: [[VCVTAQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %a) #2 +// CHECK-LABEL: define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) #1 { +// CHECK: [[VCVTAQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %a) #3 // CHECK: ret <4 x i32> [[VCVTAQ_S32_V1_I]] int32x4_t test_vcvtaq_s32_f32(float32x4_t a) { return vcvtaq_s32_f32(a); } -// CHECK-LABEL: define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) #0 { -// CHECK: [[VCVTAQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %a) #2 +// CHECK-LABEL: define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) #1 { +// CHECK: [[VCVTAQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %a) #3 // CHECK: ret <4 x i32> [[VCVTAQ_U32_V1_I]] uint32x4_t test_vcvtaq_u32_f32(float32x4_t a) { return vcvtaq_u32_f32(a); } // CHECK-LABEL: define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) #0 { -// CHECK: [[VCVTN_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %a) #2 
+// CHECK: [[VCVTN_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %a) #3
 // CHECK: ret <2 x i32> [[VCVTN_S32_V1_I]]
 int32x2_t test_vcvtn_s32_f32(float32x2_t a) {
   return vcvtn_s32_f32(a);
 }

 // CHECK-LABEL: define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) #0 {
-// CHECK: [[VCVTN_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %a) #2
+// CHECK: [[VCVTN_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %a) #3
 // CHECK: ret <2 x i32> [[VCVTN_U32_V1_I]]
 uint32x2_t test_vcvtn_u32_f32(float32x2_t a) {
   return vcvtn_u32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) #0 {
-// CHECK: [[VCVTNQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) #1 {
+// CHECK: [[VCVTNQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %a) #3
 // CHECK: ret <4 x i32> [[VCVTNQ_S32_V1_I]]
 int32x4_t test_vcvtnq_s32_f32(float32x4_t a) {
   return vcvtnq_s32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) #0 {
-// CHECK: [[VCVTNQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) #1 {
+// CHECK: [[VCVTNQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %a) #3
 // CHECK: ret <4 x i32> [[VCVTNQ_U32_V1_I]]
 uint32x4_t test_vcvtnq_u32_f32(float32x4_t a) {
   return vcvtnq_u32_f32(a);
 }

 // CHECK-LABEL: define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) #0 {
-// CHECK: [[VCVTP_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %a) #2
+// CHECK: [[VCVTP_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %a) #3
 // CHECK: ret <2 x i32> [[VCVTP_S32_V1_I]]
 int32x2_t test_vcvtp_s32_f32(float32x2_t a) {
   return vcvtp_s32_f32(a);
 }

 // CHECK-LABEL: define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) #0 {
-// CHECK: [[VCVTP_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %a) #2
+// CHECK: [[VCVTP_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %a) #3
 // CHECK: ret <2 x i32> [[VCVTP_U32_V1_I]]
 uint32x2_t test_vcvtp_u32_f32(float32x2_t a) {
   return vcvtp_u32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) #0 {
-// CHECK: [[VCVTPQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) #1 {
+// CHECK: [[VCVTPQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %a) #3
 // CHECK: ret <4 x i32> [[VCVTPQ_S32_V1_I]]
 int32x4_t test_vcvtpq_s32_f32(float32x4_t a) {
   return vcvtpq_s32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) #0 {
-// CHECK: [[VCVTPQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) #1 {
+// CHECK: [[VCVTPQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %a) #3
 // CHECK: ret <4 x i32> [[VCVTPQ_U32_V1_I]]
 uint32x4_t test_vcvtpq_u32_f32(float32x4_t a) {
   return vcvtpq_u32_f32(a);
 }

 // CHECK-LABEL: define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) #0 {
-// CHECK: [[VCVTM_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %a) #2
+// CHECK: [[VCVTM_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %a) #3
 // CHECK: ret <2 x i32> [[VCVTM_S32_V1_I]]
 int32x2_t test_vcvtm_s32_f32(float32x2_t a) {
   return vcvtm_s32_f32(a);
 }

 // CHECK-LABEL: define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) #0 {
-// CHECK: [[VCVTM_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %a) #2
+// CHECK: [[VCVTM_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %a) #3
 // CHECK: ret <2 x i32> [[VCVTM_U32_V1_I]]
 uint32x2_t test_vcvtm_u32_f32(float32x2_t a) {
   return vcvtm_u32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) #0 {
-// CHECK: [[VCVTMQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) #1 {
+// CHECK: [[VCVTMQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %a) #3
 // CHECK: ret <4 x i32> [[VCVTMQ_S32_V1_I]]
 int32x4_t test_vcvtmq_s32_f32(float32x4_t a) {
   return vcvtmq_s32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) #0 {
-// CHECK: [[VCVTMQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) #1 {
+// CHECK: [[VCVTMQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %a) #3
 // CHECK: ret <4 x i32> [[VCVTMQ_U32_V1_I]]
 uint32x4_t test_vcvtmq_u32_f32(float32x4_t a) {
   return vcvtmq_u32_f32(a);
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
Index: test/CodeGen/arm64_vdupq_n_f64.c
===================================================================
--- test/CodeGen/arm64_vdupq_n_f64.c
+++ test/CodeGen/arm64_vdupq_n_f64.c
@@ -44,7 +44,7 @@
   return vmovq_n_f64(w);
 }

-// CHECK-LABEL: define <4 x half> @test_vmov_n_f16(half* %a1) #0 {
+// CHECK-LABEL: define <4 x half> @test_vmov_n_f16(half* %a1) #1 {
 // CHECK: [[TMP0:%.*]] = load half, half* %a1, align 2
 // CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
 // CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
@@ -76,3 +76,5 @@
   return vmovq_n_f16(*a1);
 }

+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="128"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="64"
Index: test/CodeGen/x86-vector-width.c
===================================================================
--- /dev/null
+++ test/CodeGen/x86-vector-width.c
@@ -0,0 +1,61 @@
+// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu i686 -emit-llvm %s -o - | FileCheck %s
+
+typedef signed long long V2LLi __attribute__((vector_size(16)));
+typedef signed long long V4LLi __attribute__((vector_size(32)));
+
+V2LLi ret_128();
+V4LLi ret_256();
+void arg_128(V2LLi);
+void arg_256(V4LLi);
+
+// Make sure return type forces a min-legal-width
+V2LLi foo(void) {
+  return (V2LLi){ 0, 0 };
+}
+
+V4LLi goo(void) {
+  return (V4LLi){ 0, 0 };
+}
+
+// Make sure return type of called function forces a min-legal-width
+void hoo(void) {
+  V2LLi tmp_V2LLi;
+  tmp_V2LLi = ret_128();
+}
+
+void joo(void) {
+  V4LLi tmp_V4LLi;
+  tmp_V4LLi = ret_256();
+}
+
+// Make sure arg type of called function forces a min-legal-width
+void koo(void) {
+  V2LLi tmp_V2LLi;
+  arg_128(tmp_V2LLi);
+}
+
+void loo(void) {
+  V4LLi tmp_V4LLi;
+  arg_256(tmp_V4LLi);
+}
+
+// Make sure arg type of our function forces a min-legal-width
+void moo(V2LLi x) {
+
+}
+
+void noo(V4LLi x) {
+
+}
+
+// CHECK: {{.*}}@foo{{.*}} #0
+// CHECK: {{.*}}@goo{{.*}} #1
+// CHECK: {{.*}}@hoo{{.*}} #0
+// CHECK: {{.*}}@joo{{.*}} #1
+// CHECK: {{.*}}@koo{{.*}} #0
+// CHECK: {{.*}}@loo{{.*}} #1
+// CHECK: {{.*}}@moo{{.*}} #0
+// CHECK: {{.*}}@noo{{.*}} #1
+
+// CHECK: #0 = {{.*}}"min-legal-vector-width"="128"
+// CHECK: #1 = {{.*}}"min-legal-vector-width"="256"
Index: test/CodeGenOpenCL/fpmath.cl
===================================================================
--- test/CodeGenOpenCL/fpmath.cl
+++ test/CodeGenOpenCL/fpmath.cl
@@ -16,7 +16,7 @@

 float4 spvectordiv(float4 a, float4 b) {
   // CHECK: @spvectordiv
-  // CHECK: #[[ATTR]]
+  // CHECK: #[[ATTR2:[0-9]+]]
   // CHECK: fdiv{{.*}},
   // NODIVOPT: !fpmath ![[MD]]
   // DIVOPT-NOT: !fpmath ![[MD]]
@@ -45,7 +45,11 @@
 #endif

 // CHECK: attributes #[[ATTR]] = {
-// NODIVOPT: "correctly-rounded-divide-sqrt-fp-math"="false"
-// DIVOPT: "correctly-rounded-divide-sqrt-fp-math"="true"
-// CHECK: }
+// NODIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="false"
+// DIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="true"
+// CHECK-SAME: }
+// CHECK: attributes #[[ATTR2]] = {
+// NODIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="false"
+// DIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="true"
+// CHECK-SAME: }
 // NODIVOPT: ![[MD]] = !{float 2.500000e+00}